Coverage Report

Created: 2023-11-19 06:13

/src/libxml2-2.11.5/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing a start tag: two string pointers (prefix, URI) and
 * two integers (line, nsNr).
 * NOTE(review): presumably one record is kept per open element so the
 * matching end tag can be checked -- confirm against the code that
 * fills and reads it.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;  /* presumably the element's namespace prefix -- TODO confirm */
93
    const xmlChar *URI;     /* presumably the namespace URI bound to prefix -- TODO confirm */
94
    int line;               /* presumably the input line of the start tag -- TODO confirm */
95
    int nsNr;               /* presumably the number of namespaces the tag declared -- TODO confirm */
96
};
97
98
/*
 * Forward declarations of file-local (static) functions; their
 * definitions are not part of this section of the file.
 */
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
0
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
0
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
0
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * Arbitrary depth limit for the XML documents that we are willing to
145
 * process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
4.64M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
24.5M
#define XML_PARSER_BUFFER_SIZE 100
156
8.21k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expects to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as the input's available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of XML prefixed PI allowed by W3C specs
171
 */
172
173
/* Table of PI target names; the array is terminated by a NULL entry. */
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL    /* end-of-list marker */
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
/*
 * Forward declarations of further file-local (static) functions; their
 * definitions are not part of this section of the file.
 */
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
/* Only declared when legacy support is compiled in. */
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
41.5k
{
224
41.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
41.5k
        (ctxt->instate == XML_PARSER_EOF))
226
0
  return;
227
41.5k
    if (ctxt != NULL)
228
41.5k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
41.5k
    if (prefix == NULL)
231
29.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
29.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
29.4k
                        (const char *) localname, NULL, NULL, 0, 0,
234
29.4k
                        "Attribute %s redefined\n", localname);
235
12.0k
    else
236
12.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
12.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
12.0k
                        (const char *) prefix, (const char *) localname,
239
12.0k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
12.0k
                        localname);
241
41.5k
    if (ctxt != NULL) {
242
41.5k
  ctxt->wellFormed = 0;
243
41.5k
  if (ctxt->recovery == 0)
244
41.5k
      ctxt->disableSAX = 1;
245
41.5k
    }
246
41.5k
}
247
248
/**
249
 * xmlFatalErr:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @extra:  extra information string
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void
257
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
258
87.5k
{
259
87.5k
    const char *errmsg;
260
261
87.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262
87.5k
        (ctxt->instate == XML_PARSER_EOF))
263
728
  return;
264
86.7k
    switch (error) {
265
5.47k
        case XML_ERR_INVALID_HEX_CHARREF:
266
5.47k
            errmsg = "CharRef: invalid hexadecimal value";
267
5.47k
            break;
268
8.37k
        case XML_ERR_INVALID_DEC_CHARREF:
269
8.37k
            errmsg = "CharRef: invalid decimal value";
270
8.37k
            break;
271
0
        case XML_ERR_INVALID_CHARREF:
272
0
            errmsg = "CharRef: invalid value";
273
0
            break;
274
2.28k
        case XML_ERR_INTERNAL_ERROR:
275
2.28k
            errmsg = "internal error";
276
2.28k
            break;
277
0
        case XML_ERR_PEREF_AT_EOF:
278
0
            errmsg = "PEReference at end of document";
279
0
            break;
280
0
        case XML_ERR_PEREF_IN_PROLOG:
281
0
            errmsg = "PEReference in prolog";
282
0
            break;
283
0
        case XML_ERR_PEREF_IN_EPILOG:
284
0
            errmsg = "PEReference in epilog";
285
0
            break;
286
0
        case XML_ERR_PEREF_NO_NAME:
287
0
            errmsg = "PEReference: no name";
288
0
            break;
289
1.11k
        case XML_ERR_PEREF_SEMICOL_MISSING:
290
1.11k
            errmsg = "PEReference: expecting ';'";
291
1.11k
            break;
292
0
        case XML_ERR_ENTITY_LOOP:
293
0
            errmsg = "Detected an entity reference loop";
294
0
            break;
295
0
        case XML_ERR_ENTITY_NOT_STARTED:
296
0
            errmsg = "EntityValue: \" or ' expected";
297
0
            break;
298
87
        case XML_ERR_ENTITY_PE_INTERNAL:
299
87
            errmsg = "PEReferences forbidden in internal subset";
300
87
            break;
301
121
        case XML_ERR_ENTITY_NOT_FINISHED:
302
121
            errmsg = "EntityValue: \" or ' expected";
303
121
            break;
304
371
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
305
371
            errmsg = "AttValue: \" or ' expected";
306
371
            break;
307
267
        case XML_ERR_LT_IN_ATTRIBUTE:
308
267
            errmsg = "Unescaped '<' not allowed in attributes values";
309
267
            break;
310
1.23k
        case XML_ERR_LITERAL_NOT_STARTED:
311
1.23k
            errmsg = "SystemLiteral \" or ' expected";
312
1.23k
            break;
313
634
        case XML_ERR_LITERAL_NOT_FINISHED:
314
634
            errmsg = "Unfinished System or Public ID \" or ' expected";
315
634
            break;
316
314
        case XML_ERR_MISPLACED_CDATA_END:
317
314
            errmsg = "Sequence ']]>' not allowed in content";
318
314
            break;
319
935
        case XML_ERR_URI_REQUIRED:
320
935
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
321
935
            break;
322
302
        case XML_ERR_PUBID_REQUIRED:
323
302
            errmsg = "PUBLIC, the Public Identifier is missing";
324
302
            break;
325
875
        case XML_ERR_HYPHEN_IN_COMMENT:
326
875
            errmsg = "Comment must not contain '--' (double-hyphen)";
327
875
            break;
328
345
        case XML_ERR_PI_NOT_STARTED:
329
345
            errmsg = "xmlParsePI : no target name";
330
345
            break;
331
229
        case XML_ERR_RESERVED_XML_NAME:
332
229
            errmsg = "Invalid PI name";
333
229
            break;
334
575
        case XML_ERR_NOTATION_NOT_STARTED:
335
575
            errmsg = "NOTATION: Name expected here";
336
575
            break;
337
1.32k
        case XML_ERR_NOTATION_NOT_FINISHED:
338
1.32k
            errmsg = "'>' required to close NOTATION declaration";
339
1.32k
            break;
340
855
        case XML_ERR_VALUE_REQUIRED:
341
855
            errmsg = "Entity value required";
342
855
            break;
343
73
        case XML_ERR_URI_FRAGMENT:
344
73
            errmsg = "Fragment not allowed";
345
73
            break;
346
419
        case XML_ERR_ATTLIST_NOT_STARTED:
347
419
            errmsg = "'(' required to start ATTLIST enumeration";
348
419
            break;
349
62
        case XML_ERR_NMTOKEN_REQUIRED:
350
62
            errmsg = "NmToken expected in ATTLIST enumeration";
351
62
            break;
352
131
        case XML_ERR_ATTLIST_NOT_FINISHED:
353
131
            errmsg = "')' required to finish ATTLIST enumeration";
354
131
            break;
355
55
        case XML_ERR_MIXED_NOT_STARTED:
356
55
            errmsg = "MixedContentDecl : '|' or ')*' expected";
357
55
            break;
358
0
        case XML_ERR_PCDATA_REQUIRED:
359
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
360
0
            break;
361
852
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
362
852
            errmsg = "ContentDecl : Name or '(' expected";
363
852
            break;
364
760
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365
760
            errmsg = "ContentDecl : ',' '|' or ')' expected";
366
760
            break;
367
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
368
0
            errmsg =
369
0
                "PEReference: forbidden within markup decl in internal subset";
370
0
            break;
371
2.00k
        case XML_ERR_GT_REQUIRED:
372
2.00k
            errmsg = "expected '>'";
373
2.00k
            break;
374
0
        case XML_ERR_CONDSEC_INVALID:
375
0
            errmsg = "XML conditional section '[' expected";
376
0
            break;
377
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378
0
            errmsg = "Content error in the external subset";
379
0
            break;
380
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
381
0
            errmsg =
382
0
                "conditional section INCLUDE or IGNORE keyword expected";
383
0
            break;
384
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
385
0
            errmsg = "XML conditional section not closed";
386
0
            break;
387
0
        case XML_ERR_XMLDECL_NOT_STARTED:
388
0
            errmsg = "Text declaration '<?xml' required";
389
0
            break;
390
430
        case XML_ERR_XMLDECL_NOT_FINISHED:
391
430
            errmsg = "parsing XML declaration: '?>' expected";
392
430
            break;
393
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
394
0
            errmsg = "external parsed entities cannot be standalone";
395
0
            break;
396
53.8k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397
53.8k
            errmsg = "EntityRef: expecting ';'";
398
53.8k
            break;
399
175
        case XML_ERR_DOCTYPE_NOT_FINISHED:
400
175
            errmsg = "DOCTYPE improperly terminated";
401
175
            break;
402
0
        case XML_ERR_LTSLASH_REQUIRED:
403
0
            errmsg = "EndTag: '</' not found";
404
0
            break;
405
29
        case XML_ERR_EQUAL_REQUIRED:
406
29
            errmsg = "expected '='";
407
29
            break;
408
138
        case XML_ERR_STRING_NOT_CLOSED:
409
138
            errmsg = "String not closed expecting \" or '";
410
138
            break;
411
48
        case XML_ERR_STRING_NOT_STARTED:
412
48
            errmsg = "String not started expecting ' or \"";
413
48
            break;
414
5
        case XML_ERR_ENCODING_NAME:
415
5
            errmsg = "Invalid XML encoding name";
416
5
            break;
417
23
        case XML_ERR_STANDALONE_VALUE:
418
23
            errmsg = "standalone accepts only 'yes' or 'no'";
419
23
            break;
420
221
        case XML_ERR_DOCUMENT_EMPTY:
421
221
            errmsg = "Document is empty";
422
221
            break;
423
1.18k
        case XML_ERR_DOCUMENT_END:
424
1.18k
            errmsg = "Extra content at the end of the document";
425
1.18k
            break;
426
0
        case XML_ERR_NOT_WELL_BALANCED:
427
0
            errmsg = "chunk is not well balanced";
428
0
            break;
429
0
        case XML_ERR_EXTRA_CONTENT:
430
0
            errmsg = "extra content at the end of well balanced chunk";
431
0
            break;
432
608
        case XML_ERR_VERSION_MISSING:
433
608
            errmsg = "Malformed declaration expecting version";
434
608
            break;
435
12
        case XML_ERR_NAME_TOO_LONG:
436
12
            errmsg = "Name too long";
437
12
            break;
438
#if 0
439
        case:
440
            errmsg = "";
441
            break;
442
#endif
443
0
        default:
444
0
            errmsg = "Unregistered error message";
445
86.7k
    }
446
86.7k
    if (ctxt != NULL)
447
86.7k
  ctxt->errNo = error;
448
86.7k
    if (info == NULL) {
449
84.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450
84.4k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451
84.4k
                        errmsg);
452
84.4k
    } else {
453
2.29k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454
2.29k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455
2.29k
                        errmsg, info);
456
2.29k
    }
457
86.7k
    if (ctxt != NULL) {
458
86.7k
  ctxt->wellFormed = 0;
459
86.7k
  if (ctxt->recovery == 0)
460
86.7k
      ctxt->disableSAX = 1;
461
86.7k
    }
462
86.7k
}
463
464
/**
465
 * xmlFatalErrMsg:
466
 * @ctxt:  an XML parser context
467
 * @error:  the error number
468
 * @msg:  the error message
469
 *
470
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
471
 */
472
static void LIBXML_ATTR_FORMAT(3,0)
473
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474
               const char *msg)
475
120k
{
476
120k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477
120k
        (ctxt->instate == XML_PARSER_EOF))
478
0
  return;
479
120k
    if (ctxt != NULL)
480
120k
  ctxt->errNo = error;
481
120k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482
120k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483
120k
    if (ctxt != NULL) {
484
120k
  ctxt->wellFormed = 0;
485
120k
  if (ctxt->recovery == 0)
486
120k
      ctxt->disableSAX = 1;
487
120k
    }
488
120k
}
489
490
/**
491
 * xmlWarningMsg:
492
 * @ctxt:  an XML parser context
493
 * @error:  the error number
494
 * @msg:  the error message
495
 * @str1:  extra data
496
 * @str2:  extra data
497
 *
498
 * Handle a warning.
499
 */
500
static void LIBXML_ATTR_FORMAT(3,0)
501
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502
              const char *msg, const xmlChar *str1, const xmlChar *str2)
503
3.28k
{
504
3.28k
    xmlStructuredErrorFunc schannel = NULL;
505
506
3.28k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507
3.28k
        (ctxt->instate == XML_PARSER_EOF))
508
0
  return;
509
3.28k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510
3.28k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
511
3.28k
        schannel = ctxt->sax->serror;
512
3.28k
    if (ctxt != NULL) {
513
3.28k
        __xmlRaiseError(schannel,
514
3.28k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
515
3.28k
                    ctxt->userData,
516
3.28k
                    ctxt, NULL, XML_FROM_PARSER, error,
517
3.28k
                    XML_ERR_WARNING, NULL, 0,
518
3.28k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
519
3.28k
        msg, (const char *) str1, (const char *) str2);
520
3.28k
    } else {
521
0
        __xmlRaiseError(schannel, NULL, NULL,
522
0
                    ctxt, NULL, XML_FROM_PARSER, error,
523
0
                    XML_ERR_WARNING, NULL, 0,
524
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
525
0
        msg, (const char *) str1, (const char *) str2);
526
0
    }
527
3.28k
}
528
529
/**
530
 * xmlValidityError:
531
 * @ctxt:  an XML parser context
532
 * @error:  the error number
533
 * @msg:  the error message
534
 * @str1:  extra data
535
 *
536
 * Handle a validity error.
537
 */
538
static void LIBXML_ATTR_FORMAT(3,0)
539
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540
              const char *msg, const xmlChar *str1, const xmlChar *str2)
541
1.31k
{
542
1.31k
    xmlStructuredErrorFunc schannel = NULL;
543
544
1.31k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545
1.31k
        (ctxt->instate == XML_PARSER_EOF))
546
0
  return;
547
1.31k
    if (ctxt != NULL) {
548
1.31k
  ctxt->errNo = error;
549
1.31k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550
1.31k
      schannel = ctxt->sax->serror;
551
1.31k
    }
552
1.31k
    if (ctxt != NULL) {
553
1.31k
        __xmlRaiseError(schannel,
554
1.31k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
555
1.31k
                    ctxt, NULL, XML_FROM_DTD, error,
556
1.31k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
557
1.31k
        (const char *) str2, NULL, 0, 0,
558
1.31k
        msg, (const char *) str1, (const char *) str2);
559
1.31k
  ctxt->valid = 0;
560
1.31k
    } else {
561
0
        __xmlRaiseError(schannel, NULL, NULL,
562
0
                    ctxt, NULL, XML_FROM_DTD, error,
563
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
564
0
        (const char *) str2, NULL, 0, 0,
565
0
        msg, (const char *) str1, (const char *) str2);
566
0
    }
567
1.31k
}
568
569
/**
570
 * xmlFatalErrMsgInt:
571
 * @ctxt:  an XML parser context
572
 * @error:  the error number
573
 * @msg:  the error message
574
 * @val:  an integer value
575
 *
576
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577
 */
578
static void LIBXML_ATTR_FORMAT(3,0)
579
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580
                  const char *msg, int val)
581
21.1k
{
582
21.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583
21.1k
        (ctxt->instate == XML_PARSER_EOF))
584
0
  return;
585
21.1k
    if (ctxt != NULL)
586
21.1k
  ctxt->errNo = error;
587
21.1k
    __xmlRaiseError(NULL, NULL, NULL,
588
21.1k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589
21.1k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590
21.1k
    if (ctxt != NULL) {
591
21.1k
  ctxt->wellFormed = 0;
592
21.1k
  if (ctxt->recovery == 0)
593
21.1k
      ctxt->disableSAX = 1;
594
21.1k
    }
595
21.1k
}
596
597
/**
598
 * xmlFatalErrMsgStrIntStr:
599
 * @ctxt:  an XML parser context
600
 * @error:  the error number
601
 * @msg:  the error message
602
 * @str1:  an string info
603
 * @val:  an integer value
604
 * @str2:  an string info
605
 *
606
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607
 */
608
static void LIBXML_ATTR_FORMAT(3,0)
609
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610
                  const char *msg, const xmlChar *str1, int val,
611
      const xmlChar *str2)
612
180
{
613
180
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614
180
        (ctxt->instate == XML_PARSER_EOF))
615
0
  return;
616
180
    if (ctxt != NULL)
617
180
  ctxt->errNo = error;
618
180
    __xmlRaiseError(NULL, NULL, NULL,
619
180
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620
180
                    NULL, 0, (const char *) str1, (const char *) str2,
621
180
        NULL, val, 0, msg, str1, val, str2);
622
180
    if (ctxt != NULL) {
623
180
  ctxt->wellFormed = 0;
624
180
  if (ctxt->recovery == 0)
625
180
      ctxt->disableSAX = 1;
626
180
    }
627
180
}
628
629
/**
630
 * xmlFatalErrMsgStr:
631
 * @ctxt:  an XML parser context
632
 * @error:  the error number
633
 * @msg:  the error message
634
 * @val:  a string value
635
 *
636
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
 */
638
static void LIBXML_ATTR_FORMAT(3,0)
639
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640
                  const char *msg, const xmlChar * val)
641
22.4k
{
642
22.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643
22.4k
        (ctxt->instate == XML_PARSER_EOF))
644
0
  return;
645
22.4k
    if (ctxt != NULL)
646
22.4k
  ctxt->errNo = error;
647
22.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648
22.4k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
649
22.4k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650
22.4k
                    val);
651
22.4k
    if (ctxt != NULL) {
652
22.4k
  ctxt->wellFormed = 0;
653
22.4k
  if (ctxt->recovery == 0)
654
22.4k
      ctxt->disableSAX = 1;
655
22.4k
    }
656
22.4k
}
657
658
/**
659
 * xmlErrMsgStr:
660
 * @ctxt:  an XML parser context
661
 * @error:  the error number
662
 * @msg:  the error message
663
 * @val:  a string value
664
 *
665
 * Handle a non fatal parser error
666
 */
667
static void LIBXML_ATTR_FORMAT(3,0)
668
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669
                  const char *msg, const xmlChar * val)
670
765
{
671
765
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
765
        (ctxt->instate == XML_PARSER_EOF))
673
0
  return;
674
765
    if (ctxt != NULL)
675
765
  ctxt->errNo = error;
676
765
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677
765
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
678
765
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679
765
                    val);
680
765
}
681
682
/**
683
 * xmlNsErr:
684
 * @ctxt:  an XML parser context
685
 * @error:  the error number
686
 * @msg:  the message
687
 * @info1:  extra information string
688
 * @info2:  extra information string
689
 *
690
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691
 */
692
static void LIBXML_ATTR_FORMAT(3,0)
693
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694
         const char *msg,
695
         const xmlChar * info1, const xmlChar * info2,
696
         const xmlChar * info3)
697
47.1k
{
698
47.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699
47.1k
        (ctxt->instate == XML_PARSER_EOF))
700
0
  return;
701
47.1k
    if (ctxt != NULL)
702
47.1k
  ctxt->errNo = error;
703
47.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704
47.1k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
705
47.1k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
706
47.1k
                    info1, info2, info3);
707
47.1k
    if (ctxt != NULL)
708
47.1k
  ctxt->nsWellFormed = 0;
709
47.1k
}
710
711
/**
712
 * xmlNsWarn
713
 * @ctxt:  an XML parser context
714
 * @error:  the error number
715
 * @msg:  the message
716
 * @info1:  extra information string
717
 * @info2:  extra information string
718
 *
719
 * Handle a namespace warning error
720
 */
721
static void LIBXML_ATTR_FORMAT(3,0)
722
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723
         const char *msg,
724
         const xmlChar * info1, const xmlChar * info2,
725
         const xmlChar * info3)
726
6.13k
{
727
6.13k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728
6.13k
        (ctxt->instate == XML_PARSER_EOF))
729
0
  return;
730
6.13k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731
6.13k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
732
6.13k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
733
6.13k
                    info1, info2, info3);
734
6.13k
}
735
736
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * it wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    const unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
743
744
/*
 * Add a size_t amount to *dst, clamping the result to ULONG_MAX instead
 * of letting it wrap around.
 *
 * val is taken as size_t rather than unsigned long so that, where
 * size_t is wider than unsigned long, a large byte count is compared
 * against the remaining headroom at full width instead of being
 * truncated on the way in.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val fits in the headroom, hence in unsigned long. */
        *dst += (unsigned long) val;
}
751
752
/**
753
 * xmlParserEntityCheck:
754
 * @ctxt:  parser context
755
 * @extra:  sum of unexpanded entity sizes
756
 *
757
 * Check for non-linear entity expansion behaviour.
758
 *
759
 * In some cases like xmlStringDecodeEntities, this function is called
760
 * for each, possibly nested entity and its unexpanded content length.
761
 *
762
 * In other cases like xmlParseReference, it's only called for each
763
 * top-level entity with its unexpanded content length plus the sum of
764
 * the unexpanded content lengths (plus fixed cost) of all nested
765
 * entities.
766
 *
767
 * Summing the unexpanded lengths also adds the length of the reference.
768
 * This is by design. Taking the length of the entity name into account
769
 * discourages attacks that try to waste CPU time with abusively long
770
 * entity names. See test/recurse/lol6.xml for example. Each call also
771
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772
 * short entities.
773
 *
774
 * Returns 1 on error, 0 on success.
775
 */
776
static int
777
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
778
0
{
779
0
    unsigned long consumed;
780
0
    xmlParserInputPtr input = ctxt->input;
781
0
    xmlEntityPtr entity = input->entity;
782
783
    /*
784
     * Compute total consumed bytes so far, including input streams of
785
     * external entities.
786
     */
787
0
    consumed = input->parentConsumed;
788
0
    if ((entity == NULL) ||
789
0
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790
0
         ((entity->flags & XML_ENT_PARSED) == 0))) {
791
0
        xmlSaturatedAdd(&consumed, input->consumed);
792
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
793
0
    }
794
0
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
795
796
    /*
797
     * Add extra cost and some fixed cost.
798
     */
799
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
801
802
    /*
803
     * It's important to always use saturation arithmetic when tracking
804
     * entity sizes to make the size checks reliable. If "sizeentcopy"
805
     * overflows, we have to abort.
806
     */
807
0
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808
0
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
809
0
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811
0
                       "Maximum entity amplification factor exceeded");
812
0
        xmlHaltParser(ctxt);
813
0
        return(1);
814
0
    }
815
816
0
    return(0);
817
0
}
818
819
/************************************************************************
820
 *                  *
821
 *    Library wide options          *
822
 *                  *
823
 ************************************************************************/
824
825
/**
826
  * xmlHasFeature:
827
  * @feature: the feature to be examined
828
  *
829
  * Examines if the library has been compiled with a given feature.
830
  *
831
  * Returns a non-zero value if the feature exists, otherwise zero.
832
  * Returns zero (0) if the feature does not exist or an unknown
833
  * feature is requested, non-zero otherwise.
834
  */
835
int
836
xmlHasFeature(xmlFeature feature)
837
0
{
838
0
    switch (feature) {
839
0
  case XML_WITH_THREAD:
840
0
#ifdef LIBXML_THREAD_ENABLED
841
0
      return(1);
842
#else
843
      return(0);
844
#endif
845
0
        case XML_WITH_TREE:
846
0
#ifdef LIBXML_TREE_ENABLED
847
0
            return(1);
848
#else
849
            return(0);
850
#endif
851
0
        case XML_WITH_OUTPUT:
852
0
#ifdef LIBXML_OUTPUT_ENABLED
853
0
            return(1);
854
#else
855
            return(0);
856
#endif
857
0
        case XML_WITH_PUSH:
858
0
#ifdef LIBXML_PUSH_ENABLED
859
0
            return(1);
860
#else
861
            return(0);
862
#endif
863
0
        case XML_WITH_READER:
864
0
#ifdef LIBXML_READER_ENABLED
865
0
            return(1);
866
#else
867
            return(0);
868
#endif
869
0
        case XML_WITH_PATTERN:
870
0
#ifdef LIBXML_PATTERN_ENABLED
871
0
            return(1);
872
#else
873
            return(0);
874
#endif
875
0
        case XML_WITH_WRITER:
876
0
#ifdef LIBXML_WRITER_ENABLED
877
0
            return(1);
878
#else
879
            return(0);
880
#endif
881
0
        case XML_WITH_SAX1:
882
0
#ifdef LIBXML_SAX1_ENABLED
883
0
            return(1);
884
#else
885
            return(0);
886
#endif
887
0
        case XML_WITH_FTP:
888
#ifdef LIBXML_FTP_ENABLED
889
            return(1);
890
#else
891
0
            return(0);
892
0
#endif
893
0
        case XML_WITH_HTTP:
894
0
#ifdef LIBXML_HTTP_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_VALID:
900
0
#ifdef LIBXML_VALID_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_HTML:
906
0
#ifdef LIBXML_HTML_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_LEGACY:
912
#ifdef LIBXML_LEGACY_ENABLED
913
            return(1);
914
#else
915
0
            return(0);
916
0
#endif
917
0
        case XML_WITH_C14N:
918
0
#ifdef LIBXML_C14N_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_CATALOG:
924
0
#ifdef LIBXML_CATALOG_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_XPATH:
930
0
#ifdef LIBXML_XPATH_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_XPTR:
936
0
#ifdef LIBXML_XPTR_ENABLED
937
0
            return(1);
938
#else
939
            return(0);
940
#endif
941
0
        case XML_WITH_XINCLUDE:
942
0
#ifdef LIBXML_XINCLUDE_ENABLED
943
0
            return(1);
944
#else
945
            return(0);
946
#endif
947
0
        case XML_WITH_ICONV:
948
0
#ifdef LIBXML_ICONV_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_ISO8859X:
954
0
#ifdef LIBXML_ISO8859X_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_UNICODE:
960
0
#ifdef LIBXML_UNICODE_ENABLED
961
0
            return(1);
962
#else
963
            return(0);
964
#endif
965
0
        case XML_WITH_REGEXP:
966
0
#ifdef LIBXML_REGEXP_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_AUTOMATA:
972
0
#ifdef LIBXML_AUTOMATA_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_EXPR:
978
#ifdef LIBXML_EXPR_ENABLED
979
            return(1);
980
#else
981
0
            return(0);
982
0
#endif
983
0
        case XML_WITH_SCHEMAS:
984
0
#ifdef LIBXML_SCHEMAS_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_SCHEMATRON:
990
0
#ifdef LIBXML_SCHEMATRON_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_MODULES:
996
#ifdef LIBXML_MODULES_ENABLED
997
            return(1);
998
#else
999
0
            return(0);
1000
0
#endif
1001
0
        case XML_WITH_DEBUG:
1002
0
#ifdef LIBXML_DEBUG_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_DEBUG_MEM:
1008
#ifdef DEBUG_MEMORY_LOCATION
1009
            return(1);
1010
#else
1011
0
            return(0);
1012
0
#endif
1013
0
        case XML_WITH_DEBUG_RUN:
1014
0
            return(0);
1015
0
        case XML_WITH_ZLIB:
1016
#ifdef LIBXML_ZLIB_ENABLED
1017
            return(1);
1018
#else
1019
0
            return(0);
1020
0
#endif
1021
0
        case XML_WITH_LZMA:
1022
#ifdef LIBXML_LZMA_ENABLED
1023
            return(1);
1024
#else
1025
0
            return(0);
1026
0
#endif
1027
0
        case XML_WITH_ICU:
1028
#ifdef LIBXML_ICU_ENABLED
1029
            return(1);
1030
#else
1031
0
            return(0);
1032
0
#endif
1033
0
        default:
1034
0
      break;
1035
0
     }
1036
0
     return(0);
1037
0
}
1038
1039
/************************************************************************
1040
 *                  *
1041
 *    SAX2 defaulted attributes handling      *
1042
 *                  *
1043
 ************************************************************************/
1044
1045
/**
1046
 * xmlDetectSAX2:
1047
 * @ctxt:  an XML parser context
1048
 *
1049
 * Do the SAX2 detection and specific initialization
1050
 */
1051
static void
1052
15.7k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053
15.7k
    xmlSAXHandlerPtr sax;
1054
1055
    /* Avoid unused variable warning if features are disabled. */
1056
15.7k
    (void) sax;
1057
1058
15.7k
    if (ctxt == NULL) return;
1059
15.7k
    sax = ctxt->sax;
1060
15.7k
#ifdef LIBXML_SAX1_ENABLED
1061
15.7k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1062
15.7k
        ((sax->startElementNs != NULL) ||
1063
15.7k
         (sax->endElementNs != NULL) ||
1064
15.7k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065
15.7k
        ctxt->sax2 = 1;
1066
#else
1067
    ctxt->sax2 = 1;
1068
#endif /* LIBXML_SAX1_ENABLED */
1069
1070
15.7k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071
15.7k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072
15.7k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073
15.7k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074
15.7k
    (ctxt->str_xml_ns == NULL)) {
1075
0
        xmlErrMemory(ctxt, NULL);
1076
0
    }
1077
15.7k
}
1078
1079
typedef struct _xmlDefAttrs xmlDefAttrs;
1080
typedef xmlDefAttrs *xmlDefAttrsPtr;
1081
struct _xmlDefAttrs {
1082
    int nbAttrs;  /* number of defaulted attributes on that element */
1083
    int maxAttrs;       /* the size of the array */
1084
#if __STDC_VERSION__ >= 199901L
1085
    /* Using a C99 flexible array member avoids UBSan errors. */
1086
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1087
#else
1088
    const xmlChar *values[5];
1089
#endif
1090
};
1091
1092
/**
1093
 * xmlAttrNormalizeSpace:
1094
 * @src: the source string
1095
 * @dst: the target string
1096
 *
1097
 * Normalize the space in non CDATA attribute values:
1098
 * If the attribute type is not CDATA, then the XML processor MUST further
1099
 * process the normalized attribute value by discarding any leading and
1100
 * trailing space (#x20) characters, and by replacing sequences of space
1101
 * (#x20) characters by a single space (#x20) character.
1102
 * Note that the size of dst need to be at least src, and if one doesn't need
1103
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104
 * passing src as dst is just fine.
1105
 *
1106
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107
 *         is needed.
1108
 */
1109
static xmlChar *
1110
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1111
12.5k
{
1112
12.5k
    if ((src == NULL) || (dst == NULL))
1113
0
        return(NULL);
1114
1115
13.4k
    while (*src == 0x20) src++;
1116
2.04M
    while (*src != 0) {
1117
2.03M
  if (*src == 0x20) {
1118
4.77k
      while (*src == 0x20) src++;
1119
1.63k
      if (*src != 0)
1120
775
    *dst++ = 0x20;
1121
2.03M
  } else {
1122
2.03M
      *dst++ = *src++;
1123
2.03M
  }
1124
2.03M
    }
1125
12.5k
    *dst = 0;
1126
12.5k
    if (dst == src)
1127
11.3k
       return(NULL);
1128
1.16k
    return(dst);
1129
12.5k
}
1130
1131
/**
1132
 * xmlAttrNormalizeSpace2:
1133
 * @src: the source string
1134
 *
1135
 * Normalize the space in non CDATA attribute values, a slightly more complex
1136
 * front end to avoid allocation problems when running on attribute values
1137
 * coming from the input.
1138
 *
1139
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140
 *         is needed.
1141
 */
1142
static const xmlChar *
1143
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1144
2.48k
{
1145
2.48k
    int i;
1146
2.48k
    int remove_head = 0;
1147
2.48k
    int need_realloc = 0;
1148
2.48k
    const xmlChar *cur;
1149
1150
2.48k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151
0
        return(NULL);
1152
2.48k
    i = *len;
1153
2.48k
    if (i <= 0)
1154
161
        return(NULL);
1155
1156
2.32k
    cur = src;
1157
2.98k
    while (*cur == 0x20) {
1158
660
        cur++;
1159
660
  remove_head++;
1160
660
    }
1161
2.13M
    while (*cur != 0) {
1162
2.12M
  if (*cur == 0x20) {
1163
3.40k
      cur++;
1164
3.40k
      if ((*cur == 0x20) || (*cur == 0)) {
1165
135
          need_realloc = 1;
1166
135
    break;
1167
135
      }
1168
3.40k
  } else
1169
2.12M
      cur++;
1170
2.12M
    }
1171
2.32k
    if (need_realloc) {
1172
135
        xmlChar *ret;
1173
1174
135
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175
135
  if (ret == NULL) {
1176
0
      xmlErrMemory(ctxt, NULL);
1177
0
      return(NULL);
1178
0
  }
1179
135
  xmlAttrNormalizeSpace(ret, ret);
1180
135
  *len = strlen((const char *)ret);
1181
135
        return(ret);
1182
2.18k
    } else if (remove_head) {
1183
648
        *len -= remove_head;
1184
648
        memmove(src, src + remove_head, 1 + *len);
1185
648
  return(src);
1186
648
    }
1187
1.54k
    return(NULL);
1188
2.32k
}
1189
1190
/**
1191
 * xmlAddDefAttrs:
1192
 * @ctxt:  an XML parser context
1193
 * @fullname:  the element fullname
1194
 * @fullattr:  the attribute fullname
1195
 * @value:  the attribute value
1196
 *
1197
 * Add a defaulted attribute for an element
1198
 */
1199
static void
1200
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201
               const xmlChar *fullname,
1202
               const xmlChar *fullattr,
1203
13.0k
               const xmlChar *value) {
1204
13.0k
    xmlDefAttrsPtr defaults;
1205
13.0k
    int len;
1206
13.0k
    const xmlChar *name;
1207
13.0k
    const xmlChar *prefix;
1208
1209
    /*
1210
     * Allows to detect attribute redefinitions
1211
     */
1212
13.0k
    if (ctxt->attsSpecial != NULL) {
1213
12.2k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214
2.20k
      return;
1215
12.2k
    }
1216
1217
10.8k
    if (ctxt->attsDefault == NULL) {
1218
814
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219
814
  if (ctxt->attsDefault == NULL)
1220
0
      goto mem_error;
1221
814
    }
1222
1223
    /*
1224
     * split the element name into prefix:localname , the string found
1225
     * are within the DTD and then not associated to namespace names.
1226
     */
1227
10.8k
    name = xmlSplitQName3(fullname, &len);
1228
10.8k
    if (name == NULL) {
1229
8.66k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1230
8.66k
  prefix = NULL;
1231
8.66k
    } else {
1232
2.20k
        name = xmlDictLookup(ctxt->dict, name, -1);
1233
2.20k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1234
2.20k
    }
1235
1236
    /*
1237
     * make sure there is some storage
1238
     */
1239
10.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240
10.8k
    if (defaults == NULL) {
1241
1.78k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242
1.78k
                     (4 * 5) * sizeof(const xmlChar *));
1243
1.78k
  if (defaults == NULL)
1244
0
      goto mem_error;
1245
1.78k
  defaults->nbAttrs = 0;
1246
1.78k
  defaults->maxAttrs = 4;
1247
1.78k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248
1.78k
                          defaults, NULL) < 0) {
1249
0
      xmlFree(defaults);
1250
0
      goto mem_error;
1251
0
  }
1252
9.08k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253
1.44k
        xmlDefAttrsPtr temp;
1254
1255
1.44k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256
1.44k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257
1.44k
  if (temp == NULL)
1258
0
      goto mem_error;
1259
1.44k
  defaults = temp;
1260
1.44k
  defaults->maxAttrs *= 2;
1261
1.44k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262
1.44k
                          defaults, NULL) < 0) {
1263
0
      xmlFree(defaults);
1264
0
      goto mem_error;
1265
0
  }
1266
1.44k
    }
1267
1268
    /*
1269
     * Split the element name into prefix:localname , the string found
1270
     * are within the DTD and hen not associated to namespace names.
1271
     */
1272
10.8k
    name = xmlSplitQName3(fullattr, &len);
1273
10.8k
    if (name == NULL) {
1274
8.57k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275
8.57k
  prefix = NULL;
1276
8.57k
    } else {
1277
2.29k
        name = xmlDictLookup(ctxt->dict, name, -1);
1278
2.29k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1279
2.29k
    }
1280
1281
10.8k
    defaults->values[5 * defaults->nbAttrs] = name;
1282
10.8k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283
    /* intern the string and precompute the end */
1284
10.8k
    len = xmlStrlen(value);
1285
10.8k
    value = xmlDictLookup(ctxt->dict, value, len);
1286
10.8k
    if (value == NULL)
1287
0
        goto mem_error;
1288
10.8k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1289
10.8k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290
10.8k
    if (ctxt->external)
1291
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292
10.8k
    else
1293
10.8k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294
10.8k
    defaults->nbAttrs++;
1295
1296
10.8k
    return;
1297
1298
0
mem_error:
1299
0
    xmlErrMemory(ctxt, NULL);
1300
0
    return;
1301
10.8k
}
1302
1303
/**
1304
 * xmlAddSpecialAttr:
1305
 * @ctxt:  an XML parser context
1306
 * @fullname:  the element fullname
1307
 * @fullattr:  the attribute fullname
1308
 * @type:  the attribute type
1309
 *
1310
 * Register this attribute type
1311
 */
1312
static void
1313
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314
      const xmlChar *fullname,
1315
      const xmlChar *fullattr,
1316
      int type)
1317
13.3k
{
1318
13.3k
    if (ctxt->attsSpecial == NULL) {
1319
931
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320
931
  if (ctxt->attsSpecial == NULL)
1321
0
      goto mem_error;
1322
931
    }
1323
1324
13.3k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325
2.22k
        return;
1326
1327
11.0k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328
11.0k
                     (void *) (ptrdiff_t) type);
1329
11.0k
    return;
1330
1331
0
mem_error:
1332
0
    xmlErrMemory(ctxt, NULL);
1333
0
    return;
1334
13.3k
}
1335
1336
/**
1337
 * xmlCleanSpecialAttrCallback:
1338
 *
1339
 * Removes CDATA attributes from the special attribute table
1340
 */
1341
static void
1342
xmlCleanSpecialAttrCallback(void *payload, void *data,
1343
                            const xmlChar *fullname, const xmlChar *fullattr,
1344
2.68k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1345
2.68k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1346
1347
2.68k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348
477
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1349
477
    }
1350
2.68k
}
1351
1352
/**
1353
 * xmlCleanSpecialAttr:
1354
 * @ctxt:  an XML parser context
1355
 *
1356
 * Trim the list of attributes defined to remove all those of type
1357
 * CDATA as they are not special. This call should be done when finishing
1358
 * to parse the DTD and before starting to parse the document root.
1359
 */
1360
static void
1361
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1362
778
{
1363
778
    if (ctxt->attsSpecial == NULL)
1364
139
        return;
1365
1366
639
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1367
1368
639
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369
24
        xmlHashFree(ctxt->attsSpecial, NULL);
1370
24
        ctxt->attsSpecial = NULL;
1371
24
    }
1372
639
    return;
1373
778
}
1374
1375
/**
1376
 * xmlCheckLanguageID:
1377
 * @lang:  pointer to the string value
1378
 *
1379
 * DEPRECATED: Internal function, do not use.
1380
 *
1381
 * Checks that the value conforms to the LanguageID production:
1382
 *
1383
 * NOTE: this is somewhat deprecated, those productions were removed from
1384
 *       the XML Second edition.
1385
 *
1386
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1387
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1388
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1389
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1390
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1391
 * [38] Subcode ::= ([a-z] | [A-Z])+
1392
 *
1393
 * The current REC reference the successors of RFC 1766, currently 5646
1394
 *
1395
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1396
 * langtag       = language
1397
 *                 ["-" script]
1398
 *                 ["-" region]
1399
 *                 *("-" variant)
1400
 *                 *("-" extension)
1401
 *                 ["-" privateuse]
1402
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1403
 *                 ["-" extlang]       ; sometimes followed by
1404
 *                                     ; extended language subtags
1405
 *               / 4ALPHA              ; or reserved for future use
1406
 *               / 5*8ALPHA            ; or registered language subtag
1407
 *
1408
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1409
 *                 *2("-" 3ALPHA)      ; permanently reserved
1410
 *
1411
 * script        = 4ALPHA              ; ISO 15924 code
1412
 *
1413
 * region        = 2ALPHA              ; ISO 3166-1 code
1414
 *               / 3DIGIT              ; UN M.49 code
1415
 *
1416
 * variant       = 5*8alphanum         ; registered variants
1417
 *               / (DIGIT 3alphanum)
1418
 *
1419
 * extension     = singleton 1*("-" (2*8alphanum))
1420
 *
1421
 *                                     ; Single alphanumerics
1422
 *                                     ; "x" reserved for private use
1423
 * singleton     = DIGIT               ; 0 - 9
1424
 *               / %x41-57             ; A - W
1425
 *               / %x59-5A             ; Y - Z
1426
 *               / %x61-77             ; a - w
1427
 *               / %x79-7A             ; y - z
1428
 *
1429
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1430
 * The parser below doesn't try to cope with extension or privateuse
1431
 * that could be added but that's not interoperable anyway
1432
 *
1433
 * Returns 1 if correct 0 otherwise
1434
 **/
1435
int
1436
xmlCheckLanguageID(const xmlChar * lang)
1437
0
{
1438
0
    const xmlChar *cur = lang, *nxt;
1439
1440
0
    if (cur == NULL)
1441
0
        return (0);
1442
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1443
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1444
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1445
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1446
        /*
1447
         * Still allow IANA code and user code which were coming
1448
         * from the previous version of the XML-1.0 specification
1449
         * it's deprecated but we should not fail
1450
         */
1451
0
        cur += 2;
1452
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1453
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1454
0
            cur++;
1455
0
        return(cur[0] == 0);
1456
0
    }
1457
0
    nxt = cur;
1458
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1459
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1460
0
           nxt++;
1461
0
    if (nxt - cur >= 4) {
1462
        /*
1463
         * Reserved
1464
         */
1465
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1466
0
            return(0);
1467
0
        return(1);
1468
0
    }
1469
0
    if (nxt - cur < 2)
1470
0
        return(0);
1471
    /* we got an ISO 639 code */
1472
0
    if (nxt[0] == 0)
1473
0
        return(1);
1474
0
    if (nxt[0] != '-')
1475
0
        return(0);
1476
1477
0
    nxt++;
1478
0
    cur = nxt;
1479
    /* now we can have extlang or script or region or variant */
1480
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1481
0
        goto region_m49;
1482
1483
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1484
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1485
0
           nxt++;
1486
0
    if (nxt - cur == 4)
1487
0
        goto script;
1488
0
    if (nxt - cur == 2)
1489
0
        goto region;
1490
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491
0
        goto variant;
1492
0
    if (nxt - cur != 3)
1493
0
        return(0);
1494
    /* we parsed an extlang */
1495
0
    if (nxt[0] == 0)
1496
0
        return(1);
1497
0
    if (nxt[0] != '-')
1498
0
        return(0);
1499
1500
0
    nxt++;
1501
0
    cur = nxt;
1502
    /* now we can have script or region or variant */
1503
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1504
0
        goto region_m49;
1505
1506
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1507
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508
0
           nxt++;
1509
0
    if (nxt - cur == 2)
1510
0
        goto region;
1511
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512
0
        goto variant;
1513
0
    if (nxt - cur != 4)
1514
0
        return(0);
1515
    /* we parsed a script */
1516
0
script:
1517
0
    if (nxt[0] == 0)
1518
0
        return(1);
1519
0
    if (nxt[0] != '-')
1520
0
        return(0);
1521
1522
0
    nxt++;
1523
0
    cur = nxt;
1524
    /* now we can have region or variant */
1525
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526
0
        goto region_m49;
1527
1528
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530
0
           nxt++;
1531
1532
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533
0
        goto variant;
1534
0
    if (nxt - cur != 2)
1535
0
        return(0);
1536
    /* we parsed a region */
1537
0
region:
1538
0
    if (nxt[0] == 0)
1539
0
        return(1);
1540
0
    if (nxt[0] != '-')
1541
0
        return(0);
1542
1543
0
    nxt++;
1544
0
    cur = nxt;
1545
    /* now we can just have a variant */
1546
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1547
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1548
0
           nxt++;
1549
1550
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1551
0
        return(0);
1552
1553
    /* we parsed a variant */
1554
0
variant:
1555
0
    if (nxt[0] == 0)
1556
0
        return(1);
1557
0
    if (nxt[0] != '-')
1558
0
        return(0);
1559
    /* extensions and private use subtags not checked */
1560
0
    return (1);
1561
1562
0
region_m49:
1563
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1564
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1565
0
        nxt += 3;
1566
0
        goto region;
1567
0
    }
1568
0
    return(0);
1569
0
}
1570
1571
/************************************************************************
1572
 *                  *
1573
 *    Parser stacks related functions and macros    *
1574
 *                  *
1575
 ************************************************************************/
1576
1577
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1578
                                            const xmlChar ** str);
1579
1580
#ifdef SAX2
1581
/**
1582
 * nsPush:
1583
 * @ctxt:  an XML parser context
1584
 * @prefix:  the namespace prefix or NULL
1585
 * @URL:  the namespace name
1586
 *
1587
 * Pushes a new parser namespace on top of the ns stack
1588
 *
1589
 * Returns -1 in case of error, -2 if the namespace should be discarded
1590
 *     and the index in the stack otherwise.
1591
 */
1592
static int
1593
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1594
20.3k
{
1595
20.3k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1596
20.3k
        int i;
1597
52.5k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1598
45.6k
      if (ctxt->nsTab[i] == prefix) {
1599
    /* in scope */
1600
13.3k
          if (ctxt->nsTab[i + 1] == URL)
1601
4.69k
        return(-2);
1602
    /* out of scope keep it */
1603
8.68k
    break;
1604
13.3k
      }
1605
45.6k
  }
1606
20.3k
    }
1607
15.6k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1608
2.69k
  ctxt->nsMax = 10;
1609
2.69k
  ctxt->nsNr = 0;
1610
2.69k
  ctxt->nsTab = (const xmlChar **)
1611
2.69k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1612
2.69k
  if (ctxt->nsTab == NULL) {
1613
0
      xmlErrMemory(ctxt, NULL);
1614
0
      ctxt->nsMax = 0;
1615
0
            return (-1);
1616
0
  }
1617
12.9k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1618
238
        const xmlChar ** tmp;
1619
238
        ctxt->nsMax *= 2;
1620
238
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1621
238
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1622
238
        if (tmp == NULL) {
1623
0
            xmlErrMemory(ctxt, NULL);
1624
0
      ctxt->nsMax /= 2;
1625
0
            return (-1);
1626
0
        }
1627
238
  ctxt->nsTab = tmp;
1628
238
    }
1629
15.6k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1630
15.6k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1631
15.6k
    return (ctxt->nsNr);
1632
15.6k
}
1633
/**
1634
 * nsPop:
1635
 * @ctxt: an XML parser context
1636
 * @nr:  the number to pop
1637
 *
1638
 * Pops the top @nr parser prefix/namespace from the ns stack
1639
 *
1640
 * Returns the number of namespaces removed
1641
 */
1642
static int
1643
nsPop(xmlParserCtxtPtr ctxt, int nr)
1644
1.65k
{
1645
1.65k
    int i;
1646
1647
1.65k
    if (ctxt->nsTab == NULL) return(0);
1648
1.65k
    if (ctxt->nsNr < nr) {
1649
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1650
0
        nr = ctxt->nsNr;
1651
0
    }
1652
1.65k
    if (ctxt->nsNr <= 0)
1653
0
        return (0);
1654
1655
9.52k
    for (i = 0;i < nr;i++) {
1656
7.87k
         ctxt->nsNr--;
1657
7.87k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1658
7.87k
    }
1659
1.65k
    return(nr);
1660
1.65k
}
1661
#endif
1662
1663
static int
1664
3.12k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1665
3.12k
    const xmlChar **atts;
1666
3.12k
    int *attallocs;
1667
3.12k
    int maxatts;
1668
1669
3.12k
    if (nr + 5 > ctxt->maxatts) {
1670
3.12k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1671
3.12k
  atts = (const xmlChar **) xmlMalloc(
1672
3.12k
             maxatts * sizeof(const xmlChar *));
1673
3.12k
  if (atts == NULL) goto mem_error;
1674
3.12k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1675
3.12k
                               (maxatts / 5) * sizeof(int));
1676
3.12k
  if (attallocs == NULL) {
1677
0
            xmlFree(atts);
1678
0
            goto mem_error;
1679
0
        }
1680
3.12k
        if (ctxt->maxatts > 0)
1681
291
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1682
3.12k
        xmlFree(ctxt->atts);
1683
3.12k
  ctxt->atts = atts;
1684
3.12k
  ctxt->attallocs = attallocs;
1685
3.12k
  ctxt->maxatts = maxatts;
1686
3.12k
    }
1687
3.12k
    return(ctxt->maxatts);
1688
0
mem_error:
1689
0
    xmlErrMemory(ctxt, NULL);
1690
0
    return(-1);
1691
3.12k
}
1692
1693
/**
1694
 * inputPush:
1695
 * @ctxt:  an XML parser context
1696
 * @value:  the parser input
1697
 *
1698
 * Pushes a new parser input on top of the input stack
1699
 *
1700
 * Returns -1 in case of error, the index in the stack otherwise
1701
 */
1702
int
1703
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1704
12.4k
{
1705
12.4k
    if ((ctxt == NULL) || (value == NULL))
1706
0
        return(-1);
1707
12.4k
    if (ctxt->inputNr >= ctxt->inputMax) {
1708
0
        size_t newSize = ctxt->inputMax * 2;
1709
0
        xmlParserInputPtr *tmp;
1710
1711
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712
0
                                               newSize * sizeof(*tmp));
1713
0
        if (tmp == NULL) {
1714
0
            xmlErrMemory(ctxt, NULL);
1715
0
            return (-1);
1716
0
        }
1717
0
        ctxt->inputTab = tmp;
1718
0
        ctxt->inputMax = newSize;
1719
0
    }
1720
12.4k
    ctxt->inputTab[ctxt->inputNr] = value;
1721
12.4k
    ctxt->input = value;
1722
12.4k
    return (ctxt->inputNr++);
1723
12.4k
}
1724
/**
1725
 * inputPop:
1726
 * @ctxt: an XML parser context
1727
 *
1728
 * Pops the top parser input from the input stack
1729
 *
1730
 * Returns the input just removed
1731
 */
1732
xmlParserInputPtr
1733
inputPop(xmlParserCtxtPtr ctxt)
1734
37.3k
{
1735
37.3k
    xmlParserInputPtr ret;
1736
1737
37.3k
    if (ctxt == NULL)
1738
0
        return(NULL);
1739
37.3k
    if (ctxt->inputNr <= 0)
1740
24.9k
        return (NULL);
1741
12.4k
    ctxt->inputNr--;
1742
12.4k
    if (ctxt->inputNr > 0)
1743
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1744
12.4k
    else
1745
12.4k
        ctxt->input = NULL;
1746
12.4k
    ret = ctxt->inputTab[ctxt->inputNr];
1747
12.4k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1748
12.4k
    return (ret);
1749
37.3k
}
1750
/**
1751
 * nodePush:
1752
 * @ctxt:  an XML parser context
1753
 * @value:  the element node
1754
 *
1755
 * DEPRECATED: Internal function, do not use.
1756
 *
1757
 * Pushes a new element node on top of the node stack
1758
 *
1759
 * Returns -1 in case of error, the index in the stack otherwise
1760
 */
1761
int
1762
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763
0
{
1764
0
    if (ctxt == NULL) return(0);
1765
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1766
0
        xmlNodePtr *tmp;
1767
1768
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769
0
                                      ctxt->nodeMax * 2 *
1770
0
                                      sizeof(ctxt->nodeTab[0]));
1771
0
        if (tmp == NULL) {
1772
0
            xmlErrMemory(ctxt, NULL);
1773
0
            return (-1);
1774
0
        }
1775
0
        ctxt->nodeTab = tmp;
1776
0
  ctxt->nodeMax *= 2;
1777
0
    }
1778
0
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782
0
        xmlParserMaxDepth);
1783
0
  xmlHaltParser(ctxt);
1784
0
  return(-1);
1785
0
    }
1786
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
1787
0
    ctxt->node = value;
1788
0
    return (ctxt->nodeNr++);
1789
0
}
1790
1791
/**
1792
 * nodePop:
1793
 * @ctxt: an XML parser context
1794
 *
1795
 * DEPRECATED: Internal function, do not use.
1796
 *
1797
 * Pops the top element node from the node stack
1798
 *
1799
 * Returns the node just removed
1800
 */
1801
xmlNodePtr
1802
nodePop(xmlParserCtxtPtr ctxt)
1803
4.95k
{
1804
4.95k
    xmlNodePtr ret;
1805
1806
4.95k
    if (ctxt == NULL) return(NULL);
1807
4.95k
    if (ctxt->nodeNr <= 0)
1808
4.95k
        return (NULL);
1809
0
    ctxt->nodeNr--;
1810
0
    if (ctxt->nodeNr > 0)
1811
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812
0
    else
1813
0
        ctxt->node = NULL;
1814
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
1815
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1816
0
    return (ret);
1817
4.95k
}
1818
1819
/**
1820
 * nameNsPush:
1821
 * @ctxt:  an XML parser context
1822
 * @value:  the element name
1823
 * @prefix:  the element prefix
1824
 * @URI:  the element namespace name
1825
 * @line:  the current line number for error messages
1826
 * @nsNr:  the number of namespaces pushed on the namespace table
1827
 *
1828
 * Pushes a new element name/prefix/URL on top of the name stack
1829
 *
1830
 * Returns -1 in case of error, the index in the stack otherwise
1831
 */
1832
static int
1833
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1834
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1835
1.77M
{
1836
1.77M
    xmlStartTag *tag;
1837
1838
1.77M
    if (ctxt->nameNr >= ctxt->nameMax) {
1839
1.35k
        const xmlChar * *tmp;
1840
1.35k
        xmlStartTag *tmp2;
1841
1.35k
        ctxt->nameMax *= 2;
1842
1.35k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843
1.35k
                                    ctxt->nameMax *
1844
1.35k
                                    sizeof(ctxt->nameTab[0]));
1845
1.35k
        if (tmp == NULL) {
1846
0
      ctxt->nameMax /= 2;
1847
0
      goto mem_error;
1848
0
        }
1849
1.35k
  ctxt->nameTab = tmp;
1850
1.35k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1851
1.35k
                                    ctxt->nameMax *
1852
1.35k
                                    sizeof(ctxt->pushTab[0]));
1853
1.35k
        if (tmp2 == NULL) {
1854
0
      ctxt->nameMax /= 2;
1855
0
      goto mem_error;
1856
0
        }
1857
1.35k
  ctxt->pushTab = tmp2;
1858
1.77M
    } else if (ctxt->pushTab == NULL) {
1859
7.45k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1860
7.45k
                                            sizeof(ctxt->pushTab[0]));
1861
7.45k
        if (ctxt->pushTab == NULL)
1862
0
            goto mem_error;
1863
7.45k
    }
1864
1.77M
    ctxt->nameTab[ctxt->nameNr] = value;
1865
1.77M
    ctxt->name = value;
1866
1.77M
    tag = &ctxt->pushTab[ctxt->nameNr];
1867
1.77M
    tag->prefix = prefix;
1868
1.77M
    tag->URI = URI;
1869
1.77M
    tag->line = line;
1870
1.77M
    tag->nsNr = nsNr;
1871
1.77M
    return (ctxt->nameNr++);
1872
0
mem_error:
1873
0
    xmlErrMemory(ctxt, NULL);
1874
0
    return (-1);
1875
1.77M
}
1876
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. The vacated slot in
 * ctxt->nameTab is reset to NULL and ctxt->name is updated to the new
 * top of the stack, or to NULL once the stack has become empty.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1902
1903
/**
1904
 * namePush:
1905
 * @ctxt:  an XML parser context
1906
 * @value:  the element name
1907
 *
1908
 * DEPRECATED: Internal function, do not use.
1909
 *
1910
 * Pushes a new element name on top of the name stack
1911
 *
1912
 * Returns -1 in case of error, the index in the stack otherwise
1913
 */
1914
int
1915
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1916
0
{
1917
0
    if (ctxt == NULL) return (-1);
1918
1919
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1920
0
        const xmlChar * *tmp;
1921
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1922
0
                                    ctxt->nameMax * 2 *
1923
0
                                    sizeof(ctxt->nameTab[0]));
1924
0
        if (tmp == NULL) {
1925
0
      goto mem_error;
1926
0
        }
1927
0
  ctxt->nameTab = tmp;
1928
0
        ctxt->nameMax *= 2;
1929
0
    }
1930
0
    ctxt->nameTab[ctxt->nameNr] = value;
1931
0
    ctxt->name = value;
1932
0
    return (ctxt->nameNr++);
1933
0
mem_error:
1934
0
    xmlErrMemory(ctxt, NULL);
1935
0
    return (-1);
1936
0
}
1937
1938
/**
1939
 * namePop:
1940
 * @ctxt: an XML parser context
1941
 *
1942
 * DEPRECATED: Internal function, do not use.
1943
 *
1944
 * Pops the top element name from the name stack
1945
 *
1946
 * Returns the name just removed
1947
 */
1948
const xmlChar *
1949
namePop(xmlParserCtxtPtr ctxt)
1950
0
{
1951
0
    const xmlChar *ret;
1952
1953
0
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1954
0
        return (NULL);
1955
0
    ctxt->nameNr--;
1956
0
    if (ctxt->nameNr > 0)
1957
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1958
0
    else
1959
0
        ctxt->name = NULL;
1960
0
    ret = ctxt->nameTab[ctxt->nameNr];
1961
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
1962
0
    return (ret);
1963
0
}
1964
1965
1.83M
/*
 * spacePush:
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the table
 * when it is full, and makes ctxt->space point at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the capacity change, the old table is still in use */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1983
1984
135k
/*
 * spacePop:
 * Pops the top value from the ctxt->spaceTab stack. The vacated slot
 * is overwritten with -1 and ctxt->space is moved to the new top entry
 * (or to slot 0 once the stack has become empty).
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1996
1997
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (as defined below, RAW
 *           and CUR expand to the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser. The argument is
 *           evaluated twice, so it must not have side effects.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/* CMPn: true when the first n bytes at s are exactly c1..cn */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val),ctxt->input->col+=(val);                  \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/* Like SKIP but keeps line/col accurate when newlines are crossed */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)

/*
 * SHRINK and GROW deliberately keep their historical form (a bare if
 * statement carrying its own ';') since call sites rely on it.
 */
#define SHRINK if ((ctxt->progressive == 0) &&                          \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&                            \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))   \
        xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement; the caller supplies the terminating ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    do {                                                                \
        if ((l) == 1) (b)[(i)++] = (v);                                 \
        else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));                \
    } while (0)
2107
2108
/**
2109
 * xmlSkipBlankChars:
2110
 * @ctxt:  the XML parser context
2111
 *
2112
 * DEPRECATED: Internal function, do not use.
2113
 *
2114
 * skip all blanks character found at that point in the input streams.
2115
 * It pops up finished entities in the process if allowable at that point.
2116
 *
2117
 * Returns the number of space chars skipped
2118
 */
2119
2120
int
2121
3.25M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2122
3.25M
    int res = 0;
2123
2124
    /*
2125
     * It's Okay to use CUR/NEXT here since all the blanks are on
2126
     * the ASCII range.
2127
     */
2128
3.25M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2129
3.25M
        (ctxt->instate == XML_PARSER_START)) {
2130
2.98M
  const xmlChar *cur;
2131
  /*
2132
   * if we are in the document content, go really fast
2133
   */
2134
2.98M
  cur = ctxt->input->cur;
2135
2.98M
  while (IS_BLANK_CH(*cur)) {
2136
1.47M
      if (*cur == '\n') {
2137
63.7k
    ctxt->input->line++; ctxt->input->col = 1;
2138
1.41M
      } else {
2139
1.41M
    ctxt->input->col++;
2140
1.41M
      }
2141
1.47M
      cur++;
2142
1.47M
      if (res < INT_MAX)
2143
1.47M
    res++;
2144
1.47M
      if (*cur == 0) {
2145
775
    ctxt->input->cur = cur;
2146
775
    xmlParserGrow(ctxt);
2147
775
    cur = ctxt->input->cur;
2148
775
      }
2149
1.47M
  }
2150
2.98M
  ctxt->input->cur = cur;
2151
2.98M
    } else {
2152
273k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2153
2154
2.86M
  while (ctxt->instate != XML_PARSER_EOF) {
2155
2.86M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2156
2.59M
    NEXT;
2157
2.59M
      } else if (CUR == '%') {
2158
                /*
2159
                 * Need to handle support of entities branching here
2160
                 */
2161
15.6k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2162
15.6k
                    break;
2163
0
          xmlParsePEReference(ctxt);
2164
257k
            } else if (CUR == 0) {
2165
1.65k
                unsigned long consumed;
2166
1.65k
                xmlEntityPtr ent;
2167
2168
1.65k
                if (ctxt->inputNr <= 1)
2169
1.65k
                    break;
2170
2171
0
                consumed = ctxt->input->consumed;
2172
0
                xmlSaturatedAddSizeT(&consumed,
2173
0
                                     ctxt->input->cur - ctxt->input->base);
2174
2175
                /*
2176
                 * Add to sizeentities when parsing an external entity
2177
                 * for the first time.
2178
                 */
2179
0
                ent = ctxt->input->entity;
2180
0
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2181
0
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2182
0
                    ent->flags |= XML_ENT_PARSED;
2183
2184
0
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2185
0
                }
2186
2187
0
                xmlParserEntityCheck(ctxt, consumed);
2188
2189
0
                xmlPopInput(ctxt);
2190
255k
            } else {
2191
255k
                break;
2192
255k
            }
2193
2194
            /*
2195
             * Also increase the counter when entering or exiting a PERef.
2196
             * The spec says: "When a parameter-entity reference is recognized
2197
             * in the DTD and included, its replacement text MUST be enlarged
2198
             * by the attachment of one leading and one following space (#x20)
2199
             * character."
2200
             */
2201
2.59M
      if (res < INT_MAX)
2202
2.59M
    res++;
2203
2.59M
        }
2204
273k
    }
2205
3.25M
    return(res);
2206
3.25M
}
2207
2208
/************************************************************************
2209
 *                  *
2210
 *    Commodity functions to handle entities      *
2211
 *                  *
2212
 ************************************************************************/
2213
2214
/**
2215
 * xmlPopInput:
2216
 * @ctxt:  an XML parser context
2217
 *
2218
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219
 *          pop it and return the next char.
2220
 *
2221
 * Returns the current xmlChar in the parser context
2222
 */
2223
xmlChar
2224
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2225
0
    xmlParserInputPtr input;
2226
2227
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2228
0
    if (xmlParserDebugEntities)
2229
0
  xmlGenericError(xmlGenericErrorContext,
2230
0
    "Popping input %d\n", ctxt->inputNr);
2231
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2232
0
        (ctxt->instate != XML_PARSER_EOF))
2233
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2234
0
                    "Unfinished entity outside the DTD");
2235
0
    input = inputPop(ctxt);
2236
0
    if (input->entity != NULL)
2237
0
        input->entity->flags &= ~XML_ENT_EXPANDING;
2238
0
    xmlFreeInputStream(input);
2239
0
    if (*ctxt->input->cur == 0)
2240
0
        xmlParserGrow(ctxt);
2241
0
    return(CUR);
2242
0
}
2243
2244
/**
2245
 * xmlPushInput:
2246
 * @ctxt:  an XML parser context
2247
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2248
 *
2249
 * xmlPushInput: switch to a new input stream which is stacked on top
2250
 *               of the previous one(s).
2251
 * Returns -1 in case of error or the index in the input stack
2252
 */
2253
int
2254
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2255
0
    int ret;
2256
0
    if (input == NULL) return(-1);
2257
2258
0
    if (xmlParserDebugEntities) {
2259
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2260
0
      xmlGenericError(xmlGenericErrorContext,
2261
0
        "%s(%d): ", ctxt->input->filename,
2262
0
        ctxt->input->line);
2263
0
  xmlGenericError(xmlGenericErrorContext,
2264
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2265
0
    }
2266
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2267
0
        (ctxt->inputNr > 100)) {
2268
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2269
0
        while (ctxt->inputNr > 1)
2270
0
            xmlFreeInputStream(inputPop(ctxt));
2271
0
  return(-1);
2272
0
    }
2273
0
    ret = inputPush(ctxt, input);
2274
0
    if (ctxt->instate == XML_PARSER_EOF)
2275
0
        return(-1);
2276
0
    GROW;
2277
0
    return(ret);
2278
0
}
2279
2280
/**
2281
 * xmlParseCharRef:
2282
 * @ctxt:  an XML parser context
2283
 *
2284
 * DEPRECATED: Internal function, don't use.
2285
 *
2286
 * Parse a numeric character reference. Always consumes '&'.
2287
 *
2288
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289
 *                  '&#x' [0-9a-fA-F]+ ';'
2290
 *
2291
 * [ WFC: Legal Character ]
2292
 * Characters referred to using character references must match the
2293
 * production for Char.
2294
 *
2295
 * Returns the value parsed (as an int), 0 in case of error
2296
 */
2297
int
2298
33.4k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2299
33.4k
    int val = 0;
2300
33.4k
    int count = 0;
2301
2302
    /*
2303
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2304
     */
2305
33.4k
    if ((RAW == '&') && (NXT(1) == '#') &&
2306
33.4k
        (NXT(2) == 'x')) {
2307
23.4k
  SKIP(3);
2308
23.4k
  GROW;
2309
107k
  while (RAW != ';') { /* loop blocked by count */
2310
88.8k
      if (count++ > 20) {
2311
1.28k
    count = 0;
2312
1.28k
    GROW;
2313
1.28k
                if (ctxt->instate == XML_PARSER_EOF)
2314
0
                    return(0);
2315
1.28k
      }
2316
88.8k
      if ((RAW >= '0') && (RAW <= '9'))
2317
49.7k
          val = val * 16 + (CUR - '0');
2318
39.0k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2319
20.2k
          val = val * 16 + (CUR - 'a') + 10;
2320
18.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2321
14.1k
          val = val * 16 + (CUR - 'A') + 10;
2322
4.67k
      else {
2323
4.67k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2324
4.67k
    val = 0;
2325
4.67k
    break;
2326
4.67k
      }
2327
84.1k
      if (val > 0x110000)
2328
16.0k
          val = 0x110000;
2329
2330
84.1k
      NEXT;
2331
84.1k
      count++;
2332
84.1k
  }
2333
23.4k
  if (RAW == ';') {
2334
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2335
18.7k
      ctxt->input->col++;
2336
18.7k
      ctxt->input->cur++;
2337
18.7k
  }
2338
23.4k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2339
9.97k
  SKIP(2);
2340
9.97k
  GROW;
2341
23.0k
  while (RAW != ';') { /* loop blocked by count */
2342
19.4k
      if (count++ > 20) {
2343
375
    count = 0;
2344
375
    GROW;
2345
375
                if (ctxt->instate == XML_PARSER_EOF)
2346
0
                    return(0);
2347
375
      }
2348
19.4k
      if ((RAW >= '0') && (RAW <= '9'))
2349
13.0k
          val = val * 10 + (CUR - '0');
2350
6.42k
      else {
2351
6.42k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2352
6.42k
    val = 0;
2353
6.42k
    break;
2354
6.42k
      }
2355
13.0k
      if (val > 0x110000)
2356
2.56k
          val = 0x110000;
2357
2358
13.0k
      NEXT;
2359
13.0k
      count++;
2360
13.0k
  }
2361
9.97k
  if (RAW == ';') {
2362
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363
3.55k
      ctxt->input->col++;
2364
3.55k
      ctxt->input->cur++;
2365
3.55k
  }
2366
9.97k
    } else {
2367
0
        if (RAW == '&')
2368
0
            SKIP(1);
2369
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2370
0
    }
2371
2372
    /*
2373
     * [ WFC: Legal Character ]
2374
     * Characters referred to using character references must match the
2375
     * production for Char.
2376
     */
2377
33.4k
    if (val >= 0x110000) {
2378
674
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2379
674
                "xmlParseCharRef: character reference out of bounds\n",
2380
674
          val);
2381
32.7k
    } else if (IS_CHAR(val)) {
2382
19.5k
        return(val);
2383
19.5k
    } else {
2384
13.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2385
13.1k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2386
13.1k
                    val);
2387
13.1k
    }
2388
13.8k
    return(0);
2389
33.4k
}
2390
2391
/**
2392
 * xmlParseStringCharRef:
2393
 * @ctxt:  an XML parser context
2394
 * @str:  a pointer to an index in the string
2395
 *
2396
 * parse Reference declarations, variant parsing from a string rather
2397
 * than an an input flow.
2398
 *
2399
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400
 *                  '&#x' [0-9a-fA-F]+ ';'
2401
 *
2402
 * [ WFC: Legal Character ]
2403
 * Characters referred to using character references must match the
2404
 * production for Char.
2405
 *
2406
 * Returns the value parsed (as an int), 0 in case of error, str will be
2407
 *         updated to the current value of the index
2408
 */
2409
static int
2410
5.60k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2411
5.60k
    const xmlChar *ptr;
2412
5.60k
    xmlChar cur;
2413
5.60k
    int val = 0;
2414
2415
5.60k
    if ((str == NULL) || (*str == NULL)) return(0);
2416
5.60k
    ptr = *str;
2417
5.60k
    cur = *ptr;
2418
5.60k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2419
2.87k
  ptr += 3;
2420
2.87k
  cur = *ptr;
2421
11.4k
  while (cur != ';') { /* Non input consuming loop */
2422
9.38k
      if ((cur >= '0') && (cur <= '9'))
2423
2.00k
          val = val * 16 + (cur - '0');
2424
7.38k
      else if ((cur >= 'a') && (cur <= 'f'))
2425
2.05k
          val = val * 16 + (cur - 'a') + 10;
2426
5.33k
      else if ((cur >= 'A') && (cur <= 'F'))
2427
4.53k
          val = val * 16 + (cur - 'A') + 10;
2428
803
      else {
2429
803
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2430
803
    val = 0;
2431
803
    break;
2432
803
      }
2433
8.58k
      if (val > 0x110000)
2434
488
          val = 0x110000;
2435
2436
8.58k
      ptr++;
2437
8.58k
      cur = *ptr;
2438
8.58k
  }
2439
2.87k
  if (cur == ';')
2440
2.06k
      ptr++;
2441
2.87k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2442
2.73k
  ptr += 2;
2443
2.73k
  cur = *ptr;
2444
9.13k
  while (cur != ';') { /* Non input consuming loops */
2445
8.35k
      if ((cur >= '0') && (cur <= '9'))
2446
6.40k
          val = val * 10 + (cur - '0');
2447
1.95k
      else {
2448
1.95k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449
1.95k
    val = 0;
2450
1.95k
    break;
2451
1.95k
      }
2452
6.40k
      if (val > 0x110000)
2453
151
          val = 0x110000;
2454
2455
6.40k
      ptr++;
2456
6.40k
      cur = *ptr;
2457
6.40k
  }
2458
2.73k
  if (cur == ';')
2459
783
      ptr++;
2460
2.73k
    } else {
2461
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2462
0
  return(0);
2463
0
    }
2464
5.60k
    *str = ptr;
2465
2466
    /*
2467
     * [ WFC: Legal Character ]
2468
     * Characters referred to using character references must match the
2469
     * production for Char.
2470
     */
2471
5.60k
    if (val >= 0x110000) {
2472
61
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2473
61
                "xmlParseStringCharRef: character reference out of bounds\n",
2474
61
                val);
2475
5.54k
    } else if (IS_CHAR(val)) {
2476
2.03k
        return(val);
2477
3.51k
    } else {
2478
3.51k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2479
3.51k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2480
3.51k
        val);
2481
3.51k
    }
2482
3.57k
    return(0);
2483
5.60k
}
2484
2485
/**
2486
 * xmlParserHandlePEReference:
2487
 * @ctxt:  the parser context
2488
 *
2489
 * DEPRECATED: Internal function, do not use.
2490
 *
2491
 * [69] PEReference ::= '%' Name ';'
2492
 *
2493
 * [ WFC: No Recursion ]
2494
 * A parsed entity must not contain a recursive
2495
 * reference to itself, either directly or indirectly.
2496
 *
2497
 * [ WFC: Entity Declared ]
2498
 * In a document without any DTD, a document with only an internal DTD
2499
 * subset which contains no parameter entity references, or a document
2500
 * with "standalone='yes'", ...  ... The declaration of a parameter
2501
 * entity must precede any reference to it...
2502
 *
2503
 * [ VC: Entity Declared ]
2504
 * In a document with an external subset or external parameter entities
2505
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2506
 * must precede any reference to it...
2507
 *
2508
 * [ WFC: In DTD ]
2509
 * Parameter-entity references may only appear in the DTD.
2510
 * NOTE: misleading but this is handled.
2511
 *
2512
 * A PEReference may have been detected in the current input stream
2513
 * the handling is done accordingly to
2514
 *      http://www.w3.org/TR/REC-xml#entproc
2515
 * i.e.
2516
 *   - Included in literal in entity values
2517
 *   - Included as Parameter Entity reference within DTDs
2518
 */
2519
void
2520
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2521
0
    switch(ctxt->instate) {
2522
0
  case XML_PARSER_CDATA_SECTION:
2523
0
      return;
2524
0
        case XML_PARSER_COMMENT:
2525
0
      return;
2526
0
  case XML_PARSER_START_TAG:
2527
0
      return;
2528
0
  case XML_PARSER_END_TAG:
2529
0
      return;
2530
0
        case XML_PARSER_EOF:
2531
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2532
0
      return;
2533
0
        case XML_PARSER_PROLOG:
2534
0
  case XML_PARSER_START:
2535
0
  case XML_PARSER_MISC:
2536
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2537
0
      return;
2538
0
  case XML_PARSER_ENTITY_DECL:
2539
0
        case XML_PARSER_CONTENT:
2540
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2541
0
        case XML_PARSER_PI:
2542
0
  case XML_PARSER_SYSTEM_LITERAL:
2543
0
  case XML_PARSER_PUBLIC_LITERAL:
2544
      /* we just ignore it there */
2545
0
      return;
2546
0
        case XML_PARSER_EPILOG:
2547
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2548
0
      return;
2549
0
  case XML_PARSER_ENTITY_VALUE:
2550
      /*
2551
       * NOTE: in the case of entity values, we don't do the
2552
       *       substitution here since we need the literal
2553
       *       entity value to be able to save the internal
2554
       *       subset of the document.
2555
       *       This will be handled by xmlStringDecodeEntities
2556
       */
2557
0
      return;
2558
0
        case XML_PARSER_DTD:
2559
      /*
2560
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561
       * In the internal DTD subset, parameter-entity references
2562
       * can occur only where markup declarations can occur, not
2563
       * within markup declarations.
2564
       * In that case this is handled in xmlParseMarkupDecl
2565
       */
2566
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2567
0
    return;
2568
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2569
0
    return;
2570
0
            break;
2571
0
        case XML_PARSER_IGNORE:
2572
0
            return;
2573
0
    }
2574
2575
0
    xmlParsePEReference(ctxt);
2576
0
}
2577
2578
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also jumped to when the size computation wraps around. On failure
 * buffer and buffer##_size are left unchanged.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2592
2593
/**
2594
 * xmlStringDecodeEntitiesInt:
2595
 * @ctxt:  the parser context
2596
 * @str:  the input string
2597
 * @len: the string length
2598
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599
 * @end:  an end marker xmlChar, 0 if none
2600
 * @end2:  an end marker xmlChar, 0 if none
2601
 * @end3:  an end marker xmlChar, 0 if none
2602
 * @check:  whether to perform entity checks
2603
 */
2604
static xmlChar *
2605
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2607
5.96k
                           int check) {
2608
5.96k
    xmlChar *buffer = NULL;
2609
5.96k
    size_t buffer_size = 0;
2610
5.96k
    size_t nbchars = 0;
2611
2612
5.96k
    xmlChar *current = NULL;
2613
5.96k
    xmlChar *rep = NULL;
2614
5.96k
    const xmlChar *last;
2615
5.96k
    xmlEntityPtr ent;
2616
5.96k
    int c,l;
2617
2618
5.96k
    if (str == NULL)
2619
0
        return(NULL);
2620
5.96k
    last = str + len;
2621
2622
5.96k
    if (((ctxt->depth > 40) &&
2623
5.96k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2624
5.96k
  (ctxt->depth > 100)) {
2625
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2626
0
                       "Maximum entity nesting depth exceeded");
2627
0
  return(NULL);
2628
0
    }
2629
2630
    /*
2631
     * allocate a translation buffer.
2632
     */
2633
5.96k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2634
5.96k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2635
5.96k
    if (buffer == NULL) goto mem_error;
2636
2637
    /*
2638
     * OK loop until we reach one of the ending char or a size limit.
2639
     * we are operating on already parsed values.
2640
     */
2641
5.96k
    if (str < last)
2642
5.39k
  c = CUR_SCHAR(str, l);
2643
572
    else
2644
572
        c = 0;
2645
24.2M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2646
24.2M
           (c != end2) && (c != end3) &&
2647
24.2M
           (ctxt->instate != XML_PARSER_EOF)) {
2648
2649
24.2M
  if (c == 0) break;
2650
24.2M
        if ((c == '&') && (str[1] == '#')) {
2651
5.60k
      int val = xmlParseStringCharRef(ctxt, &str);
2652
5.60k
      if (val == 0)
2653
3.57k
                goto int_error;
2654
2.03k
      COPY_BUF(0,buffer,nbchars,val);
2655
2.03k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2656
450
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2657
450
      }
2658
24.2M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659
0
      if (xmlParserDebugEntities)
2660
0
    xmlGenericError(xmlGenericErrorContext,
2661
0
      "String decoding Entity Reference: %.30s\n",
2662
0
      str);
2663
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2664
0
      if ((ent != NULL) &&
2665
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2666
0
    if (ent->content != NULL) {
2667
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2668
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2669
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2670
0
        }
2671
0
    } else {
2672
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2673
0
          "predefined entity has no content\n");
2674
0
                    goto int_error;
2675
0
    }
2676
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2677
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2678
0
                    goto int_error;
2679
2680
0
                if (ent->flags & XML_ENT_EXPANDING) {
2681
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682
0
                    xmlHaltParser(ctxt);
2683
0
                    ent->content[0] = 0;
2684
0
                    goto int_error;
2685
0
                }
2686
2687
0
                ent->flags |= XML_ENT_EXPANDING;
2688
0
    ctxt->depth++;
2689
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2690
0
                        ent->length, what, 0, 0, 0, check);
2691
0
    ctxt->depth--;
2692
0
                ent->flags &= ~XML_ENT_EXPANDING;
2693
2694
0
    if (rep == NULL) {
2695
0
                    ent->content[0] = 0;
2696
0
                    goto int_error;
2697
0
                }
2698
2699
0
                current = rep;
2700
0
                while (*current != 0) { /* non input consuming loop */
2701
0
                    buffer[nbchars++] = *current++;
2702
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2703
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2704
0
                    }
2705
0
                }
2706
0
                xmlFree(rep);
2707
0
                rep = NULL;
2708
0
      } else if (ent != NULL) {
2709
0
    int i = xmlStrlen(ent->name);
2710
0
    const xmlChar *cur = ent->name;
2711
2712
0
    buffer[nbchars++] = '&';
2713
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2715
0
    }
2716
0
    for (;i > 0;i--)
2717
0
        buffer[nbchars++] = *cur++;
2718
0
    buffer[nbchars++] = ';';
2719
0
      }
2720
24.2M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2721
0
      if (xmlParserDebugEntities)
2722
0
    xmlGenericError(xmlGenericErrorContext,
2723
0
      "String decoding PE Reference: %.30s\n", str);
2724
0
      ent = xmlParseStringPEReference(ctxt, &str);
2725
0
      if (ent != NULL) {
2726
0
                if (ent->content == NULL) {
2727
        /*
2728
         * Note: external parsed entities will not be loaded,
2729
         * it is not required for a non-validating parser to
2730
         * complete external PEReferences coming from the
2731
         * internal subset
2732
         */
2733
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2734
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2735
0
      (ctxt->validate != 0)) {
2736
0
      xmlLoadEntityContent(ctxt, ent);
2737
0
        } else {
2738
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2739
0
      "not validating will not read content for PE entity %s\n",
2740
0
                          ent->name, NULL);
2741
0
        }
2742
0
    }
2743
2744
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2745
0
                    goto int_error;
2746
2747
0
                if (ent->flags & XML_ENT_EXPANDING) {
2748
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2749
0
                    xmlHaltParser(ctxt);
2750
0
                    if (ent->content != NULL)
2751
0
                        ent->content[0] = 0;
2752
0
                    goto int_error;
2753
0
                }
2754
2755
0
                ent->flags |= XML_ENT_EXPANDING;
2756
0
    ctxt->depth++;
2757
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2758
0
                        ent->length, what, 0, 0, 0, check);
2759
0
    ctxt->depth--;
2760
0
                ent->flags &= ~XML_ENT_EXPANDING;
2761
2762
0
    if (rep == NULL) {
2763
0
                    if (ent->content != NULL)
2764
0
                        ent->content[0] = 0;
2765
0
                    goto int_error;
2766
0
                }
2767
0
                current = rep;
2768
0
                while (*current != 0) { /* non input consuming loop */
2769
0
                    buffer[nbchars++] = *current++;
2770
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2771
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772
0
                    }
2773
0
                }
2774
0
                xmlFree(rep);
2775
0
                rep = NULL;
2776
0
      }
2777
24.2M
  } else {
2778
24.2M
      COPY_BUF(l,buffer,nbchars,c);
2779
24.2M
      str += l;
2780
24.2M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781
1.58k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2782
1.58k
      }
2783
24.2M
  }
2784
24.2M
  if (str < last)
2785
24.2M
      c = CUR_SCHAR(str, l);
2786
1.82k
  else
2787
1.82k
      c = 0;
2788
24.2M
    }
2789
2.39k
    buffer[nbchars] = 0;
2790
2.39k
    return(buffer);
2791
2792
0
mem_error:
2793
0
    xmlErrMemory(ctxt, NULL);
2794
3.57k
int_error:
2795
3.57k
    if (rep != NULL)
2796
0
        xmlFree(rep);
2797
3.57k
    if (buffer != NULL)
2798
3.57k
        xmlFree(buffer);
2799
3.57k
    return(NULL);
2800
0
}
2801
2802
/**
2803
 * xmlStringLenDecodeEntities:
2804
 * @ctxt:  the parser context
2805
 * @str:  the input string
2806
 * @len: the string length
2807
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808
 * @end:  an end marker xmlChar, 0 if none
2809
 * @end2:  an end marker xmlChar, 0 if none
2810
 * @end3:  an end marker xmlChar, 0 if none
2811
 *
2812
 * DEPRECATED: Internal function, don't use.
2813
 *
2814
 * Takes a entity string content and process to do the adequate substitutions.
2815
 *
2816
 * [67] Reference ::= EntityRef | CharRef
2817
 *
2818
 * [69] PEReference ::= '%' Name ';'
2819
 *
2820
 * Returns A newly allocated string with the substitution done. The caller
2821
 *      must deallocate it !
2822
 */
2823
xmlChar *
2824
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2825
                           int what, xmlChar end, xmlChar  end2,
2826
0
                           xmlChar end3) {
2827
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2828
0
        return(NULL);
2829
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2830
0
                                      end, end2, end3, 0));
2831
0
}
2832
2833
/**
2834
 * xmlStringDecodeEntities:
2835
 * @ctxt:  the parser context
2836
 * @str:  the input string
2837
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838
 * @end:  an end marker xmlChar, 0 if none
2839
 * @end2:  an end marker xmlChar, 0 if none
2840
 * @end3:  an end marker xmlChar, 0 if none
2841
 *
2842
 * DEPRECATED: Internal function, don't use.
2843
 *
2844
 * Takes a entity string content and process to do the adequate substitutions.
2845
 *
2846
 * [67] Reference ::= EntityRef | CharRef
2847
 *
2848
 * [69] PEReference ::= '%' Name ';'
2849
 *
2850
 * Returns A newly allocated string with the substitution done. The caller
2851
 *      must deallocate it !
2852
 */
2853
xmlChar *
2854
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2855
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2856
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2857
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2858
0
                                      end, end2, end3, 0));
2859
0
}
2860
2861
/************************************************************************
2862
 *                  *
2863
 *    Commodity functions, cleanup needed ?     *
2864
 *                  *
2865
 ************************************************************************/
2866
2867
/**
2868
 * areBlanks:
2869
 * @ctxt:  an XML parser context
2870
 * @str:  a xmlChar *
2871
 * @len:  the size of @str
2872
 * @blank_chars: we know the chars are blanks
2873
 *
2874
 * Is this a sequence of blank chars that one can ignore ?
2875
 *
2876
 * Returns 1 if ignorable 0 otherwise.
2877
 */
2878
2879
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2880
491k
                     int blank_chars) {
2881
491k
    int i, ret;
2882
491k
    xmlNodePtr lastChild;
2883
2884
    /*
2885
     * Don't spend time trying to differentiate them, the same callback is
2886
     * used !
2887
     */
2888
491k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2889
0
  return(0);
2890
2891
    /*
2892
     * Check for xml:space value.
2893
     */
2894
491k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2895
491k
        (*(ctxt->space) == -2))
2896
445k
  return(0);
2897
2898
    /*
2899
     * Check that the string is made of blanks
2900
     */
2901
45.3k
    if (blank_chars == 0) {
2902
78.2k
  for (i = 0;i < len;i++)
2903
75.1k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2904
22.4k
    }
2905
2906
    /*
2907
     * Look if the element is mixed content in the DTD if available
2908
     */
2909
25.9k
    if (ctxt->node == NULL) return(0);
2910
0
    if (ctxt->myDoc != NULL) {
2911
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2912
0
        if (ret == 0) return(1);
2913
0
        if (ret == 1) return(0);
2914
0
    }
2915
2916
    /*
2917
     * Otherwise, heuristic :-\
2918
     */
2919
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2920
0
    if ((ctxt->node->children == NULL) &&
2921
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2922
2923
0
    lastChild = xmlGetLastChild(ctxt->node);
2924
0
    if (lastChild == NULL) {
2925
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926
0
            (ctxt->node->content != NULL)) return(0);
2927
0
    } else if (xmlNodeIsText(lastChild))
2928
0
        return(0);
2929
0
    else if ((ctxt->node->children != NULL) &&
2930
0
             (xmlNodeIsText(ctxt->node->children)))
2931
0
        return(0);
2932
0
    return(1);
2933
0
}
2934
2935
/************************************************************************
2936
 *                  *
2937
 *    Extra stuff for namespace support     *
2938
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2939
 *                  *
2940
 ************************************************************************/
2941
2942
/**
2943
 * xmlSplitQName:
2944
 * @ctxt:  an XML parser context
2945
 * @name:  an XML parser context
2946
 * @prefix:  a xmlChar **
2947
 *
2948
 * parse an UTF8 encoded XML qualified name string
2949
 *
2950
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2951
 *
2952
 * [NS 6] Prefix ::= NCName
2953
 *
2954
 * [NS 7] LocalPart ::= NCName
2955
 *
2956
 * Returns the local part, and prefix is updated
2957
 *   to get the Prefix if any.
2958
 */
2959
2960
xmlChar *
2961
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2962
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2963
0
    xmlChar *buffer = NULL;
2964
0
    int len = 0;
2965
0
    int max = XML_MAX_NAMELEN;
2966
0
    xmlChar *ret = NULL;
2967
0
    const xmlChar *cur = name;
2968
0
    int c;
2969
2970
0
    if (prefix == NULL) return(NULL);
2971
0
    *prefix = NULL;
2972
2973
0
    if (cur == NULL) return(NULL);
2974
2975
#ifndef XML_XML_NAMESPACE
2976
    /* xml: prefix is not really a namespace */
2977
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2978
        (cur[2] == 'l') && (cur[3] == ':'))
2979
  return(xmlStrdup(name));
2980
#endif
2981
2982
    /* nasty but well=formed */
2983
0
    if (cur[0] == ':')
2984
0
  return(xmlStrdup(name));
2985
2986
0
    c = *cur++;
2987
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2988
0
  buf[len++] = c;
2989
0
  c = *cur++;
2990
0
    }
2991
0
    if (len >= max) {
2992
  /*
2993
   * Okay someone managed to make a huge name, so he's ready to pay
2994
   * for the processing speed.
2995
   */
2996
0
  max = len * 2;
2997
2998
0
  buffer = (xmlChar *) xmlMallocAtomic(max);
2999
0
  if (buffer == NULL) {
3000
0
      xmlErrMemory(ctxt, NULL);
3001
0
      return(NULL);
3002
0
  }
3003
0
  memcpy(buffer, buf, len);
3004
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3005
0
      if (len + 10 > max) {
3006
0
          xmlChar *tmp;
3007
3008
0
    max *= 2;
3009
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3010
0
    if (tmp == NULL) {
3011
0
        xmlFree(buffer);
3012
0
        xmlErrMemory(ctxt, NULL);
3013
0
        return(NULL);
3014
0
    }
3015
0
    buffer = tmp;
3016
0
      }
3017
0
      buffer[len++] = c;
3018
0
      c = *cur++;
3019
0
  }
3020
0
  buffer[len] = 0;
3021
0
    }
3022
3023
0
    if ((c == ':') && (*cur == 0)) {
3024
0
        if (buffer != NULL)
3025
0
      xmlFree(buffer);
3026
0
  *prefix = NULL;
3027
0
  return(xmlStrdup(name));
3028
0
    }
3029
3030
0
    if (buffer == NULL)
3031
0
  ret = xmlStrndup(buf, len);
3032
0
    else {
3033
0
  ret = buffer;
3034
0
  buffer = NULL;
3035
0
  max = XML_MAX_NAMELEN;
3036
0
    }
3037
3038
3039
0
    if (c == ':') {
3040
0
  c = *cur;
3041
0
        *prefix = ret;
3042
0
  if (c == 0) {
3043
0
      return(xmlStrndup(BAD_CAST "", 0));
3044
0
  }
3045
0
  len = 0;
3046
3047
  /*
3048
   * Check that the first character is proper to start
3049
   * a new name
3050
   */
3051
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3052
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3053
0
        (c == '_') || (c == ':'))) {
3054
0
      int l;
3055
0
      int first = CUR_SCHAR(cur, l);
3056
3057
0
      if (!IS_LETTER(first) && (first != '_')) {
3058
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3059
0
          "Name %s is not XML Namespace compliant\n",
3060
0
          name);
3061
0
      }
3062
0
  }
3063
0
  cur++;
3064
3065
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3066
0
      buf[len++] = c;
3067
0
      c = *cur++;
3068
0
  }
3069
0
  if (len >= max) {
3070
      /*
3071
       * Okay someone managed to make a huge name, so he's ready to pay
3072
       * for the processing speed.
3073
       */
3074
0
      max = len * 2;
3075
3076
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3077
0
      if (buffer == NULL) {
3078
0
          xmlErrMemory(ctxt, NULL);
3079
0
    return(NULL);
3080
0
      }
3081
0
      memcpy(buffer, buf, len);
3082
0
      while (c != 0) { /* tested bigname2.xml */
3083
0
    if (len + 10 > max) {
3084
0
        xmlChar *tmp;
3085
3086
0
        max *= 2;
3087
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3088
0
        if (tmp == NULL) {
3089
0
      xmlErrMemory(ctxt, NULL);
3090
0
      xmlFree(buffer);
3091
0
      return(NULL);
3092
0
        }
3093
0
        buffer = tmp;
3094
0
    }
3095
0
    buffer[len++] = c;
3096
0
    c = *cur++;
3097
0
      }
3098
0
      buffer[len] = 0;
3099
0
  }
3100
3101
0
  if (buffer == NULL)
3102
0
      ret = xmlStrndup(buf, len);
3103
0
  else {
3104
0
      ret = buffer;
3105
0
  }
3106
0
    }
3107
3108
0
    return(ret);
3109
0
}
3110
3111
/************************************************************************
3112
 *                  *
3113
 *      The parser itself       *
3114
 *  Relates to http://www.w3.org/TR/REC-xml       *
3115
 *                  *
3116
 ************************************************************************/
3117
3118
/************************************************************************
3119
 *                  *
3120
 *  Routines to parse Name, NCName and NmToken      *
3121
 *                  *
3122
 ************************************************************************/
3123
#ifdef DEBUG
/*
 * Call counters, only compiled into DEBUG builds. Objects with static
 * storage duration start at zero, so no explicit initializer is needed.
 * The Name/NCName counters are incremented on entry to the matching
 * parse routine below; presumably the NmToken and StringName ones are
 * used the same way further down in this file.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3131
3132
/*
3133
 * The two following functions are related to the change of accepted
3134
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135
 * They correspond to the modified production [4] and the new production [4a]
3136
 * changes in that revision. Also note that the macros used for the
3137
 * productions Letter, Digit, CombiningChar and Extender are not needed
3138
 * anymore.
3139
 * We still keep compatibility to pre-revision5 parsing semantic if the
3140
 * new XML_PARSE_OLD10 option is given to the parser.
3141
 */
3142
static int
3143
29.4k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3144
29.4k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3145
        /*
3146
   * Use the new checks of production [4] [4a] amd [5] of the
3147
   * Update 5 of XML-1.0
3148
   */
3149
29.4k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3150
29.4k
      (((c >= 'a') && (c <= 'z')) ||
3151
28.7k
       ((c >= 'A') && (c <= 'Z')) ||
3152
28.7k
       (c == '_') || (c == ':') ||
3153
28.7k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3154
28.7k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3155
28.7k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3156
28.7k
       ((c >= 0x370) && (c <= 0x37D)) ||
3157
28.7k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3158
28.7k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3159
28.7k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3160
28.7k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161
28.7k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162
28.7k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163
28.7k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164
28.7k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3165
27.0k
      return(1);
3166
29.4k
    } else {
3167
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3168
0
      return(1);
3169
0
    }
3170
2.47k
    return(0);
3171
29.4k
}
3172
3173
static int
3174
95.0M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3175
95.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3176
        /*
3177
   * Use the new checks of production [4] [4a] amd [5] of the
3178
   * Update 5 of XML-1.0
3179
   */
3180
95.0M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181
95.0M
      (((c >= 'a') && (c <= 'z')) ||
3182
95.0M
       ((c >= 'A') && (c <= 'Z')) ||
3183
95.0M
       ((c >= '0') && (c <= '9')) || /* !start */
3184
95.0M
       (c == '_') || (c == ':') ||
3185
95.0M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3186
95.0M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3187
95.0M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3188
95.0M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3189
95.0M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3190
95.0M
       ((c >= 0x370) && (c <= 0x37D)) ||
3191
95.0M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3192
95.0M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3193
95.0M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3194
95.0M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3195
95.0M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3196
95.0M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3197
95.0M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3198
95.0M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3199
95.0M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3200
95.0M
       return(1);
3201
95.0M
    } else {
3202
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3203
0
            (c == '.') || (c == '-') ||
3204
0
      (c == '_') || (c == ':') ||
3205
0
      (IS_COMBINING(c)) ||
3206
0
      (IS_EXTENDER(c)))
3207
0
      return(1);
3208
0
    }
3209
21.1k
    return(0);
3210
95.0M
}
3211
3212
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3213
                                          int *len, int *alloc, int normalize);
3214
3215
static const xmlChar *
3216
148k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217
148k
    int len = 0, l;
3218
148k
    int c;
3219
148k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3220
148k
                    XML_MAX_TEXT_LENGTH :
3221
148k
                    XML_MAX_NAME_LENGTH;
3222
3223
#ifdef DEBUG
3224
    nbParseNameComplex++;
3225
#endif
3226
3227
    /*
3228
     * Handler for more complex cases
3229
     */
3230
148k
    c = CUR_CHAR(l);
3231
148k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3232
        /*
3233
   * Use the new checks of production [4] [4a] amd [5] of the
3234
   * Update 5 of XML-1.0
3235
   */
3236
148k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3237
148k
      (!(((c >= 'a') && (c <= 'z')) ||
3238
144k
         ((c >= 'A') && (c <= 'Z')) ||
3239
144k
         (c == '_') || (c == ':') ||
3240
144k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3241
144k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3242
144k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3243
144k
         ((c >= 0x370) && (c <= 0x37D)) ||
3244
144k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3245
144k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3246
144k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3247
144k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3248
144k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3249
144k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3250
144k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3251
144k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3252
88.0k
      return(NULL);
3253
88.0k
  }
3254
60.2k
  len += l;
3255
60.2k
  NEXTL(l);
3256
60.2k
  c = CUR_CHAR(l);
3257
57.1M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3258
57.1M
         (((c >= 'a') && (c <= 'z')) ||
3259
57.1M
          ((c >= 'A') && (c <= 'Z')) ||
3260
57.1M
          ((c >= '0') && (c <= '9')) || /* !start */
3261
57.1M
          (c == '_') || (c == ':') ||
3262
57.1M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3263
57.1M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3264
57.1M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3265
57.1M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3266
57.1M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3267
57.1M
          ((c >= 0x370) && (c <= 0x37D)) ||
3268
57.1M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3269
57.1M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3270
57.1M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3271
57.1M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3272
57.1M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273
57.1M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274
57.1M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275
57.1M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276
57.1M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3277
57.1M
    )) {
3278
57.1M
            if (len <= INT_MAX - l)
3279
57.1M
          len += l;
3280
57.1M
      NEXTL(l);
3281
57.1M
      c = CUR_CHAR(l);
3282
57.1M
  }
3283
60.2k
    } else {
3284
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3285
0
      (!IS_LETTER(c) && (c != '_') &&
3286
0
       (c != ':'))) {
3287
0
      return(NULL);
3288
0
  }
3289
0
  len += l;
3290
0
  NEXTL(l);
3291
0
  c = CUR_CHAR(l);
3292
3293
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3294
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3295
0
    (c == '.') || (c == '-') ||
3296
0
    (c == '_') || (c == ':') ||
3297
0
    (IS_COMBINING(c)) ||
3298
0
    (IS_EXTENDER(c)))) {
3299
0
            if (len <= INT_MAX - l)
3300
0
          len += l;
3301
0
      NEXTL(l);
3302
0
      c = CUR_CHAR(l);
3303
0
  }
3304
0
    }
3305
60.2k
    if (ctxt->instate == XML_PARSER_EOF)
3306
0
        return(NULL);
3307
60.2k
    if (len > maxLength) {
3308
1
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309
1
        return(NULL);
3310
1
    }
3311
60.2k
    if (ctxt->input->cur - ctxt->input->base < len) {
3312
        /*
3313
         * There were a couple of bugs where PERefs lead to to a change
3314
         * of the buffer. Check the buffer size to avoid passing an invalid
3315
         * pointer to xmlDictLookup.
3316
         */
3317
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318
0
                    "unexpected change of input buffer");
3319
0
        return (NULL);
3320
0
    }
3321
60.2k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322
801
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323
59.4k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324
60.2k
}
3325
3326
/**
3327
 * xmlParseName:
3328
 * @ctxt:  an XML parser context
3329
 *
3330
 * DEPRECATED: Internal function, don't use.
3331
 *
3332
 * parse an XML name.
3333
 *
3334
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335
 *                  CombiningChar | Extender
3336
 *
3337
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3338
 *
3339
 * [6] Names ::= Name (#x20 Name)*
3340
 *
3341
 * Returns the Name parsed or NULL
3342
 */
3343
3344
const xmlChar *
3345
441k
xmlParseName(xmlParserCtxtPtr ctxt) {
3346
441k
    const xmlChar *in;
3347
441k
    const xmlChar *ret;
3348
441k
    size_t count = 0;
3349
441k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3350
441k
                       XML_MAX_TEXT_LENGTH :
3351
441k
                       XML_MAX_NAME_LENGTH;
3352
3353
441k
    GROW;
3354
441k
    if (ctxt->instate == XML_PARSER_EOF)
3355
0
        return(NULL);
3356
3357
#ifdef DEBUG
3358
    nbParseName++;
3359
#endif
3360
3361
    /*
3362
     * Accelerator for simple ASCII names
3363
     */
3364
441k
    in = ctxt->input->cur;
3365
441k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3366
441k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3367
441k
  (*in == '_') || (*in == ':')) {
3368
308k
  in++;
3369
1.15M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3370
1.15M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3371
1.15M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3372
1.15M
         (*in == '_') || (*in == '-') ||
3373
1.15M
         (*in == ':') || (*in == '.'))
3374
847k
      in++;
3375
308k
  if ((*in > 0) && (*in < 0x80)) {
3376
293k
      count = in - ctxt->input->cur;
3377
293k
            if (count > maxLength) {
3378
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3379
0
                return(NULL);
3380
0
            }
3381
293k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3382
293k
      ctxt->input->cur = in;
3383
293k
      ctxt->input->col += count;
3384
293k
      if (ret == NULL)
3385
0
          xmlErrMemory(ctxt, NULL);
3386
293k
      return(ret);
3387
293k
  }
3388
308k
    }
3389
    /* accelerator for special cases */
3390
148k
    return(xmlParseNameComplex(ctxt));
3391
441k
}
3392
3393
static const xmlChar *
3394
15.9k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3395
15.9k
    int len = 0, l;
3396
15.9k
    int c;
3397
15.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398
15.9k
                    XML_MAX_TEXT_LENGTH :
3399
15.9k
                    XML_MAX_NAME_LENGTH;
3400
15.9k
    size_t startPosition = 0;
3401
3402
#ifdef DEBUG
3403
    nbParseNCNameComplex++;
3404
#endif
3405
3406
    /*
3407
     * Handler for more complex cases
3408
     */
3409
15.9k
    startPosition = CUR_PTR - BASE_PTR;
3410
15.9k
    c = CUR_CHAR(l);
3411
15.9k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3412
15.9k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3413
8.70k
  return(NULL);
3414
8.70k
    }
3415
3416
84.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3417
84.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3418
84.8M
        if (len <= INT_MAX - l)
3419
84.8M
      len += l;
3420
84.8M
  NEXTL(l);
3421
84.8M
  c = CUR_CHAR(l);
3422
84.8M
    }
3423
7.22k
    if (ctxt->instate == XML_PARSER_EOF)
3424
0
        return(NULL);
3425
7.22k
    if (len > maxLength) {
3426
8
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3427
8
        return(NULL);
3428
8
    }
3429
7.21k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3430
7.22k
}
3431
3432
/**
3433
 * xmlParseNCName:
3434
 * @ctxt:  an XML parser context
3435
 * @len:  length of the string parsed
3436
 *
3437
 * parse an XML name.
3438
 *
3439
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440
 *                      CombiningChar | Extender
3441
 *
3442
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3443
 *
3444
 * Returns the Name parsed or NULL
3445
 */
3446
3447
static const xmlChar *
3448
2.20M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3449
2.20M
    const xmlChar *in, *e;
3450
2.20M
    const xmlChar *ret;
3451
2.20M
    size_t count = 0;
3452
2.20M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3453
2.20M
                       XML_MAX_TEXT_LENGTH :
3454
2.20M
                       XML_MAX_NAME_LENGTH;
3455
3456
#ifdef DEBUG
3457
    nbParseNCName++;
3458
#endif
3459
3460
    /*
3461
     * Accelerator for simple ASCII names
3462
     */
3463
2.20M
    in = ctxt->input->cur;
3464
2.20M
    e = ctxt->input->end;
3465
2.20M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3466
2.20M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3467
2.20M
   (*in == '_')) && (in < e)) {
3468
2.18M
  in++;
3469
4.22M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3470
4.22M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3471
4.22M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3472
4.22M
          (*in == '_') || (*in == '-') ||
3473
4.22M
          (*in == '.')) && (in < e))
3474
2.04M
      in++;
3475
2.18M
  if (in >= e)
3476
393
      goto complex;
3477
2.18M
  if ((*in > 0) && (*in < 0x80)) {
3478
2.18M
      count = in - ctxt->input->cur;
3479
2.18M
            if (count > maxLength) {
3480
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481
0
                return(NULL);
3482
0
            }
3483
2.18M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3484
2.18M
      ctxt->input->cur = in;
3485
2.18M
      ctxt->input->col += count;
3486
2.18M
      if (ret == NULL) {
3487
0
          xmlErrMemory(ctxt, NULL);
3488
0
      }
3489
2.18M
      return(ret);
3490
2.18M
  }
3491
2.18M
    }
3492
15.9k
complex:
3493
15.9k
    return(xmlParseNCNameComplex(ctxt));
3494
2.20M
}
3495
3496
/**
3497
 * xmlParseNameAndCompare:
3498
 * @ctxt:  an XML parser context
3499
 *
3500
 * parse an XML name and compares for match
3501
 * (specialized for endtag parsing)
3502
 *
3503
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504
 * and the name for mismatch
3505
 */
3506
3507
static const xmlChar *
3508
74.3k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3509
74.3k
    register const xmlChar *cmp = other;
3510
74.3k
    register const xmlChar *in;
3511
74.3k
    const xmlChar *ret;
3512
3513
74.3k
    GROW;
3514
74.3k
    if (ctxt->instate == XML_PARSER_EOF)
3515
0
        return(NULL);
3516
3517
74.3k
    in = ctxt->input->cur;
3518
369k
    while (*in != 0 && *in == *cmp) {
3519
295k
  ++in;
3520
295k
  ++cmp;
3521
295k
    }
3522
74.3k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523
  /* success */
3524
74.1k
  ctxt->input->col += in - ctxt->input->cur;
3525
74.1k
  ctxt->input->cur = in;
3526
74.1k
  return (const xmlChar*) 1;
3527
74.1k
    }
3528
    /* failure (or end of input buffer), check with full function */
3529
160
    ret = xmlParseName (ctxt);
3530
    /* strings coming from the dictionary direct compare possible */
3531
160
    if (ret == other) {
3532
12
  return (const xmlChar*) 1;
3533
12
    }
3534
148
    return ret;
3535
160
}
3536
3537
/**
3538
 * xmlParseStringName:
3539
 * @ctxt:  an XML parser context
3540
 * @str:  a pointer to the string pointer (IN/OUT)
3541
 *
3542
 * parse an XML name.
3543
 *
3544
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545
 *                  CombiningChar | Extender
3546
 *
3547
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3548
 *
3549
 * [6] Names ::= Name (#x20 Name)*
3550
 *
3551
 * Returns the Name parsed or NULL. The @str pointer
3552
 * is updated to the current location in the string.
3553
 */
3554
3555
static xmlChar *
3556
15.0k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557
15.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3558
15.0k
    const xmlChar *cur = *str;
3559
15.0k
    int len = 0, l;
3560
15.0k
    int c;
3561
15.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3562
15.0k
                    XML_MAX_TEXT_LENGTH :
3563
15.0k
                    XML_MAX_NAME_LENGTH;
3564
3565
#ifdef DEBUG
3566
    nbParseStringName++;
3567
#endif
3568
3569
15.0k
    c = CUR_SCHAR(cur, l);
3570
15.0k
    if (!xmlIsNameStartChar(ctxt, c)) {
3571
993
  return(NULL);
3572
993
    }
3573
3574
14.0k
    COPY_BUF(l,buf,len,c);
3575
14.0k
    cur += l;
3576
14.0k
    c = CUR_SCHAR(cur, l);
3577
63.2k
    while (xmlIsNameChar(ctxt, c)) {
3578
49.9k
  COPY_BUF(l,buf,len,c);
3579
49.9k
  cur += l;
3580
49.9k
  c = CUR_SCHAR(cur, l);
3581
49.9k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3582
      /*
3583
       * Okay someone managed to make a huge name, so he's ready to pay
3584
       * for the processing speed.
3585
       */
3586
809
      xmlChar *buffer;
3587
809
      int max = len * 2;
3588
3589
809
      buffer = (xmlChar *) xmlMallocAtomic(max);
3590
809
      if (buffer == NULL) {
3591
0
          xmlErrMemory(ctxt, NULL);
3592
0
    return(NULL);
3593
0
      }
3594
809
      memcpy(buffer, buf, len);
3595
18.4k
      while (xmlIsNameChar(ctxt, c)) {
3596
17.5k
    if (len + 10 > max) {
3597
159
        xmlChar *tmp;
3598
3599
159
        max *= 2;
3600
159
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3601
159
        if (tmp == NULL) {
3602
0
      xmlErrMemory(ctxt, NULL);
3603
0
      xmlFree(buffer);
3604
0
      return(NULL);
3605
0
        }
3606
159
        buffer = tmp;
3607
159
    }
3608
17.5k
    COPY_BUF(l,buffer,len,c);
3609
17.5k
    cur += l;
3610
17.5k
    c = CUR_SCHAR(cur, l);
3611
17.5k
                if (len > maxLength) {
3612
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613
0
                    xmlFree(buffer);
3614
0
                    return(NULL);
3615
0
                }
3616
17.5k
      }
3617
809
      buffer[len] = 0;
3618
809
      *str = cur;
3619
809
      return(buffer);
3620
809
  }
3621
49.9k
    }
3622
13.2k
    if (len > maxLength) {
3623
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624
0
        return(NULL);
3625
0
    }
3626
13.2k
    *str = cur;
3627
13.2k
    return(xmlStrndup(buf, len));
3628
13.2k
}
3629
3630
/**
3631
 * xmlParseNmtoken:
3632
 * @ctxt:  an XML parser context
3633
 *
3634
 * DEPRECATED: Internal function, don't use.
3635
 *
3636
 * parse an XML Nmtoken.
3637
 *
3638
 * [7] Nmtoken ::= (NameChar)+
3639
 *
3640
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3641
 *
3642
 * Returns the Nmtoken parsed or NULL
3643
 */
3644
3645
xmlChar *
3646
5.09k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647
5.09k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3648
5.09k
    int len = 0, l;
3649
5.09k
    int c;
3650
5.09k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3651
5.09k
                    XML_MAX_TEXT_LENGTH :
3652
5.09k
                    XML_MAX_NAME_LENGTH;
3653
3654
#ifdef DEBUG
3655
    nbParseNmToken++;
3656
#endif
3657
3658
5.09k
    c = CUR_CHAR(l);
3659
3660
55.3k
    while (xmlIsNameChar(ctxt, c)) {
3661
51.3k
  COPY_BUF(l,buf,len,c);
3662
51.3k
  NEXTL(l);
3663
51.3k
  c = CUR_CHAR(l);
3664
51.3k
  if (len >= XML_MAX_NAMELEN) {
3665
      /*
3666
       * Okay someone managed to make a huge token, so he's ready to pay
3667
       * for the processing speed.
3668
       */
3669
1.16k
      xmlChar *buffer;
3670
1.16k
      int max = len * 2;
3671
3672
1.16k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3673
1.16k
      if (buffer == NULL) {
3674
0
          xmlErrMemory(ctxt, NULL);
3675
0
    return(NULL);
3676
0
      }
3677
1.16k
      memcpy(buffer, buf, len);
3678
10.0M
      while (xmlIsNameChar(ctxt, c)) {
3679
10.0M
    if (len + 10 > max) {
3680
1.63k
        xmlChar *tmp;
3681
3682
1.63k
        max *= 2;
3683
1.63k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3684
1.63k
        if (tmp == NULL) {
3685
0
      xmlErrMemory(ctxt, NULL);
3686
0
      xmlFree(buffer);
3687
0
      return(NULL);
3688
0
        }
3689
1.63k
        buffer = tmp;
3690
1.63k
    }
3691
10.0M
    COPY_BUF(l,buffer,len,c);
3692
10.0M
                if (len > maxLength) {
3693
1
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3694
1
                    xmlFree(buffer);
3695
1
                    return(NULL);
3696
1
                }
3697
10.0M
    NEXTL(l);
3698
10.0M
    c = CUR_CHAR(l);
3699
10.0M
      }
3700
1.16k
      buffer[len] = 0;
3701
1.16k
            if (ctxt->instate == XML_PARSER_EOF) {
3702
0
                xmlFree(buffer);
3703
0
                return(NULL);
3704
0
            }
3705
1.16k
      return(buffer);
3706
1.16k
  }
3707
51.3k
    }
3708
3.93k
    if (ctxt->instate == XML_PARSER_EOF)
3709
0
        return(NULL);
3710
3.93k
    if (len == 0)
3711
1.61k
        return(NULL);
3712
2.32k
    if (len > maxLength) {
3713
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714
0
        return(NULL);
3715
0
    }
3716
2.32k
    return(xmlStrndup(buf, len));
3717
2.32k
}
3718
3719
/**
3720
 * xmlParseEntityValue:
3721
 * @ctxt:  an XML parser context
3722
 * @orig:  if non-NULL store a copy of the original entity value
3723
 *
3724
 * DEPRECATED: Internal function, don't use.
3725
 *
3726
 * parse a value for ENTITY declarations
3727
 *
3728
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3730
 *
3731
 * Returns the EntityValue parsed with reference substituted or NULL
3732
 */
3733
3734
xmlChar *
3735
9.74k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3736
9.74k
    xmlChar *buf = NULL;
3737
9.74k
    int len = 0;
3738
9.74k
    int size = XML_PARSER_BUFFER_SIZE;
3739
9.74k
    int c, l;
3740
9.74k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741
9.74k
                    XML_MAX_HUGE_LENGTH :
3742
9.74k
                    XML_MAX_TEXT_LENGTH;
3743
9.74k
    xmlChar stop;
3744
9.74k
    xmlChar *ret = NULL;
3745
9.74k
    const xmlChar *cur = NULL;
3746
9.74k
    xmlParserInputPtr input;
3747
3748
9.74k
    if (RAW == '"') stop = '"';
3749
8.76k
    else if (RAW == '\'') stop = '\'';
3750
0
    else {
3751
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3752
0
  return(NULL);
3753
0
    }
3754
9.74k
    buf = (xmlChar *) xmlMallocAtomic(size);
3755
9.74k
    if (buf == NULL) {
3756
0
  xmlErrMemory(ctxt, NULL);
3757
0
  return(NULL);
3758
0
    }
3759
3760
    /*
3761
     * The content of the entity definition is copied in a buffer.
3762
     */
3763
3764
9.74k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3765
9.74k
    input = ctxt->input;
3766
9.74k
    GROW;
3767
9.74k
    if (ctxt->instate == XML_PARSER_EOF)
3768
0
        goto error;
3769
9.74k
    NEXT;
3770
9.74k
    c = CUR_CHAR(l);
3771
    /*
3772
     * NOTE: 4.4.5 Included in Literal
3773
     * When a parameter entity reference appears in a literal entity
3774
     * value, ... a single or double quote character in the replacement
3775
     * text is always treated as a normal data character and will not
3776
     * terminate the literal.
3777
     * In practice it means we stop the loop only when back at parsing
3778
     * the initial entity and the quote is found
3779
     */
3780
25.3M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3781
25.3M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3782
25.3M
  if (len + 5 >= size) {
3783
3.58k
      xmlChar *tmp;
3784
3785
3.58k
      size *= 2;
3786
3.58k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3787
3.58k
      if (tmp == NULL) {
3788
0
    xmlErrMemory(ctxt, NULL);
3789
0
                goto error;
3790
0
      }
3791
3.58k
      buf = tmp;
3792
3.58k
  }
3793
25.3M
  COPY_BUF(l,buf,len,c);
3794
25.3M
  NEXTL(l);
3795
3796
25.3M
  GROW;
3797
25.3M
  c = CUR_CHAR(l);
3798
25.3M
  if (c == 0) {
3799
70
      GROW;
3800
70
      c = CUR_CHAR(l);
3801
70
  }
3802
3803
25.3M
        if (len > maxLength) {
3804
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3805
0
                           "entity value too long\n");
3806
0
            goto error;
3807
0
        }
3808
25.3M
    }
3809
9.74k
    buf[len] = 0;
3810
9.74k
    if (ctxt->instate == XML_PARSER_EOF)
3811
0
        goto error;
3812
9.74k
    if (c != stop) {
3813
121
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3814
121
        goto error;
3815
121
    }
3816
9.62k
    NEXT;
3817
3818
    /*
3819
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3820
     * reference constructs. Note Charref will be handled in
3821
     * xmlStringDecodeEntities()
3822
     */
3823
9.62k
    cur = buf;
3824
72.2M
    while (*cur != 0) { /* non input consuming */
3825
72.2M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3826
15.0k
      xmlChar *name;
3827
15.0k
      xmlChar tmp = *cur;
3828
15.0k
            int nameOk = 0;
3829
3830
15.0k
      cur++;
3831
15.0k
      name = xmlParseStringName(ctxt, &cur);
3832
15.0k
            if (name != NULL) {
3833
14.0k
                nameOk = 1;
3834
14.0k
                xmlFree(name);
3835
14.0k
            }
3836
15.0k
            if ((nameOk == 0) || (*cur != ';')) {
3837
3.57k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3838
3.57k
      "EntityValue: '%c' forbidden except for entities references\n",
3839
3.57k
                            tmp);
3840
3.57k
                goto error;
3841
3.57k
      }
3842
11.4k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3843
11.4k
    (ctxt->inputNr == 1)) {
3844
87
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3845
87
                goto error;
3846
87
      }
3847
11.3k
      if (*cur == 0)
3848
0
          break;
3849
11.3k
  }
3850
72.2M
  cur++;
3851
72.2M
    }
3852
3853
    /*
3854
     * Then PEReference entities are substituted.
3855
     *
3856
     * NOTE: 4.4.7 Bypassed
3857
     * When a general entity reference appears in the EntityValue in
3858
     * an entity declaration, it is bypassed and left as is.
3859
     * so XML_SUBSTITUTE_REF is not set here.
3860
     */
3861
5.96k
    ++ctxt->depth;
3862
5.96k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3863
5.96k
                                     0, 0, 0, /* check */ 1);
3864
5.96k
    --ctxt->depth;
3865
3866
5.96k
    if (orig != NULL) {
3867
5.96k
        *orig = buf;
3868
5.96k
        buf = NULL;
3869
5.96k
    }
3870
3871
9.74k
error:
3872
9.74k
    if (buf != NULL)
3873
3.78k
        xmlFree(buf);
3874
9.74k
    return(ret);
3875
5.96k
}
3876
3877
/**
3878
 * xmlParseAttValueComplex:
3879
 * @ctxt:  an XML parser context
3880
 * @len:   the resulting attribute len
3881
 * @normalize:  whether to apply the inner normalization
3882
 *
3883
 * parse a value for an attribute, this is the fallback function
3884
 * of xmlParseAttValue() when the attribute parsing requires handling
3885
 * of non-ASCII characters, or normalization compaction.
3886
 *
3887
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3888
 */
3889
static xmlChar *
3890
20.4k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3891
20.4k
    xmlChar limit = 0;
3892
20.4k
    xmlChar *buf = NULL;
3893
20.4k
    xmlChar *rep = NULL;
3894
20.4k
    size_t len = 0;
3895
20.4k
    size_t buf_size = 0;
3896
20.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3897
20.4k
                       XML_MAX_HUGE_LENGTH :
3898
20.4k
                       XML_MAX_TEXT_LENGTH;
3899
20.4k
    int c, l, in_space = 0;
3900
20.4k
    xmlChar *current = NULL;
3901
20.4k
    xmlEntityPtr ent;
3902
3903
20.4k
    if (NXT(0) == '"') {
3904
16.9k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905
16.9k
  limit = '"';
3906
16.9k
        NEXT;
3907
16.9k
    } else if (NXT(0) == '\'') {
3908
3.44k
  limit = '\'';
3909
3.44k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910
3.44k
        NEXT;
3911
3.44k
    } else {
3912
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913
0
  return(NULL);
3914
0
    }
3915
3916
    /*
3917
     * allocate a translation buffer.
3918
     */
3919
20.4k
    buf_size = XML_PARSER_BUFFER_SIZE;
3920
20.4k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921
20.4k
    if (buf == NULL) goto mem_error;
3922
3923
    /*
3924
     * OK loop until we reach one of the ending char or a size limit.
3925
     */
3926
20.4k
    c = CUR_CHAR(l);
3927
36.9M
    while (((NXT(0) != limit) && /* checked */
3928
36.9M
            (IS_CHAR(c)) && (c != '<')) &&
3929
36.9M
            (ctxt->instate != XML_PARSER_EOF)) {
3930
36.8M
  if (c == '&') {
3931
204k
      in_space = 0;
3932
204k
      if (NXT(1) == '#') {
3933
32.8k
    int val = xmlParseCharRef(ctxt);
3934
3935
32.8k
    if (val == '&') {
3936
997
        if (ctxt->replaceEntities) {
3937
0
      if (len + 10 > buf_size) {
3938
0
          growBuffer(buf, 10);
3939
0
      }
3940
0
      buf[len++] = '&';
3941
997
        } else {
3942
      /*
3943
       * The reparsing will be done in xmlStringGetNodeList()
3944
       * called by the attribute() function in SAX.c
3945
       */
3946
997
      if (len + 10 > buf_size) {
3947
98
          growBuffer(buf, 10);
3948
98
      }
3949
997
      buf[len++] = '&';
3950
997
      buf[len++] = '#';
3951
997
      buf[len++] = '3';
3952
997
      buf[len++] = '8';
3953
997
      buf[len++] = ';';
3954
997
        }
3955
31.8k
    } else if (val != 0) {
3956
18.1k
        if (len + 10 > buf_size) {
3957
40
      growBuffer(buf, 10);
3958
40
        }
3959
18.1k
        len += xmlCopyChar(0, &buf[len], val);
3960
18.1k
    }
3961
171k
      } else {
3962
171k
    ent = xmlParseEntityRef(ctxt);
3963
171k
    if ((ent != NULL) &&
3964
171k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3965
30.2k
        if (len + 10 > buf_size) {
3966
1.58k
      growBuffer(buf, 10);
3967
1.58k
        }
3968
30.2k
        if ((ctxt->replaceEntities == 0) &&
3969
30.2k
            (ent->content[0] == '&')) {
3970
26.9k
      buf[len++] = '&';
3971
26.9k
      buf[len++] = '#';
3972
26.9k
      buf[len++] = '3';
3973
26.9k
      buf[len++] = '8';
3974
26.9k
      buf[len++] = ';';
3975
26.9k
        } else {
3976
3.36k
      buf[len++] = ent->content[0];
3977
3.36k
        }
3978
141k
    } else if ((ent != NULL) &&
3979
141k
               (ctxt->replaceEntities != 0)) {
3980
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3981
0
                        if (xmlParserEntityCheck(ctxt, ent->length))
3982
0
                            goto error;
3983
3984
0
      ++ctxt->depth;
3985
0
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3986
0
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3987
0
                                /* check */ 1);
3988
0
      --ctxt->depth;
3989
0
      if (rep != NULL) {
3990
0
          current = rep;
3991
0
          while (*current != 0) { /* non input consuming */
3992
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3993
0
                                    (*current == 0x9)) {
3994
0
                                    buf[len++] = 0x20;
3995
0
                                    current++;
3996
0
                                } else
3997
0
                                    buf[len++] = *current++;
3998
0
        if (len + 10 > buf_size) {
3999
0
            growBuffer(buf, 10);
4000
0
        }
4001
0
          }
4002
0
          xmlFree(rep);
4003
0
          rep = NULL;
4004
0
      }
4005
0
        } else {
4006
0
      if (len + 10 > buf_size) {
4007
0
          growBuffer(buf, 10);
4008
0
      }
4009
0
      if (ent->content != NULL)
4010
0
          buf[len++] = ent->content[0];
4011
0
        }
4012
141k
    } else if (ent != NULL) {
4013
12.8k
        int i = xmlStrlen(ent->name);
4014
12.8k
        const xmlChar *cur = ent->name;
4015
4016
        /*
4017
                     * We also check for recursion and amplification
4018
                     * when entities are not substituted. They're
4019
                     * often expanded later.
4020
         */
4021
12.8k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4022
12.8k
      (ent->content != NULL)) {
4023
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4024
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
4025
4026
0
                            ctxt->sizeentcopy = ent->length;
4027
4028
0
                            ++ctxt->depth;
4029
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4030
0
                                    ent->content, ent->length,
4031
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4032
0
                                    /* check */ 1);
4033
0
                            --ctxt->depth;
4034
4035
                            /*
4036
                             * If we're parsing DTD content, the entity
4037
                             * might reference other entities which
4038
                             * weren't defined yet, so the check isn't
4039
                             * reliable.
4040
                             */
4041
0
                            if (ctxt->inSubset == 0) {
4042
0
                                ent->flags |= XML_ENT_CHECKED;
4043
0
                                ent->expandedSize = ctxt->sizeentcopy;
4044
0
                            }
4045
4046
0
                            if (rep != NULL) {
4047
0
                                xmlFree(rep);
4048
0
                                rep = NULL;
4049
0
                            } else {
4050
0
                                ent->content[0] = 0;
4051
0
                            }
4052
4053
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4054
0
                                goto error;
4055
0
                        } else {
4056
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4057
0
                                goto error;
4058
0
                        }
4059
0
        }
4060
4061
        /*
4062
         * Just output the reference
4063
         */
4064
12.8k
        buf[len++] = '&';
4065
13.2k
        while (len + i + 10 > buf_size) {
4066
660
      growBuffer(buf, i + 10);
4067
660
        }
4068
12.8k
        for (;i > 0;i--)
4069
0
      buf[len++] = *cur++;
4070
12.8k
        buf[len++] = ';';
4071
12.8k
    }
4072
171k
      }
4073
36.6M
  } else {
4074
36.6M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4075
531k
          if ((len != 0) || (!normalize)) {
4076
528k
        if ((!normalize) || (!in_space)) {
4077
520k
      COPY_BUF(l,buf,len,0x20);
4078
520k
      while (len + 10 > buf_size) {
4079
918
          growBuffer(buf, 10);
4080
918
      }
4081
520k
        }
4082
528k
        in_space = 1;
4083
528k
    }
4084
36.1M
      } else {
4085
36.1M
          in_space = 0;
4086
36.1M
    COPY_BUF(l,buf,len,c);
4087
36.1M
    if (len + 10 > buf_size) {
4088
11.6k
        growBuffer(buf, 10);
4089
11.6k
    }
4090
36.1M
      }
4091
36.6M
      NEXTL(l);
4092
36.6M
  }
4093
36.8M
  GROW;
4094
36.8M
  c = CUR_CHAR(l);
4095
36.8M
        if (len > maxLength) {
4096
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4097
0
                           "AttValue length too long\n");
4098
0
            goto mem_error;
4099
0
        }
4100
36.8M
    }
4101
20.4k
    if (ctxt->instate == XML_PARSER_EOF)
4102
0
        goto error;
4103
4104
20.4k
    if ((in_space) && (normalize)) {
4105
1.90k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4106
664
    }
4107
20.4k
    buf[len] = 0;
4108
20.4k
    if (RAW == '<') {
4109
267
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4110
20.1k
    } else if (RAW != limit) {
4111
2.51k
  if ((c != 0) && (!IS_CHAR(c))) {
4112
111
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4113
111
         "invalid character in attribute value\n");
4114
2.40k
  } else {
4115
2.40k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116
2.40k
         "AttValue: ' expected\n");
4117
2.40k
        }
4118
2.51k
    } else
4119
17.6k
  NEXT;
4120
4121
20.4k
    if (attlen != NULL) *attlen = len;
4122
20.4k
    return(buf);
4123
4124
0
mem_error:
4125
0
    xmlErrMemory(ctxt, NULL);
4126
0
error:
4127
0
    if (buf != NULL)
4128
0
        xmlFree(buf);
4129
0
    if (rep != NULL)
4130
0
        xmlFree(rep);
4131
0
    return(NULL);
4132
0
}
4133
4134
/**
4135
 * xmlParseAttValue:
4136
 * @ctxt:  an XML parser context
4137
 *
4138
 * DEPRECATED: Internal function, don't use.
4139
 *
4140
 * parse a value for an attribute
4141
 * Note: the parser won't do substitution of entities here, this
4142
 * will be handled later in xmlStringGetNodeList
4143
 *
4144
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145
 *                   "'" ([^<&'] | Reference)* "'"
4146
 *
4147
 * 3.3.3 Attribute-Value Normalization:
4148
 * Before the value of an attribute is passed to the application or
4149
 * checked for validity, the XML processor must normalize it as follows:
4150
 * - a character reference is processed by appending the referenced
4151
 *   character to the attribute value
4152
 * - an entity reference is processed by recursively processing the
4153
 *   replacement text of the entity
4154
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155
 *   appending #x20 to the normalized value, except that only a single
4156
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4157
 *   parsed entity or the literal entity value of an internal parsed entity
4158
 * - other characters are processed by appending them to the normalized value
4159
 * If the declared value is not CDATA, then the XML processor must further
4160
 * process the normalized attribute value by discarding any leading and
4161
 * trailing space (#x20) characters, and by replacing sequences of space
4162
 * (#x20) characters by a single space (#x20) character.
4163
 * All attributes for which no declaration has been read should be treated
4164
 * by a non-validating parser as if declared CDATA.
4165
 *
4166
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4167
 */
4168
4169
4170
xmlChar *
4171
13.5k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4172
13.5k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4173
13.5k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4174
13.5k
}
4175
4176
/**
4177
 * xmlParseSystemLiteral:
4178
 * @ctxt:  an XML parser context
4179
 *
4180
 * DEPRECATED: Internal function, don't use.
4181
 *
4182
 * parse an XML Literal
4183
 *
4184
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4185
 *
4186
 * Returns the SystemLiteral parsed or NULL
4187
 */
4188
4189
xmlChar *
4190
3.27k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4191
3.27k
    xmlChar *buf = NULL;
4192
3.27k
    int len = 0;
4193
3.27k
    int size = XML_PARSER_BUFFER_SIZE;
4194
3.27k
    int cur, l;
4195
3.27k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4196
3.27k
                    XML_MAX_TEXT_LENGTH :
4197
3.27k
                    XML_MAX_NAME_LENGTH;
4198
3.27k
    xmlChar stop;
4199
3.27k
    int state = ctxt->instate;
4200
4201
3.27k
    if (RAW == '"') {
4202
464
        NEXT;
4203
464
  stop = '"';
4204
2.81k
    } else if (RAW == '\'') {
4205
1.88k
        NEXT;
4206
1.88k
  stop = '\'';
4207
1.88k
    } else {
4208
933
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4209
933
  return(NULL);
4210
933
    }
4211
4212
2.34k
    buf = (xmlChar *) xmlMallocAtomic(size);
4213
2.34k
    if (buf == NULL) {
4214
0
        xmlErrMemory(ctxt, NULL);
4215
0
  return(NULL);
4216
0
    }
4217
2.34k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4218
2.34k
    cur = CUR_CHAR(l);
4219
16.8M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4220
16.8M
  if (len + 5 >= size) {
4221
457
      xmlChar *tmp;
4222
4223
457
      size *= 2;
4224
457
      tmp = (xmlChar *) xmlRealloc(buf, size);
4225
457
      if (tmp == NULL) {
4226
0
          xmlFree(buf);
4227
0
    xmlErrMemory(ctxt, NULL);
4228
0
    ctxt->instate = (xmlParserInputState) state;
4229
0
    return(NULL);
4230
0
      }
4231
457
      buf = tmp;
4232
457
  }
4233
16.8M
  COPY_BUF(l,buf,len,cur);
4234
16.8M
        if (len > maxLength) {
4235
2
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236
2
            xmlFree(buf);
4237
2
            ctxt->instate = (xmlParserInputState) state;
4238
2
            return(NULL);
4239
2
        }
4240
16.8M
  NEXTL(l);
4241
16.8M
  cur = CUR_CHAR(l);
4242
16.8M
    }
4243
2.34k
    buf[len] = 0;
4244
2.34k
    if (ctxt->instate == XML_PARSER_EOF) {
4245
0
        xmlFree(buf);
4246
0
        return(NULL);
4247
0
    }
4248
2.34k
    ctxt->instate = (xmlParserInputState) state;
4249
2.34k
    if (!IS_CHAR(cur)) {
4250
141
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4251
2.20k
    } else {
4252
2.20k
  NEXT;
4253
2.20k
    }
4254
2.34k
    return(buf);
4255
2.34k
}
4256
4257
/**
4258
 * xmlParsePubidLiteral:
4259
 * @ctxt:  an XML parser context
4260
 *
4261
 * DEPRECATED: Internal function, don't use.
4262
 *
4263
 * parse an XML public literal
4264
 *
4265
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4266
 *
4267
 * Returns the PubidLiteral parsed or NULL.
4268
 */
4269
4270
xmlChar *
4271
1.63k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4272
1.63k
    xmlChar *buf = NULL;
4273
1.63k
    int len = 0;
4274
1.63k
    int size = XML_PARSER_BUFFER_SIZE;
4275
1.63k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4276
1.63k
                    XML_MAX_TEXT_LENGTH :
4277
1.63k
                    XML_MAX_NAME_LENGTH;
4278
1.63k
    xmlChar cur;
4279
1.63k
    xmlChar stop;
4280
1.63k
    xmlParserInputState oldstate = ctxt->instate;
4281
4282
1.63k
    if (RAW == '"') {
4283
512
        NEXT;
4284
512
  stop = '"';
4285
1.12k
    } else if (RAW == '\'') {
4286
825
        NEXT;
4287
825
  stop = '\'';
4288
825
    } else {
4289
302
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4290
302
  return(NULL);
4291
302
    }
4292
1.33k
    buf = (xmlChar *) xmlMallocAtomic(size);
4293
1.33k
    if (buf == NULL) {
4294
0
  xmlErrMemory(ctxt, NULL);
4295
0
  return(NULL);
4296
0
    }
4297
1.33k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4298
1.33k
    cur = CUR;
4299
78.4k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4300
77.1k
  if (len + 1 >= size) {
4301
302
      xmlChar *tmp;
4302
4303
302
      size *= 2;
4304
302
      tmp = (xmlChar *) xmlRealloc(buf, size);
4305
302
      if (tmp == NULL) {
4306
0
    xmlErrMemory(ctxt, NULL);
4307
0
    xmlFree(buf);
4308
0
    return(NULL);
4309
0
      }
4310
302
      buf = tmp;
4311
302
  }
4312
77.1k
  buf[len++] = cur;
4313
77.1k
        if (len > maxLength) {
4314
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315
0
            xmlFree(buf);
4316
0
            return(NULL);
4317
0
        }
4318
77.1k
  NEXT;
4319
77.1k
  cur = CUR;
4320
77.1k
    }
4321
1.33k
    buf[len] = 0;
4322
1.33k
    if (ctxt->instate == XML_PARSER_EOF) {
4323
0
        xmlFree(buf);
4324
0
        return(NULL);
4325
0
    }
4326
1.33k
    if (cur != stop) {
4327
493
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4328
844
    } else {
4329
844
  NEXTL(1);
4330
844
    }
4331
1.33k
    ctxt->instate = oldstate;
4332
1.33k
    return(buf);
4333
1.33k
}
4334
4335
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4336
4337
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. It is indexed by a byte value: an entry equal to its own
 * index marks a byte accepted by that test, an entry of 0x00 marks a
 * byte that is not. Accepted are 0x09 and the ASCII range 0x20-0x7F
 * minus '&' (0x26), '<' (0x3C) and ']' (0x5D). CR (0x0D) and LF (0x0A)
 * are zero here (handled separately), as are all other control bytes.
 *
 * Only the first 128 entries are spelled out; the remaining 128
 * (non-ascii bytes 0x80-0xFF) are zero-initialized by the language.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: implicitly 0x00 */
};
4374
4375
/**
4376
 * xmlParseCharDataInternal:
4377
 * @ctxt:  an XML parser context
4378
 * @partial:  buffer may contain partial UTF-8 sequences
4379
 *
4380
 * Parse character data. Always makes progress if the first char isn't
4381
 * '<' or '&'.
4382
 *
4383
 * The right angle bracket (>) may be represented using the string "&gt;",
4384
 * and must, for compatibility, be escaped using "&gt;" or a character
4385
 * reference when it appears in the string "]]>" in content, when that
4386
 * string is not marking the end of a CDATA section.
4387
 *
4388
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4389
 */
4390
static void
4391
556k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4392
556k
    const xmlChar *in;
4393
556k
    int nbchar = 0;
4394
556k
    int line = ctxt->input->line;
4395
556k
    int col = ctxt->input->col;
4396
556k
    int ccol;
4397
4398
556k
    GROW;
4399
    /*
4400
     * Accelerated common case where input don't need to be
4401
     * modified before passing it to the handler.
4402
     */
4403
556k
    in = ctxt->input->cur;
4404
641k
    do {
4405
1.03M
get_more_space:
4406
1.48M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4407
1.03M
        if (*in == 0xA) {
4408
417k
            do {
4409
417k
                ctxt->input->line++; ctxt->input->col = 1;
4410
417k
                in++;
4411
417k
            } while (*in == 0xA);
4412
395k
            goto get_more_space;
4413
395k
        }
4414
641k
        if (*in == '<') {
4415
267k
            nbchar = in - ctxt->input->cur;
4416
267k
            if (nbchar > 0) {
4417
267k
                const xmlChar *tmp = ctxt->input->cur;
4418
267k
                ctxt->input->cur = in;
4419
4420
267k
                if ((ctxt->sax != NULL) &&
4421
267k
                    (ctxt->sax->ignorableWhitespace !=
4422
267k
                     ctxt->sax->characters)) {
4423
267k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4425
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4426
0
                                                   tmp, nbchar);
4427
267k
                    } else {
4428
267k
                        if (ctxt->sax->characters != NULL)
4429
267k
                            ctxt->sax->characters(ctxt->userData,
4430
267k
                                                  tmp, nbchar);
4431
267k
                        if (*ctxt->space == -1)
4432
22.8k
                            *ctxt->space = -2;
4433
267k
                    }
4434
267k
                } else if ((ctxt->sax != NULL) &&
4435
0
                           (ctxt->sax->characters != NULL)) {
4436
0
                    ctxt->sax->characters(ctxt->userData,
4437
0
                                          tmp, nbchar);
4438
0
                }
4439
267k
            }
4440
267k
            return;
4441
267k
        }
4442
4443
499k
get_more:
4444
499k
        ccol = ctxt->input->col;
4445
4.63M
        while (test_char_data[*in]) {
4446
4.13M
            in++;
4447
4.13M
            ccol++;
4448
4.13M
        }
4449
499k
        ctxt->input->col = ccol;
4450
499k
        if (*in == 0xA) {
4451
116k
            do {
4452
116k
                ctxt->input->line++; ctxt->input->col = 1;
4453
116k
                in++;
4454
116k
            } while (*in == 0xA);
4455
112k
            goto get_more;
4456
112k
        }
4457
386k
        if (*in == ']') {
4458
12.1k
            if ((in[1] == ']') && (in[2] == '>')) {
4459
3
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460
3
                if (ctxt->instate != XML_PARSER_EOF)
4461
3
                    ctxt->input->cur = in + 1;
4462
3
                return;
4463
3
            }
4464
12.1k
            in++;
4465
12.1k
            ctxt->input->col++;
4466
12.1k
            goto get_more;
4467
12.1k
        }
4468
374k
        nbchar = in - ctxt->input->cur;
4469
374k
        if (nbchar > 0) {
4470
361k
            if ((ctxt->sax != NULL) &&
4471
361k
                (ctxt->sax->ignorableWhitespace !=
4472
361k
                 ctxt->sax->characters) &&
4473
361k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4474
177k
                const xmlChar *tmp = ctxt->input->cur;
4475
177k
                ctxt->input->cur = in;
4476
4477
177k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4478
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4479
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
0
                                                       tmp, nbchar);
4481
177k
                } else {
4482
177k
                    if (ctxt->sax->characters != NULL)
4483
177k
                        ctxt->sax->characters(ctxt->userData,
4484
177k
                                              tmp, nbchar);
4485
177k
                    if (*ctxt->space == -1)
4486
16.6k
                        *ctxt->space = -2;
4487
177k
                }
4488
177k
                line = ctxt->input->line;
4489
177k
                col = ctxt->input->col;
4490
183k
            } else if (ctxt->sax != NULL) {
4491
183k
                if (ctxt->sax->characters != NULL)
4492
183k
                    ctxt->sax->characters(ctxt->userData,
4493
183k
                                          ctxt->input->cur, nbchar);
4494
183k
                line = ctxt->input->line;
4495
183k
                col = ctxt->input->col;
4496
183k
            }
4497
361k
        }
4498
374k
        ctxt->input->cur = in;
4499
374k
        if (*in == 0xD) {
4500
96.0k
            in++;
4501
96.0k
            if (*in == 0xA) {
4502
89.2k
                ctxt->input->cur = in;
4503
89.2k
                in++;
4504
89.2k
                ctxt->input->line++; ctxt->input->col = 1;
4505
89.2k
                continue; /* while */
4506
89.2k
            }
4507
6.88k
            in--;
4508
6.88k
        }
4509
285k
        if (*in == '<') {
4510
268k
            return;
4511
268k
        }
4512
16.7k
        if (*in == '&') {
4513
5.51k
            return;
4514
5.51k
        }
4515
11.1k
        SHRINK;
4516
11.1k
        GROW;
4517
11.1k
        if (ctxt->instate == XML_PARSER_EOF)
4518
0
            return;
4519
11.1k
        in = ctxt->input->cur;
4520
100k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4521
100k
             (*in == 0x09) || (*in == 0x0a));
4522
14.9k
    ctxt->input->line = line;
4523
14.9k
    ctxt->input->col = col;
4524
14.9k
    xmlParseCharDataComplex(ctxt, partial);
4525
14.9k
}
4526
4527
/**
4528
 * xmlParseCharDataComplex:
4529
 * @ctxt:  an XML parser context
4530
 * @cdata:  int indicating whether we are within a CDATA section
4531
 *
4532
 * Always makes progress if the first char isn't '<' or '&'.
4533
 *
4534
 * parse a CharData section.this is the fallback function
4535
 * of xmlParseCharData() when the parsing requires handling
4536
 * of non-ASCII characters.
4537
 */
4538
static void
4539
14.9k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4540
14.9k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4541
14.9k
    int nbchar = 0;
4542
14.9k
    int cur, l;
4543
4544
14.9k
    cur = CUR_CHAR(l);
4545
4.07M
    while ((cur != '<') && /* checked */
4546
4.07M
           (cur != '&') &&
4547
4.07M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4548
4.05M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4549
311
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
311
  }
4551
4.05M
  COPY_BUF(l,buf,nbchar,cur);
4552
  /* move current position before possible calling of ctxt->sax->characters */
4553
4.05M
  NEXTL(l);
4554
4.05M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4555
31.4k
      buf[nbchar] = 0;
4556
4557
      /*
4558
       * OK the segment is to be consumed as chars.
4559
       */
4560
31.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4561
31.2k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4562
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4563
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4564
0
                                     buf, nbchar);
4565
31.2k
    } else {
4566
31.2k
        if (ctxt->sax->characters != NULL)
4567
31.2k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4568
31.2k
        if ((ctxt->sax->characters !=
4569
31.2k
             ctxt->sax->ignorableWhitespace) &&
4570
31.2k
      (*ctxt->space == -1))
4571
381
      *ctxt->space = -2;
4572
31.2k
    }
4573
31.2k
      }
4574
31.4k
      nbchar = 0;
4575
            /* something really bad happened in the SAX callback */
4576
31.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4577
0
                return;
4578
31.4k
            SHRINK;
4579
31.4k
  }
4580
4.05M
  cur = CUR_CHAR(l);
4581
4.05M
    }
4582
14.9k
    if (ctxt->instate == XML_PARSER_EOF)
4583
0
        return;
4584
14.9k
    if (nbchar != 0) {
4585
14.6k
        buf[nbchar] = 0;
4586
  /*
4587
   * OK the segment is to be consumed as chars.
4588
   */
4589
14.6k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4590
14.4k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4591
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4592
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593
14.4k
      } else {
4594
14.4k
    if (ctxt->sax->characters != NULL)
4595
14.4k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4596
14.4k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597
14.4k
        (*ctxt->space == -1))
4598
5.17k
        *ctxt->space = -2;
4599
14.4k
      }
4600
14.4k
  }
4601
14.6k
    }
4602
    /*
4603
     * cur == 0 can mean
4604
     *
4605
     * - XML_PARSER_EOF or memory error. This is checked above.
4606
     * - An actual 0 character.
4607
     * - End of buffer.
4608
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4609
     */
4610
14.9k
    if (ctxt->input->cur < ctxt->input->end) {
4611
14.0k
        if ((cur == 0) && (CUR != 0)) {
4612
123
            if (partial == 0) {
4613
13
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4614
13
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4615
13
                NEXTL(1);
4616
13
            }
4617
13.8k
        } else if ((cur != '<') && (cur != '&')) {
4618
            /* Generate the error and skip the offending character */
4619
85
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4620
85
                              "PCDATA invalid Char value %d\n", cur);
4621
85
            NEXTL(l);
4622
85
        }
4623
14.0k
    }
4624
14.9k
}
4625
4626
/**
4627
 * xmlParseCharData:
4628
 * @ctxt:  an XML parser context
4629
 * @cdata:  unused
4630
 *
4631
 * DEPRECATED: Internal function, don't use.
4632
 */
4633
void
4634
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4635
0
    xmlParseCharDataInternal(ctxt, 0);
4636
0
}
4637
4638
/**
4639
 * xmlParseExternalID:
4640
 * @ctxt:  an XML parser context
4641
 * @publicID:  a xmlChar** receiving PubidLiteral
4642
 * @strict: indicate whether we should restrict parsing to only
4643
 *          production [75], see NOTE below
4644
 *
4645
 * DEPRECATED: Internal function, don't use.
4646
 *
4647
 * Parse an External ID or a Public ID
4648
 *
4649
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4650
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4651
 *
4652
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4653
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4654
 *
4655
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4656
 *
4657
 * Returns the function returns SystemLiteral and in the second
4658
 *                case publicID receives PubidLiteral, is strict is off
4659
 *                it is possible to return NULL and have publicID set.
4660
 */
4661
4662
xmlChar *
4663
8.05k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4664
8.05k
    xmlChar *URI = NULL;
4665
4666
8.05k
    *publicID = NULL;
4667
8.05k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4668
2.32k
        SKIP(6);
4669
2.32k
  if (SKIP_BLANKS == 0) {
4670
734
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4671
734
                     "Space required after 'SYSTEM'\n");
4672
734
  }
4673
2.32k
  URI = xmlParseSystemLiteral(ctxt);
4674
2.32k
  if (URI == NULL) {
4675
350
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4676
350
        }
4677
5.72k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4678
1.63k
        SKIP(6);
4679
1.63k
  if (SKIP_BLANKS == 0) {
4680
106
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4681
106
        "Space required after 'PUBLIC'\n");
4682
106
  }
4683
1.63k
  *publicID = xmlParsePubidLiteral(ctxt);
4684
1.63k
  if (*publicID == NULL) {
4685
302
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4686
302
  }
4687
1.63k
  if (strict) {
4688
      /*
4689
       * We don't handle [83] so "S SystemLiteral" is required.
4690
       */
4691
813
      if (SKIP_BLANKS == 0) {
4692
595
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693
595
      "Space required after the Public Identifier\n");
4694
595
      }
4695
826
  } else {
4696
      /*
4697
       * We handle [83] so we return immediately, if
4698
       * "S SystemLiteral" is not detected. We skip blanks if no
4699
             * system literal was found, but this is harmless since we must
4700
             * be at the end of a NotationDecl.
4701
       */
4702
826
      if (SKIP_BLANKS == 0) return(NULL);
4703
307
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4704
307
  }
4705
951
  URI = xmlParseSystemLiteral(ctxt);
4706
951
  if (URI == NULL) {
4707
585
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708
585
        }
4709
951
    }
4710
7.36k
    return(URI);
4711
8.05k
}
4712
4713
/**
4714
 * xmlParseCommentComplex:
4715
 * @ctxt:  an XML parser context
4716
 * @buf:  the already parsed part of the buffer
4717
 * @len:  number of bytes in the buffer
4718
 * @size:  allocated size of the buffer
4719
 *
4720
 * Skip an XML (SGML) comment <!-- .... -->
4721
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4722
 *  must not occur within comments. "
4723
 * This is the slow routine in case the accelerator for ascii didn't work
4724
 *
4725
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4726
 */
4727
static void
4728
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4729
46.8k
                       size_t len, size_t size) {
4730
46.8k
    int q, ql;
4731
46.8k
    int r, rl;
4732
46.8k
    int cur, l;
4733
46.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4734
46.8k
                       XML_MAX_HUGE_LENGTH :
4735
46.8k
                       XML_MAX_TEXT_LENGTH;
4736
46.8k
    int inputid;
4737
4738
46.8k
    inputid = ctxt->input->id;
4739
4740
46.8k
    if (buf == NULL) {
4741
46.8k
        len = 0;
4742
46.8k
  size = XML_PARSER_BUFFER_SIZE;
4743
46.8k
  buf = (xmlChar *) xmlMallocAtomic(size);
4744
46.8k
  if (buf == NULL) {
4745
0
      xmlErrMemory(ctxt, NULL);
4746
0
      return;
4747
0
  }
4748
46.8k
    }
4749
46.8k
    q = CUR_CHAR(ql);
4750
46.8k
    if (q == 0)
4751
76
        goto not_terminated;
4752
46.7k
    if (!IS_CHAR(q)) {
4753
27
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4754
27
                          "xmlParseComment: invalid xmlChar value %d\n",
4755
27
                    q);
4756
27
  xmlFree (buf);
4757
27
  return;
4758
27
    }
4759
46.7k
    NEXTL(ql);
4760
46.7k
    r = CUR_CHAR(rl);
4761
46.7k
    if (r == 0)
4762
41
        goto not_terminated;
4763
46.7k
    if (!IS_CHAR(r)) {
4764
25
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4765
25
                          "xmlParseComment: invalid xmlChar value %d\n",
4766
25
                    r);
4767
25
  xmlFree (buf);
4768
25
  return;
4769
25
    }
4770
46.6k
    NEXTL(rl);
4771
46.6k
    cur = CUR_CHAR(l);
4772
46.6k
    if (cur == 0)
4773
37
        goto not_terminated;
4774
26.4M
    while (IS_CHAR(cur) && /* checked */
4775
26.4M
           ((cur != '>') ||
4776
26.4M
      (r != '-') || (q != '-'))) {
4777
26.4M
  if ((r == '-') && (q == '-')) {
4778
875
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4779
875
  }
4780
26.4M
  if (len + 5 >= size) {
4781
3.24k
      xmlChar *new_buf;
4782
3.24k
            size_t new_size;
4783
4784
3.24k
      new_size = size * 2;
4785
3.24k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4786
3.24k
      if (new_buf == NULL) {
4787
0
    xmlFree (buf);
4788
0
    xmlErrMemory(ctxt, NULL);
4789
0
    return;
4790
0
      }
4791
3.24k
      buf = new_buf;
4792
3.24k
            size = new_size;
4793
3.24k
  }
4794
26.4M
  COPY_BUF(ql,buf,len,q);
4795
26.4M
        if (len > maxLength) {
4796
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797
0
                         "Comment too big found", NULL);
4798
0
            xmlFree (buf);
4799
0
            return;
4800
0
        }
4801
4802
26.4M
  q = r;
4803
26.4M
  ql = rl;
4804
26.4M
  r = cur;
4805
26.4M
  rl = l;
4806
4807
26.4M
  NEXTL(l);
4808
26.4M
  cur = CUR_CHAR(l);
4809
4810
26.4M
    }
4811
46.6k
    buf[len] = 0;
4812
46.6k
    if (ctxt->instate == XML_PARSER_EOF) {
4813
0
        xmlFree(buf);
4814
0
        return;
4815
0
    }
4816
46.6k
    if (cur == 0) {
4817
160
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4818
160
                       "Comment not terminated \n<!--%.50s\n", buf);
4819
46.4k
    } else if (!IS_CHAR(cur)) {
4820
49
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821
49
                          "xmlParseComment: invalid xmlChar value %d\n",
4822
49
                    cur);
4823
46.4k
    } else {
4824
46.4k
  if (inputid != ctxt->input->id) {
4825
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826
0
               "Comment doesn't start and stop in the same"
4827
0
                           " entity\n");
4828
0
  }
4829
46.4k
        NEXT;
4830
46.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4831
46.4k
      (!ctxt->disableSAX))
4832
0
      ctxt->sax->comment(ctxt->userData, buf);
4833
46.4k
    }
4834
46.6k
    xmlFree(buf);
4835
46.6k
    return;
4836
154
not_terminated:
4837
154
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838
154
       "Comment not terminated\n", NULL);
4839
154
    xmlFree(buf);
4840
154
    return;
4841
46.6k
}
4842
4843
/**
4844
 * xmlParseComment:
4845
 * @ctxt:  an XML parser context
4846
 *
4847
 * DEPRECATED: Internal function, don't use.
4848
 *
4849
 * Parse an XML (SGML) comment. Always consumes '<!'.
4850
 *
4851
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4852
 *  must not occur within comments. "
4853
 *
4854
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4855
 */
4856
void
4857
73.7k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4858
73.7k
    xmlChar *buf = NULL;
4859
73.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
4860
73.7k
    size_t len = 0;
4861
73.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4862
73.7k
                       XML_MAX_HUGE_LENGTH :
4863
73.7k
                       XML_MAX_TEXT_LENGTH;
4864
73.7k
    xmlParserInputState state;
4865
73.7k
    const xmlChar *in;
4866
73.7k
    size_t nbchar = 0;
4867
73.7k
    int ccol;
4868
73.7k
    int inputid;
4869
4870
    /*
4871
     * Check that there is a comment right here.
4872
     */
4873
73.7k
    if ((RAW != '<') || (NXT(1) != '!'))
4874
0
        return;
4875
73.7k
    SKIP(2);
4876
73.7k
    if ((RAW != '-') || (NXT(1) != '-'))
4877
5
        return;
4878
73.7k
    state = ctxt->instate;
4879
73.7k
    ctxt->instate = XML_PARSER_COMMENT;
4880
73.7k
    inputid = ctxt->input->id;
4881
73.7k
    SKIP(2);
4882
73.7k
    GROW;
4883
4884
    /*
4885
     * Accelerated common case where input don't need to be
4886
     * modified before passing it to the handler.
4887
     */
4888
73.7k
    in = ctxt->input->cur;
4889
73.7k
    do {
4890
73.7k
  if (*in == 0xA) {
4891
22.5k
      do {
4892
22.5k
    ctxt->input->line++; ctxt->input->col = 1;
4893
22.5k
    in++;
4894
22.5k
      } while (*in == 0xA);
4895
21.6k
  }
4896
233k
get_more:
4897
233k
        ccol = ctxt->input->col;
4898
5.44M
  while (((*in > '-') && (*in <= 0x7F)) ||
4899
5.44M
         ((*in >= 0x20) && (*in < '-')) ||
4900
5.44M
         (*in == 0x09)) {
4901
5.20M
        in++;
4902
5.20M
        ccol++;
4903
5.20M
  }
4904
233k
  ctxt->input->col = ccol;
4905
233k
  if (*in == 0xA) {
4906
228k
      do {
4907
228k
    ctxt->input->line++; ctxt->input->col = 1;
4908
228k
    in++;
4909
228k
      } while (*in == 0xA);
4910
117k
      goto get_more;
4911
117k
  }
4912
116k
  nbchar = in - ctxt->input->cur;
4913
  /*
4914
   * save current set of data
4915
   */
4916
116k
  if (nbchar > 0) {
4917
107k
      if ((ctxt->sax != NULL) &&
4918
107k
    (ctxt->sax->comment != NULL)) {
4919
0
    if (buf == NULL) {
4920
0
        if ((*in == '-') && (in[1] == '-'))
4921
0
            size = nbchar + 1;
4922
0
        else
4923
0
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4924
0
        buf = (xmlChar *) xmlMallocAtomic(size);
4925
0
        if (buf == NULL) {
4926
0
            xmlErrMemory(ctxt, NULL);
4927
0
      ctxt->instate = state;
4928
0
      return;
4929
0
        }
4930
0
        len = 0;
4931
0
    } else if (len + nbchar + 1 >= size) {
4932
0
        xmlChar *new_buf;
4933
0
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4934
0
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4935
0
        if (new_buf == NULL) {
4936
0
            xmlFree (buf);
4937
0
      xmlErrMemory(ctxt, NULL);
4938
0
      ctxt->instate = state;
4939
0
      return;
4940
0
        }
4941
0
        buf = new_buf;
4942
0
    }
4943
0
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4944
0
    len += nbchar;
4945
0
    buf[len] = 0;
4946
0
      }
4947
107k
  }
4948
116k
        if (len > maxLength) {
4949
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950
0
                         "Comment too big found", NULL);
4951
0
            xmlFree (buf);
4952
0
            return;
4953
0
        }
4954
116k
  ctxt->input->cur = in;
4955
116k
  if (*in == 0xA) {
4956
0
      in++;
4957
0
      ctxt->input->line++; ctxt->input->col = 1;
4958
0
  }
4959
116k
  if (*in == 0xD) {
4960
44.0k
      in++;
4961
44.0k
      if (*in == 0xA) {
4962
395
    ctxt->input->cur = in;
4963
395
    in++;
4964
395
    ctxt->input->line++; ctxt->input->col = 1;
4965
395
    goto get_more;
4966
395
      }
4967
43.6k
      in--;
4968
43.6k
  }
4969
115k
  SHRINK;
4970
115k
  GROW;
4971
115k
        if (ctxt->instate == XML_PARSER_EOF) {
4972
0
            xmlFree(buf);
4973
0
            return;
4974
0
        }
4975
115k
  in = ctxt->input->cur;
4976
115k
  if (*in == '-') {
4977
68.7k
      if (in[1] == '-') {
4978
27.6k
          if (in[2] == '>') {
4979
26.9k
        if (ctxt->input->id != inputid) {
4980
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4981
0
                     "comment doesn't start and stop in the"
4982
0
                                       " same entity\n");
4983
0
        }
4984
26.9k
        SKIP(3);
4985
26.9k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4986
26.9k
            (!ctxt->disableSAX)) {
4987
0
      if (buf != NULL)
4988
0
          ctxt->sax->comment(ctxt->userData, buf);
4989
0
      else
4990
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4991
0
        }
4992
26.9k
        if (buf != NULL)
4993
0
            xmlFree(buf);
4994
26.9k
        if (ctxt->instate != XML_PARSER_EOF)
4995
26.9k
      ctxt->instate = state;
4996
26.9k
        return;
4997
26.9k
    }
4998
688
    if (buf != NULL) {
4999
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5000
0
                          "Double hyphen within comment: "
5001
0
                                      "<!--%.50s\n",
5002
0
              buf);
5003
0
    } else
5004
688
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5005
688
                          "Double hyphen within comment\n", NULL);
5006
688
                if (ctxt->instate == XML_PARSER_EOF) {
5007
0
                    xmlFree(buf);
5008
0
                    return;
5009
0
                }
5010
688
    in++;
5011
688
    ctxt->input->col++;
5012
688
      }
5013
41.8k
      in++;
5014
41.8k
      ctxt->input->col++;
5015
41.8k
      goto get_more;
5016
68.7k
  }
5017
115k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5018
46.8k
    xmlParseCommentComplex(ctxt, buf, len, size);
5019
46.8k
    ctxt->instate = state;
5020
46.8k
    return;
5021
73.7k
}
5022
5023
5024
/**
5025
 * xmlParsePITarget:
5026
 * @ctxt:  an XML parser context
5027
 *
5028
 * DEPRECATED: Internal function, don't use.
5029
 *
5030
 * parse the name of a PI
5031
 *
5032
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5033
 *
5034
 * Returns the PITarget name or NULL
5035
 */
5036
5037
const xmlChar *
5038
184k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5039
184k
    const xmlChar *name;
5040
5041
184k
    name = xmlParseName(ctxt);
5042
184k
    if ((name != NULL) &&
5043
184k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5044
184k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5045
184k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5046
2.02k
  int i;
5047
2.02k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5048
2.02k
      (name[2] == 'l') && (name[3] == 0)) {
5049
461
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5050
461
     "XML declaration allowed only at the start of the document\n");
5051
461
      return(name);
5052
1.56k
  } else if (name[3] == 0) {
5053
229
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5054
229
      return(name);
5055
229
  }
5056
3.80k
  for (i = 0;;i++) {
5057
3.80k
      if (xmlW3CPIs[i] == NULL) break;
5058
2.66k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5059
190
          return(name);
5060
2.66k
  }
5061
1.14k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062
1.14k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5063
1.14k
          NULL, NULL);
5064
1.14k
    }
5065
183k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5066
458
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5067
458
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5068
458
    }
5069
183k
    return(name);
5070
184k
}
5071
5072
#ifdef LIBXML_CATALOG_ENABLED
5073
/**
5074
 * xmlParseCatalogPI:
5075
 * @ctxt:  an XML parser context
5076
 * @catalog:  the PI value string
5077
 *
5078
 * parse an XML Catalog Processing Instruction.
5079
 *
5080
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5081
 *
5082
 * Occurs only if allowed by the user and if happening in the Misc
5083
 * part of the document before any doctype information
5084
 * This will add the given catalog to the parsing context in order
5085
 * to be used if there is a resolution need further down in the document
5086
 */
5087
5088
static void
5089
1.27k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5090
1.27k
    xmlChar *URL = NULL;
5091
1.27k
    const xmlChar *tmp, *base;
5092
1.27k
    xmlChar marker;
5093
5094
1.27k
    tmp = catalog;
5095
1.27k
    while (IS_BLANK_CH(*tmp)) tmp++;
5096
1.27k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5097
196
  goto error;
5098
1.08k
    tmp += 7;
5099
1.50k
    while (IS_BLANK_CH(*tmp)) tmp++;
5100
1.08k
    if (*tmp != '=') {
5101
432
  return;
5102
432
    }
5103
648
    tmp++;
5104
768
    while (IS_BLANK_CH(*tmp)) tmp++;
5105
648
    marker = *tmp;
5106
648
    if ((marker != '\'') && (marker != '"'))
5107
235
  goto error;
5108
413
    tmp++;
5109
413
    base = tmp;
5110
3.70k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5111
413
    if (*tmp == 0)
5112
49
  goto error;
5113
364
    URL = xmlStrndup(base, tmp - base);
5114
364
    tmp++;
5115
565
    while (IS_BLANK_CH(*tmp)) tmp++;
5116
364
    if (*tmp != 0)
5117
94
  goto error;
5118
5119
270
    if (URL != NULL) {
5120
270
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5121
270
  xmlFree(URL);
5122
270
    }
5123
270
    return;
5124
5125
574
error:
5126
574
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5127
574
            "Catalog PI syntax error: %s\n",
5128
574
      catalog, NULL);
5129
574
    if (URL != NULL)
5130
94
  xmlFree(URL);
5131
574
}
5132
#endif
5133
5134
/**
5135
 * xmlParsePI:
5136
 * @ctxt:  an XML parser context
5137
 *
5138
 * DEPRECATED: Internal function, don't use.
5139
 *
5140
 * parse an XML Processing Instruction.
5141
 *
5142
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5143
 *
5144
 * The processing is transferred to SAX once parsed.
5145
 */
5146
5147
void
5148
184k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5149
184k
    xmlChar *buf = NULL;
5150
184k
    size_t len = 0;
5151
184k
    size_t size = XML_PARSER_BUFFER_SIZE;
5152
184k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5153
184k
                       XML_MAX_HUGE_LENGTH :
5154
184k
                       XML_MAX_TEXT_LENGTH;
5155
184k
    int cur, l;
5156
184k
    const xmlChar *target;
5157
184k
    xmlParserInputState state;
5158
5159
184k
    if ((RAW == '<') && (NXT(1) == '?')) {
5160
184k
  int inputid = ctxt->input->id;
5161
184k
  state = ctxt->instate;
5162
184k
        ctxt->instate = XML_PARSER_PI;
5163
  /*
5164
   * this is a Processing Instruction.
5165
   */
5166
184k
  SKIP(2);
5167
5168
  /*
5169
   * Parse the target name and check for special support like
5170
   * namespace.
5171
   */
5172
184k
        target = xmlParsePITarget(ctxt);
5173
184k
  if (target != NULL) {
5174
183k
      if ((RAW == '?') && (NXT(1) == '>')) {
5175
86.7k
    if (inputid != ctxt->input->id) {
5176
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5177
0
                             "PI declaration doesn't start and stop in"
5178
0
                                   " the same entity\n");
5179
0
    }
5180
86.7k
    SKIP(2);
5181
5182
    /*
5183
     * SAX: PI detected.
5184
     */
5185
86.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5186
86.7k
        (ctxt->sax->processingInstruction != NULL))
5187
0
        ctxt->sax->processingInstruction(ctxt->userData,
5188
0
                                         target, NULL);
5189
86.7k
    if (ctxt->instate != XML_PARSER_EOF)
5190
86.7k
        ctxt->instate = state;
5191
86.7k
    return;
5192
86.7k
      }
5193
97.2k
      buf = (xmlChar *) xmlMallocAtomic(size);
5194
97.2k
      if (buf == NULL) {
5195
0
    xmlErrMemory(ctxt, NULL);
5196
0
    ctxt->instate = state;
5197
0
    return;
5198
0
      }
5199
97.2k
      if (SKIP_BLANKS == 0) {
5200
1.30k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5201
1.30k
        "ParsePI: PI %s space expected\n", target);
5202
1.30k
      }
5203
97.2k
      cur = CUR_CHAR(l);