/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * parser.c : an XML 1.0 parser, namespaces and validity support are mostly |
3 | | * implemented on top of the SAX interfaces |
4 | | * |
5 | | * References: |
6 | | * The XML specification: |
7 | | * http://www.w3.org/TR/REC-xml |
8 | | * Original 1.0 version: |
9 | | * http://www.w3.org/TR/1998/REC-xml-19980210 |
10 | | * XML second edition working draft |
11 | | * http://www.w3.org/TR/2000/WD-xml-2e-20000814 |
12 | | * |
13 | | * Okay this is a big file, the parser core is around 7000 lines, then it |
14 | | * is followed by the progressive parser top routines, then the various |
15 | | * high level APIs to call the parser and a few miscellaneous functions. |
16 | | * A number of helper functions and deprecated ones have been moved to |
17 | | * parserInternals.c to reduce this file size. |
18 | | * As much as possible the functions are associated with their relative |
19 | | * production in the XML specification. A few productions defining the |
20 | | * different ranges of characters are actually implemented either in |
21 | | * parserInternals.h or parserInternals.c |
22 | | * The DOM tree build is realized from the default SAX callbacks in |
23 | | * the module SAX.c. |
24 | | * The routines doing the validation checks are in valid.c and called either |
25 | | * from the SAX callbacks or as standalone functions using a preparsed |
26 | | * document. |
27 | | * |
28 | | * See Copyright for the status of this software. |
29 | | * |
30 | | * daniel@veillard.com |
31 | | */ |
32 | | |
33 | | /* To avoid EBCDIC trouble when parsing on zOS */ |
34 | | #if defined(__MVS__) |
35 | | #pragma convert("ISO8859-1") |
36 | | #endif |
37 | | |
38 | | #define IN_LIBXML |
39 | | #include "libxml.h" |
40 | | |
41 | | #if defined(_WIN32) |
42 | | #define XML_DIR_SEP '\\' |
43 | | #else |
44 | | #define XML_DIR_SEP '/' |
45 | | #endif |
46 | | |
47 | | #include <stdlib.h> |
48 | | #include <limits.h> |
49 | | #include <string.h> |
50 | | #include <stdarg.h> |
51 | | #include <stddef.h> |
52 | | #include <ctype.h> |
53 | | #include <stdlib.h> |
54 | | #include <libxml/parser.h> |
55 | | #include <libxml/xmlmemory.h> |
56 | | #include <libxml/tree.h> |
57 | | #include <libxml/parserInternals.h> |
58 | | #include <libxml/valid.h> |
59 | | #include <libxml/entities.h> |
60 | | #include <libxml/xmlerror.h> |
61 | | #include <libxml/encoding.h> |
62 | | #include <libxml/xmlIO.h> |
63 | | #include <libxml/uri.h> |
64 | | #include <libxml/SAX2.h> |
65 | | #ifdef LIBXML_CATALOG_ENABLED |
66 | | #include <libxml/catalog.h> |
67 | | #endif |
68 | | |
69 | | #include "private/buf.h" |
70 | | #include "private/dict.h" |
71 | | #include "private/entities.h" |
72 | | #include "private/error.h" |
73 | | #include "private/html.h" |
74 | | #include "private/io.h" |
75 | | #include "private/parser.h" |
76 | | |
/*
 * Reserved namespace index values and fixed URI hash values.
 * NOTE(review): judging by the names these stand for the empty namespace
 * and the predefined "xml" namespace -- confirm against the users of these
 * macros further down in the file.
 */
#define NS_INDEX_EMPTY  INT_MAX
#define NS_INDEX_XML    (INT_MAX - 1)
#define URI_HASH_EMPTY  0xD943A04E
#define URI_HASH_XML    0xF0451F02
81 | | |
82 | | struct _xmlStartTag { |
83 | | const xmlChar *prefix; |
84 | | const xmlChar *URI; |
85 | | int line; |
86 | | int nsNr; |
87 | | }; |
88 | | |
/*
 * Additional per-namespace bookkeeping, stored in the "extra" array of
 * struct _xmlParserNsData.
 * NOTE(review): the exact role of each field is not visible in this part
 * of the file; see the namespace database code for details.
 */
typedef struct {
    void     *saxData;
    unsigned  prefixHashValue;
    unsigned  uriHashValue;
    unsigned  elementId;
    int       oldIndex;
} xmlParserNsExtra;
96 | | |
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData): a hash value paired with an integer index.
 */
typedef struct {
    unsigned  hashValue;
    int       index;
} xmlParserNsBucket;
101 | | |
102 | | struct _xmlParserNsData { |
103 | | xmlParserNsExtra *extra; |
104 | | |
105 | | unsigned hashSize; |
106 | | unsigned hashElems; |
107 | | xmlParserNsBucket *hash; |
108 | | |
109 | | unsigned elementId; |
110 | | int defaultNsIndex; |
111 | | }; |
112 | | |
/*
 * Slot of the attribute hash table; it carries a single integer index.
 */
struct _xmlAttrHashBucket {
    int index;
};
116 | | |
117 | | static xmlParserCtxtPtr |
118 | | xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData, |
119 | | const xmlChar *URL, const xmlChar *ID, const xmlChar *base, |
120 | | xmlParserCtxtPtr pctx); |
121 | | |
122 | | static int |
123 | | xmlParseElementStart(xmlParserCtxtPtr ctxt); |
124 | | |
125 | | static void |
126 | | xmlParseElementEnd(xmlParserCtxtPtr ctxt); |
127 | | |
128 | | /************************************************************************ |
129 | | * * |
130 | | * Arbitrary limits set in the parser. See XML_PARSE_HUGE * |
131 | | * * |
132 | | ************************************************************************/ |
133 | | |
#define XML_PARSER_BIG_ENTITY           1000
#define XML_PARSER_LOT_ENTITY           5000

/*
 * Constants used to guard against abusive entity expansion
 * ("billion laughs").
 */

/*
 * Amount of entity expansion that is always permitted, regardless of the
 * amplification factor.
 */
#define XML_PARSER_ALLOWED_EXPANSION    1000000

/*
 * Fixed cost charged for every entity reference. It is a crude model of
 * processing time and protects, for example, against exponential
 * expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST              20
153 | | |
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser agrees to
 * process. It is a safety boundary rather than a limitation of the
 * parser, and it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
163 | | |
164 | | |
165 | | |
#define XML_PARSER_BIG_BUFFER_SIZE  300
#define XML_PARSER_BUFFER_SIZE      100
#define SAX_COMPAT_MODE             BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when GROW
 * is called. This is not a hard limit but an optimization for reading
 * strings such as Names. It is not strictly needed as long as the
 * input's available characters are followed by a 0, which the I/O level
 * is expected to provide.
 */
#define XML_PARSER_CHUNK_SIZE       100
180 | | |
181 | | /** |
182 | | * xmlParserVersion: |
183 | | * |
184 | | * Constant string describing the internal version of the library |
185 | | */ |
186 | | const char *const |
187 | | xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA; |
188 | | |
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specifications.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model", NULL
};
198 | | |
199 | | |
200 | | /* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */ |
201 | | static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt, |
202 | | const xmlChar **str); |
203 | | |
204 | | static xmlParserErrors |
205 | | xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt, |
206 | | xmlSAXHandlerPtr sax, |
207 | | void *user_data, int depth, const xmlChar *URL, |
208 | | const xmlChar *ID, xmlNodePtr *list); |
209 | | |
210 | | static int |
211 | | xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, |
212 | | const char *encoding); |
213 | | #ifdef LIBXML_LEGACY_ENABLED |
214 | | static void |
215 | | xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode, |
216 | | xmlNodePtr lastNode); |
217 | | #endif /* LIBXML_LEGACY_ENABLED */ |
218 | | |
219 | | static xmlParserErrors |
220 | | xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, |
221 | | const xmlChar *string, void *user_data, xmlNodePtr *lst); |
222 | | |
223 | | static int |
224 | | xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); |
225 | | |
226 | | /************************************************************************ |
227 | | * * |
228 | | * Some factorized error routines * |
229 | | * * |
230 | | ************************************************************************/ |
231 | | |
232 | | /** |
233 | | * xmlErrAttributeDup: |
234 | | * @ctxt: an XML parser context |
235 | | * @prefix: the attribute prefix |
236 | | * @localname: the attribute localname |
237 | | * |
238 | | * Handle a redefinition of attribute error |
239 | | */ |
240 | | static void |
241 | | xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix, |
242 | | const xmlChar * localname) |
243 | 6.38k | { |
244 | 6.38k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
245 | 6.38k | (ctxt->instate == XML_PARSER_EOF)) |
246 | 89 | return; |
247 | 6.29k | if (ctxt != NULL) |
248 | 6.29k | ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED; |
249 | | |
250 | 6.29k | if (prefix == NULL) |
251 | 5.74k | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
252 | 5.74k | XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
253 | 5.74k | (const char *) localname, NULL, NULL, 0, 0, |
254 | 5.74k | "Attribute %s redefined\n", localname); |
255 | 544 | else |
256 | 544 | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, |
257 | 544 | XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0, |
258 | 544 | (const char *) prefix, (const char *) localname, |
259 | 544 | NULL, 0, 0, "Attribute %s:%s redefined\n", prefix, |
260 | 544 | localname); |
261 | 6.29k | if (ctxt != NULL) { |
262 | 6.29k | ctxt->wellFormed = 0; |
263 | 6.29k | if (ctxt->recovery == 0) |
264 | 3.14k | ctxt->disableSAX = 1; |
265 | 6.29k | } |
266 | 6.29k | } |
267 | | |
268 | | /** |
269 | | * xmlFatalErrMsg: |
270 | | * @ctxt: an XML parser context |
271 | | * @error: the error number |
272 | | * @msg: the error message |
273 | | * |
274 | | * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
275 | | */ |
276 | | static void LIBXML_ATTR_FORMAT(3,0) |
277 | | xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
278 | | const char *msg) |
279 | 333k | { |
280 | 333k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
281 | 333k | (ctxt->instate == XML_PARSER_EOF)) |
282 | 263 | return; |
283 | 332k | if (ctxt != NULL) |
284 | 332k | ctxt->errNo = error; |
285 | 332k | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error, |
286 | 332k | XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg); |
287 | 332k | if (ctxt != NULL) { |
288 | 332k | ctxt->wellFormed = 0; |
289 | 332k | if (ctxt->recovery == 0) |
290 | 61.7k | ctxt->disableSAX = 1; |
291 | 332k | } |
292 | 332k | } |
293 | | |
294 | | /** |
295 | | * xmlWarningMsg: |
296 | | * @ctxt: an XML parser context |
297 | | * @error: the error number |
298 | | * @msg: the error message |
299 | | * @str1: extra data |
300 | | * @str2: extra data |
301 | | * |
302 | | * Handle a warning. |
303 | | */ |
304 | | void LIBXML_ATTR_FORMAT(3,0) |
305 | | xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
306 | | const char *msg, const xmlChar *str1, const xmlChar *str2) |
307 | 5.77k | { |
308 | 5.77k | xmlStructuredErrorFunc schannel = NULL; |
309 | | |
310 | 5.77k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
311 | 5.77k | (ctxt->instate == XML_PARSER_EOF)) |
312 | 0 | return; |
313 | 5.77k | if ((ctxt != NULL) && (ctxt->sax != NULL) && |
314 | 5.77k | (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
315 | 3.71k | schannel = ctxt->sax->serror; |
316 | 5.77k | if (ctxt != NULL) { |
317 | 5.77k | __xmlRaiseError(schannel, |
318 | 5.77k | (ctxt->sax) ? ctxt->sax->warning : NULL, |
319 | 5.77k | ctxt->userData, |
320 | 5.77k | ctxt, NULL, XML_FROM_PARSER, error, |
321 | 5.77k | XML_ERR_WARNING, NULL, 0, |
322 | 5.77k | (const char *) str1, (const char *) str2, NULL, 0, 0, |
323 | 5.77k | msg, (const char *) str1, (const char *) str2); |
324 | 5.77k | } else { |
325 | 0 | __xmlRaiseError(schannel, NULL, NULL, |
326 | 0 | ctxt, NULL, XML_FROM_PARSER, error, |
327 | 0 | XML_ERR_WARNING, NULL, 0, |
328 | 0 | (const char *) str1, (const char *) str2, NULL, 0, 0, |
329 | 0 | msg, (const char *) str1, (const char *) str2); |
330 | 0 | } |
331 | 5.77k | } |
332 | | |
333 | | /** |
334 | | * xmlValidityError: |
335 | | * @ctxt: an XML parser context |
336 | | * @error: the error number |
337 | | * @msg: the error message |
338 | | * @str1: extra data |
339 | | * |
340 | | * Handle a validity error. |
341 | | */ |
342 | | static void LIBXML_ATTR_FORMAT(3,0) |
343 | | xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
344 | | const char *msg, const xmlChar *str1, const xmlChar *str2) |
345 | 1.88k | { |
346 | 1.88k | xmlStructuredErrorFunc schannel = NULL; |
347 | | |
348 | 1.88k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
349 | 1.88k | (ctxt->instate == XML_PARSER_EOF)) |
350 | 0 | return; |
351 | 1.88k | if (ctxt != NULL) { |
352 | 1.88k | ctxt->errNo = error; |
353 | 1.88k | if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC)) |
354 | 1.69k | schannel = ctxt->sax->serror; |
355 | 1.88k | } |
356 | 1.88k | if (ctxt != NULL) { |
357 | 1.88k | __xmlRaiseError(schannel, |
358 | 1.88k | ctxt->vctxt.error, ctxt->vctxt.userData, |
359 | 1.88k | ctxt, NULL, XML_FROM_DTD, error, |
360 | 1.88k | XML_ERR_ERROR, NULL, 0, (const char *) str1, |
361 | 1.88k | (const char *) str2, NULL, 0, 0, |
362 | 1.88k | msg, (const char *) str1, (const char *) str2); |
363 | 1.88k | ctxt->valid = 0; |
364 | 1.88k | } else { |
365 | 0 | __xmlRaiseError(schannel, NULL, NULL, |
366 | 0 | ctxt, NULL, XML_FROM_DTD, error, |
367 | 0 | XML_ERR_ERROR, NULL, 0, (const char *) str1, |
368 | 0 | (const char *) str2, NULL, 0, 0, |
369 | 0 | msg, (const char *) str1, (const char *) str2); |
370 | 0 | } |
371 | 1.88k | } |
372 | | |
373 | | /** |
374 | | * xmlFatalErrMsgInt: |
375 | | * @ctxt: an XML parser context |
376 | | * @error: the error number |
377 | | * @msg: the error message |
378 | | * @val: an integer value |
379 | | * |
380 | | * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
381 | | */ |
382 | | static void LIBXML_ATTR_FORMAT(3,0) |
383 | | xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
384 | | const char *msg, int val) |
385 | 14.5k | { |
386 | 14.5k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
387 | 14.5k | (ctxt->instate == XML_PARSER_EOF)) |
388 | 0 | return; |
389 | 14.5k | if (ctxt != NULL) |
390 | 14.5k | ctxt->errNo = error; |
391 | 14.5k | __xmlRaiseError(NULL, NULL, NULL, |
392 | 14.5k | ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
393 | 14.5k | NULL, 0, NULL, NULL, NULL, val, 0, msg, val); |
394 | 14.5k | if (ctxt != NULL) { |
395 | 14.5k | ctxt->wellFormed = 0; |
396 | 14.5k | if (ctxt->recovery == 0) |
397 | 7.30k | ctxt->disableSAX = 1; |
398 | 14.5k | } |
399 | 14.5k | } |
400 | | |
401 | | /** |
402 | | * xmlFatalErrMsgStrIntStr: |
403 | | * @ctxt: an XML parser context |
404 | | * @error: the error number |
405 | | * @msg: the error message |
406 | | * @str1: an string info |
407 | | * @val: an integer value |
408 | | * @str2: an string info |
409 | | * |
410 | | * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
411 | | */ |
412 | | static void LIBXML_ATTR_FORMAT(3,0) |
413 | | xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
414 | | const char *msg, const xmlChar *str1, int val, |
415 | | const xmlChar *str2) |
416 | 117k | { |
417 | 117k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
418 | 117k | (ctxt->instate == XML_PARSER_EOF)) |
419 | 0 | return; |
420 | 117k | if (ctxt != NULL) |
421 | 117k | ctxt->errNo = error; |
422 | 117k | __xmlRaiseError(NULL, NULL, NULL, |
423 | 117k | ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL, |
424 | 117k | NULL, 0, (const char *) str1, (const char *) str2, |
425 | 117k | NULL, val, 0, msg, str1, val, str2); |
426 | 117k | if (ctxt != NULL) { |
427 | 117k | ctxt->wellFormed = 0; |
428 | 117k | if (ctxt->recovery == 0) |
429 | 33.6k | ctxt->disableSAX = 1; |
430 | 117k | } |
431 | 117k | } |
432 | | |
433 | | /** |
434 | | * xmlFatalErrMsgStr: |
435 | | * @ctxt: an XML parser context |
436 | | * @error: the error number |
437 | | * @msg: the error message |
438 | | * @val: a string value |
439 | | * |
440 | | * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
441 | | */ |
442 | | static void LIBXML_ATTR_FORMAT(3,0) |
443 | | xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
444 | | const char *msg, const xmlChar * val) |
445 | 181k | { |
446 | 181k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
447 | 181k | (ctxt->instate == XML_PARSER_EOF)) |
448 | 1 | return; |
449 | 181k | if (ctxt != NULL) |
450 | 181k | ctxt->errNo = error; |
451 | 181k | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
452 | 181k | XML_FROM_PARSER, error, XML_ERR_FATAL, |
453 | 181k | NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
454 | 181k | val); |
455 | 181k | if (ctxt != NULL) { |
456 | 181k | ctxt->wellFormed = 0; |
457 | 181k | if (ctxt->recovery == 0) |
458 | 10.1k | ctxt->disableSAX = 1; |
459 | 181k | } |
460 | 181k | } |
461 | | |
462 | | /** |
463 | | * xmlErrMsgStr: |
464 | | * @ctxt: an XML parser context |
465 | | * @error: the error number |
466 | | * @msg: the error message |
467 | | * @val: a string value |
468 | | * |
469 | | * Handle a non fatal parser error |
470 | | */ |
471 | | static void LIBXML_ATTR_FORMAT(3,0) |
472 | | xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
473 | | const char *msg, const xmlChar * val) |
474 | 100k | { |
475 | 100k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
476 | 100k | (ctxt->instate == XML_PARSER_EOF)) |
477 | 0 | return; |
478 | 100k | if (ctxt != NULL) |
479 | 100k | ctxt->errNo = error; |
480 | 100k | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, |
481 | 100k | XML_FROM_PARSER, error, XML_ERR_ERROR, |
482 | 100k | NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg, |
483 | 100k | val); |
484 | 100k | } |
485 | | |
486 | | /** |
487 | | * xmlNsErr: |
488 | | * @ctxt: an XML parser context |
489 | | * @error: the error number |
490 | | * @msg: the message |
491 | | * @info1: extra information string |
492 | | * @info2: extra information string |
493 | | * |
494 | | * Handle a fatal parser error, i.e. violating Well-Formedness constraints |
495 | | */ |
496 | | static void LIBXML_ATTR_FORMAT(3,0) |
497 | | xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
498 | | const char *msg, |
499 | | const xmlChar * info1, const xmlChar * info2, |
500 | | const xmlChar * info3) |
501 | 54.8k | { |
502 | 54.8k | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
503 | 54.8k | (ctxt->instate == XML_PARSER_EOF)) |
504 | 325 | return; |
505 | 54.5k | if (ctxt != NULL) |
506 | 54.5k | ctxt->errNo = error; |
507 | 54.5k | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
508 | 54.5k | XML_ERR_ERROR, NULL, 0, (const char *) info1, |
509 | 54.5k | (const char *) info2, (const char *) info3, 0, 0, msg, |
510 | 54.5k | info1, info2, info3); |
511 | 54.5k | if (ctxt != NULL) |
512 | 54.5k | ctxt->nsWellFormed = 0; |
513 | 54.5k | } |
514 | | |
515 | | /** |
516 | | * xmlNsWarn |
517 | | * @ctxt: an XML parser context |
518 | | * @error: the error number |
519 | | * @msg: the message |
520 | | * @info1: extra information string |
521 | | * @info2: extra information string |
522 | | * |
523 | | * Handle a namespace warning error |
524 | | */ |
525 | | static void LIBXML_ATTR_FORMAT(3,0) |
526 | | xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error, |
527 | | const char *msg, |
528 | | const xmlChar * info1, const xmlChar * info2, |
529 | | const xmlChar * info3) |
530 | 870 | { |
531 | 870 | if ((ctxt != NULL) && (ctxt->disableSAX != 0) && |
532 | 870 | (ctxt->instate == XML_PARSER_EOF)) |
533 | 0 | return; |
534 | 870 | __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error, |
535 | 870 | XML_ERR_WARNING, NULL, 0, (const char *) info1, |
536 | 870 | (const char *) info2, (const char *) info3, 0, 0, msg, |
537 | 870 | info1, info2, info3); |
538 | 870 | } |
539 | | |
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    const unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
547 | | |
/*
 * Add the size_t quantity @val to *@dst, clamping the result to ULONG_MAX.
 *
 * @val is taken as a size_t so that, on platforms where size_t is wider
 * than unsigned long (e.g. 64-bit Windows), a large value saturates the
 * counter instead of being silently truncated at the call boundary.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is done in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;  /* fits: val <= ULONG_MAX - *dst */
}
555 | | |
556 | | /** |
557 | | * xmlParserEntityCheck: |
558 | | * @ctxt: parser context |
559 | | * @extra: sum of unexpanded entity sizes |
560 | | * |
561 | | * Check for non-linear entity expansion behaviour. |
562 | | * |
563 | | * In some cases like xmlStringDecodeEntities, this function is called |
564 | | * for each, possibly nested entity and its unexpanded content length. |
565 | | * |
566 | | * In other cases like xmlParseReference, it's only called for each |
567 | | * top-level entity with its unexpanded content length plus the sum of |
568 | | * the unexpanded content lengths (plus fixed cost) of all nested |
569 | | * entities. |
570 | | * |
571 | | * Summing the unexpanded lengths also adds the length of the reference. |
572 | | * This is by design. Taking the length of the entity name into account |
573 | | * discourages attacks that try to waste CPU time with abusively long |
574 | | * entity names. See test/recurse/lol6.xml for example. Each call also |
575 | | * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with |
576 | | * short entities. |
577 | | * |
578 | | * Returns 1 on error, 0 on success. |
579 | | */ |
580 | | static int |
581 | | xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra) |
582 | 212k | { |
583 | 212k | unsigned long consumed; |
584 | 212k | xmlParserInputPtr input = ctxt->input; |
585 | 212k | xmlEntityPtr entity = input->entity; |
586 | | |
587 | | /* |
588 | | * Compute total consumed bytes so far, including input streams of |
589 | | * external entities. |
590 | | */ |
591 | 212k | consumed = input->parentConsumed; |
592 | 212k | if ((entity == NULL) || |
593 | 212k | ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
594 | 142k | ((entity->flags & XML_ENT_PARSED) == 0))) { |
595 | 69.3k | xmlSaturatedAdd(&consumed, input->consumed); |
596 | 69.3k | xmlSaturatedAddSizeT(&consumed, input->cur - input->base); |
597 | 69.3k | } |
598 | 212k | xmlSaturatedAdd(&consumed, ctxt->sizeentities); |
599 | | |
600 | | /* |
601 | | * Add extra cost and some fixed cost. |
602 | | */ |
603 | 212k | xmlSaturatedAdd(&ctxt->sizeentcopy, extra); |
604 | 212k | xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST); |
605 | | |
606 | | /* |
607 | | * It's important to always use saturation arithmetic when tracking |
608 | | * entity sizes to make the size checks reliable. If "sizeentcopy" |
609 | | * overflows, we have to abort. |
610 | | */ |
611 | 212k | if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) && |
612 | 212k | ((ctxt->sizeentcopy >= ULONG_MAX) || |
613 | 32 | (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) { |
614 | 32 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP, |
615 | 32 | "Maximum entity amplification factor exceeded, see " |
616 | 32 | "xmlCtxtSetMaxAmplification.\n"); |
617 | 32 | xmlHaltParser(ctxt); |
618 | 32 | return(1); |
619 | 32 | } |
620 | | |
621 | 212k | return(0); |
622 | 212k | } |
623 | | |
624 | | /************************************************************************ |
625 | | * * |
626 | | * Library wide options * |
627 | | * * |
628 | | ************************************************************************/ |
629 | | |
630 | | /** |
631 | | * xmlHasFeature: |
632 | | * @feature: the feature to be examined |
633 | | * |
634 | | * Examines if the library has been compiled with a given feature. |
635 | | * |
636 | | * Returns a non-zero value if the feature exist, otherwise zero. |
637 | | * Returns zero (0) if the feature does not exist or an unknown |
638 | | * unknown feature is requested, non-zero otherwise. |
639 | | */ |
640 | | int |
641 | | xmlHasFeature(xmlFeature feature) |
642 | 0 | { |
643 | 0 | switch (feature) { |
644 | 0 | case XML_WITH_THREAD: |
645 | 0 | #ifdef LIBXML_THREAD_ENABLED |
646 | 0 | return(1); |
647 | | #else |
648 | | return(0); |
649 | | #endif |
650 | 0 | case XML_WITH_TREE: |
651 | 0 | #ifdef LIBXML_TREE_ENABLED |
652 | 0 | return(1); |
653 | | #else |
654 | | return(0); |
655 | | #endif |
656 | 0 | case XML_WITH_OUTPUT: |
657 | 0 | #ifdef LIBXML_OUTPUT_ENABLED |
658 | 0 | return(1); |
659 | | #else |
660 | | return(0); |
661 | | #endif |
662 | 0 | case XML_WITH_PUSH: |
663 | 0 | #ifdef LIBXML_PUSH_ENABLED |
664 | 0 | return(1); |
665 | | #else |
666 | | return(0); |
667 | | #endif |
668 | 0 | case XML_WITH_READER: |
669 | 0 | #ifdef LIBXML_READER_ENABLED |
670 | 0 | return(1); |
671 | | #else |
672 | | return(0); |
673 | | #endif |
674 | 0 | case XML_WITH_PATTERN: |
675 | 0 | #ifdef LIBXML_PATTERN_ENABLED |
676 | 0 | return(1); |
677 | | #else |
678 | | return(0); |
679 | | #endif |
680 | 0 | case XML_WITH_WRITER: |
681 | 0 | #ifdef LIBXML_WRITER_ENABLED |
682 | 0 | return(1); |
683 | | #else |
684 | | return(0); |
685 | | #endif |
686 | 0 | case XML_WITH_SAX1: |
687 | 0 | #ifdef LIBXML_SAX1_ENABLED |
688 | 0 | return(1); |
689 | | #else |
690 | | return(0); |
691 | | #endif |
692 | 0 | case XML_WITH_FTP: |
693 | | #ifdef LIBXML_FTP_ENABLED |
694 | | return(1); |
695 | | #else |
696 | 0 | return(0); |
697 | 0 | #endif |
698 | 0 | case XML_WITH_HTTP: |
699 | 0 | #ifdef LIBXML_HTTP_ENABLED |
700 | 0 | return(1); |
701 | | #else |
702 | | return(0); |
703 | | #endif |
704 | 0 | case XML_WITH_VALID: |
705 | 0 | #ifdef LIBXML_VALID_ENABLED |
706 | 0 | return(1); |
707 | | #else |
708 | | return(0); |
709 | | #endif |
710 | 0 | case XML_WITH_HTML: |
711 | 0 | #ifdef LIBXML_HTML_ENABLED |
712 | 0 | return(1); |
713 | | #else |
714 | | return(0); |
715 | | #endif |
716 | 0 | case XML_WITH_LEGACY: |
717 | | #ifdef LIBXML_LEGACY_ENABLED |
718 | | return(1); |
719 | | #else |
720 | 0 | return(0); |
721 | 0 | #endif |
722 | 0 | case XML_WITH_C14N: |
723 | 0 | #ifdef LIBXML_C14N_ENABLED |
724 | 0 | return(1); |
725 | | #else |
726 | | return(0); |
727 | | #endif |
728 | 0 | case XML_WITH_CATALOG: |
729 | 0 | #ifdef LIBXML_CATALOG_ENABLED |
730 | 0 | return(1); |
731 | | #else |
732 | | return(0); |
733 | | #endif |
734 | 0 | case XML_WITH_XPATH: |
735 | 0 | #ifdef LIBXML_XPATH_ENABLED |
736 | 0 | return(1); |
737 | | #else |
738 | | return(0); |
739 | | #endif |
740 | 0 | case XML_WITH_XPTR: |
741 | 0 | #ifdef LIBXML_XPTR_ENABLED |
742 | 0 | return(1); |
743 | | #else |
744 | | return(0); |
745 | | #endif |
746 | 0 | case XML_WITH_XINCLUDE: |
747 | 0 | #ifdef LIBXML_XINCLUDE_ENABLED |
748 | 0 | return(1); |
749 | | #else |
750 | | return(0); |
751 | | #endif |
752 | 0 | case XML_WITH_ICONV: |
753 | 0 | #ifdef LIBXML_ICONV_ENABLED |
754 | 0 | return(1); |
755 | | #else |
756 | | return(0); |
757 | | #endif |
758 | 0 | case XML_WITH_ISO8859X: |
759 | 0 | #ifdef LIBXML_ISO8859X_ENABLED |
760 | 0 | return(1); |
761 | | #else |
762 | | return(0); |
763 | | #endif |
764 | 0 | case XML_WITH_UNICODE: |
765 | 0 | #ifdef LIBXML_UNICODE_ENABLED |
766 | 0 | return(1); |
767 | | #else |
768 | | return(0); |
769 | | #endif |
770 | 0 | case XML_WITH_REGEXP: |
771 | 0 | #ifdef LIBXML_REGEXP_ENABLED |
772 | 0 | return(1); |
773 | | #else |
774 | | return(0); |
775 | | #endif |
776 | 0 | case XML_WITH_AUTOMATA: |
777 | 0 | #ifdef LIBXML_AUTOMATA_ENABLED |
778 | 0 | return(1); |
779 | | #else |
780 | | return(0); |
781 | | #endif |
782 | 0 | case XML_WITH_EXPR: |
783 | | #ifdef LIBXML_EXPR_ENABLED |
784 | | return(1); |
785 | | #else |
786 | 0 | return(0); |
787 | 0 | #endif |
788 | 0 | case XML_WITH_SCHEMAS: |
789 | 0 | #ifdef LIBXML_SCHEMAS_ENABLED |
790 | 0 | return(1); |
791 | | #else |
792 | | return(0); |
793 | | #endif |
794 | 0 | case XML_WITH_SCHEMATRON: |
795 | 0 | #ifdef LIBXML_SCHEMATRON_ENABLED |
796 | 0 | return(1); |
797 | | #else |
798 | | return(0); |
799 | | #endif |
800 | 0 | case XML_WITH_MODULES: |
801 | 0 | #ifdef LIBXML_MODULES_ENABLED |
802 | 0 | return(1); |
803 | | #else |
804 | | return(0); |
805 | | #endif |
806 | 0 | case XML_WITH_DEBUG: |
807 | 0 | #ifdef LIBXML_DEBUG_ENABLED |
808 | 0 | return(1); |
809 | | #else |
810 | | return(0); |
811 | | #endif |
812 | 0 | case XML_WITH_DEBUG_MEM: |
813 | | #ifdef DEBUG_MEMORY_LOCATION |
814 | | return(1); |
815 | | #else |
816 | 0 | return(0); |
817 | 0 | #endif |
818 | 0 | case XML_WITH_DEBUG_RUN: |
819 | 0 | return(0); |
820 | 0 | case XML_WITH_ZLIB: |
821 | 0 | #ifdef LIBXML_ZLIB_ENABLED |
822 | 0 | return(1); |
823 | | #else |
824 | | return(0); |
825 | | #endif |
826 | 0 | case XML_WITH_LZMA: |
827 | 0 | #ifdef LIBXML_LZMA_ENABLED |
828 | 0 | return(1); |
829 | | #else |
830 | | return(0); |
831 | | #endif |
832 | 0 | case XML_WITH_ICU: |
833 | | #ifdef LIBXML_ICU_ENABLED |
834 | | return(1); |
835 | | #else |
836 | 0 | return(0); |
837 | 0 | #endif |
838 | 0 | default: |
839 | 0 | break; |
840 | 0 | } |
841 | 0 | return(0); |
842 | 0 | } |
843 | | |
844 | | /************************************************************************ |
845 | | * * |
846 | | * SAX2 defaulted attributes handling * |
847 | | * * |
848 | | ************************************************************************/ |
849 | | |
850 | | /** |
851 | | * xmlDetectSAX2: |
852 | | * @ctxt: an XML parser context |
853 | | * |
854 | | * Do the SAX2 detection and specific initialization |
855 | | */ |
856 | | static void |
857 | 28.4k | xmlDetectSAX2(xmlParserCtxtPtr ctxt) { |
858 | 28.4k | xmlSAXHandlerPtr sax; |
859 | | |
860 | | /* Avoid unused variable warning if features are disabled. */ |
861 | 28.4k | (void) sax; |
862 | | |
863 | 28.4k | if (ctxt == NULL) return; |
864 | 28.4k | sax = ctxt->sax; |
865 | 28.4k | #ifdef LIBXML_SAX1_ENABLED |
866 | 28.4k | if ((sax) && (sax->initialized == XML_SAX2_MAGIC)) |
867 | 23.9k | ctxt->sax2 = 1; |
868 | | #else |
869 | | ctxt->sax2 = 1; |
870 | | #endif /* LIBXML_SAX1_ENABLED */ |
871 | | |
872 | 28.4k | ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3); |
873 | 28.4k | ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5); |
874 | 28.4k | ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36); |
875 | 28.4k | if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) || |
876 | 28.4k | (ctxt->str_xml_ns == NULL)) { |
877 | 0 | xmlErrMemory(ctxt, NULL); |
878 | 0 | } |
879 | 28.4k | } |
880 | | |
881 | | typedef struct { |
882 | | xmlHashedString prefix; |
883 | | xmlHashedString name; |
884 | | xmlHashedString value; |
885 | | const xmlChar *valueEnd; |
886 | | int external; |
887 | | int expandedSize; |
888 | | } xmlDefAttr; |
889 | | |
890 | | typedef struct _xmlDefAttrs xmlDefAttrs; |
891 | | typedef xmlDefAttrs *xmlDefAttrsPtr; |
892 | | struct _xmlDefAttrs { |
893 | | int nbAttrs; /* number of defaulted attributes on that element */ |
894 | | int maxAttrs; /* the size of the array */ |
895 | | #if __STDC_VERSION__ >= 199901L |
896 | | /* Using a C99 flexible array member avoids UBSan errors. */ |
897 | | xmlDefAttr attrs[]; /* array of localname/prefix/values/external */ |
898 | | #else |
899 | | xmlDefAttr attrs[1]; |
900 | | #endif |
901 | | }; |
902 | | |
/**
 * xmlAttrNormalizeSpace:
 * @src: the source string
 * @dst: the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the start of the normalized value (@dst), or NULL
 * if an argument is NULL or if, when normalizing in place, nothing had to
 * be changed.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    /* keep @dst untouched so the start of the result can be returned */
    out = dst;

    /* drop leading spaces */
    while (*src == 0x20) src++;
    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run; emit one space unless it is trailing */
            while (*src == 0x20) src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * When operating in place, the write cursor catching up with the
     * read cursor means no character was removed.
     */
    if (out == src)
        return(NULL);
    return(dst);
}
941 | | |
942 | | /** |
943 | | * xmlAttrNormalizeSpace2: |
944 | | * @src: the source string |
945 | | * |
946 | | * Normalize the space in non CDATA attribute values, a slightly more complex |
947 | | * front end to avoid allocation problems when running on attribute values |
948 | | * coming from the input. |
949 | | * |
950 | | * Returns a pointer to the normalized value (dst) or NULL if no conversion |
951 | | * is needed. |
952 | | */ |
953 | | static const xmlChar * |
954 | | xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len) |
955 | 3.24k | { |
956 | 3.24k | int i; |
957 | 3.24k | int remove_head = 0; |
958 | 3.24k | int need_realloc = 0; |
959 | 3.24k | const xmlChar *cur; |
960 | | |
961 | 3.24k | if ((ctxt == NULL) || (src == NULL) || (len == NULL)) |
962 | 0 | return(NULL); |
963 | 3.24k | i = *len; |
964 | 3.24k | if (i <= 0) |
965 | 1.40k | return(NULL); |
966 | | |
967 | 1.84k | cur = src; |
968 | 2.12k | while (*cur == 0x20) { |
969 | 283 | cur++; |
970 | 283 | remove_head++; |
971 | 283 | } |
972 | 17.9k | while (*cur != 0) { |
973 | 16.4k | if (*cur == 0x20) { |
974 | 1.06k | cur++; |
975 | 1.06k | if ((*cur == 0x20) || (*cur == 0)) { |
976 | 387 | need_realloc = 1; |
977 | 387 | break; |
978 | 387 | } |
979 | 1.06k | } else |
980 | 15.3k | cur++; |
981 | 16.4k | } |
982 | 1.84k | if (need_realloc) { |
983 | 387 | xmlChar *ret; |
984 | | |
985 | 387 | ret = xmlStrndup(src + remove_head, i - remove_head + 1); |
986 | 387 | if (ret == NULL) { |
987 | 0 | xmlErrMemory(ctxt, NULL); |
988 | 0 | return(NULL); |
989 | 0 | } |
990 | 387 | xmlAttrNormalizeSpace(ret, ret); |
991 | 387 | *len = strlen((const char *)ret); |
992 | 387 | return(ret); |
993 | 1.45k | } else if (remove_head) { |
994 | 85 | *len -= remove_head; |
995 | 85 | memmove(src, src + remove_head, 1 + *len); |
996 | 85 | return(src); |
997 | 85 | } |
998 | 1.37k | return(NULL); |
999 | 1.84k | } |
1000 | | |
1001 | | /** |
1002 | | * xmlAddDefAttrs: |
1003 | | * @ctxt: an XML parser context |
1004 | | * @fullname: the element fullname |
1005 | | * @fullattr: the attribute fullname |
1006 | | * @value: the attribute value |
1007 | | * |
1008 | | * Add a defaulted attribute for an element |
1009 | | */ |
1010 | | static void |
1011 | | xmlAddDefAttrs(xmlParserCtxtPtr ctxt, |
1012 | | const xmlChar *fullname, |
1013 | | const xmlChar *fullattr, |
1014 | 21.2k | const xmlChar *value) { |
1015 | 21.2k | xmlDefAttrsPtr defaults; |
1016 | 21.2k | xmlDefAttr *attr; |
1017 | 21.2k | int len, expandedSize; |
1018 | 21.2k | xmlHashedString name; |
1019 | 21.2k | xmlHashedString prefix; |
1020 | 21.2k | xmlHashedString hvalue; |
1021 | 21.2k | const xmlChar *localname; |
1022 | | |
1023 | | /* |
1024 | | * Allows to detect attribute redefinitions |
1025 | | */ |
1026 | 21.2k | if (ctxt->attsSpecial != NULL) { |
1027 | 18.5k | if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) |
1028 | 14.7k | return; |
1029 | 18.5k | } |
1030 | | |
1031 | 6.46k | if (ctxt->attsDefault == NULL) { |
1032 | 2.63k | ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict); |
1033 | 2.63k | if (ctxt->attsDefault == NULL) |
1034 | 0 | goto mem_error; |
1035 | 2.63k | } |
1036 | | |
1037 | | /* |
1038 | | * split the element name into prefix:localname , the string found |
1039 | | * are within the DTD and then not associated to namespace names. |
1040 | | */ |
1041 | 6.46k | localname = xmlSplitQName3(fullname, &len); |
1042 | 6.46k | if (localname == NULL) { |
1043 | 6.01k | name = xmlDictLookupHashed(ctxt->dict, fullname, -1); |
1044 | 6.01k | prefix.name = NULL; |
1045 | 6.01k | } else { |
1046 | 456 | name = xmlDictLookupHashed(ctxt->dict, localname, -1); |
1047 | 456 | prefix = xmlDictLookupHashed(ctxt->dict, fullname, len); |
1048 | 456 | if (prefix.name == NULL) |
1049 | 0 | goto mem_error; |
1050 | 456 | } |
1051 | 6.46k | if (name.name == NULL) |
1052 | 0 | goto mem_error; |
1053 | | |
1054 | | /* |
1055 | | * make sure there is some storage |
1056 | | */ |
1057 | 6.46k | defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name); |
1058 | 6.46k | if ((defaults == NULL) || |
1059 | 6.46k | (defaults->nbAttrs >= defaults->maxAttrs)) { |
1060 | 3.29k | xmlDefAttrsPtr temp; |
1061 | 3.29k | int newSize; |
1062 | | |
1063 | 3.29k | newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4; |
1064 | 3.29k | temp = xmlRealloc(defaults, |
1065 | 3.29k | sizeof(*defaults) + newSize * sizeof(xmlDefAttr)); |
1066 | 3.29k | if (temp == NULL) |
1067 | 0 | goto mem_error; |
1068 | 3.29k | if (defaults == NULL) |
1069 | 2.83k | temp->nbAttrs = 0; |
1070 | 3.29k | temp->maxAttrs = newSize; |
1071 | 3.29k | defaults = temp; |
1072 | 3.29k | if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name, |
1073 | 3.29k | defaults, NULL) < 0) { |
1074 | 0 | xmlFree(defaults); |
1075 | 0 | goto mem_error; |
1076 | 0 | } |
1077 | 3.29k | } |
1078 | | |
1079 | | /* |
1080 | | * Split the attribute name into prefix:localname , the string found |
1081 | | * are within the DTD and hen not associated to namespace names. |
1082 | | */ |
1083 | 6.46k | localname = xmlSplitQName3(fullattr, &len); |
1084 | 6.46k | if (localname == NULL) { |
1085 | 5.10k | name = xmlDictLookupHashed(ctxt->dict, fullattr, -1); |
1086 | 5.10k | prefix.name = NULL; |
1087 | 5.10k | } else { |
1088 | 1.36k | name = xmlDictLookupHashed(ctxt->dict, localname, -1); |
1089 | 1.36k | prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len); |
1090 | 1.36k | if (prefix.name == NULL) |
1091 | 0 | goto mem_error; |
1092 | 1.36k | } |
1093 | 6.46k | if (name.name == NULL) |
1094 | 0 | goto mem_error; |
1095 | | |
1096 | | /* intern the string and precompute the end */ |
1097 | 6.46k | len = strlen((const char *) value); |
1098 | 6.46k | hvalue = xmlDictLookupHashed(ctxt->dict, value, len); |
1099 | 6.46k | if (hvalue.name == NULL) |
1100 | 0 | goto mem_error; |
1101 | | |
1102 | 6.46k | expandedSize = strlen((const char *) name.name); |
1103 | 6.46k | if (prefix.name != NULL) |
1104 | 1.36k | expandedSize += strlen((const char *) prefix.name); |
1105 | 6.46k | expandedSize += len; |
1106 | | |
1107 | 6.46k | attr = &defaults->attrs[defaults->nbAttrs++]; |
1108 | 6.46k | attr->name = name; |
1109 | 6.46k | attr->prefix = prefix; |
1110 | 6.46k | attr->value = hvalue; |
1111 | 6.46k | attr->valueEnd = hvalue.name + len; |
1112 | 6.46k | attr->external = ctxt->external; |
1113 | 6.46k | attr->expandedSize = expandedSize; |
1114 | | |
1115 | 6.46k | return; |
1116 | | |
1117 | 0 | mem_error: |
1118 | 0 | xmlErrMemory(ctxt, NULL); |
1119 | 0 | return; |
1120 | 6.46k | } |
1121 | | |
1122 | | /** |
1123 | | * xmlAddSpecialAttr: |
1124 | | * @ctxt: an XML parser context |
1125 | | * @fullname: the element fullname |
1126 | | * @fullattr: the attribute fullname |
1127 | | * @type: the attribute type |
1128 | | * |
1129 | | * Register this attribute type |
1130 | | */ |
1131 | | static void |
1132 | | xmlAddSpecialAttr(xmlParserCtxtPtr ctxt, |
1133 | | const xmlChar *fullname, |
1134 | | const xmlChar *fullattr, |
1135 | | int type) |
1136 | 22.9k | { |
1137 | 22.9k | if (ctxt->attsSpecial == NULL) { |
1138 | 3.20k | ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict); |
1139 | 3.20k | if (ctxt->attsSpecial == NULL) |
1140 | 0 | goto mem_error; |
1141 | 3.20k | } |
1142 | | |
1143 | 22.9k | if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL) |
1144 | 15.7k | return; |
1145 | | |
1146 | 7.18k | xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr, |
1147 | 7.18k | (void *) (ptrdiff_t) type); |
1148 | 7.18k | return; |
1149 | | |
1150 | 0 | mem_error: |
1151 | 0 | xmlErrMemory(ctxt, NULL); |
1152 | 0 | return; |
1153 | 22.9k | } |
1154 | | |
1155 | | /** |
1156 | | * xmlCleanSpecialAttrCallback: |
1157 | | * |
1158 | | * Removes CDATA attributes from the special attribute table |
1159 | | */ |
1160 | | static void |
1161 | | xmlCleanSpecialAttrCallback(void *payload, void *data, |
1162 | | const xmlChar *fullname, const xmlChar *fullattr, |
1163 | 3.68k | const xmlChar *unused ATTRIBUTE_UNUSED) { |
1164 | 3.68k | xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data; |
1165 | | |
1166 | 3.68k | if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) { |
1167 | 626 | xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL); |
1168 | 626 | } |
1169 | 3.68k | } |
1170 | | |
1171 | | /** |
1172 | | * xmlCleanSpecialAttr: |
1173 | | * @ctxt: an XML parser context |
1174 | | * |
1175 | | * Trim the list of attributes defined to remove all those of type |
1176 | | * CDATA as they are not special. This call should be done when finishing |
1177 | | * to parse the DTD and before starting to parse the document root. |
1178 | | */ |
1179 | | static void |
1180 | | xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt) |
1181 | 6.40k | { |
1182 | 6.40k | if (ctxt->attsSpecial == NULL) |
1183 | 4.37k | return; |
1184 | | |
1185 | 2.02k | xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt); |
1186 | | |
1187 | 2.02k | if (xmlHashSize(ctxt->attsSpecial) == 0) { |
1188 | 204 | xmlHashFree(ctxt->attsSpecial, NULL); |
1189 | 204 | ctxt->attsSpecial = NULL; |
1190 | 204 | } |
1191 | 2.02k | return; |
1192 | 6.40k | } |
1193 | | |
1194 | | /** |
1195 | | * xmlCheckLanguageID: |
1196 | | * @lang: pointer to the string value |
1197 | | * |
1198 | | * DEPRECATED: Internal function, do not use. |
1199 | | * |
1200 | | * Checks that the value conforms to the LanguageID production: |
1201 | | * |
1202 | | * NOTE: this is somewhat deprecated, those productions were removed from |
1203 | | * the XML Second edition. |
1204 | | * |
1205 | | * [33] LanguageID ::= Langcode ('-' Subcode)* |
1206 | | * [34] Langcode ::= ISO639Code | IanaCode | UserCode |
1207 | | * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) |
1208 | | * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+ |
1209 | | * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+ |
1210 | | * [38] Subcode ::= ([a-z] | [A-Z])+ |
1211 | | * |
 * The current REC references the successors of RFC 1766, currently RFC 5646
1213 | | * |
1214 | | * http://www.rfc-editor.org/rfc/rfc5646.txt |
1215 | | * langtag = language |
1216 | | * ["-" script] |
1217 | | * ["-" region] |
1218 | | * *("-" variant) |
1219 | | * *("-" extension) |
1220 | | * ["-" privateuse] |
1221 | | * language = 2*3ALPHA ; shortest ISO 639 code |
1222 | | * ["-" extlang] ; sometimes followed by |
1223 | | * ; extended language subtags |
1224 | | * / 4ALPHA ; or reserved for future use |
1225 | | * / 5*8ALPHA ; or registered language subtag |
1226 | | * |
1227 | | * extlang = 3ALPHA ; selected ISO 639 codes |
1228 | | * *2("-" 3ALPHA) ; permanently reserved |
1229 | | * |
1230 | | * script = 4ALPHA ; ISO 15924 code |
1231 | | * |
1232 | | * region = 2ALPHA ; ISO 3166-1 code |
1233 | | * / 3DIGIT ; UN M.49 code |
1234 | | * |
1235 | | * variant = 5*8alphanum ; registered variants |
1236 | | * / (DIGIT 3alphanum) |
1237 | | * |
1238 | | * extension = singleton 1*("-" (2*8alphanum)) |
1239 | | * |
1240 | | * ; Single alphanumerics |
1241 | | * ; "x" reserved for private use |
1242 | | * singleton = DIGIT ; 0 - 9 |
1243 | | * / %x41-57 ; A - W |
1244 | | * / %x59-5A ; Y - Z |
1245 | | * / %x61-77 ; a - w |
1246 | | * / %x79-7A ; y - z |
1247 | | * |
1248 | | * it sounds right to still allow Irregular i-xxx IANA and user codes too |
1249 | | * The parser below doesn't try to cope with extension or privateuse |
1250 | | * that could be added but that's not interoperable anyway |
1251 | | * |
1252 | | * Returns 1 if correct 0 otherwise |
1253 | | **/ |
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /*
     * cur marks the start of the subtag being examined, nxt scans to
     * its end; the length of a subtag is nxt - cur.
     */
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        /* any run of ASCII letters, possibly empty, up to the end */
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
            cur++;
        return(cur[0] == 0);
    }
    /* primary language subtag: a run of ASCII letters */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved
         */
        /* 4 to 8 letters are accepted only if nothing follows */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    /* classify the next alphabetic subtag by its length */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    /* only letters are scanned here, so a variant with digits fails */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
           nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    /* whatever follows the '-' is accepted without being inspected */
    return (1);

region_m49:
    /*
     * Every jump to this label is guarded by a check that nxt[0] is a
     * digit; two more digits complete the 3DIGIT region code. The
     * nxt[1] test short-circuits before nxt[2] is read.
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1389 | | |
1390 | | /************************************************************************ |
1391 | | * * |
1392 | | * Parser stacks related functions and macros * |
1393 | | * * |
1394 | | ************************************************************************/ |
1395 | | |
/*
 * Forward declaration. The function is static, so its definition has to
 * appear elsewhere in this translation unit.
 */
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                            const xmlChar ** str);
1398 | | |
1399 | | /** |
1400 | | * xmlParserNsCreate: |
1401 | | * |
1402 | | * Create a new namespace database. |
1403 | | * |
1404 | | * Returns the new obejct. |
1405 | | */ |
1406 | | xmlParserNsData * |
1407 | 28.4k | xmlParserNsCreate(void) { |
1408 | 28.4k | xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb)); |
1409 | | |
1410 | 28.4k | if (nsdb == NULL) |
1411 | 0 | return(NULL); |
1412 | 28.4k | memset(nsdb, 0, sizeof(*nsdb)); |
1413 | 28.4k | nsdb->defaultNsIndex = INT_MAX; |
1414 | | |
1415 | 28.4k | return(nsdb); |
1416 | 28.4k | } |
1417 | | |
1418 | | /** |
1419 | | * xmlParserNsFree: |
1420 | | * @nsdb: namespace database |
1421 | | * |
1422 | | * Free a namespace database. |
1423 | | */ |
1424 | | void |
1425 | 28.4k | xmlParserNsFree(xmlParserNsData *nsdb) { |
1426 | 28.4k | if (nsdb == NULL) |
1427 | 0 | return; |
1428 | | |
1429 | 28.4k | xmlFree(nsdb->extra); |
1430 | 28.4k | xmlFree(nsdb->hash); |
1431 | 28.4k | xmlFree(nsdb); |
1432 | 28.4k | } |
1433 | | |
1434 | | /** |
1435 | | * xmlParserNsReset: |
1436 | | * @nsdb: namespace database |
1437 | | * |
1438 | | * Reset a namespace database. |
1439 | | */ |
1440 | | static void |
1441 | 0 | xmlParserNsReset(xmlParserNsData *nsdb) { |
1442 | 0 | if (nsdb == NULL) |
1443 | 0 | return; |
1444 | | |
1445 | 0 | nsdb->hashElems = 0; |
1446 | 0 | nsdb->elementId = 0; |
1447 | 0 | nsdb->defaultNsIndex = INT_MAX; |
1448 | |
|
1449 | 0 | if (nsdb->hash) |
1450 | 0 | memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0])); |
1451 | 0 | } |
1452 | | |
1453 | | /** |
1454 | | * xmlParserStartElement: |
1455 | | * @nsdb: namespace database |
1456 | | * |
1457 | | * Signal that a new element has started. |
1458 | | * |
1459 | | * Returns 0 on success, -1 if the element counter overflowed. |
1460 | | */ |
1461 | | static int |
1462 | 102k | xmlParserNsStartElement(xmlParserNsData *nsdb) { |
1463 | 102k | if (nsdb->elementId == UINT_MAX) |
1464 | 0 | return(-1); |
1465 | 102k | nsdb->elementId++; |
1466 | | |
1467 | 102k | return(0); |
1468 | 102k | } |
1469 | | |
1470 | | /** |
1471 | | * xmlParserNsLookup: |
1472 | | * @ctxt: parser context |
1473 | | * @prefix: namespace prefix |
1474 | | * @bucketPtr: optional bucket (return value) |
1475 | | * |
1476 | | * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will |
1477 | | * be set to the matching bucket, or the first empty bucket if no match |
1478 | | * was found. |
1479 | | * |
1480 | | * Returns the namespace index on success, INT_MAX if no namespace was |
1481 | | * found. |
1482 | | */ |
1483 | | static int |
1484 | | xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, |
1485 | 152k | xmlParserNsBucket **bucketPtr) { |
1486 | 152k | xmlParserNsBucket *bucket; |
1487 | 152k | unsigned index, hashValue; |
1488 | | |
1489 | 152k | if (prefix->name == NULL) |
1490 | 99.1k | return(ctxt->nsdb->defaultNsIndex); |
1491 | | |
1492 | 53.2k | if (ctxt->nsdb->hashSize == 0) |
1493 | 8.91k | return(INT_MAX); |
1494 | | |
1495 | 44.3k | hashValue = prefix->hashValue; |
1496 | 44.3k | index = hashValue & (ctxt->nsdb->hashSize - 1); |
1497 | 44.3k | bucket = &ctxt->nsdb->hash[index]; |
1498 | | |
1499 | 4.24M | while (bucket->hashValue) { |
1500 | 4.22M | if ((bucket->hashValue == hashValue) && |
1501 | 4.22M | (bucket->index != INT_MAX)) { |
1502 | 23.9k | if (ctxt->nsTab[bucket->index * 2] == prefix->name) { |
1503 | 23.9k | if (bucketPtr != NULL) |
1504 | 18.5k | *bucketPtr = bucket; |
1505 | 23.9k | return(bucket->index); |
1506 | 23.9k | } |
1507 | 23.9k | } |
1508 | | |
1509 | 4.19M | index++; |
1510 | 4.19M | bucket++; |
1511 | 4.19M | if (index == ctxt->nsdb->hashSize) { |
1512 | 10.1k | index = 0; |
1513 | 10.1k | bucket = ctxt->nsdb->hash; |
1514 | 10.1k | } |
1515 | 4.19M | } |
1516 | | |
1517 | 20.3k | if (bucketPtr != NULL) |
1518 | 17.4k | *bucketPtr = bucket; |
1519 | 20.3k | return(INT_MAX); |
1520 | 44.3k | } |
1521 | | |
1522 | | /** |
1523 | | * xmlParserNsLookupUri: |
1524 | | * @ctxt: parser context |
1525 | | * @prefix: namespace prefix |
1526 | | * |
1527 | | * Lookup namespace URI with given prefix. |
1528 | | * |
1529 | | * Returns the namespace URI on success, NULL if no namespace was found. |
1530 | | */ |
1531 | | static const xmlChar * |
1532 | 90.3k | xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) { |
1533 | 90.3k | const xmlChar *ret; |
1534 | 90.3k | int nsIndex; |
1535 | | |
1536 | 90.3k | if (prefix->name == ctxt->str_xml) |
1537 | 657 | return(ctxt->str_xml_ns); |
1538 | | |
1539 | 89.6k | nsIndex = xmlParserNsLookup(ctxt, prefix, NULL); |
1540 | 89.6k | if (nsIndex == INT_MAX) |
1541 | 70.2k | return(NULL); |
1542 | | |
1543 | 19.4k | ret = ctxt->nsTab[nsIndex * 2 + 1]; |
1544 | 19.4k | if (ret[0] == 0) |
1545 | 1.01k | ret = NULL; |
1546 | 19.4k | return(ret); |
1547 | 89.6k | } |
1548 | | |
1549 | | /** |
1550 | | * xmlParserNsLookupSax: |
1551 | | * @ctxt: parser context |
1552 | | * @prefix: namespace prefix |
1553 | | * |
1554 | | * Lookup extra data for the given prefix. This returns data stored |
1555 | | * with xmlParserNsUdpateSax. |
1556 | | * |
1557 | | * Returns the data on success, NULL if no namespace was found. |
1558 | | */ |
1559 | | void * |
1560 | 6.77k | xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) { |
1561 | 6.77k | xmlHashedString hprefix; |
1562 | 6.77k | int nsIndex; |
1563 | | |
1564 | 6.77k | if (prefix == ctxt->str_xml) |
1565 | 4.50k | return(NULL); |
1566 | | |
1567 | 2.27k | hprefix.name = prefix; |
1568 | 2.27k | if (prefix != NULL) |
1569 | 715 | hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); |
1570 | 1.55k | else |
1571 | 1.55k | hprefix.hashValue = 0; |
1572 | 2.27k | nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); |
1573 | 2.27k | if (nsIndex == INT_MAX) |
1574 | 0 | return(NULL); |
1575 | | |
1576 | 2.27k | return(ctxt->nsdb->extra[nsIndex].saxData); |
1577 | 2.27k | } |
1578 | | |
1579 | | /** |
1580 | | * xmlParserNsUpdateSax: |
1581 | | * @ctxt: parser context |
1582 | | * @prefix: namespace prefix |
1583 | | * @saxData: extra data for SAX handler |
1584 | | * |
1585 | | * Sets or updates extra data for the given prefix. This value will be |
1586 | | * returned by xmlParserNsLookupSax as long as the namespace with the |
1587 | | * given prefix is in scope. |
1588 | | * |
1589 | | * Returns the data on success, NULL if no namespace was found. |
1590 | | */ |
1591 | | int |
1592 | | xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix, |
1593 | 15.0k | void *saxData) { |
1594 | 15.0k | xmlHashedString hprefix; |
1595 | 15.0k | int nsIndex; |
1596 | | |
1597 | 15.0k | if (prefix == ctxt->str_xml) |
1598 | 0 | return(-1); |
1599 | | |
1600 | 15.0k | hprefix.name = prefix; |
1601 | 15.0k | if (prefix != NULL) |
1602 | 2.53k | hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix); |
1603 | 12.4k | else |
1604 | 12.4k | hprefix.hashValue = 0; |
1605 | 15.0k | nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL); |
1606 | 15.0k | if (nsIndex == INT_MAX) |
1607 | 0 | return(-1); |
1608 | | |
1609 | 15.0k | ctxt->nsdb->extra[nsIndex].saxData = saxData; |
1610 | 15.0k | return(0); |
1611 | 15.0k | } |
1612 | | |
1613 | | /** |
1614 | | * xmlParserNsGrow: |
1615 | | * @ctxt: parser context |
1616 | | * |
1617 | | * Grows the namespace tables. |
1618 | | * |
1619 | | * Returns 0 on success, -1 if a memory allocation failed. |
1620 | | */ |
1621 | | static int |
1622 | 4.40k | xmlParserNsGrow(xmlParserCtxtPtr ctxt) { |
1623 | 4.40k | const xmlChar **table; |
1624 | 4.40k | xmlParserNsExtra *extra; |
1625 | 4.40k | int newSize; |
1626 | | |
1627 | 4.40k | if (ctxt->nsMax > INT_MAX / 2) |
1628 | 0 | goto error; |
1629 | 4.40k | newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16; |
1630 | | |
1631 | 4.40k | table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0])); |
1632 | 4.40k | if (table == NULL) |
1633 | 0 | goto error; |
1634 | 4.40k | ctxt->nsTab = table; |
1635 | | |
1636 | 4.40k | extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0])); |
1637 | 4.40k | if (extra == NULL) |
1638 | 0 | goto error; |
1639 | 4.40k | ctxt->nsdb->extra = extra; |
1640 | | |
1641 | 4.40k | ctxt->nsMax = newSize; |
1642 | 4.40k | return(0); |
1643 | | |
1644 | 0 | error: |
1645 | 0 | xmlErrMemory(ctxt, NULL); |
1646 | 0 | return(-1); |
1647 | 4.40k | } |
1648 | | |
1649 | | /** |
1650 | | * xmlParserNsPush: |
1651 | | * @ctxt: parser context |
1652 | | * @prefix: prefix with hash value |
1653 | | * @uri: uri with hash value |
1654 | | * @saxData: extra data for SAX handler |
1655 | | * @defAttr: whether the namespace comes from a default attribute |
1656 | | * |
1657 | | * Push a new namespace on the table. |
1658 | | * |
1659 | | * Returns 1 if the namespace was pushed, 0 if the namespace was ignored, |
1660 | | * -1 if a memory allocation failed. |
1661 | | */ |
1662 | | static int |
1663 | | xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix, |
1664 | 39.8k | const xmlHashedString *uri, void *saxData, int defAttr) { |
1665 | 39.8k | xmlParserNsBucket *bucket = NULL; |
1666 | 39.8k | xmlParserNsExtra *extra; |
1667 | 39.8k | const xmlChar **ns; |
1668 | 39.8k | unsigned hashValue, nsIndex, oldIndex; |
1669 | | |
1670 | 39.8k | if ((prefix != NULL) && (prefix->name == ctxt->str_xml)) |
1671 | 195 | return(0); |
1672 | | |
1673 | 39.6k | if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) { |
1674 | 0 | xmlErrMemory(ctxt, NULL); |
1675 | 0 | return(-1); |
1676 | 0 | } |
1677 | | |
1678 | | /* |
1679 | | * Default namespace and 'xml' namespace |
1680 | | */ |
1681 | 39.6k | if ((prefix == NULL) || (prefix->name == NULL)) { |
1682 | 17.3k | oldIndex = ctxt->nsdb->defaultNsIndex; |
1683 | | |
1684 | 17.3k | if (oldIndex != INT_MAX) { |
1685 | 1.47k | if (defAttr != 0) |
1686 | 717 | return(0); |
1687 | | |
1688 | 762 | extra = &ctxt->nsdb->extra[oldIndex]; |
1689 | | |
1690 | 762 | if (extra->elementId == ctxt->nsdb->elementId) { |
1691 | 400 | xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns"); |
1692 | 400 | return(0); |
1693 | 400 | } |
1694 | | |
1695 | 362 | if ((ctxt->options & XML_PARSE_NSCLEAN) && |
1696 | 362 | (uri->name == ctxt->nsTab[oldIndex * 2 + 1])) |
1697 | 67 | return(0); |
1698 | 362 | } |
1699 | | |
1700 | 16.1k | ctxt->nsdb->defaultNsIndex = ctxt->nsNr; |
1701 | 16.1k | goto populate_entry; |
1702 | 17.3k | } |
1703 | | |
1704 | | /* |
1705 | | * Hash table lookup |
1706 | | */ |
1707 | 22.3k | oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket); |
1708 | 22.3k | if (oldIndex != INT_MAX) { |
1709 | 2.76k | extra = &ctxt->nsdb->extra[oldIndex]; |
1710 | | |
1711 | 2.76k | if (defAttr != 0) |
1712 | 1.50k | return(0); |
1713 | | |
1714 | | /* |
1715 | | * Check for duplicate definitions on the same element. |
1716 | | */ |
1717 | 1.26k | if (extra->elementId == ctxt->nsdb->elementId) { |
1718 | 241 | xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name); |
1719 | 241 | return(0); |
1720 | 241 | } |
1721 | | |
1722 | 1.02k | if ((ctxt->options & XML_PARSE_NSCLEAN) && |
1723 | 1.02k | (uri->name == ctxt->nsTab[bucket->index * 2 + 1])) |
1724 | 66 | return(0); |
1725 | | |
1726 | 956 | bucket->index = ctxt->nsNr; |
1727 | 956 | goto populate_entry; |
1728 | 1.02k | } |
1729 | | |
1730 | | /* |
1731 | | * Insert new bucket |
1732 | | */ |
1733 | | |
1734 | 19.5k | hashValue = prefix->hashValue; |
1735 | | |
1736 | | /* |
1737 | | * Grow hash table, 50% fill factor |
1738 | | */ |
1739 | 19.5k | if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) { |
1740 | 2.83k | xmlParserNsBucket *newHash; |
1741 | 2.83k | unsigned newSize, i, index; |
1742 | | |
1743 | 2.83k | if (ctxt->nsdb->hashSize > UINT_MAX / 2) { |
1744 | 0 | xmlErrMemory(ctxt, NULL); |
1745 | 0 | return(-1); |
1746 | 0 | } |
1747 | 2.83k | newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16; |
1748 | 2.83k | newHash = xmlMalloc(newSize * sizeof(newHash[0])); |
1749 | 2.83k | if (newHash == NULL) { |
1750 | 0 | xmlErrMemory(ctxt, NULL); |
1751 | 0 | return(-1); |
1752 | 0 | } |
1753 | 2.83k | memset(newHash, 0, newSize * sizeof(newHash[0])); |
1754 | | |
1755 | 50.3k | for (i = 0; i < ctxt->nsdb->hashSize; i++) { |
1756 | 47.5k | unsigned hv = ctxt->nsdb->hash[i].hashValue; |
1757 | 47.5k | unsigned newIndex; |
1758 | | |
1759 | 47.5k | if (hv == 0) |
1760 | 23.7k | continue; |
1761 | 23.7k | newIndex = hv & (newSize - 1); |
1762 | | |
1763 | 1.79M | while (newHash[newIndex].hashValue != 0) { |
1764 | 1.76M | newIndex++; |
1765 | 1.76M | if (newIndex == newSize) |
1766 | 2.48k | newIndex = 0; |
1767 | 1.76M | } |
1768 | | |
1769 | 23.7k | newHash[newIndex] = ctxt->nsdb->hash[i]; |
1770 | 23.7k | } |
1771 | | |
1772 | 2.83k | xmlFree(ctxt->nsdb->hash); |
1773 | 2.83k | ctxt->nsdb->hash = newHash; |
1774 | 2.83k | ctxt->nsdb->hashSize = newSize; |
1775 | | |
1776 | | /* |
1777 | | * Relookup |
1778 | | */ |
1779 | 2.83k | index = hashValue & (newSize - 1); |
1780 | | |
1781 | 13.5k | while (newHash[index].hashValue != 0) { |
1782 | 10.7k | index++; |
1783 | 10.7k | if (index == newSize) |
1784 | 176 | index = 0; |
1785 | 10.7k | } |
1786 | | |
1787 | 2.83k | bucket = &newHash[index]; |
1788 | 2.83k | } |
1789 | | |
1790 | 19.5k | bucket->hashValue = hashValue; |
1791 | 19.5k | bucket->index = ctxt->nsNr; |
1792 | 19.5k | ctxt->nsdb->hashElems++; |
1793 | 19.5k | oldIndex = INT_MAX; |
1794 | | |
1795 | 36.6k | populate_entry: |
1796 | 36.6k | nsIndex = ctxt->nsNr; |
1797 | | |
1798 | 36.6k | ns = &ctxt->nsTab[nsIndex * 2]; |
1799 | 36.6k | ns[0] = prefix ? prefix->name : NULL; |
1800 | 36.6k | ns[1] = uri->name; |
1801 | | |
1802 | 36.6k | extra = &ctxt->nsdb->extra[nsIndex]; |
1803 | 36.6k | extra->saxData = saxData; |
1804 | 36.6k | extra->prefixHashValue = prefix ? prefix->hashValue : 0; |
1805 | 36.6k | extra->uriHashValue = uri->hashValue; |
1806 | 36.6k | extra->elementId = ctxt->nsdb->elementId; |
1807 | 36.6k | extra->oldIndex = oldIndex; |
1808 | | |
1809 | 36.6k | ctxt->nsNr++; |
1810 | | |
1811 | 36.6k | return(1); |
1812 | 19.5k | } |
1813 | | |
1814 | | /** |
1815 | | * xmlParserNsPop: |
1816 | | * @ctxt: an XML parser context |
1817 | | * @nr: the number to pop |
1818 | | * |
1819 | | * Pops the top @nr namespaces and restores the hash table. |
1820 | | * |
1821 | | * Returns the number of namespaces popped. |
1822 | | */ |
1823 | | static int |
1824 | | xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr) |
1825 | 22.0k | { |
1826 | 22.0k | int i; |
1827 | | |
1828 | | /* assert(nr <= ctxt->nsNr); */ |
1829 | | |
1830 | 53.6k | for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) { |
1831 | 31.5k | const xmlChar *prefix = ctxt->nsTab[i * 2]; |
1832 | 31.5k | xmlParserNsExtra *extra = &ctxt->nsdb->extra[i]; |
1833 | | |
1834 | 31.5k | if (prefix == NULL) { |
1835 | 15.7k | ctxt->nsdb->defaultNsIndex = extra->oldIndex; |
1836 | 15.7k | } else { |
1837 | 15.7k | xmlHashedString hprefix; |
1838 | 15.7k | xmlParserNsBucket *bucket = NULL; |
1839 | | |
1840 | 15.7k | hprefix.name = prefix; |
1841 | 15.7k | hprefix.hashValue = extra->prefixHashValue; |
1842 | 15.7k | xmlParserNsLookup(ctxt, &hprefix, &bucket); |
1843 | | /* assert(bucket && bucket->hashValue); */ |
1844 | 15.7k | bucket->index = extra->oldIndex; |
1845 | 15.7k | } |
1846 | 31.5k | } |
1847 | | |
1848 | 22.0k | ctxt->nsNr -= nr; |
1849 | 22.0k | return(nr); |
1850 | 22.0k | } |
1851 | | |
1852 | | static int |
1853 | 3.23k | xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) { |
1854 | 3.23k | const xmlChar **atts; |
1855 | 3.23k | unsigned *attallocs; |
1856 | 3.23k | int maxatts; |
1857 | | |
1858 | 3.23k | if (nr + 5 > ctxt->maxatts) { |
1859 | 3.23k | maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2; |
1860 | 3.23k | atts = (const xmlChar **) xmlMalloc( |
1861 | 3.23k | maxatts * sizeof(const xmlChar *)); |
1862 | 3.23k | if (atts == NULL) goto mem_error; |
1863 | 3.23k | attallocs = xmlRealloc(ctxt->attallocs, |
1864 | 3.23k | (maxatts / 5) * sizeof(attallocs[0])); |
1865 | 3.23k | if (attallocs == NULL) { |
1866 | 0 | xmlFree(atts); |
1867 | 0 | goto mem_error; |
1868 | 0 | } |
1869 | 3.23k | if (ctxt->maxatts > 0) |
1870 | 171 | memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *)); |
1871 | 3.23k | xmlFree(ctxt->atts); |
1872 | 3.23k | ctxt->atts = atts; |
1873 | 3.23k | ctxt->attallocs = attallocs; |
1874 | 3.23k | ctxt->maxatts = maxatts; |
1875 | 3.23k | } |
1876 | 3.23k | return(ctxt->maxatts); |
1877 | 0 | mem_error: |
1878 | 0 | xmlErrMemory(ctxt, NULL); |
1879 | 0 | return(-1); |
1880 | 3.23k | } |
1881 | | |
1882 | | /** |
1883 | | * inputPush: |
1884 | | * @ctxt: an XML parser context |
1885 | | * @value: the parser input |
1886 | | * |
1887 | | * Pushes a new parser input on top of the input stack |
1888 | | * |
1889 | | * Returns -1 in case of error, the index in the stack otherwise |
1890 | | */ |
1891 | | int |
1892 | | inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value) |
1893 | 75.2k | { |
1894 | 75.2k | if ((ctxt == NULL) || (value == NULL)) |
1895 | 0 | return(-1); |
1896 | 75.2k | if (ctxt->inputNr >= ctxt->inputMax) { |
1897 | 0 | size_t newSize = ctxt->inputMax * 2; |
1898 | 0 | xmlParserInputPtr *tmp; |
1899 | |
|
1900 | 0 | tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab, |
1901 | 0 | newSize * sizeof(*tmp)); |
1902 | 0 | if (tmp == NULL) { |
1903 | 0 | xmlErrMemory(ctxt, NULL); |
1904 | 0 | return (-1); |
1905 | 0 | } |
1906 | 0 | ctxt->inputTab = tmp; |
1907 | 0 | ctxt->inputMax = newSize; |
1908 | 0 | } |
1909 | 75.2k | ctxt->inputTab[ctxt->inputNr] = value; |
1910 | 75.2k | ctxt->input = value; |
1911 | 75.2k | return (ctxt->inputNr++); |
1912 | 75.2k | } |
1913 | | /** |
1914 | | * inputPop: |
1915 | | * @ctxt: an XML parser context |
1916 | | * |
1917 | | * Pops the top parser input from the input stack |
1918 | | * |
1919 | | * Returns the input just removed |
1920 | | */ |
1921 | | xmlParserInputPtr |
1922 | | inputPop(xmlParserCtxtPtr ctxt) |
1923 | 132k | { |
1924 | 132k | xmlParserInputPtr ret; |
1925 | | |
1926 | 132k | if (ctxt == NULL) |
1927 | 0 | return(NULL); |
1928 | 132k | if (ctxt->inputNr <= 0) |
1929 | 56.9k | return (NULL); |
1930 | 75.2k | ctxt->inputNr--; |
1931 | 75.2k | if (ctxt->inputNr > 0) |
1932 | 46.7k | ctxt->input = ctxt->inputTab[ctxt->inputNr - 1]; |
1933 | 28.4k | else |
1934 | 28.4k | ctxt->input = NULL; |
1935 | 75.2k | ret = ctxt->inputTab[ctxt->inputNr]; |
1936 | 75.2k | ctxt->inputTab[ctxt->inputNr] = NULL; |
1937 | 75.2k | return (ret); |
1938 | 132k | } |
1939 | | /** |
1940 | | * nodePush: |
1941 | | * @ctxt: an XML parser context |
1942 | | * @value: the element node |
1943 | | * |
1944 | | * DEPRECATED: Internal function, do not use. |
1945 | | * |
1946 | | * Pushes a new element node on top of the node stack |
1947 | | * |
1948 | | * Returns -1 in case of error, the index in the stack otherwise |
1949 | | */ |
1950 | | int |
1951 | | nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value) |
1952 | 103k | { |
1953 | 103k | if (ctxt == NULL) return(0); |
1954 | 103k | if (ctxt->nodeNr >= ctxt->nodeMax) { |
1955 | 324 | xmlNodePtr *tmp; |
1956 | | |
1957 | 324 | tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab, |
1958 | 324 | ctxt->nodeMax * 2 * |
1959 | 324 | sizeof(ctxt->nodeTab[0])); |
1960 | 324 | if (tmp == NULL) { |
1961 | 0 | xmlErrMemory(ctxt, NULL); |
1962 | 0 | return (-1); |
1963 | 0 | } |
1964 | 324 | ctxt->nodeTab = tmp; |
1965 | 324 | ctxt->nodeMax *= 2; |
1966 | 324 | } |
1967 | 103k | if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) && |
1968 | 103k | ((ctxt->options & XML_PARSE_HUGE) == 0)) { |
1969 | 3 | xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR, |
1970 | 3 | "Excessive depth in document: %d use XML_PARSE_HUGE option\n", |
1971 | 3 | xmlParserMaxDepth); |
1972 | 3 | xmlHaltParser(ctxt); |
1973 | 3 | return(-1); |
1974 | 3 | } |
1975 | 103k | ctxt->nodeTab[ctxt->nodeNr] = value; |
1976 | 103k | ctxt->node = value; |
1977 | 103k | return (ctxt->nodeNr++); |
1978 | 103k | } |
1979 | | |
1980 | | /** |
1981 | | * nodePop: |
1982 | | * @ctxt: an XML parser context |
1983 | | * |
1984 | | * DEPRECATED: Internal function, do not use. |
1985 | | * |
1986 | | * Pops the top element node from the node stack |
1987 | | * |
1988 | | * Returns the node just removed |
1989 | | */ |
1990 | | xmlNodePtr |
1991 | | nodePop(xmlParserCtxtPtr ctxt) |
1992 | 112k | { |
1993 | 112k | xmlNodePtr ret; |
1994 | | |
1995 | 112k | if (ctxt == NULL) return(NULL); |
1996 | 112k | if (ctxt->nodeNr <= 0) |
1997 | 26.2k | return (NULL); |
1998 | 85.7k | ctxt->nodeNr--; |
1999 | 85.7k | if (ctxt->nodeNr > 0) |
2000 | 80.6k | ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1]; |
2001 | 5.16k | else |
2002 | 5.16k | ctxt->node = NULL; |
2003 | 85.7k | ret = ctxt->nodeTab[ctxt->nodeNr]; |
2004 | 85.7k | ctxt->nodeTab[ctxt->nodeNr] = NULL; |
2005 | 85.7k | return (ret); |
2006 | 112k | } |
2007 | | |
2008 | | /** |
2009 | | * nameNsPush: |
2010 | | * @ctxt: an XML parser context |
2011 | | * @value: the element name |
2012 | | * @prefix: the element prefix |
2013 | | * @URI: the element namespace name |
2014 | | * @line: the current line number for error messages |
2015 | | * @nsNr: the number of namespaces pushed on the namespace table |
2016 | | * |
2017 | | * Pushes a new element name/prefix/URL on top of the name stack |
2018 | | * |
2019 | | * Returns -1 in case of error, the index in the stack otherwise |
2020 | | */ |
2021 | | static int |
2022 | | nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value, |
2023 | | const xmlChar *prefix, const xmlChar *URI, int line, int nsNr) |
2024 | 141k | { |
2025 | 141k | xmlStartTag *tag; |
2026 | | |
2027 | 141k | if (ctxt->nameNr >= ctxt->nameMax) { |
2028 | 446 | const xmlChar * *tmp; |
2029 | 446 | xmlStartTag *tmp2; |
2030 | 446 | ctxt->nameMax *= 2; |
2031 | 446 | tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
2032 | 446 | ctxt->nameMax * |
2033 | 446 | sizeof(ctxt->nameTab[0])); |
2034 | 446 | if (tmp == NULL) { |
2035 | 0 | ctxt->nameMax /= 2; |
2036 | 0 | goto mem_error; |
2037 | 0 | } |
2038 | 446 | ctxt->nameTab = tmp; |
2039 | 446 | tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab, |
2040 | 446 | ctxt->nameMax * |
2041 | 446 | sizeof(ctxt->pushTab[0])); |
2042 | 446 | if (tmp2 == NULL) { |
2043 | 0 | ctxt->nameMax /= 2; |
2044 | 0 | goto mem_error; |
2045 | 0 | } |
2046 | 446 | ctxt->pushTab = tmp2; |
2047 | 141k | } else if (ctxt->pushTab == NULL) { |
2048 | 14.8k | ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax * |
2049 | 14.8k | sizeof(ctxt->pushTab[0])); |
2050 | 14.8k | if (ctxt->pushTab == NULL) |
2051 | 0 | goto mem_error; |
2052 | 14.8k | } |
2053 | 141k | ctxt->nameTab[ctxt->nameNr] = value; |
2054 | 141k | ctxt->name = value; |
2055 | 141k | tag = &ctxt->pushTab[ctxt->nameNr]; |
2056 | 141k | tag->prefix = prefix; |
2057 | 141k | tag->URI = URI; |
2058 | 141k | tag->line = line; |
2059 | 141k | tag->nsNr = nsNr; |
2060 | 141k | return (ctxt->nameNr++); |
2061 | 0 | mem_error: |
2062 | 0 | xmlErrMemory(ctxt, NULL); |
2063 | 0 | return (-1); |
2064 | 141k | } |
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Unstacks the topmost element name. The name below it, if any, becomes
 * the current name; otherwise ctxt->name is set to NULL. The vacated
 * nameTab slot is cleared.
 *
 * Returns the removed name, or NULL if the stack is empty.
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2091 | | |
2092 | | /** |
2093 | | * namePush: |
2094 | | * @ctxt: an XML parser context |
2095 | | * @value: the element name |
2096 | | * |
2097 | | * DEPRECATED: Internal function, do not use. |
2098 | | * |
2099 | | * Pushes a new element name on top of the name stack |
2100 | | * |
2101 | | * Returns -1 in case of error, the index in the stack otherwise |
2102 | | */ |
2103 | | int |
2104 | | namePush(xmlParserCtxtPtr ctxt, const xmlChar * value) |
2105 | 0 | { |
2106 | 0 | if (ctxt == NULL) return (-1); |
2107 | | |
2108 | 0 | if (ctxt->nameNr >= ctxt->nameMax) { |
2109 | 0 | const xmlChar * *tmp; |
2110 | 0 | tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab, |
2111 | 0 | ctxt->nameMax * 2 * |
2112 | 0 | sizeof(ctxt->nameTab[0])); |
2113 | 0 | if (tmp == NULL) { |
2114 | 0 | goto mem_error; |
2115 | 0 | } |
2116 | 0 | ctxt->nameTab = tmp; |
2117 | 0 | ctxt->nameMax *= 2; |
2118 | 0 | } |
2119 | 0 | ctxt->nameTab[ctxt->nameNr] = value; |
2120 | 0 | ctxt->name = value; |
2121 | 0 | return (ctxt->nameNr++); |
2122 | 0 | mem_error: |
2123 | 0 | xmlErrMemory(ctxt, NULL); |
2124 | 0 | return (-1); |
2125 | 0 | } |
2126 | | |
2127 | | /** |
2128 | | * namePop: |
2129 | | * @ctxt: an XML parser context |
2130 | | * |
2131 | | * DEPRECATED: Internal function, do not use. |
2132 | | * |
2133 | | * Pops the top element name from the name stack |
2134 | | * |
2135 | | * Returns the name just removed |
2136 | | */ |
2137 | | const xmlChar * |
2138 | | namePop(xmlParserCtxtPtr ctxt) |
2139 | 123k | { |
2140 | 123k | const xmlChar *ret; |
2141 | | |
2142 | 123k | if ((ctxt == NULL) || (ctxt->nameNr <= 0)) |
2143 | 0 | return (NULL); |
2144 | 123k | ctxt->nameNr--; |
2145 | 123k | if (ctxt->nameNr > 0) |
2146 | 112k | ctxt->name = ctxt->nameTab[ctxt->nameNr - 1]; |
2147 | 11.1k | else |
2148 | 11.1k | ctxt->name = NULL; |
2149 | 123k | ret = ctxt->nameTab[ctxt->nameNr]; |
2150 | 123k | ctxt->nameTab[ctxt->nameNr] = NULL; |
2151 | 123k | return (ret); |
2152 | 123k | } |
2153 | | |
2154 | 166k | static int spacePush(xmlParserCtxtPtr ctxt, int val) { |
2155 | 166k | if (ctxt->spaceNr >= ctxt->spaceMax) { |
2156 | 480 | int *tmp; |
2157 | | |
2158 | 480 | ctxt->spaceMax *= 2; |
2159 | 480 | tmp = (int *) xmlRealloc(ctxt->spaceTab, |
2160 | 480 | ctxt->spaceMax * sizeof(ctxt->spaceTab[0])); |
2161 | 480 | if (tmp == NULL) { |
2162 | 0 | xmlErrMemory(ctxt, NULL); |
2163 | 0 | ctxt->spaceMax /=2; |
2164 | 0 | return(-1); |
2165 | 0 | } |
2166 | 480 | ctxt->spaceTab = tmp; |
2167 | 480 | } |
2168 | 166k | ctxt->spaceTab[ctxt->spaceNr] = val; |
2169 | 166k | ctxt->space = &ctxt->spaceTab[ctxt->spaceNr]; |
2170 | 166k | return(ctxt->spaceNr++); |
2171 | 166k | } |
2172 | | |
2173 | 147k | static int spacePop(xmlParserCtxtPtr ctxt) { |
2174 | 147k | int ret; |
2175 | 147k | if (ctxt->spaceNr <= 0) return(0); |
2176 | 147k | ctxt->spaceNr--; |
2177 | 147k | if (ctxt->spaceNr > 0) |
2178 | 147k | ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1]; |
2179 | 0 | else |
2180 | 0 | ctxt->space = &ctxt->spaceTab[0]; |
2181 | 147k | ret = ctxt->spaceTab[ctxt->spaceNr]; |
2182 | 147k | ctxt->spaceTab[ctxt->spaceNr] = -1; |
2183 | 147k | return(ret); |
2184 | 147k | } |
2185 | | |
/*
 * Macros for accessing the content. They should be used only by the parser
 * and must not be exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them:
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location!
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypasses any token
 *           extraction that may have been done.
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII-based substrings.
 *   SKIP(n) Skip n xmlChars; must also be used only to skip ASCII-defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar; must also be used only to skip 1 non-newline
 *           ASCII-defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context.
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index.
 *   GROW, SHRINK  handling of input buffers.
 */
2220 | | |
/* Current byte, byte at an offset, and raw cursor/base pointers. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn.
 * Comparison stops at the first mismatch, so bytes after a mismatch
 * are never read.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance the cursor and column by val bytes, then refill when the
 * cursor lands on a zero byte. Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Like SKIP, but tracks line/column across newlines byte by byte. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a complete `if` statement including its
 * terminating semicolon; they are left in that form on purpose so that
 * existing call sites keep compiling unchanged.
 */
/* Don't shrink push parser buffer. */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Brace block (not do/while) kept as-is for call-site compatibility. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/* Step over one character of l bytes, updating line/column. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i, advancing i. Wrapped in
 * do/while so the internal if/else cannot capture a caller's `else`.
 */
#define COPY_BUF(b, i, v) do {						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v));			\
  } while (0)
2297 | | |
2298 | | /** |
2299 | | * xmlSkipBlankChars: |
2300 | | * @ctxt: the XML parser context |
2301 | | * |
2302 | | * DEPRECATED: Internal function, do not use. |
2303 | | * |
2304 | | * skip all blanks character found at that point in the input streams. |
2305 | | * It pops up finished entities in the process if allowable at that point. |
2306 | | * |
2307 | | * Returns the number of space chars skipped |
2308 | | */ |
2309 | | |
2310 | | int |
2311 | 1.03M | xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { |
2312 | 1.03M | int res = 0; |
2313 | | |
2314 | | /* |
2315 | | * It's Okay to use CUR/NEXT here since all the blanks are on |
2316 | | * the ASCII range. |
2317 | | */ |
2318 | 1.03M | if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) || |
2319 | 1.03M | (ctxt->instate == XML_PARSER_START)) { |
2320 | 485k | const xmlChar *cur; |
2321 | | /* |
2322 | | * if we are in the document content, go really fast |
2323 | | */ |
2324 | 485k | cur = ctxt->input->cur; |
2325 | 485k | while (IS_BLANK_CH(*cur)) { |
2326 | 76.9k | if (*cur == '\n') { |
2327 | 471 | ctxt->input->line++; ctxt->input->col = 1; |
2328 | 76.4k | } else { |
2329 | 76.4k | ctxt->input->col++; |
2330 | 76.4k | } |
2331 | 76.9k | cur++; |
2332 | 76.9k | if (res < INT_MAX) |
2333 | 76.9k | res++; |
2334 | 76.9k | if (*cur == 0) { |
2335 | 91 | ctxt->input->cur = cur; |
2336 | 91 | xmlParserGrow(ctxt); |
2337 | 91 | cur = ctxt->input->cur; |
2338 | 91 | } |
2339 | 76.9k | } |
2340 | 485k | ctxt->input->cur = cur; |
2341 | 554k | } else { |
2342 | 554k | int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); |
2343 | | |
2344 | 805k | while (ctxt->instate != XML_PARSER_EOF) { |
2345 | 805k | if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ |
2346 | 160k | NEXT; |
2347 | 645k | } else if (CUR == '%') { |
2348 | | /* |
2349 | | * Need to handle support of entities branching here |
2350 | | */ |
2351 | 55.6k | if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) |
2352 | 11.6k | break; |
2353 | 44.0k | xmlParsePEReference(ctxt); |
2354 | 589k | } else if (CUR == 0) { |
2355 | 46.8k | unsigned long consumed; |
2356 | 46.8k | xmlEntityPtr ent; |
2357 | | |
2358 | 46.8k | if (ctxt->inputNr <= 1) |
2359 | 122 | break; |
2360 | | |
2361 | 46.7k | consumed = ctxt->input->consumed; |
2362 | 46.7k | xmlSaturatedAddSizeT(&consumed, |
2363 | 46.7k | ctxt->input->cur - ctxt->input->base); |
2364 | | |
2365 | | /* |
2366 | | * Add to sizeentities when parsing an external entity |
2367 | | * for the first time. |
2368 | | */ |
2369 | 46.7k | ent = ctxt->input->entity; |
2370 | 46.7k | if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) && |
2371 | 46.7k | ((ent->flags & XML_ENT_PARSED) == 0)) { |
2372 | 0 | ent->flags |= XML_ENT_PARSED; |
2373 | |
|
2374 | 0 | xmlSaturatedAdd(&ctxt->sizeentities, consumed); |
2375 | 0 | } |
2376 | | |
2377 | 46.7k | xmlParserEntityCheck(ctxt, consumed); |
2378 | | |
2379 | 46.7k | xmlPopInput(ctxt); |
2380 | 542k | } else { |
2381 | 542k | break; |
2382 | 542k | } |
2383 | | |
2384 | | /* |
2385 | | * Also increase the counter when entering or exiting a PERef. |
2386 | | * The spec says: "When a parameter-entity reference is recognized |
2387 | | * in the DTD and included, its replacement text MUST be enlarged |
2388 | | * by the attachment of one leading and one following space (#x20) |
2389 | | * character." |
2390 | | */ |
2391 | 251k | if (res < INT_MAX) |
2392 | 251k | res++; |
2393 | 251k | } |
2394 | 554k | } |
2395 | 1.03M | return(res); |
2396 | 1.03M | } |
2397 | | |
2398 | | /************************************************************************ |
2399 | | * * |
2400 | | * Commodity functions to handle entities * |
2401 | | * * |
2402 | | ************************************************************************/ |
2403 | | |
2404 | | /** |
2405 | | * xmlPopInput: |
2406 | | * @ctxt: an XML parser context |
2407 | | * |
2408 | | * xmlPopInput: the current input pointed by ctxt->input came to an end |
2409 | | * pop it and return the next char. |
2410 | | * |
2411 | | * Returns the current xmlChar in the parser context |
2412 | | */ |
2413 | | xmlChar |
2414 | 46.7k | xmlPopInput(xmlParserCtxtPtr ctxt) { |
2415 | 46.7k | xmlParserInputPtr input; |
2416 | | |
2417 | 46.7k | if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0); |
2418 | 46.7k | if (xmlParserDebugEntities) |
2419 | 0 | xmlGenericError(xmlGenericErrorContext, |
2420 | 0 | "Popping input %d\n", ctxt->inputNr); |
2421 | 46.7k | if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) && |
2422 | 46.7k | (ctxt->instate != XML_PARSER_EOF)) |
2423 | 0 | xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, |
2424 | 0 | "Unfinished entity outside the DTD"); |
2425 | 46.7k | input = inputPop(ctxt); |
2426 | 46.7k | if (input->entity != NULL) |
2427 | 46.7k | input->entity->flags &= ~XML_ENT_EXPANDING; |
2428 | 46.7k | xmlFreeInputStream(input); |
2429 | 46.7k | if (*ctxt->input->cur == 0) |
2430 | 410 | xmlParserGrow(ctxt); |
2431 | 46.7k | return(CUR); |
2432 | 46.7k | } |
2433 | | |
2434 | | /** |
2435 | | * xmlPushInput: |
2436 | | * @ctxt: an XML parser context |
2437 | | * @input: an XML parser input fragment (entity, XML fragment ...). |
2438 | | * |
2439 | | * xmlPushInput: switch to a new input stream which is stacked on top |
2440 | | * of the previous one(s). |
2441 | | * Returns -1 in case of error or the index in the input stack |
2442 | | */ |
2443 | | int |
2444 | 46.7k | xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) { |
2445 | 46.7k | int ret; |
2446 | 46.7k | if (input == NULL) return(-1); |
2447 | | |
2448 | 46.7k | if (xmlParserDebugEntities) { |
2449 | 0 | if ((ctxt->input != NULL) && (ctxt->input->filename)) |
2450 | 0 | xmlGenericError(xmlGenericErrorContext, |
2451 | 0 | "%s(%d): ", ctxt->input->filename, |
2452 | 0 | ctxt->input->line); |
2453 | 0 | xmlGenericError(xmlGenericErrorContext, |
2454 | 0 | "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur); |
2455 | 0 | } |
2456 | 46.7k | if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) || |
2457 | 46.7k | (ctxt->inputNr > 100)) { |
2458 | 0 | xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL); |
2459 | 0 | while (ctxt->inputNr > 1) |
2460 | 0 | xmlFreeInputStream(inputPop(ctxt)); |
2461 | 0 | return(-1); |
2462 | 0 | } |
2463 | 46.7k | ret = inputPush(ctxt, input); |
2464 | 46.7k | if (ctxt->instate == XML_PARSER_EOF) |
2465 | 0 | return(-1); |
2466 | 46.7k | GROW; |
2467 | 46.7k | return(ret); |
2468 | 46.7k | } |
2469 | | |
2470 | | /** |
2471 | | * xmlParseCharRef: |
2472 | | * @ctxt: an XML parser context |
2473 | | * |
2474 | | * DEPRECATED: Internal function, don't use. |
2475 | | * |
2476 | | * Parse a numeric character reference. Always consumes '&'. |
2477 | | * |
2478 | | * [66] CharRef ::= '&#' [0-9]+ ';' | |
2479 | | * '&#x' [0-9a-fA-F]+ ';' |
2480 | | * |
2481 | | * [ WFC: Legal Character ] |
2482 | | * Characters referred to using character references must match the |
2483 | | * production for Char. |
2484 | | * |
2485 | | * Returns the value parsed (as an int), 0 in case of error |
2486 | | */ |
2487 | | int |
2488 | 13.4k | xmlParseCharRef(xmlParserCtxtPtr ctxt) { |
2489 | 13.4k | int val = 0; |
2490 | 13.4k | int count = 0; |
2491 | | |
2492 | | /* |
2493 | | * Using RAW/CUR/NEXT is okay since we are working on ASCII range here |
2494 | | */ |
2495 | 13.4k | if ((RAW == '&') && (NXT(1) == '#') && |
2496 | 13.4k | (NXT(2) == 'x')) { |
2497 | 6.94k | SKIP(3); |
2498 | 6.94k | GROW; |
2499 | 30.5k | while (RAW != ';') { /* loop blocked by count */ |
2500 | 25.0k | if (count++ > 20) { |
2501 | 473 | count = 0; |
2502 | 473 | GROW; |
2503 | 473 | if (ctxt->instate == XML_PARSER_EOF) |
2504 | 0 | return(0); |
2505 | 473 | } |
2506 | 25.0k | if ((RAW >= '0') && (RAW <= '9')) |
2507 | 7.99k | val = val * 16 + (CUR - '0'); |
2508 | 17.0k | else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) |
2509 | 5.80k | val = val * 16 + (CUR - 'a') + 10; |
2510 | 11.2k | else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) |
2511 | 9.81k | val = val * 16 + (CUR - 'A') + 10; |
2512 | 1.45k | else { |
2513 | 1.45k | xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
2514 | 1.45k | val = 0; |
2515 | 1.45k | break; |
2516 | 1.45k | } |
2517 | 23.6k | if (val > 0x110000) |
2518 | 5.39k | val = 0x110000; |
2519 | | |
2520 | 23.6k | NEXT; |
2521 | 23.6k | count++; |
2522 | 23.6k | } |
2523 | 6.94k | if (RAW == ';') { |
2524 | | /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
2525 | 5.48k | ctxt->input->col++; |
2526 | 5.48k | ctxt->input->cur++; |
2527 | 5.48k | } |
2528 | 6.94k | } else if ((RAW == '&') && (NXT(1) == '#')) { |
2529 | 6.55k | SKIP(2); |
2530 | 6.55k | GROW; |
2531 | 23.4k | while (RAW != ';') { /* loop blocked by count */ |
2532 | 18.8k | if (count++ > 20) { |
2533 | 473 | count = 0; |
2534 | 473 | GROW; |
2535 | 473 | if (ctxt->instate == XML_PARSER_EOF) |
2536 | 0 | return(0); |
2537 | 473 | } |
2538 | 18.8k | if ((RAW >= '0') && (RAW <= '9')) |
2539 | 16.9k | val = val * 10 + (CUR - '0'); |
2540 | 1.93k | else { |
2541 | 1.93k | xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
2542 | 1.93k | val = 0; |
2543 | 1.93k | break; |
2544 | 1.93k | } |
2545 | 16.9k | if (val > 0x110000) |
2546 | 1.85k | val = 0x110000; |
2547 | | |
2548 | 16.9k | NEXT; |
2549 | 16.9k | count++; |
2550 | 16.9k | } |
2551 | 6.55k | if (RAW == ';') { |
2552 | | /* on purpose to avoid reentrancy problems with NEXT and SKIP */ |
2553 | 4.61k | ctxt->input->col++; |
2554 | 4.61k | ctxt->input->cur++; |
2555 | 4.61k | } |
2556 | 6.55k | } else { |
2557 | 0 | if (RAW == '&') |
2558 | 0 | SKIP(1); |
2559 | 0 | xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
2560 | 0 | } |
2561 | | |
2562 | | /* |
2563 | | * [ WFC: Legal Character ] |
2564 | | * Characters referred to using character references must match the |
2565 | | * production for Char. |
2566 | | */ |
2567 | 13.4k | if (val >= 0x110000) { |
2568 | 300 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
2569 | 300 | "xmlParseCharRef: character reference out of bounds\n", |
2570 | 300 | val); |
2571 | 13.1k | } else if (IS_CHAR(val)) { |
2572 | 9.05k | return(val); |
2573 | 9.05k | } else { |
2574 | 4.13k | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
2575 | 4.13k | "xmlParseCharRef: invalid xmlChar value %d\n", |
2576 | 4.13k | val); |
2577 | 4.13k | } |
2578 | 4.43k | return(0); |
2579 | 13.4k | } |
2580 | | |
2581 | | /** |
2582 | | * xmlParseStringCharRef: |
2583 | | * @ctxt: an XML parser context |
2584 | | * @str: a pointer to an index in the string |
2585 | | * |
2586 | | * parse Reference declarations, variant parsing from a string rather |
2587 | | * than an an input flow. |
2588 | | * |
2589 | | * [66] CharRef ::= '&#' [0-9]+ ';' | |
2590 | | * '&#x' [0-9a-fA-F]+ ';' |
2591 | | * |
2592 | | * [ WFC: Legal Character ] |
2593 | | * Characters referred to using character references must match the |
2594 | | * production for Char. |
2595 | | * |
2596 | | * Returns the value parsed (as an int), 0 in case of error, str will be |
2597 | | * updated to the current value of the index |
2598 | | */ |
2599 | | static int |
2600 | 6.37k | xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { |
2601 | 6.37k | const xmlChar *ptr; |
2602 | 6.37k | xmlChar cur; |
2603 | 6.37k | int val = 0; |
2604 | | |
2605 | 6.37k | if ((str == NULL) || (*str == NULL)) return(0); |
2606 | 6.37k | ptr = *str; |
2607 | 6.37k | cur = *ptr; |
2608 | 6.37k | if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { |
2609 | 2.52k | ptr += 3; |
2610 | 2.52k | cur = *ptr; |
2611 | 10.2k | while (cur != ';') { /* Non input consuming loop */ |
2612 | 8.18k | if ((cur >= '0') && (cur <= '9')) |
2613 | 2.47k | val = val * 16 + (cur - '0'); |
2614 | 5.70k | else if ((cur >= 'a') && (cur <= 'f')) |
2615 | 2.02k | val = val * 16 + (cur - 'a') + 10; |
2616 | 3.68k | else if ((cur >= 'A') && (cur <= 'F')) |
2617 | 3.22k | val = val * 16 + (cur - 'A') + 10; |
2618 | 462 | else { |
2619 | 462 | xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL); |
2620 | 462 | val = 0; |
2621 | 462 | break; |
2622 | 462 | } |
2623 | 7.71k | if (val > 0x110000) |
2624 | 584 | val = 0x110000; |
2625 | | |
2626 | 7.71k | ptr++; |
2627 | 7.71k | cur = *ptr; |
2628 | 7.71k | } |
2629 | 2.52k | if (cur == ';') |
2630 | 2.06k | ptr++; |
2631 | 3.85k | } else if ((cur == '&') && (ptr[1] == '#')){ |
2632 | 3.85k | ptr += 2; |
2633 | 3.85k | cur = *ptr; |
2634 | 10.8k | while (cur != ';') { /* Non input consuming loops */ |
2635 | 8.06k | if ((cur >= '0') && (cur <= '9')) |
2636 | 7.00k | val = val * 10 + (cur - '0'); |
2637 | 1.06k | else { |
2638 | 1.06k | xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL); |
2639 | 1.06k | val = 0; |
2640 | 1.06k | break; |
2641 | 1.06k | } |
2642 | 7.00k | if (val > 0x110000) |
2643 | 565 | val = 0x110000; |
2644 | | |
2645 | 7.00k | ptr++; |
2646 | 7.00k | cur = *ptr; |
2647 | 7.00k | } |
2648 | 3.85k | if (cur == ';') |
2649 | 2.79k | ptr++; |
2650 | 3.85k | } else { |
2651 | 0 | xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL); |
2652 | 0 | return(0); |
2653 | 0 | } |
2654 | 6.37k | *str = ptr; |
2655 | | |
2656 | | /* |
2657 | | * [ WFC: Legal Character ] |
2658 | | * Characters referred to using character references must match the |
2659 | | * production for Char. |
2660 | | */ |
2661 | 6.37k | if (val >= 0x110000) { |
2662 | 275 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
2663 | 275 | "xmlParseStringCharRef: character reference out of bounds\n", |
2664 | 275 | val); |
2665 | 6.10k | } else if (IS_CHAR(val)) { |
2666 | 3.95k | return(val); |
2667 | 3.95k | } else { |
2668 | 2.15k | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
2669 | 2.15k | "xmlParseStringCharRef: invalid xmlChar value %d\n", |
2670 | 2.15k | val); |
2671 | 2.15k | } |
2672 | 2.42k | return(0); |
2673 | 6.37k | } |
2674 | | |
2675 | | /** |
2676 | | * xmlParserHandlePEReference: |
2677 | | * @ctxt: the parser context |
2678 | | * |
2679 | | * DEPRECATED: Internal function, do not use. |
2680 | | * |
2681 | | * [69] PEReference ::= '%' Name ';' |
2682 | | * |
2683 | | * [ WFC: No Recursion ] |
2684 | | * A parsed entity must not contain a recursive |
2685 | | * reference to itself, either directly or indirectly. |
2686 | | * |
2687 | | * [ WFC: Entity Declared ] |
2688 | | * In a document without any DTD, a document with only an internal DTD |
2689 | | * subset which contains no parameter entity references, or a document |
2690 | | * with "standalone='yes'", ... ... The declaration of a parameter |
2691 | | * entity must precede any reference to it... |
2692 | | * |
2693 | | * [ VC: Entity Declared ] |
2694 | | * In a document with an external subset or external parameter entities |
2695 | | * with "standalone='no'", ... ... The declaration of a parameter entity |
2696 | | * must precede any reference to it... |
2697 | | * |
2698 | | * [ WFC: In DTD ] |
2699 | | * Parameter-entity references may only appear in the DTD. |
2700 | | * NOTE: misleading but this is handled. |
2701 | | * |
2702 | | * A PEReference may have been detected in the current input stream |
2703 | | * the handling is done accordingly to |
2704 | | * http://www.w3.org/TR/REC-xml#entproc |
2705 | | * i.e. |
2706 | | * - Included in literal in entity values |
2707 | | * - Included as Parameter Entity reference within DTDs |
2708 | | */ |
2709 | | void |
2710 | 0 | xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { |
2711 | 0 | switch(ctxt->instate) { |
2712 | 0 | case XML_PARSER_CDATA_SECTION: |
2713 | 0 | return; |
2714 | 0 | case XML_PARSER_COMMENT: |
2715 | 0 | return; |
2716 | 0 | case XML_PARSER_START_TAG: |
2717 | 0 | return; |
2718 | 0 | case XML_PARSER_END_TAG: |
2719 | 0 | return; |
2720 | 0 | case XML_PARSER_EOF: |
2721 | 0 | xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); |
2722 | 0 | return; |
2723 | 0 | case XML_PARSER_PROLOG: |
2724 | 0 | case XML_PARSER_START: |
2725 | 0 | case XML_PARSER_XML_DECL: |
2726 | 0 | case XML_PARSER_MISC: |
2727 | 0 | xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); |
2728 | 0 | return; |
2729 | 0 | case XML_PARSER_ENTITY_DECL: |
2730 | 0 | case XML_PARSER_CONTENT: |
2731 | 0 | case XML_PARSER_ATTRIBUTE_VALUE: |
2732 | 0 | case XML_PARSER_PI: |
2733 | 0 | case XML_PARSER_SYSTEM_LITERAL: |
2734 | 0 | case XML_PARSER_PUBLIC_LITERAL: |
2735 | | /* we just ignore it there */ |
2736 | 0 | return; |
2737 | 0 | case XML_PARSER_EPILOG: |
2738 | 0 | xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); |
2739 | 0 | return; |
2740 | 0 | case XML_PARSER_ENTITY_VALUE: |
2741 | | /* |
2742 | | * NOTE: in the case of entity values, we don't do the |
2743 | | * substitution here since we need the literal |
2744 | | * entity value to be able to save the internal |
2745 | | * subset of the document. |
2746 | | * This will be handled by xmlStringDecodeEntities |
2747 | | */ |
2748 | 0 | return; |
2749 | 0 | case XML_PARSER_DTD: |
2750 | | /* |
2751 | | * [WFC: Well-Formedness Constraint: PEs in Internal Subset] |
2752 | | * In the internal DTD subset, parameter-entity references |
2753 | | * can occur only where markup declarations can occur, not |
2754 | | * within markup declarations. |
2755 | | * In that case this is handled in xmlParseMarkupDecl |
2756 | | */ |
2757 | 0 | if ((ctxt->external == 0) && (ctxt->inputNr == 1)) |
2758 | 0 | return; |
2759 | 0 | if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) |
2760 | 0 | return; |
2761 | 0 | break; |
2762 | 0 | case XML_PARSER_IGNORE: |
2763 | 0 | return; |
2764 | 0 | } |
2765 | | |
2766 | 0 | xmlParsePEReference(ctxt); |
2767 | 0 | } |
2768 | | |
/*
 * Macro used to grow the current buffer.
 *
 * buffer:         name of an xmlChar * variable holding the allocation
 * buffer##_size:  companion variable, expected to be a size_t, holding the
 *                 current allocation size; it is updated on success
 * n:              extra headroom (in bytes) added on top of doubling
 *
 * The new size is buffer##_size * 2 + n. If that computation wraps around
 * or xmlRealloc fails, control jumps to the mem_error label, which the
 * enclosing function must provide; the original buffer is left untouched
 * in that case so the handler can free it.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement (safe in unbraced if/else), and the n argument is
 * parenthesized so that arbitrary expressions can be passed.
 */
#define growBuffer(buffer, n) do {					\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
} while (0)
2783 | | |
/**
 * xmlStringDecodeEntitiesInt:
 * @ctxt:  the parser context
 * @str:  the input string
 * @len: the string length
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
 * @end:  an end marker xmlChar, 0 if none
 * @end2:  an end marker xmlChar, 0 if none
 * @end3:  an end marker xmlChar, 0 if none
 * @check:  whether to perform entity checks
 *
 * Copies @str into a freshly allocated buffer while expanding character
 * references and, depending on @what, general entity references and
 * parameter entity references. Entity content is expanded by calling this
 * function recursively; ctxt->depth tracks the nesting level and the
 * XML_ENT_EXPANDING flag on each entity detects reference loops.
 *
 * Scanning stops at the first NUL, at @end / @end2 / @end3, once @len
 * bytes have been consumed, or when the parser state becomes
 * XML_PARSER_EOF.
 *
 * Returns a newly allocated, NUL-terminated string, or NULL if @str is
 * NULL, the nesting depth limit is exceeded, an allocation fails, or a
 * reference cannot be expanded.
 */
static xmlChar *
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
		           int what, xmlChar end, xmlChar  end2, xmlChar end3,
                           int check) {
    xmlChar *buffer = NULL;     /* output buffer, returned on success */
    size_t buffer_size = 0;     /* allocated size of buffer */
    size_t nbchars = 0;         /* number of bytes written so far */

    xmlChar *current = NULL;    /* cursor over an expanded replacement */
    xmlChar *rep = NULL;        /* result of a recursive expansion */
    const xmlChar *last;        /* one past the last input byte */
    xmlEntityPtr ent;
    int c,l;

    if (str == NULL)
        return(NULL);
    last = str + len;

    /*
     * Nesting limit: 40 levels by default, 100 when XML_PARSE_HUGE is set.
     */
    if (((ctxt->depth > 40) &&
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
	(ctxt->depth > 100)) {
	xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
                       "Maximum entity nesting depth exceeded");
	return(NULL);
    }

    /*
     * allocate a translation buffer.
     */
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
    if (buffer == NULL) goto mem_error;

    /*
     * OK loop until we reach one of the ending char or a size limit.
     * we are operating on already parsed values.
     *
     * Every branch below re-establishes at least XML_PARSER_BUFFER_SIZE
     * bytes of headroom in the buffer after appending to it.
     */
    if (str < last)
	c = CUR_SCHAR(str, l);
    else
        c = 0;
    while ((c != 0) && (c != end) && /* non input consuming loop */
           (c != end2) && (c != end3) &&
           (ctxt->instate != XML_PARSER_EOF)) {

	/* redundant with the loop condition above */
	if (c == 0) break;
        if ((c == '&') && (str[1] == '#')) {
	    /*
	     * Character reference: xmlParseStringCharRef advances str and
	     * returns 0 on any error.
	     */
	    int val = xmlParseStringCharRef(ctxt, &str);
	    if (val == 0)
                goto int_error;
	    COPY_BUF(buffer, nbchars, val);
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
	    /* General entity reference, only handled if requested in @what */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding Entity Reference: %.30s\n",
			str);
	    ent = xmlParseStringEntityRef(ctxt, &str);
	    if ((ent != NULL) &&
		(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
		/* Predefined entity: only the first content char is copied */
		if (ent->content != NULL) {
		    COPY_BUF(buffer, nbchars, ent->content[0]);
		    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
			growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
		    }
		} else {
		    xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
			    "predefined entity has no content\n");
                    goto int_error;
		}
	    } else if ((ent != NULL) && (ent->content != NULL)) {
	        /* Entity with content: expand it recursively */
	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /*
                 * The entity is already being expanded further up the
                 * call chain: report the loop, halt the parser and empty
                 * the entity content.
                 */
                if (ent->flags & XML_ENT_EXPANDING) {
	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    ent->content[0] = 0;
                    goto int_error;
                }

                ent->flags |= XML_ENT_EXPANDING;
		ctxt->depth++;
		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
		ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

		if (rep == NULL) {
                    /* expansion failed: empty the entity content */
                    ent->content[0] = 0;
                    goto int_error;
                }

                /* append the expanded text byte by byte */
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    } else if (ent != NULL) {
		/*
		 * Entity without content: emit the reference itself,
		 * i.e. "&name;", unchanged.
		 */
		int i = xmlStrlen(ent->name);
		const xmlChar *cur = ent->name;

		buffer[nbchars++] = '&';
		if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
		    growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
		}
		for (;i > 0;i--)
		    buffer[nbchars++] = *cur++;
		buffer[nbchars++] = ';';
	    }
	} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
	    /* Parameter entity reference, only handled if requested in @what */
	    if (xmlParserDebugEntities)
		xmlGenericError(xmlGenericErrorContext,
			"String decoding PE Reference: %.30s\n", str);
	    ent = xmlParseStringPEReference(ctxt, &str);
	    if (ent != NULL) {
                if (ent->content == NULL) {
		    /*
		     * Note: external parsed entities will not be loaded,
		     * it is not required for a non-validating parser to
		     * complete external PEReferences coming from the
		     * internal subset
		     */
		    if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
			((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
			(ctxt->validate != 0)) {
			xmlLoadEntityContent(ctxt, ent);
		    } else {
			xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
		  "not validating will not read content for PE entity %s\n",
		                      ent->name, NULL);
		    }
		}

	        if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
                    goto int_error;

                /* same loop detection as for general entities */
                if (ent->flags & XML_ENT_EXPANDING) {
	            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                    xmlHaltParser(ctxt);
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }

                /*
                 * NOTE(review): if ent->content is still NULL here the
                 * recursive call returns NULL (str == NULL) and the whole
                 * decode fails through int_error.
                 */
                ent->flags |= XML_ENT_EXPANDING;
		ctxt->depth++;
		rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
                        ent->length, what, 0, 0, 0, check);
		ctxt->depth--;
                ent->flags &= ~XML_ENT_EXPANDING;

		if (rep == NULL) {
                    if (ent->content != NULL)
                        ent->content[0] = 0;
                    goto int_error;
                }
                current = rep;
                while (*current != 0) { /* non input consuming loop */
                    buffer[nbchars++] = *current++;
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
                    }
                }
                xmlFree(rep);
                rep = NULL;
	    }
	} else {
	    /* Ordinary character: copy it and advance by its byte length */
	    COPY_BUF(buffer, nbchars, c);
	    str += l;
	    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
	        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
	    }
	}
	/* fetch the next character, or 0 once @len bytes were consumed */
	if (str < last)
	    c = CUR_SCHAR(str, l);
	else
	    c = 0;
    }
    buffer[nbchars] = 0;
    return(buffer);

    /* allocation failure: report it, then share the cleanup below */
mem_error:
    xmlErrMemory(ctxt, NULL);
    /* any other failure: release partial results and return NULL */
int_error:
    if (rep != NULL)
        xmlFree(rep);
    if (buffer != NULL)
        xmlFree(buffer);
    return(NULL);
}
2992 | | |
2993 | | /** |
2994 | | * xmlStringLenDecodeEntities: |
2995 | | * @ctxt: the parser context |
2996 | | * @str: the input string |
2997 | | * @len: the string length |
2998 | | * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
2999 | | * @end: an end marker xmlChar, 0 if none |
3000 | | * @end2: an end marker xmlChar, 0 if none |
3001 | | * @end3: an end marker xmlChar, 0 if none |
3002 | | * |
3003 | | * DEPRECATED: Internal function, don't use. |
3004 | | * |
3005 | | * Takes a entity string content and process to do the adequate substitutions. |
3006 | | * |
3007 | | * [67] Reference ::= EntityRef | CharRef |
3008 | | * |
3009 | | * [69] PEReference ::= '%' Name ';' |
3010 | | * |
3011 | | * Returns A newly allocated string with the substitution done. The caller |
3012 | | * must deallocate it ! |
3013 | | */ |
3014 | | xmlChar * |
3015 | | xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
3016 | | int what, xmlChar end, xmlChar end2, |
3017 | 0 | xmlChar end3) { |
3018 | 0 | if ((ctxt == NULL) || (str == NULL) || (len < 0)) |
3019 | 0 | return(NULL); |
3020 | 0 | return(xmlStringDecodeEntitiesInt(ctxt, str, len, what, |
3021 | 0 | end, end2, end3, 0)); |
3022 | 0 | } |
3023 | | |
3024 | | /** |
3025 | | * xmlStringDecodeEntities: |
3026 | | * @ctxt: the parser context |
3027 | | * @str: the input string |
3028 | | * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF |
3029 | | * @end: an end marker xmlChar, 0 if none |
3030 | | * @end2: an end marker xmlChar, 0 if none |
3031 | | * @end3: an end marker xmlChar, 0 if none |
3032 | | * |
3033 | | * DEPRECATED: Internal function, don't use. |
3034 | | * |
3035 | | * Takes a entity string content and process to do the adequate substitutions. |
3036 | | * |
3037 | | * [67] Reference ::= EntityRef | CharRef |
3038 | | * |
3039 | | * [69] PEReference ::= '%' Name ';' |
3040 | | * |
3041 | | * Returns A newly allocated string with the substitution done. The caller |
3042 | | * must deallocate it ! |
3043 | | */ |
3044 | | xmlChar * |
3045 | | xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, |
3046 | 31.4k | xmlChar end, xmlChar end2, xmlChar end3) { |
3047 | 31.4k | if ((ctxt == NULL) || (str == NULL)) return(NULL); |
3048 | 31.4k | return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what, |
3049 | 31.4k | end, end2, end3, 0)); |
3050 | 31.4k | } |
3051 | | |
3052 | | /************************************************************************ |
3053 | | * * |
3054 | | * Commodity functions, cleanup needed ? * |
3055 | | * * |
3056 | | ************************************************************************/ |
3057 | | |
3058 | | /** |
3059 | | * areBlanks: |
3060 | | * @ctxt: an XML parser context |
3061 | | * @str: a xmlChar * |
3062 | | * @len: the size of @str |
3063 | | * @blank_chars: we know the chars are blanks |
3064 | | * |
3065 | | * Is this a sequence of blank chars that one can ignore ? |
3066 | | * |
3067 | | * Returns 1 if ignorable 0 otherwise. |
3068 | | */ |
3069 | | |
3070 | | static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len, |
3071 | 7.77k | int blank_chars) { |
3072 | 7.77k | int i, ret; |
3073 | 7.77k | xmlNodePtr lastChild; |
3074 | | |
3075 | | /* |
3076 | | * Don't spend time trying to differentiate them, the same callback is |
3077 | | * used ! |
3078 | | */ |
3079 | 7.77k | if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters) |
3080 | 1.30k | return(0); |
3081 | | |
3082 | | /* |
3083 | | * Check for xml:space value. |
3084 | | */ |
3085 | 6.46k | if ((ctxt->space == NULL) || (*(ctxt->space) == 1) || |
3086 | 6.46k | (*(ctxt->space) == -2)) |
3087 | 1.89k | return(0); |
3088 | | |
3089 | | /* |
3090 | | * Check that the string is made of blanks |
3091 | | */ |
3092 | 4.56k | if (blank_chars == 0) { |
3093 | 8.26k | for (i = 0;i < len;i++) |
3094 | 6.63k | if (!(IS_BLANK_CH(str[i]))) return(0); |
3095 | 2.57k | } |
3096 | | |
3097 | | /* |
3098 | | * Look if the element is mixed content in the DTD if available |
3099 | | */ |
3100 | 3.62k | if (ctxt->node == NULL) return(0); |
3101 | 3.62k | if (ctxt->myDoc != NULL) { |
3102 | 3.62k | ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name); |
3103 | 3.62k | if (ret == 0) return(1); |
3104 | 3.42k | if (ret == 1) return(0); |
3105 | 3.42k | } |
3106 | | |
3107 | | /* |
3108 | | * Otherwise, heuristic :-\ |
3109 | | */ |
3110 | 3.23k | if ((RAW != '<') && (RAW != 0xD)) return(0); |
3111 | 2.71k | if ((ctxt->node->children == NULL) && |
3112 | 2.71k | (RAW == '<') && (NXT(1) == '/')) return(0); |
3113 | | |
3114 | 2.31k | lastChild = xmlGetLastChild(ctxt->node); |
3115 | 2.31k | if (lastChild == NULL) { |
3116 | 1.67k | if ((ctxt->node->type != XML_ELEMENT_NODE) && |
3117 | 1.67k | (ctxt->node->content != NULL)) return(0); |
3118 | 1.67k | } else if (xmlNodeIsText(lastChild)) |
3119 | 220 | return(0); |
3120 | 420 | else if ((ctxt->node->children != NULL) && |
3121 | 420 | (xmlNodeIsText(ctxt->node->children))) |
3122 | 81 | return(0); |
3123 | 2.01k | return(1); |
3124 | 2.31k | } |
3125 | | |
3126 | | /************************************************************************ |
3127 | | * * |
3128 | | * Extra stuff for namespace support * |
3129 | | * Relates to http://www.w3.org/TR/WD-xml-names * |
3130 | | * * |
3131 | | ************************************************************************/ |
3132 | | |
3133 | | /** |
3134 | | * xmlSplitQName: |
3135 | | * @ctxt: an XML parser context |
3136 | | * @name: an XML parser context |
3137 | | * @prefix: a xmlChar ** |
3138 | | * |
3139 | | * parse an UTF8 encoded XML qualified name string |
3140 | | * |
3141 | | * [NS 5] QName ::= (Prefix ':')? LocalPart |
3142 | | * |
3143 | | * [NS 6] Prefix ::= NCName |
3144 | | * |
3145 | | * [NS 7] LocalPart ::= NCName |
3146 | | * |
3147 | | * Returns the local part, and prefix is updated |
3148 | | * to get the Prefix if any. |
3149 | | */ |
3150 | | |
3151 | | xmlChar * |
3152 | 114k | xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { |
3153 | 114k | xmlChar buf[XML_MAX_NAMELEN + 5]; |
3154 | 114k | xmlChar *buffer = NULL; |
3155 | 114k | int len = 0; |
3156 | 114k | int max = XML_MAX_NAMELEN; |
3157 | 114k | xmlChar *ret = NULL; |
3158 | 114k | const xmlChar *cur = name; |
3159 | 114k | int c; |
3160 | | |
3161 | 114k | if (prefix == NULL) return(NULL); |
3162 | 114k | *prefix = NULL; |
3163 | | |
3164 | 114k | if (cur == NULL) return(NULL); |
3165 | | |
3166 | | #ifndef XML_XML_NAMESPACE |
3167 | | /* xml: prefix is not really a namespace */ |
3168 | | if ((cur[0] == 'x') && (cur[1] == 'm') && |
3169 | | (cur[2] == 'l') && (cur[3] == ':')) |
3170 | | return(xmlStrdup(name)); |
3171 | | #endif |
3172 | | |
3173 | | /* nasty but well=formed */ |
3174 | 114k | if (cur[0] == ':') |
3175 | 11.8k | return(xmlStrdup(name)); |
3176 | | |
3177 | 102k | c = *cur++; |
3178 | 433k | while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ |
3179 | 330k | buf[len++] = c; |
3180 | 330k | c = *cur++; |
3181 | 330k | } |
3182 | 102k | if (len >= max) { |
3183 | | /* |
3184 | | * Okay someone managed to make a huge name, so he's ready to pay |
3185 | | * for the processing speed. |
3186 | | */ |
3187 | 696 | max = len * 2; |
3188 | | |
3189 | 696 | buffer = (xmlChar *) xmlMallocAtomic(max); |
3190 | 696 | if (buffer == NULL) { |
3191 | 0 | xmlErrMemory(ctxt, NULL); |
3192 | 0 | return(NULL); |
3193 | 0 | } |
3194 | 696 | memcpy(buffer, buf, len); |
3195 | 59.3k | while ((c != 0) && (c != ':')) { /* tested bigname.xml */ |
3196 | 58.6k | if (len + 10 > max) { |
3197 | 309 | xmlChar *tmp; |
3198 | | |
3199 | 309 | max *= 2; |
3200 | 309 | tmp = (xmlChar *) xmlRealloc(buffer, max); |
3201 | 309 | if (tmp == NULL) { |
3202 | 0 | xmlFree(buffer); |
3203 | 0 | xmlErrMemory(ctxt, NULL); |
3204 | 0 | return(NULL); |
3205 | 0 | } |
3206 | 309 | buffer = tmp; |
3207 | 309 | } |
3208 | 58.6k | buffer[len++] = c; |
3209 | 58.6k | c = *cur++; |
3210 | 58.6k | } |
3211 | 696 | buffer[len] = 0; |
3212 | 696 | } |
3213 | | |
3214 | 102k | if ((c == ':') && (*cur == 0)) { |
3215 | 695 | if (buffer != NULL) |
3216 | 195 | xmlFree(buffer); |
3217 | 695 | *prefix = NULL; |
3218 | 695 | return(xmlStrdup(name)); |
3219 | 695 | } |
3220 | | |
3221 | 102k | if (buffer == NULL) |
3222 | 101k | ret = xmlStrndup(buf, len); |
3223 | 501 | else { |
3224 | 501 | ret = buffer; |
3225 | 501 | buffer = NULL; |
3226 | 501 | max = XML_MAX_NAMELEN; |
3227 | 501 | } |
3228 | | |
3229 | | |
3230 | 102k | if (c == ':') { |
3231 | 15.0k | c = *cur; |
3232 | 15.0k | *prefix = ret; |
3233 | 15.0k | if (c == 0) { |
3234 | 0 | return(xmlStrndup(BAD_CAST "", 0)); |
3235 | 0 | } |
3236 | 15.0k | len = 0; |
3237 | | |
3238 | | /* |
3239 | | * Check that the first character is proper to start |
3240 | | * a new name |
3241 | | */ |
3242 | 15.0k | if (!(((c >= 0x61) && (c <= 0x7A)) || |
3243 | 15.0k | ((c >= 0x41) && (c <= 0x5A)) || |
3244 | 15.0k | (c == '_') || (c == ':'))) { |
3245 | 4.56k | int l; |
3246 | 4.56k | int first = CUR_SCHAR(cur, l); |
3247 | | |
3248 | 4.56k | if (!IS_LETTER(first) && (first != '_')) { |
3249 | 2.85k | xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME, |
3250 | 2.85k | "Name %s is not XML Namespace compliant\n", |
3251 | 2.85k | name); |
3252 | 2.85k | } |
3253 | 4.56k | } |
3254 | 15.0k | cur++; |
3255 | | |
3256 | 124k | while ((c != 0) && (len < max)) { /* tested bigname2.xml */ |
3257 | 108k | buf[len++] = c; |
3258 | 108k | c = *cur++; |
3259 | 108k | } |
3260 | 15.0k | if (len >= max) { |
3261 | | /* |
3262 | | * Okay someone managed to make a huge name, so he's ready to pay |
3263 | | * for the processing speed. |
3264 | | */ |
3265 | 687 | max = len * 2; |
3266 | | |
3267 | 687 | buffer = (xmlChar *) xmlMallocAtomic(max); |
3268 | 687 | if (buffer == NULL) { |
3269 | 0 | xmlErrMemory(ctxt, NULL); |
3270 | 0 | return(NULL); |
3271 | 0 | } |
3272 | 687 | memcpy(buffer, buf, len); |
3273 | 35.7k | while (c != 0) { /* tested bigname2.xml */ |
3274 | 35.0k | if (len + 10 > max) { |
3275 | 286 | xmlChar *tmp; |
3276 | | |
3277 | 286 | max *= 2; |
3278 | 286 | tmp = (xmlChar *) xmlRealloc(buffer, max); |
3279 | 286 | if (tmp == NULL) { |
3280 | 0 | xmlErrMemory(ctxt, NULL); |
3281 | 0 | xmlFree(buffer); |
3282 | 0 | return(NULL); |
3283 | 0 | } |
3284 | 286 | buffer = tmp; |
3285 | 286 | } |
3286 | 35.0k | buffer[len++] = c; |
3287 | 35.0k | c = *cur++; |
3288 | 35.0k | } |
3289 | 687 | buffer[len] = 0; |
3290 | 687 | } |
3291 | | |
3292 | 15.0k | if (buffer == NULL) |
3293 | 14.4k | ret = xmlStrndup(buf, len); |
3294 | 687 | else { |
3295 | 687 | ret = buffer; |
3296 | 687 | } |
3297 | 15.0k | } |
3298 | | |
3299 | 102k | return(ret); |
3300 | 102k | } |
3301 | | |
3302 | | /************************************************************************ |
3303 | | * * |
3304 | | * The parser itself * |
3305 | | * Relates to http://www.w3.org/TR/REC-xml * |
3306 | | * * |
3307 | | ************************************************************************/ |
3308 | | |
3309 | | /************************************************************************ |
3310 | | * * |
3311 | | * Routines to parse Name, NCName and NmToken * |
3312 | | * * |
3313 | | ************************************************************************/ |
3314 | | |
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in Revision 5 of XML 1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * introduced in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with pre-revision-5 parsing semantics if the
 * XML_PARSE_OLD10 option is given to the parser.
 */
3325 | | static int |
3326 | 626k | xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) { |
3327 | 626k | if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
3328 | | /* |
3329 | | * Use the new checks of production [4] [4a] amd [5] of the |
3330 | | * Update 5 of XML-1.0 |
3331 | | */ |
3332 | 617k | if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
3333 | 617k | (((c >= 'a') && (c <= 'z')) || |
3334 | 616k | ((c >= 'A') && (c <= 'Z')) || |
3335 | 616k | (c == '_') || (c == ':') || |
3336 | 616k | ((c >= 0xC0) && (c <= 0xD6)) || |
3337 | 616k | ((c >= 0xD8) && (c <= 0xF6)) || |
3338 | 616k | ((c >= 0xF8) && (c <= 0x2FF)) || |
3339 | 616k | ((c >= 0x370) && (c <= 0x37D)) || |
3340 | 616k | ((c >= 0x37F) && (c <= 0x1FFF)) || |
3341 | 616k | ((c >= 0x200C) && (c <= 0x200D)) || |
3342 | 616k | ((c >= 0x2070) && (c <= 0x218F)) || |
3343 | 616k | ((c >= 0x2C00) && (c <= 0x2FEF)) || |
3344 | 616k | ((c >= 0x3001) && (c <= 0xD7FF)) || |
3345 | 616k | ((c >= 0xF900) && (c <= 0xFDCF)) || |
3346 | 616k | ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
3347 | 616k | ((c >= 0x10000) && (c <= 0xEFFFF)))) |
3348 | 564k | return(1); |
3349 | 617k | } else { |
3350 | 9.58k | if (IS_LETTER(c) || (c == '_') || (c == ':')) |
3351 | 7.22k | return(1); |
3352 | 9.58k | } |
3353 | 55.6k | return(0); |
3354 | 626k | } |
3355 | | |
3356 | | static int |
3357 | 904k | xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) { |
3358 | 904k | if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
3359 | | /* |
3360 | | * Use the new checks of production [4] [4a] amd [5] of the |
3361 | | * Update 5 of XML-1.0 |
3362 | | */ |
3363 | 888k | if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
3364 | 888k | (((c >= 'a') && (c <= 'z')) || |
3365 | 876k | ((c >= 'A') && (c <= 'Z')) || |
3366 | 876k | ((c >= '0') && (c <= '9')) || /* !start */ |
3367 | 876k | (c == '_') || (c == ':') || |
3368 | 876k | (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ |
3369 | 876k | ((c >= 0xC0) && (c <= 0xD6)) || |
3370 | 876k | ((c >= 0xD8) && (c <= 0xF6)) || |
3371 | 876k | ((c >= 0xF8) && (c <= 0x2FF)) || |
3372 | 876k | ((c >= 0x300) && (c <= 0x36F)) || /* !start */ |
3373 | 876k | ((c >= 0x370) && (c <= 0x37D)) || |
3374 | 876k | ((c >= 0x37F) && (c <= 0x1FFF)) || |
3375 | 876k | ((c >= 0x200C) && (c <= 0x200D)) || |
3376 | 876k | ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ |
3377 | 876k | ((c >= 0x2070) && (c <= 0x218F)) || |
3378 | 876k | ((c >= 0x2C00) && (c <= 0x2FEF)) || |
3379 | 876k | ((c >= 0x3001) && (c <= 0xD7FF)) || |
3380 | 876k | ((c >= 0xF900) && (c <= 0xFDCF)) || |
3381 | 876k | ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
3382 | 876k | ((c >= 0x10000) && (c <= 0xEFFFF)))) |
3383 | 315k | return(1); |
3384 | 888k | } else { |
3385 | 15.6k | if ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
3386 | 15.6k | (c == '.') || (c == '-') || |
3387 | 15.6k | (c == '_') || (c == ':') || |
3388 | 15.6k | (IS_COMBINING(c)) || |
3389 | 15.6k | (IS_EXTENDER(c))) |
3390 | 7.71k | return(1); |
3391 | 15.6k | } |
3392 | 581k | return(0); |
3393 | 904k | } |
3394 | | |
/*
 * Forward declaration of a static helper whose definition lives elsewhere
 * in this translation unit.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
3397 | | |
3398 | | static const xmlChar * |
3399 | 111k | xmlParseNameComplex(xmlParserCtxtPtr ctxt) { |
3400 | 111k | int len = 0, l; |
3401 | 111k | int c; |
3402 | 111k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3403 | 0 | XML_MAX_TEXT_LENGTH : |
3404 | 111k | XML_MAX_NAME_LENGTH; |
3405 | | |
3406 | | /* |
3407 | | * Handler for more complex cases |
3408 | | */ |
3409 | 111k | c = CUR_CHAR(l); |
3410 | 111k | if ((ctxt->options & XML_PARSE_OLD10) == 0) { |
3411 | | /* |
3412 | | * Use the new checks of production [4] [4a] amd [5] of the |
3413 | | * Update 5 of XML-1.0 |
3414 | | */ |
3415 | 102k | if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
3416 | 102k | (!(((c >= 'a') && (c <= 'z')) || |
3417 | 86.5k | ((c >= 'A') && (c <= 'Z')) || |
3418 | 86.5k | (c == '_') || (c == ':') || |
3419 | 86.5k | ((c >= 0xC0) && (c <= 0xD6)) || |
3420 | 86.5k | ((c >= 0xD8) && (c <= 0xF6)) || |
3421 | 86.5k | ((c >= 0xF8) && (c <= 0x2FF)) || |
3422 | 86.5k | ((c >= 0x370) && (c <= 0x37D)) || |
3423 | 86.5k | ((c >= 0x37F) && (c <= 0x1FFF)) || |
3424 | 86.5k | ((c >= 0x200C) && (c <= 0x200D)) || |
3425 | 86.5k | ((c >= 0x2070) && (c <= 0x218F)) || |
3426 | 86.5k | ((c >= 0x2C00) && (c <= 0x2FEF)) || |
3427 | 86.5k | ((c >= 0x3001) && (c <= 0xD7FF)) || |
3428 | 86.5k | ((c >= 0xF900) && (c <= 0xFDCF)) || |
3429 | 86.5k | ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
3430 | 93.6k | ((c >= 0x10000) && (c <= 0xEFFFF))))) { |
3431 | 93.6k | return(NULL); |
3432 | 93.6k | } |
3433 | 8.58k | len += l; |
3434 | 8.58k | NEXTL(l); |
3435 | 8.58k | c = CUR_CHAR(l); |
3436 | 116k | while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ |
3437 | 116k | (((c >= 'a') && (c <= 'z')) || |
3438 | 114k | ((c >= 'A') && (c <= 'Z')) || |
3439 | 114k | ((c >= '0') && (c <= '9')) || /* !start */ |
3440 | 114k | (c == '_') || (c == ':') || |
3441 | 114k | (c == '-') || (c == '.') || (c == 0xB7) || /* !start */ |
3442 | 114k | ((c >= 0xC0) && (c <= 0xD6)) || |
3443 | 114k | ((c >= 0xD8) && (c <= 0xF6)) || |
3444 | 114k | ((c >= 0xF8) && (c <= 0x2FF)) || |
3445 | 114k | ((c >= 0x300) && (c <= 0x36F)) || /* !start */ |
3446 | 114k | ((c >= 0x370) && (c <= 0x37D)) || |
3447 | 114k | ((c >= 0x37F) && (c <= 0x1FFF)) || |
3448 | 114k | ((c >= 0x200C) && (c <= 0x200D)) || |
3449 | 114k | ((c >= 0x203F) && (c <= 0x2040)) || /* !start */ |
3450 | 114k | ((c >= 0x2070) && (c <= 0x218F)) || |
3451 | 114k | ((c >= 0x2C00) && (c <= 0x2FEF)) || |
3452 | 114k | ((c >= 0x3001) && (c <= 0xD7FF)) || |
3453 | 114k | ((c >= 0xF900) && (c <= 0xFDCF)) || |
3454 | 114k | ((c >= 0xFDF0) && (c <= 0xFFFD)) || |
3455 | 114k | ((c >= 0x10000) && (c <= 0xEFFFF)) |
3456 | 114k | )) { |
3457 | 108k | if (len <= INT_MAX - l) |
3458 | 108k | len += l; |
3459 | 108k | NEXTL(l); |
3460 | 108k | c = CUR_CHAR(l); |
3461 | 108k | } |
3462 | 8.93k | } else { |
3463 | 8.93k | if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
3464 | 8.93k | (!IS_LETTER(c) && (c != '_') && |
3465 | 7.84k | (c != ':'))) { |
3466 | 5.64k | return(NULL); |
3467 | 5.64k | } |
3468 | 3.28k | len += l; |
3469 | 3.28k | NEXTL(l); |
3470 | 3.28k | c = CUR_CHAR(l); |
3471 | | |
3472 | 12.1k | while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
3473 | 12.1k | ((IS_LETTER(c)) || (IS_DIGIT(c)) || |
3474 | 11.1k | (c == '.') || (c == '-') || |
3475 | 11.1k | (c == '_') || (c == ':') || |
3476 | 11.1k | (IS_COMBINING(c)) || |
3477 | 11.1k | (IS_EXTENDER(c)))) { |
3478 | 8.84k | if (len <= INT_MAX - l) |
3479 | 8.84k | len += l; |
3480 | 8.84k | NEXTL(l); |
3481 | 8.84k | c = CUR_CHAR(l); |
3482 | 8.84k | } |
3483 | 3.28k | } |
3484 | 11.8k | if (ctxt->instate == XML_PARSER_EOF) |
3485 | 0 | return(NULL); |
3486 | 11.8k | if (len > maxLength) { |
3487 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); |
3488 | 0 | return(NULL); |
3489 | 0 | } |
3490 | 11.8k | if (ctxt->input->cur - ctxt->input->base < len) { |
3491 | | /* |
3492 | | * There were a couple of bugs where PERefs lead to to a change |
3493 | | * of the buffer. Check the buffer size to avoid passing an invalid |
3494 | | * pointer to xmlDictLookup. |
3495 | | */ |
3496 | 0 | xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, |
3497 | 0 | "unexpected change of input buffer"); |
3498 | 0 | return (NULL); |
3499 | 0 | } |
3500 | 11.8k | if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r')) |
3501 | 0 | return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len)); |
3502 | 11.8k | return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len)); |
3503 | 11.8k | } |
3504 | | |
3505 | | /** |
3506 | | * xmlParseName: |
3507 | | * @ctxt: an XML parser context |
3508 | | * |
3509 | | * DEPRECATED: Internal function, don't use. |
3510 | | * |
3511 | | * parse an XML name. |
3512 | | * |
3513 | | * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
3514 | | * CombiningChar | Extender |
3515 | | * |
3516 | | * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
3517 | | * |
3518 | | * [6] Names ::= Name (#x20 Name)* |
3519 | | * |
3520 | | * Returns the Name parsed or NULL |
3521 | | */ |
3522 | | |
3523 | | const xmlChar * |
3524 | 418k | xmlParseName(xmlParserCtxtPtr ctxt) { |
3525 | 418k | const xmlChar *in; |
3526 | 418k | const xmlChar *ret; |
3527 | 418k | size_t count = 0; |
3528 | 418k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3529 | 0 | XML_MAX_TEXT_LENGTH : |
3530 | 418k | XML_MAX_NAME_LENGTH; |
3531 | | |
3532 | 418k | GROW; |
3533 | 418k | if (ctxt->instate == XML_PARSER_EOF) |
3534 | 2 | return(NULL); |
3535 | | |
3536 | | /* |
3537 | | * Accelerator for simple ASCII names |
3538 | | */ |
3539 | 418k | in = ctxt->input->cur; |
3540 | 418k | if (((*in >= 0x61) && (*in <= 0x7A)) || |
3541 | 418k | ((*in >= 0x41) && (*in <= 0x5A)) || |
3542 | 418k | (*in == '_') || (*in == ':')) { |
3543 | 312k | in++; |
3544 | 590k | while (((*in >= 0x61) && (*in <= 0x7A)) || |
3545 | 590k | ((*in >= 0x41) && (*in <= 0x5A)) || |
3546 | 590k | ((*in >= 0x30) && (*in <= 0x39)) || |
3547 | 590k | (*in == '_') || (*in == '-') || |
3548 | 590k | (*in == ':') || (*in == '.')) |
3549 | 278k | in++; |
3550 | 312k | if ((*in > 0) && (*in < 0x80)) { |
3551 | 307k | count = in - ctxt->input->cur; |
3552 | 307k | if (count > maxLength) { |
3553 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name"); |
3554 | 0 | return(NULL); |
3555 | 0 | } |
3556 | 307k | ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count); |
3557 | 307k | ctxt->input->cur = in; |
3558 | 307k | ctxt->input->col += count; |
3559 | 307k | if (ret == NULL) |
3560 | 0 | xmlErrMemory(ctxt, NULL); |
3561 | 307k | return(ret); |
3562 | 307k | } |
3563 | 312k | } |
3564 | | /* accelerator for special cases */ |
3565 | 111k | return(xmlParseNameComplex(ctxt)); |
3566 | 418k | } |
3567 | | |
3568 | | static xmlHashedString |
3569 | 99.4k | xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) { |
3570 | 99.4k | xmlHashedString ret; |
3571 | 99.4k | int len = 0, l; |
3572 | 99.4k | int c; |
3573 | 99.4k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3574 | 0 | XML_MAX_TEXT_LENGTH : |
3575 | 99.4k | XML_MAX_NAME_LENGTH; |
3576 | 99.4k | size_t startPosition = 0; |
3577 | | |
3578 | 99.4k | ret.name = NULL; |
3579 | 99.4k | ret.hashValue = 0; |
3580 | | |
3581 | | /* |
3582 | | * Handler for more complex cases |
3583 | | */ |
3584 | 99.4k | startPosition = CUR_PTR - BASE_PTR; |
3585 | 99.4k | c = CUR_CHAR(l); |
3586 | 99.4k | if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */ |
3587 | 99.4k | (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) { |
3588 | 93.6k | return(ret); |
3589 | 93.6k | } |
3590 | | |
3591 | 98.1k | while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ |
3592 | 98.1k | (xmlIsNameChar(ctxt, c) && (c != ':'))) { |
3593 | 92.3k | if (len <= INT_MAX - l) |
3594 | 92.3k | len += l; |
3595 | 92.3k | NEXTL(l); |
3596 | 92.3k | c = CUR_CHAR(l); |
3597 | 92.3k | } |
3598 | 5.80k | if (ctxt->instate == XML_PARSER_EOF) |
3599 | 0 | return(ret); |
3600 | 5.80k | if (len > maxLength) { |
3601 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); |
3602 | 0 | return(ret); |
3603 | 0 | } |
3604 | 5.80k | ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len); |
3605 | 5.80k | return(ret); |
3606 | 5.80k | } |
3607 | | |
3608 | | /** |
3609 | | * xmlParseNCName: |
3610 | | * @ctxt: an XML parser context |
3611 | | * @len: length of the string parsed |
3612 | | * |
3613 | | * parse an XML name. |
3614 | | * |
3615 | | * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | |
3616 | | * CombiningChar | Extender |
3617 | | * |
3618 | | * [5NS] NCName ::= (Letter | '_') (NCNameChar)* |
3619 | | * |
3620 | | * Returns the Name parsed or NULL |
3621 | | */ |
3622 | | |
3623 | | static xmlHashedString |
3624 | 195k | xmlParseNCName(xmlParserCtxtPtr ctxt) { |
3625 | 195k | const xmlChar *in, *e; |
3626 | 195k | xmlHashedString ret; |
3627 | 195k | size_t count = 0; |
3628 | 195k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3629 | 0 | XML_MAX_TEXT_LENGTH : |
3630 | 195k | XML_MAX_NAME_LENGTH; |
3631 | | |
3632 | 195k | ret.name = NULL; |
3633 | | |
3634 | | /* |
3635 | | * Accelerator for simple ASCII names |
3636 | | */ |
3637 | 195k | in = ctxt->input->cur; |
3638 | 195k | e = ctxt->input->end; |
3639 | 195k | if ((((*in >= 0x61) && (*in <= 0x7A)) || |
3640 | 195k | ((*in >= 0x41) && (*in <= 0x5A)) || |
3641 | 195k | (*in == '_')) && (in < e)) { |
3642 | 97.3k | in++; |
3643 | 178k | while ((((*in >= 0x61) && (*in <= 0x7A)) || |
3644 | 178k | ((*in >= 0x41) && (*in <= 0x5A)) || |
3645 | 178k | ((*in >= 0x30) && (*in <= 0x39)) || |
3646 | 178k | (*in == '_') || (*in == '-') || |
3647 | 178k | (*in == '.')) && (in < e)) |
3648 | 81.5k | in++; |
3649 | 97.3k | if (in >= e) |
3650 | 22 | goto complex; |
3651 | 97.3k | if ((*in > 0) && (*in < 0x80)) { |
3652 | 96.2k | count = in - ctxt->input->cur; |
3653 | 96.2k | if (count > maxLength) { |
3654 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); |
3655 | 0 | return(ret); |
3656 | 0 | } |
3657 | 96.2k | ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count); |
3658 | 96.2k | ctxt->input->cur = in; |
3659 | 96.2k | ctxt->input->col += count; |
3660 | 96.2k | if (ret.name == NULL) { |
3661 | 0 | xmlErrMemory(ctxt, NULL); |
3662 | 0 | } |
3663 | 96.2k | return(ret); |
3664 | 96.2k | } |
3665 | 97.3k | } |
3666 | 99.4k | complex: |
3667 | 99.4k | return(xmlParseNCNameComplex(ctxt)); |
3668 | 195k | } |
3669 | | |
3670 | | /** |
3671 | | * xmlParseNameAndCompare: |
3672 | | * @ctxt: an XML parser context |
3673 | | * |
3674 | | * parse an XML name and compares for match |
3675 | | * (specialized for endtag parsing) |
3676 | | * |
3677 | | * Returns NULL for an illegal name, (xmlChar*) 1 for success |
3678 | | * and the name for mismatch |
3679 | | */ |
3680 | | |
3681 | | static const xmlChar * |
3682 | 14.5k | xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) { |
3683 | 14.5k | register const xmlChar *cmp = other; |
3684 | 14.5k | register const xmlChar *in; |
3685 | 14.5k | const xmlChar *ret; |
3686 | | |
3687 | 14.5k | GROW; |
3688 | 14.5k | if (ctxt->instate == XML_PARSER_EOF) |
3689 | 0 | return(NULL); |
3690 | | |
3691 | 14.5k | in = ctxt->input->cur; |
3692 | 52.2k | while (*in != 0 && *in == *cmp) { |
3693 | 37.7k | ++in; |
3694 | 37.7k | ++cmp; |
3695 | 37.7k | } |
3696 | 14.5k | if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) { |
3697 | | /* success */ |
3698 | 2.15k | ctxt->input->col += in - ctxt->input->cur; |
3699 | 2.15k | ctxt->input->cur = in; |
3700 | 2.15k | return (const xmlChar*) 1; |
3701 | 2.15k | } |
3702 | | /* failure (or end of input buffer), check with full function */ |
3703 | 12.3k | ret = xmlParseName (ctxt); |
3704 | | /* strings coming from the dictionary direct compare possible */ |
3705 | 12.3k | if (ret == other) { |
3706 | 618 | return (const xmlChar*) 1; |
3707 | 618 | } |
3708 | 11.7k | return ret; |
3709 | 12.3k | } |
3710 | | |
3711 | | /** |
3712 | | * xmlParseStringName: |
3713 | | * @ctxt: an XML parser context |
3714 | | * @str: a pointer to the string pointer (IN/OUT) |
3715 | | * |
3716 | | * parse an XML name. |
3717 | | * |
3718 | | * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | |
3719 | | * CombiningChar | Extender |
3720 | | * |
3721 | | * [5] Name ::= (Letter | '_' | ':') (NameChar)* |
3722 | | * |
3723 | | * [6] Names ::= Name (#x20 Name)* |
3724 | | * |
3725 | | * Returns the Name parsed or NULL. The @str pointer |
3726 | | * is updated to the current location in the string. |
3727 | | */ |
3728 | | |
3729 | | static xmlChar * |
3730 | 535k | xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { |
3731 | 535k | xmlChar buf[XML_MAX_NAMELEN + 5]; |
3732 | 535k | const xmlChar *cur = *str; |
3733 | 535k | int len = 0, l; |
3734 | 535k | int c; |
3735 | 535k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3736 | 0 | XML_MAX_TEXT_LENGTH : |
3737 | 535k | XML_MAX_NAME_LENGTH; |
3738 | | |
3739 | 535k | c = CUR_SCHAR(cur, l); |
3740 | 535k | if (!xmlIsNameStartChar(ctxt, c)) { |
3741 | 2.37k | return(NULL); |
3742 | 2.37k | } |
3743 | | |
3744 | 532k | COPY_BUF(buf, len, c); |
3745 | 532k | cur += l; |
3746 | 532k | c = CUR_SCHAR(cur, l); |
3747 | 608k | while (xmlIsNameChar(ctxt, c)) { |
3748 | 76.2k | COPY_BUF(buf, len, c); |
3749 | 76.2k | cur += l; |
3750 | 76.2k | c = CUR_SCHAR(cur, l); |
3751 | 76.2k | if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ |
3752 | | /* |
3753 | | * Okay someone managed to make a huge name, so he's ready to pay |
3754 | | * for the processing speed. |
3755 | | */ |
3756 | 423 | xmlChar *buffer; |
3757 | 423 | int max = len * 2; |
3758 | | |
3759 | 423 | buffer = (xmlChar *) xmlMallocAtomic(max); |
3760 | 423 | if (buffer == NULL) { |
3761 | 0 | xmlErrMemory(ctxt, NULL); |
3762 | 0 | return(NULL); |
3763 | 0 | } |
3764 | 423 | memcpy(buffer, buf, len); |
3765 | 8.77k | while (xmlIsNameChar(ctxt, c)) { |
3766 | 8.35k | if (len + 10 > max) { |
3767 | 205 | xmlChar *tmp; |
3768 | | |
3769 | 205 | max *= 2; |
3770 | 205 | tmp = (xmlChar *) xmlRealloc(buffer, max); |
3771 | 205 | if (tmp == NULL) { |
3772 | 0 | xmlErrMemory(ctxt, NULL); |
3773 | 0 | xmlFree(buffer); |
3774 | 0 | return(NULL); |
3775 | 0 | } |
3776 | 205 | buffer = tmp; |
3777 | 205 | } |
3778 | 8.35k | COPY_BUF(buffer, len, c); |
3779 | 8.35k | cur += l; |
3780 | 8.35k | c = CUR_SCHAR(cur, l); |
3781 | 8.35k | if (len > maxLength) { |
3782 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); |
3783 | 0 | xmlFree(buffer); |
3784 | 0 | return(NULL); |
3785 | 0 | } |
3786 | 8.35k | } |
3787 | 423 | buffer[len] = 0; |
3788 | 423 | *str = cur; |
3789 | 423 | return(buffer); |
3790 | 423 | } |
3791 | 76.2k | } |
3792 | 532k | if (len > maxLength) { |
3793 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName"); |
3794 | 0 | return(NULL); |
3795 | 0 | } |
3796 | 532k | *str = cur; |
3797 | 532k | return(xmlStrndup(buf, len)); |
3798 | 532k | } |
3799 | | |
3800 | | /** |
3801 | | * xmlParseNmtoken: |
3802 | | * @ctxt: an XML parser context |
3803 | | * |
3804 | | * DEPRECATED: Internal function, don't use. |
3805 | | * |
3806 | | * parse an XML Nmtoken. |
3807 | | * |
3808 | | * [7] Nmtoken ::= (NameChar)+ |
3809 | | * |
3810 | | * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* |
3811 | | * |
3812 | | * Returns the Nmtoken parsed or NULL |
3813 | | */ |
3814 | | |
3815 | | xmlChar * |
3816 | 44.6k | xmlParseNmtoken(xmlParserCtxtPtr ctxt) { |
3817 | 44.6k | xmlChar buf[XML_MAX_NAMELEN + 5]; |
3818 | 44.6k | int len = 0, l; |
3819 | 44.6k | int c; |
3820 | 44.6k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3821 | 0 | XML_MAX_TEXT_LENGTH : |
3822 | 44.6k | XML_MAX_NAME_LENGTH; |
3823 | | |
3824 | 44.6k | c = CUR_CHAR(l); |
3825 | | |
3826 | 122k | while (xmlIsNameChar(ctxt, c)) { |
3827 | 78.2k | COPY_BUF(buf, len, c); |
3828 | 78.2k | NEXTL(l); |
3829 | 78.2k | c = CUR_CHAR(l); |
3830 | 78.2k | if (len >= XML_MAX_NAMELEN) { |
3831 | | /* |
3832 | | * Okay someone managed to make a huge token, so he's ready to pay |
3833 | | * for the processing speed. |
3834 | | */ |
3835 | 763 | xmlChar *buffer; |
3836 | 763 | int max = len * 2; |
3837 | | |
3838 | 763 | buffer = (xmlChar *) xmlMallocAtomic(max); |
3839 | 763 | if (buffer == NULL) { |
3840 | 0 | xmlErrMemory(ctxt, NULL); |
3841 | 0 | return(NULL); |
3842 | 0 | } |
3843 | 763 | memcpy(buffer, buf, len); |
3844 | 68.0k | while (xmlIsNameChar(ctxt, c)) { |
3845 | 67.2k | if (len + 10 > max) { |
3846 | 803 | xmlChar *tmp; |
3847 | | |
3848 | 803 | max *= 2; |
3849 | 803 | tmp = (xmlChar *) xmlRealloc(buffer, max); |
3850 | 803 | if (tmp == NULL) { |
3851 | 0 | xmlErrMemory(ctxt, NULL); |
3852 | 0 | xmlFree(buffer); |
3853 | 0 | return(NULL); |
3854 | 0 | } |
3855 | 803 | buffer = tmp; |
3856 | 803 | } |
3857 | 67.2k | COPY_BUF(buffer, len, c); |
3858 | 67.2k | if (len > maxLength) { |
3859 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); |
3860 | 0 | xmlFree(buffer); |
3861 | 0 | return(NULL); |
3862 | 0 | } |
3863 | 67.2k | NEXTL(l); |
3864 | 67.2k | c = CUR_CHAR(l); |
3865 | 67.2k | } |
3866 | 763 | buffer[len] = 0; |
3867 | 763 | if (ctxt->instate == XML_PARSER_EOF) { |
3868 | 0 | xmlFree(buffer); |
3869 | 0 | return(NULL); |
3870 | 0 | } |
3871 | 763 | return(buffer); |
3872 | 763 | } |
3873 | 78.2k | } |
3874 | 43.8k | if (ctxt->instate == XML_PARSER_EOF) |
3875 | 0 | return(NULL); |
3876 | 43.8k | if (len == 0) |
3877 | 1.95k | return(NULL); |
3878 | 41.8k | if (len > maxLength) { |
3879 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken"); |
3880 | 0 | return(NULL); |
3881 | 0 | } |
3882 | 41.8k | return(xmlStrndup(buf, len)); |
3883 | 41.8k | } |
3884 | | |
3885 | | /** |
3886 | | * xmlParseEntityValue: |
3887 | | * @ctxt: an XML parser context |
3888 | | * @orig: if non-NULL, set to the raw (unsubstituted) value buffer once the literal has been read and checked; in that case the buffer is not freed here. Not written on the earlier error paths. |
3889 | | * |
3890 | | * DEPRECATED: Internal function, don't use. |
3891 | | * |
3892 | | * Parse the quoted value of an ENTITY declaration: copy the literal into a buffer, check that '%' and '&' only start references, then substitute parameter-entity references. |
3893 | | * |
3894 | | * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | |
3895 | | * "'" ([^%&'] | PEReference | Reference)* "'" |
3896 | | * |
3897 | | * Returns the result of xmlStringDecodeEntitiesInt() on the copied value (PE references substituted), or NULL on error |
3898 | | */ |
3899 | |
3900 | | xmlChar * |
3901 | 12.2k | xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { |
3902 | 12.2k | xmlChar *buf = NULL; |
3903 | 12.2k | int len = 0; |
3904 | 12.2k | int size = XML_PARSER_BUFFER_SIZE; |
3905 | 12.2k | int c, l; |
3906 | 12.2k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
3907 | 0 | XML_MAX_HUGE_LENGTH : |
3908 | 12.2k | XML_MAX_TEXT_LENGTH; |
3909 | 12.2k | xmlChar stop; |
3910 | 12.2k | xmlChar *ret = NULL; |
3911 | 12.2k | const xmlChar *cur = NULL; |
3912 | 12.2k | xmlParserInputPtr input; |
3913 | |
3914 | 12.2k | if (RAW == '"') stop = '"'; |
3915 | 6.65k | else if (RAW == '\'') stop = '\''; |
3916 | 0 | else { |
3917 | 0 | xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL); |
3918 | 0 | return(NULL); |
3919 | 0 | } |
3920 | 12.2k | buf = (xmlChar *) xmlMallocAtomic(size); |
3921 | 12.2k | if (buf == NULL) { |
3922 | 0 | xmlErrMemory(ctxt, NULL); |
3923 | 0 | return(NULL); |
3924 | 0 | } |
3925 | |
3926 | | /* |
3927 | | * The content of the entity definition is copied in a buffer. The parser state is switched to XML_PARSER_ENTITY_VALUE and the starting input is remembered so that the closing quote is only accepted in that same input. |
3928 | | */ |
3929 | |
3930 | 12.2k | ctxt->instate = XML_PARSER_ENTITY_VALUE; |
3931 | 12.2k | input = ctxt->input; |
3932 | 12.2k | GROW; |
3933 | 12.2k | if (ctxt->instate == XML_PARSER_EOF) |
3934 | 0 | goto error; |
3935 | 12.2k | NEXT; |
3936 | 12.2k | c = CUR_CHAR(l); |
3937 | | /* |
3938 | | * NOTE: 4.4.5 Included in Literal |
3939 | | * When a parameter entity reference appears in a literal entity |
3940 | | * value, ... a single or double quote character in the replacement |
3941 | | * text is always treated as a normal data character and will not |
3942 | | * terminate the literal. |
3943 | | * In practice it means we stop the loop only when back at parsing |
3944 | | * the initial entity and the quote is found |
3945 | | */ |
3946 | 310k | while (((IS_CHAR(c)) && ((c != stop) || /* checked */ |
3947 | 309k | (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) { |
3948 | 297k | if (len + 5 >= size) { |
3949 | 1.74k | xmlChar *tmp; |
3950 | |
3951 | 1.74k | size *= 2; |
3952 | 1.74k | tmp = (xmlChar *) xmlRealloc(buf, size); |
3953 | 1.74k | if (tmp == NULL) { |
3954 | 0 | xmlErrMemory(ctxt, NULL); |
3955 | 0 | goto error; |
3956 | 0 | } |
3957 | 1.74k | buf = tmp; |
3958 | 1.74k | } |
3959 | 297k | COPY_BUF(buf, len, c); |
3960 | 297k | NEXTL(l); |
3961 | |
3962 | 297k | GROW; |
3963 | 297k | c = CUR_CHAR(l); |
3964 | 297k | if (c == 0) { |
3965 | 780 | GROW; |
3966 | 780 | c = CUR_CHAR(l); |
3967 | 780 | } |
3968 | |
3969 | 297k | if (len > maxLength) { |
3970 | 0 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, |
3971 | 0 | "entity value too long\n"); |
3972 | 0 | goto error; |
3973 | 0 | } |
3974 | 297k | } |
3975 | 12.2k | buf[len] = 0; |
3976 | 12.2k | if (ctxt->instate == XML_PARSER_EOF) |
3977 | 0 | goto error; |
3978 | 12.2k | if (c != stop) { |
3979 | 783 | xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL); |
3980 | 783 | goto error; |
3981 | 783 | } |
3982 | 11.4k | NEXT; |
3983 | |
3984 | | /* |
3985 | | * Raise problem w.r.t. '&' and '%' being used in non-entities |
3986 | | * reference constructs: each '%', and each '&' not followed by '#', must start a Name terminated by ';'. A '%' reference met while inSubset == 1 and inputNr == 1 is rejected with XML_ERR_ENTITY_PE_INTERNAL. |
3987 | | * Character references ('&#') are skipped by this scan. NOTE(review): presumably they are decoded by the xmlStringDecodeEntitiesInt() call below; confirm. |
3988 | | */ |
3989 | 11.4k | cur = buf; |
3990 | 380k | while (*cur != 0) { /* non input consuming */ |
3991 | 370k | if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) { |
3992 | 18.5k | xmlChar *name; |
3993 | 18.5k | xmlChar tmp = *cur; |
3994 | 18.5k | int nameOk = 0; |
3995 | |
3996 | 18.5k | cur++; |
3997 | 18.5k | name = xmlParseStringName(ctxt, &cur); |
3998 | 18.5k | if (name != NULL) { |
3999 | 18.1k | nameOk = 1; |
4000 | 18.1k | xmlFree(name); |
4001 | 18.1k | } |
4002 | 18.5k | if ((nameOk == 0) || (*cur != ';')) { |
4003 | 705 | xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR, |
4004 | 705 | "EntityValue: '%c' forbidden except for entities references\n", |
4005 | 705 | tmp); |
4006 | 705 | goto error; |
4007 | 705 | } |
4008 | 17.8k | if ((tmp == '%') && (ctxt->inSubset == 1) && |
4009 | 17.8k | (ctxt->inputNr == 1)) { |
4010 | 82 | xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); |
4011 | 82 | goto error; |
4012 | 82 | } |
4013 | 17.7k | if (*cur == 0) |
4014 | 0 | break; |
4015 | 17.7k | } |
4016 | 369k | cur++; |
4017 | 369k | } |
4018 | |
4019 | | /* |
4020 | | * Then PEReference entities are substituted. |
4021 | | * |
4022 | | * NOTE: 4.4.7 Bypassed |
4023 | | * When a general entity reference appears in the EntityValue in |
4024 | | * an entity declaration, it is bypassed and left as is. |
4025 | | * so XML_SUBSTITUTE_REF is not set here. |
4026 | | */ |
4027 | 10.6k | ++ctxt->depth; |
4028 | 10.6k | ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF, |
4029 | 10.6k | 0, 0, 0, /* check */ 1); |
4030 | 10.6k | --ctxt->depth; |
4031 | |
4032 | 10.6k | if (orig != NULL) { |
4033 | 10.6k | *orig = buf; |
4034 | 10.6k | buf = NULL; |
4035 | 10.6k | } |
4036 | |
4037 | 12.2k | error: |
4038 | 12.2k | if (buf != NULL) |
4039 | 1.57k | xmlFree(buf); |
4040 | 12.2k | return(ret); |
4041 | 10.6k | } |
4042 | | |
4043 | | /** |
4044 | | * xmlParseAttValueComplex: |
4045 | | * @ctxt: an XML parser context |
4046 | | * @attlen: if non-NULL, receives the length in bytes of the returned value |
4047 | | * @normalize: when non-zero, leading whitespace is dropped, consecutive whitespace characters produce a single space and trailing spaces are trimmed |
4048 | | * |
4049 | | * parse a value for an attribute, this is the fallback function |
4050 | | * of xmlParseAttValue() when the attribute parsing requires handling |
4051 | | * of non-ASCII characters, or normalization compaction. |
4052 | | * NOTE(review): the "AttValue length too long" path jumps to mem_error, so xmlErrMemory() is also called after the fatal error; confirm this is intended. |
4053 | | * Returns the AttValue parsed or NULL. The value has to be freed by the caller. |
4054 | | */ |
4055 | | static xmlChar * |
4056 | 38.7k | xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) { |
4057 | 38.7k | xmlChar limit = 0; |
4058 | 38.7k | xmlChar *buf = NULL; |
4059 | 38.7k | xmlChar *rep = NULL; |
4060 | 38.7k | size_t len = 0; |
4061 | 38.7k | size_t buf_size = 0; |
4062 | 38.7k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
4063 | 0 | XML_MAX_HUGE_LENGTH : |
4064 | 38.7k | XML_MAX_TEXT_LENGTH; |
4065 | 38.7k | int c, l, in_space = 0; |
4066 | 38.7k | xmlChar *current = NULL; |
4067 | 38.7k | xmlEntityPtr ent; |
4068 | |
4069 | 38.7k | if (NXT(0) == '"') { |
4070 | 29.0k | ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
4071 | 29.0k | limit = '"'; |
4072 | 29.0k | NEXT; |
4073 | 29.0k | } else if (NXT(0) == '\'') { |
4074 | 9.76k | limit = '\''; |
4075 | 9.76k | ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE; |
4076 | 9.76k | NEXT; |
4077 | 9.76k | } else { |
4078 | 0 | xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL); |
4079 | 0 | return(NULL); |
4080 | 0 | } |
4081 | |
4082 | | /* |
4083 | | * allocate a translation buffer; it starts at XML_PARSER_BUFFER_SIZE bytes and is enlarged with growBuffer() as needed. |
4084 | | */ |
4085 | 38.7k | buf_size = XML_PARSER_BUFFER_SIZE; |
4086 | 38.7k | buf = (xmlChar *) xmlMallocAtomic(buf_size); |
4087 | 38.7k | if (buf == NULL) goto mem_error; |
4088 | |
4089 | | /* |
4090 | | * Loop until the closing quote, a '<', a character rejected by IS_CHAR or the parser EOF state is reached; the maxLength limit is checked at the end of every iteration. |
4091 | | */ |
4092 | 38.7k | c = CUR_CHAR(l); |
4093 | 419k | while (((NXT(0) != limit) && /* checked */ |
4094 | 419k | (IS_CHAR(c)) && (c != '<')) && |
4095 | 419k | (ctxt->instate != XML_PARSER_EOF)) { |
4096 | 381k | if (c == '&') { |
4097 | 54.2k | in_space = 0; |
4098 | 54.2k | if (NXT(1) == '#') { |
4099 | 8.76k | int val = xmlParseCharRef(ctxt); |
4100 | |
4101 | 8.76k | if (val == '&') { |
4102 | 901 | if (ctxt->replaceEntities) { |
4103 | 0 | if (len + 10 > buf_size) { |
4104 | 0 | growBuffer(buf, 10); |
4105 | 0 | } |
4106 | 0 | buf[len++] = '&'; |
4107 | 901 | } else { |
4108 | | /* |
4109 | | * The reparsing will be done in xmlStringGetNodeList() |
4110 | | * called by the attribute() function in SAX.c, so the ampersand is stored in its escaped form "&#38;" |
4111 | | */ |
4112 | 901 | if (len + 10 > buf_size) { |
4113 | 390 | growBuffer(buf, 10); |
4114 | 390 | } |
4115 | 901 | buf[len++] = '&'; |
4116 | 901 | buf[len++] = '#'; |
4117 | 901 | buf[len++] = '3'; |
4118 | 901 | buf[len++] = '8'; |
4119 | 901 | buf[len++] = ';'; |
4120 | 901 | } |
4121 | 7.86k | } else if (val != 0) { |
4122 | 5.43k | if (len + 10 > buf_size) { |
4123 | 394 | growBuffer(buf, 10); |
4124 | 394 | } |
4125 | 5.43k | len += xmlCopyChar(0, &buf[len], val); |
4126 | 5.43k | } |
4127 | 45.5k | } else { |
4128 | 45.5k | ent = xmlParseEntityRef(ctxt); |
4129 | 45.5k | if ((ent != NULL) && |
4130 | 45.5k | (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { |
4131 | 2.04k | if (len + 10 > buf_size) { |
4132 | 416 | growBuffer(buf, 10); |
4133 | 416 | } |
4134 | 2.04k | if ((ctxt->replaceEntities == 0) && |
4135 | 2.04k | (ent->content[0] == '&')) { |
4136 | 866 | buf[len++] = '&'; |
4137 | 866 | buf[len++] = '#'; |
4138 | 866 | buf[len++] = '3'; |
4139 | 866 | buf[len++] = '8'; |
4140 | 866 | buf[len++] = ';'; |
4141 | 1.17k | } else { |
4142 | 1.17k | buf[len++] = ent->content[0]; |
4143 | 1.17k | } |
4144 | 43.4k | } else if ((ent != NULL) && |
4145 | 43.4k | (ctxt->replaceEntities != 0)) { |
4146 | 0 | if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) { |
4147 | 0 | if (xmlParserEntityCheck(ctxt, ent->length)) |
4148 | 0 | goto error; |
4149 | |
4150 | 0 | ++ctxt->depth; |
4151 | 0 | rep = xmlStringDecodeEntitiesInt(ctxt, ent->content, |
4152 | 0 | ent->length, XML_SUBSTITUTE_REF, 0, 0, 0, |
4153 | 0 | /* check */ 1); |
4154 | 0 | --ctxt->depth; |
4155 | 0 | if (rep != NULL) { |
4156 | 0 | current = rep; |
4157 | 0 | while (*current != 0) { /* non input consuming */ |
4158 | 0 | if ((*current == 0xD) || (*current == 0xA) || |
4159 | 0 | (*current == 0x9)) { |
4160 | 0 | buf[len++] = 0x20; |
4161 | 0 | current++; |
4162 | 0 | } else |
4163 | 0 | buf[len++] = *current++; |
4164 | 0 | if (len + 10 > buf_size) { |
4165 | 0 | growBuffer(buf, 10); |
4166 | 0 | } |
4167 | 0 | } |
4168 | 0 | xmlFree(rep); |
4169 | 0 | rep = NULL; |
4170 | 0 | } |
4171 | 0 | } else { |
4172 | 0 | if (len + 10 > buf_size) { |
4173 | 0 | growBuffer(buf, 10); |
4174 | 0 | } |
4175 | 0 | if (ent->content != NULL) |
4176 | 0 | buf[len++] = ent->content[0]; |
4177 | 0 | } |
4178 | 43.4k | } else if (ent != NULL) { |
4179 | 30.0k | int i = xmlStrlen(ent->name); |
4180 | 30.0k | const xmlChar *cur = ent->name; |
4181 | |
4182 | | /* |
4183 | | * We also check for recursion and amplification |
4184 | | * when entities are not substituted. They're |
4185 | | * often expanded later. |
4186 | | */ |
4187 | 30.0k | if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) && |
4188 | 30.0k | (ent->content != NULL)) { |
4189 | 27.2k | if ((ent->flags & XML_ENT_CHECKED) == 0) { |
4190 | 14.9k | unsigned long oldCopy = ctxt->sizeentcopy; |
4191 | |
4192 | 14.9k | ctxt->sizeentcopy = ent->length; |
4193 | |
4194 | 14.9k | ++ctxt->depth; |
4195 | 14.9k | rep = xmlStringDecodeEntitiesInt(ctxt, |
4196 | 14.9k | ent->content, ent->length, |
4197 | 14.9k | XML_SUBSTITUTE_REF, 0, 0, 0, |
4198 | 14.9k | /* check */ 1); |
4199 | 14.9k | --ctxt->depth; |
4200 | |
4201 | | /* |
4202 | | * If we're parsing DTD content, the entity |
4203 | | * might reference other entities which |
4204 | | * weren't defined yet, so the check isn't |
4205 | | * reliable. |
4206 | | */ |
4207 | 14.9k | if (ctxt->inSubset == 0) { |
4208 | 804 | ent->flags |= XML_ENT_CHECKED; |
4209 | 804 | ent->expandedSize = ctxt->sizeentcopy; |
4210 | 804 | } |
4211 | |
4212 | 14.9k | if (rep != NULL) { |
4213 | 14.7k | xmlFree(rep); |
4214 | 14.7k | rep = NULL; |
4215 | 14.7k | } else { |
4216 | 173 | ent->content[0] = 0; |
4217 | 173 | } |
4218 | |
4219 | 14.9k | if (xmlParserEntityCheck(ctxt, oldCopy)) |
4220 | 2 | goto error; |
4221 | 14.9k | } else { |
4222 | 12.2k | if (xmlParserEntityCheck(ctxt, ent->expandedSize)) |
4223 | 28 | goto error; |
4224 | 12.2k | } |
4225 | 27.2k | } |
4226 | |
4227 | | /* |
4228 | | * Just output the reference: the entity is not substituted, so "&name;" is written back to the buffer |
4229 | | */ |
4230 | 30.0k | buf[len++] = '&'; |
4231 | 30.5k | while (len + i + 10 > buf_size) { |
4232 | 976 | growBuffer(buf, i + 10); |
4233 | 976 | } |
4234 | 60.3k | for (;i > 0;i--) |
4235 | 30.2k | buf[len++] = *cur++; |
4236 | 30.0k | buf[len++] = ';'; |
4237 | 30.0k | } |
4238 | 45.5k | } |
4239 | 326k | } else { |
4240 | 326k | if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) { |
4241 | 44.8k | if ((len != 0) || (!normalize)) { |
4242 | 43.3k | if ((!normalize) || (!in_space)) { |
4243 | 42.3k | COPY_BUF(buf, len, 0x20); |
4244 | 42.5k | while (len + 10 > buf_size) { |
4245 | 434 | growBuffer(buf, 10); |
4246 | 434 | } |
4247 | 42.3k | } |
4248 | 43.3k | in_space = 1; |
4249 | 43.3k | } |
4250 | 281k | } else { |
4251 | 281k | in_space = 0; |
4252 | 281k | COPY_BUF(buf, len, c); |
4253 | 281k | if (len + 10 > buf_size) { |
4254 | 2.27k | growBuffer(buf, 10); |
4255 | 2.27k | } |
4256 | 281k | } |
4257 | 326k | NEXTL(l); |
4258 | 326k | } |
4259 | 381k | GROW; |
4260 | 381k | c = CUR_CHAR(l); |
4261 | 381k | if (len > maxLength) { |
4262 | 0 | xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, |
4263 | 0 | "AttValue length too long\n"); |
4264 | 0 | goto mem_error; |
4265 | 0 | } |
4266 | 381k | } |
4267 | 38.7k | if (ctxt->instate == XML_PARSER_EOF) |
4268 | 149 | goto error; |
4269 | |
4270 | 38.6k | if ((in_space) && (normalize)) { |
4271 | 875 | while ((len > 0) && (buf[len - 1] == 0x20)) len--; |
4272 | 199 | } |
4273 | 38.6k | buf[len] = 0; |
4274 | 38.6k | if (RAW == '<') { |
4275 | 25.6k | xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL); |
4276 | 25.6k | } else if (RAW != limit) { |
4277 | 8.02k | if ((c != 0) && (!IS_CHAR(c))) { |
4278 | 292 | xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, |
4279 | 292 | "invalid character in attribute value\n"); |
4280 | 7.73k | } else { |
4281 | 7.73k | xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED, |
4282 | 7.73k | "AttValue: ' expected\n"); |
4283 | 7.73k | } |
4284 | 8.02k | } else |
4285 | 4.96k | NEXT; |
4286 | |
4287 | 38.6k | if (attlen != NULL) *attlen = len; |
4288 | 38.6k | return(buf); |
4289 | |
4290 | 0 | mem_error: |
4291 | 0 | xmlErrMemory(ctxt, NULL); |
4292 | 179 | error: |
4293 | 179 | if (buf != NULL) |
4294 | 179 | xmlFree(buf); |
4295 | 179 | if (rep != NULL) |
4296 | 0 | xmlFree(rep); |
4297 | 179 | return(NULL); |
4298 | 0 | } |
4299 | | |
4300 | | /** |
4301 | | * xmlParseAttValue: |
4302 | | * @ctxt: an XML parser context |
4303 | | * |
4304 | | * DEPRECATED: Internal function, don't use. |
4305 | | * |
4306 | | * parse a value for an attribute |
4307 | | * Note: the parser won't do substitution of entities here, this |
4308 | | * will be handled later in xmlStringGetNodeList |
4309 | | * |
4310 | | * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | |
4311 | | * "'" ([^<&'] | Reference)* "'" |
4312 | | * |
4313 | | * 3.3.3 Attribute-Value Normalization: |
4314 | | * Before the value of an attribute is passed to the application or |
4315 | | * checked for validity, the XML processor must normalize it as follows: |
4316 | | * - a character reference is processed by appending the referenced |
4317 | | * character to the attribute value |
4318 | | * - an entity reference is processed by recursively processing the |
4319 | | * replacement text of the entity |
4320 | | * - a whitespace character (#x20, #xD, #xA, #x9) is processed by |
4321 | | * appending #x20 to the normalized value, except that only a single |
4322 | | * #x20 is appended for a "#xD#xA" sequence that is part of an external |
4323 | | * parsed entity or the literal entity value of an internal parsed entity |
4324 | | * - other characters are processed by appending them to the normalized value |
4325 | | * If the declared value is not CDATA, then the XML processor must further |
4326 | | * process the normalized attribute value by discarding any leading and |
4327 | | * trailing space (#x20) characters, and by replacing sequences of space |
4328 | | * (#x20) characters by a single space (#x20) character. |
4329 | | * All attributes for which no declaration has been read should be treated |
4330 | | * by a non-validating parser as if declared CDATA. |
4331 | | * |
4332 | | * Returns the AttValue parsed or NULL. The value has to be freed by the caller. |
4333 | | */ |
4334 | | |
4335 | | |
4336 | | xmlChar * |
4337 | 45.0k | xmlParseAttValue(xmlParserCtxtPtr ctxt) { |
4338 | 45.0k | if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); |
4339 | 45.0k | return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); |
4340 | 45.0k | } |
4341 | | |
4342 | | /** |
4343 | | * xmlParseSystemLiteral: |
4344 | | * @ctxt: an XML parser context |
4345 | | * |
4346 | | * DEPRECATED: Internal function, don't use. |
4347 | | * |
4348 | | * parse an XML Literal |
4349 | | * |
4350 | | * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") |
4351 | | * |
4352 | | * Returns the SystemLiteral parsed or NULL |
4353 | | */ |
4354 | | |
4355 | | xmlChar * |
4356 | 7.05k | xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) { |
4357 | 7.05k | xmlChar *buf = NULL; |
4358 | 7.05k | int len = 0; |
4359 | 7.05k | int size = XML_PARSER_BUFFER_SIZE; |
4360 | 7.05k | int cur, l; |
4361 | 7.05k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
4362 | 0 | XML_MAX_TEXT_LENGTH : |
4363 | 7.05k | XML_MAX_NAME_LENGTH; |
4364 | 7.05k | xmlChar stop; |
4365 | 7.05k | int state = ctxt->instate; |
4366 | | |
4367 | 7.05k | if (RAW == '"') { |
4368 | 3.78k | NEXT; |
4369 | 3.78k | stop = '"'; |
4370 | 3.78k | } else if (RAW == '\'') { |
4371 | 1.07k | NEXT; |
4372 | 1.07k | stop = '\''; |
4373 | 2.19k | } else { |
4374 | 2.19k | xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); |
4375 | 2.19k | return(NULL); |
4376 | 2.19k | } |
4377 | | |
4378 | 4.85k | buf = (xmlChar *) xmlMallocAtomic(size); |
4379 | 4.85k | if (buf == NULL) { |
4380 | 0 | xmlErrMemory(ctxt, NULL); |
4381 | 0 | return(NULL); |
4382 | 0 | } |
4383 | 4.85k | ctxt->instate = XML_PARSER_SYSTEM_LITERAL; |
4384 | 4.85k | cur = CUR_CHAR(l); |
4385 | 81.7k | while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */ |
4386 | 76.9k | if (len + 5 >= size) { |
4387 | 424 | xmlChar *tmp; |
4388 | | |
4389 | 424 | size *= 2; |
4390 | 424 | tmp = (xmlChar *) xmlRealloc(buf, size); |
4391 | 424 | if (tmp == NULL) { |
4392 | 0 | xmlFree(buf); |
4393 | 0 | xmlErrMemory(ctxt, NULL); |
4394 | 0 | ctxt->instate = (xmlParserInputState) state; |
4395 | 0 | return(NULL); |
4396 | 0 | } |
4397 | 424 | buf = tmp; |
4398 | 424 | } |
4399 | 76.9k | COPY_BUF(buf, len, cur); |
4400 | 76.9k | if (len > maxLength) { |
4401 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral"); |
4402 | 0 | xmlFree(buf); |
4403 | 0 | ctxt->instate = (xmlParserInputState) state; |
4404 | 0 | return(NULL); |
4405 | 0 | } |
4406 | 76.9k | NEXTL(l); |
4407 | 76.9k | cur = CUR_CHAR(l); |
4408 | 76.9k | } |
4409 | 4.85k | buf[len] = 0; |
4410 | 4.85k | if (ctxt->instate == XML_PARSER_EOF) { |
4411 | 0 | xmlFree(buf); |
4412 | 0 | return(NULL); |
4413 | 0 | } |
4414 | 4.85k | ctxt->instate = (xmlParserInputState) state; |
4415 | 4.85k | if (!IS_CHAR(cur)) { |
4416 | 1.34k | xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); |
4417 | 3.50k | } else { |
4418 | 3.50k | NEXT; |
4419 | 3.50k | } |
4420 | 4.85k | return(buf); |
4421 | 4.85k | } |
4422 | | |
4423 | | /** |
4424 | | * xmlParsePubidLiteral: |
4425 | | * @ctxt: an XML parser context |
4426 | | * |
4427 | | * DEPRECATED: Internal function, don't use. |
4428 | | * |
4429 | | * parse an XML public literal |
4430 | | * |
4431 | | * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" |
4432 | | * |
4433 | | * Returns the PubidLiteral parsed or NULL. |
4434 | | */ |
4435 | | |
4436 | | xmlChar * |
4437 | 4.30k | xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) { |
4438 | 4.30k | xmlChar *buf = NULL; |
4439 | 4.30k | int len = 0; |
4440 | 4.30k | int size = XML_PARSER_BUFFER_SIZE; |
4441 | 4.30k | int maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
4442 | 0 | XML_MAX_TEXT_LENGTH : |
4443 | 4.30k | XML_MAX_NAME_LENGTH; |
4444 | 4.30k | xmlChar cur; |
4445 | 4.30k | xmlChar stop; |
4446 | 4.30k | xmlParserInputState oldstate = ctxt->instate; |
4447 | | |
4448 | 4.30k | if (RAW == '"') { |
4449 | 2.41k | NEXT; |
4450 | 2.41k | stop = '"'; |
4451 | 2.41k | } else if (RAW == '\'') { |
4452 | 1.19k | NEXT; |
4453 | 1.19k | stop = '\''; |
4454 | 1.19k | } else { |
4455 | 696 | xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL); |
4456 | 696 | return(NULL); |
4457 | 696 | } |
4458 | 3.60k | buf = (xmlChar *) xmlMallocAtomic(size); |
4459 | 3.60k | if (buf == NULL) { |
4460 | 0 | xmlErrMemory(ctxt, NULL); |
4461 | 0 | return(NULL); |
4462 | 0 | } |
4463 | 3.60k | ctxt->instate = XML_PARSER_PUBLIC_LITERAL; |
4464 | 3.60k | cur = CUR; |
4465 | 45.9k | while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */ |
4466 | 42.3k | if (len + 1 >= size) { |
4467 | 194 | xmlChar *tmp; |
4468 | | |
4469 | 194 | size *= 2; |
4470 | 194 | tmp = (xmlChar *) xmlRealloc(buf, size); |
4471 | 194 | if (tmp == NULL) { |
4472 | 0 | xmlErrMemory(ctxt, NULL); |
4473 | 0 | xmlFree(buf); |
4474 | 0 | return(NULL); |
4475 | 0 | } |
4476 | 194 | buf = tmp; |
4477 | 194 | } |
4478 | 42.3k | buf[len++] = cur; |
4479 | 42.3k | if (len > maxLength) { |
4480 | 0 | xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID"); |
4481 | 0 | xmlFree(buf); |
4482 | 0 | return(NULL); |
4483 | 0 | } |
4484 | 42.3k | NEXT; |
4485 | 42.3k | cur = CUR; |
4486 | 42.3k | } |
4487 | 3.60k | buf[len] = 0; |
4488 | 3.60k | if (ctxt->instate == XML_PARSER_EOF) { |
4489 | 0 | xmlFree(buf); |
4490 | 0 | return(NULL); |
4491 | 0 | } |
4492 | 3.60k | if (cur != stop) { |
4493 | 3.05k | xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL); |
4494 | 3.05k | } else { |
4495 | 556 | NEXTL(1); |
4496 | 556 | } |
4497 | 3.60k | ctxt->instate = oldstate; |
4498 | 3.60k | return(buf); |
4499 | 3.60k | } |
4500 | | |
/* Forward declaration; xmlParseCharDataInternal() falls back to this. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);

/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. A non-zero entry means the byte can be skipped
 * by the inner scanning loop; a zero entry stops that loop. The non-zero
 * entries are 0x09 and 0x20..0x7F except '&' (0x26), '<' (0x3C) and
 * ']' (0x5D). 0x0A, 0x0D, the other control bytes and every byte
 * >= 0x80 are zero.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4540 | | |
/**
 * xmlParseCharDataInternal:
 * @ctxt:  an XML parser context
 * @partial:  buffer may contain partial UTF-8 sequences
 *
 * Parse character data. Always makes progress if the first char isn't
 * '<' or '&'.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * This is the fast path: it scans bytes directly in the input buffer and
 * passes them to the SAX callbacks without copying. When the scan stops
 * on a byte outside 0x20..0x7F, 0x09 and 0x0A, the saved line/column are
 * written back and xmlParseCharDataComplex() takes over, receiving
 * @partial unchanged.
 */
static void
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
    const xmlChar *in;
    int nbchar = 0;
    /* position to write back before handing over to the complex parser */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    in = ctxt->input->cur;
    do {
get_more_space:
        /* skip runs of spaces and line feeds, keeping line/col current */
        while (*in == 0x20) { in++; ctxt->input->col++; }
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more_space;
        }
        /*
         * Only spaces and line feeds were seen before '<': report them
         * (areBlanks is called with 1 as last argument here) and return.
         */
        if (*in == '<') {
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if ((ctxt->sax != NULL) &&
                    (ctxt->disableSAX == 0) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters)) {
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                } else if ((ctxt->sax != NULL) &&
                           (ctxt->disableSAX == 0) &&
                           (ctxt->sax->characters != NULL)) {
                    ctxt->sax->characters(ctxt->userData,
                                          tmp, nbchar);
                }
            }
            return;
        }

get_more:
        /* advance over every byte flagged non-zero in test_char_data */
        ccol = ctxt->input->col;
        while (test_char_data[*in]) {
            in++;
            ccol++;
        }
        ctxt->input->col = ccol;
        if (*in == 0xA) {
            do {
                ctxt->input->line++; ctxt->input->col = 1;
                in++;
            } while (*in == 0xA);
            goto get_more;
        }
        if (*in == ']') {
            if ((in[1] == ']') && (in[2] == '>')) {
                /*
                 * NOTE(review): no SAX callback is made on this path, so
                 * the bytes between input->cur and in are not reported.
                 */
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                if (ctxt->instate != XML_PARSER_EOF)
                    ctxt->input->cur = in + 1;
                return;
            }
            /* a lone ']' is ordinary data */
            in++;
            ctxt->input->col++;
            goto get_more;
        }
        /* report the bytes scanned since input->cur */
        nbchar = in - ctxt->input->cur;
        if (nbchar > 0) {
            if ((ctxt->sax != NULL) &&
                (ctxt->disableSAX == 0) &&
                (ctxt->sax->ignorableWhitespace !=
                 ctxt->sax->characters) &&
                (IS_BLANK_CH(*ctxt->input->cur))) {
                const xmlChar *tmp = ctxt->input->cur;
                ctxt->input->cur = in;

                if (areBlanks(ctxt, tmp, nbchar, 0)) {
                    if (ctxt->sax->ignorableWhitespace != NULL)
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                } else {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    if (*ctxt->space == -1)
                        *ctxt->space = -2;
                }
                line = ctxt->input->line;
                col = ctxt->input->col;
            } else if ((ctxt->sax != NULL) &&
                       (ctxt->disableSAX == 0)) {
                if (ctxt->sax->characters != NULL)
                    ctxt->sax->characters(ctxt->userData,
                                          ctxt->input->cur, nbchar);
                line = ctxt->input->line;
                col = ctxt->input->col;
            }
            /* NOTE(review): line/col are not refreshed when SAX is NULL or disabled */
            if (ctxt->instate == XML_PARSER_EOF)
                return;
        }
        ctxt->input->cur = in;
        if (*in == 0xD) {
            in++;
            if (*in == 0xA) {
                /* CR LF: input->cur is set to the LF, so the CR is skipped */
                ctxt->input->cur = in;
                in++;
                ctxt->input->line++; ctxt->input->col = 1;
                continue; /* while */
            }
            in--;
        }
        if (*in == '<') {
            return;
        }
        if (*in == '&') {
            return;
        }
        SHRINK;
        GROW;
        if (ctxt->instate == XML_PARSER_EOF)
            return;
        /* re-read the cursor after SHRINK/GROW */
        in = ctxt->input->cur;
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
             (*in == 0x09) || (*in == 0x0a));
    /* hand over to the complex parser with the last saved position */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, partial);
}
4698 | | |
4699 | | /** |
4700 | | * xmlParseCharDataComplex: |
4701 | | * @ctxt: an XML parser context |
4702 | | * @cdata: int indicating whether we are within a CDATA section |
4703 | | * |
4704 | | * Always makes progress if the first char isn't '<' or '&'. |
4705 | | * |
4706 | | * parse a CharData section.this is the fallback function |
4707 | | * of xmlParseCharData() when the parsing requires handling |
4708 | | * of non-ASCII characters. |
4709 | | */ |
4710 | | static void |
4711 | 11.9k | xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) { |
4712 | 11.9k | xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5]; |
4713 | 11.9k | int nbchar = 0; |
4714 | 11.9k | int cur, l; |
4715 | | |
4716 | 11.9k | cur = CUR_CHAR(l); |
4717 | 76.8k | while ((cur != '<') && /* checked */ |
4718 | 76.8k | (cur != '&') && |
4719 | 76.8k | (IS_CHAR(cur))) { |
4720 | 64.9k | if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { |
4721 | 322 | xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL); |
4722 | 322 | } |
4723 | 64.9k | COPY_BUF(buf, nbchar, cur); |
4724 | | /* move current position before possible calling of ctxt->sax->characters */ |
4725 | 64.9k | NEXTL(l); |
4726 | 64.9k | if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) { |
4727 | 400 | buf[nbchar] = 0; |
4728 | | |
4729 | | /* |
4730 | | * OK the segment is to be consumed as chars. |
4731 | | */ |
4732 | 400 | if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
4733 | 227 | if (areBlanks(ctxt, buf, nbchar, 0)) { |
4734 | 10 | if (ctxt->sax->ignorableWhitespace != NULL) |
4735 | 10 | ctxt->sax->ignorableWhitespace(ctxt->userData, |
4736 | 10 | buf, nbchar); |
4737 | 217 | } else { |
4738 | 217 | if (ctxt->sax->characters != NULL) |
4739 | 217 | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
4740 | 217 | if ((ctxt->sax->characters != |
4741 | 217 | ctxt->sax->ignorableWhitespace) && |
4742 | 217 | (*ctxt->space == -1)) |
4743 | 34 | *ctxt->space = -2; |
4744 | 217 | } |
4745 | 227 | } |
4746 | 400 | nbchar = 0; |
4747 | | /* something really bad happened in the SAX callback */ |
4748 | 400 | if (ctxt->instate != XML_PARSER_CONTENT) |
4749 | 0 | return; |
4750 | 400 | SHRINK; |
4751 | 400 | } |
4752 | 64.9k | cur = CUR_CHAR(l); |
4753 | 64.9k | } |
4754 | 11.9k | if (ctxt->instate == XML_PARSER_EOF) |
4755 | 0 | return; |
4756 | 11.9k | if (nbchar != 0) { |
4757 | 5.13k | buf[nbchar] = 0; |
4758 | | /* |
4759 | | * OK the segment is to be consumed as chars. |
4760 | | */ |
4761 | 5.13k | if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) { |
4762 | 2.68k | if (areBlanks(ctxt, buf, nbchar, 0)) { |
4763 | 461 | if (ctxt->sax->ignorableWhitespace != NULL) |
4764 | 461 | ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar); |
4765 | 2.22k | } else { |
4766 | 2.22k | if (ctxt->sax->characters != NULL) |
4767 | 2.22k | ctxt->sax->characters(ctxt->userData, buf, nbchar); |
4768 | 2.22k | if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) && |
4769 | 2.22k | (*ctxt->space == -1)) |
4770 | 345 | *ctxt->space = -2; |
4771 | 2.22k | } |
4772 | 2.68k | } |
4773 | 5.13k | } |
4774 | | /* |
4775 | | * cur == 0 can mean |
4776 | | * |
4777 | | * - XML_PARSER_EOF or memory error. This is checked above. |
4778 | | * - An actual 0 character. |
4779 | | * - End of buffer. |
4780 | | * - An incomplete UTF-8 sequence. This is allowed if partial is set. |
4781 | | */ |
4782 | 11.9k | if (ctxt->input->cur < ctxt->input->end) { |
4783 | 10.9k | if ((cur == 0) && (CUR != 0)) { |
4784 | 9 | if (partial == 0) { |
4785 | 9 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
4786 | 9 | "Incomplete UTF-8 sequence starting with %02X\n", CUR); |
4787 | 9 | NEXTL(1); |
4788 | 9 | } |
4789 | 10.9k | } else if ((cur != '<') && (cur != '&')) { |
4790 | | /* Generate the error and skip the offending character */ |
4791 | 6.02k | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
4792 | 6.02k | "PCDATA invalid Char value %d\n", cur); |
4793 | 6.02k | NEXTL(l); |
4794 | 6.02k | } |
4795 | 10.9k | } |
4796 | 11.9k | } |
4797 | | |
4798 | | /** |
4799 | | * xmlParseCharData: |
4800 | | * @ctxt: an XML parser context |
4801 | | * @cdata: unused |
4802 | | * |
4803 | | * DEPRECATED: Internal function, don't use. |
4804 | | */ |
4805 | | void |
4806 | 0 | xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) { |
4807 | 0 | xmlParseCharDataInternal(ctxt, 0); |
4808 | 0 | } |
4809 | | |
4810 | | /** |
4811 | | * xmlParseExternalID: |
4812 | | * @ctxt: an XML parser context |
4813 | | * @publicID: a xmlChar** receiving PubidLiteral |
4814 | | * @strict: indicate whether we should restrict parsing to only |
4815 | | * production [75], see NOTE below |
4816 | | * |
4817 | | * DEPRECATED: Internal function, don't use. |
4818 | | * |
4819 | | * Parse an External ID or a Public ID |
4820 | | * |
4821 | | * NOTE: Productions [75] and [83] interact badly since [75] can generate |
4822 | | * 'PUBLIC' S PubidLiteral S SystemLiteral |
4823 | | * |
4824 | | * [75] ExternalID ::= 'SYSTEM' S SystemLiteral |
4825 | | * | 'PUBLIC' S PubidLiteral S SystemLiteral |
4826 | | * |
4827 | | * [83] PublicID ::= 'PUBLIC' S PubidLiteral |
4828 | | * |
4829 | | * Returns the function returns SystemLiteral and in the second |
4830 | | * case publicID receives PubidLiteral, is strict is off |
4831 | | * it is possible to return NULL and have publicID set. |
4832 | | */ |
4833 | | |
4834 | | xmlChar * |
4835 | 23.0k | xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) { |
4836 | 23.0k | xmlChar *URI = NULL; |
4837 | | |
4838 | 23.0k | *publicID = NULL; |
4839 | 23.0k | if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) { |
4840 | 4.11k | SKIP(6); |
4841 | 4.11k | if (SKIP_BLANKS == 0) { |
4842 | 3.16k | xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, |
4843 | 3.16k | "Space required after 'SYSTEM'\n"); |
4844 | 3.16k | } |
4845 | 4.11k | URI = xmlParseSystemLiteral(ctxt); |
4846 | 4.11k | if (URI == NULL) { |
4847 | 481 | xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); |
4848 | 481 | } |
4849 | 18.9k | } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) { |
4850 | 4.30k | SKIP(6); |
4851 | 4.30k | if (SKIP_BLANKS == 0) { |
4852 | 3.57k | xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, |
4853 | 3.57k | "Space required after 'PUBLIC'\n"); |
4854 | 3.57k | } |
4855 | 4.30k | *publicID = xmlParsePubidLiteral(ctxt); |
4856 | 4.30k | if (*publicID == NULL) { |
4857 | 696 | xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL); |
4858 | 696 | } |
4859 | 4.30k | if (strict) { |
4860 | | /* |
4861 | | * We don't handle [83] so "S SystemLiteral" is required. |
4862 | | */ |
4863 | 2.00k | if (SKIP_BLANKS == 0) { |
4864 | 1.79k | xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, |
4865 | 1.79k | "Space required after the Public Identifier\n"); |
4866 | 1.79k | } |
4867 | 2.29k | } else { |
4868 | | /* |
4869 | | * We handle [83] so we return immediately, if |
4870 | | * "S SystemLiteral" is not detected. We skip blanks if no |
4871 | | * system literal was found, but this is harmless since we must |
4872 | | * be at the end of a NotationDecl. |
4873 | | */ |
4874 | 2.29k | if (SKIP_BLANKS == 0) return(NULL); |
4875 | 1.19k | if ((CUR != '\'') && (CUR != '"')) return(NULL); |
4876 | 1.19k | } |
4877 | 2.93k | URI = xmlParseSystemLiteral(ctxt); |
4878 | 2.93k | if (URI == NULL) { |
4879 | 1.71k | xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL); |
4880 | 1.71k | } |
4881 | 2.93k | } |
4882 | 21.6k | return(URI); |
4883 | 23.0k | } |
4884 | | |
4885 | | /** |
4886 | | * xmlParseCommentComplex: |
4887 | | * @ctxt: an XML parser context |
4888 | | * @buf: the already parsed part of the buffer |
4889 | | * @len: number of bytes in the buffer |
4890 | | * @size: allocated size of the buffer |
4891 | | * |
4892 | | * Skip an XML (SGML) comment <!-- .... --> |
4893 | | * The spec says that "For compatibility, the string "--" (double-hyphen) |
4894 | | * must not occur within comments. " |
4895 | | * This is the slow routine in case the accelerator for ascii didn't work |
4896 | | * |
4897 | | * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
4898 | | */ |
4899 | | static void |
4900 | | xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, |
4901 | 10.2k | size_t len, size_t size) { |
4902 | 10.2k | int q, ql; |
4903 | 10.2k | int r, rl; |
4904 | 10.2k | int cur, l; |
4905 | 10.2k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
4906 | 0 | XML_MAX_HUGE_LENGTH : |
4907 | 10.2k | XML_MAX_TEXT_LENGTH; |
4908 | 10.2k | int inputid; |
4909 | | |
4910 | 10.2k | inputid = ctxt->input->id; |
4911 | | |
4912 | 10.2k | if (buf == NULL) { |
4913 | 4.67k | len = 0; |
4914 | 4.67k | size = XML_PARSER_BUFFER_SIZE; |
4915 | 4.67k | buf = (xmlChar *) xmlMallocAtomic(size); |
4916 | 4.67k | if (buf == NULL) { |
4917 | 0 | xmlErrMemory(ctxt, NULL); |
4918 | 0 | return; |
4919 | 0 | } |
4920 | 4.67k | } |
4921 | 10.2k | q = CUR_CHAR(ql); |
4922 | 10.2k | if (q == 0) |
4923 | 6.04k | goto not_terminated; |
4924 | 4.24k | if (!IS_CHAR(q)) { |
4925 | 431 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
4926 | 431 | "xmlParseComment: invalid xmlChar value %d\n", |
4927 | 431 | q); |
4928 | 431 | xmlFree (buf); |
4929 | 431 | return; |
4930 | 431 | } |
4931 | 3.81k | NEXTL(ql); |
4932 | 3.81k | r = CUR_CHAR(rl); |
4933 | 3.81k | if (r == 0) |
4934 | 213 | goto not_terminated; |
4935 | 3.60k | if (!IS_CHAR(r)) { |
4936 | 171 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
4937 | 171 | "xmlParseComment: invalid xmlChar value %d\n", |
4938 | 171 | r); |
4939 | 171 | xmlFree (buf); |
4940 | 171 | return; |
4941 | 171 | } |
4942 | 3.43k | NEXTL(rl); |
4943 | 3.43k | cur = CUR_CHAR(l); |
4944 | 3.43k | if (cur == 0) |
4945 | 210 | goto not_terminated; |
4946 | 34.8k | while (IS_CHAR(cur) && /* checked */ |
4947 | 34.8k | ((cur != '>') || |
4948 | 33.5k | (r != '-') || (q != '-'))) { |
4949 | 31.5k | if ((r == '-') && (q == '-')) { |
4950 | 450 | xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL); |
4951 | 450 | } |
4952 | 31.5k | if (len + 5 >= size) { |
4953 | 455 | xmlChar *new_buf; |
4954 | 455 | size_t new_size; |
4955 | | |
4956 | 455 | new_size = size * 2; |
4957 | 455 | new_buf = (xmlChar *) xmlRealloc(buf, new_size); |
4958 | 455 | if (new_buf == NULL) { |
4959 | 0 | xmlFree (buf); |
4960 | 0 | xmlErrMemory(ctxt, NULL); |
4961 | 0 | return; |
4962 | 0 | } |
4963 | 455 | buf = new_buf; |
4964 | 455 | size = new_size; |
4965 | 455 | } |
4966 | 31.5k | COPY_BUF(buf, len, q); |
4967 | 31.5k | if (len > maxLength) { |
4968 | 0 | xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, |
4969 | 0 | "Comment too big found", NULL); |
4970 | 0 | xmlFree (buf); |
4971 | 0 | return; |
4972 | 0 | } |
4973 | | |
4974 | 31.5k | q = r; |
4975 | 31.5k | ql = rl; |
4976 | 31.5k | r = cur; |
4977 | 31.5k | rl = l; |
4978 | | |
4979 | 31.5k | NEXTL(l); |
4980 | 31.5k | cur = CUR_CHAR(l); |
4981 | | |
4982 | 31.5k | } |
4983 | 3.22k | buf[len] = 0; |
4984 | 3.22k | if (ctxt->instate == XML_PARSER_EOF) { |
4985 | 0 | xmlFree(buf); |
4986 | 0 | return; |
4987 | 0 | } |
4988 | 3.22k | if (cur == 0) { |
4989 | 1.11k | xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, |
4990 | 1.11k | "Comment not terminated \n<!--%.50s\n", buf); |
4991 | 2.10k | } else if (!IS_CHAR(cur)) { |
4992 | 129 | xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR, |
4993 | 129 | "xmlParseComment: invalid xmlChar value %d\n", |
4994 | 129 | cur); |
4995 | 1.97k | } else { |
4996 | 1.97k | if (inputid != ctxt->input->id) { |
4997 | 0 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
4998 | 0 | "Comment doesn't start and stop in the same" |
4999 | 0 | " entity\n"); |
5000 | 0 | } |
5001 | 1.97k | NEXT; |
5002 | 1.97k | if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && |
5003 | 1.97k | (!ctxt->disableSAX)) |
5004 | 1.78k | ctxt->sax->comment(ctxt->userData, buf); |
5005 | 1.97k | } |
5006 | 3.22k | xmlFree(buf); |
5007 | 3.22k | return; |
5008 | 6.46k | not_terminated: |
5009 | 6.46k | xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, |
5010 | 6.46k | "Comment not terminated\n", NULL); |
5011 | 6.46k | xmlFree(buf); |
5012 | 6.46k | return; |
5013 | 3.22k | } |
5014 | | |
5015 | | /** |
5016 | | * xmlParseComment: |
5017 | | * @ctxt: an XML parser context |
5018 | | * |
5019 | | * DEPRECATED: Internal function, don't use. |
5020 | | * |
5021 | | * Parse an XML (SGML) comment. Always consumes '<!'. |
5022 | | * |
5023 | | * The spec says that "For compatibility, the string "--" (double-hyphen) |
5024 | | * must not occur within comments. " |
5025 | | * |
5026 | | * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' |
5027 | | */ |
5028 | | void |
5029 | 14.2k | xmlParseComment(xmlParserCtxtPtr ctxt) { |
5030 | 14.2k | xmlChar *buf = NULL; |
5031 | 14.2k | size_t size = XML_PARSER_BUFFER_SIZE; |
5032 | 14.2k | size_t len = 0; |
5033 | 14.2k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
5034 | 0 | XML_MAX_HUGE_LENGTH : |
5035 | 14.2k | XML_MAX_TEXT_LENGTH; |
5036 | 14.2k | xmlParserInputState state; |
5037 | 14.2k | const xmlChar *in; |
5038 | 14.2k | size_t nbchar = 0; |
5039 | 14.2k | int ccol; |
5040 | 14.2k | int inputid; |
5041 | | |
5042 | | /* |
5043 | | * Check that there is a comment right here. |
5044 | | */ |
5045 | 14.2k | if ((RAW != '<') || (NXT(1) != '!')) |
5046 | 0 | return; |
5047 | 14.2k | SKIP(2); |
5048 | 14.2k | if ((RAW != '-') || (NXT(1) != '-')) |
5049 | 5 | return; |
5050 | 14.2k | state = ctxt->instate; |
5051 | 14.2k | ctxt->instate = XML_PARSER_COMMENT; |
5052 | 14.2k | inputid = ctxt->input->id; |
5053 | 14.2k | SKIP(2); |
5054 | 14.2k | GROW; |
5055 | | |
5056 | | /* |
5057 | | * Accelerated common case where input don't need to be |
5058 | | * modified before passing it to the handler. |
5059 | | */ |
5060 | 14.2k | in = ctxt->input->cur; |
5061 | 14.2k | do { |
5062 | 14.2k | if (*in == 0xA) { |
5063 | 806 | do { |
5064 | 806 | ctxt->input->line++; ctxt->input->col = 1; |
5065 | 806 | in++; |
5066 | 806 | } while (*in == 0xA); |
5067 | 218 | } |
5068 | 30.6k | get_more: |
5069 | 30.6k | ccol = ctxt->input->col; |
5070 | 58.9k | while (((*in > '-') && (*in <= 0x7F)) || |
5071 | 58.9k | ((*in >= 0x20) && (*in < '-')) || |
5072 | 58.9k | (*in == 0x09)) { |
5073 | 28.2k | in++; |
5074 | 28.2k | ccol++; |
5075 | 28.2k | } |
5076 | 30.6k | ctxt->input->col = ccol; |
5077 | 30.6k | if (*in == 0xA) { |
5078 | 667 | do { |
5079 | 667 | ctxt->input->line++; ctxt->input->col = 1; |
5080 | 667 | in++; |
5081 | 667 | } while (*in == 0xA); |
5082 | 471 | goto get_more; |
5083 | 471 | } |
5084 | 30.2k | nbchar = in - ctxt->input->cur; |
5085 | | /* |
5086 | | * save current set of data |
5087 | | */ |
5088 | 30.2k | if (nbchar > 0) { |
5089 | 18.6k | if (buf == NULL) { |
5090 | 7.12k | if ((*in == '-') && (in[1] == '-')) |
5091 | 1.77k | size = nbchar + 1; |
5092 | 5.35k | else |
5093 | 5.35k | size = XML_PARSER_BUFFER_SIZE + nbchar; |
5094 | 7.12k | buf = (xmlChar *) xmlMallocAtomic(size); |
5095 | 7.12k | if (buf == NULL) { |
5096 | 0 | xmlErrMemory(ctxt, NULL); |
5097 | 0 | ctxt->instate = state; |
5098 | 0 | return; |
5099 | 0 | } |
5100 | 7.12k | len = 0; |
5101 | 11.5k | } else if (len + nbchar + 1 >= size) { |
5102 | 931 | xmlChar *new_buf; |
5103 | 931 | size += len + nbchar + XML_PARSER_BUFFER_SIZE; |
5104 | 931 | new_buf = (xmlChar *) xmlRealloc(buf, size); |
5105 | 931 | if (new_buf == NULL) { |
5106 | 0 | xmlFree (buf); |
5107 | 0 | xmlErrMemory(ctxt, NULL); |
5108 | 0 | ctxt->instate = state; |
5109 | 0 | return; |
5110 | 0 | } |
5111 | 931 | buf = new_buf; |
5112 | 931 | } |
5113 | 18.6k | memcpy(&buf[len], ctxt->input->cur, nbchar); |
5114 | 18.6k | len += nbchar; |
5115 | 18.6k | buf[len] = 0; |
5116 | 18.6k | } |
5117 | 30.2k | if (len > maxLength) { |
5118 | 0 | xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, |
5119 | 0 | "Comment too big found", NULL); |
5120 | 0 | xmlFree (buf); |
5121 | 0 | return; |
5122 | 0 | } |
5123 | 30.2k | ctxt->input->cur = in; |
5124 | 30.2k | if (*in == 0xA) { |
5125 | 0 | in++; |
5126 | 0 | ctxt->input->line++; ctxt->input->col = 1; |
5127 | 0 | } |
5128 | 30.2k | if (*in == 0xD) { |
5129 | 973 | in++; |
5130 | 973 | if (*in == 0xA) { |
5131 | 194 | ctxt->input->cur = in; |
5132 | 194 | in++; |
5133 | 194 | ctxt->input->line++; ctxt->input->col = 1; |
5134 | 194 | goto get_more; |
5135 | 194 | } |
5136 | 779 | in--; |
5137 | 779 | } |
5138 | 30.0k | SHRINK; |
5139 | 30.0k | GROW; |
5140 | 30.0k | if (ctxt->instate == XML_PARSER_EOF) { |
5141 | 0 | xmlFree(buf); |
5142 | 0 | return; |
5143 | 0 | } |
5144 | 30.0k | in = ctxt->input->cur; |
5145 | 30.0k | if (*in == '-') { |
5146 | 19.7k | if (in[1] == '-') { |
5147 | 15.0k | if (in[2] == '>') { |
5148 | 3.96k | if (ctxt->input->id != inputid) { |
5149 | 0 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
5150 | 0 | "comment doesn't start and stop in the" |
5151 | 0 | " same entity\n"); |
5152 | 0 | } |
5153 | 3.96k | SKIP(3); |
5154 | 3.96k | if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) && |
5155 | 3.96k | (!ctxt->disableSAX)) { |
5156 | 3.25k | if (buf != NULL) |
5157 | 1.24k | ctxt->sax->comment(ctxt->userData, buf); |
5158 | 2.00k | else |
5159 | 2.00k | ctxt->sax->comment(ctxt->userData, BAD_CAST ""); |
5160 | 3.25k | } |
5161 | 3.96k | if (buf != NULL) |
5162 | 1.51k | xmlFree(buf); |
5163 | 3.96k | if (ctxt->instate != XML_PARSER_EOF) |
5164 | 3.96k | ctxt->instate = state; |
5165 | 3.96k | return; |
5166 | 3.96k | } |
5167 | 11.1k | if (buf != NULL) { |
5168 | 7.12k | xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, |
5169 | 7.12k | "Double hyphen within comment: " |
5170 | 7.12k | "<!--%.50s\n", |
5171 | 7.12k | buf); |
5172 | 7.12k | } else |
5173 | 4.00k | xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, |
5174 | 4.00k | "Double hyphen within comment\n", NULL); |
5175 | 11.1k | if (ctxt->instate == XML_PARSER_EOF) { |
5176 | 0 | xmlFree(buf); |
5177 | 0 | return; |
5178 | 0 | } |
5179 | 11.1k | in++; |
5180 | 11.1k | ctxt->input->col++; |
5181 | 11.1k | } |
5182 | 15.7k | in++; |
5183 | 15.7k | ctxt->input->col++; |
5184 | 15.7k | goto get_more; |
5185 | 19.7k | } |
5186 | 30.0k | } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a)); |
5187 | 10.2k | xmlParseCommentComplex(ctxt, buf, len, size); |
5188 | 10.2k | ctxt->instate = state; |
5189 | 10.2k | return; |
5190 | 14.2k | } |
5191 | | |
5192 | | |
5193 | | /** |
5194 | | * xmlParsePITarget: |
5195 | | * @ctxt: an XML parser context |
5196 | | * |
5197 | | * DEPRECATED: Internal function, don't use. |
5198 | | * |
5199 | | * parse the name of a PI |
5200 | | * |
5201 | | * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) |
5202 | | * |
5203 | | * Returns the PITarget name or NULL |
5204 | | */ |
5205 | | |
5206 | | const xmlChar * |
5207 | 32.1k | xmlParsePITarget(xmlParserCtxtPtr ctxt) { |
5208 | 32.1k | const xmlChar *name; |
5209 | | |
5210 | 32.1k | name = xmlParseName(ctxt); |
5211 | 32.1k | if ((name != NULL) && |
5212 | 32.1k | ((name[0] == 'x') || (name[0] == 'X')) && |
5213 | 32.1k | ((name[1] == 'm') || (name[1] == 'M')) && |
5214 | 32.1k | ((name[2] == 'l') || (name[2] == 'L'))) { |
5215 | 1.62k | int i; |
5216 | 1.62k | if ((name[0] == 'x') && (name[1] == 'm') && |
5217 | 1.62k | (name[2] == 'l') && (name[3] == 0)) { |
5218 | 208 | xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME, |
5219 | 208 | "XML declaration allowed only at the start of the document\n"); |
5220 | 208 | return(name); |
5221 | 1.41k | } else if (name[3] == 0) { |
5222 | 514 | xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL); |
5223 | 514 | return(name); |
5224 | 514 | } |
5225 | 2.50k | for (i = 0;;i++) { |
5226 | 2.50k | if (xmlW3CPIs[i] == NULL) break; |
5227 | 1.79k | if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i])) |
5228 | 194 | return(name); |
5229 | 1.79k | } |
5230 | 707 | xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME, |
5231 | 707 | "xmlParsePITarget: invalid name prefix 'xml'\n", |
5232 | 707 | NULL, NULL); |
5233 | 707 | } |
5234 | 31.2k | if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) { |
5235 | 1.01k | xmlNsErr(ctxt, XML_NS_ERR_COLON, |
5236 | 1.01k | "colons are forbidden from PI names '%s'\n", name, NULL, NULL); |
5237 | 1.01k | } |
5238 | 31.2k | return(name); |
5239 | 32.1k | } |
5240 | | |
5241 | | #ifdef LIBXML_CATALOG_ENABLED |
5242 | | /** |
5243 | | * xmlParseCatalogPI: |
5244 | | * @ctxt: an XML parser context |
5245 | | * @catalog: the PI value string |
5246 | | * |
5247 | | * parse an XML Catalog Processing Instruction. |
5248 | | * |
5249 | | * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?> |
5250 | | * |
5251 | | * Occurs only if allowed by the user and if happening in the Misc |
5252 | | * part of the document before any doctype information |
5253 | | * This will add the given catalog to the parsing context in order |
5254 | | * to be used if there is a resolution need further down in the document |
5255 | | */ |
5256 | | |
5257 | | static void |
5258 | 583 | xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) { |
5259 | 583 | xmlChar *URL = NULL; |
5260 | 583 | const xmlChar *tmp, *base; |
5261 | 583 | xmlChar marker; |
5262 | | |
5263 | 583 | tmp = catalog; |
5264 | 583 | while (IS_BLANK_CH(*tmp)) tmp++; |
5265 | 583 | if (xmlStrncmp(tmp, BAD_CAST"catalog", 7)) |
5266 | 168 | goto error; |
5267 | 415 | tmp += 7; |
5268 | 428 | while (IS_BLANK_CH(*tmp)) tmp++; |
5269 | 415 | if (*tmp != '=') { |
5270 | 58 | return; |
5271 | 58 | } |
5272 | 357 | tmp++; |
5273 | 506 | while (IS_BLANK_CH(*tmp)) tmp++; |
5274 | 357 | marker = *tmp; |
5275 | 357 | if ((marker != '\'') && (marker != '"')) |
5276 | 94 | goto error; |
5277 | 263 | tmp++; |
5278 | 263 | base = tmp; |
5279 | 873 | while ((*tmp != 0) && (*tmp != marker)) tmp++; |
5280 | 263 | if (*tmp == 0) |
5281 | 103 | goto error; |
5282 | 160 | URL = xmlStrndup(base, tmp - base); |
5283 | 160 | tmp++; |
5284 | 432 | while (IS_BLANK_CH(*tmp)) tmp++; |
5285 | 160 | if (*tmp != 0) |
5286 | 55 | goto error; |
5287 | | |
5288 | 105 | if (URL != NULL) { |
5289 | 105 | ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL); |
5290 | 105 | xmlFree(URL); |
5291 | 105 | } |
5292 | 105 | return; |
5293 | | |
5294 | 420 | error: |
5295 | 420 | xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI, |
5296 | 420 | "Catalog PI syntax error: %s\n", |
5297 | 420 | catalog, NULL); |
5298 | 420 | if (URL != NULL) |
5299 | 55 | xmlFree(URL); |
5300 | 420 | } |
5301 | | #endif |
5302 | | |
5303 | | /** |
5304 | | * xmlParsePI: |
5305 | | * @ctxt: an XML parser context |
5306 | | * |
5307 | | * DEPRECATED: Internal function, don't use. |
5308 | | * |
5309 | | * parse an XML Processing Instruction. |
5310 | | * |
5311 | | * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' |
5312 | | * |
5313 | | * The processing is transferred to SAX once parsed. |
5314 | | */ |
5315 | | |
5316 | | void |
5317 | 32.1k | xmlParsePI(xmlParserCtxtPtr ctxt) { |
5318 | 32.1k | xmlChar *buf = NULL; |
5319 | 32.1k | size_t len = 0; |
5320 | 32.1k | size_t size = XML_PARSER_BUFFER_SIZE; |
5321 | 32.1k | size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ? |
5322 | 0 | XML_MAX_HUGE_LENGTH : |
5323 | 32.1k | XML_MAX_TEXT_LENGTH; |
5324 | 32.1k | int cur, l; |
5325 | 32.1k | const xmlChar *target; |
5326 | 32.1k | xmlParserInputState state; |
5327 | | |
5328 | 32.1k | if ((RAW == '<') && (NXT(1) == '?')) { |
5329 | 32.1k | int inputid = ctxt->input->id; |
5330 | 32.1k | state = ctxt->instate; |
5331 | 32.1k | ctxt->instate = XML_PARSER_PI; |
5332 | | /* |
5333 | | * this is a Processing Instruction. |
5334 | | */ |
5335 | 32.1k | SKIP(2); |
5336 | | |
5337 | | /* |
5338 | | * Parse the target name and check for special support like |
5339 | | * namespace. |
5340 | | */ |
5341 | 32.1k | target = xmlParsePITarget(ctxt); |
5342 | 32.1k | if (target != NULL) { |
5343 | 24.4k | if ((RAW == '?') && (NXT(1) == '>')) { |
5344 | 6.38k | if (inputid != ctxt->input->id) { |
5345 | 0 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
5346 | 0 | "PI declaration doesn't start and stop in" |
5347 | 0 | " the same entity\n"); |
5348 | 0 | } |
5349 | 6.38k | SKIP(2); |
5350 | | |
5351 | | /* |
5352 | | * SAX: PI detected. |
5353 | | */ |
5354 | 6.38k | if ((ctxt->sax) && (!ctxt->disableSAX) && |
5355 | 6.38k | (ctxt->sax->processingInstruction != NULL)) |
5356 | 5.35k | ctxt->sax->processingInstruction(ctxt->userData, |
5357 | 5.35k | target, NULL); |
5358 | 6.38k | if (ctxt->instate != XML_PARSER_EOF) |
5359 | 6.38k | ctxt->instate = state; |
5360 | 6.38k | return; |
5361 | 6.38k | } |
5362 | 18.0k | buf = (xmlChar *) xmlMallocAtomic(size); |
5363 | 18.0k | if (buf == NULL) { |
5364 | 0 | xmlErrMemory(ctxt, NULL); |
5365 | 0 | ctxt->instate = state; |
5366 | 0 | return; |
5367 | 0 | } |
5368 | 18.0k | if (SKIP_BLANKS == 0) { |
5369 | 15.9k | xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED, |
5370 | 15.9k | "ParsePI: PI %s space expected\n", target); |
5371 | 15.9k | } |
5372 | 18.0k | cur = CUR_CHAR(l); |
5373 | 104k | while (IS_CHAR(cur) && /* checked */ |
5374 | 104k | ((cur != '?') || (NXT(1) != '>'))) { |
5375 | 86.8k | if (len + 5 >= size) { |
5376 | 695 | xmlChar *tmp; |
5377 | 695 | size_t new_size = size * 2; |
5378 | 695 | tmp = (xmlChar *) xmlRealloc(buf, new_size); |
5379 | 695 | if (tmp == NULL) { |
5380 | 0 | xmlErrMemory(ctxt, NULL); |
5381 | 0 | xmlFree(buf); |
5382 | 0 | ctxt->instate = state; |
5383 | 0 | return; |
5384 | 0 | } |
5385 | 695 | buf = tmp; |
5386 | 695 | size = new_size; |
5387 | 695 | } |
5388 | 86.8k | COPY_BUF(buf, len, cur); |
5389 | 86.8k | if (len > maxLength) { |
5390 | 0 | xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, |
5391 | 0 | "PI %s too big found", target); |
5392 | 0 | xmlFree(buf); |
5393 | 0 | ctxt->instate = state; |
5394 | 0 | return; |
5395 | 0 | } |
5396 | 86.8k | NEXTL(l); |
5397 | 86.8k | cur = CUR_CHAR(l); |
5398 | 86.8k | } |
5399 | 18.0k | buf[len] = 0; |
5400 | 18.0k | if (ctxt->instate == XML_PARSER_EOF) { |
5401 | 1 | xmlFree(buf); |
5402 | 1 | return; |
5403 | 1 | } |
5404 | 18.0k | if (cur != '?') { |
5405 | 13.7k | xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED, |
5406 | 13.7k | "ParsePI: PI %s never end ...\n", target); |
5407 | 13.7k | } else { |
5408 | 4.27k | if (inputid != ctxt->input->id) { |
5409 | 69 | xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY, |
5410 | 69 | "PI declaration doesn't start and stop in" |
5411 | 69 | " the same entity\n"); |
5412 | 69 | } |
5413 | 4.27k | SKIP(2); |
5414 | | |
5415 | 4.27k | #ifdef LIBXML_CATAL
|