/src/libxml2/parserInternals.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * parserInternals.c : Internal routines (and obsolete ones) needed for the  | 
3  |  |  *                     XML and HTML parsers.  | 
4  |  |  *  | 
5  |  |  * See Copyright for the status of this software.  | 
6  |  |  *  | 
7  |  |  * daniel@veillard.com  | 
8  |  |  */  | 
9  |  |  | 
10  |  | #define IN_LIBXML  | 
11  |  | #include "libxml.h"  | 
12  |  |  | 
13  |  | #if defined(_WIN32)  | 
14  |  | #define XML_DIR_SEP '\\'  | 
15  |  | #else  | 
16  |  | #define XML_DIR_SEP '/'  | 
17  |  | #endif  | 
18  |  |  | 
19  |  | #include <string.h>  | 
20  |  | #include <ctype.h>  | 
21  |  | #include <stdlib.h>  | 
22  |  |  | 
23  |  | #include <libxml/xmlmemory.h>  | 
24  |  | #include <libxml/tree.h>  | 
25  |  | #include <libxml/parser.h>  | 
26  |  | #include <libxml/parserInternals.h>  | 
27  |  | #include <libxml/valid.h>  | 
28  |  | #include <libxml/entities.h>  | 
29  |  | #include <libxml/xmlerror.h>  | 
30  |  | #include <libxml/encoding.h>  | 
31  |  | #include <libxml/valid.h>  | 
32  |  | #include <libxml/xmlIO.h>  | 
33  |  | #include <libxml/uri.h>  | 
34  |  | #include <libxml/dict.h>  | 
35  |  | #include <libxml/SAX.h>  | 
36  |  | #ifdef LIBXML_CATALOG_ENABLED  | 
37  |  | #include <libxml/catalog.h>  | 
38  |  | #endif  | 
39  |  | #include <libxml/globals.h>  | 
40  |  | #include <libxml/chvalid.h>  | 
41  |  |  | 
/*
 * Shorthands for the read position and the end of the active input of a
 * parser context. The argument is parenthesized so that any pointer
 * expression (for instance "&ctx") can be passed safely.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read position has not run past the end of the input. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
45  |  |  | 
46  |  | #include "private/buf.h"  | 
47  |  | #include "private/enc.h"  | 
48  |  | #include "private/error.h"  | 
49  |  | #include "private/io.h"  | 
50  |  | #include "private/parser.h"  | 
51  |  |  | 
52  |  | /*  | 
53  |  |  * Various global defaults for parsing  | 
54  |  |  */  | 
55  |  |  | 
56  |  | /**  | 
57  |  |  * xmlCheckVersion:  | 
58  |  |  * @version: the include version number  | 
59  |  |  *  | 
60  |  |  * check the compiled lib version against the include one.  | 
61  |  |  * This can warn or immediately kill the application  | 
62  |  |  */  | 
63  |  | void  | 
64  | 0  | xmlCheckVersion(int version) { | 
65  | 0  |     int myversion = LIBXML_VERSION;  | 
66  |  | 
  | 
67  | 0  |     xmlInitParser();  | 
68  |  | 
  | 
69  | 0  |     if ((myversion / 10000) != (version / 10000)) { | 
70  | 0  |   xmlGenericError(xmlGenericErrorContext,  | 
71  | 0  |     "Fatal: program compiled against libxml %d using libxml %d\n",  | 
72  | 0  |     (version / 10000), (myversion / 10000));  | 
73  | 0  |   fprintf(stderr,  | 
74  | 0  |     "Fatal: program compiled against libxml %d using libxml %d\n",  | 
75  | 0  |     (version / 10000), (myversion / 10000));  | 
76  | 0  |     }  | 
77  | 0  |     if ((myversion / 100) < (version / 100)) { | 
78  | 0  |   xmlGenericError(xmlGenericErrorContext,  | 
79  | 0  |     "Warning: program compiled against libxml %d using older %d\n",  | 
80  | 0  |     (version / 100), (myversion / 100));  | 
81  | 0  |     }  | 
82  | 0  | }  | 
83  |  |  | 
84  |  |  | 
85  |  | /************************************************************************  | 
86  |  |  *                  *  | 
87  |  |  *    Some factorized error routines        *  | 
88  |  |  *                  *  | 
89  |  |  ************************************************************************/  | 
90  |  |  | 
91  |  |  | 
92  |  | /**  | 
93  |  |  * xmlErrMemory:  | 
94  |  |  * @ctxt:  an XML parser context  | 
95  |  |  * @extra:  extra information  | 
96  |  |  *  | 
97  |  |  * Handle a redefinition of attribute error  | 
98  |  |  */  | 
99  |  | void  | 
100  |  | xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)  | 
101  | 0  | { | 
102  | 0  |     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&  | 
103  | 0  |         (ctxt->instate == XML_PARSER_EOF))  | 
104  | 0  |   return;  | 
105  | 0  |     if (ctxt != NULL) { | 
106  | 0  |         ctxt->errNo = XML_ERR_NO_MEMORY;  | 
107  | 0  |         ctxt->instate = XML_PARSER_EOF;  | 
108  | 0  |         ctxt->disableSAX = 1;  | 
109  | 0  |     }  | 
110  | 0  |     if (extra)  | 
111  | 0  |         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,  | 
112  | 0  |                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,  | 
113  | 0  |                         NULL, NULL, 0, 0,  | 
114  | 0  |                         "Memory allocation failed : %s\n", extra);  | 
115  | 0  |     else  | 
116  | 0  |         __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,  | 
117  | 0  |                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,  | 
118  | 0  |                         NULL, NULL, 0, 0, "Memory allocation failed\n");  | 
119  | 0  | }  | 
120  |  |  | 
121  |  | /**  | 
122  |  |  * __xmlErrEncoding:  | 
123  |  |  * @ctxt:  an XML parser context  | 
124  |  |  * @xmlerr:  the error number  | 
125  |  |  * @msg:  the error message  | 
126  |  |  * @str1:  an string info  | 
127  |  |  * @str2:  an string info  | 
128  |  |  *  | 
129  |  |  * Handle an encoding error  | 
130  |  |  */  | 
131  |  | void  | 
132  |  | __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,  | 
133  |  |                  const char *msg, const xmlChar * str1, const xmlChar * str2)  | 
134  | 1.20M  | { | 
135  | 1.20M  |     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&  | 
136  | 1.20M  |         (ctxt->instate == XML_PARSER_EOF))  | 
137  | 0  |   return;  | 
138  | 1.20M  |     if (ctxt != NULL)  | 
139  | 1.20M  |         ctxt->errNo = xmlerr;  | 
140  | 1.20M  |     __xmlRaiseError(NULL, NULL, NULL,  | 
141  | 1.20M  |                     ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,  | 
142  | 1.20M  |                     NULL, 0, (const char *) str1, (const char *) str2,  | 
143  | 1.20M  |                     NULL, 0, 0, msg, str1, str2);  | 
144  | 1.20M  |     if (ctxt != NULL) { | 
145  | 1.20M  |         ctxt->wellFormed = 0;  | 
146  | 1.20M  |         if (ctxt->recovery == 0)  | 
147  | 118k  |             ctxt->disableSAX = 1;  | 
148  | 1.20M  |     }  | 
149  | 1.20M  | }  | 
150  |  |  | 
151  |  | /**  | 
152  |  |  * xmlErrInternal:  | 
153  |  |  * @ctxt:  an XML parser context  | 
154  |  |  * @msg:  the error message  | 
155  |  |  * @str:  error information  | 
156  |  |  *  | 
157  |  |  * Handle an internal error  | 
158  |  |  */  | 
159  |  | static void LIBXML_ATTR_FORMAT(2,0)  | 
160  |  | xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)  | 
161  | 425  | { | 
162  | 425  |     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&  | 
163  | 425  |         (ctxt->instate == XML_PARSER_EOF))  | 
164  | 0  |   return;  | 
165  | 425  |     if (ctxt != NULL)  | 
166  | 425  |         ctxt->errNo = XML_ERR_INTERNAL_ERROR;  | 
167  | 425  |     __xmlRaiseError(NULL, NULL, NULL,  | 
168  | 425  |                     ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,  | 
169  | 425  |                     XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,  | 
170  | 425  |                     0, 0, msg, str);  | 
171  | 425  |     if (ctxt != NULL) { | 
172  | 425  |         ctxt->wellFormed = 0;  | 
173  | 425  |         if (ctxt->recovery == 0)  | 
174  | 259  |             ctxt->disableSAX = 1;  | 
175  | 425  |     }  | 
176  | 425  | }  | 
177  |  |  | 
178  |  | /**  | 
179  |  |  * xmlErrEncodingInt:  | 
180  |  |  * @ctxt:  an XML parser context  | 
181  |  |  * @error:  the error number  | 
182  |  |  * @msg:  the error message  | 
183  |  |  * @val:  an integer value  | 
184  |  |  *  | 
185  |  |  * n encoding error  | 
186  |  |  */  | 
187  |  | static void LIBXML_ATTR_FORMAT(3,0)  | 
188  |  | xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,  | 
189  |  |                   const char *msg, int val)  | 
190  | 353k  | { | 
191  | 353k  |     if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&  | 
192  | 353k  |         (ctxt->instate == XML_PARSER_EOF))  | 
193  | 0  |   return;  | 
194  | 353k  |     if (ctxt != NULL)  | 
195  | 351k  |         ctxt->errNo = error;  | 
196  | 353k  |     __xmlRaiseError(NULL, NULL, NULL,  | 
197  | 353k  |                     ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,  | 
198  | 353k  |                     NULL, 0, NULL, NULL, NULL, val, 0, msg, val);  | 
199  | 353k  |     if (ctxt != NULL) { | 
200  | 351k  |         ctxt->wellFormed = 0;  | 
201  | 351k  |         if (ctxt->recovery == 0)  | 
202  | 57.9k  |             ctxt->disableSAX = 1;  | 
203  | 351k  |     }  | 
204  | 353k  | }  | 
205  |  |  | 
/**
 * xmlIsLetter:
 * @c:  a Unicode code point (int)
 *
 * Check whether the character matches the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
219  |  |  | 
220  |  | /************************************************************************  | 
221  |  |  *                  *  | 
222  |  |  *    Input handling functions for progressive parsing  *  | 
223  |  |  *                  *  | 
224  |  |  ************************************************************************/  | 
225  |  |  | 
226  |  | /* #define DEBUG_INPUT */  | 
227  |  | /* #define DEBUG_STACK */  | 
228  |  | /* #define DEBUG_PUSH */  | 
229  |  |  | 
230  |  |  | 
/* we need to keep enough input to show errors in context */
#define LINE_LEN        80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * Debug-only consistency check of a parser input against its backing
 * buffer: reports a stale base pointer or a cursor outside the used part
 * of the buffer, then dumps the current state.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
		"xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Each argument is converted to the exact type its conversion
     * specifier expects: pointers go through %p, the cursor offset and
     * the buffer usage are widened to long / unsigned long.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (void *) in, (void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
259  |  |  | 
260  |  |  | 
261  |  | /**  | 
262  |  |  * xmlParserInputRead:  | 
263  |  |  * @in:  an XML parser input  | 
264  |  |  * @len:  an indicative size for the lookahead  | 
265  |  |  *  | 
266  |  |  * DEPRECATED: This function was internal and is deprecated.  | 
267  |  |  *  | 
268  |  |  * Returns -1 as this is an error to use it.  | 
269  |  |  */  | 
270  |  | int  | 
271  | 0  | xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) { | 
272  | 0  |     return(-1);  | 
273  | 0  | }  | 
274  |  |  | 
275  |  | /**  | 
276  |  |  * xmlParserInputGrow:  | 
277  |  |  * @in:  an XML parser input  | 
278  |  |  * @len:  an indicative size for the lookahead  | 
279  |  |  *  | 
280  |  |  * DEPRECATED: Don't use.  | 
281  |  |  *  | 
282  |  |  * This function increase the input for the parser. It tries to  | 
283  |  |  * preserve pointers to the input buffer, and keep already read data  | 
284  |  |  *  | 
285  |  |  * Returns the amount of char read, or -1 in case of error, 0 indicate the  | 
286  |  |  * end of this entity  | 
287  |  |  */  | 
288  |  | int  | 
289  | 1.62G  | xmlParserInputGrow(xmlParserInputPtr in, int len) { | 
290  | 1.62G  |     int ret;  | 
291  | 1.62G  |     size_t indx;  | 
292  |  |  | 
293  | 1.62G  |     if ((in == NULL) || (len < 0)) return(-1);  | 
294  |  | #ifdef DEBUG_INPUT  | 
295  |  |     xmlGenericError(xmlGenericErrorContext, "Grow\n");  | 
296  |  | #endif  | 
297  | 1.62G  |     if (in->buf == NULL) return(-1);  | 
298  | 23.0M  |     if (in->base == NULL) return(-1);  | 
299  | 23.0M  |     if (in->cur == NULL) return(-1);  | 
300  | 23.0M  |     if (in->buf->buffer == NULL) return(-1);  | 
301  |  |  | 
302  |  |     /* Don't grow memory buffers. */  | 
303  | 23.0M  |     if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))  | 
304  | 22.0M  |         return(0);  | 
305  |  |  | 
306  | 1.07M  |     CHECK_BUFFER(in);  | 
307  |  |  | 
308  | 1.07M  |     indx = in->cur - in->base;  | 
309  | 1.07M  |     if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) { | 
310  |  |  | 
311  | 99.7k  |   CHECK_BUFFER(in);  | 
312  |  |  | 
313  | 99.7k  |         return(0);  | 
314  | 99.7k  |     }  | 
315  | 980k  |     ret = xmlParserInputBufferGrow(in->buf, len);  | 
316  |  |  | 
317  | 980k  |     in->base = xmlBufContent(in->buf->buffer);  | 
318  | 980k  |     if (in->base == NULL) { | 
319  | 0  |         in->base = BAD_CAST "";  | 
320  | 0  |         in->cur = in->base;  | 
321  | 0  |         in->end = in->base;  | 
322  | 0  |         return(-1);  | 
323  | 0  |     }  | 
324  | 980k  |     in->cur = in->base + indx;  | 
325  | 980k  |     in->end = xmlBufEnd(in->buf->buffer);  | 
326  |  |  | 
327  | 980k  |     CHECK_BUFFER(in);  | 
328  |  |  | 
329  | 980k  |     return(ret);  | 
330  | 980k  | }  | 
331  |  |  | 
332  |  | /**  | 
333  |  |  * xmlParserInputShrink:  | 
334  |  |  * @in:  an XML parser input  | 
335  |  |  *  | 
336  |  |  * This function removes used input for the parser.  | 
337  |  |  */  | 
338  |  | void  | 
339  | 405k  | xmlParserInputShrink(xmlParserInputPtr in) { | 
340  | 405k  |     size_t used;  | 
341  | 405k  |     size_t ret;  | 
342  |  |  | 
343  |  | #ifdef DEBUG_INPUT  | 
344  |  |     xmlGenericError(xmlGenericErrorContext, "Shrink\n");  | 
345  |  | #endif  | 
346  | 405k  |     if (in == NULL) return;  | 
347  | 405k  |     if (in->buf == NULL) return;  | 
348  | 405k  |     if (in->base == NULL) return;  | 
349  | 405k  |     if (in->cur == NULL) return;  | 
350  | 405k  |     if (in->buf->buffer == NULL) return;  | 
351  |  |  | 
352  | 405k  |     CHECK_BUFFER(in);  | 
353  |  |  | 
354  | 405k  |     used = in->cur - in->base;  | 
355  |  |     /*  | 
356  |  |      * Do not shrink on large buffers whose only a tiny fraction  | 
357  |  |      * was consumed  | 
358  |  |      */  | 
359  | 405k  |     if (used > INPUT_CHUNK) { | 
360  | 405k  |   ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);  | 
361  | 405k  |   if (ret > 0) { | 
362  | 405k  |             used -= ret;  | 
363  | 405k  |             if ((ret > ULONG_MAX) ||  | 
364  | 405k  |                 (in->consumed > ULONG_MAX - (unsigned long)ret))  | 
365  | 0  |                 in->consumed = ULONG_MAX;  | 
366  | 405k  |             else  | 
367  | 405k  |                 in->consumed += ret;  | 
368  | 405k  |   }  | 
369  | 405k  |     }  | 
370  |  |  | 
371  | 405k  |     if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) { | 
372  | 260k  |         xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);  | 
373  | 260k  |     }  | 
374  |  |  | 
375  | 405k  |     in->base = xmlBufContent(in->buf->buffer);  | 
376  | 405k  |     in->cur = in->base + used;  | 
377  | 405k  |     in->end = xmlBufEnd(in->buf->buffer);  | 
378  |  |  | 
379  | 405k  |     CHECK_BUFFER(in);  | 
380  | 405k  | }  | 
381  |  |  | 
382  |  | /************************************************************************  | 
383  |  |  *                  *  | 
384  |  |  *    UTF8 character input and related functions    *  | 
385  |  |  *                  *  | 
386  |  |  ************************************************************************/  | 
387  |  |  | 
388  |  | /**  | 
389  |  |  * xmlNextChar:  | 
390  |  |  * @ctxt:  the XML parser context  | 
391  |  |  *  | 
392  |  |  * Skip to the next char input char.  | 
393  |  |  */  | 
394  |  |  | 
395  |  | void  | 
396  |  | xmlNextChar(xmlParserCtxtPtr ctxt)  | 
397  | 2.12G  | { | 
398  | 2.12G  |     if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||  | 
399  | 2.12G  |         (ctxt->input == NULL))  | 
400  | 20  |         return;  | 
401  |  |  | 
402  | 2.12G  |     if (!(VALID_CTXT(ctxt))) { | 
403  | 0  |         xmlErrInternal(ctxt, "Parser input data memory error\n", NULL);  | 
404  | 0  |   ctxt->errNo = XML_ERR_INTERNAL_ERROR;  | 
405  | 0  |         xmlStopParser(ctxt);  | 
406  | 0  |   return;  | 
407  | 0  |     }  | 
408  |  |  | 
409  | 2.12G  |     if ((ctxt->input->cur >= ctxt->input->end) &&  | 
410  | 2.12G  |         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { | 
411  | 1.23k  |         return;  | 
412  | 1.23k  |     }  | 
413  |  |  | 
414  | 2.12G  |     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 
415  | 1.54G  |         const unsigned char *cur;  | 
416  | 1.54G  |         unsigned char c;  | 
417  |  |  | 
418  |  |         /*  | 
419  |  |          *   2.11 End-of-Line Handling  | 
420  |  |          *   the literal two-character sequence "#xD#xA" or a standalone  | 
421  |  |          *   literal #xD, an XML processor must pass to the application  | 
422  |  |          *   the single character #xA.  | 
423  |  |          */  | 
424  | 1.54G  |         if (*(ctxt->input->cur) == '\n') { | 
425  | 53.4M  |             ctxt->input->line++; ctxt->input->col = 1;  | 
426  | 53.4M  |         } else  | 
427  | 1.49G  |             ctxt->input->col++;  | 
428  |  |  | 
429  |  |         /*  | 
430  |  |          * We are supposed to handle UTF8, check it's valid  | 
431  |  |          * From rfc2044: encoding of the Unicode values on UTF-8:  | 
432  |  |          *  | 
433  |  |          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)  | 
434  |  |          * 0000 0000-0000 007F   0xxxxxxx  | 
435  |  |          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx  | 
436  |  |          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
437  |  |          *  | 
438  |  |          * Check for the 0x110000 limit too  | 
439  |  |          */  | 
440  | 1.54G  |         cur = ctxt->input->cur;  | 
441  |  |  | 
442  | 1.54G  |         c = *cur;  | 
443  | 1.54G  |         if (c & 0x80) { | 
444  | 340k  |             if (c == 0xC0)  | 
445  | 252  |           goto encoding_error;  | 
446  | 339k  |             if (cur[1] == 0) { | 
447  | 2.02k  |                 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
448  | 2.02k  |                 cur = ctxt->input->cur;  | 
449  | 2.02k  |             }  | 
450  | 339k  |             if ((cur[1] & 0xc0) != 0x80)  | 
451  | 34.7k  |                 goto encoding_error;  | 
452  | 305k  |             if ((c & 0xe0) == 0xe0) { | 
453  | 174k  |                 unsigned int val;  | 
454  |  |  | 
455  | 174k  |                 if (cur[2] == 0) { | 
456  | 346  |                     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
457  | 346  |                     cur = ctxt->input->cur;  | 
458  | 346  |                 }  | 
459  | 174k  |                 if ((cur[2] & 0xc0) != 0x80)  | 
460  | 1.03k  |                     goto encoding_error;  | 
461  | 173k  |                 if ((c & 0xf0) == 0xf0) { | 
462  | 19.0k  |                     if (cur[3] == 0) { | 
463  | 352  |                         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
464  | 352  |                         cur = ctxt->input->cur;  | 
465  | 352  |                     }  | 
466  | 19.0k  |                     if (((c & 0xf8) != 0xf0) ||  | 
467  | 19.0k  |                         ((cur[3] & 0xc0) != 0x80))  | 
468  | 707  |                         goto encoding_error;  | 
469  |  |                     /* 4-byte code */  | 
470  | 18.3k  |                     ctxt->input->cur += 4;  | 
471  | 18.3k  |                     val = (cur[0] & 0x7) << 18;  | 
472  | 18.3k  |                     val |= (cur[1] & 0x3f) << 12;  | 
473  | 18.3k  |                     val |= (cur[2] & 0x3f) << 6;  | 
474  | 18.3k  |                     val |= cur[3] & 0x3f;  | 
475  | 154k  |                 } else { | 
476  |  |                     /* 3-byte code */  | 
477  | 154k  |                     ctxt->input->cur += 3;  | 
478  | 154k  |                     val = (cur[0] & 0xf) << 12;  | 
479  | 154k  |                     val |= (cur[1] & 0x3f) << 6;  | 
480  | 154k  |                     val |= cur[2] & 0x3f;  | 
481  | 154k  |                 }  | 
482  | 172k  |                 if (((val > 0xd7ff) && (val < 0xe000)) ||  | 
483  | 172k  |                     ((val > 0xfffd) && (val < 0x10000)) ||  | 
484  | 172k  |                     (val >= 0x110000)) { | 
485  | 76.8k  |     xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,  | 
486  | 76.8k  |           "Char 0x%X out of allowed range\n",  | 
487  | 76.8k  |           val);  | 
488  | 76.8k  |                 }  | 
489  | 172k  |             } else  | 
490  |  |                 /* 2-byte code */  | 
491  | 130k  |                 ctxt->input->cur += 2;  | 
492  | 305k  |         } else  | 
493  |  |             /* 1-byte code */  | 
494  | 1.54G  |             ctxt->input->cur++;  | 
495  | 1.54G  |     } else { | 
496  |  |         /*  | 
497  |  |          * Assume it's a fixed length encoding (1) with  | 
498  |  |          * a compatible encoding for the ASCII set, since  | 
499  |  |          * XML constructs only use < 128 chars  | 
500  |  |          */  | 
501  |  |  | 
502  | 574M  |         if (*(ctxt->input->cur) == '\n') { | 
503  | 7.71M  |             ctxt->input->line++; ctxt->input->col = 1;  | 
504  | 7.71M  |         } else  | 
505  | 567M  |             ctxt->input->col++;  | 
506  | 574M  |         ctxt->input->cur++;  | 
507  | 574M  |     }  | 
508  | 2.12G  |     if (*ctxt->input->cur == 0)  | 
509  | 269M  |         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
510  | 2.12G  |     return;  | 
511  | 36.7k  | encoding_error:  | 
512  |  |     /*  | 
513  |  |      * If we detect an UTF8 error that probably mean that the  | 
514  |  |      * input encoding didn't get properly advertised in the  | 
515  |  |      * declaration header. Report the error and switch the encoding  | 
516  |  |      * to ISO-Latin-1 (if you don't like this policy, just declare the  | 
517  |  |      * encoding !)  | 
518  |  |      */  | 
519  | 36.7k  |     if ((ctxt == NULL) || (ctxt->input == NULL) ||  | 
520  | 36.7k  |         (ctxt->input->end - ctxt->input->cur < 4)) { | 
521  | 3.58k  |   __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,  | 
522  | 3.58k  |          "Input is not proper UTF-8, indicate encoding !\n",  | 
523  | 3.58k  |          NULL, NULL);  | 
524  | 33.1k  |     } else { | 
525  | 33.1k  |         char buffer[150];  | 
526  |  |  | 
527  | 33.1k  |   snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",  | 
528  | 33.1k  |       ctxt->input->cur[0], ctxt->input->cur[1],  | 
529  | 33.1k  |       ctxt->input->cur[2], ctxt->input->cur[3]);  | 
530  | 33.1k  |   __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,  | 
531  | 33.1k  |          "Input is not proper UTF-8, indicate encoding !\n%s",  | 
532  | 33.1k  |          BAD_CAST buffer, NULL);  | 
533  | 33.1k  |     }  | 
534  | 36.7k  |     ctxt->charset = XML_CHAR_ENCODING_8859_1;  | 
535  | 36.7k  |     ctxt->input->cur++;  | 
536  | 36.7k  |     return;  | 
537  | 2.12G  | }  | 
538  |  |  | 
539  |  | /**  | 
540  |  |  * xmlCurrentChar:  | 
541  |  |  * @ctxt:  the XML parser context  | 
542  |  |  * @len:  pointer to the length of the char read  | 
543  |  |  *  | 
544  |  |  * The current char value, if using UTF-8 this may actually span multiple  | 
545  |  |  * bytes in the input buffer. Implement the end of line normalization:  | 
546  |  |  * 2.11 End-of-Line Handling  | 
547  |  |  * Wherever an external parsed entity or the literal entity value  | 
548  |  |  * of an internal parsed entity contains either the literal two-character  | 
549  |  |  * sequence "#xD#xA" or a standalone literal #xD, an XML processor  | 
550  |  |  * must pass to the application the single character #xA.  | 
551  |  |  * This behavior can conveniently be produced by normalizing all  | 
552  |  |  * line breaks to #xA on input, before parsing.)  | 
553  |  |  *  | 
554  |  |  * Returns the current char value and its length  | 
555  |  |  */  | 
556  |  |  | 
557  |  | int  | 
558  | 1.19G  | xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { | 
559  | 1.19G  |     if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);  | 
560  | 1.19G  |     if (ctxt->instate == XML_PARSER_EOF)  | 
561  | 3.14k  |   return(0);  | 
562  |  |  | 
563  | 1.19G  |     if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) { | 
564  | 947M  |       *len = 1;  | 
565  | 947M  |       return(*ctxt->input->cur);  | 
566  | 947M  |     }  | 
567  | 245M  |     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { | 
568  |  |   /*  | 
569  |  |    * We are supposed to handle UTF8, check it's valid  | 
570  |  |    * From rfc2044: encoding of the Unicode values on UTF-8:  | 
571  |  |    *  | 
572  |  |    * UCS-4 range (hex.)           UTF-8 octet sequence (binary)  | 
573  |  |    * 0000 0000-0000 007F   0xxxxxxx  | 
574  |  |    * 0000 0080-0000 07FF   110xxxxx 10xxxxxx  | 
575  |  |    * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
576  |  |    *  | 
577  |  |    * Check for the 0x110000 limit too  | 
578  |  |    */  | 
579  | 58.5M  |   const unsigned char *cur = ctxt->input->cur;  | 
580  | 58.5M  |   unsigned char c;  | 
581  | 58.5M  |   unsigned int val;  | 
582  |  |  | 
583  | 58.5M  |   c = *cur;  | 
584  | 58.5M  |   if (c & 0x80) { | 
585  | 32.5M  |       if (((c & 0x40) == 0) || (c == 0xC0))  | 
586  | 146k  |     goto encoding_error;  | 
587  | 32.3M  |       if (cur[1] == 0) { | 
588  | 9.70k  |     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
589  | 9.70k  |                 cur = ctxt->input->cur;  | 
590  | 9.70k  |             }  | 
591  | 32.3M  |       if ((cur[1] & 0xc0) != 0x80)  | 
592  | 171k  |     goto encoding_error;  | 
593  | 32.2M  |       if ((c & 0xe0) == 0xe0) { | 
594  | 5.45M  |     if (cur[2] == 0) { | 
595  | 1.23k  |         xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
596  | 1.23k  |                     cur = ctxt->input->cur;  | 
597  | 1.23k  |                 }  | 
598  | 5.45M  |     if ((cur[2] & 0xc0) != 0x80)  | 
599  | 6.85k  |         goto encoding_error;  | 
600  | 5.44M  |     if ((c & 0xf0) == 0xf0) { | 
601  | 1.00M  |         if (cur[3] == 0) { | 
602  | 1.14k  |       xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
603  | 1.14k  |                         cur = ctxt->input->cur;  | 
604  | 1.14k  |                     }  | 
605  | 1.00M  |         if (((c & 0xf8) != 0xf0) ||  | 
606  | 1.00M  |       ((cur[3] & 0xc0) != 0x80))  | 
607  | 3.12k  |       goto encoding_error;  | 
608  |  |         /* 4-byte code */  | 
609  | 997k  |         *len = 4;  | 
610  | 997k  |         val = (cur[0] & 0x7) << 18;  | 
611  | 997k  |         val |= (cur[1] & 0x3f) << 12;  | 
612  | 997k  |         val |= (cur[2] & 0x3f) << 6;  | 
613  | 997k  |         val |= cur[3] & 0x3f;  | 
614  | 997k  |         if (val < 0x10000)  | 
615  | 496  |       goto encoding_error;  | 
616  | 4.44M  |     } else { | 
617  |  |       /* 3-byte code */  | 
618  | 4.44M  |         *len = 3;  | 
619  | 4.44M  |         val = (cur[0] & 0xf) << 12;  | 
620  | 4.44M  |         val |= (cur[1] & 0x3f) << 6;  | 
621  | 4.44M  |         val |= cur[2] & 0x3f;  | 
622  | 4.44M  |         if (val < 0x800)  | 
623  | 498  |       goto encoding_error;  | 
624  | 4.44M  |     }  | 
625  | 26.7M  |       } else { | 
626  |  |         /* 2-byte code */  | 
627  | 26.7M  |     *len = 2;  | 
628  | 26.7M  |     val = (cur[0] & 0x1f) << 6;  | 
629  | 26.7M  |     val |= cur[1] & 0x3f;  | 
630  | 26.7M  |     if (val < 0x80)  | 
631  | 553  |         goto encoding_error;  | 
632  | 26.7M  |       }  | 
633  | 32.2M  |       if (!IS_CHAR(val)) { | 
634  | 28.3k  |           xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,  | 
635  | 28.3k  |           "Char 0x%X out of allowed range\n", val);  | 
636  | 28.3k  |       }  | 
637  | 32.2M  |       return(val);  | 
638  | 32.2M  |   } else { | 
639  |  |       /* 1-byte code */  | 
640  | 25.9M  |       *len = 1;  | 
641  | 25.9M  |       if (*ctxt->input->cur == 0)  | 
642  | 13.6M  |     xmlParserInputGrow(ctxt->input, INPUT_CHUNK);  | 
643  | 25.9M  |       if ((*ctxt->input->cur == 0) &&  | 
644  | 25.9M  |           (ctxt->input->end > ctxt->input->cur)) { | 
645  | 245k  |           xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,  | 
646  | 245k  |           "Char 0x0 out of allowed range\n", 0);  | 
647  | 245k  |       }  | 
648  | 25.9M  |       if (*ctxt->input->cur == 0xD) { | 
649  | 4.38M  |     if (ctxt->input->cur[1] == 0xA) { | 
650  | 2.09M  |         ctxt->input->cur++;  | 
651  | 2.09M  |     }  | 
652  | 4.38M  |     return(0xA);  | 
653  | 4.38M  |       }  | 
654  | 21.5M  |       return(*ctxt->input->cur);  | 
655  | 25.9M  |   }  | 
656  | 58.5M  |     }  | 
657  |  |     /*  | 
658  |  |      * Assume it's a fixed length encoding (1) with  | 
659  |  |      * a compatible encoding for the ASCII set, since  | 
660  |  |      * XML constructs only use < 128 chars  | 
661  |  |      */  | 
662  | 187M  |     *len = 1;  | 
663  | 187M  |     if (*ctxt->input->cur == 0xD) { | 
664  | 3.43M  |   if (ctxt->input->cur[1] == 0xA) { | 
665  | 1.04M  |       ctxt->input->cur++;  | 
666  | 1.04M  |   }  | 
667  | 3.43M  |   return(0xA);  | 
668  | 3.43M  |     }  | 
669  | 183M  |     return(*ctxt->input->cur);  | 
670  | 329k  | encoding_error:  | 
671  |  |     /*  | 
672  |  |      * An encoding problem may arise from a truncated input buffer  | 
673  |  |      * splitting a character in the middle. In that case do not raise  | 
674  |  |      * an error but return 0 to indicate an end of stream problem  | 
675  |  |      */  | 
676  | 329k  |     if (ctxt->input->end - ctxt->input->cur < 4) { | 
677  | 9.54k  |   *len = 0;  | 
678  | 9.54k  |   return(0);  | 
679  | 9.54k  |     }  | 
680  |  |  | 
681  |  |     /*  | 
682  |  |      * If we detect an UTF8 error that probably mean that the  | 
683  |  |      * input encoding didn't get properly advertised in the  | 
684  |  |      * declaration header. Report the error and switch the encoding  | 
685  |  |      * to ISO-Latin-1 (if you don't like this policy, just declare the  | 
686  |  |      * encoding !)  | 
687  |  |      */  | 
688  | 319k  |     { | 
689  | 319k  |         char buffer[150];  | 
690  |  |  | 
691  | 319k  |   snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",  | 
692  | 319k  |       ctxt->input->cur[0], ctxt->input->cur[1],  | 
693  | 319k  |       ctxt->input->cur[2], ctxt->input->cur[3]);  | 
694  | 319k  |   __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,  | 
695  | 319k  |          "Input is not proper UTF-8, indicate encoding !\n%s",  | 
696  | 319k  |          BAD_CAST buffer, NULL);  | 
697  | 319k  |     }  | 
698  | 319k  |     ctxt->charset = XML_CHAR_ENCODING_8859_1;  | 
699  | 319k  |     *len = 1;  | 
700  | 319k  |     return(*ctxt->input->cur);  | 
701  | 329k  | }  | 
702  |  |  | 
703  |  | /**  | 
704  |  |  * xmlStringCurrentChar:  | 
705  |  |  * @ctxt:  the XML parser context  | 
706  |  |  * @cur:  pointer to the beginning of the char  | 
707  |  |  * @len:  pointer to the length of the char read  | 
708  |  |  *  | 
709  |  |  * The current char value, if using UTF-8 this may actually span multiple  | 
710  |  |  * bytes in the input buffer.  | 
711  |  |  *  | 
712  |  |  * Returns the current char value and its length  | 
713  |  |  */  | 
714  |  |  | 
715  |  | int  | 
716  |  | xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)  | 
717  | 44.5G  | { | 
718  | 44.5G  |     if ((len == NULL) || (cur == NULL)) return(0);  | 
719  | 44.5G  |     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) { | 
720  |  |         /*  | 
721  |  |          * We are supposed to handle UTF8, check it's valid  | 
722  |  |          * From rfc2044: encoding of the Unicode values on UTF-8:  | 
723  |  |          *  | 
724  |  |          * UCS-4 range (hex.)           UTF-8 octet sequence (binary)  | 
725  |  |          * 0000 0000-0000 007F   0xxxxxxx  | 
726  |  |          * 0000 0080-0000 07FF   110xxxxx 10xxxxxx  | 
727  |  |          * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
728  |  |          *  | 
729  |  |          * Check for the 0x110000 limit too  | 
730  |  |          */  | 
731  | 32.2G  |         unsigned char c;  | 
732  | 32.2G  |         unsigned int val;  | 
733  |  |  | 
734  | 32.2G  |         c = *cur;  | 
735  | 32.2G  |         if (c & 0x80) { | 
736  | 723k  |             if ((cur[1] & 0xc0) != 0x80)  | 
737  | 28.7k  |                 goto encoding_error;  | 
738  | 694k  |             if ((c & 0xe0) == 0xe0) { | 
739  |  |  | 
740  | 166k  |                 if ((cur[2] & 0xc0) != 0x80)  | 
741  | 3.13k  |                     goto encoding_error;  | 
742  | 163k  |                 if ((c & 0xf0) == 0xf0) { | 
743  | 122k  |                     if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))  | 
744  | 1.86k  |                         goto encoding_error;  | 
745  |  |                     /* 4-byte code */  | 
746  | 120k  |                     *len = 4;  | 
747  | 120k  |                     val = (cur[0] & 0x7) << 18;  | 
748  | 120k  |                     val |= (cur[1] & 0x3f) << 12;  | 
749  | 120k  |                     val |= (cur[2] & 0x3f) << 6;  | 
750  | 120k  |                     val |= cur[3] & 0x3f;  | 
751  | 120k  |                 } else { | 
752  |  |                     /* 3-byte code */  | 
753  | 41.4k  |                     *len = 3;  | 
754  | 41.4k  |                     val = (cur[0] & 0xf) << 12;  | 
755  | 41.4k  |                     val |= (cur[1] & 0x3f) << 6;  | 
756  | 41.4k  |                     val |= cur[2] & 0x3f;  | 
757  | 41.4k  |                 }  | 
758  | 527k  |             } else { | 
759  |  |                 /* 2-byte code */  | 
760  | 527k  |                 *len = 2;  | 
761  | 527k  |                 val = (cur[0] & 0x1f) << 6;  | 
762  | 527k  |                 val |= cur[1] & 0x3f;  | 
763  | 527k  |             }  | 
764  | 689k  |             if (!IS_CHAR(val)) { | 
765  | 2.60k  |           xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,  | 
766  | 2.60k  |           "Char 0x%X out of allowed range\n", val);  | 
767  | 2.60k  |             }  | 
768  | 689k  |             return (val);  | 
769  | 32.2G  |         } else { | 
770  |  |             /* 1-byte code */  | 
771  | 32.2G  |             *len = 1;  | 
772  | 32.2G  |             return (*cur);  | 
773  | 32.2G  |         }  | 
774  | 32.2G  |     }  | 
775  |  |     /*  | 
776  |  |      * Assume it's a fixed length encoding (1) with  | 
777  |  |      * a compatible encoding for the ASCII set, since  | 
778  |  |      * XML constructs only use < 128 chars  | 
779  |  |      */  | 
780  | 12.2G  |     *len = 1;  | 
781  | 12.2G  |     return (*cur);  | 
782  | 33.7k  | encoding_error:  | 
783  |  |  | 
784  |  |     /*  | 
785  |  |      * An encoding problem may arise from a truncated input buffer  | 
786  |  |      * splitting a character in the middle. In that case do not raise  | 
787  |  |      * an error but return 0 to indicate an end of stream problem  | 
788  |  |      */  | 
789  | 33.7k  |     if ((ctxt == NULL) || (ctxt->input == NULL) ||  | 
790  | 33.7k  |         (ctxt->input->end - ctxt->input->cur < 4)) { | 
791  | 30.0k  |   *len = 0;  | 
792  | 30.0k  |   return(0);  | 
793  | 30.0k  |     }  | 
794  |  |     /*  | 
795  |  |      * If we detect an UTF8 error that probably mean that the  | 
796  |  |      * input encoding didn't get properly advertised in the  | 
797  |  |      * declaration header. Report the error and switch the encoding  | 
798  |  |      * to ISO-Latin-1 (if you don't like this policy, just declare the  | 
799  |  |      * encoding !)  | 
800  |  |      */  | 
801  | 3.74k  |     { | 
802  | 3.74k  |         char buffer[150];  | 
803  |  |  | 
804  | 3.74k  |   snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",  | 
805  | 3.74k  |       ctxt->input->cur[0], ctxt->input->cur[1],  | 
806  | 3.74k  |       ctxt->input->cur[2], ctxt->input->cur[3]);  | 
807  | 3.74k  |   __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,  | 
808  | 3.74k  |          "Input is not proper UTF-8, indicate encoding !\n%s",  | 
809  | 3.74k  |          BAD_CAST buffer, NULL);  | 
810  | 3.74k  |     }  | 
811  | 3.74k  |     *len = 1;  | 
812  | 3.74k  |     return (*cur);  | 
813  | 33.7k  | }  | 
814  |  |  | 
815  |  | /**  | 
816  |  |  * xmlCopyCharMultiByte:  | 
817  |  |  * @out:  pointer to an array of xmlChar  | 
818  |  |  * @val:  the char value  | 
819  |  |  *  | 
820  |  |  * append the char value in the array  | 
821  |  |  *  | 
822  |  |  * Returns the number of xmlChar written  | 
823  |  |  */  | 
824  |  | int  | 
825  | 23.6M  | xmlCopyCharMultiByte(xmlChar *out, int val) { | 
826  | 23.6M  |     if ((out == NULL) || (val < 0)) return(0);  | 
827  |  |     /*  | 
828  |  |      * We are supposed to handle UTF8, check it's valid  | 
829  |  |      * From rfc2044: encoding of the Unicode values on UTF-8:  | 
830  |  |      *  | 
831  |  |      * UCS-4 range (hex.)           UTF-8 octet sequence (binary)  | 
832  |  |      * 0000 0000-0000 007F   0xxxxxxx  | 
833  |  |      * 0000 0080-0000 07FF   110xxxxx 10xxxxxx  | 
834  |  |      * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx  | 
835  |  |      */  | 
836  | 23.6M  |     if  (val >= 0x80) { | 
837  | 20.4M  |   xmlChar *savedout = out;  | 
838  | 20.4M  |   int bits;  | 
839  | 20.4M  |   if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; } | 
840  | 3.85M  |   else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;} | 
841  | 1.09M  |   else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; } | 
842  | 0  |   else { | 
843  | 0  |       xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,  | 
844  | 0  |         "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",  | 
845  | 0  |             val);  | 
846  | 0  |       return(0);  | 
847  | 0  |   }  | 
848  | 45.8M  |   for ( ; bits >= 0; bits-= 6)  | 
849  | 25.4M  |       *out++= ((val >> bits) & 0x3F) | 0x80 ;  | 
850  | 20.4M  |   return (out - savedout);  | 
851  | 20.4M  |     }  | 
852  | 3.19M  |     *out = val;  | 
853  | 3.19M  |     return 1;  | 
854  | 23.6M  | }  | 
855  |  |  | 
856  |  | /**  | 
857  |  |  * xmlCopyChar:  | 
858  |  |  * @len:  Ignored, compatibility  | 
859  |  |  * @out:  pointer to an array of xmlChar  | 
860  |  |  * @val:  the char value  | 
861  |  |  *  | 
862  |  |  * append the char value in the array  | 
863  |  |  *  | 
864  |  |  * Returns the number of xmlChar written  | 
865  |  |  */  | 
866  |  |  | 
867  |  | int  | 
868  | 506k  | xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) { | 
869  | 506k  |     if ((out == NULL) || (val < 0)) return(0);  | 
870  |  |     /* the len parameter is ignored */  | 
871  | 506k  |     if  (val >= 0x80) { | 
872  | 108k  |   return(xmlCopyCharMultiByte (out, val));  | 
873  | 108k  |     }  | 
874  | 398k  |     *out = val;  | 
875  | 398k  |     return 1;  | 
876  | 506k  | }  | 
877  |  |  | 
878  |  | /************************************************************************  | 
879  |  |  *                  *  | 
880  |  |  *    Commodity functions to switch encodings     *  | 
881  |  |  *                  *  | 
882  |  |  ************************************************************************/  | 
883  |  |  | 
884  |  | static int  | 
885  |  | xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,  | 
886  |  |                           xmlCharEncodingHandlerPtr handler, int len);  | 
887  |  | /**  | 
888  |  |  * xmlSwitchEncoding:  | 
889  |  |  * @ctxt:  the parser context  | 
890  |  |  * @enc:  the encoding value (number)  | 
891  |  |  *  | 
892  |  |  * change the input functions when discovering the character encoding  | 
893  |  |  * of a given entity.  | 
894  |  |  *  | 
895  |  |  * Returns 0 in case of success, -1 otherwise  | 
896  |  |  */  | 
897  |  | int  | 
898  |  | xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)  | 
899  | 867k  | { | 
900  | 867k  |     xmlCharEncodingHandlerPtr handler;  | 
901  | 867k  |     int len = -1;  | 
902  | 867k  |     int ret;  | 
903  |  |  | 
904  | 867k  |     if (ctxt == NULL) return(-1);  | 
905  | 867k  |     switch (enc) { | 
906  | 0  |   case XML_CHAR_ENCODING_ERROR:  | 
907  | 0  |       __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,  | 
908  | 0  |                      "encoding unknown\n", NULL, NULL);  | 
909  | 0  |       return(-1);  | 
910  | 272k  |   case XML_CHAR_ENCODING_NONE:  | 
911  |  |       /* let's assume it's UTF-8 without the XML decl */  | 
912  | 272k  |       ctxt->charset = XML_CHAR_ENCODING_UTF8;  | 
913  | 272k  |       return(0);  | 
914  | 570k  |   case XML_CHAR_ENCODING_UTF8:  | 
915  |  |       /* default encoding, no conversion should be needed */  | 
916  | 570k  |       ctxt->charset = XML_CHAR_ENCODING_UTF8;  | 
917  |  |  | 
918  |  |       /*  | 
919  |  |        * Errata on XML-1.0 June 20 2001  | 
920  |  |        * Specific handling of the Byte Order Mark for  | 
921  |  |        * UTF-8  | 
922  |  |        */  | 
923  | 570k  |       if ((ctxt->input != NULL) &&  | 
924  | 570k  |     (ctxt->input->cur[0] == 0xEF) &&  | 
925  | 570k  |     (ctxt->input->cur[1] == 0xBB) &&  | 
926  | 570k  |     (ctxt->input->cur[2] == 0xBF)) { | 
927  | 5.22k  |     ctxt->input->cur += 3;  | 
928  | 5.22k  |       }  | 
929  | 570k  |       return(0);  | 
930  | 6.78k  |     case XML_CHAR_ENCODING_UTF16LE:  | 
931  | 14.3k  |     case XML_CHAR_ENCODING_UTF16BE:  | 
932  |  |         /*The raw input characters are encoded  | 
933  |  |          *in UTF-16. As we expect this function  | 
934  |  |          *to be called after xmlCharEncInFunc, we expect  | 
935  |  |          *ctxt->input->cur to contain UTF-8 encoded characters.  | 
936  |  |          *So the raw UTF16 Byte Order Mark  | 
937  |  |          *has also been converted into  | 
938  |  |          *an UTF-8 BOM. Let's skip that BOM.  | 
939  |  |          */  | 
940  | 14.3k  |         if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&  | 
941  | 14.3k  |             (ctxt->input->cur[0] == 0xEF) &&  | 
942  | 14.3k  |             (ctxt->input->cur[1] == 0xBB) &&  | 
943  | 14.3k  |             (ctxt->input->cur[2] == 0xBF)) { | 
944  | 3.30k  |             ctxt->input->cur += 3;  | 
945  | 3.30k  |         }  | 
946  | 14.3k  |         len = 90;  | 
947  | 14.3k  |   break;  | 
948  | 0  |     case XML_CHAR_ENCODING_UCS2:  | 
949  | 0  |         len = 90;  | 
950  | 0  |   break;  | 
951  | 685  |     case XML_CHAR_ENCODING_UCS4BE:  | 
952  | 1.79k  |     case XML_CHAR_ENCODING_UCS4LE:  | 
953  | 2.05k  |     case XML_CHAR_ENCODING_UCS4_2143:  | 
954  | 2.23k  |     case XML_CHAR_ENCODING_UCS4_3412:  | 
955  | 2.23k  |         len = 180;  | 
956  | 2.23k  |   break;  | 
957  | 7.53k  |     case XML_CHAR_ENCODING_EBCDIC:  | 
958  | 7.53k  |     case XML_CHAR_ENCODING_8859_1:  | 
959  | 7.53k  |     case XML_CHAR_ENCODING_8859_2:  | 
960  | 7.53k  |     case XML_CHAR_ENCODING_8859_3:  | 
961  | 7.53k  |     case XML_CHAR_ENCODING_8859_4:  | 
962  | 7.53k  |     case XML_CHAR_ENCODING_8859_5:  | 
963  | 7.53k  |     case XML_CHAR_ENCODING_8859_6:  | 
964  | 7.53k  |     case XML_CHAR_ENCODING_8859_7:  | 
965  | 7.53k  |     case XML_CHAR_ENCODING_8859_8:  | 
966  | 7.53k  |     case XML_CHAR_ENCODING_8859_9:  | 
967  | 7.53k  |     case XML_CHAR_ENCODING_ASCII:  | 
968  | 7.53k  |     case XML_CHAR_ENCODING_2022_JP:  | 
969  | 7.53k  |     case XML_CHAR_ENCODING_SHIFT_JIS:  | 
970  | 7.53k  |     case XML_CHAR_ENCODING_EUC_JP:  | 
971  | 7.53k  |         len = 45;  | 
972  | 7.53k  |   break;  | 
973  | 867k  |     }  | 
974  | 24.1k  |     handler = xmlGetCharEncodingHandler(enc);  | 
975  | 24.1k  |     if (handler == NULL) { | 
976  |  |   /*  | 
977  |  |    * Default handlers.  | 
978  |  |    */  | 
979  | 443  |   switch (enc) { | 
980  | 0  |       case XML_CHAR_ENCODING_ASCII:  | 
981  |  |     /* default encoding, no conversion should be needed */  | 
982  | 0  |     ctxt->charset = XML_CHAR_ENCODING_UTF8;  | 
983  | 0  |     return(0);  | 
984  | 0  |       case XML_CHAR_ENCODING_8859_1:  | 
985  | 0  |     if ((ctxt->inputNr == 1) &&  | 
986  | 0  |         (ctxt->encoding == NULL) &&  | 
987  | 0  |         (ctxt->input != NULL) &&  | 
988  | 0  |         (ctxt->input->encoding != NULL)) { | 
989  | 0  |         ctxt->encoding = xmlStrdup(ctxt->input->encoding);  | 
990  | 0  |     }  | 
991  | 0  |     ctxt->charset = enc;  | 
992  | 0  |     return(0);  | 
993  | 443  |       default:  | 
994  | 443  |     __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,  | 
995  | 443  |                         "encoding not supported: %s\n",  | 
996  | 443  |       BAD_CAST xmlGetCharEncodingName(enc), NULL);  | 
997  |  |                 /*  | 
998  |  |                  * TODO: We could recover from errors in external entities  | 
999  |  |                  * if we didn't stop the parser. But most callers of this  | 
1000  |  |                  * function don't check the return value.  | 
1001  |  |                  */  | 
1002  | 443  |                 xmlStopParser(ctxt);  | 
1003  | 443  |                 return(-1);  | 
1004  | 443  |         }  | 
1005  | 443  |     }  | 
1006  | 23.6k  |     ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);  | 
1007  | 23.6k  |     if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) { | 
1008  |  |         /*  | 
1009  |  |    * on encoding conversion errors, stop the parser  | 
1010  |  |    */  | 
1011  | 197  |         xmlStopParser(ctxt);  | 
1012  | 197  |   ctxt->errNo = XML_I18N_CONV_FAILED;  | 
1013  | 197  |     }  | 
1014  | 23.6k  |     return(ret);  | 
1015  | 24.1k  | }  | 
1016  |  |  | 
1017  |  | /**  | 
1018  |  |  * xmlSwitchInputEncodingInt:  | 
1019  |  |  * @ctxt:  the parser context  | 
1020  |  |  * @input:  the input stream  | 
1021  |  |  * @handler:  the encoding handler  | 
1022  |  |  * @len:  the number of bytes to convert for the first line or -1  | 
1023  |  |  *  | 
1024  |  |  * change the input functions when discovering the character encoding  | 
1025  |  |  * of a given entity.  | 
1026  |  |  *  | 
1027  |  |  * Returns 0 in case of success, -1 otherwise  | 
1028  |  |  */  | 
1029  |  | static int  | 
1030  |  | xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,  | 
1031  |  |                           xmlCharEncodingHandlerPtr handler, int len)  | 
1032  | 97.6k  | { | 
1033  | 97.6k  |     int nbchars;  | 
1034  | 97.6k  |     xmlParserInputBufferPtr in;  | 
1035  |  |  | 
1036  | 97.6k  |     if (handler == NULL)  | 
1037  | 0  |         return (-1);  | 
1038  | 97.6k  |     if (input == NULL)  | 
1039  | 0  |         return (-1);  | 
1040  | 97.6k  |     in = input->buf;  | 
1041  | 97.6k  |     if (in == NULL) { | 
1042  | 0  |   xmlErrInternal(ctxt,  | 
1043  | 0  |                 "static memory buffer doesn't support encoding\n", NULL);  | 
1044  |  |         /*  | 
1045  |  |          * Callers assume that the input buffer takes ownership of the  | 
1046  |  |          * encoding handler. xmlCharEncCloseFunc frees unregistered  | 
1047  |  |          * handlers and avoids a memory leak.  | 
1048  |  |          */  | 
1049  | 0  |         xmlCharEncCloseFunc(handler);  | 
1050  | 0  |   return (-1);  | 
1051  | 0  |     }  | 
1052  |  |  | 
1053  | 97.6k  |     ctxt->charset = XML_CHAR_ENCODING_UTF8;  | 
1054  |  |  | 
1055  | 97.6k  |     if (in->encoder != NULL) { | 
1056  |  |         /*  | 
1057  |  |          * Check in case the auto encoding detection triggered  | 
1058  |  |          * in already.  | 
1059  |  |          */  | 
1060  | 7.48k  |         if (in->encoder == handler)  | 
1061  | 4.25k  |             return (0);  | 
1062  |  |  | 
1063  |  |         /*  | 
1064  |  |          * Note: this is a bit dangerous, but that's what it  | 
1065  |  |          * takes to use nearly compatible signature for different  | 
1066  |  |          * encodings.  | 
1067  |  |          *  | 
1068  |  |          * FIXME: Encoders might buffer partial byte sequences, so  | 
1069  |  |          * this probably can't work. We should return an error and  | 
1070  |  |          * make sure that callers never try to switch the encoding  | 
1071  |  |          * twice.  | 
1072  |  |          */  | 
1073  | 3.22k  |         xmlCharEncCloseFunc(in->encoder);  | 
1074  | 3.22k  |         in->encoder = handler;  | 
1075  | 3.22k  |         return (0);  | 
1076  | 7.48k  |     }  | 
1077  | 90.2k  |     in->encoder = handler;  | 
1078  |  |  | 
1079  |  |     /*  | 
1080  |  |      * Is there already some content down the pipe to convert ?  | 
1081  |  |      */  | 
1082  | 90.2k  |     if (xmlBufIsEmpty(in->buffer) == 0) { | 
1083  | 90.2k  |         size_t processed, use, consumed;  | 
1084  |  |  | 
1085  |  |         /*  | 
1086  |  |          * Specific handling of the Byte Order Mark for  | 
1087  |  |          * UTF-16  | 
1088  |  |          */  | 
1089  | 90.2k  |         if ((handler->name != NULL) &&  | 
1090  | 90.2k  |             (!strcmp(handler->name, "UTF-16LE") ||  | 
1091  | 90.2k  |              !strcmp(handler->name, "UTF-16")) &&  | 
1092  | 90.2k  |             (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) { | 
1093  | 3.08k  |             input->cur += 2;  | 
1094  | 3.08k  |         }  | 
1095  | 90.2k  |         if ((handler->name != NULL) &&  | 
1096  | 90.2k  |             (!strcmp(handler->name, "UTF-16BE")) &&  | 
1097  | 90.2k  |             (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) { | 
1098  | 4.87k  |             input->cur += 2;  | 
1099  | 4.87k  |         }  | 
1100  |  |         /*  | 
1101  |  |          * Errata on XML-1.0 June 20 2001  | 
1102  |  |          * Specific handling of the Byte Order Mark for  | 
1103  |  |          * UTF-8  | 
1104  |  |          */  | 
1105  | 90.2k  |         if ((handler->name != NULL) &&  | 
1106  | 90.2k  |             (!strcmp(handler->name, "UTF-8")) &&  | 
1107  | 90.2k  |             (input->cur[0] == 0xEF) &&  | 
1108  | 90.2k  |             (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) { | 
1109  | 0  |             input->cur += 3;  | 
1110  | 0  |         }  | 
1111  |  |  | 
1112  |  |         /*  | 
1113  |  |          * Shrink the current input buffer.  | 
1114  |  |          * Move it as the raw buffer and create a new input buffer  | 
1115  |  |          */  | 
1116  | 90.2k  |         processed = input->cur - input->base;  | 
1117  | 90.2k  |         xmlBufShrink(in->buffer, processed);  | 
1118  | 90.2k  |         input->consumed += processed;  | 
1119  | 90.2k  |         in->raw = in->buffer;  | 
1120  | 90.2k  |         in->buffer = xmlBufCreate();  | 
1121  | 90.2k  |         in->rawconsumed = processed;  | 
1122  | 90.2k  |         use = xmlBufUse(in->raw);  | 
1123  |  |  | 
1124  | 90.2k  |         if (ctxt->html) { | 
1125  |  |             /*  | 
1126  |  |              * convert as much as possible of the buffer  | 
1127  |  |              */  | 
1128  | 0  |             nbchars = xmlCharEncInput(in, 1);  | 
1129  | 90.2k  |         } else { | 
1130  |  |             /*  | 
1131  |  |              * convert just enough to get  | 
1132  |  |              * '<?xml version="1.0" encoding="xxx"?>'  | 
1133  |  |              * parsed with the autodetected encoding  | 
1134  |  |              * into the parser reading buffer.  | 
1135  |  |              */  | 
1136  | 90.2k  |             nbchars = xmlCharEncFirstLineInput(in, len);  | 
1137  | 90.2k  |         }  | 
1138  | 90.2k  |         xmlBufResetInput(in->buffer, input);  | 
1139  | 90.2k  |         if (nbchars < 0) { | 
1140  | 425  |             xmlErrInternal(ctxt,  | 
1141  | 425  |                            "switching encoding: encoder error\n",  | 
1142  | 425  |                            NULL);  | 
1143  | 425  |             return (-1);  | 
1144  | 425  |         }  | 
1145  | 89.7k  |         consumed = use - xmlBufUse(in->raw);  | 
1146  | 89.7k  |         if ((consumed > ULONG_MAX) ||  | 
1147  | 89.7k  |             (in->rawconsumed > ULONG_MAX - (unsigned long)consumed))  | 
1148  | 0  |             in->rawconsumed = ULONG_MAX;  | 
1149  | 89.7k  |         else  | 
1150  | 89.7k  |       in->rawconsumed += consumed;  | 
1151  | 89.7k  |     }  | 
1152  | 89.7k  |     return (0);  | 
1153  | 90.2k  | }  | 
1154  |  |  | 
1155  |  | /**  | 
1156  |  |  * xmlSwitchInputEncoding:  | 
1157  |  |  * @ctxt:  the parser context  | 
1158  |  |  * @input:  the input stream  | 
1159  |  |  * @handler:  the encoding handler  | 
1160  |  |  *  | 
1161  |  |  * DEPRECATED: Use xmlSwitchToEncoding  | 
1162  |  |  *  | 
1163  |  |  * change the input functions when discovering the character encoding  | 
1164  |  |  * of a given entity.  | 
1165  |  |  *  | 
1166  |  |  * Returns 0 in case of success, -1 otherwise  | 
1167  |  |  */  | 
1168  |  | int  | 
1169  |  | xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,  | 
1170  | 0  |                           xmlCharEncodingHandlerPtr handler) { | 
1171  | 0  |     return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));  | 
1172  | 0  | }  | 
1173  |  |  | 
1174  |  | /**  | 
1175  |  |  * xmlSwitchToEncoding:  | 
1176  |  |  * @ctxt:  the parser context  | 
1177  |  |  * @handler:  the encoding handler  | 
1178  |  |  *  | 
1179  |  |  * change the input functions when discovering the character encoding  | 
1180  |  |  * of a given entity.  | 
1181  |  |  *  | 
1182  |  |  * Returns 0 in case of success, -1 otherwise  | 
1183  |  |  */  | 
1184  |  | int  | 
1185  |  | xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)  | 
1186  | 73.9k  | { | 
1187  | 73.9k  |     if (ctxt == NULL)  | 
1188  | 0  |         return(-1);  | 
1189  | 73.9k  |     return(xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, -1));  | 
1190  | 73.9k  | }  | 
1191  |  |  | 
1192  |  | /************************************************************************  | 
1193  |  |  *                  *  | 
1194  |  |  *  Commodity functions to handle entities processing   *  | 
1195  |  |  *                  *  | 
1196  |  |  ************************************************************************/  | 
1197  |  |  | 
1198  |  | /**  | 
1199  |  |  * xmlFreeInputStream:  | 
1200  |  |  * @input:  an xmlParserInputPtr  | 
1201  |  |  *  | 
1202  |  |  * Free up an input stream.  | 
1203  |  |  */  | 
1204  |  | void  | 
1205  | 439M  | xmlFreeInputStream(xmlParserInputPtr input) { | 
1206  | 439M  |     if (input == NULL) return;  | 
1207  |  |  | 
1208  | 439M  |     if (input->filename != NULL) xmlFree((char *) input->filename);  | 
1209  | 439M  |     if (input->directory != NULL) xmlFree((char *) input->directory);  | 
1210  | 439M  |     if (input->encoding != NULL) xmlFree((char *) input->encoding);  | 
1211  | 439M  |     if (input->version != NULL) xmlFree((char *) input->version);  | 
1212  | 439M  |     if ((input->free != NULL) && (input->base != NULL))  | 
1213  | 0  |         input->free((xmlChar *) input->base);  | 
1214  | 439M  |     if (input->buf != NULL)  | 
1215  | 969k  |         xmlFreeParserInputBuffer(input->buf);  | 
1216  | 439M  |     xmlFree(input);  | 
1217  | 439M  | }  | 
1218  |  |  | 
1219  |  | /**  | 
1220  |  |  * xmlNewInputStream:  | 
1221  |  |  * @ctxt:  an XML parser context  | 
1222  |  |  *  | 
1223  |  |  * Create a new input stream structure.  | 
1224  |  |  *  | 
1225  |  |  * Returns the new input stream or NULL  | 
1226  |  |  */  | 
1227  |  | xmlParserInputPtr  | 
1228  | 439M  | xmlNewInputStream(xmlParserCtxtPtr ctxt) { | 
1229  | 439M  |     xmlParserInputPtr input;  | 
1230  |  |  | 
1231  | 439M  |     input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));  | 
1232  | 439M  |     if (input == NULL) { | 
1233  | 0  |         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");  | 
1234  | 0  |   return(NULL);  | 
1235  | 0  |     }  | 
1236  | 439M  |     memset(input, 0, sizeof(xmlParserInput));  | 
1237  | 439M  |     input->line = 1;  | 
1238  | 439M  |     input->col = 1;  | 
1239  | 439M  |     input->standalone = -1;  | 
1240  |  |  | 
1241  |  |     /*  | 
1242  |  |      * If the context is NULL the id cannot be initialized, but that  | 
1243  |  |      * should not happen while parsing which is the situation where  | 
1244  |  |      * the id is actually needed.  | 
1245  |  |      */  | 
1246  | 439M  |     if (ctxt != NULL) { | 
1247  | 439M  |         if (input->id >= INT_MAX) { | 
1248  | 0  |             xmlErrMemory(ctxt, "Input ID overflow\n");  | 
1249  | 0  |             return(NULL);  | 
1250  | 0  |         }  | 
1251  | 439M  |         input->id = ctxt->input_id++;  | 
1252  | 439M  |     }  | 
1253  |  |  | 
1254  | 439M  |     return(input);  | 
1255  | 439M  | }  | 
1256  |  |  | 
1257  |  | /**  | 
1258  |  |  * xmlNewIOInputStream:  | 
1259  |  |  * @ctxt:  an XML parser context  | 
1260  |  |  * @input:  an I/O Input  | 
1261  |  |  * @enc:  the charset encoding if known  | 
1262  |  |  *  | 
1263  |  |  * Create a new input stream structure encapsulating the @input into  | 
1264  |  |  * a stream suitable for the parser.  | 
1265  |  |  *  | 
1266  |  |  * Returns the new input stream or NULL  | 
1267  |  |  */  | 
1268  |  | xmlParserInputPtr  | 
1269  |  | xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,  | 
1270  | 0  |               xmlCharEncoding enc) { | 
1271  | 0  |     xmlParserInputPtr inputStream;  | 
1272  |  | 
  | 
1273  | 0  |     if (input == NULL) return(NULL);  | 
1274  | 0  |     if (xmlParserDebugEntities)  | 
1275  | 0  |   xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");  | 
1276  | 0  |     inputStream = xmlNewInputStream(ctxt);  | 
1277  | 0  |     if (inputStream == NULL) { | 
1278  | 0  |   return(NULL);  | 
1279  | 0  |     }  | 
1280  | 0  |     inputStream->filename = NULL;  | 
1281  | 0  |     inputStream->buf = input;  | 
1282  | 0  |     xmlBufResetInput(inputStream->buf->buffer, inputStream);  | 
1283  |  | 
  | 
1284  | 0  |     if (enc != XML_CHAR_ENCODING_NONE) { | 
1285  | 0  |         xmlSwitchEncoding(ctxt, enc);  | 
1286  | 0  |     }  | 
1287  |  | 
  | 
1288  | 0  |     return(inputStream);  | 
1289  | 0  | }  | 
1290  |  |  | 
1291  |  | /**  | 
1292  |  |  * xmlNewEntityInputStream:  | 
1293  |  |  * @ctxt:  an XML parser context  | 
1294  |  |  * @entity:  an Entity pointer  | 
1295  |  |  *  | 
1296  |  |  * Create a new input stream based on an xmlEntityPtr  | 
1297  |  |  *  | 
1298  |  |  * Returns the new input stream or NULL  | 
1299  |  |  */  | 
1300  |  | xmlParserInputPtr  | 
1301  | 437M  | xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { | 
1302  | 437M  |     xmlParserInputPtr input;  | 
1303  |  |  | 
1304  | 437M  |     if (entity == NULL) { | 
1305  | 0  |         xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",  | 
1306  | 0  |                  NULL);  | 
1307  | 0  |   return(NULL);  | 
1308  | 0  |     }  | 
1309  | 437M  |     if (xmlParserDebugEntities)  | 
1310  | 0  |   xmlGenericError(xmlGenericErrorContext,  | 
1311  | 0  |     "new input from entity: %s\n", entity->name);  | 
1312  | 437M  |     if (entity->content == NULL) { | 
1313  | 35.9k  |   switch (entity->etype) { | 
1314  | 0  |             case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:  | 
1315  | 0  |           xmlErrInternal(ctxt, "Cannot parse entity %s\n",  | 
1316  | 0  |                    entity->name);  | 
1317  | 0  |                 break;  | 
1318  | 0  |             case XML_EXTERNAL_GENERAL_PARSED_ENTITY:  | 
1319  | 35.9k  |             case XML_EXTERNAL_PARAMETER_ENTITY:  | 
1320  | 35.9k  |     input = xmlLoadExternalEntity((char *) entity->URI,  | 
1321  | 35.9k  |            (char *) entity->ExternalID, ctxt);  | 
1322  | 35.9k  |                 if (input != NULL)  | 
1323  | 28.5k  |                     input->entity = entity;  | 
1324  | 35.9k  |                 return(input);  | 
1325  | 0  |             case XML_INTERNAL_GENERAL_ENTITY:  | 
1326  | 0  |           xmlErrInternal(ctxt,  | 
1327  | 0  |           "Internal entity %s without content !\n",  | 
1328  | 0  |                    entity->name);  | 
1329  | 0  |                 break;  | 
1330  | 0  |             case XML_INTERNAL_PARAMETER_ENTITY:  | 
1331  | 0  |           xmlErrInternal(ctxt,  | 
1332  | 0  |           "Internal parameter entity %s without content !\n",  | 
1333  | 0  |                    entity->name);  | 
1334  | 0  |                 break;  | 
1335  | 0  |             case XML_INTERNAL_PREDEFINED_ENTITY:  | 
1336  | 0  |           xmlErrInternal(ctxt,  | 
1337  | 0  |           "Predefined entity %s without content !\n",  | 
1338  | 0  |                    entity->name);  | 
1339  | 0  |                 break;  | 
1340  | 35.9k  |   }  | 
1341  | 0  |   return(NULL);  | 
1342  | 35.9k  |     }  | 
1343  | 437M  |     input = xmlNewInputStream(ctxt);  | 
1344  | 437M  |     if (input == NULL) { | 
1345  | 0  |   return(NULL);  | 
1346  | 0  |     }  | 
1347  | 437M  |     if (entity->URI != NULL)  | 
1348  | 6  |   input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);  | 
1349  | 437M  |     input->base = entity->content;  | 
1350  | 437M  |     if (entity->length == 0)  | 
1351  | 36.7k  |         entity->length = xmlStrlen(entity->content);  | 
1352  | 437M  |     input->cur = entity->content;  | 
1353  | 437M  |     input->length = entity->length;  | 
1354  | 437M  |     input->end = &entity->content[input->length];  | 
1355  | 437M  |     input->entity = entity;  | 
1356  | 437M  |     return(input);  | 
1357  | 437M  | }  | 
1358  |  |  | 
1359  |  | /**  | 
1360  |  |  * xmlNewStringInputStream:  | 
1361  |  |  * @ctxt:  an XML parser context  | 
1362  |  |  * @buffer:  an memory buffer  | 
1363  |  |  *  | 
1364  |  |  * Create a new input stream based on a memory buffer.  | 
1365  |  |  * Returns the new input stream  | 
1366  |  |  */  | 
1367  |  | xmlParserInputPtr  | 
1368  | 0  | xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) { | 
1369  | 0  |     xmlParserInputPtr input;  | 
1370  | 0  |     xmlParserInputBufferPtr buf;  | 
1371  |  | 
  | 
1372  | 0  |     if (buffer == NULL) { | 
1373  | 0  |         xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",  | 
1374  | 0  |                  NULL);  | 
1375  | 0  |   return(NULL);  | 
1376  | 0  |     }  | 
1377  | 0  |     if (xmlParserDebugEntities)  | 
1378  | 0  |   xmlGenericError(xmlGenericErrorContext,  | 
1379  | 0  |     "new fixed input: %.30s\n", buffer);  | 
1380  | 0  |     buf = xmlParserInputBufferCreateMem((const char *) buffer,  | 
1381  | 0  |                                         xmlStrlen(buffer),  | 
1382  | 0  |                                         XML_CHAR_ENCODING_NONE);  | 
1383  | 0  |     if (buf == NULL) { | 
1384  | 0  |   xmlErrMemory(ctxt, NULL);  | 
1385  | 0  |         return(NULL);  | 
1386  | 0  |     }  | 
1387  | 0  |     input = xmlNewInputStream(ctxt);  | 
1388  | 0  |     if (input == NULL) { | 
1389  | 0  |         xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");  | 
1390  | 0  |   xmlFreeParserInputBuffer(buf);  | 
1391  | 0  |   return(NULL);  | 
1392  | 0  |     }  | 
1393  | 0  |     input->buf = buf;  | 
1394  | 0  |     xmlBufResetInput(input->buf->buffer, input);  | 
1395  | 0  |     return(input);  | 
1396  | 0  | }  | 
1397  |  |  | 
1398  |  | /**  | 
1399  |  |  * xmlNewInputFromFile:  | 
1400  |  |  * @ctxt:  an XML parser context  | 
1401  |  |  * @filename:  the filename to use as entity  | 
1402  |  |  *  | 
1403  |  |  * Create a new input stream based on a file or an URL.  | 
1404  |  |  *  | 
1405  |  |  * Returns the new input stream or NULL in case of error  | 
1406  |  |  */  | 
1407  |  | xmlParserInputPtr  | 
1408  | 0  | xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) { | 
1409  | 0  |     xmlParserInputBufferPtr buf;  | 
1410  | 0  |     xmlParserInputPtr inputStream;  | 
1411  | 0  |     char *directory = NULL;  | 
1412  | 0  |     xmlChar *URI = NULL;  | 
1413  |  | 
  | 
1414  | 0  |     if (xmlParserDebugEntities)  | 
1415  | 0  |   xmlGenericError(xmlGenericErrorContext,  | 
1416  | 0  |     "new input from file: %s\n", filename);  | 
1417  | 0  |     if (ctxt == NULL) return(NULL);  | 
1418  | 0  |     buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);  | 
1419  | 0  |     if (buf == NULL) { | 
1420  | 0  |   if (filename == NULL)  | 
1421  | 0  |       __xmlLoaderErr(ctxt,  | 
1422  | 0  |                      "failed to load external entity: NULL filename \n",  | 
1423  | 0  |          NULL);  | 
1424  | 0  |   else  | 
1425  | 0  |       __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",  | 
1426  | 0  |          (const char *) filename);  | 
1427  | 0  |   return(NULL);  | 
1428  | 0  |     }  | 
1429  |  |  | 
1430  | 0  |     inputStream = xmlNewInputStream(ctxt);  | 
1431  | 0  |     if (inputStream == NULL) { | 
1432  | 0  |   xmlFreeParserInputBuffer(buf);  | 
1433  | 0  |   return(NULL);  | 
1434  | 0  |     }  | 
1435  |  |  | 
1436  | 0  |     inputStream->buf = buf;  | 
1437  | 0  |     inputStream = xmlCheckHTTPInput(ctxt, inputStream);  | 
1438  | 0  |     if (inputStream == NULL)  | 
1439  | 0  |         return(NULL);  | 
1440  |  |  | 
1441  | 0  |     if (inputStream->filename == NULL)  | 
1442  | 0  |   URI = xmlStrdup((xmlChar *) filename);  | 
1443  | 0  |     else  | 
1444  | 0  |   URI = xmlStrdup((xmlChar *) inputStream->filename);  | 
1445  | 0  |     directory = xmlParserGetDirectory((const char *) URI);  | 
1446  | 0  |     if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);  | 
1447  | 0  |     inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);  | 
1448  | 0  |     if (URI != NULL) xmlFree((char *) URI);  | 
1449  | 0  |     inputStream->directory = directory;  | 
1450  |  | 
  | 
1451  | 0  |     xmlBufResetInput(inputStream->buf->buffer, inputStream);  | 
1452  | 0  |     if ((ctxt->directory == NULL) && (directory != NULL))  | 
1453  | 0  |         ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);  | 
1454  | 0  |     return(inputStream);  | 
1455  | 0  | }  | 
1456  |  |  | 
1457  |  | /************************************************************************  | 
1458  |  |  *                  *  | 
1459  |  |  *    Commodity functions to handle parser contexts   *  | 
1460  |  |  *                  *  | 
1461  |  |  ************************************************************************/  | 
1462  |  |  | 
1463  |  | /**  | 
1464  |  |  * xmlInitSAXParserCtxt:  | 
1465  |  |  * @ctxt:  XML parser context  | 
1466  |  |  * @sax:  SAX handlert  | 
1467  |  |  * @userData:  user data  | 
1468  |  |  *  | 
1469  |  |  * Initialize a SAX parser context  | 
1470  |  |  *  | 
1471  |  |  * Returns 0 in case of success and -1 in case of error  | 
1472  |  |  */  | 
1473  |  |  | 
1474  |  | static int  | 
1475  |  | xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,  | 
1476  |  |                      void *userData)  | 
1477  | 2.03M  | { | 
1478  | 2.03M  |     xmlParserInputPtr input;  | 
1479  |  |  | 
1480  | 2.03M  |     if(ctxt==NULL) { | 
1481  | 0  |         xmlErrInternal(NULL, "Got NULL parser context\n", NULL);  | 
1482  | 0  |         return(-1);  | 
1483  | 0  |     }  | 
1484  |  |  | 
1485  | 2.03M  |     xmlInitParser();  | 
1486  |  |  | 
1487  | 2.03M  |     if (ctxt->dict == NULL)  | 
1488  | 2.03M  |   ctxt->dict = xmlDictCreate();  | 
1489  | 2.03M  |     if (ctxt->dict == NULL) { | 
1490  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1491  | 0  |   return(-1);  | 
1492  | 0  |     }  | 
1493  | 2.03M  |     xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);  | 
1494  |  |  | 
1495  | 2.03M  |     if (ctxt->sax == NULL)  | 
1496  | 2.03M  |   ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));  | 
1497  | 2.03M  |     if (ctxt->sax == NULL) { | 
1498  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1499  | 0  |   return(-1);  | 
1500  | 0  |     }  | 
1501  | 2.03M  |     if (sax == NULL) { | 
1502  | 832k  |   memset(ctxt->sax, 0, sizeof(xmlSAXHandler));  | 
1503  | 832k  |         xmlSAXVersion(ctxt->sax, 2);  | 
1504  | 832k  |         ctxt->userData = ctxt;  | 
1505  | 1.20M  |     } else { | 
1506  | 1.20M  |   if (sax->initialized == XML_SAX2_MAGIC) { | 
1507  | 904k  |       memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));  | 
1508  | 904k  |         } else { | 
1509  | 299k  |       memset(ctxt->sax, 0, sizeof(xmlSAXHandler));  | 
1510  | 299k  |       memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));  | 
1511  | 299k  |         }  | 
1512  | 1.20M  |         ctxt->userData = userData ? userData : ctxt;  | 
1513  | 1.20M  |     }  | 
1514  |  |  | 
1515  | 2.03M  |     ctxt->maxatts = 0;  | 
1516  | 2.03M  |     ctxt->atts = NULL;  | 
1517  |  |     /* Allocate the Input stack */  | 
1518  | 2.03M  |     if (ctxt->inputTab == NULL) { | 
1519  | 2.03M  |   ctxt->inputTab = (xmlParserInputPtr *)  | 
1520  | 2.03M  |         xmlMalloc(5 * sizeof(xmlParserInputPtr));  | 
1521  | 2.03M  |   ctxt->inputMax = 5;  | 
1522  | 2.03M  |     }  | 
1523  | 2.03M  |     if (ctxt->inputTab == NULL) { | 
1524  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1525  | 0  |   ctxt->inputNr = 0;  | 
1526  | 0  |   ctxt->inputMax = 0;  | 
1527  | 0  |   ctxt->input = NULL;  | 
1528  | 0  |   return(-1);  | 
1529  | 0  |     }  | 
1530  | 2.03M  |     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ | 
1531  | 0  |         xmlFreeInputStream(input);  | 
1532  | 0  |     }  | 
1533  | 2.03M  |     ctxt->inputNr = 0;  | 
1534  | 2.03M  |     ctxt->input = NULL;  | 
1535  |  |  | 
1536  | 2.03M  |     ctxt->version = NULL;  | 
1537  | 2.03M  |     ctxt->encoding = NULL;  | 
1538  | 2.03M  |     ctxt->standalone = -1;  | 
1539  | 2.03M  |     ctxt->hasExternalSubset = 0;  | 
1540  | 2.03M  |     ctxt->hasPErefs = 0;  | 
1541  | 2.03M  |     ctxt->html = 0;  | 
1542  | 2.03M  |     ctxt->external = 0;  | 
1543  | 2.03M  |     ctxt->instate = XML_PARSER_START;  | 
1544  | 2.03M  |     ctxt->token = 0;  | 
1545  | 2.03M  |     ctxt->directory = NULL;  | 
1546  |  |  | 
1547  |  |     /* Allocate the Node stack */  | 
1548  | 2.03M  |     if (ctxt->nodeTab == NULL) { | 
1549  | 2.03M  |   ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));  | 
1550  | 2.03M  |   ctxt->nodeMax = 10;  | 
1551  | 2.03M  |     }  | 
1552  | 2.03M  |     if (ctxt->nodeTab == NULL) { | 
1553  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1554  | 0  |   ctxt->nodeNr = 0;  | 
1555  | 0  |   ctxt->nodeMax = 0;  | 
1556  | 0  |   ctxt->node = NULL;  | 
1557  | 0  |   ctxt->inputNr = 0;  | 
1558  | 0  |   ctxt->inputMax = 0;  | 
1559  | 0  |   ctxt->input = NULL;  | 
1560  | 0  |   return(-1);  | 
1561  | 0  |     }  | 
1562  | 2.03M  |     ctxt->nodeNr = 0;  | 
1563  | 2.03M  |     ctxt->node = NULL;  | 
1564  |  |  | 
1565  |  |     /* Allocate the Name stack */  | 
1566  | 2.03M  |     if (ctxt->nameTab == NULL) { | 
1567  | 2.03M  |   ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));  | 
1568  | 2.03M  |   ctxt->nameMax = 10;  | 
1569  | 2.03M  |     }  | 
1570  | 2.03M  |     if (ctxt->nameTab == NULL) { | 
1571  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1572  | 0  |   ctxt->nodeNr = 0;  | 
1573  | 0  |   ctxt->nodeMax = 0;  | 
1574  | 0  |   ctxt->node = NULL;  | 
1575  | 0  |   ctxt->inputNr = 0;  | 
1576  | 0  |   ctxt->inputMax = 0;  | 
1577  | 0  |   ctxt->input = NULL;  | 
1578  | 0  |   ctxt->nameNr = 0;  | 
1579  | 0  |   ctxt->nameMax = 0;  | 
1580  | 0  |   ctxt->name = NULL;  | 
1581  | 0  |   return(-1);  | 
1582  | 0  |     }  | 
1583  | 2.03M  |     ctxt->nameNr = 0;  | 
1584  | 2.03M  |     ctxt->name = NULL;  | 
1585  |  |  | 
1586  |  |     /* Allocate the space stack */  | 
1587  | 2.03M  |     if (ctxt->spaceTab == NULL) { | 
1588  | 2.03M  |   ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));  | 
1589  | 2.03M  |   ctxt->spaceMax = 10;  | 
1590  | 2.03M  |     }  | 
1591  | 2.03M  |     if (ctxt->spaceTab == NULL) { | 
1592  | 0  |         xmlErrMemory(NULL, "cannot initialize parser context\n");  | 
1593  | 0  |   ctxt->nodeNr = 0;  | 
1594  | 0  |   ctxt->nodeMax = 0;  | 
1595  | 0  |   ctxt->node = NULL;  | 
1596  | 0  |   ctxt->inputNr = 0;  | 
1597  | 0  |   ctxt->inputMax = 0;  | 
1598  | 0  |   ctxt->input = NULL;  | 
1599  | 0  |   ctxt->nameNr = 0;  | 
1600  | 0  |   ctxt->nameMax = 0;  | 
1601  | 0  |   ctxt->name = NULL;  | 
1602  | 0  |   ctxt->spaceNr = 0;  | 
1603  | 0  |   ctxt->spaceMax = 0;  | 
1604  | 0  |   ctxt->space = NULL;  | 
1605  | 0  |   return(-1);  | 
1606  | 0  |     }  | 
1607  | 2.03M  |     ctxt->spaceNr = 1;  | 
1608  | 2.03M  |     ctxt->spaceMax = 10;  | 
1609  | 2.03M  |     ctxt->spaceTab[0] = -1;  | 
1610  | 2.03M  |     ctxt->space = &ctxt->spaceTab[0];  | 
1611  | 2.03M  |     ctxt->myDoc = NULL;  | 
1612  | 2.03M  |     ctxt->wellFormed = 1;  | 
1613  | 2.03M  |     ctxt->nsWellFormed = 1;  | 
1614  | 2.03M  |     ctxt->valid = 1;  | 
1615  | 2.03M  |     ctxt->loadsubset = xmlLoadExtDtdDefaultValue;  | 
1616  | 2.03M  |     if (ctxt->loadsubset) { | 
1617  | 0  |         ctxt->options |= XML_PARSE_DTDLOAD;  | 
1618  | 0  |     }  | 
1619  | 2.03M  |     ctxt->validate = xmlDoValidityCheckingDefaultValue;  | 
1620  | 2.03M  |     ctxt->pedantic = xmlPedanticParserDefaultValue;  | 
1621  | 2.03M  |     if (ctxt->pedantic) { | 
1622  | 0  |         ctxt->options |= XML_PARSE_PEDANTIC;  | 
1623  | 0  |     }  | 
1624  | 2.03M  |     ctxt->linenumbers = xmlLineNumbersDefaultValue;  | 
1625  | 2.03M  |     ctxt->keepBlanks = xmlKeepBlanksDefaultValue;  | 
1626  | 2.03M  |     if (ctxt->keepBlanks == 0) { | 
1627  | 0  |   ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;  | 
1628  | 0  |   ctxt->options |= XML_PARSE_NOBLANKS;  | 
1629  | 0  |     }  | 
1630  |  |  | 
1631  | 2.03M  |     ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;  | 
1632  | 2.03M  |     ctxt->vctxt.userData = ctxt;  | 
1633  | 2.03M  |     ctxt->vctxt.error = xmlParserValidityError;  | 
1634  | 2.03M  |     ctxt->vctxt.warning = xmlParserValidityWarning;  | 
1635  | 2.03M  |     if (ctxt->validate) { | 
1636  | 0  |   if (xmlGetWarningsDefaultValue == 0)  | 
1637  | 0  |       ctxt->vctxt.warning = NULL;  | 
1638  | 0  |   else  | 
1639  | 0  |       ctxt->vctxt.warning = xmlParserValidityWarning;  | 
1640  | 0  |   ctxt->vctxt.nodeMax = 0;  | 
1641  | 0  |         ctxt->options |= XML_PARSE_DTDVALID;  | 
1642  | 0  |     }  | 
1643  | 2.03M  |     ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;  | 
1644  | 2.03M  |     if (ctxt->replaceEntities) { | 
1645  | 0  |         ctxt->options |= XML_PARSE_NOENT;  | 
1646  | 0  |     }  | 
1647  | 2.03M  |     ctxt->record_info = 0;  | 
1648  | 2.03M  |     ctxt->checkIndex = 0;  | 
1649  | 2.03M  |     ctxt->inSubset = 0;  | 
1650  | 2.03M  |     ctxt->errNo = XML_ERR_OK;  | 
1651  | 2.03M  |     ctxt->depth = 0;  | 
1652  | 2.03M  |     ctxt->charset = XML_CHAR_ENCODING_UTF8;  | 
1653  | 2.03M  |     ctxt->catalogs = NULL;  | 
1654  | 2.03M  |     ctxt->sizeentities = 0;  | 
1655  | 2.03M  |     ctxt->sizeentcopy = 0;  | 
1656  | 2.03M  |     ctxt->input_id = 1;  | 
1657  | 2.03M  |     xmlInitNodeInfoSeq(&ctxt->node_seq);  | 
1658  | 2.03M  |     return(0);  | 
1659  | 2.03M  | }  | 
1660  |  |  | 
1661  |  | /**  | 
1662  |  |  * xmlInitParserCtxt:  | 
1663  |  |  * @ctxt:  an XML parser context  | 
1664  |  |  *  | 
1665  |  |  * DEPRECATED: Internal function which will be made private in a future  | 
1666  |  |  * version.  | 
1667  |  |  *  | 
1668  |  |  * Initialize a parser context  | 
1669  |  |  *  | 
1670  |  |  * Returns 0 in case of success and -1 in case of error  | 
1671  |  |  */  | 
1672  |  |  | 
1673  |  | int  | 
1674  |  | xmlInitParserCtxt(xmlParserCtxtPtr ctxt)  | 
1675  | 0  | { | 
1676  | 0  |     return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));  | 
1677  | 0  | }  | 
1678  |  |  | 
1679  |  | /**  | 
1680  |  |  * xmlFreeParserCtxt:  | 
1681  |  |  * @ctxt:  an XML parser context  | 
1682  |  |  *  | 
1683  |  |  * Free all the memory used by a parser context. However the parsed  | 
1684  |  |  * document in ctxt->myDoc is not freed.  | 
1685  |  |  */  | 
1686  |  |  | 
1687  |  | void  | 
1688  |  | xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)  | 
1689  | 2.03M  | { | 
1690  | 2.03M  |     xmlParserInputPtr input;  | 
1691  |  |  | 
1692  | 2.03M  |     if (ctxt == NULL) return;  | 
1693  |  |  | 
1694  | 3.39M  |     while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ | 
1695  | 1.35M  |         xmlFreeInputStream(input);  | 
1696  | 1.35M  |     }  | 
1697  | 2.03M  |     if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);  | 
1698  | 2.03M  |     if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);  | 
1699  | 2.03M  |     if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);  | 
1700  | 2.03M  |     if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);  | 
1701  | 2.03M  |     if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);  | 
1702  | 2.03M  |     if (ctxt->version != NULL) xmlFree((char *) ctxt->version);  | 
1703  | 2.03M  |     if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);  | 
1704  | 2.03M  |     if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);  | 
1705  | 2.03M  |     if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);  | 
1706  | 2.03M  | #ifdef LIBXML_SAX1_ENABLED  | 
1707  | 2.03M  |     if ((ctxt->sax != NULL) &&  | 
1708  | 2.03M  |         (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))  | 
1709  |  | #else  | 
1710  |  |     if (ctxt->sax != NULL)  | 
1711  |  | #endif /* LIBXML_SAX1_ENABLED */  | 
1712  | 2.03M  |         xmlFree(ctxt->sax);  | 
1713  | 2.03M  |     if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);  | 
1714  | 2.03M  |     if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);  | 
1715  | 2.03M  |     if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);  | 
1716  | 2.03M  |     if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);  | 
1717  | 2.03M  |     if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);  | 
1718  | 2.03M  |     if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);  | 
1719  | 2.03M  |     if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);  | 
1720  | 2.03M  |     if (ctxt->attsDefault != NULL)  | 
1721  | 70.0k  |         xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);  | 
1722  | 2.03M  |     if (ctxt->attsSpecial != NULL)  | 
1723  | 106k  |         xmlHashFree(ctxt->attsSpecial, NULL);  | 
1724  | 2.03M  |     if (ctxt->freeElems != NULL) { | 
1725  | 177k  |         xmlNodePtr cur, next;  | 
1726  |  |  | 
1727  | 177k  |   cur = ctxt->freeElems;  | 
1728  | 355k  |   while (cur != NULL) { | 
1729  | 177k  |       next = cur->next;  | 
1730  | 177k  |       xmlFree(cur);  | 
1731  | 177k  |       cur = next;  | 
1732  | 177k  |   }  | 
1733  | 177k  |     }  | 
1734  | 2.03M  |     if (ctxt->freeAttrs != NULL) { | 
1735  | 82.3k  |         xmlAttrPtr cur, next;  | 
1736  |  |  | 
1737  | 82.3k  |   cur = ctxt->freeAttrs;  | 
1738  | 164k  |   while (cur != NULL) { | 
1739  | 82.3k  |       next = cur->next;  | 
1740  | 82.3k  |       xmlFree(cur);  | 
1741  | 82.3k  |       cur = next;  | 
1742  | 82.3k  |   }  | 
1743  | 82.3k  |     }  | 
1744  |  |     /*  | 
1745  |  |      * cleanup the error strings  | 
1746  |  |      */  | 
1747  | 2.03M  |     if (ctxt->lastError.message != NULL)  | 
1748  | 1.02M  |         xmlFree(ctxt->lastError.message);  | 
1749  | 2.03M  |     if (ctxt->lastError.file != NULL)  | 
1750  | 672k  |         xmlFree(ctxt->lastError.file);  | 
1751  | 2.03M  |     if (ctxt->lastError.str1 != NULL)  | 
1752  | 481k  |         xmlFree(ctxt->lastError.str1);  | 
1753  | 2.03M  |     if (ctxt->lastError.str2 != NULL)  | 
1754  | 64.0k  |         xmlFree(ctxt->lastError.str2);  | 
1755  | 2.03M  |     if (ctxt->lastError.str3 != NULL)  | 
1756  | 6.87k  |         xmlFree(ctxt->lastError.str3);  | 
1757  |  |  | 
1758  | 2.03M  | #ifdef LIBXML_CATALOG_ENABLED  | 
1759  | 2.03M  |     if (ctxt->catalogs != NULL)  | 
1760  | 0  |   xmlCatalogFreeLocal(ctxt->catalogs);  | 
1761  | 2.03M  | #endif  | 
1762  | 2.03M  |     xmlFree(ctxt);  | 
1763  | 2.03M  | }  | 
1764  |  |  | 
1765  |  | /**  | 
1766  |  |  * xmlNewParserCtxt:  | 
1767  |  |  *  | 
1768  |  |  * Allocate and initialize a new parser context.  | 
1769  |  |  *  | 
1770  |  |  * Returns the xmlParserCtxtPtr or NULL  | 
1771  |  |  */  | 
1772  |  |  | 
1773  |  | xmlParserCtxtPtr  | 
1774  |  | xmlNewParserCtxt(void)  | 
1775  | 497k  | { | 
1776  | 497k  |     return(xmlNewSAXParserCtxt(NULL, NULL));  | 
1777  | 497k  | }  | 
1778  |  |  | 
1779  |  | /**  | 
1780  |  |  * xmlNewSAXParserCtxt:  | 
1781  |  |  * @sax:  SAX handler  | 
1782  |  |  * @userData:  user data  | 
1783  |  |  *  | 
1784  |  |  * Allocate and initialize a new SAX parser context. If userData is NULL,  | 
1785  |  |  * the parser context will be passed as user data.  | 
1786  |  |  *  | 
1787  |  |  * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.  | 
1788  |  |  */  | 
1789  |  |  | 
1790  |  | xmlParserCtxtPtr  | 
1791  |  | xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)  | 
1792  | 2.03M  | { | 
1793  | 2.03M  |     xmlParserCtxtPtr ctxt;  | 
1794  |  |  | 
1795  | 2.03M  |     ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));  | 
1796  | 2.03M  |     if (ctxt == NULL) { | 
1797  | 0  |   xmlErrMemory(NULL, "cannot allocate parser context\n");  | 
1798  | 0  |   return(NULL);  | 
1799  | 0  |     }  | 
1800  | 2.03M  |     memset(ctxt, 0, sizeof(xmlParserCtxt));  | 
1801  | 2.03M  |     if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) { | 
1802  | 0  |         xmlFreeParserCtxt(ctxt);  | 
1803  | 0  |   return(NULL);  | 
1804  | 0  |     }  | 
1805  | 2.03M  |     return(ctxt);  | 
1806  | 2.03M  | }  | 
1807  |  |  | 
1808  |  | /************************************************************************  | 
1809  |  |  *                  *  | 
1810  |  |  *    Handling of node information        *  | 
1811  |  |  *                  *  | 
1812  |  |  ************************************************************************/  | 
1813  |  |  | 
1814  |  | /**  | 
1815  |  |  * xmlClearParserCtxt:  | 
1816  |  |  * @ctxt:  an XML parser context  | 
1817  |  |  *  | 
1818  |  |  * Clear (release owned resources) and reinitialize a parser context  | 
1819  |  |  */  | 
1820  |  |  | 
1821  |  | void  | 
1822  |  | xmlClearParserCtxt(xmlParserCtxtPtr ctxt)  | 
1823  | 0  | { | 
1824  | 0  |   if (ctxt==NULL)  | 
1825  | 0  |     return;  | 
1826  | 0  |   xmlClearNodeInfoSeq(&ctxt->node_seq);  | 
1827  | 0  |   xmlCtxtReset(ctxt);  | 
1828  | 0  | }  | 
1829  |  |  | 
1830  |  |  | 
1831  |  | /**  | 
1832  |  |  * xmlParserFindNodeInfo:  | 
1833  |  |  * @ctx:  an XML parser context  | 
1834  |  |  * @node:  an XML node within the tree  | 
1835  |  |  *  | 
1836  |  |  * DEPRECATED: Don't use.  | 
1837  |  |  *  | 
1838  |  |  * Find the parser node info struct for a given node  | 
1839  |  |  *  | 
1840  |  |  * Returns an xmlParserNodeInfo block pointer or NULL  | 
1841  |  |  */  | 
1842  |  | const xmlParserNodeInfo *  | 
1843  |  | xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)  | 
1844  | 0  | { | 
1845  | 0  |     unsigned long pos;  | 
1846  |  | 
  | 
1847  | 0  |     if ((ctx == NULL) || (node == NULL))  | 
1848  | 0  |         return (NULL);  | 
1849  |  |     /* Find position where node should be at */  | 
1850  | 0  |     pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);  | 
1851  | 0  |     if (pos < ctx->node_seq.length  | 
1852  | 0  |         && ctx->node_seq.buffer[pos].node == node)  | 
1853  | 0  |         return &ctx->node_seq.buffer[pos];  | 
1854  | 0  |     else  | 
1855  | 0  |         return NULL;  | 
1856  | 0  | }  | 
1857  |  |  | 
1858  |  |  | 
1859  |  | /**  | 
1860  |  |  * xmlInitNodeInfoSeq:  | 
1861  |  |  * @seq:  a node info sequence pointer  | 
1862  |  |  *  | 
1863  |  |  * DEPRECATED: Don't use.  | 
1864  |  |  *  | 
1865  |  |  * -- Initialize (set to initial state) node info sequence  | 
1866  |  |  */  | 
1867  |  | void  | 
1868  |  | xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)  | 
1869  | 2.03M  | { | 
1870  | 2.03M  |     if (seq == NULL)  | 
1871  | 0  |         return;  | 
1872  | 2.03M  |     seq->length = 0;  | 
1873  | 2.03M  |     seq->maximum = 0;  | 
1874  | 2.03M  |     seq->buffer = NULL;  | 
1875  | 2.03M  | }  | 
1876  |  |  | 
1877  |  | /**  | 
1878  |  |  * xmlClearNodeInfoSeq:  | 
1879  |  |  * @seq:  a node info sequence pointer  | 
1880  |  |  *  | 
1881  |  |  * DEPRECATED: Don't use.  | 
1882  |  |  *  | 
1883  |  |  * -- Clear (release memory and reinitialize) node  | 
1884  |  |  *   info sequence  | 
1885  |  |  */  | 
1886  |  | void  | 
1887  |  | xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)  | 
1888  | 0  | { | 
1889  | 0  |     if (seq == NULL)  | 
1890  | 0  |         return;  | 
1891  | 0  |     if (seq->buffer != NULL)  | 
1892  | 0  |         xmlFree(seq->buffer);  | 
1893  | 0  |     xmlInitNodeInfoSeq(seq);  | 
1894  | 0  | }  | 
1895  |  |  | 
1896  |  | /**  | 
1897  |  |  * xmlParserFindNodeInfoIndex:  | 
1898  |  |  * @seq:  a node info sequence pointer  | 
1899  |  |  * @node:  an XML node pointer  | 
1900  |  |  *  | 
1901  |  |  * DEPRECATED: Don't use.  | 
1902  |  |  *  | 
1903  |  |  * xmlParserFindNodeInfoIndex : Find the index that the info record for  | 
1904  |  |  *   the given node is or should be at in a sorted sequence  | 
1905  |  |  *  | 
1906  |  |  * Returns a long indicating the position of the record  | 
1907  |  |  */  | 
1908  |  | unsigned long  | 
1909  |  | xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,  | 
1910  |  |                            const xmlNodePtr node)  | 
1911  | 0  | { | 
1912  | 0  |     unsigned long upper, lower, middle;  | 
1913  | 0  |     int found = 0;  | 
1914  |  | 
  | 
1915  | 0  |     if ((seq == NULL) || (node == NULL))  | 
1916  | 0  |         return ((unsigned long) -1);  | 
1917  |  |  | 
1918  |  |     /* Do a binary search for the key */  | 
1919  | 0  |     lower = 1;  | 
1920  | 0  |     upper = seq->length;  | 
1921  | 0  |     middle = 0;  | 
1922  | 0  |     while (lower <= upper && !found) { | 
1923  | 0  |         middle = lower + (upper - lower) / 2;  | 
1924  | 0  |         if (node == seq->buffer[middle - 1].node)  | 
1925  | 0  |             found = 1;  | 
1926  | 0  |         else if (node < seq->buffer[middle - 1].node)  | 
1927  | 0  |             upper = middle - 1;  | 
1928  | 0  |         else  | 
1929  | 0  |             lower = middle + 1;  | 
1930  | 0  |     }  | 
1931  |  |  | 
1932  |  |     /* Return position */  | 
1933  | 0  |     if (middle == 0 || seq->buffer[middle - 1].node < node)  | 
1934  | 0  |         return middle;  | 
1935  | 0  |     else  | 
1936  | 0  |         return middle - 1;  | 
1937  | 0  | }  | 
1938  |  |  | 
1939  |  |  | 
1940  |  | /**  | 
1941  |  |  * xmlParserAddNodeInfo:  | 
1942  |  |  * @ctxt:  an XML parser context  | 
1943  |  |  * @info:  a node info sequence pointer  | 
1944  |  |  *  | 
1945  |  |  * DEPRECATED: Don't use.  | 
1946  |  |  *  | 
1947  |  |  * Insert node info record into the sorted sequence  | 
1948  |  |  */  | 
1949  |  | void  | 
1950  |  | xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,  | 
1951  |  |                      const xmlParserNodeInfoPtr info)  | 
1952  | 0  | { | 
1953  | 0  |     unsigned long pos;  | 
1954  |  | 
  | 
1955  | 0  |     if ((ctxt == NULL) || (info == NULL)) return;  | 
1956  |  |  | 
1957  |  |     /* Find pos and check to see if node is already in the sequence */  | 
1958  | 0  |     pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)  | 
1959  | 0  |                                      info->node);  | 
1960  |  | 
  | 
1961  | 0  |     if ((pos < ctxt->node_seq.length) &&  | 
1962  | 0  |         (ctxt->node_seq.buffer != NULL) &&  | 
1963  | 0  |         (ctxt->node_seq.buffer[pos].node == info->node)) { | 
1964  | 0  |         ctxt->node_seq.buffer[pos] = *info;  | 
1965  | 0  |     }  | 
1966  |  |  | 
1967  |  |     /* Otherwise, we need to add new node to buffer */  | 
1968  | 0  |     else { | 
1969  | 0  |         if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||  | 
1970  | 0  |       (ctxt->node_seq.buffer == NULL)) { | 
1971  | 0  |             xmlParserNodeInfo *tmp_buffer;  | 
1972  | 0  |             unsigned int byte_size;  | 
1973  |  | 
  | 
1974  | 0  |             if (ctxt->node_seq.maximum == 0)  | 
1975  | 0  |                 ctxt->node_seq.maximum = 2;  | 
1976  | 0  |             byte_size = (sizeof(*ctxt->node_seq.buffer) *  | 
1977  | 0  |       (2 * ctxt->node_seq.maximum));  | 
1978  |  | 
  | 
1979  | 0  |             if (ctxt->node_seq.buffer == NULL)  | 
1980  | 0  |                 tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);  | 
1981  | 0  |             else  | 
1982  | 0  |                 tmp_buffer =  | 
1983  | 0  |                     (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,  | 
1984  | 0  |                                                      byte_size);  | 
1985  |  | 
  | 
1986  | 0  |             if (tmp_buffer == NULL) { | 
1987  | 0  |     xmlErrMemory(ctxt, "failed to allocate buffer\n");  | 
1988  | 0  |                 return;  | 
1989  | 0  |             }  | 
1990  | 0  |             ctxt->node_seq.buffer = tmp_buffer;  | 
1991  | 0  |             ctxt->node_seq.maximum *= 2;  | 
1992  | 0  |         }  | 
1993  |  |  | 
1994  |  |         /* If position is not at end, move elements out of the way */  | 
1995  | 0  |         if (pos != ctxt->node_seq.length) { | 
1996  | 0  |             unsigned long i;  | 
1997  |  | 
  | 
1998  | 0  |             for (i = ctxt->node_seq.length; i > pos; i--)  | 
1999  | 0  |                 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];  | 
2000  | 0  |         }  | 
2001  |  |  | 
2002  |  |         /* Copy element and increase length */  | 
2003  | 0  |         ctxt->node_seq.buffer[pos] = *info;  | 
2004  | 0  |         ctxt->node_seq.length++;  | 
2005  | 0  |     }  | 
2006  | 0  | }  | 
2007  |  |  | 
2008  |  | /************************************************************************  | 
2009  |  |  *                  *  | 
2010  |  |  *    Default settings          *  | 
2011  |  |  *                  *  | 
2012  |  |  ************************************************************************/  | 
2013  |  | /**  | 
2014  |  |  * xmlPedanticParserDefault:  | 
2015  |  |  * @val:  int 0 or 1  | 
2016  |  |  *  | 
2017  |  |  * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.  | 
2018  |  |  *  | 
2019  |  |  * Set and return the previous value for enabling pedantic warnings.  | 
2020  |  |  *  | 
2021  |  |  * Returns the last value for 0 for no substitution, 1 for substitution.  | 
2022  |  |  */  | 
2023  |  |  | 
2024  |  | int  | 
2025  | 0  | xmlPedanticParserDefault(int val) { | 
2026  | 0  |     int old = xmlPedanticParserDefaultValue;  | 
2027  |  | 
  | 
2028  | 0  |     xmlPedanticParserDefaultValue = val;  | 
2029  | 0  |     return(old);  | 
2030  | 0  | }  | 
2031  |  |  | 
2032  |  | /**  | 
2033  |  |  * xmlLineNumbersDefault:  | 
2034  |  |  * @val:  int 0 or 1  | 
2035  |  |  *  | 
2036  |  |  * DEPRECATED: The modern options API always enables line numbers.  | 
2037  |  |  *  | 
2038  |  |  * Set and return the previous value for enabling line numbers in elements  | 
2039  |  |  * contents. This may break on old application and is turned off by default.  | 
2040  |  |  *  | 
2041  |  |  * Returns the last value for 0 for no substitution, 1 for substitution.  | 
2042  |  |  */  | 
2043  |  |  | 
2044  |  | int  | 
2045  | 0  | xmlLineNumbersDefault(int val) { | 
2046  | 0  |     int old = xmlLineNumbersDefaultValue;  | 
2047  |  | 
  | 
2048  | 0  |     xmlLineNumbersDefaultValue = val;  | 
2049  | 0  |     return(old);  | 
2050  | 0  | }  | 
2051  |  |  | 
2052  |  | /**  | 
2053  |  |  * xmlSubstituteEntitiesDefault:  | 
2054  |  |  * @val:  int 0 or 1  | 
2055  |  |  *  | 
2056  |  |  * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.  | 
2057  |  |  *  | 
2058  |  |  * Set and return the previous value for default entity support.  | 
2059  |  |  * Initially the parser always keep entity references instead of substituting  | 
2060  |  |  * entity values in the output. This function has to be used to change the  | 
2061  |  |  * default parser behavior  | 
2062  |  |  * SAX::substituteEntities() has to be used for changing that on a file by  | 
2063  |  |  * file basis.  | 
2064  |  |  *  | 
2065  |  |  * Returns the last value for 0 for no substitution, 1 for substitution.  | 
2066  |  |  */  | 
2067  |  |  | 
2068  |  | int  | 
2069  | 0  | xmlSubstituteEntitiesDefault(int val) { | 
2070  | 0  |     int old = xmlSubstituteEntitiesDefaultValue;  | 
2071  |  | 
  | 
2072  | 0  |     xmlSubstituteEntitiesDefaultValue = val;  | 
2073  | 0  |     return(old);  | 
2074  | 0  | }  | 
2075  |  |  | 
2076  |  | /**  | 
2077  |  |  * xmlKeepBlanksDefault:  | 
2078  |  |  * @val:  int 0 or 1  | 
2079  |  |  *  | 
2080  |  |  * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.  | 
2081  |  |  *  | 
2082  |  |  * Set and return the previous value for default blanks text nodes support.  | 
2083  |  |  * The 1.x version of the parser used an heuristic to try to detect  | 
2084  |  |  * ignorable white spaces. As a result the SAX callback was generating  | 
2085  |  |  * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when  | 
2086  |  |  * using the DOM output text nodes containing those blanks were not generated.  | 
2087  |  |  * The 2.x and later version will switch to the XML standard way and  | 
2088  |  |  * ignorableWhitespace() are only generated when running the parser in  | 
2089  |  |  * validating mode and when the current element doesn't allow CDATA or  | 
2090  |  |  * mixed content.  | 
2091  |  |  * This function is provided as a way to force the standard behavior  | 
2092  |  |  * on 1.X libs and to switch back to the old mode for compatibility when  | 
2093  |  |  * running 1.X client code on 2.X . Upgrade of 1.X code should be done  | 
2094  |  |  * by using xmlIsBlankNode() commodity function to detect the "empty"  | 
2095  |  |  * nodes generated.  | 
2096  |  |  * This value also affect autogeneration of indentation when saving code  | 
2097  |  |  * if blanks sections are kept, indentation is not generated.  | 
2098  |  |  *  | 
2099  |  |  * Returns the last value for 0 for no substitution, 1 for substitution.  | 
2100  |  |  */  | 
2101  |  |  | 
2102  |  | int  | 
2103  | 0  | xmlKeepBlanksDefault(int val) { | 
2104  | 0  |     int old = xmlKeepBlanksDefaultValue;  | 
2105  |  | 
  | 
2106  | 0  |     xmlKeepBlanksDefaultValue = val;  | 
2107  | 0  |     if (!val) xmlIndentTreeOutput = 1;  | 
2108  | 0  |     return(old);  | 
2109  | 0  | }  | 
2110  |  |  |