Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * string.c : an XML string utilities module  | 
3  |  |  *  | 
4  |  |  * This module provides various utility functions for manipulating  | 
5  |  |  * the xmlChar* type. All functions named xmlStr* have been moved here  | 
6  |  |  * from the parser.c file (their original home).  | 
7  |  |  *  | 
8  |  |  * See Copyright for the status of this software.  | 
9  |  |  *  | 
10  |  |  * UTF8 string routines from:  | 
11  |  |  * William Brack <wbrack@mmm.com.hk>  | 
12  |  |  *  | 
13  |  |  * daniel@veillard.com  | 
14  |  |  */  | 
15  |  |  | 
16  |  | #define IN_LIBXML  | 
17  |  | #include "libxml.h"  | 
18  |  |  | 
19  |  | #include <stdlib.h>  | 
20  |  | #include <string.h>  | 
21  |  | #include <limits.h>  | 
22  |  | #include <libxml/xmlmemory.h>  | 
23  |  | #include <libxml/parserInternals.h>  | 
24  |  | #include <libxml/xmlstring.h>  | 
25  |  |  | 
26  |  | #include "private/parser.h"  | 
27  |  | #include "private/string.h"  | 
28  |  |  | 
29  |  | /************************************************************************  | 
30  |  |  *                                                                      *  | 
31  |  |  *                Commodity functions to handle xmlChars                *  | 
32  |  |  *                                                                      *  | 
33  |  |  ************************************************************************/  | 
34  |  |  | 
35  |  | /**  | 
36  |  |  * xmlStrndup:  | 
37  |  |  * @cur:  the input xmlChar *  | 
38  |  |  * @len:  the len of @cur  | 
39  |  |  *  | 
40  |  |  * a strndup for array of xmlChar's  | 
41  |  |  *  | 
42  |  |  * Returns a new xmlChar * or NULL  | 
43  |  |  */  | 
44  |  | xmlChar *  | 
45  | 3.76M  | xmlStrndup(const xmlChar *cur, int len) { | 
46  | 3.76M  |     xmlChar *ret;  | 
47  |  |  | 
48  | 3.76M  |     if ((cur == NULL) || (len < 0)) return(NULL);  | 
49  | 3.76M  |     ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);  | 
50  | 3.76M  |     if (ret == NULL) { | 
51  | 0  |         return(NULL);  | 
52  | 0  |     }  | 
53  | 3.76M  |     memcpy(ret, cur, len);  | 
54  | 3.76M  |     ret[len] = 0;  | 
55  | 3.76M  |     return(ret);  | 
56  | 3.76M  | }  | 
57  |  |  | 
58  |  | /**  | 
59  |  |  * xmlStrdup:  | 
60  |  |  * @cur:  the input xmlChar *  | 
61  |  |  *  | 
62  |  |  * a strdup for array of xmlChar's. Since they are supposed to be  | 
63  |  |  * encoded in UTF-8 or an encoding with 8bit based chars, we assume  | 
64  |  |  * a termination mark of '0'.  | 
65  |  |  *  | 
66  |  |  * Returns a new xmlChar * or NULL  | 
67  |  |  */  | 
68  |  | xmlChar *  | 
69  | 3.40M  | xmlStrdup(const xmlChar *cur) { | 
70  | 3.40M  |     const xmlChar *p = cur;  | 
71  |  |  | 
72  | 3.40M  |     if (cur == NULL) return(NULL);  | 
73  | 66.7G  |     while (*p != 0) p++; /* non input consuming */  | 
74  | 3.39M  |     return(xmlStrndup(cur, p - cur));  | 
75  | 3.40M  | }  | 
76  |  |  | 
77  |  | /**  | 
78  |  |  * xmlCharStrndup:  | 
79  |  |  * @cur:  the input char *  | 
80  |  |  * @len:  the len of @cur  | 
81  |  |  *  | 
82  |  |  * a strndup for char's to xmlChar's  | 
83  |  |  *  | 
84  |  |  * Returns a new xmlChar * or NULL  | 
85  |  |  */  | 
86  |  |  | 
87  |  | xmlChar *  | 
88  | 5.05k  | xmlCharStrndup(const char *cur, int len) { | 
89  | 5.05k  |     int i;  | 
90  | 5.05k  |     xmlChar *ret;  | 
91  |  |  | 
92  | 5.05k  |     if ((cur == NULL) || (len < 0)) return(NULL);  | 
93  | 5.05k  |     ret = (xmlChar *) xmlMallocAtomic((size_t) len + 1);  | 
94  | 5.05k  |     if (ret == NULL) { | 
95  | 0  |         return(NULL);  | 
96  | 0  |     }  | 
97  | 636k  |     for (i = 0;i < len;i++) { | 
98  |  |         /* Explicit sign change */  | 
99  | 631k  |         ret[i] = (xmlChar) cur[i];  | 
100  | 631k  |         if (ret[i] == 0) return(ret);  | 
101  | 631k  |     }  | 
102  | 5.05k  |     ret[len] = 0;  | 
103  | 5.05k  |     return(ret);  | 
104  | 5.05k  | }  | 
105  |  |  | 
106  |  | /**  | 
107  |  |  * xmlCharStrdup:  | 
108  |  |  * @cur:  the input char *  | 
109  |  |  *  | 
110  |  |  * a strdup for char's to xmlChar's  | 
111  |  |  *  | 
112  |  |  * Returns a new xmlChar * or NULL  | 
113  |  |  */  | 
114  |  |  | 
115  |  | xmlChar *  | 
116  | 5.05k  | xmlCharStrdup(const char *cur) { | 
117  | 5.05k  |     const char *p = cur;  | 
118  |  |  | 
119  | 5.05k  |     if (cur == NULL) return(NULL);  | 
120  | 636k  |     while (*p != '\0') p++; /* non input consuming */  | 
121  | 5.05k  |     return(xmlCharStrndup(cur, p - cur));  | 
122  | 5.05k  | }  | 
123  |  |  | 
124  |  | /**  | 
125  |  |  * xmlStrcmp:  | 
126  |  |  * @str1:  the first xmlChar *  | 
127  |  |  * @str2:  the second xmlChar *  | 
128  |  |  *  | 
129  |  |  * a strcmp for xmlChar's  | 
130  |  |  *  | 
131  |  |  * Returns the integer result of the comparison  | 
132  |  |  */  | 
133  |  |  | 
134  |  | int  | 
135  | 5  | xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { | 
136  | 5  |     if (str1 == str2) return(0);  | 
137  | 5  |     if (str1 == NULL) return(-1);  | 
138  | 5  |     if (str2 == NULL) return(1);  | 
139  | 5  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
140  | 5  |     return(strcmp((const char *)str1, (const char *)str2));  | 
141  |  | #else  | 
142  |  |     do { | 
143  |  |         int tmp = *str1++ - *str2;  | 
144  |  |         if (tmp != 0) return(tmp);  | 
145  |  |     } while (*str2++ != 0);  | 
146  |  |     return 0;  | 
147  |  | #endif  | 
148  | 5  | }  | 
149  |  |  | 
150  |  | /**  | 
151  |  |  * xmlStrEqual:  | 
152  |  |  * @str1:  the first xmlChar *  | 
153  |  |  * @str2:  the second xmlChar *  | 
154  |  |  *  | 
155  |  |  * Check if both strings are equal of have same content.  | 
156  |  |  * Should be a bit more readable and faster than xmlStrcmp()  | 
157  |  |  *  | 
158  |  |  * Returns 1 if they are equal, 0 if they are different  | 
159  |  |  */  | 
160  |  |  | 
161  |  | int  | 
162  | 1.25M  | xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { | 
163  | 1.25M  |     if (str1 == str2) return(1);  | 
164  | 1.22M  |     if (str1 == NULL) return(0);  | 
165  | 1.22M  |     if (str2 == NULL) return(0);  | 
166  | 1.22M  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
167  | 1.22M  |     return(strcmp((const char *)str1, (const char *)str2) == 0);  | 
168  |  | #else  | 
169  |  |     do { | 
170  |  |         if (*str1++ != *str2) return(0);  | 
171  |  |     } while (*str2++);  | 
172  |  |     return(1);  | 
173  |  | #endif  | 
174  | 1.22M  | }  | 
175  |  |  | 
176  |  | /**  | 
177  |  |  * xmlStrQEqual:  | 
178  |  |  * @pref:  the prefix of the QName  | 
179  |  |  * @name:  the localname of the QName  | 
180  |  |  * @str:  the second xmlChar *  | 
181  |  |  *  | 
182  |  |  * Check if a QName is Equal to a given string  | 
183  |  |  *  | 
184  |  |  * Returns 1 if they are equal, 0 if they are different  | 
185  |  |  */  | 
186  |  |  | 
187  |  | int  | 
188  | 0  | xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { | 
189  | 0  |     if (pref == NULL) return(xmlStrEqual(name, str));  | 
190  | 0  |     if (name == NULL) return(0);  | 
191  | 0  |     if (str == NULL) return(0);  | 
192  |  |  | 
193  | 0  |     do { | 
194  | 0  |         if (*pref++ != *str) return(0);  | 
195  | 0  |     } while ((*str++) && (*pref));  | 
196  | 0  |     if (*str++ != ':') return(0);  | 
197  | 0  |     do { | 
198  | 0  |         if (*name++ != *str) return(0);  | 
199  | 0  |     } while (*str++);  | 
200  | 0  |     return(1);  | 
201  | 0  | }  | 
202  |  |  | 
203  |  | /**  | 
204  |  |  * xmlStrncmp:  | 
205  |  |  * @str1:  the first xmlChar *  | 
206  |  |  * @str2:  the second xmlChar *  | 
207  |  |  * @len:  the max comparison length  | 
208  |  |  *  | 
209  |  |  * a strncmp for xmlChar's  | 
210  |  |  *  | 
211  |  |  * Returns the integer result of the comparison  | 
212  |  |  */  | 
213  |  |  | 
214  |  | int  | 
215  | 23.4k  | xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { | 
216  | 23.4k  |     if (len <= 0) return(0);  | 
217  | 22.6k  |     if (str1 == str2) return(0);  | 
218  | 22.6k  |     if (str1 == NULL) return(-1);  | 
219  | 22.6k  |     if (str2 == NULL) return(1);  | 
220  | 22.6k  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
221  | 22.6k  |     return(strncmp((const char *)str1, (const char *)str2, len));  | 
222  |  | #else  | 
223  |  |     do { | 
224  |  |         int tmp = *str1++ - *str2;  | 
225  |  |         if (tmp != 0 || --len == 0) return(tmp);  | 
226  |  |     } while (*str2++ != 0);  | 
227  |  |     return 0;  | 
228  |  | #endif  | 
229  | 22.6k  | }  | 
230  |  |  | 
231  |  | static const xmlChar casemap[256] = { | 
232  |  |     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,  | 
233  |  |     0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,  | 
234  |  |     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,  | 
235  |  |     0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,  | 
236  |  |     0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,  | 
237  |  |     0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,  | 
238  |  |     0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,  | 
239  |  |     0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,  | 
240  |  |     0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,  | 
241  |  |     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,  | 
242  |  |     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,  | 
243  |  |     0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,  | 
244  |  |     0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,  | 
245  |  |     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,  | 
246  |  |     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,  | 
247  |  |     0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,  | 
248  |  |     0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,  | 
249  |  |     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,  | 
250  |  |     0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,  | 
251  |  |     0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,  | 
252  |  |     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,  | 
253  |  |     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,  | 
254  |  |     0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,  | 
255  |  |     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,  | 
256  |  |     0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,  | 
257  |  |     0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,  | 
258  |  |     0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,  | 
259  |  |     0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,  | 
260  |  |     0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,  | 
261  |  |     0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,  | 
262  |  |     0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,  | 
263  |  |     0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF  | 
264  |  | };  | 
265  |  |  | 
266  |  | /**  | 
267  |  |  * xmlStrcasecmp:  | 
268  |  |  * @str1:  the first xmlChar *  | 
269  |  |  * @str2:  the second xmlChar *  | 
270  |  |  *  | 
271  |  |  * a strcasecmp for xmlChar's  | 
272  |  |  *  | 
273  |  |  * Returns the integer result of the comparison  | 
274  |  |  */  | 
275  |  |  | 
276  |  | int  | 
277  | 0  | xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { | 
278  | 0  |     register int tmp;  | 
279  |  | 
  | 
280  | 0  |     if (str1 == str2) return(0);  | 
281  | 0  |     if (str1 == NULL) return(-1);  | 
282  | 0  |     if (str2 == NULL) return(1);  | 
283  | 0  |     do { | 
284  | 0  |         tmp = casemap[*str1++] - casemap[*str2];  | 
285  | 0  |         if (tmp != 0) return(tmp);  | 
286  | 0  |     } while (*str2++ != 0);  | 
287  | 0  |     return 0;  | 
288  | 0  | }  | 
289  |  |  | 
290  |  | /**  | 
291  |  |  * xmlStrncasecmp:  | 
292  |  |  * @str1:  the first xmlChar *  | 
293  |  |  * @str2:  the second xmlChar *  | 
294  |  |  * @len:  the max comparison length  | 
295  |  |  *  | 
296  |  |  * a strncasecmp for xmlChar's  | 
297  |  |  *  | 
298  |  |  * Returns the integer result of the comparison  | 
299  |  |  */  | 
300  |  |  | 
301  |  | int  | 
302  | 54.2k  | xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { | 
303  | 54.2k  |     register int tmp;  | 
304  |  |  | 
305  | 54.2k  |     if (len <= 0) return(0);  | 
306  | 54.2k  |     if (str1 == str2) return(0);  | 
307  | 54.2k  |     if (str1 == NULL) return(-1);  | 
308  | 54.2k  |     if (str2 == NULL) return(1);  | 
309  | 62.3k  |     do { | 
310  | 62.3k  |         tmp = casemap[*str1++] - casemap[*str2];  | 
311  | 62.3k  |         if (tmp != 0 || --len == 0) return(tmp);  | 
312  | 62.3k  |     } while (*str2++ != 0);  | 
313  | 0  |     return 0;  | 
314  | 54.2k  | }  | 
315  |  |  | 
316  |  | /**  | 
317  |  |  * xmlStrchr:  | 
318  |  |  * @str:  the xmlChar * array  | 
319  |  |  * @val:  the xmlChar to search  | 
320  |  |  *  | 
321  |  |  * a strchr for xmlChar's  | 
322  |  |  *  | 
323  |  |  * Returns the xmlChar * for the first occurrence or NULL.  | 
324  |  |  */  | 
325  |  |  | 
326  |  | const xmlChar *  | 
327  | 117k  | xmlStrchr(const xmlChar *str, xmlChar val) { | 
328  | 117k  |     if (str == NULL) return(NULL);  | 
329  | 13.4M  |     while (*str != 0) { /* non input consuming */ | 
330  | 13.3M  |         if (*str == val) return((xmlChar *) str);  | 
331  | 13.3M  |         str++;  | 
332  | 13.3M  |     }  | 
333  | 93.0k  |     return(NULL);  | 
334  | 117k  | }  | 
335  |  |  | 
336  |  | /**  | 
337  |  |  * xmlStrstr:  | 
338  |  |  * @str:  the xmlChar * array (haystack)  | 
339  |  |  * @val:  the xmlChar to search (needle)  | 
340  |  |  *  | 
341  |  |  * a strstr for xmlChar's  | 
342  |  |  *  | 
343  |  |  * Returns the xmlChar * for the first occurrence or NULL.  | 
344  |  |  */  | 
345  |  |  | 
346  |  | const xmlChar *  | 
347  | 86.0k  | xmlStrstr(const xmlChar *str, const xmlChar *val) { | 
348  | 86.0k  |     int n;  | 
349  |  |  | 
350  | 86.0k  |     if (str == NULL) return(NULL);  | 
351  | 86.0k  |     if (val == NULL) return(NULL);  | 
352  | 86.0k  |     n = xmlStrlen(val);  | 
353  |  |  | 
354  | 86.0k  |     if (n == 0) return(str);  | 
355  | 861k  |     while (*str != 0) { /* non input consuming */ | 
356  | 775k  |         if (*str == *val) { | 
357  | 8.77k  |             if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);  | 
358  | 8.77k  |         }  | 
359  | 775k  |         str++;  | 
360  | 775k  |     }  | 
361  | 85.4k  |     return(NULL);  | 
362  | 86.0k  | }  | 
363  |  |  | 
364  |  | /**  | 
365  |  |  * xmlStrcasestr:  | 
366  |  |  * @str:  the xmlChar * array (haystack)  | 
367  |  |  * @val:  the xmlChar to search (needle)  | 
368  |  |  *  | 
369  |  |  * a case-ignoring strstr for xmlChar's  | 
370  |  |  *  | 
371  |  |  * Returns the xmlChar * for the first occurrence or NULL.  | 
372  |  |  */  | 
373  |  |  | 
374  |  | const xmlChar *  | 
375  | 0  | xmlStrcasestr(const xmlChar *str, const xmlChar *val) { | 
376  | 0  |     int n;  | 
377  |  | 
  | 
378  | 0  |     if (str == NULL) return(NULL);  | 
379  | 0  |     if (val == NULL) return(NULL);  | 
380  | 0  |     n = xmlStrlen(val);  | 
381  |  | 
  | 
382  | 0  |     if (n == 0) return(str);  | 
383  | 0  |     while (*str != 0) { /* non input consuming */ | 
384  | 0  |         if (casemap[*str] == casemap[*val])  | 
385  | 0  |             if (!xmlStrncasecmp(str, val, n)) return(str);  | 
386  | 0  |         str++;  | 
387  | 0  |     }  | 
388  | 0  |     return(NULL);  | 
389  | 0  | }  | 
390  |  |  | 
391  |  | /**  | 
392  |  |  * xmlStrsub:  | 
393  |  |  * @str:  the xmlChar * array (haystack)  | 
394  |  |  * @start:  the index of the first char (zero based)  | 
395  |  |  * @len:  the length of the substring  | 
396  |  |  *  | 
397  |  |  * Extract a substring of a given string  | 
398  |  |  *  | 
399  |  |  * Returns the xmlChar * for the first occurrence or NULL.  | 
400  |  |  */  | 
401  |  |  | 
402  |  | xmlChar *  | 
403  | 0  | xmlStrsub(const xmlChar *str, int start, int len) { | 
404  | 0  |     int i;  | 
405  |  | 
  | 
406  | 0  |     if (str == NULL) return(NULL);  | 
407  | 0  |     if (start < 0) return(NULL);  | 
408  | 0  |     if (len < 0) return(NULL);  | 
409  |  |  | 
410  | 0  |     for (i = 0;i < start;i++) { | 
411  | 0  |         if (*str == 0) return(NULL);  | 
412  | 0  |         str++;  | 
413  | 0  |     }  | 
414  | 0  |     if (*str == 0) return(NULL);  | 
415  | 0  |     return(xmlStrndup(str, len));  | 
416  | 0  | }  | 
417  |  |  | 
418  |  | /**  | 
419  |  |  * xmlStrlen:  | 
420  |  |  * @str:  the xmlChar * array  | 
421  |  |  *  | 
422  |  |  * length of a xmlChar's string  | 
423  |  |  *  | 
424  |  |  * Returns the number of xmlChar contained in the ARRAY.  | 
425  |  |  */  | 
426  |  |  | 
427  |  | int  | 
428  | 1.94M  | xmlStrlen(const xmlChar *str) { | 
429  | 1.94M  |     size_t len = str ? strlen((const char *)str) : 0;  | 
430  | 1.94M  |     return(len > INT_MAX ? 0 : len);  | 
431  | 1.94M  | }  | 
432  |  |  | 
433  |  | /**  | 
434  |  |  * xmlStrncat:  | 
435  |  |  * @cur:  the original xmlChar * array  | 
436  |  |  * @add:  the xmlChar * array added  | 
437  |  |  * @len:  the length of @add  | 
438  |  |  *  | 
439  |  |  * a strncat for array of xmlChar's, it will extend @cur with the len  | 
440  |  |  * first bytes of @add. Note that if @len < 0 then this is an API error  | 
441  |  |  * and NULL will be returned.  | 
442  |  |  *  | 
443  |  |  * Returns a new xmlChar *, the original @cur is reallocated and should  | 
444  |  |  * not be freed.  | 
445  |  |  */  | 
446  |  |  | 
447  |  | xmlChar *  | 
448  | 0  | xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { | 
449  | 0  |     int size;  | 
450  | 0  |     xmlChar *ret;  | 
451  |  | 
  | 
452  | 0  |     if ((add == NULL) || (len == 0))  | 
453  | 0  |         return(cur);  | 
454  | 0  |     if (len < 0)  | 
455  | 0  |   return(NULL);  | 
456  | 0  |     if (cur == NULL)  | 
457  | 0  |         return(xmlStrndup(add, len));  | 
458  |  |  | 
459  | 0  |     size = xmlStrlen(cur);  | 
460  | 0  |     if ((size < 0) || (size > INT_MAX - len))  | 
461  | 0  |         return(NULL);  | 
462  | 0  |     ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);  | 
463  | 0  |     if (ret == NULL) { | 
464  | 0  |         return(cur);  | 
465  | 0  |     }  | 
466  | 0  |     memcpy(&ret[size], add, len);  | 
467  | 0  |     ret[size + len] = 0;  | 
468  | 0  |     return(ret);  | 
469  | 0  | }  | 
470  |  |  | 
471  |  | /**  | 
472  |  |  * xmlStrncatNew:  | 
473  |  |  * @str1:  first xmlChar string  | 
474  |  |  * @str2:  second xmlChar string  | 
475  |  |  * @len:  the len of @str2 or < 0  | 
476  |  |  *  | 
477  |  |  * same as xmlStrncat, but creates a new string.  The original  | 
478  |  |  * two strings are not freed. If @len is < 0 then the length  | 
479  |  |  * will be calculated automatically.  | 
480  |  |  *  | 
481  |  |  * Returns a new xmlChar * or NULL  | 
482  |  |  */  | 
483  |  | xmlChar *  | 
484  | 0  | xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) { | 
485  | 0  |     int size;  | 
486  | 0  |     xmlChar *ret;  | 
487  |  | 
  | 
488  | 0  |     if (len < 0) { | 
489  | 0  |         len = xmlStrlen(str2);  | 
490  | 0  |         if (len < 0)  | 
491  | 0  |             return(NULL);  | 
492  | 0  |     }  | 
493  | 0  |     if ((str2 == NULL) || (len == 0))  | 
494  | 0  |         return(xmlStrdup(str1));  | 
495  | 0  |     if (str1 == NULL)  | 
496  | 0  |         return(xmlStrndup(str2, len));  | 
497  |  |  | 
498  | 0  |     size = xmlStrlen(str1);  | 
499  | 0  |     if ((size < 0) || (size > INT_MAX - len))  | 
500  | 0  |         return(NULL);  | 
501  | 0  |     ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);  | 
502  | 0  |     if (ret == NULL) { | 
503  | 0  |         return(xmlStrndup(str1, size));  | 
504  | 0  |     }  | 
505  | 0  |     memcpy(ret, str1, size);  | 
506  | 0  |     memcpy(&ret[size], str2, len);  | 
507  | 0  |     ret[size + len] = 0;  | 
508  | 0  |     return(ret);  | 
509  | 0  | }  | 
510  |  |  | 
511  |  | /**  | 
512  |  |  * xmlStrcat:  | 
513  |  |  * @cur:  the original xmlChar * array  | 
514  |  |  * @add:  the xmlChar * array added  | 
515  |  |  *  | 
516  |  |  * a strcat for array of xmlChar's. Since they are supposed to be  | 
517  |  |  * encoded in UTF-8 or an encoding with 8bit based chars, we assume  | 
518  |  |  * a termination mark of '0'.  | 
519  |  |  *  | 
520  |  |  * Returns a new xmlChar * containing the concatenated string. The original  | 
521  |  |  * @cur is reallocated and should not be freed.  | 
522  |  |  */  | 
523  |  | xmlChar *  | 
524  | 0  | xmlStrcat(xmlChar *cur, const xmlChar *add) { | 
525  | 0  |     const xmlChar *p = add;  | 
526  |  | 
  | 
527  | 0  |     if (add == NULL) return(cur);  | 
528  | 0  |     if (cur == NULL)  | 
529  | 0  |         return(xmlStrdup(add));  | 
530  |  |  | 
531  | 0  |     while (*p != 0) p++; /* non input consuming */  | 
532  | 0  |     return(xmlStrncat(cur, add, p - add));  | 
533  | 0  | }  | 
534  |  |  | 
535  |  | /**  | 
536  |  |  * xmlStrPrintf:  | 
537  |  |  * @buf:   the result buffer.  | 
538  |  |  * @len:   the result buffer length.  | 
539  |  |  * @msg:   the message with printf formatting.  | 
540  |  |  * @...:   extra parameters for the message.  | 
541  |  |  *  | 
542  |  |  * Formats @msg and places result into @buf.  | 
543  |  |  *  | 
544  |  |  * Returns the number of characters written to @buf or -1 if an error occurs.  | 
545  |  |  */  | 
546  |  | int  | 
547  | 0  | xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) { | 
548  | 0  |     va_list args;  | 
549  | 0  |     int ret;  | 
550  |  | 
  | 
551  | 0  |     if((buf == NULL) || (msg == NULL)) { | 
552  | 0  |         return(-1);  | 
553  | 0  |     }  | 
554  |  |  | 
555  | 0  |     va_start(args, msg);  | 
556  | 0  |     ret = vsnprintf((char *) buf, len, (const char *) msg, args);  | 
557  | 0  |     va_end(args);  | 
558  | 0  |     buf[len - 1] = 0; /* be safe ! */  | 
559  |  | 
  | 
560  | 0  |     return(ret);  | 
561  | 0  | }  | 
562  |  |  | 
563  |  | /**  | 
564  |  |  * xmlStrVPrintf:  | 
565  |  |  * @buf:   the result buffer.  | 
566  |  |  * @len:   the result buffer length.  | 
567  |  |  * @msg:   the message with printf formatting.  | 
568  |  |  * @ap:    extra parameters for the message.  | 
569  |  |  *  | 
570  |  |  * Formats @msg and places result into @buf.  | 
571  |  |  *  | 
572  |  |  * Returns the number of characters written to @buf or -1 if an error occurs.  | 
573  |  |  */  | 
574  |  | int  | 
575  | 0  | xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) { | 
576  | 0  |     int ret;  | 
577  |  | 
  | 
578  | 0  |     if((buf == NULL) || (msg == NULL)) { | 
579  | 0  |         return(-1);  | 
580  | 0  |     }  | 
581  |  |  | 
582  | 0  |     ret = vsnprintf((char *) buf, len, (const char *) msg, ap);  | 
583  | 0  |     buf[len - 1] = 0; /* be safe ! */  | 
584  |  | 
  | 
585  | 0  |     return(ret);  | 
586  | 0  | }  | 
587  |  |  | 
588  |  | /************************************************************************  | 
589  |  |  *                                                                      *  | 
590  |  |  *              Generic UTF8 handling routines                          *  | 
591  |  |  *                                                                      *  | 
592  |  |  * From rfc2044: encoding of the Unicode values on UTF-8:               *  | 
593  |  |  *                                                                      *  | 
594  |  |  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *  | 
595  |  |  * 0000 0000-0000 007F   0xxxxxxx                                       *  | 
596  |  |  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *  | 
597  |  |  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *  | 
598  |  |  *                                                                      *  | 
599  |  |  * I hope we won't use values > 0xFFFF anytime soon !                   *  | 
600  |  |  *                                                                      *  | 
601  |  |  ************************************************************************/  | 
602  |  |  | 
603  |  |  | 
604  |  | /**  | 
605  |  |  * xmlUTF8Size:  | 
606  |  |  * @utf: pointer to the UTF8 character  | 
607  |  |  *  | 
608  |  |  * calculates the internal size of a UTF8 character  | 
609  |  |  *  | 
610  |  |  * returns the numbers of bytes in the character, -1 on format error  | 
611  |  |  */  | 
612  |  | int  | 
613  | 14.7k  | xmlUTF8Size(const xmlChar *utf) { | 
614  | 14.7k  |     xmlChar mask;  | 
615  | 14.7k  |     int len;  | 
616  |  |  | 
617  | 14.7k  |     if (utf == NULL)  | 
618  | 0  |         return -1;  | 
619  | 14.7k  |     if (*utf < 0x80)  | 
620  | 10.9k  |         return 1;  | 
621  |  |     /* check valid UTF8 character */  | 
622  | 3.74k  |     if (!(*utf & 0x40))  | 
623  | 609  |         return -1;  | 
624  |  |     /* determine number of bytes in char */  | 
625  | 3.13k  |     len = 2;  | 
626  | 10.0k  |     for (mask=0x20; mask != 0; mask>>=1) { | 
627  | 9.84k  |         if (!(*utf & mask))  | 
628  | 2.90k  |             return len;  | 
629  | 6.94k  |         len++;  | 
630  | 6.94k  |     }  | 
631  | 239  |     return -1;  | 
632  | 3.13k  | }  | 
633  |  |  | 
634  |  | /**  | 
635  |  |  * xmlUTF8Charcmp:  | 
636  |  |  * @utf1: pointer to first UTF8 char  | 
637  |  |  * @utf2: pointer to second UTF8 char  | 
638  |  |  *  | 
639  |  |  * compares the two UCS4 values  | 
640  |  |  *  | 
641  |  |  * returns result of the compare as with xmlStrncmp  | 
642  |  |  */  | 
643  |  | int  | 
644  | 14.7k  | xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) { | 
645  |  |  | 
646  | 14.7k  |     if (utf1 == NULL ) { | 
647  | 0  |         if (utf2 == NULL)  | 
648  | 0  |             return 0;  | 
649  | 0  |         return -1;  | 
650  | 0  |     }  | 
651  | 14.7k  |     return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));  | 
652  | 14.7k  | }  | 
653  |  |  | 
654  |  | /**  | 
655  |  |  * xmlUTF8Strlen:  | 
656  |  |  * @utf:  a sequence of UTF-8 encoded bytes  | 
657  |  |  *  | 
658  |  |  * compute the length of an UTF8 string, it doesn't do a full UTF8  | 
659  |  |  * checking of the content of the string.  | 
660  |  |  *  | 
661  |  |  * Returns the number of characters in the string or -1 in case of error  | 
662  |  |  */  | 
663  |  | int  | 
664  | 320  | xmlUTF8Strlen(const xmlChar *utf) { | 
665  | 320  |     size_t ret = 0;  | 
666  |  |  | 
667  | 320  |     if (utf == NULL)  | 
668  | 0  |         return(-1);  | 
669  |  |  | 
670  | 16.4k  |     while (*utf != 0) { | 
671  | 16.1k  |         if (utf[0] & 0x80) { | 
672  | 3.41k  |             if ((utf[1] & 0xc0) != 0x80)  | 
673  | 37  |                 return(-1);  | 
674  | 3.37k  |             if ((utf[0] & 0xe0) == 0xe0) { | 
675  | 47  |                 if ((utf[2] & 0xc0) != 0x80)  | 
676  | 6  |                     return(-1);  | 
677  | 41  |                 if ((utf[0] & 0xf0) == 0xf0) { | 
678  | 20  |                     if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)  | 
679  | 5  |                         return(-1);  | 
680  | 15  |                     utf += 4;  | 
681  | 21  |                 } else { | 
682  | 21  |                     utf += 3;  | 
683  | 21  |                 }  | 
684  | 3.33k  |             } else { | 
685  | 3.33k  |                 utf += 2;  | 
686  | 3.33k  |             }  | 
687  | 12.7k  |         } else { | 
688  | 12.7k  |             utf++;  | 
689  | 12.7k  |         }  | 
690  | 16.1k  |         ret++;  | 
691  | 16.1k  |     }  | 
692  | 272  |     return(ret > INT_MAX ? 0 : ret);  | 
693  | 320  | }  | 
694  |  |  | 
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. Malformed input is rejected:
 * a continuation byte in lead position, a missing or invalid
 * continuation byte, an overlong encoding, a surrogate code point
 * (0xD800-0xDFFF) or a value above 0x10FFFF.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        /* 1-byte code */
        *len = 1;
        return((int) c);
    }

    /* every multi-byte sequence has at least one continuation byte */
    if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
        goto error;

    if (c < 0xe0) {
        /*
         * 2-byte code. 0x80-0xBF are continuation bytes and cannot lead;
         * 0xC0 and 0xC1 could only encode overlong forms of ASCII.
         */
        if (c < 0xc2)
            goto error;
        c = (c & 0x1f) << 6;
        c |= utf[1] & 0x3f;
        *len = 2;
        return((int) c);
    }

    if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
        goto error;

    if (c < 0xf0) {
        /* 3-byte code */
        c = (c & 0xf) << 12;
        c |= (utf[1] & 0x3f) << 6;
        c |= utf[2] & 0x3f;
        /* reject overlong forms and UTF-16 surrogates */
        if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
            goto error;
        *len = 3;
        return((int) c);
    }

    /* 4-byte code: the lead byte must be 11110xxx */
    if ((*len < 4) || ((c & 0xf8) != 0xf0) || ((utf[3] & 0xc0) != 0x80))
        goto error;
    c = (c & 0x7) << 18;
    c |= (utf[1] & 0x3f) << 12;
    c |= (utf[2] & 0x3f) << 6;
    c |= utf[3] & 0x3f;
    /* reject overlong forms and values beyond the Unicode range */
    if ((c < 0x10000) || (c >= 0x110000))
        goto error;
    *len = 4;
    return((int) c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
764  |  |  | 
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *p;

    if (utf == NULL)
        return(0);
    /*
     * Each character is 1, 2, 3 or 4 bytes long. The accepted shapes
     * are (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    for (p = utf; *p != 0; ) {      /* string is 0-terminated */
        unsigned char lead = *p;
        int trail;
        int k;

        if ((lead & 0x80) == 0x00)          /* 1-byte code, starts with 0 */
            trail = 0;
        else if ((lead & 0xe0) == 0xc0)     /* 2-byte code, starts with 110 */
            trail = 1;
        else if ((lead & 0xf0) == 0xe0)     /* 3-byte code, starts with 1110 */
            trail = 2;
        else if ((lead & 0xf8) == 0xf0)     /* 4-byte code, starts with 11110 */
            trail = 3;
        else                                /* unknown encoding */
            return(0);

        /*
         * Continuation bytes are checked in order, so a terminator inside
         * a sequence fails the test before anything behind it is read.
         */
        for (k = 1; k <= trail; k++) {
            if ((p[k] & 0xc0) != 0x80)
                return(0);
        }
        p += trail + 1;
    }
    return(1);
}
819  |  |  | 
820  |  | /**  | 
821  |  |  * xmlUTF8Strsize:  | 
822  |  |  * @utf:  a sequence of UTF-8 encoded bytes  | 
823  |  |  * @len:  the number of characters in the array  | 
824  |  |  *  | 
825  |  |  * storage size of an UTF8 string  | 
826  |  |  * the behaviour is not guaranteed if the input string is not UTF-8  | 
827  |  |  *  | 
828  |  |  * Returns the storage size of  | 
829  |  |  * the first 'len' characters of ARRAY  | 
830  |  |  */  | 
831  |  |  | 
832  |  | int  | 
833  | 18.3k  | xmlUTF8Strsize(const xmlChar *utf, int len) { | 
834  | 18.3k  |     const xmlChar *ptr=utf;  | 
835  | 18.3k  |     int ch;  | 
836  | 18.3k  |     size_t ret;  | 
837  |  |  | 
838  | 18.3k  |     if (utf == NULL)  | 
839  | 0  |         return(0);  | 
840  |  |  | 
841  | 18.3k  |     if (len <= 0)  | 
842  | 0  |         return(0);  | 
843  |  |  | 
844  | 37.0k  |     while ( len-- > 0) { | 
845  | 18.7k  |         if ( !*ptr )  | 
846  | 3  |             break;  | 
847  | 18.7k  |         if ( (ch = *ptr++) & 0x80)  | 
848  | 6.16k  |             while ((ch<<=1) & 0x80 ) { | 
849  | 4.41k  |     if (*ptr == 0) break;  | 
850  | 4.03k  |                 ptr++;  | 
851  | 4.03k  |       }  | 
852  | 18.7k  |     }  | 
853  | 18.3k  |     ret = ptr - utf;  | 
854  | 18.3k  |     return (ret > INT_MAX ? 0 : ret);  | 
855  | 18.3k  | }  | 
856  |  |  | 
857  |  |  | 
858  |  | /**  | 
859  |  |  * xmlUTF8Strndup:  | 
860  |  |  * @utf:  the input UTF8 *  | 
861  |  |  * @len:  the len of @utf (in chars)  | 
862  |  |  *  | 
863  |  |  * a strndup for array of UTF8's  | 
864  |  |  *  | 
865  |  |  * Returns a new UTF8 * or NULL  | 
866  |  |  */  | 
867  |  | xmlChar *  | 
868  | 3  | xmlUTF8Strndup(const xmlChar *utf, int len) { | 
869  | 3  |     xmlChar *ret;  | 
870  | 3  |     int i;  | 
871  |  |  | 
872  | 3  |     if ((utf == NULL) || (len < 0)) return(NULL);  | 
873  | 3  |     i = xmlUTF8Strsize(utf, len);  | 
874  | 3  |     ret = (xmlChar *) xmlMallocAtomic((size_t) i + 1);  | 
875  | 3  |     if (ret == NULL) { | 
876  | 0  |         return(NULL);  | 
877  | 0  |     }  | 
878  | 3  |     memcpy(ret, utf, i);  | 
879  | 3  |     ret[i] = 0;  | 
880  | 3  |     return(ret);  | 
881  | 3  | }  | 
882  |  |  | 
883  |  | /**  | 
884  |  |  * xmlUTF8Strpos:  | 
885  |  |  * @utf:  the input UTF8 *  | 
886  |  |  * @pos:  the position of the desired UTF8 char (in chars)  | 
887  |  |  *  | 
888  |  |  * a function to provide the equivalent of fetching a  | 
889  |  |  * character from a string array  | 
890  |  |  *  | 
891  |  |  * Returns a pointer to the UTF8 character or NULL  | 
892  |  |  */  | 
893  |  | const xmlChar *  | 
894  | 0  | xmlUTF8Strpos(const xmlChar *utf, int pos) { | 
895  | 0  |     int ch;  | 
896  |  | 
  | 
897  | 0  |     if (utf == NULL) return(NULL);  | 
898  | 0  |     if (pos < 0)  | 
899  | 0  |         return(NULL);  | 
900  | 0  |     while (pos--) { | 
901  | 0  |         if ((ch=*utf++) == 0) return(NULL);  | 
902  | 0  |         if ( ch & 0x80 ) { | 
903  |  |             /* if not simple ascii, verify proper format */  | 
904  | 0  |             if ( (ch & 0xc0) != 0xc0 )  | 
905  | 0  |                 return(NULL);  | 
906  |  |             /* then skip over remaining bytes for this char */  | 
907  | 0  |             while ( (ch <<= 1) & 0x80 )  | 
908  | 0  |                 if ( (*utf++ & 0xc0) != 0x80 )  | 
909  | 0  |                     return(NULL);  | 
910  | 0  |         }  | 
911  | 0  |     }  | 
912  | 0  |     return((xmlChar *)utf);  | 
913  | 0  | }  | 
914  |  |  | 
915  |  | /**  | 
916  |  |  * xmlUTF8Strloc:  | 
917  |  |  * @utf:  the input UTF8 *  | 
918  |  |  * @utfchar:  the UTF8 character to be found  | 
919  |  |  *  | 
920  |  |  * a function to provide the relative location of a UTF8 char  | 
921  |  |  *  | 
922  |  |  * Returns the relative character position of the desired char  | 
923  |  |  * or -1 if not found  | 
924  |  |  */  | 
925  |  | int  | 
926  | 0  | xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { | 
927  | 0  |     size_t i;  | 
928  | 0  |     int size;  | 
929  | 0  |     int ch;  | 
930  |  | 
  | 
931  | 0  |     if (utf==NULL || utfchar==NULL) return -1;  | 
932  | 0  |     size = xmlUTF8Strsize(utfchar, 1);  | 
933  | 0  |         for(i=0; (ch=*utf) != 0; i++) { | 
934  | 0  |             if (xmlStrncmp(utf, utfchar, size)==0)  | 
935  | 0  |                 return(i > INT_MAX ? 0 : i);  | 
936  | 0  |             utf++;  | 
937  | 0  |             if ( ch & 0x80 ) { | 
938  |  |                 /* if not simple ascii, verify proper format */  | 
939  | 0  |                 if ( (ch & 0xc0) != 0xc0 )  | 
940  | 0  |                     return(-1);  | 
941  |  |                 /* then skip over remaining bytes for this char */  | 
942  | 0  |                 while ( (ch <<= 1) & 0x80 )  | 
943  | 0  |                     if ( (*utf++ & 0xc0) != 0x80 )  | 
944  | 0  |                         return(-1);  | 
945  | 0  |             }  | 
946  | 0  |         }  | 
947  |  |  | 
948  | 0  |     return(-1);  | 
949  | 0  | }  | 
950  |  | /**  | 
951  |  |  * xmlUTF8Strsub:  | 
952  |  |  * @utf:  a sequence of UTF-8 encoded bytes  | 
953  |  |  * @start: relative pos of first char  | 
954  |  |  * @len:   total number to copy  | 
955  |  |  *  | 
956  |  |  * Create a substring from a given UTF-8 string  | 
957  |  |  * Note:  positions are given in units of UTF-8 chars  | 
958  |  |  *  | 
959  |  |  * Returns a pointer to a newly created string  | 
960  |  |  * or NULL if any problem  | 
961  |  |  */  | 
962  |  |  | 
963  |  | xmlChar *  | 
964  | 3  | xmlUTF8Strsub(const xmlChar *utf, int start, int len) { | 
965  | 3  |     int i;  | 
966  | 3  |     int ch;  | 
967  |  |  | 
968  | 3  |     if (utf == NULL) return(NULL);  | 
969  | 3  |     if (start < 0) return(NULL);  | 
970  | 3  |     if (len < 0) return(NULL);  | 
971  |  |  | 
972  |  |     /*  | 
973  |  |      * Skip over any leading chars  | 
974  |  |      */  | 
975  | 3  |     for (i = 0;i < start;i++) { | 
976  | 0  |         if ((ch=*utf++) == 0) return(NULL);  | 
977  | 0  |         if ( ch & 0x80 ) { | 
978  |  |             /* if not simple ascii, verify proper format */  | 
979  | 0  |             if ( (ch & 0xc0) != 0xc0 )  | 
980  | 0  |                 return(NULL);  | 
981  |  |             /* then skip over remaining bytes for this char */  | 
982  | 0  |             while ( (ch <<= 1) & 0x80 )  | 
983  | 0  |                 if ( (*utf++ & 0xc0) != 0x80 )  | 
984  | 0  |                     return(NULL);  | 
985  | 0  |         }  | 
986  | 0  |     }  | 
987  |  |  | 
988  | 3  |     return(xmlUTF8Strndup(utf, len));  | 
989  | 3  | }  | 
990  |  |  | 
991  |  | /**  | 
992  |  |  * xmlEscapeFormatString:  | 
993  |  |  * @msg:  a pointer to the string in which to escape '%' characters.  | 
994  |  |  * Must be a heap-allocated buffer created by libxml2 that may be  | 
995  |  |  * returned, or that may be freed and replaced.  | 
996  |  |  *  | 
997  |  |  * Replaces the string pointed to by 'msg' with an escaped string.  | 
998  |  |  * Returns the same string with all '%' characters escaped.  | 
999  |  |  */  | 
1000  |  | xmlChar *  | 
1001  |  | xmlEscapeFormatString(xmlChar **msg)  | 
1002  | 0  | { | 
1003  | 0  |     xmlChar *msgPtr = NULL;  | 
1004  | 0  |     xmlChar *result = NULL;  | 
1005  | 0  |     xmlChar *resultPtr = NULL;  | 
1006  | 0  |     size_t count = 0;  | 
1007  | 0  |     size_t msgLen = 0;  | 
1008  | 0  |     size_t resultLen = 0;  | 
1009  |  | 
  | 
1010  | 0  |     if (!msg || !*msg)  | 
1011  | 0  |         return(NULL);  | 
1012  |  |  | 
1013  | 0  |     for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) { | 
1014  | 0  |         ++msgLen;  | 
1015  | 0  |         if (*msgPtr == '%')  | 
1016  | 0  |             ++count;  | 
1017  | 0  |     }  | 
1018  |  | 
  | 
1019  | 0  |     if (count == 0)  | 
1020  | 0  |         return(*msg);  | 
1021  |  |  | 
1022  | 0  |     if ((count > INT_MAX) || (msgLen > INT_MAX - count))  | 
1023  | 0  |         return(NULL);  | 
1024  | 0  |     resultLen = msgLen + count + 1;  | 
1025  | 0  |     result = (xmlChar *) xmlMallocAtomic(resultLen);  | 
1026  | 0  |     if (result == NULL) { | 
1027  |  |         /* Clear *msg to prevent format string vulnerabilities in  | 
1028  |  |            out-of-memory situations. */  | 
1029  | 0  |         xmlFree(*msg);  | 
1030  | 0  |         *msg = NULL;  | 
1031  | 0  |         return(NULL);  | 
1032  | 0  |     }  | 
1033  |  |  | 
1034  | 0  |     for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) { | 
1035  | 0  |         *resultPtr = *msgPtr;  | 
1036  | 0  |         if (*msgPtr == '%')  | 
1037  | 0  |             *(++resultPtr) = '%';  | 
1038  | 0  |     }  | 
1039  | 0  |     result[resultLen - 1] = '\0';  | 
1040  |  | 
  | 
1041  | 0  |     xmlFree(*msg);  | 
1042  | 0  |     *msg = result;  | 
1043  |  | 
  | 
1044  | 0  |     return *msg;  | 
1045  | 0  | }  | 
1046  |  |  |