Line  | Count  | Source  | 
1  |  | /*  | 
2  |  |  * string.c : an XML string utilities module  | 
3  |  |  *  | 
4  |  |  * This module provides various utility functions for manipulating  | 
5  |  |  * the xmlChar* type. All functions named xmlStr* have been moved here  | 
6  |  |  * from the parser.c file (their original home).  | 
7  |  |  *  | 
8  |  |  * See Copyright for the status of this software.  | 
9  |  |  *  | 
10  |  |  * UTF8 string routines from: William Brack  | 
11  |  |  *  | 
12  |  |  * Author: Daniel Veillard  | 
13  |  |  */  | 
14  |  |  | 
15  |  | #define IN_LIBXML  | 
16  |  | #include "libxml.h"  | 
17  |  |  | 
18  |  | #include <stdlib.h>  | 
19  |  | #include <string.h>  | 
20  |  | #include <limits.h>  | 
21  |  | #include <libxml/xmlmemory.h>  | 
22  |  | #include <libxml/parserInternals.h>  | 
23  |  | #include <libxml/xmlstring.h>  | 
24  |  |  | 
25  |  | #include "private/parser.h"  | 
26  |  | #include "private/string.h"  | 
27  |  |  | 
28  |  | #ifndef va_copy  | 
29  |  |   #ifdef __va_copy  | 
30  |  |     #define va_copy(dest, src) __va_copy(dest, src)  | 
31  |  |   #else  | 
32  |  |     #define va_copy(dest, src) memcpy(&(dest), &(src), sizeof(va_list))  | 
33  |  |   #endif  | 
34  |  | #endif  | 
35  |  |  | 
36  |  | /************************************************************************  | 
37  |  |  *                                                                      *  | 
38  |  |  *                Commodity functions to handle xmlChars                *  | 
39  |  |  *                                                                      *  | 
40  |  |  ************************************************************************/  | 
41  |  |  | 
42  |  | /**  | 
43  |  |  * a strndup for array of xmlChar's  | 
44  |  |  *  | 
45  |  |  * @param cur  the input xmlChar *  | 
46  |  |  * @param len  the len of `cur`  | 
47  |  |  * @returns a new xmlChar * or NULL  | 
48  |  |  */  | 
49  |  | xmlChar *  | 
50  | 1.15M  | xmlStrndup(const xmlChar *cur, int len) { | 
51  | 1.15M  |     xmlChar *ret;  | 
52  |  |  | 
53  | 1.15M  |     if ((cur == NULL) || (len < 0)) return(NULL);  | 
54  | 1.15M  |     ret = xmlMalloc((size_t) len + 1);  | 
55  | 1.15M  |     if (ret == NULL) { | 
56  | 0  |         return(NULL);  | 
57  | 0  |     }  | 
58  | 1.15M  |     memcpy(ret, cur, len);  | 
59  | 1.15M  |     ret[len] = 0;  | 
60  | 1.15M  |     return(ret);  | 
61  | 1.15M  | }  | 
62  |  |  | 
63  |  | /**  | 
64  |  |  * a strdup for array of xmlChar's. Since they are supposed to be  | 
65  |  |  * encoded in UTF-8 or an encoding with 8bit based chars, we assume  | 
66  |  |  * a termination mark of '0'.  | 
67  |  |  *  | 
68  |  |  * @param cur  the input xmlChar *  | 
69  |  |  * @returns a new xmlChar * or NULL  | 
70  |  |  */  | 
71  |  | xmlChar *  | 
72  | 712k  | xmlStrdup(const xmlChar *cur) { | 
73  | 712k  |     const xmlChar *p = cur;  | 
74  |  |  | 
75  | 712k  |     if (cur == NULL) return(NULL);  | 
76  | 316M  |     while (*p != 0) p++; /* non input consuming */  | 
77  | 712k  |     return(xmlStrndup(cur, p - cur));  | 
78  | 712k  | }  | 
79  |  |  | 
80  |  | /**  | 
81  |  |  * a strndup for char's to xmlChar's  | 
82  |  |  *  | 
83  |  |  * @param cur  the input char *  | 
84  |  |  * @param len  the len of `cur`  | 
85  |  |  * @returns a new xmlChar * or NULL  | 
86  |  |  */  | 
87  |  |  | 
88  |  | xmlChar *  | 
89  | 11.6k  | xmlCharStrndup(const char *cur, int len) { | 
90  | 11.6k  |     int i;  | 
91  | 11.6k  |     xmlChar *ret;  | 
92  |  |  | 
93  | 11.6k  |     if ((cur == NULL) || (len < 0)) return(NULL);  | 
94  | 11.6k  |     ret = xmlMalloc((size_t) len + 1);  | 
95  | 11.6k  |     if (ret == NULL) { | 
96  | 0  |         return(NULL);  | 
97  | 0  |     }  | 
98  | 51.1k  |     for (i = 0;i < len;i++) { | 
99  |  |         /* Explicit sign change */  | 
100  | 39.5k  |         ret[i] = (xmlChar) cur[i];  | 
101  | 39.5k  |         if (ret[i] == 0) return(ret);  | 
102  | 39.5k  |     }  | 
103  | 11.6k  |     ret[len] = 0;  | 
104  | 11.6k  |     return(ret);  | 
105  | 11.6k  | }  | 
106  |  |  | 
107  |  | /**  | 
108  |  |  * a strdup for char's to xmlChar's  | 
109  |  |  *  | 
110  |  |  * @param cur  the input char *  | 
111  |  |  * @returns a new xmlChar * or NULL  | 
112  |  |  */  | 
113  |  |  | 
114  |  | xmlChar *  | 
115  | 11.6k  | xmlCharStrdup(const char *cur) { | 
116  | 11.6k  |     const char *p = cur;  | 
117  |  |  | 
118  | 11.6k  |     if (cur == NULL) return(NULL);  | 
119  | 51.1k  |     while (*p != '\0') p++; /* non input consuming */  | 
120  | 11.6k  |     return(xmlCharStrndup(cur, p - cur));  | 
121  | 11.6k  | }  | 
122  |  |  | 
123  |  | /**  | 
124  |  |  * a strcmp for xmlChar's  | 
125  |  |  *  | 
126  |  |  * @param str1  the first xmlChar *  | 
127  |  |  * @param str2  the second xmlChar *  | 
128  |  |  * @returns the integer result of the comparison  | 
129  |  |  */  | 
130  |  |  | 
131  |  | int  | 
132  | 0  | xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { | 
133  | 0  |     if (str1 == str2) return(0);  | 
134  | 0  |     if (str1 == NULL) return(-1);  | 
135  | 0  |     if (str2 == NULL) return(1);  | 
136  | 0  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
137  | 0  |     return(strcmp((const char *)str1, (const char *)str2));  | 
138  |  | #else  | 
139  |  |     do { | 
140  |  |         int tmp = *str1++ - *str2;  | 
141  |  |         if (tmp != 0) return(tmp);  | 
142  |  |     } while (*str2++ != 0);  | 
143  |  |     return 0;  | 
144  |  | #endif  | 
145  | 0  | }  | 
146  |  |  | 
147  |  | /**  | 
148  |  |  * Check if both strings are equal of have same content.  | 
149  |  |  * Should be a bit more readable and faster than #xmlStrcmp  | 
150  |  |  *  | 
151  |  |  * @param str1  the first xmlChar *  | 
152  |  |  * @param str2  the second xmlChar *  | 
153  |  |  * @returns 1 if they are equal, 0 if they are different  | 
154  |  |  */  | 
155  |  |  | 
156  |  | int  | 
157  | 1.01M  | xmlStrEqual(const xmlChar *str1, const xmlChar *str2) { | 
158  | 1.01M  |     if (str1 == str2) return(1);  | 
159  | 993k  |     if (str1 == NULL) return(0);  | 
160  | 992k  |     if (str2 == NULL) return(0);  | 
161  | 992k  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
162  | 992k  |     return(strcmp((const char *)str1, (const char *)str2) == 0);  | 
163  |  | #else  | 
164  |  |     do { | 
165  |  |         if (*str1++ != *str2) return(0);  | 
166  |  |     } while (*str2++);  | 
167  |  |     return(1);  | 
168  |  | #endif  | 
169  | 992k  | }  | 
170  |  |  | 
171  |  | /**  | 
172  |  |  * Check if a QName is Equal to a given string  | 
173  |  |  *  | 
174  |  |  * @param pref  the prefix of the QName  | 
175  |  |  * @param name  the localname of the QName  | 
176  |  |  * @param str  the second xmlChar *  | 
177  |  |  * @returns 1 if they are equal, 0 if they are different  | 
178  |  |  */  | 
179  |  |  | 
180  |  | int  | 
181  | 16.4k  | xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) { | 
182  | 16.4k  |     if (pref == NULL) return(xmlStrEqual(name, str));  | 
183  | 868  |     if (name == NULL) return(0);  | 
184  | 868  |     if (str == NULL) return(0);  | 
185  |  |  | 
186  | 2.85k  |     do { | 
187  | 2.85k  |         if (*pref++ != *str) return(0);  | 
188  | 2.85k  |     } while ((*str++) && (*pref));  | 
189  | 868  |     if (*str++ != ':') return(0);  | 
190  | 2.29k  |     do { | 
191  | 2.29k  |         if (*name++ != *str) return(0);  | 
192  | 2.29k  |     } while (*str++);  | 
193  | 868  |     return(1);  | 
194  | 868  | }  | 
195  |  |  | 
196  |  | /**  | 
197  |  |  * a strncmp for xmlChar's  | 
198  |  |  *  | 
199  |  |  * @param str1  the first xmlChar *  | 
200  |  |  * @param str2  the second xmlChar *  | 
201  |  |  * @param len  the max comparison length  | 
202  |  |  * @returns the integer result of the comparison  | 
203  |  |  */  | 
204  |  |  | 
205  |  | int  | 
206  | 370  | xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { | 
207  | 370  |     if (len <= 0) return(0);  | 
208  | 370  |     if (str1 == str2) return(0);  | 
209  | 370  |     if (str1 == NULL) return(-1);  | 
210  | 370  |     if (str2 == NULL) return(1);  | 
211  | 370  | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION  | 
212  | 370  |     return(strncmp((const char *)str1, (const char *)str2, len));  | 
213  |  | #else  | 
214  |  |     do { | 
215  |  |         int tmp = *str1++ - *str2;  | 
216  |  |         if (tmp != 0 || --len == 0) return(tmp);  | 
217  |  |     } while (*str2++ != 0);  | 
218  |  |     return 0;  | 
219  |  | #endif  | 
220  | 370  | }  | 
221  |  |  | 
222  |  | static const xmlChar casemap[256] = { | 
223  |  |     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,  | 
224  |  |     0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,  | 
225  |  |     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,  | 
226  |  |     0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,  | 
227  |  |     0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,  | 
228  |  |     0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,  | 
229  |  |     0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,  | 
230  |  |     0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,  | 
231  |  |     0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,  | 
232  |  |     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,  | 
233  |  |     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,  | 
234  |  |     0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,  | 
235  |  |     0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,  | 
236  |  |     0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,  | 
237  |  |     0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,  | 
238  |  |     0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,  | 
239  |  |     0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,  | 
240  |  |     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,  | 
241  |  |     0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,  | 
242  |  |     0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,  | 
243  |  |     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,  | 
244  |  |     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,  | 
245  |  |     0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,  | 
246  |  |     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,  | 
247  |  |     0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,  | 
248  |  |     0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,  | 
249  |  |     0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,  | 
250  |  |     0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,  | 
251  |  |     0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,  | 
252  |  |     0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,  | 
253  |  |     0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,  | 
254  |  |     0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF  | 
255  |  | };  | 
256  |  |  | 
257  |  | /**  | 
258  |  |  * a strcasecmp for xmlChar's  | 
259  |  |  *  | 
260  |  |  * @param str1  the first xmlChar *  | 
261  |  |  * @param str2  the second xmlChar *  | 
262  |  |  * @returns the integer result of the comparison  | 
263  |  |  */  | 
264  |  |  | 
265  |  | int  | 
266  | 9.96k  | xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) { | 
267  | 9.96k  |     register int tmp;  | 
268  |  |  | 
269  | 9.96k  |     if (str1 == str2) return(0);  | 
270  | 9.96k  |     if (str1 == NULL) return(-1);  | 
271  | 9.96k  |     if (str2 == NULL) return(1);  | 
272  | 32.3k  |     do { | 
273  | 32.3k  |         tmp = casemap[*str1++] - casemap[*str2];  | 
274  | 32.3k  |         if (tmp != 0) return(tmp);  | 
275  | 32.3k  |     } while (*str2++ != 0);  | 
276  | 906  |     return 0;  | 
277  | 9.96k  | }  | 
278  |  |  | 
279  |  | /**  | 
280  |  |  * a strncasecmp for xmlChar's  | 
281  |  |  *  | 
282  |  |  * @param str1  the first xmlChar *  | 
283  |  |  * @param str2  the second xmlChar *  | 
284  |  |  * @param len  the max comparison length  | 
285  |  |  * @returns the integer result of the comparison  | 
286  |  |  */  | 
287  |  |  | 
288  |  | int  | 
289  | 0  | xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) { | 
290  | 0  |     register int tmp;  | 
291  |  | 
  | 
292  | 0  |     if (len <= 0) return(0);  | 
293  | 0  |     if (str1 == str2) return(0);  | 
294  | 0  |     if (str1 == NULL) return(-1);  | 
295  | 0  |     if (str2 == NULL) return(1);  | 
296  | 0  |     do { | 
297  | 0  |         tmp = casemap[*str1++] - casemap[*str2];  | 
298  | 0  |         if (tmp != 0 || --len == 0) return(tmp);  | 
299  | 0  |     } while (*str2++ != 0);  | 
300  | 0  |     return 0;  | 
301  | 0  | }  | 
302  |  |  | 
303  |  | /**  | 
304  |  |  * a strchr for xmlChar's  | 
305  |  |  *  | 
306  |  |  * @param str  the xmlChar * array  | 
307  |  |  * @param val  the xmlChar to search  | 
308  |  |  * @returns the xmlChar * for the first occurrence or NULL.  | 
309  |  |  */  | 
310  |  |  | 
311  |  | const xmlChar *  | 
312  | 42.2k  | xmlStrchr(const xmlChar *str, xmlChar val) { | 
313  | 42.2k  |     if (str == NULL) return(NULL);  | 
314  | 7.19M  |     while (*str != 0) { /* non input consuming */ | 
315  | 7.15M  |         if (*str == val) return((xmlChar *) str);  | 
316  | 7.15M  |         str++;  | 
317  | 7.15M  |     }  | 
318  | 38.4k  |     return(NULL);  | 
319  | 42.2k  | }  | 
320  |  |  | 
321  |  | /**  | 
322  |  |  * a strstr for xmlChar's  | 
323  |  |  *  | 
324  |  |  * @param str  the xmlChar * array (haystack)  | 
325  |  |  * @param val  the xmlChar to search (needle)  | 
326  |  |  * @returns the xmlChar * for the first occurrence or NULL.  | 
327  |  |  */  | 
328  |  |  | 
329  |  | const xmlChar *  | 
330  | 0  | xmlStrstr(const xmlChar *str, const xmlChar *val) { | 
331  | 0  |     int n;  | 
332  |  | 
  | 
333  | 0  |     if (str == NULL) return(NULL);  | 
334  | 0  |     if (val == NULL) return(NULL);  | 
335  | 0  |     n = xmlStrlen(val);  | 
336  |  | 
  | 
337  | 0  |     if (n == 0) return(str);  | 
338  | 0  |     while (*str != 0) { /* non input consuming */ | 
339  | 0  |         if (*str == *val) { | 
340  | 0  |             if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);  | 
341  | 0  |         }  | 
342  | 0  |         str++;  | 
343  | 0  |     }  | 
344  | 0  |     return(NULL);  | 
345  | 0  | }  | 
346  |  |  | 
347  |  | /**  | 
348  |  |  * a case-ignoring strstr for xmlChar's  | 
349  |  |  *  | 
350  |  |  * @param str  the xmlChar * array (haystack)  | 
351  |  |  * @param val  the xmlChar to search (needle)  | 
352  |  |  * @returns the xmlChar * for the first occurrence or NULL.  | 
353  |  |  */  | 
354  |  |  | 
355  |  | const xmlChar *  | 
356  | 0  | xmlStrcasestr(const xmlChar *str, const xmlChar *val) { | 
357  | 0  |     int n;  | 
358  |  | 
  | 
359  | 0  |     if (str == NULL) return(NULL);  | 
360  | 0  |     if (val == NULL) return(NULL);  | 
361  | 0  |     n = xmlStrlen(val);  | 
362  |  | 
  | 
363  | 0  |     if (n == 0) return(str);  | 
364  | 0  |     while (*str != 0) { /* non input consuming */ | 
365  | 0  |         if (casemap[*str] == casemap[*val])  | 
366  | 0  |             if (!xmlStrncasecmp(str, val, n)) return(str);  | 
367  | 0  |         str++;  | 
368  | 0  |     }  | 
369  | 0  |     return(NULL);  | 
370  | 0  | }  | 
371  |  |  | 
372  |  | /**  | 
373  |  |  * Extract a substring of a given string  | 
374  |  |  *  | 
375  |  |  * @param str  the xmlChar * array (haystack)  | 
376  |  |  * @param start  the index of the first char (zero based)  | 
377  |  |  * @param len  the length of the substring  | 
378  |  |  * @returns the xmlChar * for the first occurrence or NULL.  | 
379  |  |  */  | 
380  |  |  | 
381  |  | xmlChar *  | 
382  | 0  | xmlStrsub(const xmlChar *str, int start, int len) { | 
383  | 0  |     int i;  | 
384  |  | 
  | 
385  | 0  |     if (str == NULL) return(NULL);  | 
386  | 0  |     if (start < 0) return(NULL);  | 
387  | 0  |     if (len < 0) return(NULL);  | 
388  |  |  | 
389  | 0  |     for (i = 0;i < start;i++) { | 
390  | 0  |         if (*str == 0) return(NULL);  | 
391  | 0  |         str++;  | 
392  | 0  |     }  | 
393  | 0  |     if (*str == 0) return(NULL);  | 
394  | 0  |     return(xmlStrndup(str, len));  | 
395  | 0  | }  | 
396  |  |  | 
397  |  | /**  | 
398  |  |  * length of a xmlChar's string  | 
399  |  |  *  | 
400  |  |  * @param str  the xmlChar * array  | 
401  |  |  * @returns the number of xmlChar contained in the ARRAY.  | 
402  |  |  */  | 
403  |  |  | 
404  |  | int  | 
405  | 254k  | xmlStrlen(const xmlChar *str) { | 
406  | 254k  |     size_t len = str ? strlen((const char *)str) : 0;  | 
407  | 254k  |     return(len > INT_MAX ? 0 : len);  | 
408  | 254k  | }  | 
409  |  |  | 
410  |  | /**  | 
411  |  |  * a strncat for array of xmlChar's, it will extend `cur` with the len  | 
412  |  |  * first bytes of `add`. Note that if `len` < 0 then this is an API error  | 
413  |  |  * and NULL will be returned.  | 
414  |  |  *  | 
415  |  |  * @param cur  the original xmlChar * array  | 
416  |  |  * @param add  the xmlChar * array added  | 
417  |  |  * @param len  the length of `add`  | 
418  |  |  * @returns a new xmlChar *, the original `cur` is reallocated and should  | 
419  |  |  * not be freed.  | 
420  |  |  */  | 
421  |  |  | 
422  |  | xmlChar *  | 
423  | 0  | xmlStrncat(xmlChar *cur, const xmlChar *add, int len) { | 
424  | 0  |     int size;  | 
425  | 0  |     xmlChar *ret;  | 
426  |  | 
  | 
427  | 0  |     if ((add == NULL) || (len == 0))  | 
428  | 0  |         return(cur);  | 
429  | 0  |     if (len < 0)  | 
430  | 0  |   return(NULL);  | 
431  | 0  |     if (cur == NULL)  | 
432  | 0  |         return(xmlStrndup(add, len));  | 
433  |  |  | 
434  | 0  |     size = xmlStrlen(cur);  | 
435  | 0  |     if ((size < 0) || (size > INT_MAX - len))  | 
436  | 0  |         return(NULL);  | 
437  | 0  |     ret = (xmlChar *) xmlRealloc(cur, (size_t) size + len + 1);  | 
438  | 0  |     if (ret == NULL) { | 
439  | 0  |         xmlFree(cur);  | 
440  | 0  |         return(NULL);  | 
441  | 0  |     }  | 
442  | 0  |     memcpy(&ret[size], add, len);  | 
443  | 0  |     ret[size + len] = 0;  | 
444  | 0  |     return(ret);  | 
445  | 0  | }  | 
446  |  |  | 
447  |  | /**  | 
448  |  |  * same as #xmlStrncat, but creates a new string.  The original  | 
449  |  |  * two strings are not freed. If `len` is < 0 then the length  | 
450  |  |  * will be calculated automatically.  | 
451  |  |  *  | 
452  |  |  * @param str1  first xmlChar string  | 
453  |  |  * @param str2  second xmlChar string  | 
454  |  |  * @param len  the len of `str2` or < 0  | 
455  |  |  * @returns a new xmlChar * or NULL  | 
456  |  |  */  | 
457  |  | xmlChar *  | 
458  | 0  | xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) { | 
459  | 0  |     int size;  | 
460  | 0  |     xmlChar *ret;  | 
461  |  | 
  | 
462  | 0  |     if (len < 0) { | 
463  | 0  |         len = xmlStrlen(str2);  | 
464  | 0  |         if (len < 0)  | 
465  | 0  |             return(NULL);  | 
466  | 0  |     }  | 
467  | 0  |     if (str1 == NULL)  | 
468  | 0  |         return(xmlStrndup(str2, len));  | 
469  | 0  |     if ((str2 == NULL) || (len == 0))  | 
470  | 0  |         return(xmlStrdup(str1));  | 
471  |  |  | 
472  | 0  |     size = xmlStrlen(str1);  | 
473  | 0  |     if ((size < 0) || (size > INT_MAX - len))  | 
474  | 0  |         return(NULL);  | 
475  | 0  |     ret = (xmlChar *) xmlMalloc((size_t) size + len + 1);  | 
476  | 0  |     if (ret == NULL)  | 
477  | 0  |         return(NULL);  | 
478  | 0  |     memcpy(ret, str1, size);  | 
479  | 0  |     memcpy(&ret[size], str2, len);  | 
480  | 0  |     ret[size + len] = 0;  | 
481  | 0  |     return(ret);  | 
482  | 0  | }  | 
483  |  |  | 
484  |  | /**  | 
485  |  |  * a strcat for array of xmlChar's. Since they are supposed to be  | 
486  |  |  * encoded in UTF-8 or an encoding with 8bit based chars, we assume  | 
487  |  |  * a termination mark of '0'.  | 
488  |  |  *  | 
489  |  |  * @param cur  the original xmlChar * array  | 
490  |  |  * @param add  the xmlChar * array added  | 
491  |  |  * @returns a new xmlChar * containing the concatenated string. The original  | 
492  |  |  * `cur` is reallocated and should not be freed.  | 
493  |  |  */  | 
494  |  | xmlChar *  | 
495  | 0  | xmlStrcat(xmlChar *cur, const xmlChar *add) { | 
496  | 0  |     const xmlChar *p = add;  | 
497  |  | 
  | 
498  | 0  |     if (add == NULL) return(cur);  | 
499  | 0  |     if (cur == NULL)  | 
500  | 0  |         return(xmlStrdup(add));  | 
501  |  |  | 
502  | 0  |     while (*p != 0) p++; /* non input consuming */  | 
503  | 0  |     return(xmlStrncat(cur, add, p - add));  | 
504  | 0  | }  | 
505  |  |  | 
506  |  | /**  | 
507  |  |  * Formats `msg` and places result into `buf`.  | 
508  |  |  *  | 
509  |  |  * @param buf  the result buffer.  | 
510  |  |  * @param len  the result buffer length.  | 
511  |  |  * @param msg  the message with printf formatting.  | 
512  |  |  * @param ...   extra parameters for the message.  | 
513  |  |  * @returns the number of characters written to `buf` or -1 if an error occurs.  | 
514  |  |  */  | 
515  |  | int  | 
516  | 0  | xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) { | 
517  | 0  |     va_list args;  | 
518  | 0  |     int ret;  | 
519  |  | 
  | 
520  | 0  |     if((buf == NULL) || (msg == NULL)) { | 
521  | 0  |         return(-1);  | 
522  | 0  |     }  | 
523  |  |  | 
524  | 0  |     va_start(args, msg);  | 
525  | 0  |     ret = vsnprintf((char *) buf, len, (const char *) msg, args);  | 
526  | 0  |     va_end(args);  | 
527  | 0  |     buf[len - 1] = 0; /* be safe ! */  | 
528  |  | 
  | 
529  | 0  |     return(ret);  | 
530  | 0  | }  | 
531  |  |  | 
532  |  | /**  | 
533  |  |  * Formats `msg` and places result into `buf`.  | 
534  |  |  *  | 
535  |  |  * @param buf  the result buffer.  | 
536  |  |  * @param len  the result buffer length.  | 
537  |  |  * @param msg  the message with printf formatting.  | 
538  |  |  * @param ap  extra parameters for the message.  | 
539  |  |  * @returns the number of characters written to `buf` or -1 if an error occurs.  | 
540  |  |  */  | 
541  |  | int  | 
542  | 12.5k  | xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) { | 
543  | 12.5k  |     int ret;  | 
544  |  |  | 
545  | 12.5k  |     if((buf == NULL) || (msg == NULL)) { | 
546  | 0  |         return(-1);  | 
547  | 0  |     }  | 
548  |  |  | 
549  | 12.5k  |     ret = vsnprintf((char *) buf, len, (const char *) msg, ap);  | 
550  | 12.5k  |     buf[len - 1] = 0; /* be safe ! */  | 
551  |  |  | 
552  | 12.5k  |     return(ret);  | 
553  | 12.5k  | }  | 
554  |  |  | 
555  |  | /**  | 
556  |  |  * Creates a newly allocated string according to format.  | 
557  |  |  *  | 
558  |  |  * @param out  pointer to the resulting string  | 
559  |  |  * @param maxSize  maximum size of the output buffer  | 
560  |  |  * @param msg  printf format string  | 
561  |  |  * @param ap  arguments for format string  | 
562  |  |  * @returns 0 on success, 1 if the result was truncated or on other  | 
563  |  |  * errors, -1 if a memory allocation failed.  | 
564  |  |  */  | 
565  |  | int  | 
566  | 483k  | xmlStrVASPrintf(xmlChar **out, int maxSize, const char *msg, va_list ap) { | 
567  | 483k  |     char empty[1];  | 
568  | 483k  |     va_list copy;  | 
569  | 483k  |     xmlChar *buf;  | 
570  | 483k  |     int res, size;  | 
571  | 483k  |     int truncated = 0;  | 
572  |  |  | 
573  | 483k  |     if (out == NULL)  | 
574  | 0  |         return(1);  | 
575  | 483k  |     *out = NULL;  | 
576  | 483k  |     if (msg == NULL)  | 
577  | 0  |         return(1);  | 
578  | 483k  |     if (maxSize < 32)  | 
579  | 0  |         maxSize = 32;  | 
580  |  |  | 
581  | 483k  |     va_copy(copy, ap);  | 
582  | 483k  |     res = vsnprintf(empty, 1, msg, copy);  | 
583  | 483k  |     va_end(copy);  | 
584  |  |  | 
585  | 483k  |     if (res > 0) { | 
586  |  |         /* snprintf seems to work according to C99. */  | 
587  |  |  | 
588  | 483k  |         if (res < maxSize) { | 
589  | 483k  |             size = res + 1;  | 
590  | 483k  |         } else { | 
591  | 292  |             size = maxSize;  | 
592  | 292  |             truncated = 1;  | 
593  | 292  |         }  | 
594  | 483k  |         buf = xmlMalloc(size);  | 
595  | 483k  |         if (buf == NULL)  | 
596  | 0  |             return(-1);  | 
597  | 483k  |         if (vsnprintf((char *) buf, size, msg, ap) < 0) { | 
598  | 0  |             xmlFree(buf);  | 
599  | 0  |             return(1);  | 
600  | 0  |         }  | 
601  | 483k  |     } else { | 
602  |  |         /*  | 
603  |  |          * Unfortunately, older snprintf implementations don't follow the  | 
604  |  |          * C99 spec. If the output exceeds the size of the buffer, they can  | 
605  |  |          * return -1, 0 or the number of characters written instead of the  | 
606  |  |          * needed size. Older MSCVRT also won't write a terminating null  | 
607  |  |          * byte if the buffer is too small.  | 
608  |  |          *  | 
609  |  |          * If the value returned is non-negative and strictly less than  | 
610  |  |          * the buffer size (without terminating null), the result should  | 
611  |  |          * have been written completely, so we double the buffer size  | 
612  |  |          * until this condition is true. This assumes that snprintf will  | 
613  |  |          * eventually return a non-negative value. Otherwise, we will  | 
614  |  |          * allocate more and more memory until we run out.  | 
615  |  |          *  | 
616  |  |          * Note that this code path is also executed on conforming  | 
617  |  |          * platforms if the output is the empty string.  | 
618  |  |          */  | 
619  |  | 
  | 
620  | 0  |         buf = NULL;  | 
621  | 0  |         size = 32;  | 
622  | 0  |         while (1) { | 
623  | 0  |             buf = xmlMalloc(size);  | 
624  | 0  |             if (buf == NULL)  | 
625  | 0  |                 return(-1);  | 
626  |  |  | 
627  | 0  |             va_copy(copy, ap);  | 
628  | 0  |             res = vsnprintf((char *) buf, size, msg, copy);  | 
629  | 0  |             va_end(copy);  | 
630  | 0  |             if ((res >= 0) && (res < size - 1))  | 
631  | 0  |                 break;  | 
632  |  |  | 
633  | 0  |             if (size >= maxSize) { | 
634  | 0  |                 truncated = 1;  | 
635  | 0  |                 break;  | 
636  | 0  |             }  | 
637  |  |  | 
638  | 0  |             xmlFree(buf);  | 
639  |  | 
  | 
640  | 0  |             if (size > maxSize / 2)  | 
641  | 0  |                 size = maxSize;  | 
642  | 0  |             else  | 
643  | 0  |                 size *= 2;  | 
644  | 0  |         }  | 
645  | 0  |     }  | 
646  |  |  | 
647  |  |     /*  | 
648  |  |      * If the output was truncated, make sure that the buffer doesn't  | 
649  |  |      * end with a truncated UTF-8 sequence.  | 
650  |  |      */  | 
651  | 483k  |     if (truncated != 0) { | 
652  | 292  |         int i = size - 1;  | 
653  |  |  | 
654  | 439  |         while (i > 0) { | 
655  |  |             /* Break after ASCII */  | 
656  | 439  |             if (buf[i-1] < 0x80)  | 
657  | 134  |                 break;  | 
658  | 305  |             i -= 1;  | 
659  |  |             /* Break before non-ASCII */  | 
660  | 305  |             if (buf[i] >= 0xc0)  | 
661  | 158  |                 break;  | 
662  | 305  |         }  | 
663  |  |  | 
664  | 292  |         buf[i] = 0;  | 
665  | 292  |     }  | 
666  |  |  | 
667  | 483k  |     *out = (xmlChar *) buf;  | 
668  | 483k  |     return(truncated);  | 
669  | 483k  | }  | 
670  |  |  | 
671  |  | /**  | 
672  |  |  * See xmlStrVASPrintf.  | 
673  |  |  *  | 
674  |  |  * @param out  pointer to the resulting string  | 
675  |  |  * @param maxSize  maximum size of the output buffer  | 
676  |  |  * @param msg  printf format string  | 
677  |  |  * @param ...  arguments for format string  | 
678  |  |  * @returns 0 on success, 1 if the result was truncated or on other  | 
679  |  |  * errors, -1 if a memory allocation failed.  | 
680  |  |  */  | 
681  |  | int  | 
682  | 0  | xmlStrASPrintf(xmlChar **out, int maxSize, const char *msg, ...) { | 
683  | 0  |     va_list ap;  | 
684  | 0  |     int ret;  | 
685  |  | 
  | 
686  | 0  |     va_start(ap, msg);  | 
687  | 0  |     ret = xmlStrVASPrintf(out, maxSize, msg, ap);  | 
688  | 0  |     va_end(ap);  | 
689  |  | 
  | 
690  | 0  |     return(ret);  | 
691  | 0  | }  | 
692  |  |  | 
693  |  | /************************************************************************  | 
694  |  |  *                                                                      *  | 
695  |  |  *              Generic UTF8 handling routines                          *  | 
696  |  |  *                                                                      *  | 
697  |  |  * From rfc2044: encoding of the Unicode values on UTF-8:               *  | 
698  |  |  *                                                                      *  | 
699  |  |  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *  | 
700  |  |  * 0000 0000-0000 007F   0xxxxxxx                                       *  | 
701  |  |  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *  | 
702  |  |  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *  | 
703  |  |  *                                                                      *  | 
704  |  |  * I hope we won't use values > 0xFFFF anytime soon !                   *  | 
705  |  |  *                                                                      *  | 
706  |  |  ************************************************************************/  | 
707  |  |  | 
708  |  |  | 
709  |  | /**  | 
710  |  |  * calculates the internal size of a UTF8 character  | 
711  |  |  *  | 
712  |  |  * @param utf  pointer to the UTF8 character  | 
713  |  |  * @returns the numbers of bytes in the character, -1 on format error  | 
714  |  |  */  | 
715  |  | int  | 
716  | 0  | xmlUTF8Size(const xmlChar *utf) { | 
717  | 0  |     xmlChar mask;  | 
718  | 0  |     int len;  | 
719  |  | 
  | 
720  | 0  |     if (utf == NULL)  | 
721  | 0  |         return -1;  | 
722  | 0  |     if (*utf < 0x80)  | 
723  | 0  |         return 1;  | 
724  |  |     /* check valid UTF8 character */  | 
725  | 0  |     if (!(*utf & 0x40))  | 
726  | 0  |         return -1;  | 
727  |  |     /* determine number of bytes in char */  | 
728  | 0  |     len = 2;  | 
729  | 0  |     for (mask=0x20; mask != 0; mask>>=1) { | 
730  | 0  |         if (!(*utf & mask))  | 
731  | 0  |             return len;  | 
732  | 0  |         len++;  | 
733  | 0  |     }  | 
734  | 0  |     return -1;  | 
735  | 0  | }  | 
736  |  |  | 
737  |  | /**  | 
738  |  |  * compares the two UCS4 values  | 
739  |  |  *  | 
740  |  |  * @param utf1  pointer to first UTF8 char  | 
741  |  |  * @param utf2  pointer to second UTF8 char  | 
742  |  |  * @returns result of the compare as with #xmlStrncmp  | 
743  |  |  */  | 
744  |  | int  | 
745  | 0  | xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) { | 
746  |  | 
  | 
747  | 0  |     if (utf1 == NULL ) { | 
748  | 0  |         if (utf2 == NULL)  | 
749  | 0  |             return 0;  | 
750  | 0  |         return -1;  | 
751  | 0  |     }  | 
752  | 0  |     return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));  | 
753  | 0  | }  | 
754  |  |  | 
755  |  | /**  | 
756  |  |  * compute the length of an UTF8 string, it doesn't do a full UTF8  | 
757  |  |  * checking of the content of the string.  | 
758  |  |  *  | 
759  |  |  * @param utf  a sequence of UTF-8 encoded bytes  | 
760  |  |  * @returns the number of characters in the string or -1 in case of error  | 
761  |  |  */  | 
762  |  | int  | 
763  | 0  | xmlUTF8Strlen(const xmlChar *utf) { | 
764  | 0  |     size_t ret = 0;  | 
765  |  | 
  | 
766  | 0  |     if (utf == NULL)  | 
767  | 0  |         return(-1);  | 
768  |  |  | 
769  | 0  |     while (*utf != 0) { | 
770  | 0  |         if (utf[0] & 0x80) { | 
771  | 0  |             if ((utf[1] & 0xc0) != 0x80)  | 
772  | 0  |                 return(-1);  | 
773  | 0  |             if ((utf[0] & 0xe0) == 0xe0) { | 
774  | 0  |                 if ((utf[2] & 0xc0) != 0x80)  | 
775  | 0  |                     return(-1);  | 
776  | 0  |                 if ((utf[0] & 0xf0) == 0xf0) { | 
777  | 0  |                     if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)  | 
778  | 0  |                         return(-1);  | 
779  | 0  |                     utf += 4;  | 
780  | 0  |                 } else { | 
781  | 0  |                     utf += 3;  | 
782  | 0  |                 }  | 
783  | 0  |             } else { | 
784  | 0  |                 utf += 2;  | 
785  | 0  |             }  | 
786  | 0  |         } else { | 
787  | 0  |             utf++;  | 
788  | 0  |         }  | 
789  | 0  |         ret++;  | 
790  | 0  |     }  | 
791  | 0  |     return(ret > INT_MAX ? 0 : ret);  | 
792  | 0  | }  | 
793  |  |  | 
/**
 * Read the first UTF8 character from `utf`
 *
 * Overlong encodings, UTF-16 surrogates (U+D800..U+DFFF), values
 * above U+10FFFF and lead bytes 0xF8..0xFF are rejected.
 *
 * @param utf  a sequence of UTF-8 encoded bytes
 * @param len  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 * @returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        if (*len < 1)
            goto error;
        /* 1-byte code */
        *len = 1;
    } else {
        if ((*len < 2) || ((utf[1] & 0xc0) != 0x80))
            goto error;
        if (c < 0xe0) {
            /*
             * 0x80..0xBF are continuation bytes, 0xC0 and 0xC1 could
             * only start overlong encodings.
             */
            if (c < 0xc2)
                goto error;
            /* 2-byte code */
            *len = 2;
            c = (c & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        } else {
            if ((*len < 3) || ((utf[2] & 0xc0) != 0x80))
                goto error;
            if (c < 0xf0) {
                /* 3-byte code */
                *len = 3;
                c = (c & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
                /* reject overlong encodings and surrogates */
                if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
                    goto error;
            } else {
                /*
                 * 11111xxx is never a valid lead byte. Without this
                 * check the 0x7 mask below would silently drop the
                 * offending bit and decode e.g. F8 90 80 80 as if it
                 * were F0 90 80 80.
                 */
                if (c >= 0xf8)
                    goto error;
                if ((*len < 4) || ((utf[3] & 0xc0) != 0x80))
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (c & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
                /* reject overlong encodings and values past U+10FFFF */
                if ((c < 0x10000) || (c >= 0x110000))
                    goto error;
            }
        }
    }
    return((int) c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
861  |  |  | 
/**
 * Checks `utf` for being valid UTF-8. `utf` is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * @param utf  Pointer to putative UTF-8 encoded string.
 * @returns value: true if `utf` is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *cur;
    unsigned char lead;
    int trail;
    int k;

    if (utf == NULL)
        return(0);

    /*
     * Each character is 1, 2, 3 or 4 bytes long.  The accepted
     * layouts are (in "bit format"):
     *    0xxxxxxx                                      1-byte
     *    110xxxxx 10xxxxxx                             2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           4-byte
     */
    for (cur = utf; *cur != 0; cur += trail + 1) {
        lead = *cur;

        /* derive the number of continuation bytes from the lead byte */
        if ((lead & 0x80) == 0x00)
            trail = 0;                  /* starts with 0 */
        else if ((lead & 0xe0) == 0xc0)
            trail = 1;                  /* starts with 110 */
        else if ((lead & 0xf0) == 0xe0)
            trail = 2;                  /* starts with 1110 */
        else if ((lead & 0xf8) == 0xf0)
            trail = 3;                  /* starts with 11110 */
        else
            return(0);                  /* unknown encoding */

        /*
         * Every continuation byte must look like 10xxxxxx. The
         * terminating 0 fails this test, so the scan never runs past
         * the end of the string.
         */
        for (k = 1; k <= trail; k++) {
            if ((cur[k] & 0xc0) != 0x80)
                return(0);
        }
    }

    return(1);
}
914  |  |  | 
915  |  | /**  | 
916  |  |  * storage size of an UTF8 string  | 
917  |  |  * the behaviour is not guaranteed if the input string is not UTF-8  | 
918  |  |  *  | 
919  |  |  * @param utf  a sequence of UTF-8 encoded bytes  | 
920  |  |  * @param len  the number of characters in the array  | 
921  |  |  * @returns the storage size of  | 
922  |  |  * the first 'len' characters of ARRAY  | 
923  |  |  */  | 
924  |  |  | 
925  |  | int  | 
926  | 0  | xmlUTF8Strsize(const xmlChar *utf, int len) { | 
927  | 0  |     const xmlChar *ptr=utf;  | 
928  | 0  |     int ch;  | 
929  | 0  |     size_t ret;  | 
930  |  | 
  | 
931  | 0  |     if (utf == NULL)  | 
932  | 0  |         return(0);  | 
933  |  |  | 
934  | 0  |     if (len <= 0)  | 
935  | 0  |         return(0);  | 
936  |  |  | 
937  | 0  |     while ( len-- > 0) { | 
938  | 0  |         if ( !*ptr )  | 
939  | 0  |             break;  | 
940  | 0  |         ch = *ptr++;  | 
941  | 0  |         if ((ch & 0x80))  | 
942  | 0  |             while ((ch<<=1) & 0x80 ) { | 
943  | 0  |     if (*ptr == 0) break;  | 
944  | 0  |                 ptr++;  | 
945  | 0  |       }  | 
946  | 0  |     }  | 
947  | 0  |     ret = ptr - utf;  | 
948  | 0  |     return (ret > INT_MAX ? 0 : ret);  | 
949  | 0  | }  | 
950  |  |  | 
951  |  |  | 
952  |  | /**  | 
953  |  |  * a strndup for array of UTF8's  | 
954  |  |  *  | 
955  |  |  * @param utf  the input UTF8 *  | 
956  |  |  * @param len  the len of `utf` (in chars)  | 
957  |  |  * @returns a new UTF8 * or NULL  | 
958  |  |  */  | 
959  |  | xmlChar *  | 
960  | 0  | xmlUTF8Strndup(const xmlChar *utf, int len) { | 
961  | 0  |     xmlChar *ret;  | 
962  | 0  |     int i;  | 
963  |  | 
  | 
964  | 0  |     if ((utf == NULL) || (len < 0)) return(NULL);  | 
965  | 0  |     i = xmlUTF8Strsize(utf, len);  | 
966  | 0  |     ret = xmlMalloc((size_t) i + 1);  | 
967  | 0  |     if (ret == NULL) { | 
968  | 0  |         return(NULL);  | 
969  | 0  |     }  | 
970  | 0  |     memcpy(ret, utf, i);  | 
971  | 0  |     ret[i] = 0;  | 
972  | 0  |     return(ret);  | 
973  | 0  | }  | 
974  |  |  | 
975  |  | /**  | 
976  |  |  * a function to provide the equivalent of fetching a  | 
977  |  |  * character from a string array  | 
978  |  |  *  | 
979  |  |  * @param utf  the input UTF8 *  | 
980  |  |  * @param pos  the position of the desired UTF8 char (in chars)  | 
981  |  |  * @returns a pointer to the UTF8 character or NULL  | 
982  |  |  */  | 
983  |  | const xmlChar *  | 
984  | 0  | xmlUTF8Strpos(const xmlChar *utf, int pos) { | 
985  | 0  |     int ch;  | 
986  |  | 
  | 
987  | 0  |     if (utf == NULL) return(NULL);  | 
988  | 0  |     if (pos < 0)  | 
989  | 0  |         return(NULL);  | 
990  | 0  |     while (pos--) { | 
991  | 0  |         ch = *utf++;  | 
992  | 0  |         if (ch == 0)  | 
993  | 0  |             return(NULL);  | 
994  | 0  |         if ( ch & 0x80 ) { | 
995  |  |             /* if not simple ascii, verify proper format */  | 
996  | 0  |             if ( (ch & 0xc0) != 0xc0 )  | 
997  | 0  |                 return(NULL);  | 
998  |  |             /* then skip over remaining bytes for this char */  | 
999  | 0  |             while ( (ch <<= 1) & 0x80 )  | 
1000  | 0  |                 if ( (*utf++ & 0xc0) != 0x80 )  | 
1001  | 0  |                     return(NULL);  | 
1002  | 0  |         }  | 
1003  | 0  |     }  | 
1004  | 0  |     return((xmlChar *)utf);  | 
1005  | 0  | }  | 
1006  |  |  | 
1007  |  | /**  | 
1008  |  |  * a function to provide the relative location of a UTF8 char  | 
1009  |  |  *  | 
1010  |  |  * @param utf  the input UTF8 *  | 
1011  |  |  * @param utfchar  the UTF8 character to be found  | 
1012  |  |  * @returns the relative character position of the desired char  | 
1013  |  |  * or -1 if not found  | 
1014  |  |  */  | 
1015  |  | int  | 
1016  | 0  | xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { | 
1017  | 0  |     size_t i;  | 
1018  | 0  |     int size;  | 
1019  | 0  |     int ch;  | 
1020  |  | 
  | 
1021  | 0  |     if (utf==NULL || utfchar==NULL) return -1;  | 
1022  | 0  |     size = xmlUTF8Strsize(utfchar, 1);  | 
1023  | 0  |         for(i=0; (ch=*utf) != 0; i++) { | 
1024  | 0  |             if (xmlStrncmp(utf, utfchar, size)==0)  | 
1025  | 0  |                 return(i > INT_MAX ? 0 : i);  | 
1026  | 0  |             utf++;  | 
1027  | 0  |             if ( ch & 0x80 ) { | 
1028  |  |                 /* if not simple ascii, verify proper format */  | 
1029  | 0  |                 if ( (ch & 0xc0) != 0xc0 )  | 
1030  | 0  |                     return(-1);  | 
1031  |  |                 /* then skip over remaining bytes for this char */  | 
1032  | 0  |                 while ( (ch <<= 1) & 0x80 )  | 
1033  | 0  |                     if ( (*utf++ & 0xc0) != 0x80 )  | 
1034  | 0  |                         return(-1);  | 
1035  | 0  |             }  | 
1036  | 0  |         }  | 
1037  |  |  | 
1038  | 0  |     return(-1);  | 
1039  | 0  | }  | 
1040  |  | /**  | 
1041  |  |  * Create a substring from a given UTF-8 string  | 
1042  |  |  * Note:  positions are given in units of UTF-8 chars  | 
1043  |  |  *  | 
1044  |  |  * @param utf  a sequence of UTF-8 encoded bytes  | 
1045  |  |  * @param start  relative pos of first char  | 
1046  |  |  * @param len  total number to copy  | 
1047  |  |  * @returns a pointer to a newly created string or NULL if the  | 
1048  |  |  * start index is out of bounds or a memory allocation failed.  | 
1049  |  |  * If len is too large, the result is truncated.  | 
1050  |  |  */  | 
1051  |  |  | 
1052  |  | xmlChar *  | 
1053  | 0  | xmlUTF8Strsub(const xmlChar *utf, int start, int len) { | 
1054  | 0  |     int i;  | 
1055  | 0  |     int ch;  | 
1056  |  | 
  | 
1057  | 0  |     if (utf == NULL) return(NULL);  | 
1058  | 0  |     if (start < 0) return(NULL);  | 
1059  | 0  |     if (len < 0) return(NULL);  | 
1060  |  |  | 
1061  |  |     /*  | 
1062  |  |      * Skip over any leading chars  | 
1063  |  |      */  | 
1064  | 0  |     for (i = 0; i < start; i++) { | 
1065  | 0  |         ch = *utf++;  | 
1066  | 0  |         if (ch == 0)  | 
1067  | 0  |             return(NULL);  | 
1068  |  |         /* skip over remaining bytes for this char */  | 
1069  | 0  |         if (ch & 0x80) { | 
1070  | 0  |             ch <<= 1;  | 
1071  | 0  |             while (ch & 0x80) { | 
1072  | 0  |                 if (*utf++ == 0)  | 
1073  | 0  |                     return(NULL);  | 
1074  | 0  |                 ch <<= 1;  | 
1075  | 0  |             }  | 
1076  | 0  |         }  | 
1077  | 0  |     }  | 
1078  |  |  | 
1079  | 0  |     return(xmlUTF8Strndup(utf, len));  | 
1080  | 0  | }  | 
1081  |  |  | 
1082  |  | /**  | 
1083  |  |  * Replaces a string with an escaped string.  | 
1084  |  |  *  | 
1085  |  |  * `msg` must be a heap-allocated buffer created by libxml2 that may be  | 
1086  |  |  * returned, or that may be freed and replaced.  | 
1087  |  |  *  | 
1088  |  |  * @param msg  a pointer to the string in which to escape '%' characters.  | 
1089  |  |  * @returns the same string with all '%' characters escaped.  | 
1090  |  |  */  | 
1091  |  | xmlChar *  | 
1092  |  | xmlEscapeFormatString(xmlChar **msg)  | 
1093  | 0  | { | 
1094  | 0  |     xmlChar *msgPtr = NULL;  | 
1095  | 0  |     xmlChar *result = NULL;  | 
1096  | 0  |     xmlChar *resultPtr = NULL;  | 
1097  | 0  |     size_t count = 0;  | 
1098  | 0  |     size_t msgLen = 0;  | 
1099  | 0  |     size_t resultLen = 0;  | 
1100  |  | 
  | 
1101  | 0  |     if (!msg || !*msg)  | 
1102  | 0  |         return(NULL);  | 
1103  |  |  | 
1104  | 0  |     for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) { | 
1105  | 0  |         ++msgLen;  | 
1106  | 0  |         if (*msgPtr == '%')  | 
1107  | 0  |             ++count;  | 
1108  | 0  |     }  | 
1109  |  | 
  | 
1110  | 0  |     if (count == 0)  | 
1111  | 0  |         return(*msg);  | 
1112  |  |  | 
1113  | 0  |     if ((count > INT_MAX) || (msgLen > INT_MAX - count))  | 
1114  | 0  |         return(NULL);  | 
1115  | 0  |     resultLen = msgLen + count + 1;  | 
1116  | 0  |     result = xmlMalloc(resultLen);  | 
1117  | 0  |     if (result == NULL) { | 
1118  |  |         /* Clear *msg to prevent format string vulnerabilities in  | 
1119  |  |            out-of-memory situations. */  | 
1120  | 0  |         xmlFree(*msg);  | 
1121  | 0  |         *msg = NULL;  | 
1122  | 0  |         return(NULL);  | 
1123  | 0  |     }  | 
1124  |  |  | 
1125  | 0  |     for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) { | 
1126  | 0  |         *resultPtr = *msgPtr;  | 
1127  | 0  |         if (*msgPtr == '%')  | 
1128  | 0  |             *(++resultPtr) = '%';  | 
1129  | 0  |     }  | 
1130  | 0  |     result[resultLen - 1] = '\0';  | 
1131  |  | 
  | 
1132  | 0  |     xmlFree(*msg);  | 
1133  | 0  |     *msg = result;  | 
1134  |  | 
  | 
1135  | 0  |     return *msg;  | 
1136  | 0  | }  | 
1137  |  |  |