Coverage Report

Created: 2023-06-07 06:06

/src/libxml2-2.10.3/xmlstring.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * string.c : an XML string utilities module
3
 *
4
 * This module provides various utility functions for manipulating
5
 * the xmlChar* type. All functions named xmlStr* have been moved here
6
 * from the parser.c file (their original home).
7
 *
8
 * See Copyright for the status of this software.
9
 *
10
 * UTF8 string routines from:
11
 * William Brack <wbrack@mmm.com.hk>
12
 *
13
 * daniel@veillard.com
14
 */
15
16
#define IN_LIBXML
17
#include "libxml.h"
18
19
#include <stdlib.h>
20
#include <string.h>
21
#include <limits.h>
22
#include <libxml/xmlmemory.h>
23
#include <libxml/parserInternals.h>
24
#include <libxml/xmlstring.h>
25
26
/************************************************************************
27
 *                                                                      *
28
 *                Commodity functions to handle xmlChars                *
29
 *                                                                      *
30
 ************************************************************************/
31
32
/**
33
 * xmlStrndup:
34
 * @cur:  the input xmlChar *
35
 * @len:  the len of @cur
36
 *
37
 * a strndup for array of xmlChar's
38
 *
39
 * Returns a new xmlChar * or NULL
40
 */
41
xmlChar *
42
2.15M
xmlStrndup(const xmlChar *cur, int len) {
43
2.15M
    xmlChar *ret;
44
45
2.15M
    if ((cur == NULL) || (len < 0)) return(NULL);
46
2.15M
    ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
47
2.15M
    if (ret == NULL) {
48
0
        xmlErrMemory(NULL, NULL);
49
0
        return(NULL);
50
0
    }
51
2.15M
    memcpy(ret, cur, len * sizeof(xmlChar));
52
2.15M
    ret[len] = 0;
53
2.15M
    return(ret);
54
2.15M
}
55
56
/**
57
 * xmlStrdup:
58
 * @cur:  the input xmlChar *
59
 *
60
 * a strdup for array of xmlChar's. Since they are supposed to be
61
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
62
 * a termination mark of '0'.
63
 *
64
 * Returns a new xmlChar * or NULL
65
 */
66
xmlChar *
67
5.80M
xmlStrdup(const xmlChar *cur) {
68
5.80M
    const xmlChar *p = cur;
69
70
5.80M
    if (cur == NULL) return(NULL);
71
2.88G
    while (*p != 0) p++; /* non input consuming */
72
1.79M
    return(xmlStrndup(cur, p - cur));
73
5.80M
}
74
75
/**
76
 * xmlCharStrndup:
77
 * @cur:  the input char *
78
 * @len:  the len of @cur
79
 *
80
 * a strndup for char's to xmlChar's
81
 *
82
 * Returns a new xmlChar * or NULL
83
 */
84
85
xmlChar *
86
17.1k
xmlCharStrndup(const char *cur, int len) {
87
17.1k
    int i;
88
17.1k
    xmlChar *ret;
89
90
17.1k
    if ((cur == NULL) || (len < 0)) return(NULL);
91
17.1k
    ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
92
17.1k
    if (ret == NULL) {
93
0
        xmlErrMemory(NULL, NULL);
94
0
        return(NULL);
95
0
    }
96
74.9k
    for (i = 0;i < len;i++) {
97
57.7k
        ret[i] = (xmlChar) cur[i];
98
57.7k
        if (ret[i] == 0) return(ret);
99
57.7k
    }
100
17.1k
    ret[len] = 0;
101
17.1k
    return(ret);
102
17.1k
}
103
104
/**
105
 * xmlCharStrdup:
106
 * @cur:  the input char *
107
 *
108
 * a strdup for char's to xmlChar's
109
 *
110
 * Returns a new xmlChar * or NULL
111
 */
112
113
xmlChar *
114
17.1k
xmlCharStrdup(const char *cur) {
115
17.1k
    const char *p = cur;
116
117
17.1k
    if (cur == NULL) return(NULL);
118
74.9k
    while (*p != '\0') p++; /* non input consuming */
119
17.1k
    return(xmlCharStrndup(cur, p - cur));
120
17.1k
}
121
122
/**
123
 * xmlStrcmp:
124
 * @str1:  the first xmlChar *
125
 * @str2:  the second xmlChar *
126
 *
127
 * a strcmp for xmlChar's
128
 *
129
 * Returns the integer result of the comparison
130
 */
131
132
int
133
0
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
134
0
    if (str1 == str2) return(0);
135
0
    if (str1 == NULL) return(-1);
136
0
    if (str2 == NULL) return(1);
137
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
138
0
    return(strcmp((const char *)str1, (const char *)str2));
139
#else
140
    do {
141
        int tmp = *str1++ - *str2;
142
        if (tmp != 0) return(tmp);
143
    } while (*str2++ != 0);
144
    return 0;
145
#endif
146
0
}
147
148
/**
149
 * xmlStrEqual:
150
 * @str1:  the first xmlChar *
151
 * @str2:  the second xmlChar *
152
 *
153
 * Check if both strings are equal of have same content.
154
 * Should be a bit more readable and faster than xmlStrcmp()
155
 *
156
 * Returns 1 if they are equal, 0 if they are different
157
 */
158
159
int
160
1.72M
xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
161
1.72M
    if (str1 == str2) return(1);
162
1.65M
    if (str1 == NULL) return(0);
163
1.65M
    if (str2 == NULL) return(0);
164
1.65M
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
165
1.65M
    return(strcmp((const char *)str1, (const char *)str2) == 0);
166
#else
167
    do {
168
        if (*str1++ != *str2) return(0);
169
    } while (*str2++);
170
    return(1);
171
#endif
172
1.65M
}
173
174
/**
175
 * xmlStrQEqual:
176
 * @pref:  the prefix of the QName
177
 * @name:  the localname of the QName
178
 * @str:  the second xmlChar *
179
 *
180
 * Check if a QName is Equal to a given string
181
 *
182
 * Returns 1 if they are equal, 0 if they are different
183
 */
184
185
int
186
46.8k
xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
187
46.8k
    if (pref == NULL) return(xmlStrEqual(name, str));
188
8.63k
    if (name == NULL) return(0);
189
8.63k
    if (str == NULL) return(0);
190
191
15.2k
    do {
192
15.2k
        if (*pref++ != *str) return(0);
193
15.2k
    } while ((*str++) && (*pref));
194
6.41k
    if (*str++ != ':') return(0);
195
3.79k
    do {
196
3.79k
        if (*name++ != *str) return(0);
197
3.79k
    } while (*str++);
198
767
    return(1);
199
1.14k
}
200
201
/**
202
 * xmlStrncmp:
203
 * @str1:  the first xmlChar *
204
 * @str2:  the second xmlChar *
205
 * @len:  the max comparison length
206
 *
207
 * a strncmp for xmlChar's
208
 *
209
 * Returns the integer result of the comparison
210
 */
211
212
int
213
278k
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
214
278k
    if (len <= 0) return(0);
215
267k
    if (str1 == str2) return(0);
216
267k
    if (str1 == NULL) return(-1);
217
267k
    if (str2 == NULL) return(1);
218
267k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
219
267k
    return(strncmp((const char *)str1, (const char *)str2, len));
220
#else
221
    do {
222
        int tmp = *str1++ - *str2;
223
        if (tmp != 0 || --len == 0) return(tmp);
224
    } while (*str2++ != 0);
225
    return 0;
226
#endif
227
267k
}
228
229
/*
 * Case folding table used by the case-insensitive comparison routines:
 * the ASCII upper case letters 'A'-'Z' (0x41-0x5A) are mapped to their
 * lower case counterparts, every other byte value maps to itself.
 * In particular '[' (0x5B) must stay 0x5B, otherwise it would compare
 * equal to '{' (0x7B).
 */
static const xmlChar casemap[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
263
264
/**
265
 * xmlStrcasecmp:
266
 * @str1:  the first xmlChar *
267
 * @str2:  the second xmlChar *
268
 *
269
 * a strcasecmp for xmlChar's
270
 *
271
 * Returns the integer result of the comparison
272
 */
273
274
int
275
13.5k
xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
276
13.5k
    register int tmp;
277
278
13.5k
    if (str1 == str2) return(0);
279
13.5k
    if (str1 == NULL) return(-1);
280
13.5k
    if (str2 == NULL) return(1);
281
14.9k
    do {
282
14.9k
        tmp = casemap[*str1++] - casemap[*str2];
283
14.9k
        if (tmp != 0) return(tmp);
284
14.9k
    } while (*str2++ != 0);
285
38
    return 0;
286
13.5k
}
287
288
/**
289
 * xmlStrncasecmp:
290
 * @str1:  the first xmlChar *
291
 * @str2:  the second xmlChar *
292
 * @len:  the max comparison length
293
 *
294
 * a strncasecmp for xmlChar's
295
 *
296
 * Returns the integer result of the comparison
297
 */
298
299
int
300
63.6k
xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
301
63.6k
    register int tmp;
302
303
63.6k
    if (len <= 0) return(0);
304
63.6k
    if (str1 == str2) return(0);
305
63.6k
    if (str1 == NULL) return(-1);
306
63.6k
    if (str2 == NULL) return(1);
307
359k
    do {
308
359k
        tmp = casemap[*str1++] - casemap[*str2];
309
359k
        if (tmp != 0 || --len == 0) return(tmp);
310
359k
    } while (*str2++ != 0);
311
0
    return 0;
312
63.6k
}
313
314
/**
315
 * xmlStrchr:
316
 * @str:  the xmlChar * array
317
 * @val:  the xmlChar to search
318
 *
319
 * a strchr for xmlChar's
320
 *
321
 * Returns the xmlChar * for the first occurrence or NULL.
322
 */
323
324
const xmlChar *
325
36.2k
xmlStrchr(const xmlChar *str, xmlChar val) {
326
36.2k
    if (str == NULL) return(NULL);
327
9.23M
    while (*str != 0) { /* non input consuming */
328
9.20M
        if (*str == val) return((xmlChar *) str);
329
9.19M
        str++;
330
9.19M
    }
331
33.5k
    return(NULL);
332
36.2k
}
333
334
/**
335
 * xmlStrstr:
336
 * @str:  the xmlChar * array (haystack)
337
 * @val:  the xmlChar to search (needle)
338
 *
339
 * a strstr for xmlChar's
340
 *
341
 * Returns the xmlChar * for the first occurrence or NULL.
342
 */
343
344
const xmlChar *
345
0
xmlStrstr(const xmlChar *str, const xmlChar *val) {
346
0
    int n;
347
348
0
    if (str == NULL) return(NULL);
349
0
    if (val == NULL) return(NULL);
350
0
    n = xmlStrlen(val);
351
352
0
    if (n == 0) return(str);
353
0
    while (*str != 0) { /* non input consuming */
354
0
        if (*str == *val) {
355
0
            if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
356
0
        }
357
0
        str++;
358
0
    }
359
0
    return(NULL);
360
0
}
361
362
/**
363
 * xmlStrcasestr:
364
 * @str:  the xmlChar * array (haystack)
365
 * @val:  the xmlChar to search (needle)
366
 *
367
 * a case-ignoring strstr for xmlChar's
368
 *
369
 * Returns the xmlChar * for the first occurrence or NULL.
370
 */
371
372
const xmlChar *
373
63.6k
xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
374
63.6k
    int n;
375
376
63.6k
    if (str == NULL) return(NULL);
377
63.6k
    if (val == NULL) return(NULL);
378
63.6k
    n = xmlStrlen(val);
379
380
63.6k
    if (n == 0) return(str);
381
111k
    while (*str != 0) { /* non input consuming */
382
105k
        if (casemap[*str] == casemap[*val])
383
63.6k
            if (!xmlStrncasecmp(str, val, n)) return(str);
384
47.6k
        str++;
385
47.6k
    }
386
5.50k
    return(NULL);
387
63.6k
}
388
389
/**
390
 * xmlStrsub:
391
 * @str:  the xmlChar * array (haystack)
392
 * @start:  the index of the first char (zero based)
393
 * @len:  the length of the substring
394
 *
395
 * Extract a substring of a given string
396
 *
397
 * Returns the xmlChar * for the first occurrence or NULL.
398
 */
399
400
xmlChar *
401
0
xmlStrsub(const xmlChar *str, int start, int len) {
402
0
    int i;
403
404
0
    if (str == NULL) return(NULL);
405
0
    if (start < 0) return(NULL);
406
0
    if (len < 0) return(NULL);
407
408
0
    for (i = 0;i < start;i++) {
409
0
        if (*str == 0) return(NULL);
410
0
        str++;
411
0
    }
412
0
    if (*str == 0) return(NULL);
413
0
    return(xmlStrndup(str, len));
414
0
}
415
416
/**
417
 * xmlStrlen:
418
 * @str:  the xmlChar * array
419
 *
420
 * length of a xmlChar's string
421
 *
422
 * Returns the number of xmlChar contained in the ARRAY.
423
 */
424
425
int
426
106k
xmlStrlen(const xmlChar *str) {
427
106k
    size_t len = str ? strlen((const char *)str) : 0;
428
106k
    return(len > INT_MAX ? 0 : len);
429
106k
}
430
431
/**
432
 * xmlStrncat:
433
 * @cur:  the original xmlChar * array
434
 * @add:  the xmlChar * array added
435
 * @len:  the length of @add
436
 *
437
 * a strncat for array of xmlChar's, it will extend @cur with the len
438
 * first bytes of @add. Note that if @len < 0 then this is an API error
439
 * and NULL will be returned.
440
 *
441
 * Returns a new xmlChar *, the original @cur is reallocated and should
442
 * not be freed.
443
 */
444
445
xmlChar *
446
0
xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
447
0
    int size;
448
0
    xmlChar *ret;
449
450
0
    if ((add == NULL) || (len == 0))
451
0
        return(cur);
452
0
    if (len < 0)
453
0
  return(NULL);
454
0
    if (cur == NULL)
455
0
        return(xmlStrndup(add, len));
456
457
0
    size = xmlStrlen(cur);
458
0
    if ((size < 0) || (size > INT_MAX - len))
459
0
        return(NULL);
460
0
    ret = (xmlChar *) xmlRealloc(cur, ((size_t) size + len + 1) * sizeof(xmlChar));
461
0
    if (ret == NULL) {
462
0
        xmlErrMemory(NULL, NULL);
463
0
        return(cur);
464
0
    }
465
0
    memcpy(&ret[size], add, len * sizeof(xmlChar));
466
0
    ret[size + len] = 0;
467
0
    return(ret);
468
0
}
469
470
/**
471
 * xmlStrncatNew:
472
 * @str1:  first xmlChar string
473
 * @str2:  second xmlChar string
474
 * @len:  the len of @str2 or < 0
475
 *
476
 * same as xmlStrncat, but creates a new string.  The original
477
 * two strings are not freed. If @len is < 0 then the length
478
 * will be calculated automatically.
479
 *
480
 * Returns a new xmlChar * or NULL
481
 */
482
xmlChar *
483
0
xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
484
0
    int size;
485
0
    xmlChar *ret;
486
487
0
    if (len < 0) {
488
0
        len = xmlStrlen(str2);
489
0
        if (len < 0)
490
0
            return(NULL);
491
0
    }
492
0
    if ((str2 == NULL) || (len == 0))
493
0
        return(xmlStrdup(str1));
494
0
    if (str1 == NULL)
495
0
        return(xmlStrndup(str2, len));
496
497
0
    size = xmlStrlen(str1);
498
0
    if ((size < 0) || (size > INT_MAX - len))
499
0
        return(NULL);
500
0
    ret = (xmlChar *) xmlMalloc(((size_t) size + len + 1) * sizeof(xmlChar));
501
0
    if (ret == NULL) {
502
0
        xmlErrMemory(NULL, NULL);
503
0
        return(xmlStrndup(str1, size));
504
0
    }
505
0
    memcpy(ret, str1, size * sizeof(xmlChar));
506
0
    memcpy(&ret[size], str2, len * sizeof(xmlChar));
507
0
    ret[size + len] = 0;
508
0
    return(ret);
509
0
}
510
511
/**
512
 * xmlStrcat:
513
 * @cur:  the original xmlChar * array
514
 * @add:  the xmlChar * array added
515
 *
516
 * a strcat for array of xmlChar's. Since they are supposed to be
517
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
518
 * a termination mark of '0'.
519
 *
520
 * Returns a new xmlChar * containing the concatenated string. The original
521
 * @cur is reallocated and should not be freed.
522
 */
523
xmlChar *
524
0
xmlStrcat(xmlChar *cur, const xmlChar *add) {
525
0
    const xmlChar *p = add;
526
527
0
    if (add == NULL) return(cur);
528
0
    if (cur == NULL)
529
0
        return(xmlStrdup(add));
530
531
0
    while (*p != 0) p++; /* non input consuming */
532
0
    return(xmlStrncat(cur, add, p - add));
533
0
}
534
535
/**
536
 * xmlStrPrintf:
537
 * @buf:   the result buffer.
538
 * @len:   the result buffer length.
539
 * @msg:   the message with printf formatting.
540
 * @...:   extra parameters for the message.
541
 *
542
 * Formats @msg and places result into @buf.
543
 *
544
 * Returns the number of characters written to @buf or -1 if an error occurs.
545
 */
546
int XMLCDECL
547
0
xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
548
0
    va_list args;
549
0
    int ret;
550
551
0
    if((buf == NULL) || (msg == NULL)) {
552
0
        return(-1);
553
0
    }
554
555
0
    va_start(args, msg);
556
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, args);
557
0
    va_end(args);
558
0
    buf[len - 1] = 0; /* be safe ! */
559
560
0
    return(ret);
561
0
}
562
563
/**
564
 * xmlStrVPrintf:
565
 * @buf:   the result buffer.
566
 * @len:   the result buffer length.
567
 * @msg:   the message with printf formatting.
568
 * @ap:    extra parameters for the message.
569
 *
570
 * Formats @msg and places result into @buf.
571
 *
572
 * Returns the number of characters written to @buf or -1 if an error occurs.
573
 */
574
int
575
0
xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
576
0
    int ret;
577
578
0
    if((buf == NULL) || (msg == NULL)) {
579
0
        return(-1);
580
0
    }
581
582
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
583
0
    buf[len - 1] = 0; /* be safe ! */
584
585
0
    return(ret);
586
0
}
587
588
/************************************************************************
589
 *                                                                      *
590
 *              Generic UTF8 handling routines                          *
591
 *                                                                      *
592
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
593
 *                                                                      *
594
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
595
 * 0000 0000-0000 007F   0xxxxxxx                                       *
596
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
597
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
598
 *                                                                      *
599
 * I hope we won't use values > 0xFFFF anytime soon !                   *
600
 *                                                                      *
601
 ************************************************************************/
602
603
604
/**
605
 * xmlUTF8Size:
606
 * @utf: pointer to the UTF8 character
607
 *
608
 * calculates the internal size of a UTF8 character
609
 *
610
 * returns the numbers of bytes in the character, -1 on format error
611
 */
612
int
613
0
xmlUTF8Size(const xmlChar *utf) {
614
0
    xmlChar mask;
615
0
    int len;
616
617
0
    if (utf == NULL)
618
0
        return -1;
619
0
    if (*utf < 0x80)
620
0
        return 1;
621
    /* check valid UTF8 character */
622
0
    if (!(*utf & 0x40))
623
0
        return -1;
624
    /* determine number of bytes in char */
625
0
    len = 2;
626
0
    for (mask=0x20; mask != 0; mask>>=1) {
627
0
        if (!(*utf & mask))
628
0
            return len;
629
0
        len++;
630
0
    }
631
0
    return -1;
632
0
}
633
634
/**
635
 * xmlUTF8Charcmp:
636
 * @utf1: pointer to first UTF8 char
637
 * @utf2: pointer to second UTF8 char
638
 *
639
 * compares the two UCS4 values
640
 *
641
 * returns result of the compare as with xmlStrncmp
642
 */
643
int
644
0
xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
645
646
0
    if (utf1 == NULL ) {
647
0
        if (utf2 == NULL)
648
0
            return 0;
649
0
        return -1;
650
0
    }
651
0
    return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
652
0
}
653
654
/**
655
 * xmlUTF8Strlen:
656
 * @utf:  a sequence of UTF-8 encoded bytes
657
 *
658
 * compute the length of an UTF8 string, it doesn't do a full UTF8
659
 * checking of the content of the string.
660
 *
661
 * Returns the number of characters in the string or -1 in case of error
662
 */
663
int
664
0
xmlUTF8Strlen(const xmlChar *utf) {
665
0
    size_t ret = 0;
666
667
0
    if (utf == NULL)
668
0
        return(-1);
669
670
0
    while (*utf != 0) {
671
0
        if (utf[0] & 0x80) {
672
0
            if ((utf[1] & 0xc0) != 0x80)
673
0
                return(-1);
674
0
            if ((utf[0] & 0xe0) == 0xe0) {
675
0
                if ((utf[2] & 0xc0) != 0x80)
676
0
                    return(-1);
677
0
                if ((utf[0] & 0xf0) == 0xf0) {
678
0
                    if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
679
0
                        return(-1);
680
0
                    utf += 4;
681
0
                } else {
682
0
                    utf += 3;
683
0
                }
684
0
            } else {
685
0
                utf += 2;
686
0
            }
687
0
        } else {
688
0
            utf++;
689
0
        }
690
0
        ret++;
691
0
    }
692
0
    return(ret > INT_MAX ? 0 : ret);
693
0
}
694
695
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf
 *
 * The lead byte must be an ASCII byte or start a 2, 3 or 4 byte
 * sequence; a continuation byte (10xxxxxx) in lead position is an
 * error. Overlong encodings and the 0x10FFFF upper limit are not
 * checked.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /* 10xxxxxx is a continuation byte and can never start a char */
        if ((c & 0xc0) != 0xc0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return((int) c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
764
765
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *p;

    if (utf == NULL)
        return(0);

    /*
     * Each character is a sequence of 1, 2, 3 or 4 bytes.  The valid
     * sequences are as follows (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    p = utf;
    while (*p != 0) {           /* string is 0-terminated */
        unsigned char lead = *p;
        int size;
        int i;

        if ((lead & 0x80) == 0x00)      /* 1-byte code, starts with 0 */
            size = 1;
        else if ((lead & 0xe0) == 0xc0) /* 2-byte code, starts with 110 */
            size = 2;
        else if ((lead & 0xf0) == 0xe0) /* 3-byte code, starts with 1110 */
            size = 3;
        else if ((lead & 0xf8) == 0xf0) /* 4-byte code, starts with 11110 */
            size = 4;
        else                            /* unknown encoding */
            return(0);

        /*
         * Every trailing byte must look like 10xxxxxx.  A terminating 0
         * fails this test, so the scan never runs past the end.
         */
        for (i = 1; i < size; i++) {
            if ((p[i] & 0xc0) != 0x80)
                return(0);
        }
        p += size;
    }
    return(1);
}
819
820
/**
821
 * xmlUTF8Strsize:
822
 * @utf:  a sequence of UTF-8 encoded bytes
823
 * @len:  the number of characters in the array
824
 *
825
 * storage size of an UTF8 string
826
 * the behaviour is not guaranteed if the input string is not UTF-8
827
 *
828
 * Returns the storage size of
829
 * the first 'len' characters of ARRAY
830
 */
831
832
int
833
0
xmlUTF8Strsize(const xmlChar *utf, int len) {
834
0
    const xmlChar *ptr=utf;
835
0
    int ch;
836
0
    size_t ret;
837
838
0
    if (utf == NULL)
839
0
        return(0);
840
841
0
    if (len <= 0)
842
0
        return(0);
843
844
0
    while ( len-- > 0) {
845
0
        if ( !*ptr )
846
0
            break;
847
0
        if ( (ch = *ptr++) & 0x80)
848
0
            while ((ch<<=1) & 0x80 ) {
849
0
    if (*ptr == 0) break;
850
0
                ptr++;
851
0
      }
852
0
    }
853
0
    ret = ptr - utf;
854
0
    return (ret > INT_MAX ? 0 : ret);
855
0
}
856
857
858
/**
859
 * xmlUTF8Strndup:
860
 * @utf:  the input UTF8 *
861
 * @len:  the len of @utf (in chars)
862
 *
863
 * a strndup for array of UTF8's
864
 *
865
 * Returns a new UTF8 * or NULL
866
 */
867
xmlChar *
868
0
xmlUTF8Strndup(const xmlChar *utf, int len) {
869
0
    xmlChar *ret;
870
0
    int i;
871
872
0
    if ((utf == NULL) || (len < 0)) return(NULL);
873
0
    i = xmlUTF8Strsize(utf, len);
874
0
    ret = (xmlChar *) xmlMallocAtomic(((size_t) i + 1) * sizeof(xmlChar));
875
0
    if (ret == NULL) {
876
0
        return(NULL);
877
0
    }
878
0
    memcpy(ret, utf, i * sizeof(xmlChar));
879
0
    ret[i] = 0;
880
0
    return(ret);
881
0
}
882
883
/**
884
 * xmlUTF8Strpos:
885
 * @utf:  the input UTF8 *
886
 * @pos:  the position of the desired UTF8 char (in chars)
887
 *
888
 * a function to provide the equivalent of fetching a
889
 * character from a string array
890
 *
891
 * Returns a pointer to the UTF8 character or NULL
892
 */
893
const xmlChar *
894
0
xmlUTF8Strpos(const xmlChar *utf, int pos) {
895
0
    int ch;
896
897
0
    if (utf == NULL) return(NULL);
898
0
    if (pos < 0)
899
0
        return(NULL);
900
0
    while (pos--) {
901
0
        if ((ch=*utf++) == 0) return(NULL);
902
0
        if ( ch & 0x80 ) {
903
            /* if not simple ascii, verify proper format */
904
0
            if ( (ch & 0xc0) != 0xc0 )
905
0
                return(NULL);
906
            /* then skip over remaining bytes for this char */
907
0
            while ( (ch <<= 1) & 0x80 )
908
0
                if ( (*utf++ & 0xc0) != 0x80 )
909
0
                    return(NULL);
910
0
        }
911
0
    }
912
0
    return((xmlChar *)utf);
913
0
}
914
915
/**
916
 * xmlUTF8Strloc:
917
 * @utf:  the input UTF8 *
918
 * @utfchar:  the UTF8 character to be found
919
 *
920
 * a function to provide the relative location of a UTF8 char
921
 *
922
 * Returns the relative character position of the desired char
923
 * or -1 if not found
924
 */
925
int
926
0
xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
927
0
    size_t i;
928
0
    int size;
929
0
    int ch;
930
931
0
    if (utf==NULL || utfchar==NULL) return -1;
932
0
    size = xmlUTF8Strsize(utfchar, 1);
933
0
        for(i=0; (ch=*utf) != 0; i++) {
934
0
            if (xmlStrncmp(utf, utfchar, size)==0)
935
0
                return(i > INT_MAX ? 0 : i);
936
0
            utf++;
937
0
            if ( ch & 0x80 ) {
938
                /* if not simple ascii, verify proper format */
939
0
                if ( (ch & 0xc0) != 0xc0 )
940
0
                    return(-1);
941
                /* then skip over remaining bytes for this char */
942
0
                while ( (ch <<= 1) & 0x80 )
943
0
                    if ( (*utf++ & 0xc0) != 0x80 )
944
0
                        return(-1);
945
0
            }
946
0
        }
947
948
0
    return(-1);
949
0
}
950
/**
951
 * xmlUTF8Strsub:
952
 * @utf:  a sequence of UTF-8 encoded bytes
953
 * @start: relative pos of first char
954
 * @len:   total number to copy
955
 *
956
 * Create a substring from a given UTF-8 string
957
 * Note:  positions are given in units of UTF-8 chars
958
 *
959
 * Returns a pointer to a newly created string
960
 * or NULL if any problem
961
 */
962
963
xmlChar *
964
0
xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
965
0
    int i;
966
0
    int ch;
967
968
0
    if (utf == NULL) return(NULL);
969
0
    if (start < 0) return(NULL);
970
0
    if (len < 0) return(NULL);
971
972
    /*
973
     * Skip over any leading chars
974
     */
975
0
    for (i = 0;i < start;i++) {
976
0
        if ((ch=*utf++) == 0) return(NULL);
977
0
        if ( ch & 0x80 ) {
978
            /* if not simple ascii, verify proper format */
979
0
            if ( (ch & 0xc0) != 0xc0 )
980
0
                return(NULL);
981
            /* then skip over remaining bytes for this char */
982
0
            while ( (ch <<= 1) & 0x80 )
983
0
                if ( (*utf++ & 0xc0) != 0x80 )
984
0
                    return(NULL);
985
0
        }
986
0
    }
987
988
0
    return(xmlUTF8Strndup(utf, len));
989
0
}
990
991
/**
992
 * xmlEscapeFormatString:
993
 * @msg:  a pointer to the string in which to escape '%' characters.
994
 * Must be a heap-allocated buffer created by libxml2 that may be
995
 * returned, or that may be freed and replaced.
996
 *
997
 * Replaces the string pointed to by 'msg' with an escaped string.
998
 * Returns the same string with all '%' characters escaped.
999
 */
1000
xmlChar *
1001
xmlEscapeFormatString(xmlChar **msg)
1002
0
{
1003
0
    xmlChar *msgPtr = NULL;
1004
0
    xmlChar *result = NULL;
1005
0
    xmlChar *resultPtr = NULL;
1006
0
    size_t count = 0;
1007
0
    size_t msgLen = 0;
1008
0
    size_t resultLen = 0;
1009
1010
0
    if (!msg || !*msg)
1011
0
        return(NULL);
1012
1013
0
    for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
1014
0
        ++msgLen;
1015
0
        if (*msgPtr == '%')
1016
0
            ++count;
1017
0
    }
1018
1019
0
    if (count == 0)
1020
0
        return(*msg);
1021
1022
0
    if ((count > INT_MAX) || (msgLen > INT_MAX - count))
1023
0
        return(NULL);
1024
0
    resultLen = msgLen + count + 1;
1025
0
    result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
1026
0
    if (result == NULL) {
1027
        /* Clear *msg to prevent format string vulnerabilities in
1028
           out-of-memory situations. */
1029
0
        xmlFree(*msg);
1030
0
        *msg = NULL;
1031
0
        xmlErrMemory(NULL, NULL);
1032
0
        return(NULL);
1033
0
    }
1034
1035
0
    for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1036
0
        *resultPtr = *msgPtr;
1037
0
        if (*msgPtr == '%')
1038
0
            *(++resultPtr) = '%';
1039
0
    }
1040
0
    result[resultLen - 1] = '\0';
1041
1042
0
    xmlFree(*msg);
1043
0
    *msg = result;
1044
1045
0
    return *msg;
1046
0
}
1047