Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2-2.9.7/xmlstring.c
Line
Count
Source
1
/*
2
 * xmlstring.c : an XML string utilities module
3
 *
4
 * This module provides various utility functions for manipulating
5
 * the xmlChar* type. All functions named xmlStr* have been moved here
6
 * from the parser.c file (their original home).
7
 *
8
 * See Copyright for the status of this software.
9
 *
10
 * UTF8 string routines from:
11
 * William Brack <wbrack@mmm.com.hk>
12
 *
13
 * daniel@veillard.com
14
 */
15
16
#define IN_LIBXML
17
#include "libxml.h"
18
19
#include <stdlib.h>
20
#include <string.h>
21
#include <libxml/xmlmemory.h>
22
#include <libxml/parserInternals.h>
23
#include <libxml/xmlstring.h>
24
25
/************************************************************************
26
 *                                                                      *
27
 *                Commodity functions to handle xmlChars                *
28
 *                                                                      *
29
 ************************************************************************/
30
31
/**
32
 * xmlStrndup:
33
 * @cur:  the input xmlChar *
34
 * @len:  the len of @cur
35
 *
36
 * a strndup for array of xmlChar's
37
 *
38
 * Returns a new xmlChar * or NULL
39
 */
40
xmlChar *
41
122M
xmlStrndup(const xmlChar *cur, int len) {
42
122M
    xmlChar *ret;
43
44
122M
    if ((cur == NULL) || (len < 0)) return(NULL);
45
122M
    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
46
122M
    if (ret == NULL) {
47
0
        xmlErrMemory(NULL, NULL);
48
0
        return(NULL);
49
0
    }
50
122M
    memcpy(ret, cur, len * sizeof(xmlChar));
51
122M
    ret[len] = 0;
52
122M
    return(ret);
53
122M
}
54
55
/**
56
 * xmlStrdup:
57
 * @cur:  the input xmlChar *
58
 *
59
 * a strdup for array of xmlChar's. Since they are supposed to be
60
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
61
 * a termination mark of '0'.
62
 *
63
 * Returns a new xmlChar * or NULL
64
 */
65
xmlChar *
66
255M
xmlStrdup(const xmlChar *cur) {
67
255M
    const xmlChar *p = cur;
68
69
255M
    if (cur == NULL) return(NULL);
70
85.0G
    while (*p != 0) p++; /* non input consuming */
71
115M
    return(xmlStrndup(cur, p - cur));
72
255M
}
73
74
/**
75
 * xmlCharStrndup:
76
 * @cur:  the input char *
77
 * @len:  the len of @cur
78
 *
79
 * a strndup for char's to xmlChar's
80
 *
81
 * Returns a new xmlChar * or NULL
82
 */
83
84
xmlChar *
85
152k
xmlCharStrndup(const char *cur, int len) {
86
152k
    int i;
87
152k
    xmlChar *ret;
88
89
152k
    if ((cur == NULL) || (len < 0)) return(NULL);
90
152k
    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
91
152k
    if (ret == NULL) {
92
0
        xmlErrMemory(NULL, NULL);
93
0
        return(NULL);
94
0
    }
95
623k
    for (i = 0;i < len;i++) {
96
470k
        ret[i] = (xmlChar) cur[i];
97
470k
        if (ret[i] == 0) return(ret);
98
470k
    }
99
152k
    ret[len] = 0;
100
152k
    return(ret);
101
152k
}
102
103
/**
104
 * xmlCharStrdup:
105
 * @cur:  the input char *
106
 *
107
 * a strdup for char's to xmlChar's
108
 *
109
 * Returns a new xmlChar * or NULL
110
 */
111
112
xmlChar *
113
152k
xmlCharStrdup(const char *cur) {
114
152k
    const char *p = cur;
115
116
152k
    if (cur == NULL) return(NULL);
117
623k
    while (*p != '\0') p++; /* non input consuming */
118
152k
    return(xmlCharStrndup(cur, p - cur));
119
152k
}
120
121
/**
122
 * xmlStrcmp:
123
 * @str1:  the first xmlChar *
124
 * @str2:  the second xmlChar *
125
 *
126
 * a strcmp for xmlChar's
127
 *
128
 * Returns the integer result of the comparison
129
 */
130
131
int
132
0
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
133
0
    register int tmp;
134
135
0
    if (str1 == str2) return(0);
136
0
    if (str1 == NULL) return(-1);
137
0
    if (str2 == NULL) return(1);
138
0
    do {
139
0
        tmp = *str1++ - *str2;
140
0
        if (tmp != 0) return(tmp);
141
0
    } while (*str2++ != 0);
142
0
    return 0;
143
0
}
144
145
/**
146
 * xmlStrEqual:
147
 * @str1:  the first xmlChar *
148
 * @str2:  the second xmlChar *
149
 *
150
 * Check if both strings are equal of have same content.
151
 * Should be a bit more readable and faster than xmlStrcmp()
152
 *
153
 * Returns 1 if they are equal, 0 if they are different
154
 */
155
156
int
157
36.8M
xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
158
36.8M
    if (str1 == str2) return(1);
159
34.6M
    if (str1 == NULL) return(0);
160
31.7M
    if (str2 == NULL) return(0);
161
65.3M
    do {
162
65.3M
        if (*str1++ != *str2) return(0);
163
65.3M
    } while (*str2++);
164
4.96M
    return(1);
165
31.5M
}
166
167
/**
168
 * xmlStrQEqual:
169
 * @pref:  the prefix of the QName
170
 * @name:  the localname of the QName
171
 * @str:  the second xmlChar *
172
 *
173
 * Check if a QName is Equal to a given string
174
 *
175
 * Returns 1 if they are equal, 0 if they are different
176
 */
177
178
int
179
1.10M
xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
180
1.10M
    if (pref == NULL) return(xmlStrEqual(name, str));
181
776k
    if (name == NULL) return(0);
182
776k
    if (str == NULL) return(0);
183
184
4.18M
    do {
185
4.18M
        if (*pref++ != *str) return(0);
186
4.18M
    } while ((*str++) && (*pref));
187
767k
    if (*str++ != ':') return(0);
188
11.7M
    do {
189
11.7M
        if (*name++ != *str) return(0);
190
11.7M
    } while (*str++);
191
737k
    return(1);
192
764k
}
193
194
/**
195
 * xmlStrncmp:
196
 * @str1:  the first xmlChar *
197
 * @str2:  the second xmlChar *
198
 * @len:  the max comparison length
199
 *
200
 * a strncmp for xmlChar's
201
 *
202
 * Returns the integer result of the comparison
203
 */
204
205
int
206
200k
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
207
200k
    register int tmp;
208
209
200k
    if (len <= 0) return(0);
210
200k
    if (str1 == str2) return(0);
211
200k
    if (str1 == NULL) return(-1);
212
200k
    if (str2 == NULL) return(1);
213
200k
#ifdef __GNUC__
214
200k
    tmp = strncmp((const char *)str1, (const char *)str2, len);
215
200k
    return tmp;
216
#else
217
    do {
218
        tmp = *str1++ - *str2;
219
        if (tmp != 0 || --len == 0) return(tmp);
220
    } while (*str2++ != 0);
221
    return 0;
222
#endif
223
200k
}
224
225
/*
 * Case folding table used by the case-insensitive comparisons below:
 * 'A'..'Z' (0x41..0x5A) are mapped to 'a'..'z' (0x61..0x7A) and every
 * other byte value maps to itself. Entry 0x5B ('[') used to map to
 * 0x7B ('{'), which made those two characters compare as equal.
 */
static const xmlChar casemap[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
259
260
/**
261
 * xmlStrcasecmp:
262
 * @str1:  the first xmlChar *
263
 * @str2:  the second xmlChar *
264
 *
265
 * a strcasecmp for xmlChar's
266
 *
267
 * Returns the integer result of the comparison
268
 */
269
270
int
271
186k
xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
272
186k
    register int tmp;
273
274
186k
    if (str1 == str2) return(0);
275
186k
    if (str1 == NULL) return(-1);
276
186k
    if (str2 == NULL) return(1);
277
482k
    do {
278
482k
        tmp = casemap[*str1++] - casemap[*str2];
279
482k
        if (tmp != 0) return(tmp);
280
482k
    } while (*str2++ != 0);
281
23.6k
    return 0;
282
186k
}
283
284
/**
285
 * xmlStrncasecmp:
286
 * @str1:  the first xmlChar *
287
 * @str2:  the second xmlChar *
288
 * @len:  the max comparison length
289
 *
290
 * a strncasecmp for xmlChar's
291
 *
292
 * Returns the integer result of the comparison
293
 */
294
295
int
296
4.73M
xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
297
4.73M
    register int tmp;
298
299
4.73M
    if (len <= 0) return(0);
300
4.73M
    if (str1 == str2) return(0);
301
4.73M
    if (str1 == NULL) return(-1);
302
4.73M
    if (str2 == NULL) return(1);
303
28.3M
    do {
304
28.3M
        tmp = casemap[*str1++] - casemap[*str2];
305
28.3M
        if (tmp != 0 || --len == 0) return(tmp);
306
28.3M
    } while (*str2++ != 0);
307
0
    return 0;
308
4.73M
}
309
310
/**
311
 * xmlStrchr:
312
 * @str:  the xmlChar * array
313
 * @val:  the xmlChar to search
314
 *
315
 * a strchr for xmlChar's
316
 *
317
 * Returns the xmlChar * for the first occurrence or NULL.
318
 */
319
320
const xmlChar *
321
1.47M
xmlStrchr(const xmlChar *str, xmlChar val) {
322
1.47M
    if (str == NULL) return(NULL);
323
1.09G
    while (*str != 0) { /* non input consuming */
324
1.09G
        if (*str == val) return((xmlChar *) str);
325
1.09G
        str++;
326
1.09G
    }
327
1.17M
    return(NULL);
328
1.47M
}
329
330
/**
331
 * xmlStrstr:
332
 * @str:  the xmlChar * array (haystack)
333
 * @val:  the xmlChar to search (needle)
334
 *
335
 * a strstr for xmlChar's
336
 *
337
 * Returns the xmlChar * for the first occurrence or NULL.
338
 */
339
340
const xmlChar *
341
0
xmlStrstr(const xmlChar *str, const xmlChar *val) {
342
0
    int n;
343
344
0
    if (str == NULL) return(NULL);
345
0
    if (val == NULL) return(NULL);
346
0
    n = xmlStrlen(val);
347
348
0
    if (n == 0) return(str);
349
0
    while (*str != 0) { /* non input consuming */
350
0
        if (*str == *val) {
351
0
            if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
352
0
        }
353
0
        str++;
354
0
    }
355
0
    return(NULL);
356
0
}
357
358
/**
359
 * xmlStrcasestr:
360
 * @str:  the xmlChar * array (haystack)
361
 * @val:  the xmlChar to search (needle)
362
 *
363
 * a case-ignoring strstr for xmlChar's
364
 *
365
 * Returns the xmlChar * for the first occurrence or NULL.
366
 */
367
368
const xmlChar *
369
4.73M
xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
370
4.73M
    int n;
371
372
4.73M
    if (str == NULL) return(NULL);
373
4.73M
    if (val == NULL) return(NULL);
374
4.73M
    n = xmlStrlen(val);
375
376
4.73M
    if (n == 0) return(str);
377
4.84M
    while (*str != 0) { /* non input consuming */
378
4.82M
        if (casemap[*str] == casemap[*val])
379
4.73M
            if (!xmlStrncasecmp(str, val, n)) return(str);
380
107k
        str++;
381
107k
    }
382
13.2k
    return(NULL);
383
4.73M
}
384
385
/**
386
 * xmlStrsub:
387
 * @str:  the xmlChar * array (haystack)
388
 * @start:  the index of the first char (zero based)
389
 * @len:  the length of the substring
390
 *
391
 * Extract a substring of a given string
392
 *
393
 * Returns the xmlChar * for the first occurrence or NULL.
394
 */
395
396
xmlChar *
397
0
xmlStrsub(const xmlChar *str, int start, int len) {
398
0
    int i;
399
400
0
    if (str == NULL) return(NULL);
401
0
    if (start < 0) return(NULL);
402
0
    if (len < 0) return(NULL);
403
404
0
    for (i = 0;i < start;i++) {
405
0
        if (*str == 0) return(NULL);
406
0
        str++;
407
0
    }
408
0
    if (*str == 0) return(NULL);
409
0
    return(xmlStrndup(str, len));
410
0
}
411
412
/**
413
 * xmlStrlen:
414
 * @str:  the xmlChar * array
415
 *
416
 * length of a xmlChar's string
417
 *
418
 * Returns the number of xmlChar contained in the ARRAY.
419
 */
420
421
int
422
38.1M
xmlStrlen(const xmlChar *str) {
423
38.1M
    int len = 0;
424
425
38.1M
    if (str == NULL) return(0);
426
46.3G
    while (*str != 0) { /* non input consuming */
427
46.3G
        str++;
428
46.3G
        len++;
429
46.3G
    }
430
38.1M
    return(len);
431
38.1M
}
432
433
/**
434
 * xmlStrncat:
435
 * @cur:  the original xmlChar * array
436
 * @add:  the xmlChar * array added
437
 * @len:  the length of @add
438
 *
439
 * a strncat for array of xmlChar's, it will extend @cur with the len
440
 * first bytes of @add. Note that if @len < 0 then this is an API error
441
 * and NULL will be returned.
442
 *
443
 * Returns a new xmlChar *, the original @cur is reallocated and should
444
 * not be freed.
445
 */
446
447
xmlChar *
448
219k
xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
449
219k
    int size;
450
219k
    xmlChar *ret;
451
452
219k
    if ((add == NULL) || (len == 0))
453
96.9k
        return(cur);
454
122k
    if (len < 0)
455
0
  return(NULL);
456
122k
    if (cur == NULL)
457
0
        return(xmlStrndup(add, len));
458
459
122k
    size = xmlStrlen(cur);
460
122k
    if (size < 0)
461
0
        return(NULL);
462
122k
    ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
463
122k
    if (ret == NULL) {
464
0
        xmlErrMemory(NULL, NULL);
465
0
        return(cur);
466
0
    }
467
122k
    memcpy(&ret[size], add, len * sizeof(xmlChar));
468
122k
    ret[size + len] = 0;
469
122k
    return(ret);
470
122k
}
471
472
/**
473
 * xmlStrncatNew:
474
 * @str1:  first xmlChar string
475
 * @str2:  second xmlChar string
476
 * @len:  the len of @str2 or < 0
477
 *
478
 * same as xmlStrncat, but creates a new string.  The original
479
 * two strings are not freed. If @len is < 0 then the length
480
 * will be calculated automatically.
481
 *
482
 * Returns a new xmlChar * or NULL
483
 */
484
xmlChar *
485
0
xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
486
0
    int size;
487
0
    xmlChar *ret;
488
489
0
    if (len < 0) {
490
0
        len = xmlStrlen(str2);
491
0
        if (len < 0)
492
0
            return(NULL);
493
0
    }
494
0
    if ((str2 == NULL) || (len == 0))
495
0
        return(xmlStrdup(str1));
496
0
    if (str1 == NULL)
497
0
        return(xmlStrndup(str2, len));
498
499
0
    size = xmlStrlen(str1);
500
0
    if (size < 0)
501
0
        return(NULL);
502
0
    ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
503
0
    if (ret == NULL) {
504
0
        xmlErrMemory(NULL, NULL);
505
0
        return(xmlStrndup(str1, size));
506
0
    }
507
0
    memcpy(ret, str1, size * sizeof(xmlChar));
508
0
    memcpy(&ret[size], str2, len * sizeof(xmlChar));
509
0
    ret[size + len] = 0;
510
0
    return(ret);
511
0
}
512
513
/**
514
 * xmlStrcat:
515
 * @cur:  the original xmlChar * array
516
 * @add:  the xmlChar * array added
517
 *
518
 * a strcat for array of xmlChar's. Since they are supposed to be
519
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
520
 * a termination mark of '0'.
521
 *
522
 * Returns a new xmlChar * containing the concatenated string. The original
523
 * @cur is reallocated and should not be freed.
524
 */
525
xmlChar *
526
46.7k
xmlStrcat(xmlChar *cur, const xmlChar *add) {
527
46.7k
    const xmlChar *p = add;
528
529
46.7k
    if (add == NULL) return(cur);
530
46.6k
    if (cur == NULL)
531
27.3k
        return(xmlStrdup(add));
532
533
2.68G
    while (*p != 0) p++; /* non input consuming */
534
19.2k
    return(xmlStrncat(cur, add, p - add));
535
46.6k
}
536
537
/**
538
 * xmlStrPrintf:
539
 * @buf:   the result buffer.
540
 * @len:   the result buffer length.
541
 * @msg:   the message with printf formatting.
542
 * @...:   extra parameters for the message.
543
 *
544
 * Formats @msg and places result into @buf.
545
 *
546
 * Returns the number of characters written to @buf or -1 if an error occurs.
547
 */
548
int XMLCDECL
549
0
xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
550
0
    va_list args;
551
0
    int ret;
552
553
0
    if((buf == NULL) || (msg == NULL)) {
554
0
        return(-1);
555
0
    }
556
557
0
    va_start(args, msg);
558
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, args);
559
0
    va_end(args);
560
0
    buf[len - 1] = 0; /* be safe ! */
561
562
0
    return(ret);
563
0
}
564
565
/**
566
 * xmlStrVPrintf:
567
 * @buf:   the result buffer.
568
 * @len:   the result buffer length.
569
 * @msg:   the message with printf formatting.
570
 * @ap:    extra parameters for the message.
571
 *
572
 * Formats @msg and places result into @buf.
573
 *
574
 * Returns the number of characters written to @buf or -1 if an error occurs.
575
 */
576
int
577
0
xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
578
0
    int ret;
579
580
0
    if((buf == NULL) || (msg == NULL)) {
581
0
        return(-1);
582
0
    }
583
584
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
585
0
    buf[len - 1] = 0; /* be safe ! */
586
587
0
    return(ret);
588
0
}
589
590
/************************************************************************
591
 *                                                                      *
592
 *              Generic UTF8 handling routines                          *
593
 *                                                                      *
594
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
595
 *                                                                      *
596
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
597
 * 0000 0000-0000 007F   0xxxxxxx                                       *
598
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
599
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
600
 *                                                                      *
601
 * I hope we won't use values > 0xFFFF anytime soon !                   *
602
 *                                                                      *
603
 ************************************************************************/
604
605
606
/**
607
 * xmlUTF8Size:
608
 * @utf: pointer to the UTF8 character
609
 *
610
 * calculates the internal size of a UTF8 character
611
 *
612
 * returns the numbers of bytes in the character, -1 on format error
613
 */
614
int
615
0
xmlUTF8Size(const xmlChar *utf) {
616
0
    xmlChar mask;
617
0
    int len;
618
619
0
    if (utf == NULL)
620
0
        return -1;
621
0
    if (*utf < 0x80)
622
0
        return 1;
623
    /* check valid UTF8 character */
624
0
    if (!(*utf & 0x40))
625
0
        return -1;
626
    /* determine number of bytes in char */
627
0
    len = 2;
628
0
    for (mask=0x20; mask != 0; mask>>=1) {
629
0
        if (!(*utf & mask))
630
0
            return len;
631
0
        len++;
632
0
    }
633
0
    return -1;
634
0
}
635
636
/**
637
 * xmlUTF8Charcmp:
638
 * @utf1: pointer to first UTF8 char
639
 * @utf2: pointer to second UTF8 char
640
 *
641
 * compares the two UCS4 values
642
 *
643
 * returns result of the compare as with xmlStrncmp
644
 */
645
int
646
0
xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
647
648
0
    if (utf1 == NULL ) {
649
0
        if (utf2 == NULL)
650
0
            return 0;
651
0
        return -1;
652
0
    }
653
0
    return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
654
0
}
655
656
/**
657
 * xmlUTF8Strlen:
658
 * @utf:  a sequence of UTF-8 encoded bytes
659
 *
660
 * compute the length of an UTF8 string, it doesn't do a full UTF8
661
 * checking of the content of the string.
662
 *
663
 * Returns the number of characters in the string or -1 in case of error
664
 */
665
int
666
0
xmlUTF8Strlen(const xmlChar *utf) {
667
0
    int ret = 0;
668
669
0
    if (utf == NULL)
670
0
        return(-1);
671
672
0
    while (*utf != 0) {
673
0
        if (utf[0] & 0x80) {
674
0
            if ((utf[1] & 0xc0) != 0x80)
675
0
                return(-1);
676
0
            if ((utf[0] & 0xe0) == 0xe0) {
677
0
                if ((utf[2] & 0xc0) != 0x80)
678
0
                    return(-1);
679
0
                if ((utf[0] & 0xf0) == 0xf0) {
680
0
                    if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
681
0
                        return(-1);
682
0
                    utf += 4;
683
0
                } else {
684
0
                    utf += 3;
685
0
                }
686
0
            } else {
687
0
                utf += 2;
688
0
            }
689
0
        } else {
690
0
            utf++;
691
0
        }
692
0
        ret++;
693
0
    }
694
0
    return(ret);
695
0
}
696
697
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. A first byte of the form
 * 10xxxxxx (a trailing byte) is rejected, as are sequences longer
 * than 4 bytes and sequences with a malformed trailing byte.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c & 0x80) {
        /*
         * A lead byte must look like 11xxxxxx. Without this test a
         * stray trailing byte (10xxxxxx) was decoded as the start of
         * a 2-byte character.
         */
        if ((c & 0xc0) != 0xc0)
            goto error;
        if (*len < 2)
            goto error;
        if ((utf[1] & 0xc0) != 0x80)
            goto error;
        if ((c & 0xe0) == 0xe0) {
            if (*len < 3)
                goto error;
            if ((utf[2] & 0xc0) != 0x80)
                goto error;
            if ((c & 0xf0) == 0xf0) {
                if (*len < 4)
                    goto error;
                if ((c & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
                    goto error;
                *len = 4;
                /* 4-byte code */
                c = (utf[0] & 0x7) << 18;
                c |= (utf[1] & 0x3f) << 12;
                c |= (utf[2] & 0x3f) << 6;
                c |= utf[3] & 0x3f;
            } else {
                /* 3-byte code */
                *len = 3;
                c = (utf[0] & 0xf) << 12;
                c |= (utf[1] & 0x3f) << 6;
                c |= utf[2] & 0x3f;
            }
        } else {
            /* 2-byte code */
            *len = 2;
            c = (utf[0] & 0x1f) << 6;
            c |= utf[1] & 0x3f;
        }
    } else {
        /* 1-byte code */
        *len = 1;
    }
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
766
767
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *p;
    unsigned char lead;
    int trail;                  /* trailing bytes expected after lead */
    int k;

    if (utf == NULL)
        return(0);
    /*
     * utf is a string of 1, 2, 3 or 4 bytes.  The valid strings
     * are as follows (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    for (p = utf; (lead = *p) != 0; p += trail + 1) { /* 0-terminated */
        if ((lead & 0x80) == 0x00)          /* 1-byte code, starts with 0 */
            trail = 0;
        else if ((lead & 0xe0) == 0xc0)     /* 2-byte code, starts with 110 */
            trail = 1;
        else if ((lead & 0xf0) == 0xe0)     /* 3-byte code, starts with 1110 */
            trail = 2;
        else if ((lead & 0xf8) == 0xf0)     /* 4-byte code, starts with 11110 */
            trail = 3;
        else                                /* unknown encoding */
            return 0;

        /* stop at the first bad byte so the terminator is never passed */
        for (k = 1; k <= trail; k++) {
            if ((p[k] & 0xc0) != 0x80)
                return 0;
        }
    }
    return(1);
}
819
820
/**
821
 * xmlUTF8Strsize:
822
 * @utf:  a sequence of UTF-8 encoded bytes
823
 * @len:  the number of characters in the array
824
 *
825
 * storage size of an UTF8 string
826
 * the behaviour is not guaranteed if the input string is not UTF-8
827
 *
828
 * Returns the storage size of
829
 * the first 'len' characters of ARRAY
830
 */
831
832
int
833
0
xmlUTF8Strsize(const xmlChar *utf, int len) {
834
0
    const xmlChar   *ptr=utf;
835
0
    xmlChar         ch;
836
837
0
    if (utf == NULL)
838
0
        return(0);
839
840
0
    if (len <= 0)
841
0
        return(0);
842
843
0
    while ( len-- > 0) {
844
0
        if ( !*ptr )
845
0
            break;
846
0
        if ( (ch = *ptr++) & 0x80)
847
0
            while ((ch<<=1) & 0x80 ) {
848
0
    if (*ptr == 0) break;
849
0
                ptr++;
850
0
      }
851
0
    }
852
0
    return (ptr - utf);
853
0
}
854
855
856
/**
857
 * xmlUTF8Strndup:
858
 * @utf:  the input UTF8 *
859
 * @len:  the len of @utf (in chars)
860
 *
861
 * a strndup for array of UTF8's
862
 *
863
 * Returns a new UTF8 * or NULL
864
 */
865
xmlChar *
866
0
xmlUTF8Strndup(const xmlChar *utf, int len) {
867
0
    xmlChar *ret;
868
0
    int i;
869
870
0
    if ((utf == NULL) || (len < 0)) return(NULL);
871
0
    i = xmlUTF8Strsize(utf, len);
872
0
    ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
873
0
    if (ret == NULL) {
874
0
        xmlGenericError(xmlGenericErrorContext,
875
0
                "malloc of %ld byte failed\n",
876
0
                (len + 1) * (long)sizeof(xmlChar));
877
0
        return(NULL);
878
0
    }
879
0
    memcpy(ret, utf, i * sizeof(xmlChar));
880
0
    ret[i] = 0;
881
0
    return(ret);
882
0
}
883
884
/**
885
 * xmlUTF8Strpos:
886
 * @utf:  the input UTF8 *
887
 * @pos:  the position of the desired UTF8 char (in chars)
888
 *
889
 * a function to provide the equivalent of fetching a
890
 * character from a string array
891
 *
892
 * Returns a pointer to the UTF8 character or NULL
893
 */
894
const xmlChar *
895
0
xmlUTF8Strpos(const xmlChar *utf, int pos) {
896
0
    xmlChar ch;
897
898
0
    if (utf == NULL) return(NULL);
899
0
    if (pos < 0)
900
0
        return(NULL);
901
0
    while (pos--) {
902
0
        if ((ch=*utf++) == 0) return(NULL);
903
0
        if ( ch & 0x80 ) {
904
            /* if not simple ascii, verify proper format */
905
0
            if ( (ch & 0xc0) != 0xc0 )
906
0
                return(NULL);
907
            /* then skip over remaining bytes for this char */
908
0
            while ( (ch <<= 1) & 0x80 )
909
0
                if ( (*utf++ & 0xc0) != 0x80 )
910
0
                    return(NULL);
911
0
        }
912
0
    }
913
0
    return((xmlChar *)utf);
914
0
}
915
916
/**
917
 * xmlUTF8Strloc:
918
 * @utf:  the input UTF8 *
919
 * @utfchar:  the UTF8 character to be found
920
 *
921
 * a function to provide the relative location of a UTF8 char
922
 *
923
 * Returns the relative character position of the desired char
924
 * or -1 if not found
925
 */
926
int
927
0
xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
928
0
    int i, size;
929
0
    xmlChar ch;
930
931
0
    if (utf==NULL || utfchar==NULL) return -1;
932
0
    size = xmlUTF8Strsize(utfchar, 1);
933
0
        for(i=0; (ch=*utf) != 0; i++) {
934
0
            if (xmlStrncmp(utf, utfchar, size)==0)
935
0
                return(i);
936
0
            utf++;
937
0
            if ( ch & 0x80 ) {
938
                /* if not simple ascii, verify proper format */
939
0
                if ( (ch & 0xc0) != 0xc0 )
940
0
                    return(-1);
941
                /* then skip over remaining bytes for this char */
942
0
                while ( (ch <<= 1) & 0x80 )
943
0
                    if ( (*utf++ & 0xc0) != 0x80 )
944
0
                        return(-1);
945
0
            }
946
0
        }
947
948
0
    return(-1);
949
0
}
950
/**
951
 * xmlUTF8Strsub:
952
 * @utf:  a sequence of UTF-8 encoded bytes
953
 * @start: relative pos of first char
954
 * @len:   total number to copy
955
 *
956
 * Create a substring from a given UTF-8 string
957
 * Note:  positions are given in units of UTF-8 chars
958
 *
959
 * Returns a pointer to a newly created string
960
 * or NULL if any problem
961
 */
962
963
xmlChar *
964
0
xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
965
0
    int            i;
966
0
    xmlChar ch;
967
968
0
    if (utf == NULL) return(NULL);
969
0
    if (start < 0) return(NULL);
970
0
    if (len < 0) return(NULL);
971
972
    /*
973
     * Skip over any leading chars
974
     */
975
0
    for (i = 0;i < start;i++) {
976
0
        if ((ch=*utf++) == 0) return(NULL);
977
0
        if ( ch & 0x80 ) {
978
            /* if not simple ascii, verify proper format */
979
0
            if ( (ch & 0xc0) != 0xc0 )
980
0
                return(NULL);
981
            /* then skip over remaining bytes for this char */
982
0
            while ( (ch <<= 1) & 0x80 )
983
0
                if ( (*utf++ & 0xc0) != 0x80 )
984
0
                    return(NULL);
985
0
        }
986
0
    }
987
988
0
    return(xmlUTF8Strndup(utf, len));
989
0
}
990
991
/**
992
 * xmlEscapeFormatString:
993
 * @msg:  a pointer to the string in which to escape '%' characters.
994
 * Must be a heap-allocated buffer created by libxml2 that may be
995
 * returned, or that may be freed and replaced.
996
 *
997
 * Replaces the string pointed to by 'msg' with an escaped string.
998
 * Returns the same string with all '%' characters escaped.
999
 */
1000
xmlChar *
1001
xmlEscapeFormatString(xmlChar **msg)
1002
0
{
1003
0
    xmlChar *msgPtr = NULL;
1004
0
    xmlChar *result = NULL;
1005
0
    xmlChar *resultPtr = NULL;
1006
0
    size_t count = 0;
1007
0
    size_t msgLen = 0;
1008
0
    size_t resultLen = 0;
1009
1010
0
    if (!msg || !*msg)
1011
0
        return(NULL);
1012
1013
0
    for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
1014
0
        ++msgLen;
1015
0
        if (*msgPtr == '%')
1016
0
            ++count;
1017
0
    }
1018
1019
0
    if (count == 0)
1020
0
        return(*msg);
1021
1022
0
    resultLen = msgLen + count + 1;
1023
0
    result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
1024
0
    if (result == NULL) {
1025
        /* Clear *msg to prevent format string vulnerabilities in
1026
           out-of-memory situations. */
1027
0
        xmlFree(*msg);
1028
0
        *msg = NULL;
1029
0
        xmlErrMemory(NULL, NULL);
1030
0
        return(NULL);
1031
0
    }
1032
1033
0
    for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1034
0
        *resultPtr = *msgPtr;
1035
0
        if (*msgPtr == '%')
1036
0
            *(++resultPtr) = '%';
1037
0
    }
1038
0
    result[resultLen - 1] = '\0';
1039
1040
0
    xmlFree(*msg);
1041
0
    *msg = result;
1042
1043
0
    return *msg;
1044
0
}
1045
1046
#define bottom_xmlstring
1047
#include "elfgcchack.h"