Coverage Report

Created: 2025-08-04 07:15

/src/libxml2-2.9.7/xmlstring.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * xmlstring.c : an XML string utilities module
3
 *
4
 * This module provides various utility functions for manipulating
5
 * the xmlChar* type. All functions named xmlStr* have been moved here
6
 * from the parser.c file (their original home).
7
 *
8
 * See Copyright for the status of this software.
9
 *
10
 * UTF8 string routines from:
11
 * William Brack <wbrack@mmm.com.hk>
12
 *
13
 * daniel@veillard.com
14
 */
15
16
#define IN_LIBXML
17
#include "libxml.h"
18
19
#include <stdlib.h>
20
#include <string.h>
21
#include <libxml/xmlmemory.h>
22
#include <libxml/parserInternals.h>
23
#include <libxml/xmlstring.h>
24
25
/************************************************************************
26
 *                                                                      *
27
 *                Commodity functions to handle xmlChars                *
28
 *                                                                      *
29
 ************************************************************************/
30
31
/**
32
 * xmlStrndup:
33
 * @cur:  the input xmlChar *
34
 * @len:  the len of @cur
35
 *
36
 * a strndup for array of xmlChar's
37
 *
38
 * Returns a new xmlChar * or NULL
39
 */
40
xmlChar *
41
112
xmlStrndup(const xmlChar *cur, int len) {
42
112
    xmlChar *ret;
43
44
112
    if ((cur == NULL) || (len < 0)) return(NULL);
45
112
    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
46
112
    if (ret == NULL) {
47
0
        xmlErrMemory(NULL, NULL);
48
0
        return(NULL);
49
0
    }
50
112
    memcpy(ret, cur, len * sizeof(xmlChar));
51
112
    ret[len] = 0;
52
112
    return(ret);
53
112
}
54
55
/**
56
 * xmlStrdup:
57
 * @cur:  the input xmlChar *
58
 *
59
 * a strdup for array of xmlChar's. Since they are supposed to be
60
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
61
 * a termination mark of '0'.
62
 *
63
 * Returns a new xmlChar * or NULL
64
 */
65
xmlChar *
66
112
xmlStrdup(const xmlChar *cur) {
67
112
    const xmlChar *p = cur;
68
69
112
    if (cur == NULL) return(NULL);
70
868
    while (*p != 0) p++; /* non input consuming */
71
112
    return(xmlStrndup(cur, p - cur));
72
112
}
73
74
/**
75
 * xmlCharStrndup:
76
 * @cur:  the input char *
77
 * @len:  the len of @cur
78
 *
79
 * a strndup for char's to xmlChar's
80
 *
81
 * Returns a new xmlChar * or NULL
82
 */
83
84
xmlChar *
85
0
xmlCharStrndup(const char *cur, int len) {
86
0
    int i;
87
0
    xmlChar *ret;
88
89
0
    if ((cur == NULL) || (len < 0)) return(NULL);
90
0
    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
91
0
    if (ret == NULL) {
92
0
        xmlErrMemory(NULL, NULL);
93
0
        return(NULL);
94
0
    }
95
0
    for (i = 0;i < len;i++) {
96
0
        ret[i] = (xmlChar) cur[i];
97
0
        if (ret[i] == 0) return(ret);
98
0
    }
99
0
    ret[len] = 0;
100
0
    return(ret);
101
0
}
102
103
/**
104
 * xmlCharStrdup:
105
 * @cur:  the input char *
106
 *
107
 * a strdup for char's to xmlChar's
108
 *
109
 * Returns a new xmlChar * or NULL
110
 */
111
112
xmlChar *
113
0
xmlCharStrdup(const char *cur) {
114
0
    const char *p = cur;
115
116
0
    if (cur == NULL) return(NULL);
117
0
    while (*p != '\0') p++; /* non input consuming */
118
0
    return(xmlCharStrndup(cur, p - cur));
119
0
}
120
121
/**
122
 * xmlStrcmp:
123
 * @str1:  the first xmlChar *
124
 * @str2:  the second xmlChar *
125
 *
126
 * a strcmp for xmlChar's
127
 *
128
 * Returns the integer result of the comparison
129
 */
130
131
int
132
0
xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
133
0
    register int tmp;
134
135
0
    if (str1 == str2) return(0);
136
0
    if (str1 == NULL) return(-1);
137
0
    if (str2 == NULL) return(1);
138
0
    do {
139
0
        tmp = *str1++ - *str2;
140
0
        if (tmp != 0) return(tmp);
141
0
    } while (*str2++ != 0);
142
0
    return 0;
143
0
}
144
145
/**
146
 * xmlStrEqual:
147
 * @str1:  the first xmlChar *
148
 * @str2:  the second xmlChar *
149
 *
150
 * Check if both strings are equal of have same content.
151
 * Should be a bit more readable and faster than xmlStrcmp()
152
 *
153
 * Returns 1 if they are equal, 0 if they are different
154
 */
155
156
int
157
0
xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
158
0
    if (str1 == str2) return(1);
159
0
    if (str1 == NULL) return(0);
160
0
    if (str2 == NULL) return(0);
161
0
    do {
162
0
        if (*str1++ != *str2) return(0);
163
0
    } while (*str2++);
164
0
    return(1);
165
0
}
166
167
/**
168
 * xmlStrQEqual:
169
 * @pref:  the prefix of the QName
170
 * @name:  the localname of the QName
171
 * @str:  the second xmlChar *
172
 *
173
 * Check if a QName is Equal to a given string
174
 *
175
 * Returns 1 if they are equal, 0 if they are different
176
 */
177
178
int
179
0
xmlStrQEqual(const xmlChar *pref, const xmlChar *name, const xmlChar *str) {
180
0
    if (pref == NULL) return(xmlStrEqual(name, str));
181
0
    if (name == NULL) return(0);
182
0
    if (str == NULL) return(0);
183
184
0
    do {
185
0
        if (*pref++ != *str) return(0);
186
0
    } while ((*str++) && (*pref));
187
0
    if (*str++ != ':') return(0);
188
0
    do {
189
0
        if (*name++ != *str) return(0);
190
0
    } while (*str++);
191
0
    return(1);
192
0
}
193
194
/**
195
 * xmlStrncmp:
196
 * @str1:  the first xmlChar *
197
 * @str2:  the second xmlChar *
198
 * @len:  the max comparison length
199
 *
200
 * a strncmp for xmlChar's
201
 *
202
 * Returns the integer result of the comparison
203
 */
204
205
int
206
0
xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
207
0
    register int tmp;
208
209
0
    if (len <= 0) return(0);
210
0
    if (str1 == str2) return(0);
211
0
    if (str1 == NULL) return(-1);
212
0
    if (str2 == NULL) return(1);
213
0
#ifdef __GNUC__
214
0
    tmp = strncmp((const char *)str1, (const char *)str2, len);
215
0
    return tmp;
216
#else
217
    do {
218
        tmp = *str1++ - *str2;
219
        if (tmp != 0 || --len == 0) return(tmp);
220
    } while (*str2++ != 0);
221
    return 0;
222
#endif
223
0
}
224
225
/*
 * Case folding table used by the case-insensitive comparisons: the ASCII
 * upper case letters 'A'..'Z' (0x41..0x5A) map to 'a'..'z' (0x61..0x7A),
 * every other value maps to itself. In particular '[' (0x5B) must stay
 * 0x5B and not be folded to '{' (0x7B).
 */
static const xmlChar casemap[256] = {
    0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
    0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
    0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
    0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
    0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
    0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
    0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
    0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
    0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
    0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
    0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
    0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
    0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
    0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
    0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
    0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
    0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
    0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
    0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
    0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
    0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
    0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
    0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
    0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
259
260
/**
261
 * xmlStrcasecmp:
262
 * @str1:  the first xmlChar *
263
 * @str2:  the second xmlChar *
264
 *
265
 * a strcasecmp for xmlChar's
266
 *
267
 * Returns the integer result of the comparison
268
 */
269
270
int
271
0
xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
272
0
    register int tmp;
273
274
0
    if (str1 == str2) return(0);
275
0
    if (str1 == NULL) return(-1);
276
0
    if (str2 == NULL) return(1);
277
0
    do {
278
0
        tmp = casemap[*str1++] - casemap[*str2];
279
0
        if (tmp != 0) return(tmp);
280
0
    } while (*str2++ != 0);
281
0
    return 0;
282
0
}
283
284
/**
285
 * xmlStrncasecmp:
286
 * @str1:  the first xmlChar *
287
 * @str2:  the second xmlChar *
288
 * @len:  the max comparison length
289
 *
290
 * a strncasecmp for xmlChar's
291
 *
292
 * Returns the integer result of the comparison
293
 */
294
295
int
296
0
xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
297
0
    register int tmp;
298
299
0
    if (len <= 0) return(0);
300
0
    if (str1 == str2) return(0);
301
0
    if (str1 == NULL) return(-1);
302
0
    if (str2 == NULL) return(1);
303
0
    do {
304
0
        tmp = casemap[*str1++] - casemap[*str2];
305
0
        if (tmp != 0 || --len == 0) return(tmp);
306
0
    } while (*str2++ != 0);
307
0
    return 0;
308
0
}
309
310
/**
311
 * xmlStrchr:
312
 * @str:  the xmlChar * array
313
 * @val:  the xmlChar to search
314
 *
315
 * a strchr for xmlChar's
316
 *
317
 * Returns the xmlChar * for the first occurrence or NULL.
318
 */
319
320
const xmlChar *
321
0
xmlStrchr(const xmlChar *str, xmlChar val) {
322
0
    if (str == NULL) return(NULL);
323
0
    while (*str != 0) { /* non input consuming */
324
0
        if (*str == val) return((xmlChar *) str);
325
0
        str++;
326
0
    }
327
0
    return(NULL);
328
0
}
329
330
/**
331
 * xmlStrstr:
332
 * @str:  the xmlChar * array (haystack)
333
 * @val:  the xmlChar to search (needle)
334
 *
335
 * a strstr for xmlChar's
336
 *
337
 * Returns the xmlChar * for the first occurrence or NULL.
338
 */
339
340
const xmlChar *
341
0
xmlStrstr(const xmlChar *str, const xmlChar *val) {
342
0
    int n;
343
344
0
    if (str == NULL) return(NULL);
345
0
    if (val == NULL) return(NULL);
346
0
    n = xmlStrlen(val);
347
348
0
    if (n == 0) return(str);
349
0
    while (*str != 0) { /* non input consuming */
350
0
        if (*str == *val) {
351
0
            if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
352
0
        }
353
0
        str++;
354
0
    }
355
0
    return(NULL);
356
0
}
357
358
/**
359
 * xmlStrcasestr:
360
 * @str:  the xmlChar * array (haystack)
361
 * @val:  the xmlChar to search (needle)
362
 *
363
 * a case-ignoring strstr for xmlChar's
364
 *
365
 * Returns the xmlChar * for the first occurrence or NULL.
366
 */
367
368
const xmlChar *
369
0
xmlStrcasestr(const xmlChar *str, const xmlChar *val) {
370
0
    int n;
371
372
0
    if (str == NULL) return(NULL);
373
0
    if (val == NULL) return(NULL);
374
0
    n = xmlStrlen(val);
375
376
0
    if (n == 0) return(str);
377
0
    while (*str != 0) { /* non input consuming */
378
0
        if (casemap[*str] == casemap[*val])
379
0
            if (!xmlStrncasecmp(str, val, n)) return(str);
380
0
        str++;
381
0
    }
382
0
    return(NULL);
383
0
}
384
385
/**
386
 * xmlStrsub:
387
 * @str:  the xmlChar * array (haystack)
388
 * @start:  the index of the first char (zero based)
389
 * @len:  the length of the substring
390
 *
391
 * Extract a substring of a given string
392
 *
393
 * Returns the xmlChar * for the first occurrence or NULL.
394
 */
395
396
xmlChar *
397
0
xmlStrsub(const xmlChar *str, int start, int len) {
398
0
    int i;
399
400
0
    if (str == NULL) return(NULL);
401
0
    if (start < 0) return(NULL);
402
0
    if (len < 0) return(NULL);
403
404
0
    for (i = 0;i < start;i++) {
405
0
        if (*str == 0) return(NULL);
406
0
        str++;
407
0
    }
408
0
    if (*str == 0) return(NULL);
409
0
    return(xmlStrndup(str, len));
410
0
}
411
412
/**
413
 * xmlStrlen:
414
 * @str:  the xmlChar * array
415
 *
416
 * length of a xmlChar's string
417
 *
418
 * Returns the number of xmlChar contained in the ARRAY.
419
 */
420
421
int
422
0
xmlStrlen(const xmlChar *str) {
423
0
    int len = 0;
424
425
0
    if (str == NULL) return(0);
426
0
    while (*str != 0) { /* non input consuming */
427
0
        str++;
428
0
        len++;
429
0
    }
430
0
    return(len);
431
0
}
432
433
/**
434
 * xmlStrncat:
435
 * @cur:  the original xmlChar * array
436
 * @add:  the xmlChar * array added
437
 * @len:  the length of @add
438
 *
439
 * a strncat for array of xmlChar's, it will extend @cur with the len
440
 * first bytes of @add. Note that if @len < 0 then this is an API error
441
 * and NULL will be returned.
442
 *
443
 * Returns a new xmlChar *, the original @cur is reallocated and should
444
 * not be freed.
445
 */
446
447
xmlChar *
448
0
xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
449
0
    int size;
450
0
    xmlChar *ret;
451
452
0
    if ((add == NULL) || (len == 0))
453
0
        return(cur);
454
0
    if (len < 0)
455
0
  return(NULL);
456
0
    if (cur == NULL)
457
0
        return(xmlStrndup(add, len));
458
459
0
    size = xmlStrlen(cur);
460
0
    if (size < 0)
461
0
        return(NULL);
462
0
    ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
463
0
    if (ret == NULL) {
464
0
        xmlErrMemory(NULL, NULL);
465
0
        return(cur);
466
0
    }
467
0
    memcpy(&ret[size], add, len * sizeof(xmlChar));
468
0
    ret[size + len] = 0;
469
0
    return(ret);
470
0
}
471
472
/**
473
 * xmlStrncatNew:
474
 * @str1:  first xmlChar string
475
 * @str2:  second xmlChar string
476
 * @len:  the len of @str2 or < 0
477
 *
478
 * same as xmlStrncat, but creates a new string.  The original
479
 * two strings are not freed. If @len is < 0 then the length
480
 * will be calculated automatically.
481
 *
482
 * Returns a new xmlChar * or NULL
483
 */
484
xmlChar *
485
0
xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
486
0
    int size;
487
0
    xmlChar *ret;
488
489
0
    if (len < 0) {
490
0
        len = xmlStrlen(str2);
491
0
        if (len < 0)
492
0
            return(NULL);
493
0
    }
494
0
    if ((str2 == NULL) || (len == 0))
495
0
        return(xmlStrdup(str1));
496
0
    if (str1 == NULL)
497
0
        return(xmlStrndup(str2, len));
498
499
0
    size = xmlStrlen(str1);
500
0
    if (size < 0)
501
0
        return(NULL);
502
0
    ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
503
0
    if (ret == NULL) {
504
0
        xmlErrMemory(NULL, NULL);
505
0
        return(xmlStrndup(str1, size));
506
0
    }
507
0
    memcpy(ret, str1, size * sizeof(xmlChar));
508
0
    memcpy(&ret[size], str2, len * sizeof(xmlChar));
509
0
    ret[size + len] = 0;
510
0
    return(ret);
511
0
}
512
513
/**
514
 * xmlStrcat:
515
 * @cur:  the original xmlChar * array
516
 * @add:  the xmlChar * array added
517
 *
518
 * a strcat for array of xmlChar's. Since they are supposed to be
519
 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
520
 * a termination mark of '0'.
521
 *
522
 * Returns a new xmlChar * containing the concatenated string. The original
523
 * @cur is reallocated and should not be freed.
524
 */
525
xmlChar *
526
0
xmlStrcat(xmlChar *cur, const xmlChar *add) {
527
0
    const xmlChar *p = add;
528
529
0
    if (add == NULL) return(cur);
530
0
    if (cur == NULL)
531
0
        return(xmlStrdup(add));
532
533
0
    while (*p != 0) p++; /* non input consuming */
534
0
    return(xmlStrncat(cur, add, p - add));
535
0
}
536
537
/**
538
 * xmlStrPrintf:
539
 * @buf:   the result buffer.
540
 * @len:   the result buffer length.
541
 * @msg:   the message with printf formatting.
542
 * @...:   extra parameters for the message.
543
 *
544
 * Formats @msg and places result into @buf.
545
 *
546
 * Returns the number of characters written to @buf or -1 if an error occurs.
547
 */
548
int XMLCDECL
549
0
xmlStrPrintf(xmlChar *buf, int len, const char *msg, ...) {
550
0
    va_list args;
551
0
    int ret;
552
553
0
    if((buf == NULL) || (msg == NULL)) {
554
0
        return(-1);
555
0
    }
556
557
0
    va_start(args, msg);
558
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, args);
559
0
    va_end(args);
560
0
    buf[len - 1] = 0; /* be safe ! */
561
562
0
    return(ret);
563
0
}
564
565
/**
566
 * xmlStrVPrintf:
567
 * @buf:   the result buffer.
568
 * @len:   the result buffer length.
569
 * @msg:   the message with printf formatting.
570
 * @ap:    extra parameters for the message.
571
 *
572
 * Formats @msg and places result into @buf.
573
 *
574
 * Returns the number of characters written to @buf or -1 if an error occurs.
575
 */
576
int
577
0
xmlStrVPrintf(xmlChar *buf, int len, const char *msg, va_list ap) {
578
0
    int ret;
579
580
0
    if((buf == NULL) || (msg == NULL)) {
581
0
        return(-1);
582
0
    }
583
584
0
    ret = vsnprintf((char *) buf, len, (const char *) msg, ap);
585
0
    buf[len - 1] = 0; /* be safe ! */
586
587
0
    return(ret);
588
0
}
589
590
/************************************************************************
591
 *                                                                      *
592
 *              Generic UTF8 handling routines                          *
593
 *                                                                      *
594
 * From rfc2044: encoding of the Unicode values on UTF-8:               *
595
 *                                                                      *
596
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)           *
597
 * 0000 0000-0000 007F   0xxxxxxx                                       *
598
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx                              *
599
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx                     *
600
 *                                                                      *
601
 * I hope we won't use values > 0xFFFF anytime soon !                   *
602
 *                                                                      *
603
 ************************************************************************/
604
605
606
/**
607
 * xmlUTF8Size:
608
 * @utf: pointer to the UTF8 character
609
 *
610
 * calculates the internal size of a UTF8 character
611
 *
612
 * returns the numbers of bytes in the character, -1 on format error
613
 */
614
int
615
0
xmlUTF8Size(const xmlChar *utf) {
616
0
    xmlChar mask;
617
0
    int len;
618
619
0
    if (utf == NULL)
620
0
        return -1;
621
0
    if (*utf < 0x80)
622
0
        return 1;
623
    /* check valid UTF8 character */
624
0
    if (!(*utf & 0x40))
625
0
        return -1;
626
    /* determine number of bytes in char */
627
0
    len = 2;
628
0
    for (mask=0x20; mask != 0; mask>>=1) {
629
0
        if (!(*utf & mask))
630
0
            return len;
631
0
        len++;
632
0
    }
633
0
    return -1;
634
0
}
635
636
/**
637
 * xmlUTF8Charcmp:
638
 * @utf1: pointer to first UTF8 char
639
 * @utf2: pointer to second UTF8 char
640
 *
641
 * compares the two UCS4 values
642
 *
643
 * returns result of the compare as with xmlStrncmp
644
 */
645
int
646
0
xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
647
648
0
    if (utf1 == NULL ) {
649
0
        if (utf2 == NULL)
650
0
            return 0;
651
0
        return -1;
652
0
    }
653
0
    return xmlStrncmp(utf1, utf2, xmlUTF8Size(utf1));
654
0
}
655
656
/**
657
 * xmlUTF8Strlen:
658
 * @utf:  a sequence of UTF-8 encoded bytes
659
 *
660
 * compute the length of an UTF8 string, it doesn't do a full UTF8
661
 * checking of the content of the string.
662
 *
663
 * Returns the number of characters in the string or -1 in case of error
664
 */
665
int
666
0
xmlUTF8Strlen(const xmlChar *utf) {
667
0
    int ret = 0;
668
669
0
    if (utf == NULL)
670
0
        return(-1);
671
672
0
    while (*utf != 0) {
673
0
        if (utf[0] & 0x80) {
674
0
            if ((utf[1] & 0xc0) != 0x80)
675
0
                return(-1);
676
0
            if ((utf[0] & 0xe0) == 0xe0) {
677
0
                if ((utf[2] & 0xc0) != 0x80)
678
0
                    return(-1);
679
0
                if ((utf[0] & 0xf0) == 0xf0) {
680
0
                    if ((utf[0] & 0xf8) != 0xf0 || (utf[3] & 0xc0) != 0x80)
681
0
                        return(-1);
682
0
                    utf += 4;
683
0
                } else {
684
0
                    utf += 3;
685
0
                }
686
0
            } else {
687
0
                utf += 2;
688
0
            }
689
0
        } else {
690
0
            utf++;
691
0
        }
692
0
        ret++;
693
0
    }
694
0
    return(ret);
695
0
}
696
697
/**
 * xmlGetUTF8Char:
 * @utf:  a sequence of UTF-8 encoded bytes
 * @len:  a pointer to the minimum number of bytes present in
 *        the sequence.  This is used to assure the next character
 *        is completely contained within the sequence.
 *
 * Read the first UTF8 character from @utf. Besides truncated
 * sequences, the following malformed inputs are rejected: a
 * continuation byte used as first byte, overlong encodings, the
 * surrogate range 0xD800-0xDFFF and values above 0x10FFFF.
 *
 * Returns the char value or -1 in case of error, and sets *len to
 *        the actual number of bytes consumed (0 in case of error)
 */
int
xmlGetUTF8Char(const unsigned char *utf, int *len) {
    unsigned int c;

    if (utf == NULL)
        goto error;
    if (len == NULL)
        goto error;
    if (*len < 1)
        goto error;

    c = utf[0];
    if (c < 0x80) {
        /* 1-byte code */
        *len = 1;
        return(c);
    }

    /*
     * 0x80-0xBF are continuation bytes and 0xC0/0xC1 could only start
     * an overlong 2-byte code, so no valid sequence starts below 0xC2.
     */
    if (c < 0xc2)
        goto error;
    if (*len < 2)
        goto error;
    if ((utf[1] & 0xc0) != 0x80)
        goto error;

    if (c < 0xe0) {
        /* 2-byte code */
        c = (c & 0x1f) << 6;
        c |= utf[1] & 0x3f;
        *len = 2;
        return(c);
    }

    if (*len < 3)
        goto error;
    if ((utf[2] & 0xc0) != 0x80)
        goto error;

    if (c < 0xf0) {
        /* 3-byte code */
        c = (c & 0xf) << 12;
        c |= (utf[1] & 0x3f) << 6;
        c |= utf[2] & 0x3f;
        /* overlong form, or an UTF-16 surrogate which is not a char */
        if ((c < 0x800) || ((c >= 0xd800) && (c < 0xe000)))
            goto error;
        *len = 3;
        return(c);
    }

    /* only 11110xxx may start a 4-byte code, longer forms are invalid */
    if ((c & 0xf8) != 0xf0)
        goto error;
    if (*len < 4)
        goto error;
    if ((utf[3] & 0xc0) != 0x80)
        goto error;

    /* 4-byte code */
    c = (c & 0x7) << 18;
    c |= (utf[1] & 0x3f) << 12;
    c |= (utf[2] & 0x3f) << 6;
    c |= utf[3] & 0x3f;
    /* overlong form, or beyond the last Unicode code point */
    if ((c < 0x10000) || (c >= 0x110000))
        goto error;
    *len = 4;
    return(c);

error:
    if (len != NULL)
        *len = 0;
    return(-1);
}
766
767
/**
 * xmlCheckUTF8:
 * @utf: Pointer to putative UTF-8 encoded string.
 *
 * Checks @utf for being valid UTF-8. @utf is assumed to be
 * null-terminated. This function is not super-strict, as it will
 * allow longer UTF-8 sequences than necessary. Note that Java is
 * capable of producing these sequences if provoked. Also note, this
 * routine checks for the 4-byte maximum size, but does not check for
 * 0x10ffff maximum value.
 *
 * Return value: true if @utf is valid.
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    const unsigned char *cur;
    unsigned char lead;
    int trail;

    if (utf == NULL)
        return(0);

    /*
     * Each character is made of a first byte announcing its size,
     * followed by 0 to 3 continuation bytes (in "bit format"):
     *    0xxxxxxx                                      valid 1-byte
     *    110xxxxx 10xxxxxx                             valid 2-byte
     *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
     *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
     */
    cur = utf;
    while (*cur != 0) {                 /* string is 0-terminated */
        lead = *cur++;
        if ((lead & 0x80) == 0x00)      /* 1-byte code, starts with 0 */
            trail = 0;
        else if ((lead & 0xe0) == 0xc0) /* 2-byte code, starts with 110 */
            trail = 1;
        else if ((lead & 0xf0) == 0xe0) /* 3-byte code, starts with 1110 */
            trail = 2;
        else if ((lead & 0xf8) == 0xf0) /* 4-byte code, starts with 11110 */
            trail = 3;
        else                            /* unknown encoding */
            return(0);

        /* a 0 byte fails this test, so the end is never overrun */
        while (trail-- > 0) {
            if ((*cur++ & 0xc0) != 0x80)
                return(0);
        }
    }
    return(1);
}
819
820
/**
821
 * xmlUTF8Strsize:
822
 * @utf:  a sequence of UTF-8 encoded bytes
823
 * @len:  the number of characters in the array
824
 *
825
 * storage size of an UTF8 string
826
 * the behaviour is not guaranteed if the input string is not UTF-8
827
 *
828
 * Returns the storage size of
829
 * the first 'len' characters of ARRAY
830
 */
831
832
int
833
0
xmlUTF8Strsize(const xmlChar *utf, int len) {
834
0
    const xmlChar   *ptr=utf;
835
0
    xmlChar         ch;
836
837
0
    if (utf == NULL)
838
0
        return(0);
839
840
0
    if (len <= 0)
841
0
        return(0);
842
843
0
    while ( len-- > 0) {
844
0
        if ( !*ptr )
845
0
            break;
846
0
        if ( (ch = *ptr++) & 0x80)
847
0
            while ((ch<<=1) & 0x80 ) {
848
0
    if (*ptr == 0) break;
849
0
                ptr++;
850
0
      }
851
0
    }
852
0
    return (ptr - utf);
853
0
}
854
855
856
/**
857
 * xmlUTF8Strndup:
858
 * @utf:  the input UTF8 *
859
 * @len:  the len of @utf (in chars)
860
 *
861
 * a strndup for array of UTF8's
862
 *
863
 * Returns a new UTF8 * or NULL
864
 */
865
xmlChar *
866
0
xmlUTF8Strndup(const xmlChar *utf, int len) {
867
0
    xmlChar *ret;
868
0
    int i;
869
870
0
    if ((utf == NULL) || (len < 0)) return(NULL);
871
0
    i = xmlUTF8Strsize(utf, len);
872
0
    ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
873
0
    if (ret == NULL) {
874
0
        xmlGenericError(xmlGenericErrorContext,
875
0
                "malloc of %ld byte failed\n",
876
0
                (len + 1) * (long)sizeof(xmlChar));
877
0
        return(NULL);
878
0
    }
879
0
    memcpy(ret, utf, i * sizeof(xmlChar));
880
0
    ret[i] = 0;
881
0
    return(ret);
882
0
}
883
884
/**
885
 * xmlUTF8Strpos:
886
 * @utf:  the input UTF8 *
887
 * @pos:  the position of the desired UTF8 char (in chars)
888
 *
889
 * a function to provide the equivalent of fetching a
890
 * character from a string array
891
 *
892
 * Returns a pointer to the UTF8 character or NULL
893
 */
894
const xmlChar *
895
0
xmlUTF8Strpos(const xmlChar *utf, int pos) {
896
0
    xmlChar ch;
897
898
0
    if (utf == NULL) return(NULL);
899
0
    if (pos < 0)
900
0
        return(NULL);
901
0
    while (pos--) {
902
0
        if ((ch=*utf++) == 0) return(NULL);
903
0
        if ( ch & 0x80 ) {
904
            /* if not simple ascii, verify proper format */
905
0
            if ( (ch & 0xc0) != 0xc0 )
906
0
                return(NULL);
907
            /* then skip over remaining bytes for this char */
908
0
            while ( (ch <<= 1) & 0x80 )
909
0
                if ( (*utf++ & 0xc0) != 0x80 )
910
0
                    return(NULL);
911
0
        }
912
0
    }
913
0
    return((xmlChar *)utf);
914
0
}
915
916
/**
917
 * xmlUTF8Strloc:
918
 * @utf:  the input UTF8 *
919
 * @utfchar:  the UTF8 character to be found
920
 *
921
 * a function to provide the relative location of a UTF8 char
922
 *
923
 * Returns the relative character position of the desired char
924
 * or -1 if not found
925
 */
926
int
927
0
xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
928
0
    int i, size;
929
0
    xmlChar ch;
930
931
0
    if (utf==NULL || utfchar==NULL) return -1;
932
0
    size = xmlUTF8Strsize(utfchar, 1);
933
0
        for(i=0; (ch=*utf) != 0; i++) {
934
0
            if (xmlStrncmp(utf, utfchar, size)==0)
935
0
                return(i);
936
0
            utf++;
937
0
            if ( ch & 0x80 ) {
938
                /* if not simple ascii, verify proper format */
939
0
                if ( (ch & 0xc0) != 0xc0 )
940
0
                    return(-1);
941
                /* then skip over remaining bytes for this char */
942
0
                while ( (ch <<= 1) & 0x80 )
943
0
                    if ( (*utf++ & 0xc0) != 0x80 )
944
0
                        return(-1);
945
0
            }
946
0
        }
947
948
0
    return(-1);
949
0
}
950
/**
951
 * xmlUTF8Strsub:
952
 * @utf:  a sequence of UTF-8 encoded bytes
953
 * @start: relative pos of first char
954
 * @len:   total number to copy
955
 *
956
 * Create a substring from a given UTF-8 string
957
 * Note:  positions are given in units of UTF-8 chars
958
 *
959
 * Returns a pointer to a newly created string
960
 * or NULL if any problem
961
 */
962
963
xmlChar *
964
0
xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
965
0
    int            i;
966
0
    xmlChar ch;
967
968
0
    if (utf == NULL) return(NULL);
969
0
    if (start < 0) return(NULL);
970
0
    if (len < 0) return(NULL);
971
972
    /*
973
     * Skip over any leading chars
974
     */
975
0
    for (i = 0;i < start;i++) {
976
0
        if ((ch=*utf++) == 0) return(NULL);
977
0
        if ( ch & 0x80 ) {
978
            /* if not simple ascii, verify proper format */
979
0
            if ( (ch & 0xc0) != 0xc0 )
980
0
                return(NULL);
981
            /* then skip over remaining bytes for this char */
982
0
            while ( (ch <<= 1) & 0x80 )
983
0
                if ( (*utf++ & 0xc0) != 0x80 )
984
0
                    return(NULL);
985
0
        }
986
0
    }
987
988
0
    return(xmlUTF8Strndup(utf, len));
989
0
}
990
991
/**
992
 * xmlEscapeFormatString:
993
 * @msg:  a pointer to the string in which to escape '%' characters.
994
 * Must be a heap-allocated buffer created by libxml2 that may be
995
 * returned, or that may be freed and replaced.
996
 *
997
 * Replaces the string pointed to by 'msg' with an escaped string.
998
 * Returns the same string with all '%' characters escaped.
999
 */
1000
xmlChar *
1001
xmlEscapeFormatString(xmlChar **msg)
1002
0
{
1003
0
    xmlChar *msgPtr = NULL;
1004
0
    xmlChar *result = NULL;
1005
0
    xmlChar *resultPtr = NULL;
1006
0
    size_t count = 0;
1007
0
    size_t msgLen = 0;
1008
0
    size_t resultLen = 0;
1009
1010
0
    if (!msg || !*msg)
1011
0
        return(NULL);
1012
1013
0
    for (msgPtr = *msg; *msgPtr != '\0'; ++msgPtr) {
1014
0
        ++msgLen;
1015
0
        if (*msgPtr == '%')
1016
0
            ++count;
1017
0
    }
1018
1019
0
    if (count == 0)
1020
0
        return(*msg);
1021
1022
0
    resultLen = msgLen + count + 1;
1023
0
    result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
1024
0
    if (result == NULL) {
1025
        /* Clear *msg to prevent format string vulnerabilities in
1026
           out-of-memory situations. */
1027
0
        xmlFree(*msg);
1028
0
        *msg = NULL;
1029
0
        xmlErrMemory(NULL, NULL);
1030
0
        return(NULL);
1031
0
    }
1032
1033
0
    for (msgPtr = *msg, resultPtr = result; *msgPtr != '\0'; ++msgPtr, ++resultPtr) {
1034
0
        *resultPtr = *msgPtr;
1035
0
        if (*msgPtr == '%')
1036
0
            *(++resultPtr) = '%';
1037
0
    }
1038
0
    result[resultLen - 1] = '\0';
1039
1040
0
    xmlFree(*msg);
1041
0
    *msg = result;
1042
1043
0
    return *msg;
1044
0
}
1045
1046
#define bottom_xmlstring
1047
#include "elfgcchack.h"