Coverage Report

Created: 2026-05-19 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/nss/lib/util/utf8.c
Line
Count
Source
1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
 * License, v. 2.0. If a copy of the MPL was not distributed with this
3
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5
#include "seccomon.h"
6
#include "secport.h"
7
#include <limits.h>
8
9
/*
10
 * From RFC 2044:
11
 *
12
 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
13
 * 0000 0000-0000 007F   0xxxxxxx
14
 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
15
 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
16
 * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
17
 * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
18
 * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
19
 */
20
21
/*
22
 * From http://www.imc.org/draft-hoffman-utf16
23
 *
24
 * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000
25
 *
26
 * U' = yyyyyyyyyyxxxxxxxxxx
27
 * W1 = 110110yyyyyyyyyy
28
 * W2 = 110111xxxxxxxxxx
29
 */
30
31
/*
32
 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
33
 * character values.  If you wish to use this code for working with
34
 * host byte order values, define the following:
35
 *
36
 * #if IS_BIG_ENDIAN
37
 * #define L_0 0
38
 * #define L_1 1
39
 * #define L_2 2
40
 * #define L_3 3
41
 * #define H_0 0
42
 * #define H_1 1
43
 * #else / * not everyone has elif * /
44
 * #if IS_LITTLE_ENDIAN
45
 * #define L_0 3
46
 * #define L_1 2
47
 * #define L_2 1
48
 * #define L_3 0
49
 * #define H_0 1
50
 * #define H_1 0
51
 * #else
52
 * #error "PDP and NUXI support deferred"
53
 * #endif / * IS_LITTLE_ENDIAN * /
54
 * #endif / * IS_BIG_ENDIAN * /
55
 */
56
57
4.69k
/*
 * Byte offsets of the individual octets within one encoded character.
 * L_0..L_3 select the four octets of a UCS-4 value and H_0..H_1 the two
 * octets of a UCS-2/UTF-16 unit.  With the values below, offset 0 is the
 * most significant octet (network byte order).
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3
#define H_0 0
#define H_1 1

/* Sentinel returned by sec_port_read_utf8() when the input is rejected. */
#define BAD_UTF8 ((PRUint32)-1)
65
66
/*
67
 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
68
 * of Unicode 4.0.0.
69
 *
70
 * Parameters:
71
 * index - Points to the byte offset in inBuf of character to read.  On success,
72
 *         updated to the offset of the following character.
73
 * inBuf - Input buffer, UTF-8 encoded
74
 * inbufLen - Length of input buffer, in bytes.
75
 *
76
 * Returns:
77
 * Success - The UCS4 encoded character
78
 * Failure - BAD_UTF8
79
 */
80
static PRUint32
81
sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
82
0
{
83
0
    PRUint32 result;
84
0
    unsigned int i = *index;
85
0
    int bytes_left;
86
0
    PRUint32 min_value;
87
88
0
    PORT_Assert(i < inBufLen);
89
90
0
    if ((inBuf[i] & 0x80) == 0x00) {
91
0
        result = inBuf[i++];
92
0
        bytes_left = 0;
93
0
        min_value = 0;
94
0
    } else if ((inBuf[i] & 0xE0) == 0xC0) {
95
0
        result = inBuf[i++] & 0x1F;
96
0
        bytes_left = 1;
97
0
        min_value = 0x80;
98
0
    } else if ((inBuf[i] & 0xF0) == 0xE0) {
99
0
        result = inBuf[i++] & 0x0F;
100
0
        bytes_left = 2;
101
0
        min_value = 0x800;
102
0
    } else if ((inBuf[i] & 0xF8) == 0xF0) {
103
0
        result = inBuf[i++] & 0x07;
104
0
        bytes_left = 3;
105
0
        min_value = 0x10000;
106
0
    } else {
107
0
        return BAD_UTF8;
108
0
    }
109
110
0
    while (bytes_left--) {
111
0
        if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80)
112
0
            return BAD_UTF8;
113
0
        result = (result << 6) | (inBuf[i++] & 0x3F);
114
0
    }
115
116
    /* Check for overlong sequences, surrogates, and outside unicode range */
117
0
    if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
118
0
        return BAD_UTF8;
119
0
    }
120
121
0
    *index = i;
122
0
    return result;
123
0
}
124
125
PRBool
126
sec_port_ucs4_utf8_conversion_function(
127
    PRBool toUnicode,
128
    unsigned char *inBuf,
129
    unsigned int inBufLen,
130
    unsigned char *outBuf,
131
    unsigned int maxOutBufLen,
132
    unsigned int *outBufLen)
133
2.80k
{
134
2.80k
    PORT_Assert((unsigned int *)NULL != outBufLen);
135
136
2.80k
    if (toUnicode) {
137
0
        unsigned int i, len = 0;
138
139
0
        if (inBufLen > UINT_MAX / 4) {
140
0
            *outBufLen = 0;
141
0
            return PR_FALSE;
142
0
        }
143
144
0
        for (i = 0; i < inBufLen;) {
145
0
            if ((inBuf[i] & 0x80) == 0x00)
146
0
                i += 1;
147
0
            else if ((inBuf[i] & 0xE0) == 0xC0)
148
0
                i += 2;
149
0
            else if ((inBuf[i] & 0xF0) == 0xE0)
150
0
                i += 3;
151
0
            else if ((inBuf[i] & 0xF8) == 0xF0)
152
0
                i += 4;
153
0
            else
154
0
                return PR_FALSE;
155
156
0
            len += 4;
157
0
        }
158
159
0
        if (len > maxOutBufLen) {
160
0
            *outBufLen = len;
161
0
            return PR_FALSE;
162
0
        }
163
164
0
        len = 0;
165
166
0
        for (i = 0; i < inBufLen;) {
167
0
            PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
168
169
0
            if (ucs4 == BAD_UTF8)
170
0
                return PR_FALSE;
171
172
0
            outBuf[len + L_0] = 0x00;
173
0
            outBuf[len + L_1] = (unsigned char)(ucs4 >> 16);
174
0
            outBuf[len + L_2] = (unsigned char)(ucs4 >> 8);
175
0
            outBuf[len + L_3] = (unsigned char)ucs4;
176
177
0
            len += 4;
178
0
        }
179
180
0
        *outBufLen = len;
181
0
        return PR_TRUE;
182
2.80k
    } else {
183
2.80k
        unsigned int i, len = 0;
184
2.80k
        PORT_Assert((inBufLen % 4) == 0);
185
2.80k
        if ((inBufLen % 4) != 0) {
186
0
            *outBufLen = 0;
187
0
            return PR_FALSE;
188
0
        }
189
190
6.50k
        for (i = 0; i < inBufLen; i += 4) {
191
4.69k
            if ((inBuf[i + L_0] > 0x00) || (inBuf[i + L_1] > 0x10)) {
192
1.00k
                *outBufLen = 0;
193
1.00k
                return PR_FALSE;
194
3.69k
            } else if (inBuf[i + L_1] >= 0x01)
195
973
                len += 4;
196
2.72k
            else if (inBuf[i + L_2] >= 0x08)
197
701
                len += 3;
198
2.02k
            else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80))
199
1.03k
                len += 2;
200
983
            else
201
983
                len += 1;
202
4.69k
        }
203
204
1.80k
        if (len > maxOutBufLen) {
205
0
            *outBufLen = len;
206
0
            return PR_FALSE;
207
0
        }
208
209
1.80k
        len = 0;
210
211
5.13k
        for (i = 0; i < inBufLen; i += 4) {
212
3.33k
            if (inBuf[i + L_1] >= 0x01) {
213
                /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
214
                /* 00000000 000abcde fghijklm nopqrstu ->
215
                   11110abc 10defghi 10jklmno 10pqrstu */
216
217
840
                outBuf[len + 0] = 0xF0 | ((inBuf[i + L_1] & 0x1C) >> 2);
218
840
                outBuf[len + 1] = 0x80 | ((inBuf[i + L_1] & 0x03) << 4) | ((inBuf[i + L_2] & 0xF0) >> 4);
219
840
                outBuf[len + 2] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
220
840
                outBuf[len + 3] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
221
222
840
                len += 4;
223
2.49k
            } else if (inBuf[i + L_2] >= 0x08) {
224
                /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
225
                /* 00000000 00000000 abcdefgh ijklmnop ->
226
                   1110abcd 10efghij 10klmnop */
227
228
601
                outBuf[len + 0] = 0xE0 | ((inBuf[i + L_2] & 0xF0) >> 4);
229
601
                outBuf[len + 1] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
230
601
                outBuf[len + 2] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
231
232
601
                len += 3;
233
1.89k
            } else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) {
234
                /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
235
                /* 00000000 00000000 00000abc defghijk ->
236
                   110abcde 10fghijk */
237
238
990
                outBuf[len + 0] = 0xC0 | ((inBuf[i + L_2] & 0x07) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6);
239
990
                outBuf[len + 1] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0);
240
241
990
                len += 2;
242
990
            } else {
243
                /* 0000 0000-0000 007F -> 0xxxxxx */
244
                /* 00000000 00000000 00000000 0abcdefg ->
245
                   0abcdefg */
246
247
900
                outBuf[len + 0] = (inBuf[i + L_3] & 0x7F);
248
249
900
                len += 1;
250
900
            }
251
3.33k
        }
252
253
1.80k
        *outBufLen = len;
254
1.80k
        return PR_TRUE;
255
1.80k
    }
256
2.80k
}
257
258
PRBool
259
sec_port_ucs2_utf8_conversion_function(
260
    PRBool toUnicode,
261
    unsigned char *inBuf,
262
    unsigned int inBufLen,
263
    unsigned char *outBuf,
264
    unsigned int maxOutBufLen,
265
    unsigned int *outBufLen)
266
28.6k
{
267
28.6k
    PORT_Assert((unsigned int *)NULL != outBufLen);
268
269
28.6k
    if (toUnicode) {
270
0
        unsigned int i, len = 0;
271
272
0
        if (inBufLen > UINT_MAX / 2) {
273
0
            *outBufLen = 0;
274
0
            return PR_FALSE;
275
0
        }
276
277
0
        for (i = 0; i < inBufLen;) {
278
0
            if ((inBuf[i] & 0x80) == 0x00) {
279
0
                i += 1;
280
0
                len += 2;
281
0
            } else if ((inBuf[i] & 0xE0) == 0xC0) {
282
0
                i += 2;
283
0
                len += 2;
284
0
            } else if ((inBuf[i] & 0xF0) == 0xE0) {
285
0
                i += 3;
286
0
                len += 2;
287
0
            } else if ((inBuf[i] & 0xF8) == 0xF0) {
288
0
                i += 4;
289
0
                len += 4;
290
0
            } else
291
0
                return PR_FALSE;
292
0
        }
293
294
0
        if (len > maxOutBufLen) {
295
0
            *outBufLen = len;
296
0
            return PR_FALSE;
297
0
        }
298
299
0
        len = 0;
300
301
0
        for (i = 0; i < inBufLen;) {
302
0
            PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
303
304
0
            if (ucs4 == BAD_UTF8)
305
0
                return PR_FALSE;
306
307
0
            if (ucs4 < 0x10000) {
308
0
                outBuf[len + H_0] = (unsigned char)(ucs4 >> 8);
309
0
                outBuf[len + H_1] = (unsigned char)ucs4;
310
0
                len += 2;
311
0
            } else {
312
0
                ucs4 -= 0x10000;
313
0
                outBuf[len + 0 + H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
314
0
                outBuf[len + 0 + H_1] = (unsigned char)(ucs4 >> 10);
315
0
                outBuf[len + 2 + H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
316
0
                outBuf[len + 2 + H_1] = (unsigned char)ucs4;
317
0
                len += 4;
318
0
            }
319
0
        }
320
321
0
        *outBufLen = len;
322
0
        return PR_TRUE;
323
28.6k
    } else {
324
28.6k
        unsigned int i, len = 0;
325
28.6k
        PORT_Assert((inBufLen % 2) == 0);
326
28.6k
        if ((inBufLen % 2) != 0) {
327
0
            *outBufLen = 0;
328
0
            return PR_FALSE;
329
0
        }
330
331
28.6k
        if (inBufLen / 2 > UINT_MAX / 3) {
332
0
            *outBufLen = 0;
333
0
            return PR_FALSE;
334
0
        }
335
336
48.6k
        for (i = 0; i < inBufLen; i += 2) {
337
20.9k
            if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00))
338
2.20k
                len += 1;
339
18.7k
            else if (inBuf[i + H_0] < 0x08)
340
4.09k
                len += 2;
341
14.6k
            else if (((inBuf[i + H_0] & 0xFC) == 0xD8)) {
342
1.13k
                if (((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)) {
343
479
                    i += 2;
344
479
                    len += 4;
345
657
                } else {
346
657
                    return PR_FALSE;
347
657
                }
348
13.5k
            } else if ((inBuf[i + H_0] & 0xFC) == 0xDC) {
349
312
                return PR_FALSE;
350
13.2k
            } else {
351
13.2k
                len += 3;
352
13.2k
            }
353
20.9k
        }
354
355
27.6k
        if (len > maxOutBufLen) {
356
0
            *outBufLen = len;
357
0
            return PR_FALSE;
358
0
        }
359
360
27.6k
        len = 0;
361
362
44.9k
        for (i = 0; i < inBufLen; i += 2) {
363
17.3k
            if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) {
364
                /* 0000-007F -> 0xxxxxx */
365
                /* 00000000 0abcdefg -> 0abcdefg */
366
367
2.05k
                outBuf[len] = inBuf[i + H_1] & 0x7F;
368
369
2.05k
                len += 1;
370
15.2k
            } else if (inBuf[i + H_0] < 0x08) {
371
                /* 0080-07FF -> 110xxxxx 10xxxxxx */
372
                /* 00000abc defghijk -> 110abcde 10fghijk */
373
374
3.69k
                outBuf[len + 0] = 0xC0 | ((inBuf[i + H_0] & 0x07) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
375
3.69k
                outBuf[len + 1] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0);
376
377
3.69k
                len += 2;
378
11.5k
            } else if ((inBuf[i + H_0] & 0xFC) == 0xD8) {
379
417
                int abcde, BCDE;
380
381
417
                PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC));
382
383
                /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
384
                /* 110110BC DEfghijk 110111lm nopqrstu ->
385
                   { Let abcde = BCDE + 1 }
386
                   11110abc 10defghi 10jklmno 10pqrstu */
387
388
417
                BCDE = ((inBuf[i + H_0] & 0x03) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
389
417
                abcde = BCDE + 1;
390
391
417
                outBuf[len + 0] = 0xF0 | ((abcde & 0x1C) >> 2);
392
417
                outBuf[len + 1] = 0x80 | ((abcde & 0x03) << 4) | ((inBuf[i + 0 + H_1] & 0x3C) >> 2);
393
417
                outBuf[len + 2] = 0x80 | ((inBuf[i + 0 + H_1] & 0x03) << 4) | ((inBuf[i + 2 + H_0] & 0x03) << 2) | ((inBuf[i + 2 + H_1] & 0xC0) >> 6);
394
417
                outBuf[len + 3] = 0x80 | ((inBuf[i + 2 + H_1] & 0x3F) >> 0);
395
396
417
                i += 2;
397
417
                len += 4;
398
11.1k
            } else {
399
                /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
400
                /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
401
402
11.1k
                outBuf[len + 0] = 0xE0 | ((inBuf[i + H_0] & 0xF0) >> 4);
403
11.1k
                outBuf[len + 1] = 0x80 | ((inBuf[i + H_0] & 0x0F) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6);
404
11.1k
                outBuf[len + 2] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0);
405
406
11.1k
                len += 3;
407
11.1k
            }
408
17.3k
        }
409
410
27.6k
        *outBufLen = len;
411
27.6k
        return PR_TRUE;
412
27.6k
    }
413
28.6k
}
414
415
PRBool
416
sec_port_iso88591_utf8_conversion_function(
417
    const unsigned char *inBuf,
418
    unsigned int inBufLen,
419
    unsigned char *outBuf,
420
    unsigned int maxOutBufLen,
421
    unsigned int *outBufLen)
422
6.52k
{
423
6.52k
    unsigned int i, len = 0;
424
425
6.52k
    PORT_Assert((unsigned int *)NULL != outBufLen);
426
427
6.52k
    if (inBufLen > UINT_MAX / 2) {
428
0
        *outBufLen = 0;
429
0
        return PR_FALSE;
430
0
    }
431
432
220k
    for (i = 0; i < inBufLen; i++) {
433
214k
        if ((inBuf[i] & 0x80) == 0x00)
434
157k
            len += 1;
435
56.2k
        else
436
56.2k
            len += 2;
437
214k
    }
438
439
6.52k
    if (len > maxOutBufLen) {
440
0
        *outBufLen = len;
441
0
        return PR_FALSE;
442
0
    }
443
444
6.52k
    len = 0;
445
446
220k
    for (i = 0; i < inBufLen; i++) {
447
214k
        if ((inBuf[i] & 0x80) == 0x00) {
448
            /* 00-7F -> 0xxxxxxx */
449
            /* 0abcdefg -> 0abcdefg */
450
451
157k
            outBuf[len] = inBuf[i];
452
157k
            len += 1;
453
157k
        } else {
454
            /* 80-FF <- 110xxxxx 10xxxxxx */
455
            /* 00000000 abcdefgh -> 110000ab 10cdefgh */
456
457
56.2k
            outBuf[len + 0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
458
56.2k
            outBuf[len + 1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
459
460
56.2k
            len += 2;
461
56.2k
        }
462
214k
    }
463
464
6.52k
    *outBufLen = len;
465
6.52k
    return PR_TRUE;
466
6.52k
}