Coverage Report

Created: 2023-02-15 06:24

/proc/self/cwd/external/utf8_range/naive.c
Line
Count
Source (jump to first uncovered line)
1
#include <stdio.h>
2
3
/*
4
 * http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
5
 *
6
 * Table 3-7. Well-Formed UTF-8 Byte Sequences
7
 *
8
 * +--------------------+------------+-------------+------------+-------------+
9
 * | Code Points        | First Byte | Second Byte | Third Byte | Fourth Byte |
10
 * +--------------------+------------+-------------+------------+-------------+
11
 * | U+0000..U+007F     | 00..7F     |             |            |             |
12
 * +--------------------+------------+-------------+------------+-------------+
13
 * | U+0080..U+07FF     | C2..DF     | 80..BF      |            |             |
14
 * +--------------------+------------+-------------+------------+-------------+
15
 * | U+0800..U+0FFF     | E0         | A0..BF      | 80..BF     |             |
16
 * +--------------------+------------+-------------+------------+-------------+
17
 * | U+1000..U+CFFF     | E1..EC     | 80..BF      | 80..BF     |             |
18
 * +--------------------+------------+-------------+------------+-------------+
19
 * | U+D000..U+D7FF     | ED         | 80..9F      | 80..BF     |             |
20
 * +--------------------+------------+-------------+------------+-------------+
21
 * | U+E000..U+FFFF     | EE..EF     | 80..BF      | 80..BF     |             |
22
 * +--------------------+------------+-------------+------------+-------------+
23
 * | U+10000..U+3FFFF   | F0         | 90..BF      | 80..BF     | 80..BF      |
24
 * +--------------------+------------+-------------+------------+-------------+
25
 * | U+40000..U+FFFFF   | F1..F3     | 80..BF      | 80..BF     | 80..BF      |
26
 * +--------------------+------------+-------------+------------+-------------+
27
 * | U+100000..U+10FFFF | F4         | 80..8F      | 80..BF     | 80..BF      |
28
 * +--------------------+------------+-------------+------------+-------------+
29
 */
30
31
/* True when b is a UTF-8 continuation byte, i.e. in the range 80..BF. */
static int utf8_naive_is_cont(unsigned char b)
{
    /*
     * Mask test instead of a (signed char) comparison: converting 0x80..0xFF
     * to signed char is implementation-defined, the mask is fully portable.
     */
    return (b & 0xC0) == 0x80;
}

/*
 * Validate that data[0..len) is well-formed UTF-8 according to the
 * "Well-Formed UTF-8 Byte Sequences" table above.
 *
 * data - bytes to validate; only dereferenced while len > 0
 * len  - number of bytes available in data; a value <= 0 is treated as
 *        empty input
 *
 * Return 0 - success, >0 - index (1 based) of the first byte of the first
 * ill-formed or truncated sequence.
 */
int utf8_naive(const unsigned char *data, int len)
{
    int err_pos = 1;

    /* "> 0" rather than "!= 0" so a negative length can never read data. */
    while (len > 0) {
        const unsigned char byte1 = data[0];
        int bytes = 0;  /* stays 0 when the sequence at data is ill-formed */

        if (byte1 <= 0x7F) {
            /* 00..7F */
            bytes = 1;
        } else if (byte1 >= 0xC2 && byte1 <= 0xDF) {
            /* C2..DF, 80..BF (C0 and C1 would be overlong, so excluded) */
            if (len >= 2 && utf8_naive_is_cont(data[1])) {
                bytes = 2;
            }
        } else if (byte1 >= 0xE0 && byte1 <= 0xEF) {
            if (len >= 3 && utf8_naive_is_cont(data[1]) &&
                    utf8_naive_is_cont(data[2])) {
                const unsigned char byte2 = data[1];

                /*
                 * E0 needs A0..BF (rejects overlong forms) and ED needs
                 * 80..9F (rejects surrogates D800..DFFF); every other
                 * lead byte E1..EC, EE..EF accepts the full 80..BF.
                 */
                if (!((byte1 == 0xE0 && byte2 < 0xA0) ||
                      (byte1 == 0xED && byte2 > 0x9F))) {
                    bytes = 3;
                }
            }
        } else if (byte1 >= 0xF0 && byte1 <= 0xF4) {
            if (len >= 4 && utf8_naive_is_cont(data[1]) &&
                    utf8_naive_is_cont(data[2]) &&
                    utf8_naive_is_cont(data[3])) {
                const unsigned char byte2 = data[1];

                /*
                 * F0 needs 90..BF (rejects overlong forms) and F4 needs
                 * 80..8F (rejects code points above U+10FFFF); F1..F3
                 * accept the full 80..BF.
                 */
                if (!((byte1 == 0xF0 && byte2 < 0x90) ||
                      (byte1 == 0xF4 && byte2 > 0x8F))) {
                    bytes = 4;
                }
            }
        }
        /* Remaining lead bytes (80..C1, F5..FF) are never valid. */

        if (bytes == 0) {
            return err_pos;
        }

        len -= bytes;
        err_pos += bytes;
        data += bytes;
    }

    return 0;
}