Coverage Report

Created: 2025-07-01 07:00

/src/open62541/deps/utf8.c
Line
Count
Source (jump to first uncovered line)
1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
 * License, v. 2.0. If a copy of the MPL was not distributed with this
3
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
 *
5
 *    Copyright 2024 (c) Fraunhofer IOSB (Author: Julius Pfrommer)
6
 */
7
8
#include "utf8.h"
9
10
0
/* Read the byte at str[pos] into `byte` and, if it lies in the continuation
 * range 0x80..0xBF, shift *codepoint left by six bits and merge in the low
 * six bits of the byte. Any other byte value makes the enclosing function
 * return 0.
 *
 * The macro expands in place: the enclosing function must have `str`,
 * `codepoint` and an assignable `byte` in scope. */
#define UTF_PARSE_BYTE(pos) do {                            \
        byte = str[(pos)];                                  \
        if(UTF_UNLIKELY(!(byte >= 0x80 && byte <= 0xBF)))   \
            return 0; /* Not a continuation byte */         \
        /* The shift clears the low six bits, so OR-ing */  \
        /* in the payload equals adding it */               \
        *codepoint = (*codepoint << 6) | (byte & 0x3F);     \
    } while(0)
16
17
/* Decode the UTF-8 sequence that starts at str[0].
 *
 * str        Input bytes. Only the first len bytes are ever read.
 * len        Number of bytes available in str.
 * codepoint  Receives the decoded value.
 *
 * Returns the number of bytes the sequence occupies (1 to 4). Returns 0 if
 * len is 0, the sequence is truncated, the lead byte or a continuation byte
 * is malformed, the encoding is overlong, the value is a UTF-16 surrogate
 * (U+D800..U+DFFF) or the value lies above U+10FFFF.
 *
 * When 0 is returned for a non-empty input, *codepoint has already been
 * overwritten with an intermediate value and must not be used. */
unsigned
utf8_to_codepoint(const unsigned char *str, size_t len, unsigned *codepoint) {
    /* Ensure there is a character to read */
    if(UTF_UNLIKELY(len == 0))
        return 0;

    *codepoint = str[0];
    if(UTF_LIKELY(*codepoint < 0x80))
        return 1; /* Normal ASCII */

    /* 0x80..0xBF are continuation bytes and cannot start a sequence. 0xC0
     * and 0xC1 could only start an overlong two-byte encoding of a value
     * below 0x80. */
    if(UTF_UNLIKELY(*codepoint <= 0xC1))
        return 0;

    unsigned count;
    unsigned char byte; /* Scratch variable assigned by UTF_PARSE_BYTE */
    if(*codepoint <= 0xDF) { /* 2-byte sequence */
        if(len < 2)
            return 0;
        count = 2;
        *codepoint &= 0x1F;
        UTF_PARSE_BYTE(1);
        /* No overlong check needed: the lead byte is at least 0xC2 here, so
         * the decoded value is at least 0x80. */
    } else if(*codepoint <= 0xEF) { /* 3-byte sequence */
        if(len < 3)
            return 0;
        count = 3;
        *codepoint &= 0xF;
        UTF_PARSE_BYTE(1);
        UTF_PARSE_BYTE(2);
        if(UTF_UNLIKELY(*codepoint < 0x800))
            return 0; /* Too small for the encoding length */
        if(UTF_UNLIKELY(*codepoint >= 0xD800 && *codepoint <= 0xDFFF))
            return 0; /* UTF-16 surrogates are not valid in UTF-8 */
    } else if(*codepoint <= 0xF4) { /* 4-byte sequence */
        if(len < 4)
            return 0;
        count = 4;
        *codepoint &= 0x7;
        UTF_PARSE_BYTE(1);
        UTF_PARSE_BYTE(2);
        UTF_PARSE_BYTE(3);
        if(UTF_UNLIKELY(*codepoint < 0x10000))
            return 0; /* Too small for the encoding length */
        /* Only a 4-byte sequence can exceed the Unicode range */
        if(UTF_UNLIKELY(*codepoint > 0x10FFFF))
            return 0; /* Not in the Unicode range */
    } else {
        return 0; /* Invalid utf8 encoding */
    }

    return count;
}