Coverage Report

Created: 2025-07-01 07:00

/src/open62541/deps/utf8.h
Line
Count
Source (jump to first uncovered line)
1
/* This Source Code Form is subject to the terms of the Mozilla Public
2
 * License, v. 2.0. If a copy of the MPL was not distributed with this
3
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
 *
5
 *    Copyright 2024 (c) Fraunhofer IOSB (Author: Julius Pfrommer)
6
 */
7
8
#ifndef UTF8_H_
9
#define UTF8_H_
10
11
#include <stddef.h>
12
13
#ifdef __cplusplus
14
extern "C" {
15
#endif
16
17
/* Spelling of the inline keyword: `__inline` when _MSC_VER is defined,
 * plain `inline` everywhere else. */
#ifdef _MSC_VER
# define UTF_INLINE __inline
#else
# define UTF_INLINE inline
#endif

/* Branch-prediction hints. Both macros evaluate to the truth value (0 or 1)
 * of their argument, so they can wrap any scalar condition. The argument is
 * normalised with `!!` because __builtin_expect takes a `long`: without it,
 * pointer arguments would not convert cleanly and values wider than `long`
 * could be truncated. The fallback branch uses the same normalisation so the
 * macros yield the same value on every compiler. */
#if defined(__GNUC__) || defined(__clang__)
# define UTF_LIKELY(x) __builtin_expect(!!(x), 1)
# define UTF_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define UTF_LIKELY(x) (!!(x))
# define UTF_UNLIKELY(x) (!!(x))
#endif
30
31
/* Extract the next utf8 codepoint from the buffer. Returns the length (1-4) of
32
 * the codepoint encoding or 0 upon an error. */
33
unsigned
34
utf8_to_codepoint(const unsigned char *str, size_t len, unsigned *codepoint);
35
36
/* Encodes the codepoint in utf8. The string needs to have enough space (at most
37
 * four byte) available. Returns the encoding length. */
38
static UTF_INLINE unsigned
39
0
utf8_from_codepoint(unsigned char *str, unsigned codepoint) {
40
0
    if(UTF_LIKELY(codepoint <= 0x7F)) { /* Plain ASCII */
41
0
        str[0] = (unsigned char)codepoint;
42
0
        return 1;
43
0
    }
44
0
    if(UTF_LIKELY(codepoint <= 0x07FF)) { /* 2-byte unicode */
45
0
        str[0] = (unsigned char)(((codepoint >> 6) & 0x1F) | 0xC0);
46
0
        str[1] = (unsigned char)(((codepoint >> 0) & 0x3F) | 0x80);
47
0
        return 2;
48
0
    }
49
0
    if(UTF_LIKELY(codepoint <= 0xFFFF)) { /* 3-byte unicode */
50
0
        str[0] = (unsigned char)(((codepoint >> 12) & 0x0F) | 0xE0);
51
0
        str[1] = (unsigned char)(((codepoint >>  6) & 0x3F) | 0x80);
52
0
        str[2] = (unsigned char)(((codepoint >>  0) & 0x3F) | 0x80);
53
0
        return 3;
54
0
    }
55
0
    if(UTF_LIKELY(codepoint <= 0x10FFFF)) { /* 4-byte unicode */
56
0
        str[0] = (unsigned char)(((codepoint >> 18) & 0x07) | 0xF0);
57
0
        str[1] = (unsigned char)(((codepoint >> 12) & 0x3F) | 0x80);
58
0
        str[2] = (unsigned char)(((codepoint >>  6) & 0x3F) | 0x80);
59
0
        str[3] = (unsigned char)(((codepoint >>  0) & 0x3F) | 0x80);
60
0
        return 4;
61
0
    }
62
0
    return 0; /* Not a unicode codepoint */
63
0
}
Unexecuted instantiation: utf8.c:utf8_from_codepoint
Unexecuted instantiation: cj5.c:utf8_from_codepoint
Unexecuted instantiation: ua_types_encoding_json_105.c:utf8_from_codepoint
64
65
/* Returns the encoding length of the codepoint */
66
static UTF_INLINE unsigned
67
0
utf8_length(unsigned codepoint) {
68
0
    if(UTF_LIKELY(codepoint <= 0x7F))
69
0
        return 1; /* Plain ASCII */
70
0
    if(UTF_LIKELY(codepoint <= 0x07FF))
71
0
        return 2; /* 2-byte unicode */
72
0
    if(UTF_LIKELY(codepoint <= 0xFFFF))
73
0
        return 3; /* 3-byte unicode */
74
0
    if(UTF_LIKELY(codepoint <= 0x10FFFF))
75
0
        return 4; /* 4-byte unicode */
76
0
    return 0; /* Not a unicode codepoint */
77
0
}
Unexecuted instantiation: utf8.c:utf8_length
Unexecuted instantiation: cj5.c:utf8_length
Unexecuted instantiation: ua_types_encoding_json_105.c:utf8_length
78
79
#ifdef __cplusplus
80
}
81
#endif
82
83
#endif /* UTF8_H_ */