/src/open62541/deps/utf8.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4 | | * |
5 | | * Copyright 2024 (c) Fraunhofer IOSB (Author: Julius Pfrommer) |
6 | | */ |
7 | | |
8 | | #ifndef UTF8_H_ |
9 | | #define UTF8_H_ |
10 | | |
11 | | #include <stddef.h> |
12 | | |
13 | | #ifdef __cplusplus |
14 | | extern "C" { |
15 | | #endif |
16 | | |
/* Inline keyword used for the static helper functions in this header. Builds
 * that define _MSC_VER use the __inline spelling, all others plain inline. */
#ifdef _MSC_VER
# define UTF_INLINE __inline
#else
# define UTF_INLINE inline
#endif

/* Branch prediction hints. Both macros evaluate to 1 if x is non-zero and to 0
 * otherwise, so they can wrap any scalar condition.
 *
 * The argument is normalized with !! before it is handed to __builtin_expect.
 * The builtin takes a long and returns its (converted) argument: without the
 * normalization a value wider than long could be truncated to zero, and the
 * hint would only match if the expression happened to be exactly 1. The
 * fallback uses the same normalization so that both variants yield identical
 * values. */
#if defined(__GNUC__) || defined(__clang__)
# define UTF_LIKELY(x) __builtin_expect(!!(x), 1)
# define UTF_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
# define UTF_LIKELY(x) (!!(x))
# define UTF_UNLIKELY(x) (!!(x))
#endif
30 | | |
31 | | /* Extract the next utf8 codepoint from the buffer. Returns the length (1-4) of |
32 | | * the codepoint encoding or 0 upon an error. */ |
33 | | unsigned |
34 | | utf8_to_codepoint(const unsigned char *str, size_t len, unsigned *codepoint); |
35 | | |
36 | | /* Encodes the codepoint in utf8. The string needs to have enough space (at most |
37 | | * four byte) available. Returns the encoding length. */ |
38 | | static UTF_INLINE unsigned |
39 | 0 | utf8_from_codepoint(unsigned char *str, unsigned codepoint) { |
40 | 0 | if(UTF_LIKELY(codepoint <= 0x7F)) { /* Plain ASCII */ |
41 | 0 | str[0] = (unsigned char)codepoint; |
42 | 0 | return 1; |
43 | 0 | } |
44 | 0 | if(UTF_LIKELY(codepoint <= 0x07FF)) { /* 2-byte unicode */ |
45 | 0 | str[0] = (unsigned char)(((codepoint >> 6) & 0x1F) | 0xC0); |
46 | 0 | str[1] = (unsigned char)(((codepoint >> 0) & 0x3F) | 0x80); |
47 | 0 | return 2; |
48 | 0 | } |
49 | 0 | if(UTF_LIKELY(codepoint <= 0xFFFF)) { /* 3-byte unicode */ |
50 | 0 | str[0] = (unsigned char)(((codepoint >> 12) & 0x0F) | 0xE0); |
51 | 0 | str[1] = (unsigned char)(((codepoint >> 6) & 0x3F) | 0x80); |
52 | 0 | str[2] = (unsigned char)(((codepoint >> 0) & 0x3F) | 0x80); |
53 | 0 | return 3; |
54 | 0 | } |
55 | 0 | if(UTF_LIKELY(codepoint <= 0x10FFFF)) { /* 4-byte unicode */ |
56 | 0 | str[0] = (unsigned char)(((codepoint >> 18) & 0x07) | 0xF0); |
57 | 0 | str[1] = (unsigned char)(((codepoint >> 12) & 0x3F) | 0x80); |
58 | 0 | str[2] = (unsigned char)(((codepoint >> 6) & 0x3F) | 0x80); |
59 | 0 | str[3] = (unsigned char)(((codepoint >> 0) & 0x3F) | 0x80); |
60 | 0 | return 4; |
61 | 0 | } |
62 | 0 | return 0; /* Not a unicode codepoint */ |
63 | 0 | } Unexecuted instantiation: utf8.c:utf8_from_codepoint Unexecuted instantiation: cj5.c:utf8_from_codepoint Unexecuted instantiation: ua_types_encoding_json_105.c:utf8_from_codepoint |
64 | | |
65 | | /* Returns the encoding length of the codepoint */ |
66 | | static UTF_INLINE unsigned |
67 | 0 | utf8_length(unsigned codepoint) { |
68 | 0 | if(UTF_LIKELY(codepoint <= 0x7F)) |
69 | 0 | return 1; /* Plain ASCII */ |
70 | 0 | if(UTF_LIKELY(codepoint <= 0x07FF)) |
71 | 0 | return 2; /* 2-byte unicode */ |
72 | 0 | if(UTF_LIKELY(codepoint <= 0xFFFF)) |
73 | 0 | return 3; /* 3-byte unicode */ |
74 | 0 | if(UTF_LIKELY(codepoint <= 0x10FFFF)) |
75 | 0 | return 4; /* 4-byte unicode */ |
76 | 0 | return 0; /* Not a unicode codepoint */ |
77 | 0 | } Unexecuted instantiation: utf8.c:utf8_length Unexecuted instantiation: cj5.c:utf8_length Unexecuted instantiation: ua_types_encoding_json_105.c:utf8_length |
78 | | |
79 | | #ifdef __cplusplus |
80 | | } |
81 | | #endif |
82 | | |
83 | | #endif /* UTF8_H_ */ |