/src/serenity/AK/UnicodeUtils.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/Concepts.h> |
10 | | #include <AK/Error.h> |
11 | | #include <AK/Forward.h> |
12 | | |
13 | | namespace AK::UnicodeUtils { |
14 | | |
15 | | constexpr int bytes_to_store_code_point_in_utf8(u32 code_point) |
16 | 4.09M | { |
17 | 4.09M | if (code_point <= 0x7f) |
18 | 4.09M | return 1; |
19 | 0 | if (code_point <= 0x7ff) |
20 | 0 | return 2; |
21 | 0 | if (code_point <= 0xffff) |
22 | 0 | return 3; |
23 | 0 | if (code_point <= 0x10ffff) |
24 | 0 | return 4; |
25 | 0 | return 0; |
26 | 0 | } |
27 | | |
28 | | template<typename Callback> |
29 | | [[nodiscard]] constexpr int code_point_to_utf8(u32 code_point, Callback callback) |
30 | 4.09M | { |
31 | 4.09M | if (code_point <= 0x7f) { |
32 | 4.09M | callback(static_cast<char>(code_point)); |
33 | 4.09M | return 1; |
34 | 4.09M | } else if (code_point <= 0x07ff) { |
35 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x1f) | 0xc0)); |
36 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); |
37 | 0 | return 2; |
38 | 0 | } else if (code_point <= 0xffff) { |
39 | 0 | callback(static_cast<char>(((code_point >> 12) & 0x0f) | 0xe0)); |
40 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x3f) | 0x80)); |
41 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); |
42 | 0 | return 3; |
43 | 0 | } else if (code_point <= 0x10ffff) { |
44 | 0 | callback(static_cast<char>(((code_point >> 18) & 0x07) | 0xf0)); |
45 | 0 | callback(static_cast<char>(((code_point >> 12) & 0x3f) | 0x80)); |
46 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x3f) | 0x80)); |
47 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); |
48 | 0 | return 4; |
49 | 0 | } |
50 | 0 | return -1; |
51 | 4.09M | } int AK::UnicodeUtils::code_point_to_utf8<AK::String::from_code_point(unsigned int)::{lambda(AK::Span<unsigned char>)#1}::operator()(AK::Span<unsigned char>) const::{lambda(auto:1)#1}>(unsigned int, AK::String::from_code_point(unsigned int)::{lambda(AK::Span<unsigned char>)#1}::operator()(AK::Span<unsigned char>) const::{lambda(auto:1)#1})Line | Count | Source | 30 | 4.09M | { | 31 | 4.09M | if (code_point <= 0x7f) { | 32 | 4.09M | callback(static_cast<char>(code_point)); | 33 | 4.09M | return 1; | 34 | 4.09M | } else if (code_point <= 0x07ff) { | 35 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x1f) | 0xc0)); | 36 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); | 37 | 0 | return 2; | 38 | 0 | } else if (code_point <= 0xffff) { | 39 | 0 | callback(static_cast<char>(((code_point >> 12) & 0x0f) | 0xe0)); | 40 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x3f) | 0x80)); | 41 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); | 42 | 0 | return 3; | 43 | 0 | } else if (code_point <= 0x10ffff) { | 44 | 0 | callback(static_cast<char>(((code_point >> 18) & 0x07) | 0xf0)); | 45 | 0 | callback(static_cast<char>(((code_point >> 12) & 0x3f) | 0x80)); | 46 | 0 | callback(static_cast<char>(((code_point >> 6) & 0x3f) | 0x80)); | 47 | 0 | callback(static_cast<char>(((code_point >> 0) & 0x3f) | 0x80)); | 48 | 0 | return 4; | 49 | 0 | } | 50 | 0 | return -1; | 51 | 4.09M | } |
Unexecuted instantiation: String.cpp:int AK::UnicodeUtils::code_point_to_utf8<AK::String::repeated(unsigned int, unsigned long)::$_0>(unsigned int, AK::String::repeated(unsigned int, unsigned long)::$_0) Unexecuted instantiation: GlobalObject.cpp:int AK::UnicodeUtils::code_point_to_utf8<JS::encode(JS::VM&, AK::ByteString const&, AK::StringView)::$_0>(unsigned int, JS::encode(JS::VM&, AK::ByteString const&, AK::StringView)::$_0) |
52 | | |
53 | | template<FallibleFunction<char> Callback> |
54 | | [[nodiscard]] ErrorOr<int> try_code_point_to_utf8(u32 code_point, Callback&& callback) |
55 | 106M | { |
56 | 106M | if (code_point <= 0x7f) { |
57 | 28.6M | TRY(callback(static_cast<char>(code_point))); |
58 | 28.6M | return 1; |
59 | 28.6M | } |
60 | 77.3M | if (code_point <= 0x07ff) { |
61 | 2.31M | TRY(callback(static_cast<char>((((code_point >> 6) & 0x1f) | 0xc0)))); |
62 | 2.31M | TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80)))); |
63 | 2.31M | return 2; |
64 | 2.31M | } |
65 | 75.0M | if (code_point <= 0xffff) { |
66 | 74.7M | TRY(callback(static_cast<char>((((code_point >> 12) & 0x0f) | 0xe0)))); |
67 | 74.7M | TRY(callback(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80)))); |
68 | 74.7M | TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80)))); |
69 | 74.7M | return 3; |
70 | 74.7M | } |
71 | 280k | if (code_point <= 0x10ffff) { |
72 | 120k | TRY(callback(static_cast<char>((((code_point >> 18) & 0x07) | 0xf0)))); |
73 | 120k | TRY(callback(static_cast<char>((((code_point >> 12) & 0x3f) | 0x80)))); |
74 | 120k | TRY(callback(static_cast<char>((((code_point >> 6) & 0x3f) | 0x80)))); |
75 | 120k | TRY(callback(static_cast<char>((((code_point >> 0) & 0x3f) | 0x80)))); |
76 | 120k | return 4; |
77 | 120k | } |
78 | 159k | return -1; |
79 | 280k | } |
80 | | |
81 | | } |