/src/botan/src/lib/utils/charset.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Character Set Handling |
3 | | * (C) 1999-2007,2021 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/charset.h> |
9 | | #include <botan/internal/loadstor.h> |
10 | | #include <botan/exceptn.h> |
11 | | #include <sstream> |
12 | | |
13 | | namespace Botan { |
14 | | |
15 | | namespace { |
16 | | |
17 | | void append_utf8_for(std::string& s, uint32_t c) |
18 | 155k | { |
19 | 155k | if(c >= 0xD800 && c < 0xE000) |
20 | 589 | throw Decoding_Error("Invalid Unicode character"); |
21 | | |
22 | 155k | if(c <= 0x7F) |
23 | 69.4k | { |
24 | 69.4k | const uint8_t b0 = static_cast<uint8_t>(c); |
25 | 69.4k | s.push_back(static_cast<char>(b0)); |
26 | 69.4k | } |
27 | 85.6k | else if(c <= 0x7FF) |
28 | 57.0k | { |
29 | 57.0k | const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6); |
30 | 57.0k | const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
31 | 57.0k | s.push_back(static_cast<char>(b0)); |
32 | 57.0k | s.push_back(static_cast<char>(b1)); |
33 | 57.0k | } |
34 | 28.6k | else if(c <= 0xFFFF) |
35 | 24.3k | { |
36 | 24.3k | const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12); |
37 | 24.3k | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
38 | 24.3k | const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
39 | 24.3k | s.push_back(static_cast<char>(b0)); |
40 | 24.3k | s.push_back(static_cast<char>(b1)); |
41 | 24.3k | s.push_back(static_cast<char>(b2)); |
42 | 24.3k | } |
43 | 4.23k | else if(c <= 0x10FFFF) |
44 | 3.13k | { |
45 | 3.13k | const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18); |
46 | 3.13k | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F); |
47 | 3.13k | const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
48 | 3.13k | const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
49 | 3.13k | s.push_back(static_cast<char>(b0)); |
50 | 3.13k | s.push_back(static_cast<char>(b1)); |
51 | 3.13k | s.push_back(static_cast<char>(b2)); |
52 | 3.13k | s.push_back(static_cast<char>(b3)); |
53 | 3.13k | } |
54 | 1.10k | else |
55 | 1.10k | throw Decoding_Error("Invalid Unicode character"); |
56 | | |
57 | 155k | } |
58 | | |
59 | | } |
60 | | |
61 | | std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len) |
62 | 5.31k | { |
63 | 5.31k | if(len % 2 != 0) |
64 | 255 | throw Decoding_Error("Invalid length for UCS-2 string"); |
65 | | |
66 | 5.06k | const size_t chars = len / 2; |
67 | | |
68 | 5.06k | std::string s; |
69 | 37.9k | for(size_t i = 0; i != chars; ++i) |
70 | 32.8k | { |
71 | 32.8k | const uint32_t c = load_be<uint16_t>(ucs2, i); |
72 | 32.8k | append_utf8_for(s, c); |
73 | 32.8k | } |
74 | | |
75 | 5.06k | return s; |
76 | 5.31k | } |
77 | | |
78 | | std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) |
79 | 4.05k | { |
80 | 4.05k | if(len % 4 != 0) |
81 | 290 | throw Decoding_Error("Invalid length for UCS-4 string"); |
82 | | |
83 | 3.76k | const size_t chars = len / 4; |
84 | | |
85 | 3.76k | std::string s; |
86 | 9.43k | for(size_t i = 0; i != chars; ++i) |
87 | 5.67k | { |
88 | 5.67k | const uint32_t c = load_be<uint32_t>(ucs4, i); |
89 | 5.67k | append_utf8_for(s, c); |
90 | 5.67k | } |
91 | | |
92 | 3.76k | return s; |
93 | 4.05k | } |
94 | | |
95 | | /* |
96 | | * Convert from ISO 8859-1 to UTF-8 |
97 | | */ |
98 | | std::string latin1_to_utf8(const uint8_t chars[], size_t len) |
99 | 8.43k | { |
100 | 8.43k | std::string s; |
101 | 125k | for(size_t i = 0; i != len; ++i) |
102 | 117k | { |
103 | 117k | const uint32_t c = static_cast<uint8_t>(chars[i]); |
104 | 117k | append_utf8_for(s, c); |
105 | 117k | } |
106 | 8.43k | return s; |
107 | 8.43k | } |
108 | | |
/*
* Format a single character for inclusion in a human readable message
*
* The result is wrapped in single quotes. Tab, newline and carriage
* return are shown as \t, \n and \r. Any other byte that is not
* printable ASCII (below 0x20, equal to 0x7F, or 0x80 and above) is
* shown as \xNN with two uppercase hex digits. Everything else is
* copied through unchanged.
*
* Control characters are escaped rather than emitted raw because an
* embedded NUL would truncate a message later read as a C string, and
* bytes such as ESC can disturb terminals or log files.
*/
std::string format_char_for_display(char c)
   {
   const unsigned char z = static_cast<unsigned char>(c);

   std::string out = "'";

   if(c == '\t')
      { out += "\\t"; }
   else if(c == '\n')
      { out += "\\n"; }
   else if(c == '\r')
      { out += "\\r"; }
   else if(z < 0x20 || z >= 0x7F)
      {
      // Always two digits so that low control codes read as \x00 .. \x1F
      static const char hex_digits[] = "0123456789ABCDEF";
      out += "\\x";
      out += hex_digits[z >> 4];
      out += hex_digits[z & 0x0F];
      }
   else
      { out += c; }

   out += "'";

   return out;
   }
133 | | |
134 | | } |
135 | | |