/src/botan/src/lib/utils/charset.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Character Set Handling |
3 | | * (C) 1999-2007,2021 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/charset.h> |
9 | | |
10 | | #include <botan/exceptn.h> |
11 | | #include <botan/internal/loadstor.h> |
12 | | #include <sstream> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | namespace { |
17 | | |
18 | 12.1k | void append_utf8_for(std::string& s, uint32_t c) { |
19 | 12.1k | if(c >= 0xD800 && c < 0xE000) { |
20 | 48 | throw Decoding_Error("Invalid Unicode character"); |
21 | 48 | } |
22 | | |
23 | 12.1k | if(c <= 0x7F) { |
24 | 5.41k | const uint8_t b0 = static_cast<uint8_t>(c); |
25 | 5.41k | s.push_back(static_cast<char>(b0)); |
26 | 6.71k | } else if(c <= 0x7FF) { |
27 | 3.35k | const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6); |
28 | 3.35k | const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
29 | 3.35k | s.push_back(static_cast<char>(b0)); |
30 | 3.35k | s.push_back(static_cast<char>(b1)); |
31 | 3.36k | } else if(c <= 0xFFFF) { |
32 | 3.02k | const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12); |
33 | 3.02k | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
34 | 3.02k | const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
35 | 3.02k | s.push_back(static_cast<char>(b0)); |
36 | 3.02k | s.push_back(static_cast<char>(b1)); |
37 | 3.02k | s.push_back(static_cast<char>(b2)); |
38 | 3.02k | } else if(c <= 0x10FFFF) { |
39 | 237 | const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18); |
40 | 237 | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F); |
41 | 237 | const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
42 | 237 | const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
43 | 237 | s.push_back(static_cast<char>(b0)); |
44 | 237 | s.push_back(static_cast<char>(b1)); |
45 | 237 | s.push_back(static_cast<char>(b2)); |
46 | 237 | s.push_back(static_cast<char>(b3)); |
47 | 237 | } else { |
48 | 96 | throw Decoding_Error("Invalid Unicode character"); |
49 | 96 | } |
50 | 12.1k | } |
51 | | |
52 | | } // namespace |
53 | | |
54 | 762 | std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len) { |
55 | 762 | if(len % 2 != 0) { |
56 | 19 | throw Decoding_Error("Invalid length for UCS-2 string"); |
57 | 19 | } |
58 | | |
59 | 743 | const size_t chars = len / 2; |
60 | | |
61 | 743 | std::string s; |
62 | 4.81k | for(size_t i = 0; i != chars; ++i) { |
63 | 4.07k | const uint32_t c = load_be<uint16_t>(ucs2, i); |
64 | 4.07k | append_utf8_for(s, c); |
65 | 4.07k | } |
66 | | |
67 | 743 | return s; |
68 | 762 | } |
69 | | |
70 | 263 | std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) { |
71 | 263 | if(len % 4 != 0) { |
72 | 20 | throw Decoding_Error("Invalid length for UCS-4 string"); |
73 | 20 | } |
74 | | |
75 | 243 | const size_t chars = len / 4; |
76 | | |
77 | 243 | std::string s; |
78 | 728 | for(size_t i = 0; i != chars; ++i) { |
79 | 485 | const uint32_t c = load_be<uint32_t>(ucs4, i); |
80 | 485 | append_utf8_for(s, c); |
81 | 485 | } |
82 | | |
83 | 243 | return s; |
84 | 263 | } |
85 | | |
86 | | /* |
87 | | * Convert from ISO 8859-1 to UTF-8 |
88 | | */ |
89 | 720 | std::string latin1_to_utf8(const uint8_t chars[], size_t len) { |
90 | 720 | std::string s; |
91 | 8.35k | for(size_t i = 0; i != len; ++i) { |
92 | 7.63k | const uint32_t c = static_cast<uint8_t>(chars[i]); |
93 | 7.63k | append_utf8_for(s, c); |
94 | 7.63k | } |
95 | 720 | return s; |
96 | 720 | } |
97 | | |
/*
* Format a single character for inclusion in a human readable message.
*
* The result is always wrapped in single quotes. Tab, newline and
* carriage return are shown as \t, \n and \r. Every other byte that is
* not printable ASCII (control characters below 0x20, DEL, and anything
* >= 0x80) is shown as a two digit uppercase hex escape \xNN, so that
* raw control bytes (NUL, ESC, ...) never end up in the message.
* Printable ASCII characters, including backslash and the single
* quote itself, are emitted unchanged.
*/
std::string format_char_for_display(char c) {
   // Work on the unsigned value so the range checks behave the same
   // whether plain char is signed or unsigned on this platform.
   const auto z = static_cast<unsigned char>(c);

   std::string out = "'";

   if(c == '\t') {
      out += "\\t";
   } else if(c == '\n') {
      out += "\\n";
   } else if(c == '\r') {
      out += "\\r";
   } else if(z < 0x20 || z >= 0x7F) {
      // Not printable: always emit exactly two hex digits
      static const char hex_digits[] = "0123456789ABCDEF";
      out += "\\x";
      out.push_back(hex_digits[z >> 4]);
      out.push_back(hex_digits[z & 0x0F]);
   } else {
      out.push_back(c);
   }

   out += "'";

   return out;
}
120 | | |
121 | | } // namespace Botan |