/src/botan/src/lib/utils/charset.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Character Set Handling |
3 | | * (C) 1999-2007,2021 Jack Lloyd |
4 | | * |
5 | | * Botan is released under the Simplified BSD License (see license.txt) |
6 | | */ |
7 | | |
8 | | #include <botan/internal/charset.h> |
9 | | |
10 | | #include <botan/exceptn.h> |
11 | | #include <botan/internal/loadstor.h> |
12 | | #include <sstream> |
13 | | |
14 | | namespace Botan { |
15 | | |
16 | | namespace { |
17 | | |
18 | 0 | void append_utf8_for(std::string& s, uint32_t c) { |
19 | 0 | if(c >= 0xD800 && c < 0xE000) { |
20 | 0 | throw Decoding_Error("Invalid Unicode character"); |
21 | 0 | } |
22 | | |
23 | 0 | if(c <= 0x7F) { |
24 | 0 | const uint8_t b0 = static_cast<uint8_t>(c); |
25 | 0 | s.push_back(static_cast<char>(b0)); |
26 | 0 | } else if(c <= 0x7FF) { |
27 | 0 | const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6); |
28 | 0 | const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
29 | 0 | s.push_back(static_cast<char>(b0)); |
30 | 0 | s.push_back(static_cast<char>(b1)); |
31 | 0 | } else if(c <= 0xFFFF) { |
32 | 0 | const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12); |
33 | 0 | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
34 | 0 | const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
35 | 0 | s.push_back(static_cast<char>(b0)); |
36 | 0 | s.push_back(static_cast<char>(b1)); |
37 | 0 | s.push_back(static_cast<char>(b2)); |
38 | 0 | } else if(c <= 0x10FFFF) { |
39 | 0 | const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18); |
40 | 0 | const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F); |
41 | 0 | const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F); |
42 | 0 | const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F); |
43 | 0 | s.push_back(static_cast<char>(b0)); |
44 | 0 | s.push_back(static_cast<char>(b1)); |
45 | 0 | s.push_back(static_cast<char>(b2)); |
46 | 0 | s.push_back(static_cast<char>(b3)); |
47 | 0 | } else { |
48 | 0 | throw Decoding_Error("Invalid Unicode character"); |
49 | 0 | } |
50 | 0 | } |
51 | | |
52 | | } // namespace |
53 | | |
54 | 0 | std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len) { |
55 | 0 | if(len % 2 != 0) { |
56 | 0 | throw Decoding_Error("Invalid length for UCS-2 string"); |
57 | 0 | } |
58 | | |
59 | 0 | const size_t chars = len / 2; |
60 | |
|
61 | 0 | std::string s; |
62 | 0 | for(size_t i = 0; i != chars; ++i) { |
63 | 0 | const uint32_t c = load_be<uint16_t>(ucs2, i); |
64 | 0 | append_utf8_for(s, c); |
65 | 0 | } |
66 | |
|
67 | 0 | return s; |
68 | 0 | } |
69 | | |
70 | 0 | std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) { |
71 | 0 | if(len % 4 != 0) { |
72 | 0 | throw Decoding_Error("Invalid length for UCS-4 string"); |
73 | 0 | } |
74 | | |
75 | 0 | const size_t chars = len / 4; |
76 | |
|
77 | 0 | std::string s; |
78 | 0 | for(size_t i = 0; i != chars; ++i) { |
79 | 0 | const uint32_t c = load_be<uint32_t>(ucs4, i); |
80 | 0 | append_utf8_for(s, c); |
81 | 0 | } |
82 | |
|
83 | 0 | return s; |
84 | 0 | } |
85 | | |
86 | | /* |
87 | | * Convert from ISO 8859-1 to UTF-8 |
88 | | */ |
89 | 0 | std::string latin1_to_utf8(const uint8_t chars[], size_t len) { |
90 | 0 | std::string s; |
91 | 0 | for(size_t i = 0; i != len; ++i) { |
92 | 0 | const uint32_t c = static_cast<uint8_t>(chars[i]); |
93 | 0 | append_utf8_for(s, c); |
94 | 0 | } |
95 | 0 | return s; |
96 | 0 | } |
97 | | |
/*
* Render a single character as a single-quoted string suitable for
* inclusion in diagnostic messages.
*
* - Tab, newline and carriage return are shown as \t, \n and \r.
* - Every other byte that is not printable ASCII (control characters
*   below 0x20, DEL (0x7F), and bytes >= 0x80) is shown as \xNN using
*   two uppercase hexadecimal digits.
* - Printable ASCII characters are copied through unchanged.
*
* Escaping the remaining control characters matters because a raw NUL
* would truncate the message once it is viewed through c_str(), and
* bytes such as ESC can corrupt terminal or log output.
*/
std::string format_char_for_display(char c) {
   const auto byte = static_cast<unsigned char>(c);

   std::string out = "'";

   if(c == '\t') {
      out += "\\t";
   } else if(c == '\n') {
      out += "\\n";
   } else if(c == '\r') {
      out += "\\r";
   } else if(byte < 0x20 || byte >= 0x7F) {
      // Not printable ASCII: always emit exactly two hex digits
      static const char hex_digits[] = "0123456789ABCDEF";
      out += "\\x";
      out += hex_digits[byte >> 4];
      out += hex_digits[byte & 0x0F];
   } else {
      out += c;
   }

   out += "'";

   return out;
}
120 | | |
121 | | } // namespace Botan |