Coverage Report

Created: 2024-03-10 06:16

/src/botan/src/lib/utils/charset.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Character Set Handling
3
* (C) 1999-2007,2021 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/charset.h>
9
10
#include <botan/exceptn.h>
11
#include <botan/internal/loadstor.h>
12
#include <sstream>
13
14
namespace Botan {
15
16
namespace {
17
18
12.1k
void append_utf8_for(std::string& s, uint32_t c) {
19
12.1k
   if(c >= 0xD800 && c < 0xE000) {
20
48
      throw Decoding_Error("Invalid Unicode character");
21
48
   }
22
23
12.1k
   if(c <= 0x7F) {
24
5.41k
      const uint8_t b0 = static_cast<uint8_t>(c);
25
5.41k
      s.push_back(static_cast<char>(b0));
26
6.71k
   } else if(c <= 0x7FF) {
27
3.35k
      const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
28
3.35k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
29
3.35k
      s.push_back(static_cast<char>(b0));
30
3.35k
      s.push_back(static_cast<char>(b1));
31
3.36k
   } else if(c <= 0xFFFF) {
32
3.02k
      const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
33
3.02k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
34
3.02k
      const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
35
3.02k
      s.push_back(static_cast<char>(b0));
36
3.02k
      s.push_back(static_cast<char>(b1));
37
3.02k
      s.push_back(static_cast<char>(b2));
38
3.02k
   } else if(c <= 0x10FFFF) {
39
237
      const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
40
237
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
41
237
      const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
42
237
      const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
43
237
      s.push_back(static_cast<char>(b0));
44
237
      s.push_back(static_cast<char>(b1));
45
237
      s.push_back(static_cast<char>(b2));
46
237
      s.push_back(static_cast<char>(b3));
47
237
   } else {
48
96
      throw Decoding_Error("Invalid Unicode character");
49
96
   }
50
12.1k
}
51
52
}  // namespace
53
54
762
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len) {
55
762
   if(len % 2 != 0) {
56
19
      throw Decoding_Error("Invalid length for UCS-2 string");
57
19
   }
58
59
743
   const size_t chars = len / 2;
60
61
743
   std::string s;
62
4.81k
   for(size_t i = 0; i != chars; ++i) {
63
4.07k
      const uint32_t c = load_be<uint16_t>(ucs2, i);
64
4.07k
      append_utf8_for(s, c);
65
4.07k
   }
66
67
743
   return s;
68
762
}
69
70
263
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) {
71
263
   if(len % 4 != 0) {
72
20
      throw Decoding_Error("Invalid length for UCS-4 string");
73
20
   }
74
75
243
   const size_t chars = len / 4;
76
77
243
   std::string s;
78
728
   for(size_t i = 0; i != chars; ++i) {
79
485
      const uint32_t c = load_be<uint32_t>(ucs4, i);
80
485
      append_utf8_for(s, c);
81
485
   }
82
83
243
   return s;
84
263
}
85
86
/*
87
* Convert from ISO 8859-1 to UTF-8
88
*/
89
720
std::string latin1_to_utf8(const uint8_t chars[], size_t len) {
90
720
   std::string s;
91
8.35k
   for(size_t i = 0; i != len; ++i) {
92
7.63k
      const uint32_t c = static_cast<uint8_t>(chars[i]);
93
7.63k
      append_utf8_for(s, c);
94
7.63k
   }
95
720
   return s;
96
720
}
97
98
37
/*
* Return a single-quoted, printable rendering of c for use in messages
*
* - tab, newline and carriage return are shown as \t, \n and \r
* - every other control character (below 0x20, or 0x7F) and every byte
*   with the high bit set is shown as \xNN with two uppercase hex digits
* - all remaining characters are shown as-is
*
* Escaping all control characters keeps NUL, ESC and similar bytes from
* being written raw into the returned string.
*/
std::string format_char_for_display(char c) {
   // Work on the unsigned value so the range checks do not depend on
   // whether plain char is signed on this platform
   const unsigned char z = static_cast<unsigned char>(c);

   std::string out = "'";

   if(c == '\t') {
      out += "\\t";
   } else if(c == '\n') {
      out += "\\n";
   } else if(c == '\r') {
      out += "\\r";
   } else if(z < 0x20 || z >= 0x7F) {
      // Always two digits so that low control characters are unambiguous
      static const char hex_digits[] = "0123456789ABCDEF";
      out += "\\x";
      out += hex_digits[z >> 4];
      out += hex_digits[z & 0x0F];
   } else {
      out += c;
   }

   out += "'";

   return out;
}
120
121
}  // namespace Botan