Coverage Report

Created: 2023-06-07 07:00

/src/botan/src/lib/utils/charset.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Character Set Handling
3
* (C) 1999-2007,2021 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/charset.h>
9
10
#include <botan/exceptn.h>
11
#include <botan/internal/loadstor.h>
12
#include <sstream>
13
14
namespace Botan {
15
16
namespace {
17
18
0
void append_utf8_for(std::string& s, uint32_t c) {
19
0
   if(c >= 0xD800 && c < 0xE000) {
20
0
      throw Decoding_Error("Invalid Unicode character");
21
0
   }
22
23
0
   if(c <= 0x7F) {
24
0
      const uint8_t b0 = static_cast<uint8_t>(c);
25
0
      s.push_back(static_cast<char>(b0));
26
0
   } else if(c <= 0x7FF) {
27
0
      const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
28
0
      const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
29
0
      s.push_back(static_cast<char>(b0));
30
0
      s.push_back(static_cast<char>(b1));
31
0
   } else if(c <= 0xFFFF) {
32
0
      const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
33
0
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
34
0
      const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
35
0
      s.push_back(static_cast<char>(b0));
36
0
      s.push_back(static_cast<char>(b1));
37
0
      s.push_back(static_cast<char>(b2));
38
0
   } else if(c <= 0x10FFFF) {
39
0
      const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
40
0
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
41
0
      const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
42
0
      const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
43
0
      s.push_back(static_cast<char>(b0));
44
0
      s.push_back(static_cast<char>(b1));
45
0
      s.push_back(static_cast<char>(b2));
46
0
      s.push_back(static_cast<char>(b3));
47
0
   } else {
48
0
      throw Decoding_Error("Invalid Unicode character");
49
0
   }
50
0
}
51
52
}  // namespace
53
54
0
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len) {
55
0
   if(len % 2 != 0) {
56
0
      throw Decoding_Error("Invalid length for UCS-2 string");
57
0
   }
58
59
0
   const size_t chars = len / 2;
60
61
0
   std::string s;
62
0
   for(size_t i = 0; i != chars; ++i) {
63
0
      const uint32_t c = load_be<uint16_t>(ucs2, i);
64
0
      append_utf8_for(s, c);
65
0
   }
66
67
0
   return s;
68
0
}
69
70
0
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len) {
71
0
   if(len % 4 != 0) {
72
0
      throw Decoding_Error("Invalid length for UCS-4 string");
73
0
   }
74
75
0
   const size_t chars = len / 4;
76
77
0
   std::string s;
78
0
   for(size_t i = 0; i != chars; ++i) {
79
0
      const uint32_t c = load_be<uint32_t>(ucs4, i);
80
0
      append_utf8_for(s, c);
81
0
   }
82
83
0
   return s;
84
0
}
85
86
/*
87
* Convert from ISO 8859-1 to UTF-8
88
*/
89
0
std::string latin1_to_utf8(const uint8_t chars[], size_t len) {
90
0
   std::string s;
91
0
   for(size_t i = 0; i != len; ++i) {
92
0
      const uint32_t c = static_cast<uint8_t>(chars[i]);
93
0
      append_utf8_for(s, c);
94
0
   }
95
0
   return s;
96
0
}
97
98
0
/*
* Produce a single-quoted, human readable rendering of c.
*
* Tab, newline and carriage return become the two-character escapes
* \t, \n and \r. Bytes with a value of 128 or more are written as \x
* followed by the value in uppercase hexadecimal. Any other character
* is emitted unchanged between the quotes.
*/
std::string format_char_for_display(char c) {
   std::string shown;

   switch(c) {
      case '\t':
         shown = "\\t";
         break;
      case '\n':
         shown = "\\n";
         break;
      case '\r':
         shown = "\\r";
         break;
      default: {
         // Compare as unsigned so the check is independent of whether
         // plain char is signed on this platform
         const unsigned char byte = static_cast<unsigned char>(c);
         if(byte < 128) {
            shown.push_back(c);
         } else {
            std::ostringstream hex;
            hex << "\\x" << std::hex << std::uppercase << static_cast<int>(byte);
            shown = hex.str();
         }
         break;
      }
   }

   return "'" + shown + "'";
}
120
121
}  // namespace Botan