Coverage Report

Created: 2023-02-13 06:21

/src/botan/src/lib/utils/charset.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
* Character Set Handling
3
* (C) 1999-2007,2021 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/charset.h>
9
#include <botan/internal/loadstor.h>
10
#include <botan/exceptn.h>
11
#include <sstream>
12
13
namespace Botan {
14
15
namespace {
16
17
void append_utf8_for(std::string& s, uint32_t c)
18
155k
   {
19
155k
   if(c >= 0xD800 && c < 0xE000)
20
589
      throw Decoding_Error("Invalid Unicode character");
21
22
155k
   if(c <= 0x7F)
23
69.4k
      {
24
69.4k
      const uint8_t b0 = static_cast<uint8_t>(c);
25
69.4k
      s.push_back(static_cast<char>(b0));
26
69.4k
      }
27
85.6k
   else if(c <= 0x7FF)
28
57.0k
      {
29
57.0k
      const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
30
57.0k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
31
57.0k
      s.push_back(static_cast<char>(b0));
32
57.0k
      s.push_back(static_cast<char>(b1));
33
57.0k
      }
34
28.6k
   else if(c <= 0xFFFF)
35
24.3k
      {
36
24.3k
      const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
37
24.3k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
38
24.3k
      const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
39
24.3k
      s.push_back(static_cast<char>(b0));
40
24.3k
      s.push_back(static_cast<char>(b1));
41
24.3k
      s.push_back(static_cast<char>(b2));
42
24.3k
      }
43
4.23k
   else if(c <= 0x10FFFF)
44
3.13k
      {
45
3.13k
      const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
46
3.13k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
47
3.13k
      const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
48
3.13k
      const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
49
3.13k
      s.push_back(static_cast<char>(b0));
50
3.13k
      s.push_back(static_cast<char>(b1));
51
3.13k
      s.push_back(static_cast<char>(b2));
52
3.13k
      s.push_back(static_cast<char>(b3));
53
3.13k
      }
54
1.10k
   else
55
1.10k
      throw Decoding_Error("Invalid Unicode character");
56
57
155k
   }
58
59
}
60
61
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
62
5.31k
   {
63
5.31k
   if(len % 2 != 0)
64
255
      throw Decoding_Error("Invalid length for UCS-2 string");
65
66
5.06k
   const size_t chars = len / 2;
67
68
5.06k
   std::string s;
69
37.9k
   for(size_t i = 0; i != chars; ++i)
70
32.8k
      {
71
32.8k
      const uint32_t c = load_be<uint16_t>(ucs2, i);
72
32.8k
      append_utf8_for(s, c);
73
32.8k
      }
74
75
5.06k
   return s;
76
5.31k
   }
77
78
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
79
4.05k
   {
80
4.05k
   if(len % 4 != 0)
81
290
      throw Decoding_Error("Invalid length for UCS-4 string");
82
83
3.76k
   const size_t chars = len / 4;
84
85
3.76k
   std::string s;
86
9.43k
   for(size_t i = 0; i != chars; ++i)
87
5.67k
      {
88
5.67k
      const uint32_t c = load_be<uint32_t>(ucs4, i);
89
5.67k
      append_utf8_for(s, c);
90
5.67k
      }
91
92
3.76k
   return s;
93
4.05k
   }
94
95
/*
96
* Convert from ISO 8859-1 to UTF-8
97
*/
98
std::string latin1_to_utf8(const uint8_t chars[], size_t len)
99
8.43k
   {
100
8.43k
   std::string s;
101
125k
   for(size_t i = 0; i != len; ++i)
102
117k
      {
103
117k
      const uint32_t c = static_cast<uint8_t>(chars[i]);
104
117k
      append_utf8_for(s, c);
105
117k
      }
106
8.43k
   return s;
107
8.43k
   }
108
109
/*
* Format a single character for inclusion in a human readable message
*
* The result is the character wrapped in single quotes. Tab, newline
* and carriage return are written as \t, \n and \r. Any other byte
* outside the printable ASCII range 0x20..0x7E (control characters,
* DEL, and bytes >= 0x80) is written as \xNN using two uppercase hex
* digits, so that raw control bytes such as NUL or ESC never end up
* in the returned string. Printable ASCII is emitted unchanged.
*/
std::string format_char_for_display(char c)
   {
   // Work on the unsigned value so comparisons behave the same
   // regardless of whether plain char is signed on this platform
   const unsigned char z = static_cast<unsigned char>(c);

   std::ostringstream oss;

   oss << "'";

   if(c == '\t')
      { oss << "\\t"; }
   else if(c == '\n')
      { oss << "\\n"; }
   else if(c == '\r')
      { oss << "\\r"; }
   else if(z < 0x20 || z >= 0x7F)
      {
      // Always two digits: values below 0x10 get a leading zero
      static const char hex_digits[] = "0123456789ABCDEF";
      oss << "\\x" << hex_digits[z >> 4] << hex_digits[z & 0x0F];
      }
   else
      { oss << c; }

   oss << "'";

   return oss.str();
   }
133
134
}
135