Coverage Report

Created: 2021-06-10 10:30

/src/botan/src/lib/utils/charset.cpp
Line
Count
Source
1
/*
2
* Character Set Handling
3
* (C) 1999-2007,2021 Jack Lloyd
4
*
5
* Botan is released under the Simplified BSD License (see license.txt)
6
*/
7
8
#include <botan/internal/charset.h>
9
#include <botan/internal/loadstor.h>
10
#include <botan/exceptn.h>
11
12
namespace Botan {
13
14
namespace {
15
16
void append_utf8_for(std::string& s, uint32_t c)
17
123k
   {
18
123k
   if(c >= 0xD800 && c < 0xE000)
19
296
      throw Decoding_Error("Invalid Unicode character");
20
21
123k
   if(c <= 0x7F)
22
60.3k
      {
23
60.3k
      const uint8_t b0 = static_cast<uint8_t>(c);
24
60.3k
      s.push_back(static_cast<char>(b0));
25
60.3k
      }
26
63.3k
   else if(c <= 0x7FF)
27
43.6k
      {
28
43.6k
      const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
29
43.6k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
30
43.6k
      s.push_back(static_cast<char>(b0));
31
43.6k
      s.push_back(static_cast<char>(b1));
32
43.6k
      }
33
19.6k
   else if(c <= 0xFFFF)
34
18.0k
      {
35
18.0k
      const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
36
18.0k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
37
18.0k
      const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
38
18.0k
      s.push_back(static_cast<char>(b0));
39
18.0k
      s.push_back(static_cast<char>(b1));
40
18.0k
      s.push_back(static_cast<char>(b2));
41
18.0k
      }
42
1.65k
   else if(c <= 0x10FFFF)
43
1.10k
      {
44
1.10k
      const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
45
1.10k
      const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
46
1.10k
      const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
47
1.10k
      const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
48
1.10k
      s.push_back(static_cast<char>(b0));
49
1.10k
      s.push_back(static_cast<char>(b1));
50
1.10k
      s.push_back(static_cast<char>(b2));
51
1.10k
      s.push_back(static_cast<char>(b3));
52
1.10k
      }
53
553
   else
54
553
      throw Decoding_Error("Invalid Unicode character");
55
56
123k
   }
57
58
}
59
60
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
61
5.53k
   {
62
5.53k
   if(len % 2 != 0)
63
232
      throw Decoding_Error("Invalid length for UCS-2 string");
64
65
5.30k
   const size_t chars = len / 2;
66
67
5.30k
   std::string s;
68
34.2k
   for(size_t i = 0; i != chars; ++i)
69
28.9k
      {
70
28.9k
      const uint32_t c = load_be<uint16_t>(ucs2, i);
71
28.9k
      append_utf8_for(s, c);
72
28.9k
      }
73
74
5.30k
   return s;
75
5.30k
   }
76
77
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
78
2.73k
   {
79
2.73k
   if(len % 4 != 0)
80
233
      throw Decoding_Error("Invalid length for UCS-4 string");
81
82
2.50k
   const size_t chars = len / 4;
83
84
2.50k
   std::string s;
85
4.77k
   for(size_t i = 0; i != chars; ++i)
86
2.27k
      {
87
2.27k
      const uint32_t c = load_be<uint32_t>(ucs4, i);
88
2.27k
      append_utf8_for(s, c);
89
2.27k
      }
90
91
2.50k
   return s;
92
2.50k
   }
93
94
/*
95
* Convert from ISO 8859-1 to UTF-8
96
*/
97
std::string latin1_to_utf8(const uint8_t chars[], size_t len)
98
5.15k
   {
99
5.15k
   std::string s;
100
97.8k
   for(size_t i = 0; i != len; ++i)
101
92.7k
      {
102
92.7k
      const uint32_t c = static_cast<uint8_t>(chars[i]);
103
92.7k
      append_utf8_for(s, c);
104
92.7k
      }
105
5.15k
   return s;
106
5.15k
   }
107
108
}
109