Coverage Report

Created: 2026-02-09 06:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/CMake/Source/cm_codecvt.cxx
Line
Count
Source
1
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2
   file LICENSE.rst or https://cmake.org/licensing for details.  */
3
#include "cm_codecvt.hxx"
4
5
#if defined(_WIN32)
6
#  include <cassert>
7
#  include <cstring>
8
9
#  include <windows.h>
10
#  undef max
11
#  include "cmsys/Encoding.hxx"
12
13
#  include "cm_utf8.h"
14
#endif
15
16
#include "cm_codecvt_Encoding.hxx"
17
18
// Construct the facet for the requested encoding.  On Windows the
// ConsoleOutput and ANSI encodings enable conversion to a code page;
// every other combination of platform and encoding disables conversion.
codecvt::codecvt(codecvt_Encoding e)
#if defined(_WIN32)
  : m_codepage(0)
#endif
{
#if defined(_WIN32)
  if (e == codecvt_Encoding::ConsoleOutput) {
    // Convert to whatever code page the console currently uses for output.
    this->m_noconv = false;
    this->m_codepage = GetConsoleOutputCP();
    return;
  }
  if (e == codecvt_Encoding::ANSI) {
    // Convert to the system ANSI code page.
    this->m_noconv = false;
    this->m_codepage = CP_ACP;
    return;
  }
#else
  // We don't know which ANSI encoding to use for other platforms than
  // Windows so we don't do any conversion there.
  static_cast<void>(e);
#endif

  // UTF8 / UTF8_WITH_BOM: assume internal encoding is UTF-8.
  // None (and any unrecognized value): no encoding.
  this->m_noconv = true;
}
47
48
0
// Defaulted destructor: no work beyond the implicit member/base destruction.
codecvt::~codecvt() = default;
49
50
bool codecvt::do_always_noconv() const noexcept
51
0
{
52
0
  return this->m_noconv;
53
0
}
54
55
std::codecvt_base::result codecvt::do_out(mbstate_t& state, char const* from,
56
                                          char const* from_end,
57
                                          char const*& from_next, char* to,
58
                                          char* to_end, char*& to_next) const
59
0
{
60
0
  from_next = from;
61
0
  to_next = to;
62
0
  if (this->m_noconv) {
63
0
    return std::codecvt_base::noconv;
64
0
  }
65
#if defined(_WIN32)
66
  // Use a const view of the state because we should not modify it until we
67
  // have fully processed and consume a byte (with sufficient space in the
68
  // output buffer).  We call helpers to re-cast and modify the state
69
  State const& lstate = reinterpret_cast<State&>(state);
70
71
  while (from_next != from_end) {
72
    // Count leading ones in the bits of the next byte.
73
    unsigned char const ones =
74
      cm_utf8_ones[static_cast<unsigned char>(*from_next)];
75
76
    if (ones != 1 && lstate.buffered != 0) {
77
      // We have a buffered partial codepoint that we never completed.
78
      return std::codecvt_base::error;
79
    } else if (ones == 1 && lstate.buffered == 0) {
80
      // This is a continuation of a codepoint that never started.
81
      return std::codecvt_base::error;
82
    }
83
84
    // Compute the number of bytes in the current codepoint.
85
    int need = 0;
86
    switch (ones) {
87
      case 0: // 0xxx xxxx: new codepoint of size 1
88
        need = 1;
89
        break;
90
      case 1: // 10xx xxxx: continues a codepoint
91
        assert(lstate.size != 0);
92
        need = lstate.size;
93
        break;
94
      case 2: // 110x xxxx: new codepoint of size 2
95
        need = 2;
96
        break;
97
      case 3: // 1110 xxxx: new codepoint of size 3
98
        need = 3;
99
        break;
100
      case 4: // 1111 0xxx: new codepoint of size 4
101
        need = 4;
102
        break;
103
      default: // invalid byte
104
        return std::codecvt_base::error;
105
    }
106
    assert(need > 0);
107
108
    if (lstate.buffered + 1 == need) {
109
      // This byte completes a codepoint.
110
      std::codecvt_base::result decode_result =
111
        this->Decode(state, need, from_next, to_next, to_end);
112
      if (decode_result != std::codecvt_base::ok) {
113
        return decode_result;
114
      }
115
    } else {
116
      // This byte does not complete a codepoint.
117
      this->BufferPartial(state, need, from_next);
118
    }
119
  }
120
121
  return std::codecvt_base::ok;
122
#else
123
0
  static_cast<void>(state);
124
0
  static_cast<void>(from);
125
0
  static_cast<void>(from_end);
126
0
  static_cast<void>(from_next);
127
0
  static_cast<void>(to);
128
0
  static_cast<void>(to_end);
129
0
  static_cast<void>(to_next);
130
0
  return std::codecvt_base::noconv;
131
0
#endif
132
0
}
133
134
std::codecvt_base::result codecvt::do_unshift(mbstate_t& state, char* to,
135
                                              char* to_end,
136
                                              char*& to_next) const
137
0
{
138
0
  to_next = to;
139
0
  if (this->m_noconv) {
140
0
    return std::codecvt_base::noconv;
141
0
  }
142
#if defined(_WIN32)
143
  State& lstate = reinterpret_cast<State&>(state);
144
  if (lstate.buffered != 0) {
145
    return this->DecodePartial(state, to_next, to_end);
146
  }
147
  return std::codecvt_base::ok;
148
#else
149
0
  static_cast<void>(state);
150
0
  static_cast<void>(to_end);
151
0
  return std::codecvt_base::ok;
152
0
#endif
153
0
}
154
155
#if defined(_WIN32)
156
std::codecvt_base::result codecvt::Decode(mbstate_t& state, int size,
157
                                          char const*& from_next,
158
                                          char*& to_next, char* to_end) const
159
{
160
  State& lstate = reinterpret_cast<State&>(state);
161
162
  // Collect all the bytes for this codepoint.
163
  char buf[4];
164
  memcpy(buf, lstate.partial, lstate.buffered);
165
  buf[lstate.buffered] = *from_next;
166
167
  // Convert the encoding.
168
  wchar_t wbuf[2];
169
  int wlen =
170
    MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, buf, size, wbuf, 2);
171
  if (wlen <= 0) {
172
    return std::codecvt_base::error;
173
  }
174
175
  int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
176
                                 to_end - to_next, nullptr, nullptr);
177
  if (tlen <= 0) {
178
    if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
179
      return std::codecvt_base::partial;
180
    }
181
    return std::codecvt_base::error;
182
  }
183
184
  // Move past the now-consumed byte in the input buffer.
185
  ++from_next;
186
187
  // Move past the converted codepoint in the output buffer.
188
  to_next += tlen;
189
190
  // Re-initialize the state for the next codepoint to start.
191
  lstate = State();
192
193
  return std::codecvt_base::ok;
194
}
195
196
std::codecvt_base::result codecvt::DecodePartial(mbstate_t& state,
197
                                                 char*& to_next,
198
                                                 char* to_end) const
199
{
200
  State& lstate = reinterpret_cast<State&>(state);
201
202
  // Try converting the partial codepoint.
203
  wchar_t wbuf[2];
204
  int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, lstate.partial,
205
                                 lstate.buffered, wbuf, 2);
206
  if (wlen <= 0) {
207
    return std::codecvt_base::error;
208
  }
209
210
  int tlen = WideCharToMultiByte(m_codepage, 0, wbuf, wlen, to_next,
211
                                 to_end - to_next, nullptr, nullptr);
212
  if (tlen <= 0) {
213
    if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
214
      return std::codecvt_base::partial;
215
    }
216
    return std::codecvt_base::error;
217
  }
218
219
  // Move past the converted codepoint in the output buffer.
220
  to_next += tlen;
221
222
  // Re-initialize the state for the next codepoint to start.
223
  lstate = State();
224
225
  return std::codecvt_base::ok;
226
}
227
228
void codecvt::BufferPartial(mbstate_t& state, int size,
229
                            char const*& from_next) const
230
{
231
  State& lstate = reinterpret_cast<State&>(state);
232
233
  // Save the byte in our buffer for later.
234
  lstate.partial[lstate.buffered++] = *from_next;
235
  lstate.size = size;
236
237
  // Move past the now-consumed byte in the input buffer.
238
  ++from_next;
239
}
240
#endif
241
242
int codecvt::do_max_length() const noexcept
243
0
{
244
0
  return 4;
245
0
}
246
247
int codecvt::do_encoding() const noexcept
248
0
{
249
0
  return 0;
250
0
}