Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/uconv/nsScriptableUConv.cpp
Line
Count
Source (jump to first uncovered line)
1
2
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "nsString.h"
8
#include "nsIScriptableUConv.h"
9
#include "nsScriptableUConv.h"
10
#include "nsIStringStream.h"
11
#include "nsComponentManagerUtils.h"
12
13
using namespace mozilla;
14
15
/* Implementation file */
16
NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter)
17
18
// Default constructor: the "internal" flag starts out cleared. All other
// members are left to their own default construction.
nsScriptableUnicodeConverter::nsScriptableUnicodeConverter()
  : mIsInternal(false)
{}
22
23
// Nothing to release by hand; members clean up after themselves.
nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() = default;
26
27
// Encodes the UTF-16 text in aSrc with the current encoder and stores the
// resulting bytes in _retval. Characters the target encoding cannot
// represent are written as a single '?' byte.
//
// Returns NS_ERROR_FAILURE when no encoder is available,
// NS_ERROR_OUT_OF_MEMORY when the output buffer cannot be sized, and NS_OK
// otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc,
                                                 nsACString& _retval)
{
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  // The "without replacement" bound is sufficient here: the replacement
  // we emit is a single byte, and a mappable character always produces at
  // least one byte of output. For ISO-2022-JP, unmappable characters
  // should not be able to trigger more escape sequences than the mappable
  // worst case, in which every input character escapes into a different
  // state.
  CheckedInt<size_t> capacity =
    mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length());
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto input = MakeSpan(aSrc);
  auto output = AsWritableBytes(MakeSpan(_retval));
  size_t produced = 0;

  for (;;) {
    uint32_t status;
    size_t consumed;
    size_t emitted;
    Tie(status, consumed, emitted) =
      mEncoder->EncodeFromUTF16WithoutReplacement(input, output, false);

    // Any status other than "input empty" / "output full" reports an
    // unmappable character; substitute a question mark for it.
    const bool sawUnmappable =
      !(status == kInputEmpty || status == kOutputFull);
    if (sawUnmappable) {
      MOZ_RELEASE_ASSERT(emitted < output.Length(),
        "Unmappables with one-byte replacement should not exceed mappable worst case.");
      output[emitted++] = '?';
    }

    produced += emitted;
    if (status == kInputEmpty) {
      break;
    }

    // Resume after what has been consumed and written so far.
    input = input.From(consumed);
    output = output.From(emitted);
  }

  // Shrink the string to the number of bytes actually written.
  MOZ_ASSERT(produced <= UINT32_MAX);
  if (!_retval.SetLength(produced, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
77
78
// Flushes any pending encoder output into _retval and resets the converter
// state so the object can be reused.
//
// Returns NS_ERROR_FAILURE (with _retval emptied) when no converter has
// been set up, i.e. when mDecoder is null; NS_OK otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::Finish(nsACString& _retval)
{
  // InitConverter() leaves both mEncoder and mDecoder null when it has not
  // run successfully. Bail out before touching mDecoder in that case
  // instead of dereferencing a null pointer below.
  if (!mDecoder) {
    _retval.Truncate();
    return NS_ERROR_FAILURE;
  }

  // The documentation for this method says it should be called after
  // ConvertFromUnicode(). However, our own tests called it after
  // convertFromByteArray(), i.e. when *decoding*.
  // Assuming that there exist extensions that similarly call
  // this at the wrong time, let's deal. In general, it is a design
  // error for this class to handle conversions in both directions.
  if (!mEncoder) {
    // Decode-only converter: there is no encoder output to flush, so just
    // reset the decoder.
    _retval.Truncate();
    mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
    return NS_OK;
  }

  // If we are encoding to ISO-2022-JP, potentially
  // transition back to the ASCII state. The buffer
  // needs to be large enough for an additional NCR,
  // though.
  _retval.SetLength(13);
  Span<char16_t> src(nullptr);
  uint32_t result;
  size_t read;
  size_t written;
  bool hadErrors;
  // Empty input with last == true: only the end-of-stream bytes, if any,
  // are produced.
  Tie(result, read, written, hadErrors) =
    mEncoder->EncodeFromUTF16(src, _retval, true);
  Unused << hadErrors;
  MOZ_ASSERT(!read);
  MOZ_ASSERT(result == kInputEmpty);
  _retval.SetLength(written);

  // Reset both directions so that subsequent conversions start fresh.
  mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder);
  mEncoder->Encoding()->NewEncoderInto(*mEncoder);
  return NS_OK;
}
113
114
// Decodes the bytes in aSrc with the current decoder and stores the UTF-16
// result in _retval.
//
// Returns NS_ERROR_FAILURE when no decoder is available,
// NS_ERROR_OUT_OF_MEMORY when the output cannot be sized,
// NS_ERROR_UDEC_ILLEGALINPUT for input the UTF-8 path does not fully
// consume, and NS_OK otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval)
{
  if (!mDecoder) {
    return NS_ERROR_FAILURE;
  }

  const uint32_t byteCount = aSrc.Length();

  // Size the destination for the decoder's worst case.
  CheckedInt<size_t> capacity = mDecoder->MaxUTF16BufferLength(byteCount);
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  if (!_retval.SetLength(capacity.value(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto input =
    MakeSpan(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), byteCount);
  uint32_t status;
  size_t consumed;
  size_t produced;
  bool hadErrors;

  // The UTF-8 decoder used to throw regardless of the error behavior.
  // Keep simulating that for compatibility with legacy callers; callers
  // that want control over the behavior should switch to TextDecoder.
  const bool strictUtf8 = (mDecoder->Encoding() == UTF_8_ENCODING);
  if (!strictUtf8) {
    Tie(status, consumed, produced, hadErrors) =
      mDecoder->DecodeToUTF16(input, _retval, false);
  } else {
    Tie(status, consumed, produced) =
      mDecoder->DecodeToUTF16WithoutReplacement(input, _retval, false);
    if (status != kInputEmpty) {
      return NS_ERROR_UDEC_ILLEGALINPUT;
    }
  }

  MOZ_ASSERT(status == kInputEmpty);
  MOZ_ASSERT(consumed == byteCount);
  MOZ_ASSERT(produced <= capacity.value());
  Unused << hadErrors;

  // Shrink the string to the number of code units actually written.
  if (!_retval.SetLength(produced, fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  return NS_OK;
}
159
160
// Encodes aString with the current encoder into a freshly malloc()ed
// buffer. On success *_aData receives the buffer and *aLen the number of
// bytes written; the buffer is handed to the caller. Characters the target
// encoding cannot represent are written as a single '?' byte.
//
// Returns NS_ERROR_FAILURE when no encoder is available,
// NS_ERROR_OUT_OF_MEMORY when the buffer cannot be sized or allocated, and
// NS_OK otherwise.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString,
                                                 uint32_t* aLen,
                                                 uint8_t** _aData)
{
  if (!mEncoder) {
    return NS_ERROR_FAILURE;
  }

  CheckedInt<size_t> capacity =
    mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length());
  if (!capacity.isValid() || capacity.value() > UINT32_MAX) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  uint8_t* buffer = static_cast<uint8_t*>(malloc(capacity.value()));
  if (!buffer) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  auto input = MakeSpan(aString);
  auto output = MakeSpan(buffer, capacity.value());
  size_t produced = 0;

  for (;;) {
    uint32_t status;
    size_t consumed;
    size_t emitted;
    Tie(status, consumed, emitted) =
      mEncoder->EncodeFromUTF16WithoutReplacement(input, output, true);

    const bool sawUnmappable =
      !(status == kInputEmpty || status == kOutputFull);
    if (sawUnmappable) {
      // There's always room for one byte in the case of an unmappable
      // character, because otherwise we'd have gotten `kOutputFull`.
      output[emitted++] = '?';
    }

    produced += emitted;
    if (status == kInputEmpty) {
      break;
    }

    // Resume after what has been consumed and written so far.
    input = input.From(consumed);
    output = output.From(emitted);
  }

  // Hand the buffer and its used length to the caller.
  *_aData = buffer;
  MOZ_ASSERT(produced <= UINT32_MAX);
  *aLen = produced;
  return NS_OK;
}
204
205
// Encodes aString via ConvertToByteArray() and wraps the resulting bytes in
// a string input stream, which is returned (with a reference added) through
// _retval. Any failure from stream creation, conversion, or AdoptData() is
// propagated to the caller.
NS_IMETHODIMP
nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString,
                                                   nsIInputStream** _retval)
{
  nsresult rv;
  nsCOMPtr<nsIStringInputStream> stream =
    do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv);
  if (NS_FAILED(rv)) {
    return rv;
  }

  uint8_t* bytes;
  uint32_t byteCount;
  rv = ConvertToByteArray(aString, &byteCount, &bytes);
  if (NS_FAILED(rv)) {
    return rv;
  }

  rv = stream->AdoptData(reinterpret_cast<char*>(bytes), byteCount);
  if (NS_FAILED(rv)) {
    // AdoptData() failed, so the buffer is still ours to release.
    free(bytes);
    return rv;
  }

  NS_ADDREF(*_retval = stream);
  return rv;
}
230
231
// Writes the name of the current decoder's encoding into aCharset, or
// empties aCharset when there is no decoder. Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset)
{
  if (mDecoder) {
    mDecoder->Encoding()->Name(aCharset);
  } else {
    aCharset.Truncate();
  }
  return NS_OK;
}
241
242
// Selects the charset to convert with by delegating to InitConverter(),
// whose result is returned unchanged.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset)
{
  const nsresult rv = InitConverter(aCharset);
  return rv;
}
247
248
// Attribute getter: copies the stored mIsInternal flag into *aIsInternal.
// Always returns NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::GetIsInternal(bool* aIsInternal)
{
  *aIsInternal = mIsInternal;

  return NS_OK;
}
254
255
// Attribute setter: stores aIsInternal in mIsInternal. Always returns
// NS_OK.
NS_IMETHODIMP
nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal)
{
  mIsInternal = aIsInternal;

  return NS_OK;
}
261
262
// Drops any existing encoder/decoder and creates new ones for the encoding
// named by the label aCharset.
//
// Returns NS_ERROR_UCONV_NOCONV (leaving both converters null) when the
// label does not resolve to an encoding, NS_OK otherwise. No encoder is
// created for UTF-16LE / UTF-16BE, so mEncoder stays null for those; a
// decoder is always created.
nsresult
nsScriptableUnicodeConverter::InitConverter(const nsACString& aCharset)
{
  // Start from a clean slate so that a failed lookup leaves no stale
  // converters behind.
  mEncoder = nullptr;
  mDecoder = nullptr;

  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!encoding) {
    return NS_ERROR_UCONV_NOCONV;
  }

  const bool isUtf16 =
    encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING;
  if (!isUtf16) {
    mEncoder = encoding->NewEncoder();
  }
  mDecoder = encoding->NewDecoderWithBOMRemoval();

  return NS_OK;
}