/src/mozilla-central/intl/uconv/nsScriptableUConv.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "nsString.h" |
8 | | #include "nsIScriptableUConv.h" |
9 | | #include "nsScriptableUConv.h" |
10 | | #include "nsIStringStream.h" |
11 | | #include "nsComponentManagerUtils.h" |
12 | | |
13 | | using namespace mozilla; |
14 | | |
15 | | /* Implementation file */ |
16 | | NS_IMPL_ISUPPORTS(nsScriptableUnicodeConverter, nsIScriptableUnicodeConverter) |
17 | | |
18 | | nsScriptableUnicodeConverter::nsScriptableUnicodeConverter() |
19 | | : mIsInternal(false) |
20 | 0 | { |
21 | 0 | } |
22 | | |
23 | | nsScriptableUnicodeConverter::~nsScriptableUnicodeConverter() |
24 | 0 | { |
25 | 0 | } |
26 | | |
27 | | NS_IMETHODIMP |
28 | | nsScriptableUnicodeConverter::ConvertFromUnicode(const nsAString& aSrc, |
29 | | nsACString& _retval) |
30 | 0 | { |
31 | 0 | if (!mEncoder) |
32 | 0 | return NS_ERROR_FAILURE; |
33 | 0 | |
34 | 0 | // We can compute the length without replacement, because the |
35 | 0 | // the replacement is only one byte long and a mappable character |
36 | 0 | // would always output something, i.e. at least one byte. |
37 | 0 | // When encoding to ISO-2022-JP, unmappables shouldn't be able |
38 | 0 | // to cause more escape sequences to be emitted than the mappable |
39 | 0 | // worst case where every input character causes an escape into |
40 | 0 | // a different state. |
41 | 0 | CheckedInt<size_t> needed = |
42 | 0 | mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aSrc.Length()); |
43 | 0 | if (!needed.isValid() || needed.value() > UINT32_MAX) { |
44 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
45 | 0 | } |
46 | 0 | |
47 | 0 | if (!_retval.SetLength(needed.value(), fallible)) { |
48 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
49 | 0 | } |
50 | 0 | |
51 | 0 | auto src = MakeSpan(aSrc); |
52 | 0 | auto dst = AsWritableBytes(MakeSpan(_retval)); |
53 | 0 | size_t totalWritten = 0; |
54 | 0 | for (;;) { |
55 | 0 | uint32_t result; |
56 | 0 | size_t read; |
57 | 0 | size_t written; |
58 | 0 | Tie(result, read, written) = |
59 | 0 | mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, false); |
60 | 0 | if (result != kInputEmpty && result != kOutputFull) { |
61 | 0 | MOZ_RELEASE_ASSERT(written < dst.Length(), |
62 | 0 | "Unmappables with one-byte replacement should not exceed mappable worst case."); |
63 | 0 | dst[written++] = '?'; |
64 | 0 | } |
65 | 0 | totalWritten += written; |
66 | 0 | if (result == kInputEmpty) { |
67 | 0 | MOZ_ASSERT(totalWritten <= UINT32_MAX); |
68 | 0 | if (!_retval.SetLength(totalWritten, fallible)) { |
69 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
70 | 0 | } |
71 | 0 | return NS_OK; |
72 | 0 | } |
73 | 0 | src = src.From(read); |
74 | 0 | dst = dst.From(written); |
75 | 0 | } |
76 | 0 | } |
77 | | |
78 | | NS_IMETHODIMP |
79 | | nsScriptableUnicodeConverter::Finish(nsACString& _retval) |
80 | 0 | { |
81 | 0 | // The documentation for this method says it should be called after |
82 | 0 | // ConvertFromUnicode(). However, our own tests called it after |
83 | 0 | // convertFromByteArray(), i.e. when *decoding*. |
84 | 0 | // Assuming that there exists extensions that similarly call |
85 | 0 | // this at the wrong time, let's deal. In general, it is a design |
86 | 0 | // error for this class to handle conversions in both directions. |
87 | 0 | if (!mEncoder) { |
88 | 0 | _retval.Truncate(); |
89 | 0 | mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); |
90 | 0 | return NS_OK; |
91 | 0 | } |
92 | 0 | // If we are encoding to ISO-2022-JP, potentially |
93 | 0 | // transition back to the ASCII state. The buffer |
94 | 0 | // needs to be large enough for an additional NCR, |
95 | 0 | // though. |
96 | 0 | _retval.SetLength(13); |
97 | 0 | Span<char16_t> src(nullptr); |
98 | 0 | uint32_t result; |
99 | 0 | size_t read; |
100 | 0 | size_t written; |
101 | 0 | bool hadErrors; |
102 | 0 | Tie(result, read, written, hadErrors) = |
103 | 0 | mEncoder->EncodeFromUTF16(src, _retval, true); |
104 | 0 | Unused << hadErrors; |
105 | 0 | MOZ_ASSERT(!read); |
106 | 0 | MOZ_ASSERT(result == kInputEmpty); |
107 | 0 | _retval.SetLength(written); |
108 | 0 |
|
109 | 0 | mDecoder->Encoding()->NewDecoderWithBOMRemovalInto(*mDecoder); |
110 | 0 | mEncoder->Encoding()->NewEncoderInto(*mEncoder); |
111 | 0 | return NS_OK; |
112 | 0 | } |
113 | | |
114 | | NS_IMETHODIMP |
115 | | nsScriptableUnicodeConverter::ConvertToUnicode(const nsACString& aSrc, nsAString& _retval) |
116 | 0 | { |
117 | 0 | if (!mDecoder) |
118 | 0 | return NS_ERROR_FAILURE; |
119 | 0 | |
120 | 0 | uint32_t length = aSrc.Length(); |
121 | 0 |
|
122 | 0 | CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(length); |
123 | 0 | if (!needed.isValid() || needed.value() > UINT32_MAX) { |
124 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
125 | 0 | } |
126 | 0 | |
127 | 0 | if (!_retval.SetLength(needed.value(), fallible)) { |
128 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
129 | 0 | } |
130 | 0 | |
131 | 0 | auto src = MakeSpan(reinterpret_cast<const uint8_t*>(aSrc.BeginReading()), length); |
132 | 0 | uint32_t result; |
133 | 0 | size_t read; |
134 | 0 | size_t written; |
135 | 0 | bool hadErrors; |
136 | 0 | // The UTF-8 decoder used to throw regardless of the error behavior. |
137 | 0 | // Simulating the old behavior for compatibility with legacy callers. |
138 | 0 | // If callers want control over the behavior, they should switch to |
139 | 0 | // TextDecoder. |
140 | 0 | if (mDecoder->Encoding() == UTF_8_ENCODING) { |
141 | 0 | Tie(result, read, written) = |
142 | 0 | mDecoder->DecodeToUTF16WithoutReplacement(src, _retval, false); |
143 | 0 | if (result != kInputEmpty) { |
144 | 0 | return NS_ERROR_UDEC_ILLEGALINPUT; |
145 | 0 | } |
146 | 0 | } else { |
147 | 0 | Tie(result, read, written, hadErrors) = |
148 | 0 | mDecoder->DecodeToUTF16(src, _retval, false); |
149 | 0 | } |
150 | 0 | MOZ_ASSERT(result == kInputEmpty); |
151 | 0 | MOZ_ASSERT(read == length); |
152 | 0 | MOZ_ASSERT(written <= needed.value()); |
153 | 0 | Unused << hadErrors; |
154 | 0 | if (!_retval.SetLength(written, fallible)) { |
155 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
156 | 0 | } |
157 | 0 | return NS_OK; |
158 | 0 | } |
159 | | |
160 | | NS_IMETHODIMP |
161 | | nsScriptableUnicodeConverter::ConvertToByteArray(const nsAString& aString, |
162 | | uint32_t* aLen, |
163 | | uint8_t** _aData) |
164 | 0 | { |
165 | 0 | if (!mEncoder) |
166 | 0 | return NS_ERROR_FAILURE; |
167 | 0 | |
168 | 0 | CheckedInt<size_t> needed = |
169 | 0 | mEncoder->MaxBufferLengthFromUTF16WithoutReplacement(aString.Length()); |
170 | 0 | if (!needed.isValid() || needed.value() > UINT32_MAX) { |
171 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
172 | 0 | } |
173 | 0 | |
174 | 0 | uint8_t* data = (uint8_t*)malloc(needed.value()); |
175 | 0 | if (!data) { |
176 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
177 | 0 | } |
178 | 0 | auto src = MakeSpan(aString); |
179 | 0 | auto dst = MakeSpan(data, needed.value()); |
180 | 0 | size_t totalWritten = 0; |
181 | 0 | for (;;) { |
182 | 0 | uint32_t result; |
183 | 0 | size_t read; |
184 | 0 | size_t written; |
185 | 0 | Tie(result, read, written) = |
186 | 0 | mEncoder->EncodeFromUTF16WithoutReplacement(src, dst, true); |
187 | 0 | if (result != kInputEmpty && result != kOutputFull) { |
188 | 0 | // There's always room for one byte in the case of |
189 | 0 | // an unmappable character, because otherwise |
190 | 0 | // we'd have gotten `kOutputFull`. |
191 | 0 | dst[written++] = '?'; |
192 | 0 | } |
193 | 0 | totalWritten += written; |
194 | 0 | if (result == kInputEmpty) { |
195 | 0 | *_aData = data; |
196 | 0 | MOZ_ASSERT(totalWritten <= UINT32_MAX); |
197 | 0 | *aLen = totalWritten; |
198 | 0 | return NS_OK; |
199 | 0 | } |
200 | 0 | src = src.From(read); |
201 | 0 | dst = dst.From(written); |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | | NS_IMETHODIMP |
206 | | nsScriptableUnicodeConverter::ConvertToInputStream(const nsAString& aString, |
207 | | nsIInputStream** _retval) |
208 | 0 | { |
209 | 0 | nsresult rv; |
210 | 0 | nsCOMPtr<nsIStringInputStream> inputStream = |
211 | 0 | do_CreateInstance("@mozilla.org/io/string-input-stream;1", &rv); |
212 | 0 | if (NS_FAILED(rv)) |
213 | 0 | return rv; |
214 | 0 | |
215 | 0 | uint8_t* data; |
216 | 0 | uint32_t dataLen; |
217 | 0 | rv = ConvertToByteArray(aString, &dataLen, &data); |
218 | 0 | if (NS_FAILED(rv)) |
219 | 0 | return rv; |
220 | 0 | |
221 | 0 | rv = inputStream->AdoptData(reinterpret_cast<char*>(data), dataLen); |
222 | 0 | if (NS_FAILED(rv)) { |
223 | 0 | free(data); |
224 | 0 | return rv; |
225 | 0 | } |
226 | 0 | |
227 | 0 | NS_ADDREF(*_retval = inputStream); |
228 | 0 | return rv; |
229 | 0 | } |
230 | | |
231 | | NS_IMETHODIMP |
232 | | nsScriptableUnicodeConverter::GetCharset(nsACString& aCharset) |
233 | 0 | { |
234 | 0 | if (!mDecoder) { |
235 | 0 | aCharset.Truncate(); |
236 | 0 | } else { |
237 | 0 | mDecoder->Encoding()->Name(aCharset); |
238 | 0 | } |
239 | 0 | return NS_OK; |
240 | 0 | } |
241 | | |
242 | | NS_IMETHODIMP |
243 | | nsScriptableUnicodeConverter::SetCharset(const nsACString& aCharset) |
244 | 0 | { |
245 | 0 | return InitConverter(aCharset); |
246 | 0 | } |
247 | | |
248 | | NS_IMETHODIMP |
249 | | nsScriptableUnicodeConverter::GetIsInternal(bool *aIsInternal) |
250 | 0 | { |
251 | 0 | *aIsInternal = mIsInternal; |
252 | 0 | return NS_OK; |
253 | 0 | } |
254 | | |
255 | | NS_IMETHODIMP |
256 | | nsScriptableUnicodeConverter::SetIsInternal(const bool aIsInternal) |
257 | 0 | { |
258 | 0 | mIsInternal = aIsInternal; |
259 | 0 | return NS_OK; |
260 | 0 | } |
261 | | |
262 | | nsresult |
263 | | nsScriptableUnicodeConverter::InitConverter(const nsACString& aCharset) |
264 | 0 | { |
265 | 0 | mEncoder = nullptr; |
266 | 0 | mDecoder = nullptr; |
267 | 0 |
|
268 | 0 | auto encoding = Encoding::ForLabelNoReplacement(aCharset); |
269 | 0 | if (!encoding) { |
270 | 0 | return NS_ERROR_UCONV_NOCONV; |
271 | 0 | } |
272 | 0 | if (!(encoding == UTF_16LE_ENCODING || encoding == UTF_16BE_ENCODING)) { |
273 | 0 | mEncoder = encoding->NewEncoder(); |
274 | 0 | } |
275 | 0 | mDecoder = encoding->NewDecoderWithBOMRemoval(); |
276 | 0 | return NS_OK; |
277 | 0 | } |