/src/mozilla-central/intl/uconv/nsConverterInputStream.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #include "nsConverterInputStream.h" |
7 | | #include "nsIInputStream.h" |
8 | | #include "nsReadLine.h" |
9 | | #include "nsStreamUtils.h" |
10 | | #include <algorithm> |
11 | | #include "mozilla/Unused.h" |
12 | | |
13 | | using namespace mozilla; |
14 | | |
15 | 0 | #define CONVERTER_BUFFER_SIZE 8192 |
16 | | |
17 | | NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream, |
18 | | nsIUnicharInputStream, nsIUnicharLineInputStream) |
19 | | |
20 | | |
21 | | NS_IMETHODIMP |
22 | | nsConverterInputStream::Init(nsIInputStream* aStream, |
23 | | const char *aCharset, |
24 | | int32_t aBufferSize, |
25 | | char16_t aReplacementChar) |
26 | 0 | { |
27 | 0 | nsAutoCString label; |
28 | 0 | if (!aCharset) { |
29 | 0 | label.AssignLiteral("UTF-8"); |
30 | 0 | } else { |
31 | 0 | label = aCharset; |
32 | 0 | } |
33 | 0 |
|
34 | 0 | auto encoding = Encoding::ForLabelNoReplacement(label); |
35 | 0 | if (!encoding) { |
36 | 0 | return NS_ERROR_UCONV_NOCONV; |
37 | 0 | } |
38 | 0 | // Previously, the implementation auto-switched only |
39 | 0 | // between the two UTF-16 variants and only when |
40 | 0 | // initialized with an endianness-unspecific label. |
41 | 0 | mConverter = encoding->NewDecoder(); |
42 | 0 |
|
43 | 0 | size_t outputBufferSize; |
44 | 0 | if (aBufferSize <= 0) { |
45 | 0 | aBufferSize = CONVERTER_BUFFER_SIZE; |
46 | 0 | outputBufferSize = CONVERTER_BUFFER_SIZE; |
47 | 0 | } else { |
48 | 0 | // NetUtil.jsm assumes that if buffer size equals |
49 | 0 | // the input size, the whole stream will be processed |
50 | 0 | // as one readString. This is not true with encoding_rs, |
51 | 0 | // because encoding_rs might want to see space for a |
52 | 0 | // surrogate pair, so let's compute a larger output |
53 | 0 | // buffer length. |
54 | 0 | CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize); |
55 | 0 | if (!needed.isValid()) { |
56 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
57 | 0 | } |
58 | 0 | outputBufferSize = needed.value(); |
59 | 0 | } |
60 | 0 |
|
61 | 0 | // set up our buffers. |
62 | 0 | if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) || |
63 | 0 | !mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) { |
64 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
65 | 0 | } |
66 | 0 | |
67 | 0 | mInput = aStream; |
68 | 0 | mErrorsAreFatal = !aReplacementChar; |
69 | 0 | return NS_OK; |
70 | 0 | } |
71 | | |
72 | | NS_IMETHODIMP |
73 | | nsConverterInputStream::Close() |
74 | 0 | { |
75 | 0 | nsresult rv = mInput ? mInput->Close() : NS_OK; |
76 | 0 | mLineBuffer = nullptr; |
77 | 0 | mInput = nullptr; |
78 | 0 | mConverter = nullptr; |
79 | 0 | mByteData.Clear(); |
80 | 0 | mUnicharData.Clear(); |
81 | 0 | return rv; |
82 | 0 | } |
83 | | |
84 | | NS_IMETHODIMP |
85 | | nsConverterInputStream::Read(char16_t* aBuf, |
86 | | uint32_t aCount, |
87 | | uint32_t *aReadCount) |
88 | 0 | { |
89 | 0 | NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
90 | 0 | uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
91 | 0 | if (0 == readCount) { |
92 | 0 | // Fill the unichar buffer |
93 | 0 | readCount = Fill(&mLastErrorCode); |
94 | 0 | if (readCount == 0) { |
95 | 0 | *aReadCount = 0; |
96 | 0 | return mLastErrorCode; |
97 | 0 | } |
98 | 0 | } |
99 | 0 | if (readCount > aCount) { |
100 | 0 | readCount = aCount; |
101 | 0 | } |
102 | 0 | memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset, |
103 | 0 | readCount * sizeof(char16_t)); |
104 | 0 | mUnicharDataOffset += readCount; |
105 | 0 | *aReadCount = readCount; |
106 | 0 | return NS_OK; |
107 | 0 | } |
108 | | |
109 | | NS_IMETHODIMP |
110 | | nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, |
111 | | void* aClosure, |
112 | | uint32_t aCount, uint32_t *aReadCount) |
113 | 0 | { |
114 | 0 | NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
115 | 0 | uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset; |
116 | 0 | nsresult rv; |
117 | 0 | if (0 == bytesToWrite) { |
118 | 0 | // Fill the unichar buffer |
119 | 0 | bytesToWrite = Fill(&rv); |
120 | 0 | if (bytesToWrite <= 0) { |
121 | 0 | *aReadCount = 0; |
122 | 0 | return rv; |
123 | 0 | } |
124 | 0 | if (NS_FAILED(rv)) { |
125 | 0 | return rv; |
126 | 0 | } |
127 | 0 | } |
128 | 0 | |
129 | 0 | if (bytesToWrite > aCount) |
130 | 0 | bytesToWrite = aCount; |
131 | 0 |
|
132 | 0 | uint32_t bytesWritten; |
133 | 0 | uint32_t totalBytesWritten = 0; |
134 | 0 |
|
135 | 0 | while (bytesToWrite) { |
136 | 0 | rv = aWriter(this, aClosure, |
137 | 0 | mUnicharData.Elements() + mUnicharDataOffset, |
138 | 0 | totalBytesWritten, bytesToWrite, &bytesWritten); |
139 | 0 | if (NS_FAILED(rv)) { |
140 | 0 | // don't propagate errors to the caller |
141 | 0 | break; |
142 | 0 | } |
143 | 0 | |
144 | 0 | bytesToWrite -= bytesWritten; |
145 | 0 | totalBytesWritten += bytesWritten; |
146 | 0 | mUnicharDataOffset += bytesWritten; |
147 | 0 | } |
148 | 0 |
|
149 | 0 | *aReadCount = totalBytesWritten; |
150 | 0 |
|
151 | 0 | return NS_OK; |
152 | 0 | } |
153 | | |
154 | | NS_IMETHODIMP |
155 | | nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString, |
156 | | uint32_t* aReadCount) |
157 | 0 | { |
158 | 0 | NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness"); |
159 | 0 | uint32_t readCount = mUnicharDataLength - mUnicharDataOffset; |
160 | 0 | if (0 == readCount) { |
161 | 0 | // Fill the unichar buffer |
162 | 0 | readCount = Fill(&mLastErrorCode); |
163 | 0 | if (readCount == 0) { |
164 | 0 | *aReadCount = 0; |
165 | 0 | return mLastErrorCode; |
166 | 0 | } |
167 | 0 | } |
168 | 0 | if (readCount > aCount) { |
169 | 0 | readCount = aCount; |
170 | 0 | } |
171 | 0 | const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset; |
172 | 0 | aString.Assign(buf, readCount); |
173 | 0 | mUnicharDataOffset += readCount; |
174 | 0 | *aReadCount = readCount; |
175 | 0 | return NS_OK; |
176 | 0 | } |
177 | | |
178 | | uint32_t |
179 | | nsConverterInputStream::Fill(nsresult * aErrorCode) |
180 | 0 | { |
181 | 0 | if (nullptr == mInput) { |
182 | 0 | // We already closed the stream! |
183 | 0 | *aErrorCode = NS_BASE_STREAM_CLOSED; |
184 | 0 | return 0; |
185 | 0 | } |
186 | 0 | |
187 | 0 | if (NS_FAILED(mLastErrorCode)) { |
188 | 0 | // We failed to completely convert last time, and error-recovery |
189 | 0 | // is disabled. We will fare no better this time, so... |
190 | 0 | *aErrorCode = mLastErrorCode; |
191 | 0 | return 0; |
192 | 0 | } |
193 | 0 | |
194 | 0 | // We assume a many to one conversion and are using equal sizes for |
195 | 0 | // the two buffers. However if an error happens at the very start |
196 | 0 | // of a byte buffer we may end up in a situation where n bytes lead |
197 | 0 | // to n+1 unicode chars. Thus we need to keep track of the leftover |
198 | 0 | // bytes as we convert. |
199 | 0 | |
200 | 0 | uint32_t nb; |
201 | 0 | *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb); |
202 | 0 | if (nb == 0 && mLeftOverBytes == 0) { |
203 | 0 | // No more data |
204 | 0 | *aErrorCode = NS_OK; |
205 | 0 | return 0; |
206 | 0 | } |
207 | 0 | |
208 | 0 | NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(), |
209 | 0 | "mByteData is lying to us somewhere"); |
210 | 0 |
|
211 | 0 | // Now convert as much of the byte buffer to unicode as possible |
212 | 0 | auto src = AsBytes(MakeSpan(mByteData)); |
213 | 0 | auto dst = MakeSpan(mUnicharData); |
214 | 0 | // mUnicharData.Length() is the buffer length, not the fill status. |
215 | 0 | // mUnicharDataLength reflects the current fill status. |
216 | 0 | mUnicharDataLength = 0; |
217 | 0 | // Whenever we convert, mUnicharData is logically empty. |
218 | 0 | mUnicharDataOffset = 0; |
219 | 0 | // Truncation from size_t to uint32_t below is OK, because the sizes |
220 | 0 | // are bounded by the lengths of mByteData and mUnicharData. |
221 | 0 | uint32_t result; |
222 | 0 | size_t read; |
223 | 0 | size_t written; |
224 | 0 | bool hadErrors; |
225 | 0 | // The design of this class is fundamentally bogus in that trailing |
226 | 0 | // errors are ignored. Always passing false as the last argument to |
227 | 0 | // Decode* calls below. |
228 | 0 | if (mErrorsAreFatal) { |
229 | 0 | Tie(result, read, written) = |
230 | 0 | mConverter->DecodeToUTF16WithoutReplacement(src, dst, false); |
231 | 0 | } else { |
232 | 0 | Tie(result, read, written, hadErrors) = |
233 | 0 | mConverter->DecodeToUTF16(src, dst, false); |
234 | 0 | } |
235 | 0 | Unused << hadErrors; |
236 | 0 | mLeftOverBytes = mByteData.Length() - read; |
237 | 0 | mUnicharDataLength = written; |
238 | 0 | if (result == kInputEmpty || result == kOutputFull) { |
239 | 0 | *aErrorCode = NS_OK; |
240 | 0 | } else { |
241 | 0 | MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?"); |
242 | 0 | *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT; |
243 | 0 | } |
244 | 0 | return mUnicharDataLength; |
245 | 0 | } |
246 | | |
247 | | NS_IMETHODIMP |
248 | | nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult) |
249 | 0 | { |
250 | 0 | if (!mLineBuffer) { |
251 | 0 | mLineBuffer = new nsLineBuffer<char16_t>; |
252 | 0 | } |
253 | 0 | return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult); |
254 | 0 | } |