/src/mozilla-central/intl/uconv/nsConverterInputStream.cpp

Source (jump to first uncovered line)
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsConverterInputStream.h"
#include "nsIInputStream.h"
#include "nsReadLine.h"
#include "nsStreamUtils.h"
#include <algorithm>
#include "mozilla/Unused.h"

using namespace mozilla;

#define CONVERTER_BUFFER_SIZE 8192

NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
                  nsIUnicharInputStream, nsIUnicharLineInputStream)


/**
 * Prepares the stream for decoding bytes read from aStream.
 *
 * @param aStream          the byte stream to decode; stored in mInput.
 * @param aCharset         encoding label; a null pointer selects "UTF-8".
 * @param aBufferSize      requested byte-buffer capacity; a value <= 0
 *                         selects CONVERTER_BUFFER_SIZE.
 * @param aReplacementChar only tested for zero here: a zero value makes
 *                         decoding errors fatal (mErrorsAreFatal).
 * @return NS_ERROR_UCONV_NOCONV if the label does not resolve to an encoding,
 *         NS_ERROR_OUT_OF_MEMORY if the output size computation overflows or
 *         a buffer cannot be allocated, NS_OK otherwise.
 */
NS_IMETHODIMP
nsConverterInputStream::Init(nsIInputStream* aStream,
                             const char *aCharset,
                             int32_t aBufferSize,
                             char16_t aReplacementChar)
{
    // Resolve the label, defaulting to UTF-8 when none was supplied.
    nsAutoCString label;
    if (aCharset) {
        label = aCharset;
    } else {
        label.AssignLiteral("UTF-8");
    }

    auto encoding = Encoding::ForLabelNoReplacement(label);
    if (!encoding) {
        return NS_ERROR_UCONV_NOCONV;
    }
    // Previously, the implementation auto-switched only
    // between the two UTF-16 variants and only when
    // initialized with an endianness-unspecific label.
    mConverter = encoding->NewDecoder();

    // Default case: both buffers get CONVERTER_BUFFER_SIZE elements.
    size_t outputBufferSize = CONVERTER_BUFFER_SIZE;
    if (aBufferSize > 0) {
        // NetUtil.jsm assumes that if buffer size equals
        // the input size, the whole stream will be processed
        // as one readString. This is not true with encoding_rs,
        // because encoding_rs might want to see space for a
        // surrogate pair, so let's compute a larger output
        // buffer length.
        CheckedInt<size_t> needed =
            mConverter->MaxUTF16BufferLength(aBufferSize);
        if (!needed.isValid()) {
            return NS_ERROR_OUT_OF_MEMORY;
        }
        outputBufferSize = needed.value();
    } else {
        aBufferSize = CONVERTER_BUFFER_SIZE;
    }

    // Set up our buffers: capacity only for the bytes, full length for the
    // UTF-16 output.
    if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
    }
    if (!mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
        return NS_ERROR_OUT_OF_MEMORY;
    }

    mInput = aStream;
    mErrorsAreFatal = !aReplacementChar;
    return NS_OK;
}

/**
 * Closes the underlying stream (if any) and drops all state held by this
 * object: the line buffer, the input stream, the decoder and both buffers.
 *
 * @return the result of closing mInput, or NS_OK when there was no input.
 */
NS_IMETHODIMP
nsConverterInputStream::Close()
{
    nsresult closeResult = NS_OK;
    if (mInput) {
        closeResult = mInput->Close();
    }

    // Release everything regardless of whether the close succeeded.
    mLineBuffer = nullptr;
    mInput = nullptr;
    mConverter = nullptr;
    mByteData.Clear();
    mUnicharData.Clear();

    return closeResult;
}

/**
 * Copies up to aCount decoded UTF-16 code units into aBuf.
 *
 * Pending data in mUnicharData is served first; the buffer is refilled via
 * Fill() only when it is empty. At most one buffer's worth is returned per
 * call.
 *
 * @param aBuf       destination for the code units.
 * @param aCount     maximum number of code units to copy.
 * @param aReadCount receives the number of code units copied.
 * @return NS_OK when data was copied; otherwise mLastErrorCode as set by
 *         Fill(), with *aReadCount set to 0.
 */
NS_IMETHODIMP
nsConverterInputStream::Read(char16_t* aBuf,
                             uint32_t aCount,
                             uint32_t *aReadCount)
{
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
  if (available == 0) {
    // Nothing pending: decode another chunk.
    available = Fill(&mLastErrorCode);
    if (available == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  const uint32_t unitsToCopy = std::min(available, aCount);
  const char16_t* from = mUnicharData.Elements() + mUnicharDataOffset;
  memcpy(aBuf, from, unitsToCopy * sizeof(char16_t));

  mUnicharDataOffset += unitsToCopy;
  *aReadCount = unitsToCopy;
  return NS_OK;
}

/**
 * Hands up to aCount decoded UTF-16 code units to aWriter, possibly in
 * several calls, consuming from mUnicharData whatever the writer accepts.
 *
 * Pending data is served first; the buffer is refilled via Fill() only when
 * it is empty.
 *
 * @param aWriter    callback that consumes a segment and reports how many
 *                   code units it took.
 * @param aClosure   opaque pointer passed through to aWriter.
 * @param aCount     maximum number of code units to offer.
 * @param aReadCount receives the total number of code units consumed; it is
 *                   set to 0 on every early-return path.
 * @return the status reported by Fill() when no data could be produced or
 *         decoding failed; NS_OK otherwise. Errors returned by aWriter are
 *         deliberately not propagated.
 */
NS_IMETHODIMP
nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
                                     void* aClosure,
                                     uint32_t aCount, uint32_t *aReadCount)
{
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  // Give the out-parameter a defined value on every exit path; previously
  // the decode-failure return left it untouched.
  *aReadCount = 0;

  uint32_t unitsToWrite = mUnicharDataLength - mUnicharDataOffset;
  if (0 == unitsToWrite) {
    // Fill the unichar buffer
    nsresult fillResult;
    unitsToWrite = Fill(&fillResult);
    if (0 == unitsToWrite || NS_FAILED(fillResult)) {
      return fillResult;
    }
  }

  if (unitsToWrite > aCount) {
    unitsToWrite = aCount;
  }

  uint32_t totalUnitsWritten = 0;

  while (unitsToWrite) {
    uint32_t unitsWritten = 0;
    nsresult rv = aWriter(this, aClosure,
                          mUnicharData.Elements() + mUnicharDataOffset,
                          totalUnitsWritten, unitsToWrite, &unitsWritten);
    if (NS_FAILED(rv)) {
      // don't propagate errors to the caller
      break;
    }
    if (0 == unitsWritten) {
      // A writer that succeeds without consuming anything would otherwise
      // keep this loop spinning forever.
      break;
    }
    if (unitsWritten > unitsToWrite) {
      // Never trust the writer to stay within what it was offered; an
      // oversized count would underflow unitsToWrite and push the offset
      // past the decoded data.
      NS_ASSERTION(false, "writer consumed more than it was offered");
      unitsWritten = unitsToWrite;
    }

    unitsToWrite -= unitsWritten;
    totalUnitsWritten += unitsWritten;
    mUnicharDataOffset += unitsWritten;
  }

  *aReadCount = totalUnitsWritten;

  return NS_OK;
}

/**
 * Assigns up to aCount decoded UTF-16 code units to aString.
 *
 * Pending data in mUnicharData is served first; the buffer is refilled via
 * Fill() only when it is empty. At most one buffer's worth is returned per
 * call.
 *
 * @param aCount     maximum number of code units to return.
 * @param aString    overwritten with the code units that were read.
 * @param aReadCount receives the number of code units placed in aString.
 * @return NS_OK when data was returned; otherwise mLastErrorCode as set by
 *         Fill(), with *aReadCount set to 0 and aString left untouched.
 */
NS_IMETHODIMP
nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
                                   uint32_t* aReadCount)
{
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");

  uint32_t available = mUnicharDataLength - mUnicharDataOffset;
  if (available == 0) {
    // Nothing pending: decode another chunk.
    available = Fill(&mLastErrorCode);
    if (available == 0) {
      *aReadCount = 0;
      return mLastErrorCode;
    }
  }

  const uint32_t unitsToTake = std::min(available, aCount);
  aString.Assign(mUnicharData.Elements() + mUnicharDataOffset, unitsToTake);

  mUnicharDataOffset += unitsToTake;
  *aReadCount = unitsToTake;
  return NS_OK;
}

/**
 * Refills mUnicharData: pulls more bytes from mInput into mByteData (keeping
 * the mLeftOverBytes the decoder did not consume last time) and decodes them
 * to UTF-16.
 *
 * On a decode attempt, mUnicharDataOffset is reset to 0 and
 * mUnicharDataLength is set to the number of code units written.
 *
 * @param aErrorCode receives the status:
 *                   NS_BASE_STREAM_CLOSED if mInput is null;
 *                   the previous mLastErrorCode if that was a failure;
 *                   NS_OK at end of data or after a decode that ended with
 *                   kInputEmpty / kOutputFull;
 *                   NS_ERROR_UDEC_ILLEGALINPUT for any other decode result.
 *                   Read() and ReadString() pass &mLastErrorCode here, so
 *                   this pointer may alias mLastErrorCode.
 * @return the number of UTF-16 code units now available in mUnicharData;
 *         0 when the early-return paths are taken.
 */
uint32_t
nsConverterInputStream::Fill(nsresult * aErrorCode)
{
  if (nullptr == mInput) {
    // We already closed the stream!
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return 0;
  }

  // Must be checked before anything is written through aErrorCode, because
  // aErrorCode may point at mLastErrorCode itself.
  if (NS_FAILED(mLastErrorCode)) {
    // We failed to completely convert last time, and error-recovery
    // is disabled.  We will fare no better this time, so...
    *aErrorCode = mLastErrorCode;
    return 0;
  }

  // We assume a many to one conversion and are using equal sizes for
  // the two buffers.  However if an error happens at the very start
  // of a byte buffer we may end up in a situation where n bytes lead
  // to n+1 unicode chars.  Thus we need to keep track of the leftover
  // bytes as we convert.
  // NOTE(review): Init() only uses equal sizes in the default case; when a
  // buffer size is supplied the output buffer is sized with
  // MaxUTF16BufferLength(), so "equal sizes" above looks stale.

  // NOTE(review): the status stored here is overwritten on every path below
  // (NS_OK when there is no data, otherwise the decode outcome), so a read
  // failure from NS_FillArray is never surfaced to the caller — confirm that
  // this is intended. Also assumes NS_FillArray always sets nb, even on
  // failure — TODO confirm.
  uint32_t nb;
  *aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
  if (nb == 0 && mLeftOverBytes == 0) {
    // No more data
    *aErrorCode = NS_OK;
    return 0;
  }

  NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
               "mByteData is lying to us somewhere");

  // Now convert as much of the byte buffer to unicode as possible
  auto src = AsBytes(MakeSpan(mByteData));
  auto dst = MakeSpan(mUnicharData);
  // mUnicharData.Length() is the buffer length, not the fill status.
  // mUnicharDataLength reflects the current fill status.
  mUnicharDataLength = 0;
  // Whenever we convert, mUnicharData is logically empty.
  mUnicharDataOffset = 0;
  // Truncation from size_t to uint32_t below is OK, because the sizes
  // are bounded by the lengths of mByteData and mUnicharData.
  uint32_t result;
  size_t read;
  size_t written;
  // Only assigned by the replacement-enabled branch below.
  bool hadErrors;
  // The design of this class is fundamentally bogus in that trailing
  // errors are ignored. Always passing false as the last argument to
  // Decode* calls below.
  if (mErrorsAreFatal) {
    Tie(result, read, written) =
      mConverter->DecodeToUTF16WithoutReplacement(src, dst, false);
  } else {
    Tie(result, read, written, hadErrors) =
      mConverter->DecodeToUTF16(src, dst, false);
  }
  // The flag is intentionally ignored; this only marks it as unused.
  Unused << hadErrors;
  // Bytes the decoder did not consume are carried over to the next call.
  mLeftOverBytes = mByteData.Length() - read;
  mUnicharDataLength = written;
  if (result == kInputEmpty || result == kOutputFull) {
    *aErrorCode = NS_OK;
  } else {
    // Any other result is treated as malformed input; the assertion below
    // expects this to happen only in the no-replacement mode.
    MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
    *aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
  }
  // Code units decoded before an error are still reported to the caller.
  return mUnicharDataLength;
}

/**
 * Reads one line of decoded text by delegating to NS_ReadLine.
 *
 * The line buffer is allocated on first use and kept in mLineBuffer until
 * Close() resets it.
 *
 * @param aLine   receives the line, as filled in by NS_ReadLine.
 * @param aResult out-flag filled in by NS_ReadLine.
 * @return whatever NS_ReadLine returns.
 */
NS_IMETHODIMP
nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
{
  // Create the line buffer lazily.
  if (!mLineBuffer) {
    mLineBuffer = new nsLineBuffer<char16_t>;
  }

  auto* lineBuffer = mLineBuffer.get();
  return NS_ReadLine(this, lineBuffer, aLine, aResult);
}

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2		/* This Source Code Form is subject to the terms of the Mozilla Public
3		* License, v. 2.0. If a copy of the MPL was not distributed with this
4		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6		#include "nsConverterInputStream.h"
7		#include "nsIInputStream.h"
8		#include "nsReadLine.h"
9		#include "nsStreamUtils.h"
10		#include <algorithm>
11		#include "mozilla/Unused.h"
12
13		using namespace mozilla;
14
15	0	#define CONVERTER_BUFFER_SIZE 8192
16
17		NS_IMPL_ISUPPORTS(nsConverterInputStream, nsIConverterInputStream,
18		nsIUnicharInputStream, nsIUnicharLineInputStream)
19
20
21		NS_IMETHODIMP
22		nsConverterInputStream::Init(nsIInputStream* aStream,
23		const char *aCharset,
24		int32_t aBufferSize,
25		char16_t aReplacementChar)
26	0	{
27	0	nsAutoCString label;
28	0	if (!aCharset) {
29	0	label.AssignLiteral("UTF-8");
30	0	} else {
31	0	label = aCharset;
32	0	}
33	0
34	0	auto encoding = Encoding::ForLabelNoReplacement(label);
35	0	if (!encoding) {
36	0	return NS_ERROR_UCONV_NOCONV;
37	0	}
38	0	// Previously, the implementation auto-switched only
39	0	// between the two UTF-16 variants and only when
40	0	// initialized with an endianness-unspecific label.
41	0	mConverter = encoding->NewDecoder();
42	0
43	0	size_t outputBufferSize;
44	0	if (aBufferSize <= 0) {
45	0	aBufferSize = CONVERTER_BUFFER_SIZE;
46	0	outputBufferSize = CONVERTER_BUFFER_SIZE;
47	0	} else {
48	0	// NetUtil.jsm assumes that if buffer size equals
49	0	// the input size, the whole stream will be processed
50	0	// as one readString. This is not true with encoding_rs,
51	0	// because encoding_rs might want to see space for a
52	0	// surrogate pair, so let's compute a larger output
53	0	// buffer length.
54	0	CheckedInt<size_t> needed = mConverter->MaxUTF16BufferLength(aBufferSize);
55	0	if (!needed.isValid()) {
56	0	return NS_ERROR_OUT_OF_MEMORY;
57	0	}
58	0	outputBufferSize = needed.value();
59	0	}
60	0
61	0	// set up our buffers.
62	0	if (!mByteData.SetCapacity(aBufferSize, mozilla::fallible) ||
63	0	!mUnicharData.SetLength(outputBufferSize, mozilla::fallible)) {
64	0	return NS_ERROR_OUT_OF_MEMORY;
65	0	}
66	0
67	0	mInput = aStream;
68	0	mErrorsAreFatal = !aReplacementChar;
69	0	return NS_OK;
70	0	}
71
72		NS_IMETHODIMP
73		nsConverterInputStream::Close()
74	0	{
75	0	nsresult rv = mInput ? mInput->Close() : NS_OK;
76	0	mLineBuffer = nullptr;
77	0	mInput = nullptr;
78	0	mConverter = nullptr;
79	0	mByteData.Clear();
80	0	mUnicharData.Clear();
81	0	return rv;
82	0	}
83
84		NS_IMETHODIMP
85		nsConverterInputStream::Read(char16_t* aBuf,
86		uint32_t aCount,
87		uint32_t *aReadCount)
88	0	{
89	0	NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
90	0	uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
91	0	if (0 == readCount) {
92	0	// Fill the unichar buffer
93	0	readCount = Fill(&mLastErrorCode);
94	0	if (readCount == 0) {
95	0	*aReadCount = 0;
96	0	return mLastErrorCode;
97	0	}
98	0	}
99	0	if (readCount > aCount) {
100	0	readCount = aCount;
101	0	}
102	0	memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
103	0	readCount * sizeof(char16_t));
104	0	mUnicharDataOffset += readCount;
105	0	*aReadCount = readCount;
106	0	return NS_OK;
107	0	}
108
109		NS_IMETHODIMP
110		nsConverterInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
111		void* aClosure,
112		uint32_t aCount, uint32_t *aReadCount)
113	0	{
114	0	NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
115	0	uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
116	0	nsresult rv;
117	0	if (0 == bytesToWrite) {
118	0	// Fill the unichar buffer
119	0	bytesToWrite = Fill(&rv);
120	0	if (bytesToWrite <= 0) {
121	0	*aReadCount = 0;
122	0	return rv;
123	0	}
124	0	if (NS_FAILED(rv)) {
125	0	return rv;
126	0	}
127	0	}
128	0
129	0	if (bytesToWrite > aCount)
130	0	bytesToWrite = aCount;
131	0
132	0	uint32_t bytesWritten;
133	0	uint32_t totalBytesWritten = 0;
134	0
135	0	while (bytesToWrite) {
136	0	rv = aWriter(this, aClosure,
137	0	mUnicharData.Elements() + mUnicharDataOffset,
138	0	totalBytesWritten, bytesToWrite, &bytesWritten);
139	0	if (NS_FAILED(rv)) {
140	0	// don't propagate errors to the caller
141	0	break;
142	0	}
143	0
144	0	bytesToWrite -= bytesWritten;
145	0	totalBytesWritten += bytesWritten;
146	0	mUnicharDataOffset += bytesWritten;
147	0	}
148	0
149	0	*aReadCount = totalBytesWritten;
150	0
151	0	return NS_OK;
152	0	}
153
154		NS_IMETHODIMP
155		nsConverterInputStream::ReadString(uint32_t aCount, nsAString& aString,
156		uint32_t* aReadCount)
157	0	{
158	0	NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
159	0	uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
160	0	if (0 == readCount) {
161	0	// Fill the unichar buffer
162	0	readCount = Fill(&mLastErrorCode);
163	0	if (readCount == 0) {
164	0	*aReadCount = 0;
165	0	return mLastErrorCode;
166	0	}
167	0	}
168	0	if (readCount > aCount) {
169	0	readCount = aCount;
170	0	}
171	0	const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
172	0	aString.Assign(buf, readCount);
173	0	mUnicharDataOffset += readCount;
174	0	*aReadCount = readCount;
175	0	return NS_OK;
176	0	}
177
178		uint32_t
179		nsConverterInputStream::Fill(nsresult * aErrorCode)
180	0	{
181	0	if (nullptr == mInput) {
182	0	// We already closed the stream!
183	0	*aErrorCode = NS_BASE_STREAM_CLOSED;
184	0	return 0;
185	0	}
186	0
187	0	if (NS_FAILED(mLastErrorCode)) {
188	0	// We failed to completely convert last time, and error-recovery
189	0	// is disabled. We will fare no better this time, so...
190	0	*aErrorCode = mLastErrorCode;
191	0	return 0;
192	0	}
193	0
194	0	// We assume a many to one conversion and are using equal sizes for
195	0	// the two buffers. However if an error happens at the very start
196	0	// of a byte buffer we may end up in a situation where n bytes lead
197	0	// to n+1 unicode chars. Thus we need to keep track of the leftover
198	0	// bytes as we convert.
199	0
200	0	uint32_t nb;
201	0	*aErrorCode = NS_FillArray(mByteData, mInput, mLeftOverBytes, &nb);
202	0	if (nb == 0 && mLeftOverBytes == 0) {
203	0	// No more data
204	0	*aErrorCode = NS_OK;
205	0	return 0;
206	0	}
207	0
208	0	NS_ASSERTION(uint32_t(nb) + mLeftOverBytes == mByteData.Length(),
209	0	"mByteData is lying to us somewhere");
210	0
211	0	// Now convert as much of the byte buffer to unicode as possible
212	0	auto src = AsBytes(MakeSpan(mByteData));
213	0	auto dst = MakeSpan(mUnicharData);
214	0	// mUnicharData.Length() is the buffer length, not the fill status.
215	0	// mUnicharDataLength reflects the current fill status.
216	0	mUnicharDataLength = 0;
217	0	// Whenever we convert, mUnicharData is logically empty.
218	0	mUnicharDataOffset = 0;
219	0	// Truncation from size_t to uint32_t below is OK, because the sizes
220	0	// are bounded by the lengths of mByteData and mUnicharData.
221	0	uint32_t result;
222	0	size_t read;
223	0	size_t written;
224	0	bool hadErrors;
225	0	// The design of this class is fundamentally bogus in that trailing
226	0	// errors are ignored. Always passing false as the last argument to
227	0	// Decode* calls below.
228	0	if (mErrorsAreFatal) {
229	0	Tie(result, read, written) =
230	0	mConverter->DecodeToUTF16WithoutReplacement(src, dst, false);
231	0	} else {
232	0	Tie(result, read, written, hadErrors) =
233	0	mConverter->DecodeToUTF16(src, dst, false);
234	0	}
235	0	Unused << hadErrors;
236	0	mLeftOverBytes = mByteData.Length() - read;
237	0	mUnicharDataLength = written;
238	0	if (result == kInputEmpty || result == kOutputFull) {
239	0	*aErrorCode = NS_OK;
240	0	} else {
241	0	MOZ_ASSERT(mErrorsAreFatal, "How come DecodeToUTF16() reported error?");
242	0	*aErrorCode = NS_ERROR_UDEC_ILLEGALINPUT;
243	0	}
244	0	return mUnicharDataLength;
245	0	}
246
247		NS_IMETHODIMP
248		nsConverterInputStream::ReadLine(nsAString& aLine, bool* aResult)
249	0	{
250	0	if (!mLineBuffer) {
251	0	mLineBuffer = new nsLineBuffer<char16_t>;
252	0	}
253	0	return NS_ReadLine(this, mLineBuffer.get(), aLine, aResult);
254	0	}