/src/libreoffice/io/source/TextInputStream/TextInputStream.cxx
Line | Count | Source |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <string.h> |
21 | | |
22 | | #include <comphelper/sequence.hxx> |
23 | | #include <cppuhelper/implbase.hxx> |
24 | | #include <cppuhelper/supportsservice.hxx> |
25 | | |
26 | | #include <rtl/textenc.h> |
27 | | #include <rtl/tencinfo.h> |
28 | | |
29 | | #include <com/sun/star/io/BufferSizeExceededException.hpp> |
30 | | #include <com/sun/star/io/IOException.hpp> |
31 | | #include <com/sun/star/io/NotConnectedException.hpp> |
32 | | #include <com/sun/star/io/XTextInputStream2.hpp> |
33 | | #include <com/sun/star/lang/IllegalArgumentException.hpp> |
34 | | #include <com/sun/star/lang/XServiceInfo.hpp> |
35 | | |
36 | | #include <optional> |
37 | | #include <vector> |
38 | | |
39 | | namespace com::sun::star::uno { class XComponentContext; } |
40 | | |
41 | | using namespace ::cppu; |
42 | | using namespace ::com::sun::star::uno; |
43 | | using namespace ::com::sun::star::lang; |
44 | | using namespace ::com::sun::star::io; |
45 | | |
46 | | |
47 | | // Implementation XTextInputStream |
48 | | |
// Number of UTF-16 code units the decoded-character buffer starts out with.
constexpr int INITIAL_UNICODE_BUFFER_CAPACITY = 0x100;
// Maximum number of bytes requested from the underlying stream per read.
constexpr int READ_BYTE_COUNT = 0x100;
51 | | |
52 | | namespace { |
53 | | |
54 | | class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo > |
55 | | { |
56 | | Reference< XInputStream > mxStream; |
57 | | |
58 | | struct Encoding_t |
59 | | { |
60 | | rtl_TextToUnicodeConverter mConvText2Unicode; |
61 | | rtl_TextToUnicodeContext mContextText2Unicode; |
62 | | Encoding_t(rtl_TextEncoding encoding) |
63 | 297 | { |
64 | 297 | mConvText2Unicode = rtl_createTextToUnicodeConverter(encoding); |
65 | 297 | mContextText2Unicode = rtl_createTextToUnicodeContext(mConvText2Unicode); |
66 | 297 | } |
67 | | ~Encoding_t() |
68 | 297 | { |
69 | 297 | rtl_destroyTextToUnicodeContext(mConvText2Unicode, mContextText2Unicode); |
70 | 297 | rtl_destroyTextToUnicodeConverter(mConvText2Unicode); |
71 | 297 | } |
72 | | }; |
73 | | std::optional<Encoding_t> moEncoding; |
74 | | Sequence<sal_Int8> mSeqSource; |
75 | | |
76 | | // Internal buffer for characters that are already converted successfully |
77 | | std::vector<sal_Unicode> mvBuffer; |
78 | | sal_Int32 mnCharsInBuffer; |
79 | | bool mbReachedEOF; |
80 | | |
81 | | /// @throws IOException |
82 | | /// @throws RuntimeException |
83 | | OUString implReadString( const Sequence< sal_Unicode >& Delimiters, |
84 | | bool bRemoveDelimiter, bool bFindLineEnd ); |
85 | | /// @throws IOException |
86 | | /// @throws RuntimeException |
87 | | sal_Int32 implReadNext(); |
88 | | /// @throws RuntimeException |
89 | | void checkNull(); |
90 | | |
91 | | public: |
92 | | OTextInputStream(); |
93 | | |
94 | | // Methods XTextInputStream |
95 | | virtual OUString SAL_CALL readLine( ) override; |
96 | | virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override; |
97 | | virtual sal_Bool SAL_CALL isEOF( ) override; |
98 | | virtual void SAL_CALL setEncoding( const OUString& Encoding ) override; |
99 | | |
100 | | // Methods XInputStream |
101 | | virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override; |
102 | | virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override; |
103 | | virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override; |
104 | | virtual sal_Int32 SAL_CALL available( ) override; |
105 | | virtual void SAL_CALL closeInput( ) override; |
106 | | |
107 | | // Methods XActiveDataSink |
108 | | virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override; |
109 | | virtual Reference< XInputStream > SAL_CALL getInputStream() override; |
110 | | |
111 | | // Methods XServiceInfo |
112 | | virtual OUString SAL_CALL getImplementationName() override; |
113 | | virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; |
114 | | virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; |
115 | | }; |
116 | | |
117 | | } |
118 | | |
119 | | OTextInputStream::OTextInputStream() |
120 | 297 | : mSeqSource(READ_BYTE_COUNT) |
121 | 297 | , mvBuffer(INITIAL_UNICODE_BUFFER_CAPACITY, 0) |
122 | 297 | , mnCharsInBuffer(0) |
123 | 297 | , mbReachedEOF(false) |
124 | 297 | { |
125 | 297 | } |
126 | | |
127 | | // Check uninitialized object |
128 | | |
129 | | void OTextInputStream::checkNull() |
130 | 849k | { |
131 | 849k | if (mxStream==nullptr){ |
132 | 0 | throw RuntimeException(u"Uninitialized object"_ustr); |
133 | 0 | } |
134 | 849k | } |
135 | | |
136 | | // XTextInputStream |
137 | | |
138 | | OUString OTextInputStream::readLine( ) |
139 | 106 | { |
140 | 106 | checkNull(); |
141 | 106 | static Sequence< sal_Unicode > aDummySeq; |
142 | 106 | return implReadString( aDummySeq, true, true ); |
143 | 106 | } |
144 | | |
145 | | OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) |
146 | 339k | { |
147 | 339k | checkNull(); |
148 | 339k | return implReadString( Delimiters, bRemoveDelimiter, false ); |
149 | 339k | } |
150 | | |
151 | | sal_Bool OTextInputStream::isEOF() |
152 | 509k | { |
153 | 509k | checkNull(); |
154 | 509k | bool bRet = false; |
155 | 509k | if( mnCharsInBuffer == 0 && mbReachedEOF ) |
156 | 547 | bRet = true; |
157 | 509k | return bRet; |
158 | 509k | } |
159 | | |
160 | | |
161 | | OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters, |
162 | | bool bRemoveDelimiter, bool bFindLineEnd ) |
163 | 339k | { |
164 | 339k | OUString aRetStr; |
165 | 339k | if (!moEncoding) |
166 | 0 | { |
167 | 0 | setEncoding( u"utf8"_ustr ); |
168 | 0 | } |
169 | | |
170 | | // Only for bFindLineEnd |
171 | 339k | constexpr sal_Unicode cLineEndChar1 = '\r'; |
172 | 339k | constexpr sal_Unicode cLineEndChar2 = '\n'; |
173 | | |
174 | 339k | sal_Int32 nBufferReadPos = 0; |
175 | 339k | sal_Int32 nCopyLen = -1; |
176 | 339k | sal_Unicode cFirstLineEndChar = 0; |
177 | 5.49M | while (true) |
178 | 5.49M | { |
179 | | // Still characters available? |
180 | 5.49M | if( nBufferReadPos == mnCharsInBuffer ) |
181 | 21.8k | { |
182 | | // Already reached EOF? Then we can't read any more |
183 | | // Or no, so read new characters |
184 | 21.8k | if( !implReadNext() ) |
185 | 183 | break; |
186 | 21.8k | } |
187 | | |
188 | | // Now there should be characters available |
189 | | // (otherwise the loop should have been broken before) |
190 | 5.49M | sal_Unicode c = mvBuffer[ nBufferReadPos++ ]; |
191 | | |
192 | 5.49M | if( bFindLineEnd ) |
193 | 2.67k | { |
194 | 2.67k | if (cFirstLineEndChar != 0) |
195 | 105 | { |
196 | 105 | assert(nCopyLen >= 0); |
197 | | // This is a check if the next character after a line end char is its second half |
198 | | // Same line end char -> new line break; non-line-end char -> new line start |
199 | 105 | if ((c == cFirstLineEndChar) || (c != cLineEndChar1 && c != cLineEndChar2)) |
200 | 4 | { |
201 | | // Not a two-char line end |
202 | 4 | nBufferReadPos--; |
203 | 4 | } |
204 | 105 | break; |
205 | 105 | } |
206 | 2.57k | else if( c == cLineEndChar1 || c == cLineEndChar2 ) |
207 | 105 | { |
208 | 105 | nCopyLen = nBufferReadPos - 1; // we know what to copy |
209 | 105 | cFirstLineEndChar = c; // take one more loop, to check if it's a two-char line end |
210 | 105 | } |
211 | 2.67k | } |
212 | 5.48M | else if( comphelper::findValue(Delimiters, c) != -1 ) |
213 | 339k | { |
214 | 339k | nCopyLen = nBufferReadPos; |
215 | 339k | if( bRemoveDelimiter ) |
216 | 0 | nCopyLen--; |
217 | 339k | break; |
218 | 339k | } |
219 | 5.49M | } |
220 | | |
221 | | // Nothing found? Return all |
222 | 339k | if (nCopyLen < 0) |
223 | 183 | nCopyLen = nBufferReadPos; |
224 | | |
225 | | // Create string |
226 | 339k | if( nCopyLen ) |
227 | 339k | aRetStr = OUString( mvBuffer.data(), nCopyLen ); |
228 | | |
229 | | // Copy rest of buffer |
230 | 339k | std::copy(mvBuffer.data() + nBufferReadPos, mvBuffer.data() + mnCharsInBuffer, mvBuffer.data()); |
231 | 339k | mnCharsInBuffer -= nBufferReadPos; |
232 | | |
233 | 339k | return aRetStr; |
234 | 339k | } |
235 | | |
236 | | |
237 | | sal_Int32 OTextInputStream::implReadNext() |
238 | 21.8k | { |
239 | | // Already reached EOF? Then we can't read any more |
240 | 21.8k | if (mbReachedEOF) |
241 | 0 | return 0; |
242 | | |
243 | 21.8k | try |
244 | 21.8k | { |
245 | 21.8k | if (mxStream->readSomeBytes(mSeqSource, READ_BYTE_COUNT) == 0) |
246 | 183 | { |
247 | 183 | mbReachedEOF = true; |
248 | 183 | return 0; |
249 | 183 | } |
250 | | |
251 | | // Try to convert |
252 | 21.6k | sal_uInt32 uiInfo = mvBuffer.size() - mnCharsInBuffer < o3tl::make_unsigned(mSeqSource.getLength()) |
253 | 21.6k | ? RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL |
254 | 21.6k | : 0; |
255 | 21.6k | const sal_Int32 nOldCharsInBuffer = mnCharsInBuffer; |
256 | 21.6k | sal_Size nSourceCount = 0; |
257 | 21.6k | while( true ) |
258 | 21.5k | { |
259 | 21.5k | if (uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL) |
260 | 392 | { |
261 | 392 | mvBuffer.resize(mvBuffer.size() * 2); |
262 | 392 | } |
263 | | |
264 | | // All invalid characters are transformed to the unicode undefined char |
265 | 21.5k | sal_Size nSrcCvtBytes = 0; |
266 | 21.5k | mnCharsInBuffer += rtl_convertTextToUnicode( |
267 | 21.5k | moEncoding->mConvText2Unicode, |
268 | 21.5k | moEncoding->mContextText2Unicode, |
269 | 21.5k | reinterpret_cast<const char*>(mSeqSource.getConstArray() + nSourceCount), |
270 | 21.5k | mSeqSource.getLength() - nSourceCount, |
271 | 21.5k | mvBuffer.data() + mnCharsInBuffer, |
272 | 21.5k | mvBuffer.size() - mnCharsInBuffer, |
273 | 21.5k | RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | |
274 | 21.5k | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | |
275 | 21.5k | RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, |
276 | 21.5k | &uiInfo, |
277 | 21.5k | &nSrcCvtBytes ); |
278 | 21.5k | nSourceCount += nSrcCvtBytes; |
279 | | |
280 | 21.5k | if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL ) |
281 | 0 | { |
282 | | // read next byte |
283 | 0 | Sequence<sal_Int8> aOneByteSeq(1); |
284 | 0 | if (mxStream->readSomeBytes(aOneByteSeq, 1) == 0) |
285 | 0 | { |
286 | 0 | mbReachedEOF = true; |
287 | 0 | return mnCharsInBuffer - nOldCharsInBuffer; |
288 | 0 | } |
289 | | |
290 | 0 | mSeqSource = comphelper::concatSequences(mSeqSource, aOneByteSeq); |
291 | 0 | } |
292 | 21.5k | else if (!(uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)) |
293 | 21.5k | return mnCharsInBuffer - nOldCharsInBuffer; // finished |
294 | 21.5k | } |
295 | 21.6k | } |
296 | 21.8k | catch( NotConnectedException& ) |
297 | 21.8k | { |
298 | 0 | throw IOException(u"Not connected"_ustr); |
299 | | //throw IOException( L"OTextInputStream::implReadString failed" ); |
300 | 0 | } |
301 | 21.8k | catch( BufferSizeExceededException& ) |
302 | 21.8k | { |
303 | 0 | throw IOException(u"Buffer size exceeded"_ustr); |
304 | 0 | } |
305 | 21.8k | } |
306 | | |
307 | | void OTextInputStream::setEncoding( const OUString& Encoding ) |
308 | 297 | { |
309 | 297 | OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US ); |
310 | 297 | rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() ); |
311 | 297 | if( RTL_TEXTENCODING_DONTKNOW == encoding ) |
312 | 0 | throw IllegalArgumentException("Unknown encoding '" + Encoding + "'", getXWeak(), 0); |
313 | | |
314 | 297 | moEncoding.emplace(encoding); |
315 | 297 | } |
316 | | |
317 | | |
318 | | // XInputStream |
319 | | |
320 | | sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) |
321 | 0 | { |
322 | 0 | checkNull(); |
323 | 0 | return mxStream->readBytes( aData, nBytesToRead ); |
324 | 0 | } |
325 | | |
326 | | sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) |
327 | 0 | { |
328 | 0 | checkNull(); |
329 | 0 | return mxStream->readSomeBytes( aData, nMaxBytesToRead ); |
330 | 0 | } |
331 | | |
332 | | void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip ) |
333 | 0 | { |
334 | 0 | checkNull(); |
335 | 0 | mxStream->skipBytes( nBytesToSkip ); |
336 | 0 | } |
337 | | |
338 | | sal_Int32 OTextInputStream::available( ) |
339 | 0 | { |
340 | 0 | checkNull(); |
341 | 0 | return mxStream->available(); |
342 | 0 | } |
343 | | |
344 | | void OTextInputStream::closeInput( ) |
345 | 0 | { |
346 | 0 | checkNull(); |
347 | 0 | mxStream->closeInput(); |
348 | 0 | } |
349 | | |
350 | | |
351 | | // XActiveDataSink |
352 | | |
353 | | void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream ) |
354 | 297 | { |
355 | 297 | mxStream = aStream; |
356 | 297 | } |
357 | | |
358 | | Reference< XInputStream > OTextInputStream::getInputStream() |
359 | 0 | { |
360 | 0 | return mxStream; |
361 | 0 | } |
362 | | |
363 | | OUString OTextInputStream::getImplementationName() |
364 | 0 | { |
365 | 0 | return u"com.sun.star.comp.io.TextInputStream"_ustr; |
366 | 0 | } |
367 | | |
368 | | sal_Bool OTextInputStream::supportsService(const OUString& ServiceName) |
369 | 0 | { |
370 | 0 | return cppu::supportsService(this, ServiceName); |
371 | 0 | } |
372 | | |
373 | | Sequence< OUString > OTextInputStream::getSupportedServiceNames() |
374 | 0 | { |
375 | 0 | return { u"com.sun.star.io.TextInputStream"_ustr }; |
376 | 0 | } |
377 | | |
378 | | extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* |
379 | | io_OTextInputStream_get_implementation( |
380 | | css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) |
381 | 297 | { |
382 | 297 | return cppu::acquire(new OTextInputStream); |
383 | 297 | } |
384 | | |
385 | | |
386 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |