Coverage Report

Created: 2026-05-16 09:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/io/source/TextInputStream/TextInputStream.cxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <string.h>
21
22
#include <comphelper/sequence.hxx>
23
#include <cppuhelper/implbase.hxx>
24
#include <cppuhelper/supportsservice.hxx>
25
26
#include <rtl/textenc.h>
27
#include <rtl/tencinfo.h>
28
29
#include <com/sun/star/io/BufferSizeExceededException.hpp>
30
#include <com/sun/star/io/IOException.hpp>
31
#include <com/sun/star/io/NotConnectedException.hpp>
32
#include <com/sun/star/io/XTextInputStream2.hpp>
33
#include <com/sun/star/lang/IllegalArgumentException.hpp>
34
#include <com/sun/star/lang/XServiceInfo.hpp>
35
36
#include <optional>
37
#include <vector>
38
39
namespace com::sun::star::uno { class XComponentContext; }
40
41
using namespace ::cppu;
42
using namespace ::com::sun::star::uno;
43
using namespace ::com::sun::star::lang;
44
using namespace ::com::sun::star::io;
45
46
47
// Implementation XTextInputStream
48
49
297
#define INITIAL_UNICODE_BUFFER_CAPACITY     0x100
50
22.1k
#define READ_BYTE_COUNT                     0x100
51
52
namespace {
53
54
class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo >
55
{
56
    Reference< XInputStream > mxStream;
57
58
    struct Encoding_t
59
    {
60
        rtl_TextToUnicodeConverter  mConvText2Unicode;
61
        rtl_TextToUnicodeContext    mContextText2Unicode;
62
        Encoding_t(rtl_TextEncoding encoding)
63
297
        {
64
297
            mConvText2Unicode = rtl_createTextToUnicodeConverter(encoding);
65
297
            mContextText2Unicode = rtl_createTextToUnicodeContext(mConvText2Unicode);
66
297
        }
67
        ~Encoding_t()
68
297
        {
69
297
            rtl_destroyTextToUnicodeContext(mConvText2Unicode, mContextText2Unicode);
70
297
            rtl_destroyTextToUnicodeConverter(mConvText2Unicode);
71
297
        }
72
    };
73
    std::optional<Encoding_t> moEncoding;
74
    Sequence<sal_Int8>          mSeqSource;
75
76
    // Internal buffer for characters that are already converted successfully
77
    std::vector<sal_Unicode> mvBuffer;
78
    sal_Int32 mnCharsInBuffer;
79
    bool mbReachedEOF;
80
81
    /// @throws IOException
82
    /// @throws RuntimeException
83
    OUString implReadString( const Sequence< sal_Unicode >& Delimiters,
84
        bool bRemoveDelimiter, bool bFindLineEnd );
85
    /// @throws IOException
86
    /// @throws RuntimeException
87
    sal_Int32 implReadNext();
88
    /// @throws RuntimeException
89
    void checkNull();
90
91
public:
92
    OTextInputStream();
93
94
    // Methods XTextInputStream
95
    virtual OUString SAL_CALL readLine(  ) override;
96
    virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override;
97
    virtual sal_Bool SAL_CALL isEOF(  ) override;
98
    virtual void SAL_CALL setEncoding( const OUString& Encoding ) override;
99
100
    // Methods XInputStream
101
    virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override;
102
    virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override;
103
    virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override;
104
    virtual sal_Int32 SAL_CALL available(  ) override;
105
    virtual void SAL_CALL closeInput(  ) override;
106
107
    // Methods XActiveDataSink
108
    virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override;
109
    virtual Reference< XInputStream > SAL_CALL getInputStream() override;
110
111
    // Methods XServiceInfo
112
        virtual OUString              SAL_CALL getImplementationName() override;
113
        virtual Sequence< OUString >  SAL_CALL getSupportedServiceNames() override;
114
        virtual sal_Bool              SAL_CALL supportsService(const OUString& ServiceName) override;
115
};
116
117
}
118
119
OTextInputStream::OTextInputStream()
120
297
    : mSeqSource(READ_BYTE_COUNT)
121
297
    , mvBuffer(INITIAL_UNICODE_BUFFER_CAPACITY, 0)
122
297
    , mnCharsInBuffer(0)
123
297
    , mbReachedEOF(false)
124
297
{
125
297
}
126
127
// Check uninitialized object
128
129
void OTextInputStream::checkNull()
130
849k
{
131
849k
    if (mxStream==nullptr){
132
0
        throw RuntimeException(u"Uninitialized object"_ustr);
133
0
    }
134
849k
}
135
136
// XTextInputStream
137
138
OUString OTextInputStream::readLine(  )
139
106
{
140
106
    checkNull();
141
106
    static Sequence< sal_Unicode > aDummySeq;
142
106
    return implReadString( aDummySeq, true, true );
143
106
}
144
145
OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
146
339k
{
147
339k
    checkNull();
148
339k
    return implReadString( Delimiters, bRemoveDelimiter, false );
149
339k
}
150
151
sal_Bool OTextInputStream::isEOF()
152
509k
{
153
509k
    checkNull();
154
509k
    bool bRet = false;
155
509k
    if( mnCharsInBuffer == 0 && mbReachedEOF )
156
547
        bRet = true;
157
509k
    return bRet;
158
509k
}
159
160
161
OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
162
                                           bool bRemoveDelimiter, bool bFindLineEnd )
163
339k
{
164
339k
    OUString aRetStr;
165
339k
    if (!moEncoding)
166
0
    {
167
0
        setEncoding( u"utf8"_ustr );
168
0
    }
169
170
    // Only for bFindLineEnd
171
339k
    constexpr sal_Unicode cLineEndChar1 = '\r';
172
339k
    constexpr sal_Unicode cLineEndChar2 = '\n';
173
174
339k
    sal_Int32 nBufferReadPos = 0;
175
339k
    sal_Int32 nCopyLen = -1;
176
339k
    sal_Unicode cFirstLineEndChar = 0;
177
5.49M
    while (true)
178
5.49M
    {
179
        // Still characters available?
180
5.49M
        if( nBufferReadPos == mnCharsInBuffer )
181
21.8k
        {
182
            // Already reached EOF? Then we can't read any more
183
            // Or no, so read new characters
184
21.8k
            if( !implReadNext() )
185
183
                break;
186
21.8k
        }
187
188
        // Now there should be characters available
189
        // (otherwise the loop should have been broken before)
190
5.49M
        sal_Unicode c = mvBuffer[ nBufferReadPos++ ];
191
192
5.49M
        if( bFindLineEnd )
193
2.67k
        {
194
2.67k
            if (cFirstLineEndChar != 0)
195
105
            {
196
105
                assert(nCopyLen >= 0);
197
                // This is a check if the next character after a line end char is its second half
198
                // Same line end char -> new line break; non-line-end char -> new line start
199
105
                if ((c == cFirstLineEndChar) || (c != cLineEndChar1 && c != cLineEndChar2))
200
4
                {
201
                    // Not a two-char line end
202
4
                    nBufferReadPos--;
203
4
                }
204
105
                break;
205
105
            }
206
2.57k
            else if( c == cLineEndChar1 || c == cLineEndChar2 )
207
105
            {
208
105
                nCopyLen = nBufferReadPos - 1; // we know what to copy
209
105
                cFirstLineEndChar = c; // take one more loop, to check if it's a two-char line end
210
105
            }
211
2.67k
        }
212
5.48M
        else if( comphelper::findValue(Delimiters, c) != -1 )
213
339k
        {
214
339k
            nCopyLen = nBufferReadPos;
215
339k
            if( bRemoveDelimiter )
216
0
                nCopyLen--;
217
339k
            break;
218
339k
        }
219
5.49M
    }
220
221
    // Nothing found? Return all
222
339k
    if (nCopyLen < 0)
223
183
        nCopyLen = nBufferReadPos;
224
225
    // Create string
226
339k
    if( nCopyLen )
227
339k
        aRetStr = OUString( mvBuffer.data(), nCopyLen );
228
229
    // Copy rest of buffer
230
339k
    std::copy(mvBuffer.data() + nBufferReadPos, mvBuffer.data() + mnCharsInBuffer, mvBuffer.data());
231
339k
    mnCharsInBuffer -= nBufferReadPos;
232
233
339k
    return aRetStr;
234
339k
}
235
236
237
sal_Int32 OTextInputStream::implReadNext()
238
21.8k
{
239
    // Already reached EOF? Then we can't read any more
240
21.8k
    if (mbReachedEOF)
241
0
        return 0;
242
243
21.8k
    try
244
21.8k
    {
245
21.8k
        if (mxStream->readSomeBytes(mSeqSource, READ_BYTE_COUNT) == 0)
246
183
        {
247
183
            mbReachedEOF = true;
248
183
            return 0;
249
183
        }
250
251
        // Try to convert
252
21.6k
        sal_uInt32 uiInfo = mvBuffer.size() - mnCharsInBuffer < o3tl::make_unsigned(mSeqSource.getLength())
253
21.6k
                                ? RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL
254
21.6k
                                : 0;
255
21.6k
        const sal_Int32 nOldCharsInBuffer = mnCharsInBuffer;
256
21.6k
        sal_Size nSourceCount = 0;
257
21.6k
        while( true )
258
21.5k
        {
259
21.5k
            if (uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
260
392
            {
261
392
                mvBuffer.resize(mvBuffer.size() * 2);
262
392
            }
263
264
            // All invalid characters are transformed to the unicode undefined char
265
21.5k
            sal_Size nSrcCvtBytes = 0;
266
21.5k
            mnCharsInBuffer += rtl_convertTextToUnicode(
267
21.5k
                                moEncoding->mConvText2Unicode,
268
21.5k
                                moEncoding->mContextText2Unicode,
269
21.5k
                                reinterpret_cast<const char*>(mSeqSource.getConstArray() + nSourceCount),
270
21.5k
                                mSeqSource.getLength() - nSourceCount,
271
21.5k
                                mvBuffer.data() + mnCharsInBuffer,
272
21.5k
                                mvBuffer.size() - mnCharsInBuffer,
273
21.5k
                                RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT   |
274
21.5k
                                RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
275
21.5k
                                RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
276
21.5k
                                &uiInfo,
277
21.5k
                                &nSrcCvtBytes );
278
21.5k
            nSourceCount += nSrcCvtBytes;
279
280
21.5k
            if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL )
281
0
            {
282
                // read next byte
283
0
                Sequence<sal_Int8> aOneByteSeq(1);
284
0
                if (mxStream->readSomeBytes(aOneByteSeq, 1) == 0)
285
0
                {
286
0
                    mbReachedEOF = true;
287
0
                    return mnCharsInBuffer - nOldCharsInBuffer;
288
0
                }
289
290
0
                mSeqSource = comphelper::concatSequences(mSeqSource, aOneByteSeq);
291
0
            }
292
21.5k
            else if (!(uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL))
293
21.5k
                return mnCharsInBuffer - nOldCharsInBuffer; // finished
294
21.5k
        }
295
21.6k
    }
296
21.8k
    catch( NotConnectedException& )
297
21.8k
    {
298
0
        throw IOException(u"Not connected"_ustr);
299
        //throw IOException( L"OTextInputStream::implReadString failed" );
300
0
    }
301
21.8k
    catch( BufferSizeExceededException& )
302
21.8k
    {
303
0
        throw IOException(u"Buffer size exceeded"_ustr);
304
0
    }
305
21.8k
}
306
307
void OTextInputStream::setEncoding( const OUString& Encoding )
308
297
{
309
297
    OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
310
297
    rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
311
297
    if( RTL_TEXTENCODING_DONTKNOW == encoding )
312
0
        throw IllegalArgumentException("Unknown encoding '" + Encoding + "'", getXWeak(), 0);
313
314
297
    moEncoding.emplace(encoding);
315
297
}
316
317
318
// XInputStream
319
320
sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
321
0
{
322
0
    checkNull();
323
0
    return mxStream->readBytes( aData, nBytesToRead );
324
0
}
325
326
sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
327
0
{
328
0
    checkNull();
329
0
    return mxStream->readSomeBytes( aData, nMaxBytesToRead );
330
0
}
331
332
void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip )
333
0
{
334
0
    checkNull();
335
0
    mxStream->skipBytes( nBytesToSkip );
336
0
}
337
338
sal_Int32 OTextInputStream::available(  )
339
0
{
340
0
    checkNull();
341
0
    return mxStream->available();
342
0
}
343
344
void OTextInputStream::closeInput(  )
345
0
{
346
0
    checkNull();
347
0
    mxStream->closeInput();
348
0
}
349
350
351
// XActiveDataSink
352
353
void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream )
354
297
{
355
297
    mxStream = aStream;
356
297
}
357
358
Reference< XInputStream > OTextInputStream::getInputStream()
359
0
{
360
0
    return mxStream;
361
0
}
362
363
OUString OTextInputStream::getImplementationName()
364
0
{
365
0
    return u"com.sun.star.comp.io.TextInputStream"_ustr;
366
0
}
367
368
sal_Bool OTextInputStream::supportsService(const OUString& ServiceName)
369
0
{
370
0
    return cppu::supportsService(this, ServiceName);
371
0
}
372
373
Sequence< OUString > OTextInputStream::getSupportedServiceNames()
374
0
{
375
0
    return { u"com.sun.star.io.TextInputStream"_ustr };
376
0
}
377
378
extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
379
io_OTextInputStream_get_implementation(
380
    css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
381
297
{
382
297
    return cppu::acquire(new OTextInputStream);
383
297
}
384
385
386
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */