Coverage Report

Created: 2025-07-07 10:01

/src/libreoffice/package/source/zipapi/ThreadedDeflater.cxx
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 *
9
 * This file incorporates work covered by the following license notice:
10
 *
11
 *   Licensed to the Apache Software Foundation (ASF) under one or more
12
 *   contributor license agreements. See the NOTICE file distributed
13
 *   with this work for additional information regarding copyright
14
 *   ownership. The ASF licenses this file to you under the Apache
15
 *   License, Version 2.0 (the "License"); you may not use this file
16
 *   except in compliance with the License. You may obtain a copy of
17
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18
 */
19
20
#include <ThreadedDeflater.hxx>
21
#include <zlib.h>
22
#include <com/sun/star/packages/zip/ZipConstants.hpp>
23
#include <sal/log.hxx>
24
25
using namespace com::sun::star::packages::zip::ZipConstants;
26
using namespace com::sun::star;
27
28
namespace ZipUtils
29
{
30
// Number of input bytes handed to a single compression task (128 KiB). It is also the size of
// the buffer that carries the tail of one batch over to the next as dictionary data.
const sal_Int64 MaxBlockSize = 128 * 1024;
31
32
// Parallel ZLIB compression using threads. The class internally splits the data into
33
// blocks and spawns ThreadPool tasks to process them independently. This is achieved
34
// in a similar way to how pigz works; see the comments from Mark Adler at
35
// https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression
36
// and
37
// https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source
38
39
// Everything here should be either read-only, or writing to distinct data, or atomic.
40
41
// One unit of parallel work: compresses a single block of the owning deflater's current input
// batch into that deflater's output slot with the same index. Tasks are created by
// ThreadedDeflater::deflateWrite() and run on the shared thread pool.
class ThreadedDeflater::Task : public comphelper::ThreadTask
{
    // zlib state, private to this task.
    z_stream stream;
    // Owning deflater; supplies inBuffer, prevDataBlock, outBuffers and zlibLevel.
    ThreadedDeflater* deflater;
    // Index of this block within the batch: input starts at sequence * MaxBlockSize and the
    // result is written to outBuffers[sequence].
    int sequence;
    // Number of input bytes this task compresses.
    int blockSize;
    // Set for the very first block of the stream, which is compressed without a dictionary.
    bool firstTask : 1;
    // Set for the very last block of the stream, which is finished with Z_FINISH.
    bool lastTask : 1;

public:
    Task(ThreadedDeflater* pOwner, int nSequence, int nBlockSize, bool bFirstTask, bool bLastTask)
        : comphelper::ThreadTask(pOwner->threadTaskTag)
        , stream()
        , deflater(pOwner)
        , sequence(nSequence)
        , blockSize(nBlockSize)
        , firstTask(bFirstTask)
        , lastTask(bLastTask)
    {
    }

private:
    // Performs the compression; invoked by the thread pool.
    virtual void doWork() override;
};
66
67
// Creates a deflater that compresses at zlib level nSetLevel. A new thread-pool task tag is
// created here; the tasks are registered under it and deflateWrite() waits on it. Both byte
// counters start at zero.
ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel)
    : threadTaskTag(comphelper::ThreadPool::createThreadTaskTag())
    , totalIn(0)
    , totalOut(0)
    , zlibLevel(nSetLevel)
{
}
74
75
0
// Drops the working buffers by delegating to clear().
ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE
{
    clear();
}
76
77
void ThreadedDeflater::deflateWrite(
78
    const css::uno::Reference<css::io::XInputStream>& xInStream,
79
    const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessInputFunc,
80
    const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessOutputFunc)
81
0
{
82
0
    sal_Int64 nThreadCount = comphelper::ThreadPool::getSharedOptimalPool().getWorkerCount();
83
0
    sal_Int64 batchSize = MaxBlockSize * nThreadCount;
84
0
    inBuffer.realloc(batchSize);
85
0
    prevDataBlock.realloc(MaxBlockSize);
86
0
    outBuffers.resize(nThreadCount);
87
0
    maProcessOutputFunc = rProcessOutputFunc;
88
0
    bool firstTask = true;
89
90
0
    while (xInStream->available() > 0)
91
0
    {
92
0
        sal_Int64 inputBytes = xInStream->readBytes(inBuffer, batchSize);
93
0
        rProcessInputFunc(inBuffer, inputBytes);
94
0
        totalIn += inputBytes;
95
0
        int sequence = 0;
96
0
        bool lastBatch = xInStream->available() <= 0;
97
0
        sal_Int64 bytesPending = inputBytes;
98
0
        while (bytesPending > 0)
99
0
        {
100
0
            sal_Int64 taskSize = std::min(MaxBlockSize, bytesPending);
101
0
            bytesPending -= taskSize;
102
0
            bool lastTask = lastBatch && !bytesPending;
103
0
            comphelper::ThreadPool::getSharedOptimalPool().pushTask(
104
0
                std::make_unique<Task>(this, sequence++, taskSize, firstTask, lastTask));
105
106
0
            if (firstTask)
107
0
                firstTask = false;
108
0
        }
109
110
0
        assert(bytesPending == 0);
111
112
0
        comphelper::ThreadPool::getSharedOptimalPool().waitUntilDone(threadTaskTag);
113
114
0
        if (!lastBatch)
115
0
        {
116
0
            assert(inputBytes == batchSize);
117
0
            std::copy_n(std::cbegin(inBuffer) + (batchSize - MaxBlockSize), MaxBlockSize,
118
0
                        prevDataBlock.getArray());
119
0
        }
120
121
0
        processDeflatedBuffers();
122
0
    }
123
0
}
124
125
void ThreadedDeflater::processDeflatedBuffers()
126
0
{
127
0
    sal_Int64 batchOutputSize = 0;
128
0
    for (const auto& buffer : outBuffers)
129
0
        batchOutputSize += buffer.size();
130
131
0
    css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize);
132
133
0
    auto pos = outBuffer.getArray();
134
0
    for (auto& buffer : outBuffers)
135
0
    {
136
0
        pos = std::copy(buffer.begin(), buffer.end(), pos);
137
0
        buffer.clear();
138
0
    }
139
140
0
    maProcessOutputFunc(outBuffer, batchOutputSize);
141
0
    totalOut += batchOutputSize;
142
0
}
143
144
void ThreadedDeflater::clear()
145
0
{
146
0
    inBuffer = uno::Sequence<sal_Int8>();
147
0
    outBuffers.clear();
148
0
}
149
150
// When Z_PREFIX is defined, route the zlib entry points used below to their z_-prefixed names.
#ifdef Z_PREFIX
#define deflateInit2 z_deflateInit2
#define deflateBound z_deflateBound
#define deflateSetDictionary z_deflateSetDictionary
#define deflate z_deflate
#define deflateEnd z_deflateEnd
#endif
157
158
void ThreadedDeflater::Task::doWork()
159
0
{
160
0
    stream.zalloc = nullptr;
161
0
    stream.zfree = nullptr;
162
0
    stream.opaque = nullptr;
163
    // -MAX_WBITS means 32k window size and raw stream
164
0
    if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
165
0
                     Z_DEFAULT_STRATEGY)
166
0
        != Z_OK)
167
0
    {
168
0
        SAL_WARN("package.threadeddeflate", "deflateInit2() failed");
169
0
        abort();
170
0
    }
171
    // Find out size for our output buffer to be large enough for deflate() needing to be called just once.
172
0
    sal_Int64 outputMaxSize = deflateBound(&stream, blockSize);
173
    // add extra size for Z_SYNC_FLUSH
174
0
    outputMaxSize += 20;
175
0
    deflater->outBuffers[sequence].resize(outputMaxSize);
176
0
    sal_Int64 myInBufferStart = sequence * MaxBlockSize;
177
    // zlib doesn't handle const properly
178
0
    unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>(
179
0
        const_cast<signed char*>(deflater->inBuffer.getConstArray()));
180
0
    if (!firstTask)
181
0
    {
182
        // the window size is 32k, so set last 32k of previous data as the dictionary
183
0
        assert(MAX_WBITS == 15);
184
0
        assert(MaxBlockSize >= 32768);
185
0
        if (sequence > 0)
186
0
        {
187
0
            deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768);
188
0
        }
189
0
        else
190
0
        {
191
0
            unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>(
192
0
                const_cast<signed char*>(deflater->prevDataBlock.getConstArray()));
193
0
            deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768);
194
0
        }
195
0
    }
196
0
    stream.next_in = inBufferPtr + myInBufferStart;
197
0
    stream.avail_in = blockSize;
198
0
    stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data());
199
0
    stream.avail_out = outputMaxSize;
200
201
    // The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary,
202
    // and since we use a raw stream, the data blocks then can be simply concatenated.
203
0
    int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH);
204
0
    assert(stream.avail_in == 0); // Check that everything has been deflated.
205
0
    if (lastTask ? res == Z_STREAM_END : res == Z_OK)
206
0
    { // ok
207
0
        sal_Int64 outSize = outputMaxSize - stream.avail_out;
208
0
        deflater->outBuffers[sequence].resize(outSize);
209
0
    }
210
0
    else
211
0
    {
212
0
        SAL_WARN("package.threadeddeflate", "deflate() failed");
213
0
        abort();
214
0
    }
215
0
    deflateEnd(&stream);
216
0
}
217
218
} // namespace
219
220
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */