/src/libreoffice/package/source/zipapi/ThreadedDeflater.cxx

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <ThreadedDeflater.hxx>
#include <zlib.h>
#include <com/sun/star/packages/zip/ZipConstants.hpp>
#include <sal/log.hxx>

using namespace com::sun::star::packages::zip::ZipConstants;
using namespace com::sun::star;

namespace ZipUtils
{
// Upper bound, in bytes, on the input given to a single compression task (128 KiB).
// Batches read from the input stream are a multiple of this size.
const sal_Int64 MaxBlockSize = 128 * 1024;

// Parallel ZLIB compression using threads. The class internally splits the data into
// blocks and spawns ThreadPool tasks to process them independently. This is achieved
// in a similar way to how pigz works; see the comments from Mark Adler at
// https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression
// and
// https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source

// Everything here should be either read-only, or writing to distinct data, or atomic.

// One unit of parallel work: compresses a single block of the owning deflater's
// input buffer. The compression itself happens in doWork().
class ThreadedDeflater::Task : public comphelper::ThreadTask
{
private:
    // zlib state, used by doWork() for this block only
    z_stream stream;
    // deflater that created the task and holds the shared input/output buffers
    ThreadedDeflater* deflater;
    // index of this block within the current batch
    int sequence;
    // number of input bytes in this block
    int blockSize;
    // set for the very first block of the whole input
    bool firstTask : 1;
    // set for the very last block of the whole input
    bool lastTask : 1;

public:
    Task(ThreadedDeflater* pOwner, int nSequence, int nBlockSize, bool bFirst, bool bLast)
        : comphelper::ThreadTask(pOwner->threadTaskTag)
        , stream()
        , deflater(pOwner)
        , sequence(nSequence)
        , blockSize(nBlockSize)
        , firstTask(bFirst)
        , lastTask(bLast)
    {
    }

private:
    virtual void doWork() override;
};

// Creates a deflater that compresses with zlib level nSetLevel.
// A new task tag is requested from the thread pool so that this deflater's tasks can be
// waited for as a group, and the input/output byte counters start at zero.
ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel)
    : threadTaskTag(comphelper::ThreadPool::createThreadTaskTag())
    , totalIn(0)
    , totalOut(0)
    , zlibLevel(nSetLevel)
{
}

// Releases the input and output buffers by calling clear().
ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE { clear(); }

// Compresses everything xInStream has to offer, one batch at a time.
//
// A batch is MaxBlockSize bytes per worker of the shared thread pool. Every batch is
// read into inBuffer, split into blocks of at most MaxBlockSize bytes, and each block is
// compressed by its own Task. Once all tasks of the batch have finished, the compressed
// blocks are handed on in order by processDeflatedBuffers().
//
// xInStream          source of the uncompressed data
// rProcessInputFunc  called once per batch, before compression, with the input buffer
//                    and the number of bytes read into it
// rProcessOutputFunc called once per batch with the concatenated compressed data and
//                    its size
void ThreadedDeflater::deflateWrite(
    const css::uno::Reference<css::io::XInputStream>& xInStream,
    const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessInputFunc,
    const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessOutputFunc)
{
    auto& rPool = comphelper::ThreadPool::getSharedOptimalPool();
    const sal_Int64 nWorkers = rPool.getWorkerCount();
    const sal_Int64 nBatchBytes = MaxBlockSize * nWorkers;

    inBuffer.realloc(nBatchBytes);
    prevDataBlock.realloc(MaxBlockSize);
    outBuffers.resize(nWorkers);
    maProcessOutputFunc = rProcessOutputFunc;

    // Stays true only until the very first task of the whole stream has been queued.
    bool bNoTaskQueuedYet = true;

    while (xInStream->available() > 0)
    {
        const sal_Int64 nRead = xInStream->readBytes(inBuffer, nBatchBytes);
        rProcessInputFunc(inBuffer, nRead);
        totalIn += nRead;

        // Nothing left after this read means the final block must finish the stream.
        const bool bFinalBatch = !(xInStream->available() > 0);

        sal_Int64 nLeft = nRead;
        for (int nBlock = 0; nLeft > 0; ++nBlock)
        {
            const sal_Int64 nBlockBytes = std::min(MaxBlockSize, nLeft);
            nLeft -= nBlockBytes;
            const bool bFinalTask = bFinalBatch && nLeft == 0;
            rPool.pushTask(
                std::make_unique<Task>(this, nBlock, nBlockBytes, bNoTaskQueuedYet, bFinalTask));
            bNoTaskQueuedYet = false;
        }

        assert(nLeft == 0);

        rPool.waitUntilDone(threadTaskTag);

        if (!bFinalBatch)
        {
            // Keep the tail of this batch: the first task of the next batch takes its
            // dictionary from it, because inBuffer is overwritten by the next read.
            assert(nRead == nBatchBytes);
            const sal_Int8* pLastBlock = inBuffer.getConstArray() + (nBatchBytes - MaxBlockSize);
            std::copy_n(pLastBlock, MaxBlockSize, prevDataBlock.getArray());
        }

        processDeflatedBuffers();
    }
}

void ThreadedDeflater::processDeflatedBuffers()
{
    sal_Int64 batchOutputSize = 0;
    for (const auto& buffer : outBuffers)
        batchOutputSize += buffer.size();

    css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize);

    auto pos = outBuffer.getArray();
    for (auto& buffer : outBuffers)
    {
        pos = std::copy(buffer.begin(), buffer.end(), pos);
        buffer.clear();
    }

    maProcessOutputFunc(outBuffer, batchOutputSize);
    totalOut += batchOutputSize;
}

void ThreadedDeflater::clear()
{
    inBuffer = uno::Sequence<sal_Int8>();
    outBuffers.clear();
}

// When Z_PREFIX is defined, map the unprefixed zlib names used below onto their
// z_-prefixed counterparts.
// NOTE(review): presumably needed because a Z_PREFIX zlib build exports only the
// prefixed symbols - confirm against the zlib configuration in use.
#if defined Z_PREFIX
#define deflateInit2 z_deflateInit2
#define deflateBound z_deflateBound
#define deflateSetDictionary z_deflateSetDictionary
#define deflate z_deflate
#define deflateEnd z_deflateEnd
#endif

void ThreadedDeflater::Task::doWork()
{
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    // -MAX_WBITS means 32k window size and raw stream
    if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
                     Z_DEFAULT_STRATEGY)
        != Z_OK)
    {
        SAL_WARN("package.threadeddeflate", "deflateInit2() failed");
        abort();
    }
    // Find out size for our output buffer to be large enough for deflate() needing to be called just once.
    sal_Int64 outputMaxSize = deflateBound(&stream, blockSize);
    // add extra size for Z_SYNC_FLUSH
    outputMaxSize += 20;
    deflater->outBuffers[sequence].resize(outputMaxSize);
    sal_Int64 myInBufferStart = sequence * MaxBlockSize;
    // zlib doesn't handle const properly
    unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>(
        const_cast<signed char*>(deflater->inBuffer.getConstArray()));
    if (!firstTask)
    {
        // the window size is 32k, so set last 32k of previous data as the dictionary
        assert(MAX_WBITS == 15);
        assert(MaxBlockSize >= 32768);
        if (sequence > 0)
        {
            deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768);
        }
        else
        {
            unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>(
                const_cast<signed char*>(deflater->prevDataBlock.getConstArray()));
            deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768);
        }
    }
    stream.next_in = inBufferPtr + myInBufferStart;
    stream.avail_in = blockSize;
    stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data());
    stream.avail_out = outputMaxSize;

    // The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary,
    // and since we use a raw stream, the data blocks then can be simply concatenated.
    int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH);
    assert(stream.avail_in == 0); // Check that everything has been deflated.
    if (lastTask ? res == Z_STREAM_END : res == Z_OK)
    { // ok
        sal_Int64 outSize = outputMaxSize - stream.avail_out;
        deflater->outBuffers[sequence].resize(outSize);
    }
    else
    {
        SAL_WARN("package.threadeddeflate", "deflate() failed");
        abort();
    }
    deflateEnd(&stream);
}

} // namespace

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Coverage Report

Created: 2025-07-07 10:01

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -- */
2		/*
3		* This file is part of the LibreOffice project.
4		*
5		* This Source Code Form is subject to the terms of the Mozilla Public
6		* License, v. 2.0. If a copy of the MPL was not distributed with this
7		* file, You can obtain one at http://mozilla.org/MPL/2.0/.
8		*
9		* This file incorporates work covered by the following license notice:
10		*
11		* Licensed to the Apache Software Foundation (ASF) under one or more
12		* contributor license agreements. See the NOTICE file distributed
13		* with this work for additional information regarding copyright
14		* ownership. The ASF licenses this file to you under the Apache
15		* License, Version 2.0 (the "License"); you may not use this file
16		* except in compliance with the License. You may obtain a copy of
17		* the License at http://www.apache.org/licenses/LICENSE-2.0 .
18		*/
19
20		#include <ThreadedDeflater.hxx>
21		#include <zlib.h>
22		#include <com/sun/star/packages/zip/ZipConstants.hpp>
23		#include <sal/log.hxx>
24
25		using namespace com::sun::star::packages::zip::ZipConstants;
26		using namespace com::sun::star;
27
28		namespace ZipUtils
29		{
30		const sal_Int64 MaxBlockSize = 128 * 1024;
31
32		// Parallel ZLIB compression using threads. The class internally splits the data into
33		// blocks and spawns ThreadPool tasks to process them independently. This is achieved
34		// in a similar way how pigz works, see comments from Mark Adler at
35		// https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression
36		// and
37		// https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source
38
39		// Everything here should be either read-only, or writing to distinct data, or atomic.
40
41		class ThreadedDeflater::Task : public comphelper::ThreadTask
42		{
43		z_stream stream;
44		ThreadedDeflater* deflater;
45		int sequence;
46		int blockSize;
47		bool firstTask : 1;
48		bool lastTask : 1;
49
50		public:
51		Task(ThreadedDeflater* deflater_, int sequence_, int blockSize_, bool firstTask_,
52		bool lastTask_)
53	0	: comphelper::ThreadTask(deflater_->threadTaskTag)
54	0	, stream()
55	0	, deflater(deflater_)
56	0	, sequence(sequence_)
57	0	, blockSize(blockSize_)
58	0	, firstTask(firstTask_)
59	0	, lastTask(lastTask_)
60	0	{
61	0	}
62
63		private:
64		virtual void doWork() override;
65		};
66
67		ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel)
68	0	: threadTaskTag(comphelper::ThreadPool::createThreadTaskTag())
69	0	, totalIn(0)
70	0	, totalOut(0)
71	0	, zlibLevel(nSetLevel)
72	0	{
73	0	}
74
75	0	ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE { clear(); }
76
77		void ThreadedDeflater::deflateWrite(
78		const css::uno::Reference<css::io::XInputStream>& xInStream,
79		const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessInputFunc,
80		const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessOutputFunc)
81	0	{
82	0	sal_Int64 nThreadCount = comphelper::ThreadPool::getSharedOptimalPool().getWorkerCount();
83	0	sal_Int64 batchSize = MaxBlockSize * nThreadCount;
84	0	inBuffer.realloc(batchSize);
85	0	prevDataBlock.realloc(MaxBlockSize);
86	0	outBuffers.resize(nThreadCount);
87	0	maProcessOutputFunc = rProcessOutputFunc;
88	0	bool firstTask = true;
89
90	0	while (xInStream->available() > 0)
91	0	{
92	0	sal_Int64 inputBytes = xInStream->readBytes(inBuffer, batchSize);
93	0	rProcessInputFunc(inBuffer, inputBytes);
94	0	totalIn += inputBytes;
95	0	int sequence = 0;
96	0	bool lastBatch = xInStream->available() <= 0;
97	0	sal_Int64 bytesPending = inputBytes;
98	0	while (bytesPending > 0)
99	0	{
100	0	sal_Int64 taskSize = std::min(MaxBlockSize, bytesPending);
101	0	bytesPending -= taskSize;
102	0	bool lastTask = lastBatch && !bytesPending;
103	0	comphelper::ThreadPool::getSharedOptimalPool().pushTask(
104	0	std::make_unique<Task>(this, sequence++, taskSize, firstTask, lastTask));
105
106	0	if (firstTask)
107	0	firstTask = false;
108	0	}
109
110	0	assert(bytesPending == 0);
111
112	0	comphelper::ThreadPool::getSharedOptimalPool().waitUntilDone(threadTaskTag);
113
114	0	if (!lastBatch)
115	0	{
116	0	assert(inputBytes == batchSize);
117	0	std::copy_n(std::cbegin(inBuffer) + (batchSize - MaxBlockSize), MaxBlockSize,
118	0	prevDataBlock.getArray());
119	0	}
120
121	0	processDeflatedBuffers();
122	0	}
123	0	}
124
125		void ThreadedDeflater::processDeflatedBuffers()
126	0	{
127	0	sal_Int64 batchOutputSize = 0;
128	0	for (const auto& buffer : outBuffers)
129	0	batchOutputSize += buffer.size();
130
131	0	css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize);
132
133	0	auto pos = outBuffer.getArray();
134	0	for (auto& buffer : outBuffers)
135	0	{
136	0	pos = std::copy(buffer.begin(), buffer.end(), pos);
137	0	buffer.clear();
138	0	}
139
140	0	maProcessOutputFunc(outBuffer, batchOutputSize);
141	0	totalOut += batchOutputSize;
142	0	}
143
144		void ThreadedDeflater::clear()
145	0	{
146	0	inBuffer = uno::Sequence<sal_Int8>();
147	0	outBuffers.clear();
148	0	}
149
150		#if defined Z_PREFIX
151		#define deflateInit2 z_deflateInit2
152		#define deflateBound z_deflateBound
153		#define deflateSetDictionary z_deflateSetDictionary
154		#define deflate z_deflate
155		#define deflateEnd z_deflateEnd
156		#endif
157
158		void ThreadedDeflater::Task::doWork()
159	0	{
160	0	stream.zalloc = nullptr;
161	0	stream.zfree = nullptr;
162	0	stream.opaque = nullptr;
163		// -MAX_WBITS means 32k window size and raw stream
164	0	if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
165	0	Z_DEFAULT_STRATEGY)
166	0	!= Z_OK)
167	0	{
168	0	SAL_WARN("package.threadeddeflate", "deflateInit2() failed");
169	0	abort();
170	0	}
171		// Find out size for our output buffer to be large enough for deflate() needing to be called just once.
172	0	sal_Int64 outputMaxSize = deflateBound(&stream, blockSize);
173		// add extra size for Z_SYNC_FLUSH
174	0	outputMaxSize += 20;
175	0	deflater->outBuffers[sequence].resize(outputMaxSize);
176	0	sal_Int64 myInBufferStart = sequence * MaxBlockSize;
177		// zlib doesn't handle const properly
178	0	unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>(
179	0	const_cast<signed char*>(deflater->inBuffer.getConstArray()));
180	0	if (!firstTask)
181	0	{
182		// the window size is 32k, so set last 32k of previous data as the dictionary
183	0	assert(MAX_WBITS == 15);
184	0	assert(MaxBlockSize >= 32768);
185	0	if (sequence > 0)
186	0	{
187	0	deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768);
188	0	}
189	0	else
190	0	{
191	0	unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>(
192	0	const_cast<signed char*>(deflater->prevDataBlock.getConstArray()));
193	0	deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768);
194	0	}
195	0	}
196	0	stream.next_in = inBufferPtr + myInBufferStart;
197	0	stream.avail_in = blockSize;
198	0	stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data());
199	0	stream.avail_out = outputMaxSize;
200
201		// The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary,
202		// and since we use a raw stream, the data blocks then can be simply concatenated.
203	0	int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH);
204	0	assert(stream.avail_in == 0); // Check that everything has been deflated.
205	0	if (lastTask ? res == Z_STREAM_END : res == Z_OK)
206	0	{ // ok
207	0	sal_Int64 outSize = outputMaxSize - stream.avail_out;
208	0	deflater->outBuffers[sequence].resize(outSize);
209	0	}
210	0	else
211	0	{
212	0	SAL_WARN("package.threadeddeflate", "deflate() failed");
213	0	abort();
214	0	}
215	0	deflateEnd(&stream);
216	0	}
217
218		} // namespace
219
220		/* vim:set shiftwidth=4 softtabstop=4 expandtab: */