/src/libreoffice/package/source/zipapi/ThreadedDeflater.cxx
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
2 | | /* |
3 | | * This file is part of the LibreOffice project. |
4 | | * |
5 | | * This Source Code Form is subject to the terms of the Mozilla Public |
6 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
7 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8 | | * |
9 | | * This file incorporates work covered by the following license notice: |
10 | | * |
11 | | * Licensed to the Apache Software Foundation (ASF) under one or more |
12 | | * contributor license agreements. See the NOTICE file distributed |
13 | | * with this work for additional information regarding copyright |
14 | | * ownership. The ASF licenses this file to you under the Apache |
15 | | * License, Version 2.0 (the "License"); you may not use this file |
16 | | * except in compliance with the License. You may obtain a copy of |
17 | | * the License at http://www.apache.org/licenses/LICENSE-2.0 . |
18 | | */ |
19 | | |
20 | | #include <ThreadedDeflater.hxx> |
21 | | #include <zlib.h> |
22 | | #include <com/sun/star/packages/zip/ZipConstants.hpp> |
23 | | #include <sal/log.hxx> |
24 | | |
25 | | using namespace com::sun::star::packages::zip::ZipConstants; |
26 | | using namespace com::sun::star; |
27 | | |
28 | | namespace ZipUtils |
29 | | { |
30 | | const sal_Int64 MaxBlockSize = 128 * 1024; |
31 | | |
32 | | // Parallel ZLIB compression using threads. The class internally splits the data into |
33 | | // blocks and spawns ThreadPool tasks to process them independently. This is achieved |
34 | | // in a way similar to how pigz works; see comments from Mark Adler at |
35 | | // https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression |
36 | | // and |
37 | | // https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source |
38 | | |
39 | | // Everything here should be either read-only, or writing to distinct data, or atomic. |
40 | | |
41 | | class ThreadedDeflater::Task : public comphelper::ThreadTask |
42 | | { |
43 | | z_stream stream; |
44 | | ThreadedDeflater* deflater; |
45 | | int sequence; |
46 | | int blockSize; |
47 | | bool firstTask : 1; |
48 | | bool lastTask : 1; |
49 | | |
50 | | public: |
51 | | Task(ThreadedDeflater* deflater_, int sequence_, int blockSize_, bool firstTask_, |
52 | | bool lastTask_) |
53 | 0 | : comphelper::ThreadTask(deflater_->threadTaskTag) |
54 | 0 | , stream() |
55 | 0 | , deflater(deflater_) |
56 | 0 | , sequence(sequence_) |
57 | 0 | , blockSize(blockSize_) |
58 | 0 | , firstTask(firstTask_) |
59 | 0 | , lastTask(lastTask_) |
60 | 0 | { |
61 | 0 | } |
62 | | |
63 | | private: |
64 | | virtual void doWork() override; |
65 | | }; |
66 | | |
67 | | ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel) |
68 | 0 | : threadTaskTag(comphelper::ThreadPool::createThreadTaskTag()) |
69 | 0 | , totalIn(0) |
70 | 0 | , totalOut(0) |
71 | 0 | , zlibLevel(nSetLevel) |
72 | 0 | { |
73 | 0 | } |
74 | | |
75 | 0 | ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE { clear(); } |
76 | | |
77 | | void ThreadedDeflater::deflateWrite( |
78 | | const css::uno::Reference<css::io::XInputStream>& xInStream, |
79 | | const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessInputFunc, |
80 | | const std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)>& rProcessOutputFunc) |
81 | 0 | { |
82 | 0 | sal_Int64 nThreadCount = comphelper::ThreadPool::getSharedOptimalPool().getWorkerCount(); |
83 | 0 | sal_Int64 batchSize = MaxBlockSize * nThreadCount; |
84 | 0 | inBuffer.realloc(batchSize); |
85 | 0 | prevDataBlock.realloc(MaxBlockSize); |
86 | 0 | outBuffers.resize(nThreadCount); |
87 | 0 | maProcessOutputFunc = rProcessOutputFunc; |
88 | 0 | bool firstTask = true; |
89 | |
|
90 | 0 | while (xInStream->available() > 0) |
91 | 0 | { |
92 | 0 | sal_Int64 inputBytes = xInStream->readBytes(inBuffer, batchSize); |
93 | 0 | rProcessInputFunc(inBuffer, inputBytes); |
94 | 0 | totalIn += inputBytes; |
95 | 0 | int sequence = 0; |
96 | 0 | bool lastBatch = xInStream->available() <= 0; |
97 | 0 | sal_Int64 bytesPending = inputBytes; |
98 | 0 | while (bytesPending > 0) |
99 | 0 | { |
100 | 0 | sal_Int64 taskSize = std::min(MaxBlockSize, bytesPending); |
101 | 0 | bytesPending -= taskSize; |
102 | 0 | bool lastTask = lastBatch && !bytesPending; |
103 | 0 | comphelper::ThreadPool::getSharedOptimalPool().pushTask( |
104 | 0 | std::make_unique<Task>(this, sequence++, taskSize, firstTask, lastTask)); |
105 | |
|
106 | 0 | if (firstTask) |
107 | 0 | firstTask = false; |
108 | 0 | } |
109 | |
|
110 | 0 | assert(bytesPending == 0); |
111 | |
|
112 | 0 | comphelper::ThreadPool::getSharedOptimalPool().waitUntilDone(threadTaskTag); |
113 | |
|
114 | 0 | if (!lastBatch) |
115 | 0 | { |
116 | 0 | assert(inputBytes == batchSize); |
117 | 0 | std::copy_n(std::cbegin(inBuffer) + (batchSize - MaxBlockSize), MaxBlockSize, |
118 | 0 | prevDataBlock.getArray()); |
119 | 0 | } |
120 | |
|
121 | 0 | processDeflatedBuffers(); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | void ThreadedDeflater::processDeflatedBuffers() |
126 | 0 | { |
127 | 0 | sal_Int64 batchOutputSize = 0; |
128 | 0 | for (const auto& buffer : outBuffers) |
129 | 0 | batchOutputSize += buffer.size(); |
130 | |
|
131 | 0 | css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize); |
132 | |
|
133 | 0 | auto pos = outBuffer.getArray(); |
134 | 0 | for (auto& buffer : outBuffers) |
135 | 0 | { |
136 | 0 | pos = std::copy(buffer.begin(), buffer.end(), pos); |
137 | 0 | buffer.clear(); |
138 | 0 | } |
139 | |
|
140 | 0 | maProcessOutputFunc(outBuffer, batchOutputSize); |
141 | 0 | totalOut += batchOutputSize; |
142 | 0 | } |
143 | | |
144 | | void ThreadedDeflater::clear() |
145 | 0 | { |
146 | 0 | inBuffer = uno::Sequence<sal_Int8>(); |
147 | 0 | outBuffers.clear(); |
148 | 0 | } |
149 | | |
// When Z_PREFIX is defined, map the unprefixed zlib names used below to their
// z_-prefixed counterparts.
#ifdef Z_PREFIX
#define deflate z_deflate
#define deflateBound z_deflateBound
#define deflateEnd z_deflateEnd
#define deflateInit2 z_deflateInit2
#define deflateSetDictionary z_deflateSetDictionary
#endif
157 | | |
158 | | void ThreadedDeflater::Task::doWork() |
159 | 0 | { |
160 | 0 | stream.zalloc = nullptr; |
161 | 0 | stream.zfree = nullptr; |
162 | 0 | stream.opaque = nullptr; |
163 | | // -MAX_WBITS means 32k window size and raw stream |
164 | 0 | if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, |
165 | 0 | Z_DEFAULT_STRATEGY) |
166 | 0 | != Z_OK) |
167 | 0 | { |
168 | 0 | SAL_WARN("package.threadeddeflate", "deflateInit2() failed"); |
169 | 0 | abort(); |
170 | 0 | } |
171 | | // Find out size for our output buffer to be large enough for deflate() needing to be called just once. |
172 | 0 | sal_Int64 outputMaxSize = deflateBound(&stream, blockSize); |
173 | | // add extra size for Z_SYNC_FLUSH |
174 | 0 | outputMaxSize += 20; |
175 | 0 | deflater->outBuffers[sequence].resize(outputMaxSize); |
176 | 0 | sal_Int64 myInBufferStart = sequence * MaxBlockSize; |
177 | | // zlib doesn't handle const properly |
178 | 0 | unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>( |
179 | 0 | const_cast<signed char*>(deflater->inBuffer.getConstArray())); |
180 | 0 | if (!firstTask) |
181 | 0 | { |
182 | | // the window size is 32k, so set last 32k of previous data as the dictionary |
183 | 0 | assert(MAX_WBITS == 15); |
184 | 0 | assert(MaxBlockSize >= 32768); |
185 | 0 | if (sequence > 0) |
186 | 0 | { |
187 | 0 | deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768); |
188 | 0 | } |
189 | 0 | else |
190 | 0 | { |
191 | 0 | unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>( |
192 | 0 | const_cast<signed char*>(deflater->prevDataBlock.getConstArray())); |
193 | 0 | deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768); |
194 | 0 | } |
195 | 0 | } |
196 | 0 | stream.next_in = inBufferPtr + myInBufferStart; |
197 | 0 | stream.avail_in = blockSize; |
198 | 0 | stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data()); |
199 | 0 | stream.avail_out = outputMaxSize; |
200 | | |
201 | | // The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary, |
202 | | // and since we use a raw stream, the data blocks then can be simply concatenated. |
203 | 0 | int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH); |
204 | 0 | assert(stream.avail_in == 0); // Check that everything has been deflated. |
205 | 0 | if (lastTask ? res == Z_STREAM_END : res == Z_OK) |
206 | 0 | { // ok |
207 | 0 | sal_Int64 outSize = outputMaxSize - stream.avail_out; |
208 | 0 | deflater->outBuffers[sequence].resize(outSize); |
209 | 0 | } |
210 | 0 | else |
211 | 0 | { |
212 | 0 | SAL_WARN("package.threadeddeflate", "deflate() failed"); |
213 | 0 | abort(); |
214 | 0 | } |
215 | 0 | deflateEnd(&stream); |
216 | 0 | } |
217 | | |
218 | | } // namespace |
219 | | |
220 | | /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |