Coverage Report

Created: 2025-12-31 10:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libreoffice/include/comphelper/parallelsort.hxx
Line
Count
Source
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
/*
3
 * This file is part of the LibreOffice project.
4
 *
5
 * This Source Code Form is subject to the terms of the Mozilla Public
6
 * License, v. 2.0. If a copy of the MPL was not distributed with this
7
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
 */
9
10
#ifndef INCLUDED_COMPHELPER_PARALLELSORT_HXX
11
#define INCLUDED_COMPHELPER_PARALLELSORT_HXX
12
13
#include <comphelper/threadpool.hxx>
14
#include <tools/cpuid.hxx>
15
16
#include <memory>
17
#include <iterator>
18
#include <thread>
19
#include <algorithm>
20
#include <cmath>
21
#include <random>
22
#include <functional>
23
#include <iostream>
24
#include <chrono>
25
26
namespace comphelper
27
{
28
const size_t nThreadCountGlobal = std::thread::hardware_concurrency();
29
const bool bHyperThreadingActive = cpuid::hasHyperThreading();
30
static comphelper::ThreadPool& rTPool(comphelper::ThreadPool::getSharedOptimalPool());
31
32
static thread_local std::mt19937 aGenerator{ std::random_device{}() };
33
34
#define PARALLELSORT_ENABLEPZ 0
35
36
namespace
37
{
38
class ProfileZone
39
{
40
public:
41
#if PARALLELSORT_ENABLEPZ
42
    ProfileZone(const char* pTag)
43
        : maTag(pTag)
44
        , maStart(std::chrono::steady_clock::now())
45
        , mbFinished(false)
46
    {
47
    }
48
49
    ~ProfileZone()
50
    {
51
        if (!mbFinished)
52
            showTimeElapsed();
53
    }
54
55
    void stop()
56
    {
57
        showTimeElapsed();
58
        mbFinished = true;
59
    }
60
#else
61
    ProfileZone(const char* /*pTag*/)
62
8.96k
        : mbDummy(true)
63
8.96k
    {
64
8.96k
    }
65
66
    void stop()
67
1.47k
    {
68
        // Avoid loplugin:staticmethods, loplugin:staticaccess errors
69
1.47k
        (void)mbDummy;
70
1.47k
    }
71
#endif
72
73
private:
74
#if PARALLELSORT_ENABLEPZ
75
76
    void showTimeElapsed()
77
    {
78
        auto end = std::chrono::steady_clock::now();
79
        size_t elapsed
80
            = std::chrono::duration_cast<std::chrono::milliseconds>(end - maStart).count();
81
        std::cout << maTag << " : " << elapsed << " ms" << std::endl << std::flush;
82
    }
83
84
    std::string maTag;
85
    std::chrono::steady_clock::time_point maStart;
86
    bool mbFinished;
87
#else
88
    bool mbDummy;
89
90
#endif
91
};
92
93
class ParallelRunner
94
{
95
    class Executor final : public comphelper::ThreadTask
96
    {
97
    public:
98
        Executor(const std::shared_ptr<comphelper::ThreadTaskTag>& rTag,
99
                 std::function<void()> aFunc)
100
13.4k
            : comphelper::ThreadTask(rTag)
101
13.4k
            , maFunc(std::move(aFunc))
102
13.4k
        {
103
13.4k
        }
104
105
13.3k
        virtual void doWork() override { maFunc(); }
106
107
    private:
108
        const std::function<void()> maFunc;
109
    };
110
111
public:
112
420
    ParallelRunner() { maTag = comphelper::ThreadPool::createThreadTaskTag(); }
113
114
    void enqueue(std::function<void()> aFunc)
115
13.4k
    {
116
13.4k
        rTPool.pushTask(std::make_unique<Executor>(maTag, aFunc));
117
13.4k
    }
118
119
420
    void wait() { rTPool.waitUntilDone(maTag, false); }
120
121
private:
122
    std::shared_ptr<comphelper::ThreadTaskTag> maTag;
123
};
124
125
constexpr size_t nMaxTreeArraySize = 64;
126
127
size_t lcl_tree_array_size(size_t nNum)
128
840
{
129
840
    size_t nPow2;
130
5.88k
    for (nPow2 = 1; nPow2 <= nNum; nPow2 <<= 1)
131
5.04k
        ;
132
840
    return std::clamp((nPow2 >> 1), size_t(1), nMaxTreeArraySize);
133
840
}
134
135
template <class RandItr> struct Sampler
136
{
137
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
138
139
    static void sample(RandItr aBegin, RandItr aEnd, ValueType* pSamples, size_t nSamples,
140
                       size_t /*nParallelism*/)
141
630
    {
142
630
        ProfileZone aZone("\tsample()");
143
630
        assert(aBegin <= aEnd);
144
630
        size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
145
630
        assert(std::mt19937::max() >= nLen);
146
147
2.56M
        for (size_t nIdx = 0; nIdx < nSamples; ++nIdx)
148
2.56M
        {
149
2.56M
            size_t nSel = aGenerator() % nLen--;
150
2.56M
            using namespace std;
151
2.56M
            swap(*(aBegin + nSel), *(aBegin + nLen));
152
2.56M
            pSamples[nIdx] = *(aBegin + nLen);
153
2.56M
        }
154
630
    }
155
};
156
157
template <class RandItr, class Compare> class Binner
158
{
159
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
160
161
    const size_t mnTreeArraySize;
162
    const size_t mnDividers;
163
    constexpr static size_t mnMaxStaticSize = 1024 * 50;
164
    uint8_t maLabels[mnMaxStaticSize];
165
    ValueType maDividers[nMaxTreeArraySize];
166
    std::unique_ptr<uint8_t[]> pLabels;
167
    size_t maSepBinEnds[nMaxTreeArraySize * nMaxTreeArraySize];
168
    bool mbThreaded;
169
170
public:
171
    size_t maBinEnds[nMaxTreeArraySize];
172
173
    Binner(const ValueType* pSamples, size_t nSamples, size_t nBins, bool bThreaded)
174
210
        : mnTreeArraySize(lcl_tree_array_size(nBins))
175
210
        , mnDividers(mnTreeArraySize - 1)
176
210
        , mbThreaded(bThreaded)
177
210
    {
178
210
        assert((nSamples % mnTreeArraySize) == 0);
179
210
        assert(mnTreeArraySize <= nMaxTreeArraySize);
180
210
        std::fill(maBinEnds, maBinEnds + mnTreeArraySize, 0);
181
210
        std::fill(maSepBinEnds, maSepBinEnds + mnTreeArraySize * mnTreeArraySize, 0);
182
210
        fillTreeArray(1, pSamples, pSamples + nSamples);
183
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::Binner((anonymous namespace)::Bucket const*, unsigned long, unsigned long, bool)
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::Binner((anonymous namespace)::Bucket const*, unsigned long, unsigned long, bool)
Line
Count
Source
174
210
        : mnTreeArraySize(lcl_tree_array_size(nBins))
175
210
        , mnDividers(mnTreeArraySize - 1)
176
210
        , mbThreaded(bThreaded)
177
210
    {
178
210
        assert((nSamples % mnTreeArraySize) == 0);
179
        assert(mnTreeArraySize <= nMaxTreeArraySize);
180
210
        std::fill(maBinEnds, maBinEnds + mnTreeArraySize, 0);
181
210
        std::fill(maSepBinEnds, maSepBinEnds + mnTreeArraySize * mnTreeArraySize, 0);
182
210
        fillTreeArray(1, pSamples, pSamples + nSamples);
183
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::Binner((anonymous namespace)::Bucket const*, unsigned long, unsigned long, bool)
184
185
    void fillTreeArray(size_t nPos, const ValueType* pLow, const ValueType* pHigh)
186
6.51k
    {
187
6.51k
        assert(pLow <= pHigh);
188
6.51k
        const ValueType* pMid = pLow + (pHigh - pLow) / 2;
189
6.51k
        maDividers[nPos] = *pMid;
190
191
6.51k
        if (2 * nPos < mnDividers) // So that 2*nPos < mnTreeArraySize
192
3.15k
        {
193
3.15k
            fillTreeArray(2 * nPos, pLow, pMid);
194
3.15k
            fillTreeArray(2 * nPos + 1, pMid + 1, pHigh);
195
3.15k
        }
196
6.51k
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::fillTreeArray(unsigned long, (anonymous namespace)::Bucket const*, (anonymous namespace)::Bucket const*)
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::fillTreeArray(unsigned long, (anonymous namespace)::Bucket const*, (anonymous namespace)::Bucket const*)
Line
Count
Source
186
6.51k
    {
187
6.51k
        assert(pLow <= pHigh);
188
6.51k
        const ValueType* pMid = pLow + (pHigh - pLow) / 2;
189
6.51k
        maDividers[nPos] = *pMid;
190
191
6.51k
        if (2 * nPos < mnDividers) // So that 2*nPos < mnTreeArraySize
192
3.15k
        {
193
3.15k
            fillTreeArray(2 * nPos, pLow, pMid);
194
3.15k
            fillTreeArray(2 * nPos + 1, pMid + 1, pHigh);
195
3.15k
        }
196
6.51k
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::fillTreeArray(unsigned long, (anonymous namespace)::Bucket const*, (anonymous namespace)::Bucket const*)
197
198
    constexpr inline size_t findBin(const ValueType& rVal, Compare& aComp)
199
15.5M
    {
200
15.5M
        size_t nIdx = 1;
201
88.9M
        while (nIdx <= mnDividers)
202
73.3M
            nIdx = ((nIdx << 1) + aComp(maDividers[nIdx], rVal));
203
15.5M
        return (nIdx - mnTreeArraySize);
204
15.5M
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::findBin((anonymous namespace)::Bucket const&, (anonymous namespace)::LessByValue&)
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::findBin((anonymous namespace)::Bucket const&, (anonymous namespace)::LessByDataIndex&)
Line
Count
Source
199
15.5M
    {
200
15.5M
        size_t nIdx = 1;
201
88.9M
        while (nIdx <= mnDividers)
202
73.3M
            nIdx = ((nIdx << 1) + aComp(maDividers[nIdx], rVal));
203
15.5M
        return (nIdx - mnTreeArraySize);
204
15.5M
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::findBin((anonymous namespace)::Bucket const&, (anonymous namespace)::LessByOrderIndex&)
205
206
    void label(const RandItr aBegin, const RandItr aEnd, Compare& aComp)
207
210
    {
208
210
        ProfileZone aZoneSetup("\tlabel():setup");
209
210
        size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
210
210
        if (nLen > mnMaxStaticSize)
211
210
            pLabels = std::make_unique<uint8_t[]>(nLen);
212
210
        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
213
210
        aZoneSetup.stop();
214
210
        ProfileZone aZoneFindBins("\tFindBins()");
215
210
        if (mbThreaded)
216
210
        {
217
210
            ParallelRunner aPRunner;
218
210
            const size_t nBins = mnTreeArraySize;
219
6.93k
            for (size_t nTIdx = 0; nTIdx < nBins; ++nTIdx)
220
6.72k
            {
221
6.72k
                aPRunner.enqueue([this, nTIdx, nBins, nLen, aBegin, pLabelsRaw, &aComp] {
222
6.65k
                    ProfileZone aZoneIn("\t\tFindBinsThreaded()");
223
6.65k
                    size_t nBinEndsStartIdx = nTIdx * mnTreeArraySize;
224
6.65k
                    size_t* pBinEnds = maSepBinEnds + nBinEndsStartIdx;
225
6.65k
                    size_t aBinEndsF[nMaxTreeArraySize] = { 0 };
226
15.5M
                    for (size_t nIdx = nTIdx; nIdx < nLen; nIdx += nBins)
227
15.5M
                    {
228
15.5M
                        size_t nBinIdx = findBin(*(aBegin + nIdx), aComp);
229
15.5M
                        pLabelsRaw[nIdx] = static_cast<uint8_t>(nBinIdx);
230
15.5M
                        ++aBinEndsF[nBinIdx];
231
15.5M
                    }
232
233
213k
                    for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
234
207k
                        pBinEnds[nIdx] = aBinEndsF[nIdx];
235
6.65k
                });
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue&)::{lambda()#1}::operator()() const
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex&)::{lambda()#1}::operator()() const
Line
Count
Source
221
6.65k
                aPRunner.enqueue([this, nTIdx, nBins, nLen, aBegin, pLabelsRaw, &aComp] {
222
6.65k
                    ProfileZone aZoneIn("\t\tFindBinsThreaded()");
223
6.65k
                    size_t nBinEndsStartIdx = nTIdx * mnTreeArraySize;
224
6.65k
                    size_t* pBinEnds = maSepBinEnds + nBinEndsStartIdx;
225
6.65k
                    size_t aBinEndsF[nMaxTreeArraySize] = { 0 };
226
15.5M
                    for (size_t nIdx = nTIdx; nIdx < nLen; nIdx += nBins)
227
15.5M
                    {
228
15.5M
                        size_t nBinIdx = findBin(*(aBegin + nIdx), aComp);
229
15.5M
                        pLabelsRaw[nIdx] = static_cast<uint8_t>(nBinIdx);
230
15.5M
                        ++aBinEndsF[nBinIdx];
231
15.5M
                    }
232
233
213k
                    for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
234
207k
                        pBinEnds[nIdx] = aBinEndsF[nIdx];
235
6.65k
                });
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex&)::{lambda()#1}::operator()() const
236
6.72k
            }
237
238
210
            aPRunner.wait();
239
240
            // Populate maBinEnds from maSepBinEnds
241
6.93k
            for (size_t nTIdx = 0; nTIdx < mnTreeArraySize; ++nTIdx)
242
6.72k
            {
243
221k
                for (size_t nSepIdx = 0; nSepIdx < mnTreeArraySize; ++nSepIdx)
244
215k
                    maBinEnds[nTIdx] += maSepBinEnds[nSepIdx * mnTreeArraySize + nTIdx];
245
6.72k
            }
246
210
        }
247
0
        else
248
0
        {
249
0
            uint8_t* pLabel = pLabelsRaw;
250
0
            for (RandItr aItr = aBegin; aItr != aEnd; ++aItr)
251
0
            {
252
0
                size_t nBinIdx = findBin(*aItr, aComp);
253
0
                *pLabel++ = nBinIdx;
254
0
                ++maBinEnds[nBinIdx];
255
0
            }
256
0
        }
257
258
210
        aZoneFindBins.stop();
259
260
210
        size_t nSum = 0;
261
        // Store each bin's starting position in maBinEnds array for now.
262
6.93k
        for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
263
6.72k
        {
264
6.72k
            size_t nSize = maBinEnds[nIdx];
265
6.72k
            maBinEnds[nIdx] = nSum;
266
6.72k
            nSum += nSize;
267
6.72k
        }
268
269
        // Now maBinEnds has end positions of each bin.
270
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue&)
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex&)
Line
Count
Source
207
210
    {
208
210
        ProfileZone aZoneSetup("\tlabel():setup");
209
210
        size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
210
210
        if (nLen > mnMaxStaticSize)
211
210
            pLabels = std::make_unique<uint8_t[]>(nLen);
212
210
        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
213
210
        aZoneSetup.stop();
214
210
        ProfileZone aZoneFindBins("\tFindBins()");
215
210
        if (mbThreaded)
216
210
        {
217
210
            ParallelRunner aPRunner;
218
210
            const size_t nBins = mnTreeArraySize;
219
6.93k
            for (size_t nTIdx = 0; nTIdx < nBins; ++nTIdx)
220
6.72k
            {
221
6.72k
                aPRunner.enqueue([this, nTIdx, nBins, nLen, aBegin, pLabelsRaw, &aComp] {
222
6.72k
                    ProfileZone aZoneIn("\t\tFindBinsThreaded()");
223
6.72k
                    size_t nBinEndsStartIdx = nTIdx * mnTreeArraySize;
224
6.72k
                    size_t* pBinEnds = maSepBinEnds + nBinEndsStartIdx;
225
6.72k
                    size_t aBinEndsF[nMaxTreeArraySize] = { 0 };
226
6.72k
                    for (size_t nIdx = nTIdx; nIdx < nLen; nIdx += nBins)
227
6.72k
                    {
228
6.72k
                        size_t nBinIdx = findBin(*(aBegin + nIdx), aComp);
229
6.72k
                        pLabelsRaw[nIdx] = static_cast<uint8_t>(nBinIdx);
230
6.72k
                        ++aBinEndsF[nBinIdx];
231
6.72k
                    }
232
233
6.72k
                    for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
234
6.72k
                        pBinEnds[nIdx] = aBinEndsF[nIdx];
235
6.72k
                });
236
6.72k
            }
237
238
210
            aPRunner.wait();
239
240
            // Populate maBinEnds from maSepBinEnds
241
6.93k
            for (size_t nTIdx = 0; nTIdx < mnTreeArraySize; ++nTIdx)
242
6.72k
            {
243
221k
                for (size_t nSepIdx = 0; nSepIdx < mnTreeArraySize; ++nSepIdx)
244
215k
                    maBinEnds[nTIdx] += maSepBinEnds[nSepIdx * mnTreeArraySize + nTIdx];
245
6.72k
            }
246
210
        }
247
0
        else
248
0
        {
249
0
            uint8_t* pLabel = pLabelsRaw;
250
0
            for (RandItr aItr = aBegin; aItr != aEnd; ++aItr)
251
0
            {
252
0
                size_t nBinIdx = findBin(*aItr, aComp);
253
0
                *pLabel++ = nBinIdx;
254
0
                ++maBinEnds[nBinIdx];
255
0
            }
256
0
        }
257
258
210
        aZoneFindBins.stop();
259
260
210
        size_t nSum = 0;
261
        // Store each bin's starting position in maBinEnds array for now.
262
6.93k
        for (size_t nIdx = 0; nIdx < mnTreeArraySize; ++nIdx)
263
6.72k
        {
264
6.72k
            size_t nSize = maBinEnds[nIdx];
265
6.72k
            maBinEnds[nIdx] = nSum;
266
6.72k
            nSum += nSize;
267
6.72k
        }
268
269
        // Now maBinEnds has end positions of each bin.
270
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::label(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex&)
271
272
    void bin(const RandItr aBegin, const RandItr aEnd, ValueType* pOut)
273
210
    {
274
210
        ProfileZone aZone("\tbin()");
275
210
        const size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
276
210
        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
277
210
        size_t nIdx;
278
220M
        for (nIdx = 0; nIdx < nLen; ++nIdx)
279
220M
        {
280
220M
            pOut[maBinEnds[pLabelsRaw[nIdx]]++] = *(aBegin + nIdx);
281
220M
        }
282
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>::bin(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::Bucket*)
dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>::bin(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::Bucket*)
Line
Count
Source
273
210
    {
274
210
        ProfileZone aZone("\tbin()");
275
210
        const size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
276
210
        uint8_t* pLabelsRaw = (nLen > mnMaxStaticSize) ? pLabels.get() : maLabels;
277
210
        size_t nIdx;
278
220M
        for (nIdx = 0; nIdx < nLen; ++nIdx)
279
220M
        {
280
220M
            pOut[maBinEnds[pLabelsRaw[nIdx]]++] = *(aBegin + nIdx);
281
220M
        }
282
210
    }
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::Binner<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>::bin(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::Bucket*)
283
};
284
285
template <class RandItr, class Compare = std::less<>>
286
void s3sort(const RandItr aBegin, const RandItr aEnd, Compare aComp = Compare(),
287
            bool bThreaded = true)
288
3.17k
{
289
3.17k
    static size_t nThreadCount = nThreadCountGlobal;
290
291
3.17k
    constexpr size_t nBaseCaseSize = 1024;
292
3.17k
    const std::size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
293
3.17k
    if (nLen < nBaseCaseSize)
294
2.54k
    {
295
2.54k
        std::stable_sort(aBegin, aEnd, aComp);
296
2.54k
        return;
297
2.54k
    }
298
299
630
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
300
630
    auto pOut = std::make_unique<ValueType[]>(nLen);
301
302
630
    const size_t nBins = lcl_tree_array_size(nThreadCount);
303
630
    assert(nBins >= 1);
304
630
    const size_t nOverSamplingFactor = std::max(1.0, std::sqrt(static_cast<double>(nLen) / 64));
305
630
    const size_t nSamples = nOverSamplingFactor * nBins;
306
630
    auto aSamples = std::make_unique<ValueType[]>(nSamples);
307
630
    ProfileZone aZoneSampleAnsSort("SampleAndSort");
308
    // Select samples and sort them
309
630
    Sampler<RandItr>::sample(aBegin, aEnd, aSamples.get(), nSamples, nBins);
310
630
    std::sort(aSamples.get(), aSamples.get() + nSamples, aComp);
311
630
    aZoneSampleAnsSort.stop();
312
313
630
    if (!aComp(aSamples[0], aSamples[nSamples - 1]))
314
420
    {
315
        // All samples are equal, fallback to standard sort.
316
420
        std::sort(aBegin, aEnd, aComp);
317
420
        return;
318
420
    }
319
320
210
    ProfileZone aZoneBinner("Binner");
321
    // Create and populate bins using pOut from input iterators.
322
210
    Binner<RandItr, Compare> aBinner(aSamples.get(), nSamples, nBins, bThreaded);
323
210
    aBinner.label(aBegin, aEnd, aComp);
324
210
    aBinner.bin(aBegin, aEnd, pOut.get());
325
210
    aZoneBinner.stop();
326
327
210
    ProfileZone aZoneSortBins("SortBins");
328
210
    ValueType* pOutRaw = pOut.get();
329
210
    if (bThreaded)
330
210
    {
331
210
        ParallelRunner aPRunner;
332
        // Sort the bins separately.
333
6.93k
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
334
6.72k
        {
335
6.72k
            size_t nBinEnd = aBinner.maBinEnds[nBinIdx];
336
6.72k
            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
337
6.64k
                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
338
6.64k
            });
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue, bool)::{lambda()#1}::operator()() const
dpcache.cxx:comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex, bool)::{lambda()#1}::operator()() const
Line
Count
Source
336
6.64k
            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
337
6.64k
                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
338
6.64k
            });
Unexecuted instantiation: dpcache.cxx:comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex, bool)::{lambda()#1}::operator()() const
339
340
6.72k
            nBinStart = nBinEnd;
341
6.72k
        }
342
343
210
        aPRunner.wait();
344
210
    }
345
0
    else
346
0
    {
347
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
348
0
        {
349
0
            auto nBinEnd = aBinner.maBinEnds[nBinIdx];
350
0
            std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
351
0
            nBinStart = nBinEnd;
352
0
        }
353
0
    }
354
355
210
    aZoneSortBins.stop();
356
357
    // Move the sorted array to the array specified by input iterators.
358
210
    std::move(pOutRaw, pOutRaw + nLen, aBegin);
359
210
}
dpcache.cxx:void comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue, bool)
Line
Count
Source
288
1.05k
{
289
1.05k
    static size_t nThreadCount = nThreadCountGlobal;
290
291
1.05k
    constexpr size_t nBaseCaseSize = 1024;
292
1.05k
    const std::size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
293
1.05k
    if (nLen < nBaseCaseSize)
294
849
    {
295
849
        std::stable_sort(aBegin, aEnd, aComp);
296
849
        return;
297
849
    }
298
299
210
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
300
210
    auto pOut = std::make_unique<ValueType[]>(nLen);
301
302
210
    const size_t nBins = lcl_tree_array_size(nThreadCount);
303
210
    assert(nBins >= 1);
304
210
    const size_t nOverSamplingFactor = std::max(1.0, std::sqrt(static_cast<double>(nLen) / 64));
305
210
    const size_t nSamples = nOverSamplingFactor * nBins;
306
210
    auto aSamples = std::make_unique<ValueType[]>(nSamples);
307
210
    ProfileZone aZoneSampleAnsSort("SampleAndSort");
308
    // Select samples and sort them
309
210
    Sampler<RandItr>::sample(aBegin, aEnd, aSamples.get(), nSamples, nBins);
310
210
    std::sort(aSamples.get(), aSamples.get() + nSamples, aComp);
311
210
    aZoneSampleAnsSort.stop();
312
313
210
    if (!aComp(aSamples[0], aSamples[nSamples - 1]))
314
210
    {
315
        // All samples are equal, fallback to standard sort.
316
210
        std::sort(aBegin, aEnd, aComp);
317
210
        return;
318
210
    }
319
320
0
    ProfileZone aZoneBinner("Binner");
321
    // Create and populate bins using pOut from input iterators.
322
0
    Binner<RandItr, Compare> aBinner(aSamples.get(), nSamples, nBins, bThreaded);
323
0
    aBinner.label(aBegin, aEnd, aComp);
324
0
    aBinner.bin(aBegin, aEnd, pOut.get());
325
0
    aZoneBinner.stop();
326
327
0
    ProfileZone aZoneSortBins("SortBins");
328
0
    ValueType* pOutRaw = pOut.get();
329
0
    if (bThreaded)
330
0
    {
331
0
        ParallelRunner aPRunner;
332
        // Sort the bins separately.
333
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
334
0
        {
335
0
            size_t nBinEnd = aBinner.maBinEnds[nBinIdx];
336
0
            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
337
0
                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
338
0
            });
339
340
0
            nBinStart = nBinEnd;
341
0
        }
342
343
0
        aPRunner.wait();
344
0
    }
345
0
    else
346
0
    {
347
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
348
0
        {
349
0
            auto nBinEnd = aBinner.maBinEnds[nBinIdx];
350
0
            std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
351
0
            nBinStart = nBinEnd;
352
0
        }
353
0
    }
354
355
0
    aZoneSortBins.stop();
356
357
    // Move the sorted array to the array specified by input iterators.
358
0
    std::move(pOutRaw, pOutRaw + nLen, aBegin);
359
0
}
dpcache.cxx:void comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex, bool)
Line
Count
Source
288
1.05k
{
289
1.05k
    static size_t nThreadCount = nThreadCountGlobal;
290
291
1.05k
    constexpr size_t nBaseCaseSize = 1024;
292
1.05k
    const std::size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
293
1.05k
    if (nLen < nBaseCaseSize)
294
849
    {
295
849
        std::stable_sort(aBegin, aEnd, aComp);
296
849
        return;
297
849
    }
298
299
210
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
300
210
    auto pOut = std::make_unique<ValueType[]>(nLen);
301
302
210
    const size_t nBins = lcl_tree_array_size(nThreadCount);
303
210
    assert(nBins >= 1);
304
210
    const size_t nOverSamplingFactor = std::max(1.0, std::sqrt(static_cast<double>(nLen) / 64));
305
210
    const size_t nSamples = nOverSamplingFactor * nBins;
306
210
    auto aSamples = std::make_unique<ValueType[]>(nSamples);
307
210
    ProfileZone aZoneSampleAnsSort("SampleAndSort");
308
    // Select samples and sort them
309
210
    Sampler<RandItr>::sample(aBegin, aEnd, aSamples.get(), nSamples, nBins);
310
210
    std::sort(aSamples.get(), aSamples.get() + nSamples, aComp);
311
210
    aZoneSampleAnsSort.stop();
312
313
210
    if (!aComp(aSamples[0], aSamples[nSamples - 1]))
314
0
    {
315
        // All samples are equal, fallback to standard sort.
316
0
        std::sort(aBegin, aEnd, aComp);
317
0
        return;
318
0
    }
319
320
210
    ProfileZone aZoneBinner("Binner");
321
    // Create and populate bins using pOut from input iterators.
322
210
    Binner<RandItr, Compare> aBinner(aSamples.get(), nSamples, nBins, bThreaded);
323
210
    aBinner.label(aBegin, aEnd, aComp);
324
210
    aBinner.bin(aBegin, aEnd, pOut.get());
325
210
    aZoneBinner.stop();
326
327
210
    ProfileZone aZoneSortBins("SortBins");
328
210
    ValueType* pOutRaw = pOut.get();
329
210
    if (bThreaded)
330
210
    {
331
210
        ParallelRunner aPRunner;
332
        // Sort the bins separately.
333
6.93k
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
334
6.72k
        {
335
6.72k
            size_t nBinEnd = aBinner.maBinEnds[nBinIdx];
336
6.72k
            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
337
6.72k
                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
338
6.72k
            });
339
340
6.72k
            nBinStart = nBinEnd;
341
6.72k
        }
342
343
210
        aPRunner.wait();
344
210
    }
345
0
    else
346
0
    {
347
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
348
0
        {
349
0
            auto nBinEnd = aBinner.maBinEnds[nBinIdx];
350
0
            std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
351
0
            nBinStart = nBinEnd;
352
0
        }
353
0
    }
354
355
210
    aZoneSortBins.stop();
356
357
    // Move the sorted array to the array specified by input iterators.
358
210
    std::move(pOutRaw, pOutRaw + nLen, aBegin);
359
210
}
dpcache.cxx:void comphelper::(anonymous namespace)::s3sort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex, bool)
Line
Count
Source
288
1.05k
{
289
1.05k
    static size_t nThreadCount = nThreadCountGlobal;
290
291
1.05k
    constexpr size_t nBaseCaseSize = 1024;
292
1.05k
    const std::size_t nLen = static_cast<std::size_t>(aEnd - aBegin);
293
1.05k
    if (nLen < nBaseCaseSize)
294
849
    {
295
849
        std::stable_sort(aBegin, aEnd, aComp);
296
849
        return;
297
849
    }
298
299
210
    using ValueType = typename std::iterator_traits<RandItr>::value_type;
300
210
    auto pOut = std::make_unique<ValueType[]>(nLen);
301
302
210
    const size_t nBins = lcl_tree_array_size(nThreadCount);
303
210
    assert(nBins >= 1);
304
210
    const size_t nOverSamplingFactor = std::max(1.0, std::sqrt(static_cast<double>(nLen) / 64));
305
210
    const size_t nSamples = nOverSamplingFactor * nBins;
306
210
    auto aSamples = std::make_unique<ValueType[]>(nSamples);
307
210
    ProfileZone aZoneSampleAnsSort("SampleAndSort");
308
    // Select samples and sort them
309
210
    Sampler<RandItr>::sample(aBegin, aEnd, aSamples.get(), nSamples, nBins);
310
210
    std::sort(aSamples.get(), aSamples.get() + nSamples, aComp);
311
210
    aZoneSampleAnsSort.stop();
312
313
210
    if (!aComp(aSamples[0], aSamples[nSamples - 1]))
314
210
    {
315
        // All samples are equal, fallback to standard sort.
316
210
        std::sort(aBegin, aEnd, aComp);
317
210
        return;
318
210
    }
319
320
0
    ProfileZone aZoneBinner("Binner");
321
    // Create and populate bins using pOut from input iterators.
322
0
    Binner<RandItr, Compare> aBinner(aSamples.get(), nSamples, nBins, bThreaded);
323
0
    aBinner.label(aBegin, aEnd, aComp);
324
0
    aBinner.bin(aBegin, aEnd, pOut.get());
325
0
    aZoneBinner.stop();
326
327
0
    ProfileZone aZoneSortBins("SortBins");
328
0
    ValueType* pOutRaw = pOut.get();
329
0
    if (bThreaded)
330
0
    {
331
0
        ParallelRunner aPRunner;
332
        // Sort the bins separately.
333
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
334
0
        {
335
0
            size_t nBinEnd = aBinner.maBinEnds[nBinIdx];
336
0
            aPRunner.enqueue([pOutRaw, nBinStart, nBinEnd, &aComp] {
337
0
                std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
338
0
            });
339
340
0
            nBinStart = nBinEnd;
341
0
        }
342
343
0
        aPRunner.wait();
344
0
    }
345
0
    else
346
0
    {
347
0
        for (size_t nBinIdx = 0, nBinStart = 0; nBinIdx < nBins; ++nBinIdx)
348
0
        {
349
0
            auto nBinEnd = aBinner.maBinEnds[nBinIdx];
350
0
            std::sort(pOutRaw + nBinStart, pOutRaw + nBinEnd, aComp);
351
0
            nBinStart = nBinEnd;
352
0
        }
353
0
    }
354
355
0
    aZoneSortBins.stop();
356
357
    // Move the sorted array to the array specified by input iterators.
358
0
    std::move(pOutRaw, pOutRaw + nLen, aBegin);
359
0
}
360
361
} // anonymous namespace
362
363
template <class RandItr, class Compare = std::less<>>
364
void parallelSort(const RandItr aBegin, const RandItr aEnd, Compare aComp = Compare())
365
3.17k
{
366
    assert(aBegin <= aEnd);
367
3.17k
    s3sort(aBegin, aEnd, aComp);
368
3.17k
}
dpcache.cxx:void comphelper::parallelSort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByValue)
Line
Count
Source
365
1.05k
{
366
    assert(aBegin <= aEnd);
367
1.05k
    s3sort(aBegin, aEnd, aComp);
368
1.05k
}
dpcache.cxx:void comphelper::parallelSort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByDataIndex)
Line
Count
Source
365
1.05k
{
366
    assert(aBegin <= aEnd);
367
1.05k
    s3sort(aBegin, aEnd, aComp);
368
1.05k
}
dpcache.cxx:void comphelper::parallelSort<std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex>(std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, std::__1::__wrap_iter<(anonymous namespace)::Bucket*>, (anonymous namespace)::LessByOrderIndex)
Line
Count
Source
365
1.05k
{
366
    assert(aBegin <= aEnd);
367
1.05k
    s3sort(aBegin, aEnd, aComp);
368
1.05k
}
369
370
} // namespace comphelper
371
372
#endif // INCLUDED_COMPHELPER_PARALLELSORT_HXX
373
374
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */