Coverage Report

Created: 2026-05-23 06:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/assimp/code/PostProcessing/ImproveCacheLocality.cpp
Line
Count
Source
1
/*
2
---------------------------------------------------------------------------
3
Open Asset Import Library (assimp)
4
---------------------------------------------------------------------------
5
6
Copyright (c) 2006-2026, assimp team
7
8
All rights reserved.
9
10
Redistribution and use of this software in source and binary forms,
11
with or without modification, are permitted provided that the following
12
conditions are met:
13
14
* Redistributions of source code must retain the above
15
  copyright notice, this list of conditions and the
16
  following disclaimer.
17
18
* Redistributions in binary form must reproduce the above
19
  copyright notice, this list of conditions and the
20
  following disclaimer in the documentation and/or other
21
  materials provided with the distribution.
22
23
* Neither the name of the assimp team, nor the names of its
24
  contributors may be used to endorse or promote products
25
  derived from this software without specific prior
26
  written permission of the assimp team.
27
28
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
---------------------------------------------------------------------------
40
*/
41
42
/** @file Implementation of the post processing step to improve the cache locality of a mesh.
43
 * <br>
44
 * The algorithm is roughly basing on this paper:
45
 * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
46
 *   .. although overdraw reduction isn't implemented yet ...
47
 */
48
49
// internal headers
50
#include "PostProcessing/ImproveCacheLocality.h"
51
#include "Common/VertexTriangleAdjacency.h"
52
53
#include <assimp/StringUtils.h>
54
#include <assimp/postprocess.h>
55
#include <assimp/scene.h>
56
#include <assimp/DefaultLogger.hpp>
57
#include <stdio.h>
58
#include <stack>
59
60
namespace Assimp {
61
namespace {
62
    ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd,
63
0
            unsigned int configCacheDepth, unsigned int meshNum) {
64
0
        ai_real fACMR = 0.0f;
65
0
        unsigned int *piFIFOStack = new unsigned int[configCacheDepth];
66
0
        memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int));
67
0
        unsigned int *piCur = piFIFOStack;
68
0
        const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth;
69
70
        // count the number of cache misses
71
0
        unsigned int iCacheMisses = 0;
72
0
        for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
73
0
            for (unsigned int qq = 0; qq < 3; ++qq) {
74
0
                bool bInCache = false;
75
0
                for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) {
76
0
                    if (*pp == pcFace->mIndices[qq]) {
77
                        // the vertex is in cache
78
0
                        bInCache = true;
79
0
                        break;
80
0
                    }
81
0
                }
82
0
                if (!bInCache) {
83
0
                    ++iCacheMisses;
84
0
                    if (piCurEnd == piCur) {
85
0
                        piCur = piFIFOStack;
86
0
                    }
87
0
                    *piCur++ = pcFace->mIndices[qq];
88
0
                }
89
0
            }
90
0
        }
91
0
        delete[] piFIFOStack;
92
0
        fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces;
93
0
        if (3.0 == fACMR) {
94
0
            char szBuff[128]; // should be sufficiently large in every case
95
96
            // the JoinIdenticalVertices process has not been executed on this
97
            // mesh, otherwise this value would normally be at least minimally
98
            // smaller than 3.0 ...
99
0
            ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum);
100
0
            ASSIMP_LOG_WARN(szBuff);
101
0
            return static_cast<ai_real>(0.f);
102
0
        }
103
0
        return fACMR;
104
0
    }
105
}
106
107
// ------------------------------------------------------------------------------------------------
108
// Constructor to be privately used by Importer
109
297
ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() : mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) {
110
    // empty
111
297
}
112
113
// ------------------------------------------------------------------------------------------------
114
// Returns whether the processing step is present in the given flag field.
115
218
bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const {
116
218
    return (pFlags & aiProcess_ImproveCacheLocality) != 0;
117
218
}
118
119
// ------------------------------------------------------------------------------------------------
120
// Setup configuration
121
218
void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) {
122
    // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
123
218
    mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE);
124
218
}
125
126
// ------------------------------------------------------------------------------------------------
127
// Executes the post processing step on the given imported data.
128
218
void ImproveCacheLocalityProcess::Execute(aiScene *pScene) {
129
218
    if (!pScene->mNumMeshes) {
130
127
        ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes");
131
127
        return;
132
127
    }
133
134
218
    ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin");
135
136
91
    float out = 0.f;
137
91
    unsigned int numf = 0, numm = 0;
138
188
    for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) {
139
97
        const float res = ProcessMesh(pScene->mMeshes[a], a);
140
97
        if (res) {
141
0
            numf += pScene->mMeshes[a]->mNumFaces;
142
0
            out += res;
143
0
            ++numm;
144
0
        }
145
97
    }
146
91
    if (!DefaultLogger::isNullLogger()) {
147
0
        if (numf > 0) {
148
0
            ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf);
149
0
        }
150
0
        ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. ");
151
0
    }
152
91
}
153
154
// ------------------------------------------------------------------------------------------------
155
// Improves the cache coherency of a specific mesh
156
97
ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) {
157
    // TODO: rewrite this to use std::vector or boost::shared_array
158
97
    ai_assert(nullptr != pMesh);
159
160
    // Check whether the input data is valid
161
    // - there must be vertices and faces
162
    // - all faces must be triangulated or we can't operate on them
163
97
    if (!pMesh->HasFaces() || !pMesh->HasPositions())
164
0
        return static_cast<ai_real>(0.f);
165
166
97
    if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
167
4
        ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only");
168
4
        return static_cast<ai_real>(0.f);
169
4
    }
170
171
93
    if (pMesh->mNumVertices <= mConfigCacheDepth) {
172
4
        return static_cast<ai_real>(0.f);
173
4
    }
174
175
89
    ai_real fACMR = 3.f;
176
89
    const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces;
177
178
    // Input ACMR is for logging purposes only
179
89
    if (!DefaultLogger::isNullLogger()) {
180
0
        fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum);
181
0
    }
182
183
    // first we need to build a vertex-triangle adjacency list
184
89
    VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true);
185
186
    // build a list to store per-vertex caching time stamps
187
89
    std::vector<unsigned int> piCachingStamps;
188
89
    piCachingStamps.resize(pMesh->mNumVertices);
189
89
    memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int));
190
191
    // allocate an empty output index buffer. We store the output indices in one large array.
192
    // Since the number of triangles won't change the input faces can be reused. This is how
193
    // we save thousands of redundant mini allocations for aiFace::mIndices
194
89
    const unsigned int iIdxCnt = pMesh->mNumFaces * 3;
195
89
    std::vector<unsigned int> piIBOutput;
196
89
    piIBOutput.resize(iIdxCnt);
197
89
    std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin();
198
199
    // allocate the flag array to hold the information
200
    // whether a face has already been emitted or not
201
89
    std::vector<bool> abEmitted(pMesh->mNumFaces, false);
202
203
    // dead-end vertex index stack
204
89
    std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack;
205
206
    // create a copy of the piNumTriPtr buffer
207
89
    unsigned int *const piNumTriPtr = adj.mLiveTriangles;
208
89
    const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
209
210
    // get the largest number of referenced triangles and allocate the "candidate buffer"
211
89
    unsigned int iMaxRefTris = 0;
212
89
    {
213
89
        const unsigned int *piCur = adj.mLiveTriangles;
214
89
        const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices;
215
132k
        for (; piCur != piCurEnd; ++piCur) {
216
132k
            iMaxRefTris = std::max(iMaxRefTris, *piCur);
217
132k
        }
218
89
    }
219
89
    ai_assert(iMaxRefTris > 0);
220
89
    std::vector<unsigned int> piCandidates;
221
89
    piCandidates.resize(iMaxRefTris * 3);
222
89
    unsigned int iCacheMisses = 0;
223
224
    // ...................................................................................
225
    /** PSEUDOCODE for the algorithm
226
227
        A = Build-Adjacency(I) Vertex-triangle adjacency
228
        L = Get-Triangle-Counts(A) Per-vertex live triangle counts
229
        C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
230
        D = Empty-Stack() Dead-end vertex stack
231
        E = False(Triangle-Count(I)) Per triangle emitted flag
232
        O = Empty-Index-Buffer() Empty output buffer
233
        f = 0 Arbitrary starting vertex
234
        s = k+1, i = 1 Time stamp and cursor
235
        while f >= 0 For all valid fanning vertices
236
            N = Empty-Set() 1-ring of next candidates
237
            for each Triangle t in Neighbors(A, f)
238
                if !Emitted(E,t)
239
                    for each Vertex v in t
240
                        Append(O,v) Output vertex
241
                        Push(D,v) Add to dead-end stack
242
                        Insert(N,v) Register as candidate
243
                        L[v] = L[v]-1 Decrease live triangle count
244
                        if s-C[v] > k If not in cache
245
                            C[v] = s Set time stamp
246
                            s = s+1 Increment time stamp
247
                    E[t] = true Flag triangle as emitted
248
            Select next fanning vertex
249
            f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
250
        return O
251
        */
252
    // ...................................................................................
253
254
89
    int ivdx = 0;
255
89
    int ics = 1;
256
89
    int iStampCnt = mConfigCacheDepth + 1;
257
81.9k
    while (ivdx >= 0) {
258
259
81.8k
        unsigned int icnt = piNumTriPtrNoModify[ivdx];
260
81.8k
        unsigned int *piList = adj.GetAdjacentTriangles(ivdx);
261
81.8k
        std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin();
262
263
        // get all triangles in the neighborhood
264
451k
        for (unsigned int tri = 0; tri < icnt; ++tri) {
265
266
            // if they have not yet been emitted, add them to the output IB
267
369k
            const unsigned int fidx = *piList++;
268
369k
            if (!abEmitted[fidx]) {
269
270
                // so iterate through all vertices of the current triangle
271
175k
                const aiFace *pcFace = &pMesh->mFaces[fidx];
272
175k
                const unsigned nind = pcFace->mNumIndices;
273
702k
                for (unsigned ind = 0; ind < nind; ind++) {
274
527k
                    unsigned dp = pcFace->mIndices[ind];
275
276
                    // the current vertex won't have any free triangles after this step
277
527k
                    if (ivdx != (int)dp) {
278
                        // append the vertex to the dead-end stack
279
351k
                        sDeadEndVStack.push(dp);
280
281
                        // register as candidate for the next step
282
351k
                        *piCurCandidate++ = dp;
283
284
                        // decrease the per-vertex triangle counts
285
351k
                        piNumTriPtr[dp]--;
286
351k
                    }
287
288
                    // append the vertex to the output index buffer
289
527k
                    *piCSIter++ = dp;
290
291
                    // if the vertex is not yet in cache, set its cache count
292
527k
                    if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) {
293
163k
                        piCachingStamps[dp] = iStampCnt++;
294
163k
                        ++iCacheMisses;
295
163k
                    }
296
527k
                }
297
                // flag triangle as emitted
298
175k
                abEmitted[fidx] = true;
299
175k
            }
300
369k
        }
301
302
        // the vertex has now no living adjacent triangles anymore
303
81.8k
        piNumTriPtr[ivdx] = 0;
304
305
        // get next fanning vertex
306
81.8k
        ivdx = -1;
307
81.8k
        int max_priority = -1;
308
433k
        for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) {
309
351k
            const unsigned int dp = *piCur;
310
311
            // must have live triangles
312
351k
            if (piNumTriPtr[dp] > 0) {
313
295k
                int priority = 0;
314
315
                // will the vertex be in cache, even after fanning occurs?
316
295k
                unsigned int tmp;
317
295k
                if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) {
318
283k
                    priority = tmp;
319
283k
                }
320
321
                // keep best candidate
322
295k
                if (priority > max_priority) {
323
105k
                    max_priority = priority;
324
105k
                    ivdx = dp;
325
105k
                }
326
295k
            }
327
351k
        }
328
        // did we reach a dead end?
329
81.8k
        if (-1 == ivdx) {
330
            // need to get a non-local vertex for which we have a good chance that it is still
331
            // in the cache ...
332
355k
            while (!sDeadEndVStack.empty()) {
333
351k
                unsigned int iCachedIdx = sDeadEndVStack.top();
334
351k
                sDeadEndVStack.pop();
335
351k
                if (piNumTriPtr[iCachedIdx] > 0) {
336
5.10k
                    ivdx = iCachedIdx;
337
5.10k
                    break;
338
5.10k
                }
339
351k
            }
340
341
9.13k
            if (-1 == ivdx) {
342
                // well, there isn't such a vertex. Simply get the next vertex in input order and
343
                // hope it is not too bad ...
344
132k
                while (ics < (int)pMesh->mNumVertices) {
345
132k
                    ++ics;
346
132k
                    if (piNumTriPtr[ics] > 0) {
347
3.93k
                        ivdx = ics;
348
3.93k
                        break;
349
3.93k
                    }
350
132k
                }
351
4.02k
            }
352
9.13k
        }
353
81.8k
    }
354
89
    ai_real fACMR2 = 0.0f;
355
89
    if (!DefaultLogger::isNullLogger()) {
356
0
        fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces);
357
0
        const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f;
358
        // very intense verbose logging ... prepare for much text if there are many meshes
359
0
        if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
360
0
            ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR);
361
0
        }
362
0
        fACMR2 *= pMesh->mNumFaces;
363
0
    }
364
365
    // sort the output index buffer back to the input array
366
89
    piCSIter = piIBOutput.begin();
367
175k
    for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) {
368
175k
        unsigned nind = pcFace->mNumIndices;
369
175k
        unsigned *ind = pcFace->mIndices;
370
175k
        if (nind > 0)
371
175k
            ind[0] = *piCSIter++;
372
175k
        if (nind > 1)
373
175k
            ind[1] = *piCSIter++;
374
175k
        if (nind > 2)
375
175k
            ind[2] = *piCSIter++;
376
175k
    }
377
378
89
    return fACMR2;
379
93
}
380
381
} // namespace Assimp