/src/assimp/code/PostProcessing/ImproveCacheLocality.cpp
Line | Count | Source |
1 | | /* |
2 | | --------------------------------------------------------------------------- |
3 | | Open Asset Import Library (assimp) |
4 | | --------------------------------------------------------------------------- |
5 | | |
6 | | Copyright (c) 2006-2026, assimp team |
7 | | |
8 | | All rights reserved. |
9 | | |
10 | | Redistribution and use of this software in source and binary forms, |
11 | | with or without modification, are permitted provided that the following |
12 | | conditions are met: |
13 | | |
14 | | * Redistributions of source code must retain the above |
15 | | copyright notice, this list of conditions and the |
16 | | following disclaimer. |
17 | | |
18 | | * Redistributions in binary form must reproduce the above |
19 | | copyright notice, this list of conditions and the |
20 | | following disclaimer in the documentation and/or other |
21 | | materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the assimp team, nor the names of its |
24 | | contributors may be used to endorse or promote products |
25 | | derived from this software without specific prior |
26 | | written permission of the assimp team. |
27 | | |
28 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
29 | | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
31 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
32 | | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
33 | | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
34 | | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
35 | | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
36 | | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
37 | | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
38 | | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
39 | | --------------------------------------------------------------------------- |
40 | | */ |
41 | | |
42 | | /** @file Implementation of the post processing step to improve the cache locality of a mesh. |
43 | | * <br> |
44 | | * The algorithm is roughly basing on this paper: |
45 | | * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf |
46 | | * .. although overdraw reduction isn't implemented yet ... |
47 | | */ |
48 | | |
49 | | // internal headers |
50 | | #include "PostProcessing/ImproveCacheLocality.h" |
51 | | #include "Common/VertexTriangleAdjacency.h" |
52 | | |
53 | | #include <assimp/StringUtils.h> |
54 | | #include <assimp/postprocess.h> |
55 | | #include <assimp/scene.h> |
56 | | #include <assimp/DefaultLogger.hpp> |
57 | | #include <stdio.h> |
58 | | #include <stack> |
59 | | |
60 | | namespace Assimp { |
61 | | namespace { |
62 | | ai_real calculateInputACMR(aiMesh *pMesh, const aiFace *const pcEnd, |
63 | 0 | unsigned int configCacheDepth, unsigned int meshNum) { |
64 | 0 | ai_real fACMR = 0.0f; |
65 | 0 | unsigned int *piFIFOStack = new unsigned int[configCacheDepth]; |
66 | 0 | memset(piFIFOStack, 0xff, configCacheDepth * sizeof(unsigned int)); |
67 | 0 | unsigned int *piCur = piFIFOStack; |
68 | 0 | const unsigned int *const piCurEnd = piFIFOStack + configCacheDepth; |
69 | | |
70 | | // count the number of cache misses |
71 | 0 | unsigned int iCacheMisses = 0; |
72 | 0 | for (const aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) { |
73 | 0 | for (unsigned int qq = 0; qq < 3; ++qq) { |
74 | 0 | bool bInCache = false; |
75 | 0 | for (unsigned int *pp = piFIFOStack; pp < piCurEnd; ++pp) { |
76 | 0 | if (*pp == pcFace->mIndices[qq]) { |
77 | | // the vertex is in cache |
78 | 0 | bInCache = true; |
79 | 0 | break; |
80 | 0 | } |
81 | 0 | } |
82 | 0 | if (!bInCache) { |
83 | 0 | ++iCacheMisses; |
84 | 0 | if (piCurEnd == piCur) { |
85 | 0 | piCur = piFIFOStack; |
86 | 0 | } |
87 | 0 | *piCur++ = pcFace->mIndices[qq]; |
88 | 0 | } |
89 | 0 | } |
90 | 0 | } |
91 | 0 | delete[] piFIFOStack; |
92 | 0 | fACMR = (ai_real)iCacheMisses / pMesh->mNumFaces; |
93 | 0 | if (3.0 == fACMR) { |
94 | 0 | char szBuff[128]; // should be sufficiently large in every case |
95 | | |
96 | | // the JoinIdenticalVertices process has not been executed on this |
97 | | // mesh, otherwise this value would normally be at least minimally |
98 | | // smaller than 3.0 ... |
99 | 0 | ai_snprintf(szBuff, 128, "Mesh %u: Not suitable for vcache optimization", meshNum); |
100 | 0 | ASSIMP_LOG_WARN(szBuff); |
101 | 0 | return static_cast<ai_real>(0.f); |
102 | 0 | } |
103 | 0 | return fACMR; |
104 | 0 | } |
105 | | } |
106 | | |
107 | | // ------------------------------------------------------------------------------------------------ |
108 | | // Constructor to be privately used by Importer |
109 | 297 | ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() : mConfigCacheDepth(PP_ICL_PTCACHE_SIZE) { |
110 | | // empty |
111 | 297 | } |
112 | | |
113 | | // ------------------------------------------------------------------------------------------------ |
114 | | // Returns whether the processing step is present in the given flag field. |
115 | 218 | bool ImproveCacheLocalityProcess::IsActive(unsigned int pFlags) const { |
116 | 218 | return (pFlags & aiProcess_ImproveCacheLocality) != 0; |
117 | 218 | } |
118 | | |
119 | | // ------------------------------------------------------------------------------------------------ |
120 | | // Setup configuration |
121 | 218 | void ImproveCacheLocalityProcess::SetupProperties(const Importer *pImp) { |
122 | | // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer |
123 | 218 | mConfigCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE, PP_ICL_PTCACHE_SIZE); |
124 | 218 | } |
125 | | |
126 | | // ------------------------------------------------------------------------------------------------ |
127 | | // Executes the post processing step on the given imported data. |
128 | 218 | void ImproveCacheLocalityProcess::Execute(aiScene *pScene) { |
129 | 218 | if (!pScene->mNumMeshes) { |
130 | 127 | ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess skipped; there are no meshes"); |
131 | 127 | return; |
132 | 127 | } |
133 | | |
134 | 218 | ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess begin"); |
135 | | |
136 | 91 | float out = 0.f; |
137 | 91 | unsigned int numf = 0, numm = 0; |
138 | 188 | for (unsigned int a = 0; a < pScene->mNumMeshes; ++a) { |
139 | 97 | const float res = ProcessMesh(pScene->mMeshes[a], a); |
140 | 97 | if (res) { |
141 | 0 | numf += pScene->mMeshes[a]->mNumFaces; |
142 | 0 | out += res; |
143 | 0 | ++numm; |
144 | 0 | } |
145 | 97 | } |
146 | 91 | if (!DefaultLogger::isNullLogger()) { |
147 | 0 | if (numf > 0) { |
148 | 0 | ASSIMP_LOG_INFO("Cache relevant are ", numm, " meshes (", numf, " faces). Average output ACMR is ", out / numf); |
149 | 0 | } |
150 | 0 | ASSIMP_LOG_DEBUG("ImproveCacheLocalityProcess finished. "); |
151 | 0 | } |
152 | 91 | } |
153 | | |
154 | | // ------------------------------------------------------------------------------------------------ |
155 | | // Improves the cache coherency of a specific mesh |
156 | 97 | ai_real ImproveCacheLocalityProcess::ProcessMesh(aiMesh *pMesh, unsigned int meshNum) { |
157 | | // TODO: rewrite this to use std::vector or boost::shared_array |
158 | 97 | ai_assert(nullptr != pMesh); |
159 | | |
160 | | // Check whether the input data is valid |
161 | | // - there must be vertices and faces |
162 | | // - all faces must be triangulated or we can't operate on them |
163 | 97 | if (!pMesh->HasFaces() || !pMesh->HasPositions()) |
164 | 0 | return static_cast<ai_real>(0.f); |
165 | | |
166 | 97 | if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) { |
167 | 4 | ASSIMP_LOG_ERROR("This algorithm works on triangle meshes only"); |
168 | 4 | return static_cast<ai_real>(0.f); |
169 | 4 | } |
170 | | |
171 | 93 | if (pMesh->mNumVertices <= mConfigCacheDepth) { |
172 | 4 | return static_cast<ai_real>(0.f); |
173 | 4 | } |
174 | | |
175 | 89 | ai_real fACMR = 3.f; |
176 | 89 | const aiFace *const pcEnd = pMesh->mFaces + pMesh->mNumFaces; |
177 | | |
178 | | // Input ACMR is for logging purposes only |
179 | 89 | if (!DefaultLogger::isNullLogger()) { |
180 | 0 | fACMR = calculateInputACMR(pMesh, pcEnd, mConfigCacheDepth, meshNum); |
181 | 0 | } |
182 | | |
183 | | // first we need to build a vertex-triangle adjacency list |
184 | 89 | VertexTriangleAdjacency adj(pMesh->mFaces, pMesh->mNumFaces, pMesh->mNumVertices, true); |
185 | | |
186 | | // build a list to store per-vertex caching time stamps |
187 | 89 | std::vector<unsigned int> piCachingStamps; |
188 | 89 | piCachingStamps.resize(pMesh->mNumVertices); |
189 | 89 | memset(&piCachingStamps[0], 0x0, pMesh->mNumVertices * sizeof(unsigned int)); |
190 | | |
191 | | // allocate an empty output index buffer. We store the output indices in one large array. |
192 | | // Since the number of triangles won't change the input faces can be reused. This is how |
193 | | // we save thousands of redundant mini allocations for aiFace::mIndices |
194 | 89 | const unsigned int iIdxCnt = pMesh->mNumFaces * 3; |
195 | 89 | std::vector<unsigned int> piIBOutput; |
196 | 89 | piIBOutput.resize(iIdxCnt); |
197 | 89 | std::vector<unsigned int>::iterator piCSIter = piIBOutput.begin(); |
198 | | |
199 | | // allocate the flag array to hold the information |
200 | | // whether a face has already been emitted or not |
201 | 89 | std::vector<bool> abEmitted(pMesh->mNumFaces, false); |
202 | | |
203 | | // dead-end vertex index stack |
204 | 89 | std::stack<unsigned int, std::vector<unsigned int>> sDeadEndVStack; |
205 | | |
206 | | // create a copy of the piNumTriPtr buffer |
207 | 89 | unsigned int *const piNumTriPtr = adj.mLiveTriangles; |
208 | 89 | const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices); |
209 | | |
210 | | // get the largest number of referenced triangles and allocate the "candidate buffer" |
211 | 89 | unsigned int iMaxRefTris = 0; |
212 | 89 | { |
213 | 89 | const unsigned int *piCur = adj.mLiveTriangles; |
214 | 89 | const unsigned int *const piCurEnd = adj.mLiveTriangles + pMesh->mNumVertices; |
215 | 132k | for (; piCur != piCurEnd; ++piCur) { |
216 | 132k | iMaxRefTris = std::max(iMaxRefTris, *piCur); |
217 | 132k | } |
218 | 89 | } |
219 | 89 | ai_assert(iMaxRefTris > 0); |
220 | 89 | std::vector<unsigned int> piCandidates; |
221 | 89 | piCandidates.resize(iMaxRefTris * 3); |
222 | 89 | unsigned int iCacheMisses = 0; |
223 | | |
224 | | // ................................................................................... |
225 | | /** PSEUDOCODE for the algorithm |
226 | | |
227 | | A = Build-Adjacency(I) Vertex-triangle adjacency |
228 | | L = Get-Triangle-Counts(A) Per-vertex live triangle counts |
229 | | C = Zero(Vertex-Count(I)) Per-vertex caching time stamps |
230 | | D = Empty-Stack() Dead-end vertex stack |
231 | | E = False(Triangle-Count(I)) Per triangle emitted flag |
232 | | O = Empty-Index-Buffer() Empty output buffer |
233 | | f = 0 Arbitrary starting vertex |
234 | | s = k+1, i = 1 Time stamp and cursor |
235 | | while f >= 0 For all valid fanning vertices |
236 | | N = Empty-Set() 1-ring of next candidates |
237 | | for each Triangle t in Neighbors(A, f) |
238 | | if !Emitted(E,t) |
239 | | for each Vertex v in t |
240 | | Append(O,v) Output vertex |
241 | | Push(D,v) Add to dead-end stack |
242 | | Insert(N,v) Register as candidate |
243 | | L[v] = L[v]-1 Decrease live triangle count |
244 | | if s-C[v] > k If not in cache |
245 | | C[v] = s Set time stamp |
246 | | s = s+1 Increment time stamp |
247 | | E[t] = true Flag triangle as emitted |
248 | | Select next fanning vertex |
249 | | f = Get-Next-Vertex(I,i,k,N,C,s,L,D) |
250 | | return O |
251 | | */ |
252 | | // ................................................................................... |
253 | | |
254 | 89 | int ivdx = 0; |
255 | 89 | int ics = 1; |
256 | 89 | int iStampCnt = mConfigCacheDepth + 1; |
257 | 81.9k | while (ivdx >= 0) { |
258 | | |
259 | 81.8k | unsigned int icnt = piNumTriPtrNoModify[ivdx]; |
260 | 81.8k | unsigned int *piList = adj.GetAdjacentTriangles(ivdx); |
261 | 81.8k | std::vector<unsigned int>::iterator piCurCandidate = piCandidates.begin(); |
262 | | |
263 | | // get all triangles in the neighborhood |
264 | 451k | for (unsigned int tri = 0; tri < icnt; ++tri) { |
265 | | |
266 | | // if they have not yet been emitted, add them to the output IB |
267 | 369k | const unsigned int fidx = *piList++; |
268 | 369k | if (!abEmitted[fidx]) { |
269 | | |
270 | | // so iterate through all vertices of the current triangle |
271 | 175k | const aiFace *pcFace = &pMesh->mFaces[fidx]; |
272 | 175k | const unsigned nind = pcFace->mNumIndices; |
273 | 702k | for (unsigned ind = 0; ind < nind; ind++) { |
274 | 527k | unsigned dp = pcFace->mIndices[ind]; |
275 | | |
276 | | // the current vertex won't have any free triangles after this step |
277 | 527k | if (ivdx != (int)dp) { |
278 | | // append the vertex to the dead-end stack |
279 | 351k | sDeadEndVStack.push(dp); |
280 | | |
281 | | // register as candidate for the next step |
282 | 351k | *piCurCandidate++ = dp; |
283 | | |
284 | | // decrease the per-vertex triangle counts |
285 | 351k | piNumTriPtr[dp]--; |
286 | 351k | } |
287 | | |
288 | | // append the vertex to the output index buffer |
289 | 527k | *piCSIter++ = dp; |
290 | | |
291 | | // if the vertex is not yet in cache, set its cache count |
292 | 527k | if (iStampCnt - piCachingStamps[dp] > mConfigCacheDepth) { |
293 | 163k | piCachingStamps[dp] = iStampCnt++; |
294 | 163k | ++iCacheMisses; |
295 | 163k | } |
296 | 527k | } |
297 | | // flag triangle as emitted |
298 | 175k | abEmitted[fidx] = true; |
299 | 175k | } |
300 | 369k | } |
301 | | |
302 | | // the vertex has now no living adjacent triangles anymore |
303 | 81.8k | piNumTriPtr[ivdx] = 0; |
304 | | |
305 | | // get next fanning vertex |
306 | 81.8k | ivdx = -1; |
307 | 81.8k | int max_priority = -1; |
308 | 433k | for (std::vector<unsigned int>::iterator piCur = piCandidates.begin(); piCur != piCurCandidate; ++piCur) { |
309 | 351k | const unsigned int dp = *piCur; |
310 | | |
311 | | // must have live triangles |
312 | 351k | if (piNumTriPtr[dp] > 0) { |
313 | 295k | int priority = 0; |
314 | | |
315 | | // will the vertex be in cache, even after fanning occurs? |
316 | 295k | unsigned int tmp; |
317 | 295k | if ((tmp = iStampCnt - piCachingStamps[dp]) + 2 * piNumTriPtr[dp] <= mConfigCacheDepth) { |
318 | 283k | priority = tmp; |
319 | 283k | } |
320 | | |
321 | | // keep best candidate |
322 | 295k | if (priority > max_priority) { |
323 | 105k | max_priority = priority; |
324 | 105k | ivdx = dp; |
325 | 105k | } |
326 | 295k | } |
327 | 351k | } |
328 | | // did we reach a dead end? |
329 | 81.8k | if (-1 == ivdx) { |
330 | | // need to get a non-local vertex for which we have a good chance that it is still |
331 | | // in the cache ... |
332 | 355k | while (!sDeadEndVStack.empty()) { |
333 | 351k | unsigned int iCachedIdx = sDeadEndVStack.top(); |
334 | 351k | sDeadEndVStack.pop(); |
335 | 351k | if (piNumTriPtr[iCachedIdx] > 0) { |
336 | 5.10k | ivdx = iCachedIdx; |
337 | 5.10k | break; |
338 | 5.10k | } |
339 | 351k | } |
340 | | |
341 | 9.13k | if (-1 == ivdx) { |
342 | | // well, there isn't such a vertex. Simply get the next vertex in input order and |
343 | | // hope it is not too bad ... |
344 | 132k | while (ics < (int)pMesh->mNumVertices) { |
345 | 132k | ++ics; |
346 | 132k | if (piNumTriPtr[ics] > 0) { |
347 | 3.93k | ivdx = ics; |
348 | 3.93k | break; |
349 | 3.93k | } |
350 | 132k | } |
351 | 4.02k | } |
352 | 9.13k | } |
353 | 81.8k | } |
354 | 89 | ai_real fACMR2 = 0.0f; |
355 | 89 | if (!DefaultLogger::isNullLogger()) { |
356 | 0 | fACMR2 = static_cast<ai_real>(iCacheMisses / pMesh->mNumFaces); |
357 | 0 | const ai_real averageACMR = ((fACMR - fACMR2) / fACMR) * 100.f; |
358 | | // very intense verbose logging ... prepare for much text if there are many meshes |
359 | 0 | if (DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { |
360 | 0 | ASSIMP_LOG_VERBOSE_DEBUG("Mesh ", meshNum, "| ACMR in: ", fACMR, " out: ", fACMR2, " | average ACMR ", averageACMR); |
361 | 0 | } |
362 | 0 | fACMR2 *= pMesh->mNumFaces; |
363 | 0 | } |
364 | | |
365 | | // sort the output index buffer back to the input array |
366 | 89 | piCSIter = piIBOutput.begin(); |
367 | 175k | for (aiFace *pcFace = pMesh->mFaces; pcFace != pcEnd; ++pcFace) { |
368 | 175k | unsigned nind = pcFace->mNumIndices; |
369 | 175k | unsigned *ind = pcFace->mIndices; |
370 | 175k | if (nind > 0) |
371 | 175k | ind[0] = *piCSIter++; |
372 | 175k | if (nind > 1) |
373 | 175k | ind[1] = *piCSIter++; |
374 | 175k | if (nind > 2) |
375 | 175k | ind[2] = *piCSIter++; |
376 | 175k | } |
377 | | |
378 | 89 | return fACMR2; |
379 | 93 | } |
380 | | |
381 | | } // namespace Assimp |