Coverage Report

Created: 2026-05-24 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ogre/OgreMain/src/OgreOptimisedUtilGeneral.cpp
Line
Count
Source
1
/*
2
-----------------------------------------------------------------------------
3
This source file is part of OGRE
4
    (Object-oriented Graphics Rendering Engine)
5
For the latest info, see http://www.ogre3d.org/
6
7
Copyright (c) 2000-2014 Torus Knot Software Ltd
8
9
Permission is hereby granted, free of charge, to any person obtaining a copy
10
of this software and associated documentation files (the "Software"), to deal
11
in the Software without restriction, including without limitation the rights
12
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
copies of the Software, and to permit persons to whom the Software is
14
furnished to do so, subject to the following conditions:
15
16
The above copyright notice and this permission notice shall be included in
17
all copies or substantial portions of the Software.
18
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
THE SOFTWARE.
26
-----------------------------------------------------------------------------
27
*/
28
#include "OgreStableHeaders.h"
29
30
#include "OgreOptimisedUtil.h"
31
32
namespace Ogre {
33
34
//-------------------------------------------------------------------------
35
// Local classes
36
//-------------------------------------------------------------------------
37
38
    /** General implementation of OptimisedUtil.
39
    @note
40
        Don't use this class directly, use OptimisedUtil instead.
41
    */
42
    class _OgrePrivate OptimisedUtilGeneral : public OptimisedUtil
    {
    public:
        // Per-vertex weighted blend of positions (and normals, when a normal
        // stream is supplied) through the referenced blend matrices.
        /// @copydoc OptimisedUtil::softwareVertexSkinning
        void softwareVertexSkinning(
            const float* srcPosPtr,
            float* destPosPtr,
            const float* srcNormPtr,
            float* destNormPtr,
            const float* blendWeightPtr,
            const unsigned char* blendIndexPtr,
            const Affine3* const* blendMatrices,
            size_t srcPosStride,
            size_t destPosStride,
            size_t srcNormStride,
            size_t destNormStride,
            size_t blendWeightStride,
            size_t blendIndexStride,
            size_t numWeightsPerVertex,
            size_t numVertices) override;

        // Linear interpolation between two vertex streams by factor t;
        // normals are interpolated and re-normalised when morphNormals is set.
        /// @copydoc OptimisedUtil::softwareVertexMorph
        void softwareVertexMorph(
            float t,
            const float* srcPos1,
            const float* srcPos2,
            float* dstPos,
            size_t pos1VSize,
            size_t pos2VSize,
            size_t dstVSize,
            size_t numVertices,
            bool morphNormals) override;

        // dstMatrices[i] = baseMatrix * srcMatrices[i] for every i.
        /// @copydoc OptimisedUtil::concatenateAffineMatrices
        void concatenateAffineMatrices(
            const Affine3& baseMatrix,
            const Affine3* srcMatrices,
            Affine3* dstMatrices,
            size_t numMatrices) override;

        // One (un-normalised) face normal per triangle, computed from the
        // three corner positions the triangle indexes.
        /// @copydoc OptimisedUtil::calculateFaceNormals
        void calculateFaceNormals(
            const float* positions,
            const EdgeData::Triangle* triangles,
            Vector4* faceNormals,
            size_t numTriangles) override;

        // One flag per face: non-zero when dot(lightPos, faceNormal) > 0.
        /// @copydoc OptimisedUtil::calculateLightFacing
        void calculateLightFacing(
            const Vector4& lightPos,
            const Vector4* faceNormals,
            char* lightFacings,
            size_t numFaces) override;

        // Pushes every vertex extrudeDist away from the light, along the light
        // direction (lightPos.w == 0) or along the light-to-vertex direction.
        /// @copydoc OptimisedUtil::extrudeVertices
        void extrudeVertices(
            const Vector4& lightPos,
            Real extrudeDist,
            const float* srcPositions,
            float* destPositions,
            size_t numVertices) override;
    };
95
    //---------------------------------------------------------------------
96
    //---------------------------------------------------------------------
97
    //---------------------------------------------------------------------
98
    void OptimisedUtilGeneral::softwareVertexSkinning(
99
        const float *pSrcPos, float *pDestPos,
100
        const float *pSrcNorm, float *pDestNorm,
101
        const float *pBlendWeight, const unsigned char* pBlendIndex,
102
        const Affine3* const* blendMatrices,
103
        size_t srcPosStride, size_t destPosStride,
104
        size_t srcNormStride, size_t destNormStride,
105
        size_t blendWeightStride, size_t blendIndexStride,
106
        size_t numWeightsPerVertex,
107
        size_t numVertices)
108
0
    {
109
        // Source vectors
110
0
        Vector3 sourceVec = Vector3::ZERO, sourceNorm = Vector3::ZERO;
111
        // Accumulation vectors
112
0
        Vector3 accumVecPos, accumVecNorm;
113
114
        // Loop per vertex
115
0
        for (size_t vertIdx = 0; vertIdx < numVertices; ++vertIdx)
116
0
        {
117
            // Load source vertex elements
118
0
            sourceVec.x = pSrcPos[0];
119
0
            sourceVec.y = pSrcPos[1];
120
0
            sourceVec.z = pSrcPos[2];
121
122
0
            if (pSrcNorm)
123
0
            {
124
0
                sourceNorm.x = pSrcNorm[0];
125
0
                sourceNorm.y = pSrcNorm[1];
126
0
                sourceNorm.z = pSrcNorm[2];
127
0
            }
128
129
            // Load accumulators
130
0
            accumVecPos = Vector3::ZERO;
131
0
            accumVecNorm = Vector3::ZERO;
132
133
            // Loop per blend weight
134
            //
135
            // Note: Don't change "unsigned short" here!!! If use "size_t" instead,
136
            // VC7.1 unroll this loop to four blend weights pre-iteration, and then
137
            // loss performance 10% in this function. Ok, this give a hint that we
138
            // should unroll this loop manually for better performance, will do that
139
            // later.
140
            //
141
0
            for (unsigned short blendIdx = 0; blendIdx < numWeightsPerVertex; ++blendIdx)
142
0
            {
143
                // Blend by multiplying source by blend matrix and scaling by weight
144
                // Add to accumulator
145
                // NB weights must be normalised!!
146
0
                Real weight = pBlendWeight[blendIdx];
147
0
                if (weight)
148
0
                {
149
                    // Blend position, use 3x4 matrix
150
0
                    const Affine3& mat = *blendMatrices[pBlendIndex[blendIdx]];
151
0
                    accumVecPos.x +=
152
0
                        (mat[0][0] * sourceVec.x +
153
0
                         mat[0][1] * sourceVec.y +
154
0
                         mat[0][2] * sourceVec.z +
155
0
                         mat[0][3])
156
0
                         * weight;
157
0
                    accumVecPos.y +=
158
0
                        (mat[1][0] * sourceVec.x +
159
0
                         mat[1][1] * sourceVec.y +
160
0
                         mat[1][2] * sourceVec.z +
161
0
                         mat[1][3])
162
0
                         * weight;
163
0
                    accumVecPos.z +=
164
0
                        (mat[2][0] * sourceVec.x +
165
0
                         mat[2][1] * sourceVec.y +
166
0
                         mat[2][2] * sourceVec.z +
167
0
                         mat[2][3])
168
0
                         * weight;
169
0
                    if (pSrcNorm)
170
0
                    {
171
                        // Blend normal
172
                        // We should blend by inverse transpose here, but because we're assuming the 3x3
173
                        // aspect of the matrix is orthogonal (no non-uniform scaling), the inverse transpose
174
                        // is equal to the main 3x3 matrix
175
                        // Note because it's a normal we just extract the rotational part, saves us renormalising here
176
0
                        accumVecNorm.x +=
177
0
                            (mat[0][0] * sourceNorm.x +
178
0
                             mat[0][1] * sourceNorm.y +
179
0
                             mat[0][2] * sourceNorm.z)
180
0
                             * weight;
181
0
                        accumVecNorm.y +=
182
0
                            (mat[1][0] * sourceNorm.x +
183
0
                             mat[1][1] * sourceNorm.y +
184
0
                             mat[1][2] * sourceNorm.z)
185
0
                            * weight;
186
0
                        accumVecNorm.z +=
187
0
                            (mat[2][0] * sourceNorm.x +
188
0
                             mat[2][1] * sourceNorm.y +
189
0
                             mat[2][2] * sourceNorm.z)
190
0
                            * weight;
191
0
                    }
192
0
                }
193
0
            }
194
195
            // Stored blended vertex in hardware buffer
196
0
            pDestPos[0] = accumVecPos.x;
197
0
            pDestPos[1] = accumVecPos.y;
198
0
            pDestPos[2] = accumVecPos.z;
199
200
            // Stored blended vertex in temp buffer
201
0
            if (pSrcNorm)
202
0
            {
203
                // Normalise
204
0
                accumVecNorm.normalise();
205
0
                pDestNorm[0] = accumVecNorm.x;
206
0
                pDestNorm[1] = accumVecNorm.y;
207
0
                pDestNorm[2] = accumVecNorm.z;
208
                // Advance pointers
209
0
                advanceRawPointer(pSrcNorm, srcNormStride);
210
0
                advanceRawPointer(pDestNorm, destNormStride);
211
0
            }
212
213
            // Advance pointers
214
0
            advanceRawPointer(pSrcPos, srcPosStride);
215
0
            advanceRawPointer(pDestPos, destPosStride);
216
0
            advanceRawPointer(pBlendWeight, blendWeightStride);
217
0
            advanceRawPointer(pBlendIndex, blendIndexStride);
218
0
        }
219
0
    }
220
    //---------------------------------------------------------------------
221
    void OptimisedUtilGeneral::concatenateAffineMatrices(
222
        const Affine3& baseMatrix,
223
        const Affine3* pSrcMat,
224
        Affine3* pDstMat,
225
        size_t numMatrices)
226
0
    {
227
0
        for (size_t i = 0; i < numMatrices; ++i)
228
0
        {
229
0
            *pDstMat = baseMatrix * *pSrcMat ;
230
231
0
            ++pSrcMat;
232
0
            ++pDstMat;
233
0
        }
234
0
    }
235
    //---------------------------------------------------------------------
236
    void OptimisedUtilGeneral::softwareVertexMorph(
237
        float t,
238
        const float *pSrc1, const float *pSrc2,
239
        float *pDst,
240
        size_t pos1VSize, size_t pos2VSize, size_t dstVSize,
241
        size_t numVertices,
242
        bool morphNormals)
243
0
    {
244
0
        size_t src1Skip = pos1VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
245
0
        size_t src2Skip = pos2VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
246
0
        size_t dstSkip = dstVSize/sizeof(float) - 3 - (morphNormals ? 3 : 0);
247
        
248
0
        Vector3f nlerpNormal;
249
0
        for (size_t i = 0; i < numVertices; ++i)
250
0
        {
251
            // x
252
0
            *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
253
0
            ++pSrc1; ++pSrc2;
254
            // y
255
0
            *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
256
0
            ++pSrc1; ++pSrc2;
257
            // z
258
0
            *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ;
259
0
            ++pSrc1; ++pSrc2;
260
            
261
0
            if (morphNormals)
262
0
            {
263
                // normals must be in the same buffer as pos
264
                // perform an nlerp
265
                // we don't have enough information for a spherical interp
266
0
                nlerpNormal[0] = *pSrc1 + t * (*pSrc2 - *pSrc1);
267
0
                ++pSrc1; ++pSrc2;
268
0
                nlerpNormal[1] = *pSrc1 + t * (*pSrc2 - *pSrc1);
269
0
                ++pSrc1; ++pSrc2;
270
0
                nlerpNormal[2] = *pSrc1 + t * (*pSrc2 - *pSrc1);
271
0
                ++pSrc1; ++pSrc2;
272
0
                nlerpNormal.normalise();
273
0
                *pDst++ = nlerpNormal[0];
274
0
                *pDst++ = nlerpNormal[1];
275
0
                *pDst++ = nlerpNormal[2];
276
0
            }
277
            
278
0
            pSrc1 += src1Skip;
279
0
            pSrc2 += src2Skip;
280
0
            pDst += dstSkip;
281
            
282
0
        }
283
0
    }
284
    //---------------------------------------------------------------------
285
    void OptimisedUtilGeneral::calculateFaceNormals(
286
        const float *positions,
287
        const EdgeData::Triangle *triangles,
288
        Vector4 *faceNormals,
289
        size_t numTriangles)
290
0
    {
291
0
        for ( ; numTriangles; --numTriangles)
292
0
        {
293
0
            const EdgeData::Triangle& t = *triangles++;
294
0
            size_t offset;
295
296
0
            offset = t.vertIndex[0] * 3;
297
0
            Vector3 v1(positions[offset+0], positions[offset+1], positions[offset+2]);
298
299
0
            offset = t.vertIndex[1] * 3;
300
0
            Vector3 v2(positions[offset+0], positions[offset+1], positions[offset+2]);
301
302
0
            offset = t.vertIndex[2] * 3;
303
0
            Vector3 v3(positions[offset+0], positions[offset+1], positions[offset+2]);
304
305
0
            *faceNormals++ = Math::calculateFaceNormalWithoutNormalize(v1, v2, v3);
306
0
        }
307
0
    }
308
    //---------------------------------------------------------------------
309
    void OptimisedUtilGeneral::calculateLightFacing(
310
        const Vector4& lightPos,
311
        const Vector4* faceNormals,
312
        char* lightFacings,
313
        size_t numFaces)
314
0
    {
315
0
        for (size_t i = 0; i < numFaces; ++i)
316
0
        {
317
0
            *lightFacings++ = (lightPos.dotProduct(*faceNormals++) > 0);
318
0
        }
319
0
    }
320
    //---------------------------------------------------------------------
321
    void OptimisedUtilGeneral::extrudeVertices(
322
        const Vector4& lightPos,
323
        Real extrudeDist,
324
        const float* pSrcPos,
325
        float* pDestPos,
326
        size_t numVertices)
327
0
    {
328
0
        if (lightPos.w == 0.0f)
329
0
        {
330
            // Directional light, extrusion is along light direction
331
332
0
            Vector3 extrusionDir(
333
0
                -lightPos.x,
334
0
                -lightPos.y,
335
0
                -lightPos.z);
336
0
            extrusionDir.normalise();
337
0
            extrusionDir *= extrudeDist;
338
339
0
            for (size_t vert = 0; vert < numVertices; ++vert)
340
0
            {
341
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.x;
342
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.y;
343
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.z;
344
0
            }
345
0
        }
346
0
        else
347
0
        {
348
            // Point light, calculate extrusionDir for every vertex
349
0
            assert(lightPos.w == 1.0f);
350
351
0
            for (size_t vert = 0; vert < numVertices; ++vert)
352
0
            {
353
0
                Vector3 extrusionDir(
354
0
                    pSrcPos[0] - lightPos.x,
355
0
                    pSrcPos[1] - lightPos.y,
356
0
                    pSrcPos[2] - lightPos.z);
357
0
                extrusionDir.normalise();
358
0
                extrusionDir *= extrudeDist;
359
360
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.x;
361
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.y;
362
0
                *pDestPos++ = *pSrcPos++ + extrusionDir.z;
363
0
            }
364
0
        }
365
0
    }
366
    //---------------------------------------------------------------------
367
    //---------------------------------------------------------------------
368
    //---------------------------------------------------------------------
369
    // Prototype ahead of the definition, so the definition is not the first
    // declaration of the function in this translation unit.
    extern OptimisedUtil* _getOptimisedUtilGeneral();

    /** Returns the single OptimisedUtilGeneral instance.

        The instance is a function-local static, so it is constructed the
        first time this function is called and the same address is returned
        on every call.
    */
    OptimisedUtil* _getOptimisedUtilGeneral()
    {
        static OptimisedUtilGeneral generalImpl;
        return &generalImpl;
    }
375
376
}