Coverage Report

Created: 2025-11-04 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ogre/OgreMain/src/OgreInstanceBatchShader.cpp
Line
Count
Source
1
/*
2
-----------------------------------------------------------------------------
3
This source file is part of OGRE
4
(Object-oriented Graphics Rendering Engine)
5
For the latest info, see http://www.ogre3d.org/
6
7
Copyright (c) 2000-2014 Torus Knot Software Ltd
8
9
Permission is hereby granted, free of charge, to any person obtaining a copy
10
of this software and associated documentation files (the "Software"), to deal
11
in the Software without restriction, including without limitation the rights
12
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
copies of the Software, and to permit persons to whom the Software is
14
furnished to do so, subject to the following conditions:
15
16
The above copyright notice and this permission notice shall be included in
17
all copies or substantial portions of the Software.
18
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
THE SOFTWARE.
26
-----------------------------------------------------------------------------
27
*/
28
#include "OgreStableHeaders.h"
29
#include "OgreInstanceBatchShader.h"
30
#include "OgreRenderOperation.h"
31
#include "OgreInstancedEntity.h"
32
33
namespace Ogre
34
{
35
    /** Constructs a shader-based instance batch.
        Every argument is forwarded unchanged to the InstanceBatch base class.
        The world-matrix count is seeded with one matrix per instance.
    @param creator Forwarded to InstanceBatch.
    @param meshReference Forwarded to InstanceBatch.
    @param material Forwarded to InstanceBatch.
    @param instancesPerBatch Forwarded to InstanceBatch; also the initial value of mNumWorldMatrices.
    @param indexToBoneMap Forwarded to InstanceBatch.
    @param batchName Forwarded to InstanceBatch.
    */
    InstanceBatchShader::InstanceBatchShader( InstanceManager *creator, MeshPtr &meshReference,
                                              const MaterialPtr &material, size_t instancesPerBatch,
                                              const Mesh::IndexMap *indexToBoneMap,
                                              const String &batchName )
        : InstanceBatch( creator, meshReference, material, instancesPerBatch, indexToBoneMap, batchName )
        , mNumWorldMatrices( instancesPerBatch )
    {
    }
43
44
    //-----------------------------------------------------------------------
45
    size_t InstanceBatchShader::calculateMaxNumInstances( const SubMesh *baseSubMesh, uint16 flags ) const
46
0
    {
47
0
        const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
48
49
0
        mMaterial->load();
50
0
        Technique *technique = mMaterial->getBestTechnique();
51
0
        if( technique )
52
0
        {
53
0
            GpuProgramParametersSharedPtr vertexParam = technique->getPass(0)->getVertexProgramParameters();
54
0
            for(auto& it : vertexParam->getConstantDefinitions().map)
55
0
            {
56
0
                const GpuConstantDefinition &constDef = it.second;
57
0
                if(((constDef.constType == GCT_MATRIX_3X4 ||
58
0
                    constDef.constType == GCT_MATRIX_4X3 ||             //OGL GLSL bitches without this
59
0
                    constDef.constType == GCT_MATRIX_2X4 ||
60
0
                    constDef.constType == GCT_FLOAT4)                   //OGL GLSL bitches without this
61
0
                    && constDef.isFloat()) ||
62
0
                   ((constDef.constType == GCT_MATRIX_DOUBLE_3X4 ||
63
0
                    constDef.constType == GCT_MATRIX_DOUBLE_4X3 ||      //OGL GLSL bitches without this
64
0
                    constDef.constType == GCT_MATRIX_DOUBLE_2X4 ||
65
0
                    constDef.constType == GCT_DOUBLE4)                  //OGL GLSL bitches without this
66
0
                    && constDef.isDouble())
67
0
                   )
68
0
                {
69
0
                    const GpuProgramParameters::AutoConstantEntry *entry =
70
0
                                    vertexParam->_findRawAutoConstantEntryFloat( constDef.physicalIndex );
71
0
                    if( entry && (entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 || entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
72
0
                    {
73
                        //Material is correctly done!
74
0
                        size_t arraySize = constDef.arraySize;
75
76
                        //Deal with GL "hacky" way of doing 4x3 matrices
77
0
                        if(entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 && constDef.constType == GCT_FLOAT4)
78
0
                            arraySize /= 3;
79
0
                        else if(entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4 && constDef.constType == GCT_FLOAT4)
80
0
                            arraySize /= 2;
81
82
                        //Check the num of arrays
83
0
                        size_t retVal = arraySize / numBones;
84
85
0
                        if( flags & IM_USE16BIT )
86
0
                        {
87
0
                            if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF )
88
0
                                retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount;
89
0
                        }
90
91
0
                        if((retVal < 3 && entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4) ||
92
0
                            (retVal < 2 && entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
93
0
                        {
94
0
                            LogManager::getSingleton().logWarning( "InstanceBatchShader: Mesh '" +
95
0
                                        mMeshReference->getName() + "' using material '" +
96
0
                                        mMaterial->getName() + "'. The amount of possible "
97
0
                                        "instances per batch is very low. Performance benefits will "
98
0
                                        "be minimal, if any. It might be even slower!");
99
0
                        }
100
101
0
                        return retVal;
102
0
                    }
103
0
                }
104
0
            }
105
106
0
            OGRE_EXCEPT(Exception::ERR_INVALIDPARAMS,
107
0
                        "Material '" + mMaterial->getName() + "' does not support hardware skinning");
108
0
        }
109
110
        //Reaching here the material is just unsupported.
111
112
0
        return 0;
113
0
    }
114
    //-----------------------------------------------------------------------
115
    void InstanceBatchShader::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation )
116
0
    {
117
0
        if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
118
0
            mNumWorldMatrices = mInstancesPerBatch * baseSubMesh->blendIndexToBoneIndexMap.size();
119
0
        InstanceBatch::buildFrom( baseSubMesh, renderOperation );
120
0
    }
121
    //-----------------------------------------------------------------------
122
    void InstanceBatchShader::setupVertices( const SubMesh* baseSubMesh )
123
0
    {
124
0
        mRenderOperation.vertexData = OGRE_NEW VertexData();
125
0
        mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed)
126
127
0
        VertexData *thisVertexData = mRenderOperation.vertexData;
128
0
        VertexData *baseVertexData = baseSubMesh->vertexData;
129
130
0
        thisVertexData->vertexStart = 0;
131
0
        thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch;
132
133
0
        HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration );
134
0
        thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone();
135
136
0
        if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
137
0
        {
138
            //Building hw skinned batches follow a different path
139
0
            setupHardwareSkinned( baseSubMesh, thisVertexData, baseVertexData );
140
0
            return;
141
0
        }
142
143
        //TODO: Can't we, instead of using another source, put the index ID in the same source?
144
0
        thisVertexData->vertexDeclaration->addElement(
145
0
                                        thisVertexData->vertexDeclaration->getMaxSource() + 1, 0,
146
0
                                        VET_UBYTE4, VES_BLEND_INDICES );
147
148
149
0
        for( uint16 i=0; i<thisVertexData->vertexDeclaration->getMaxSource(); ++i )
150
0
        {
151
            //Create our own vertex buffer
152
0
            HardwareVertexBufferSharedPtr vertexBuffer =
153
0
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
154
0
                                            thisVertexData->vertexDeclaration->getVertexSize(i),
155
0
                                            thisVertexData->vertexCount,
156
0
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
157
0
            thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
158
159
            //Grab the base submesh data
160
0
            HardwareVertexBufferSharedPtr baseVertexBuffer =
161
0
                                                    baseVertexData->vertexBufferBinding->getBuffer(i);
162
163
0
            HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
164
0
            HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
165
0
            char* thisBuf = static_cast<char*>(thisLock.pData);
166
0
            char* baseBuf = static_cast<char*>(baseLock.pData);
167
168
            //Copy and repeat
169
0
            for( size_t j=0; j<mInstancesPerBatch; ++j )
170
0
            {
171
0
                const size_t sizeOfBuffer = baseVertexData->vertexCount *
172
0
                                            baseVertexData->vertexDeclaration->getVertexSize(i);
173
0
                memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer );
174
0
            }
175
0
        }
176
177
0
        {
178
            //Now create the vertices "index ID" to individualize each instance
179
0
            const unsigned short lastSource = thisVertexData->vertexDeclaration->getMaxSource();
180
0
            HardwareVertexBufferSharedPtr vertexBuffer =
181
0
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
182
0
                                            thisVertexData->vertexDeclaration->getVertexSize( lastSource ),
183
0
                                            thisVertexData->vertexCount,
184
0
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
185
0
            thisVertexData->vertexBufferBinding->setBinding( lastSource, vertexBuffer );
186
187
0
            HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
188
0
            char* thisBuf = static_cast<char*>(thisLock.pData);
189
0
            for( uint8 j=0; j<uint8(mInstancesPerBatch); ++j )
190
0
            {
191
0
                for( size_t k=0; k<baseVertexData->vertexCount; ++k )
192
0
                {
193
0
                    *thisBuf++ = j;
194
0
                    *thisBuf++ = j;
195
0
                    *thisBuf++ = j;
196
0
                    *thisBuf++ = j;
197
0
                }
198
0
            }
199
200
0
        }
201
0
    }
202
    //-----------------------------------------------------------------------
203
    void InstanceBatchShader::setupIndices( const SubMesh* baseSubMesh )
204
0
    {
205
0
        mRenderOperation.indexData = OGRE_NEW IndexData();
206
0
        mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed)
207
208
0
        IndexData *thisIndexData = mRenderOperation.indexData;
209
0
        IndexData *baseIndexData = baseSubMesh->indexData;
210
211
0
        thisIndexData->indexStart = 0;
212
0
        thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch;
213
214
        //TODO: Check numVertices is below max supported by GPU
215
0
        HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT;
216
0
        if( mRenderOperation.vertexData->vertexCount > 65535 )
217
0
            indexType = HardwareIndexBuffer::IT_32BIT;
218
0
        thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
219
0
            indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY );
220
221
0
        HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
222
0
        HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY);
223
0
        uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData);
224
0
        uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData);
225
0
        bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT;
226
227
0
        for( size_t i=0; i<mInstancesPerBatch; ++i )
228
0
        {
229
0
            const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch;
230
231
0
            const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData);
232
0
            const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData);
233
234
0
            for( size_t j=0; j<baseIndexData->indexCount; ++j )
235
0
            {
236
0
                uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++;
237
238
0
                if( indexType == HardwareIndexBuffer::IT_16BIT )
239
0
                    *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset);
240
0
                else
241
0
                    *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset);
242
0
            }
243
0
        }
244
0
    }
245
    //-----------------------------------------------------------------------
246
    void InstanceBatchShader::setupHardwareSkinned( const SubMesh* baseSubMesh, VertexData *thisVertexData,
247
                                                    VertexData *baseVertexData )
248
0
    {
249
0
        const uint8 numBones = uint8(baseSubMesh->blendIndexToBoneIndexMap.size());
250
0
        mNumWorldMatrices = mInstancesPerBatch * numBones;
251
252
0
        for( uint16 i=0; i<=thisVertexData->vertexDeclaration->getMaxSource(); ++i )
253
0
        {
254
            //Create our own vertex buffer
255
0
            HardwareVertexBufferSharedPtr vertexBuffer =
256
0
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
257
0
                                            thisVertexData->vertexDeclaration->getVertexSize(i),
258
0
                                            thisVertexData->vertexCount,
259
0
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
260
0
            thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
261
262
0
            VertexDeclaration::VertexElementList veList =
263
0
                                            thisVertexData->vertexDeclaration->findElementsBySource(i);
264
265
            //Grab the base submesh data
266
0
            HardwareVertexBufferSharedPtr baseVertexBuffer =
267
0
                                                    baseVertexData->vertexBufferBinding->getBuffer(i);
268
269
0
            HardwareBufferLockGuard thisVertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
270
0
            HardwareBufferLockGuard baseVertexLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
271
0
            char* thisBuf = static_cast<char*>(thisVertexLock.pData);
272
0
            char* baseBuf = static_cast<char*>(baseVertexLock.pData);
273
0
            char *startBuf = baseBuf;
274
275
            //Copy and repeat
276
0
            for (uint8 j = 0; j < uint8(mInstancesPerBatch); ++j)
277
0
            {
278
                //Repeat source
279
0
                baseBuf = startBuf;
280
281
0
                for (size_t k = 0; k < baseVertexData->vertexCount; ++k)
282
0
                {
283
0
                    for (auto& vl : veList)
284
0
                    {
285
0
                        switch (vl.getSemantic())
286
0
                        {
287
0
                        case VES_BLEND_INDICES:
288
0
                        *(thisBuf + vl.getOffset() + 0) = *(baseBuf + vl.getOffset() + 0) + j * numBones;
289
0
                        *(thisBuf + vl.getOffset() + 1) = *(baseBuf + vl.getOffset() + 1) + j * numBones;
290
0
                        *(thisBuf + vl.getOffset() + 2) = *(baseBuf + vl.getOffset() + 2) + j * numBones;
291
0
                        *(thisBuf + vl.getOffset() + 3) = *(baseBuf + vl.getOffset() + 3) + j * numBones;
292
0
                            break;
293
0
                        default:
294
0
                            memcpy( thisBuf + vl.getOffset(), baseBuf + vl.getOffset(), vl.getSize() );
295
0
                            break;
296
0
                        }
297
0
                    }
298
0
                    thisBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
299
0
                    baseBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
300
0
                }
301
0
            }
302
0
        }
303
0
    }
304
    //-----------------------------------------------------------------------
305
    /** Writes the world transforms of the batch into the supplied array.
        When MeshManager::getBonesUseObjectSpace() is true, an identity matrix is
        written first. After that each instanced entity appends its own transforms;
        the count returned by InstancedEntity::getTransforms advances the write position.
    @param xform Output array; the caller must provide enough room for every matrix written.
    */
    void InstanceBatchShader::getWorldTransforms( Matrix4* xform ) const
    {
        Matrix4 *cursor = xform;

        if( MeshManager::getBonesUseObjectSpace() )
        {
            //Leading identity slot
            *cursor = Affine3::IDENTITY;
            ++cursor;
        }

        for( auto *entity : mInstancedEntities )
        {
            const auto written = entity->getTransforms( cursor );
            cursor += written;
        }
    }
318
    //-----------------------------------------------------------------------
319
    unsigned short InstanceBatchShader::getNumWorldTransforms(void) const
320
0
    {
321
0
        return uint16(mNumWorldMatrices) + uint16(MeshManager::getBonesUseObjectSpace());
322
0
    }
323
}