Coverage Report

Created: 2025-11-09 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ogre/OgreMain/src/OgreInstanceBatchVTF.cpp
Line
Count
Source
1
/*
2
-----------------------------------------------------------------------------
3
This source file is part of OGRE
4
(Object-oriented Graphics Rendering Engine)
5
For the latest info, see http://www.ogre3d.org/
6
7
Copyright (c) 2000-2014 Torus Knot Software Ltd
8
9
Permission is hereby granted, free of charge, to any person obtaining a copy
10
of this software and associated documentation files (the "Software"), to deal
11
in the Software without restriction, including without limitation the rights
12
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
copies of the Software, and to permit persons to whom the Software is
14
furnished to do so, subject to the following conditions:
15
16
The above copyright notice and this permission notice shall be included in
17
all copies or substantial portions of the Software.
18
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25
THE SOFTWARE.
26
-----------------------------------------------------------------------------
27
*/
28
#include "OgreStableHeaders.h"
29
#include "OgreInstanceBatchVTF.h"
30
#include "OgreHardwarePixelBuffer.h"
31
#include "OgreInstancedEntity.h"
32
#include "OgreMaterial.h"
33
#include "OgreDualQuaternion.h"
34
35
namespace Ogre
36
{
37
    //Upper bound, in pixels, assumed for the width of the VTF matrix texture (see the TODO in
    //createVertexTexture: the real hardware limit is not queried)
    static const uint16 c_maxTexWidth   = 4096;
38
    //Upper bound, in pixels, assumed for the height of the VTF matrix texture; used together with
    //c_maxTexWidth when estimating how many instances fit in one batch
    static const uint16 c_maxTexHeight  = 4096;
39
40
    /** Sets up the state shared by the vertex-texture-fetch (VTF) batch types.
    @remarks
        Every argument is forwarded unchanged to the InstanceBatch constructor. The VTF
        specific members then get their starting values and, as the last step, the material
        held in mMaterial is replaced by a per-batch clone through cloneMaterial().
    */
    BaseInstanceBatchVTF::BaseInstanceBatchVTF( InstanceManager *creator, MeshPtr &meshReference,
                                        const MaterialPtr &material, size_t instancesPerBatch,
                                        const Mesh::IndexMap *indexToBoneMap, const String &batchName )
        : InstanceBatch( creator, meshReference, material, instancesPerBatch,
                         indexToBoneMap, batchName )
        , mMatricesPerInstance( 0 )
        , mNumWorldMatrices( instancesPerBatch )
        , mWidthFloatsPadding( 0 )
        , mMaxFloatsPerLine( std::numeric_limits<size_t>::max() )
        , mRowLength( 3 )
        , mWeightCount( 1 )
        , mTempTransformsArray3x4( nullptr )
        , mUseBoneMatrixLookup( false )
        , mMaxLookupTableInstances( 16 )
        , mUseBoneDualQuaternions( false )
        , mForceOneWeight( false )
        , mUseOneWeight( false )
    {
        //mMaterial itself is handed over as the material to clone
        cloneMaterial( mMaterial );
    }
60
61
    /** Releases what this batch created for itself: the shadow caster materials referenced
        by the techniques of mMaterial, mMaterial itself, the matrix texture (when one
        exists) and the temporary 3x4 transform array.
    */
    BaseInstanceBatchVTF::~BaseInstanceBatchVTF()
    {
        MaterialManager &materialManager = MaterialManager::getSingleton();

        //Shadow caster materials first (only the techniques that actually have one)
        for( Technique *technique : mMaterial->getTechniques() )
        {
            if( technique->getShadowCasterMaterial() )
                materialManager.remove( technique->getShadowCasterMaterial() );
        }

        //Then the cloned material that referenced them
        materialManager.remove( mMaterial );

        //The VTF texture only exists once createVertexTexture() has run
        if( mMatrixTexture )
            TextureManager::getSingleton().remove( mMatrixTexture );

        delete[] mTempTransformsArray3x4;
    }
82
83
    //-----------------------------------------------------------------------
84
    void BaseInstanceBatchVTF::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation )
85
0
    {
86
0
        if (useBoneMatrixLookup())
87
0
        {
88
            //when using bone matrix lookup resource are not shared
89
            //
90
            //Future implementation: while the instance vertex buffer can't be shared
91
            //The texture can be.
92
            //
93
0
            build(baseSubMesh);
94
0
        }
95
0
        else
96
0
        {
97
0
            createVertexTexture( baseSubMesh );
98
0
            InstanceBatch::buildFrom( baseSubMesh, renderOperation );
99
0
        }
100
0
    }
101
    //-----------------------------------------------------------------------
102
    void BaseInstanceBatchVTF::cloneMaterial( const MaterialPtr &material )
103
0
    {
104
        //Used to track down shadow casters, so the same material caster doesn't get cloned twice
105
0
        typedef std::map<String, MaterialPtr> MatMap;
106
0
        MatMap clonedMaterials;
107
108
        //We need to clone the material so we can have different textures for each batch.
109
0
        mMaterial = material->clone( mName + "/VTFMaterial" );
110
111
        //Now do the same with the techniques which have a material shadow caster
112
0
        Material::Techniques::const_iterator it;
113
0
        for(it = material->getTechniques().begin(); it != material->getTechniques().end(); ++it)
114
0
        {
115
0
            Technique *technique = *it;
116
117
0
            if( technique->getShadowCasterMaterial() )
118
0
            {
119
0
                const MaterialPtr &casterMat    = technique->getShadowCasterMaterial();
120
0
                const String &casterName        = casterMat->getName();
121
122
                //Was this material already cloned?
123
0
                MatMap::const_iterator itor = clonedMaterials.find(casterName);
124
125
0
                if( itor == clonedMaterials.end() )
126
0
                {
127
                    //No? Clone it and track it
128
0
                    MaterialPtr cloned = casterMat->clone( mName + "/VTFMaterialCaster" +
129
0
                                                    StringConverter::toString(clonedMaterials.size()) );
130
0
                    technique->setShadowCasterMaterial( cloned );
131
0
                    clonedMaterials[casterName] = cloned;
132
0
                }
133
0
                else
134
0
                    technique->setShadowCasterMaterial( itor->second ); //Reuse the previously cloned mat
135
0
            }
136
0
        }
137
0
    }
138
    //-----------------------------------------------------------------------
139
    /** Stores one blend index per vertex of @p baseVertexData in @p outBoneIdx.
    @remarks
        For each vertex the first mWeightCount blend weights are compared and the blend
        index paired with the largest weight is written (the earliest one wins on ties).
        Both the indices and the weights are read from the buffer bound to the source of
        the VES_BLEND_INDICES element.
    @param baseVertexData
        Vertex data to read. Neither element lookup is checked for null, so the
        declaration is expected to contain blend indices and blend weights.
    @param outBoneIdx
        Receives the indices; it is indexed up to vertexCount - 1 and is not resized here.
    */
    void BaseInstanceBatchVTF::retrieveBoneIdx( VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx )
    {
        const VertexElement *idxElement = baseVertexData->vertexDeclaration->
                                                    findElementBySemantic( VES_BLEND_INDICES );
        const VertexElement *wgtElement = baseVertexData->vertexDeclaration->
                                                    findElementBySemantic( VES_BLEND_WEIGHTS );

        const auto source = idxElement->getSource();

        HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer( source );
        HardwareBufferLockGuard baseVertexLock( buff, HardwareBuffer::HBL_READ_ONLY );
        char const *vertexPtr = static_cast<char const*>( baseVertexLock.pData );

        //Distance in bytes between two consecutive vertices of that buffer
        const size_t stride     = baseVertexData->vertexDeclaration->getVertexSize( source );
        const uint8 weightCount = uint8( mWeightCount );

        for( size_t v=0; v<baseVertexData->vertexCount; ++v, vertexPtr += stride )
        {
            float const *weights = reinterpret_cast<float const*>( vertexPtr + wgtElement->getOffset() );
            uint8 const *indices = reinterpret_cast<uint8 const*>( vertexPtr + idxElement->getOffset() );

            //Position of the heaviest weight; only a strictly larger weight replaces it
            uint8 heaviest = 0;
            for( uint8 w=1; w<weightCount; ++w )
            {
                if( weights[heaviest] < weights[w] )
                    heaviest = w;
            }

            outBoneIdx[v] = indices[heaviest];
        }
    }
165
166
    //-----------------------------------------------------------------------
167
    /** Copies mWeightCount blend indices and blend weights per vertex out of
        @p baseVertexData, normalising the weights of each vertex so they add up to one.
    @remarks
        Output entry (vertex * mWeightCount + j) holds the j-th index/weight of that vertex.
        Both elements are read from the buffer bound to the source of the
        VES_BLEND_INDICES element. A vertex whose weights add up to exactly zero is left
        with its raw (zero-sum) weights instead of being divided by zero.
    @param baseVertexData
        Vertex data to read. Neither element lookup is checked for null, so the
        declaration is expected to contain blend indices and blend weights.
    @param outBoneIdx
        Receives the blend indices; must already hold vertexCount * mWeightCount entries.
    @param outBoneWgt
        Receives the normalised weights; must already hold vertexCount * mWeightCount entries.
    */
    void BaseInstanceBatchVTF::retrieveBoneIdxWithWeights(VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx, HWBoneWgtVec &outBoneWgt)
    {
        const VertexElement *ve = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_INDICES );
        const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS );

        HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource());
        HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY);
        char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData);

        //The vertex size of the source does not change while iterating, so query it once
        const size_t vertexStride = baseVertexData->vertexDeclaration->getVertexSize(ve->getSource());

        for( size_t i=0; i<baseVertexData->vertexCount * mWeightCount; i += mWeightCount )
        {
            float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset());
            uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset());

            float weightMagnitude = 0.0f;
            for( size_t j=0; j < mWeightCount; ++j )
            {
                outBoneWgt[i+j] = pWeights[j];
                weightMagnitude += pWeights[j];
                outBoneIdx[i+j] = pIndex[j];
            }

            //Normalize the bone weights so they add to one. An exact zero sum cannot be
            //normalised; dividing anyway would put NaNs into the vertex buffer.
            if( weightMagnitude != 0.0f )
            {
                for( size_t j=0; j < mWeightCount; ++j )
                    outBoneWgt[i+j] /= weightMagnitude;
            }

            baseBuffer += vertexStride;
        }
    }
198
    
199
    //-----------------------------------------------------------------------
200
    /** Points every texture unit named "InstancingVTF" in @p material at the matrix texture.
    @remarks
        All passes of all techniques are visited. Matching texture units get the name of
        mMatrixTexture with @p textureType and have their filtering set to TFO_NONE.
        Techniques that have a shadow caster material get that material processed the
        same way through a recursive call.
    @param textureType
        Texture type passed along to setTextureName().
    @param material
        Material whose texture units are updated.
    */
    void BaseInstanceBatchVTF::setupMaterialToUseVTF( TextureType textureType, MaterialPtr &material ) const
    {
        //One pass over the techniques is enough: the previous version wrapped this loop in
        //a second, leftover iterator loop and so repeated all the work once per technique.
        for( auto *technique : material->getTechniques() )
        {
            for( auto *pass : technique->getPasses() )
            {
                for( auto *texUnit : pass->getTextureUnitStates() )
                {
                    if( texUnit->getName() == "InstancingVTF" )
                    {
                        texUnit->setTextureName( mMatrixTexture->getName(), textureType );
                        texUnit->setTextureFiltering( TFO_NONE );
                    }
                }
            }

            if( technique->getShadowCasterMaterial() )
            {
                MaterialPtr matCaster = technique->getShadowCasterMaterial();
                setupMaterialToUseVTF( textureType, matCaster );
            }
        }
    }
225
    //-----------------------------------------------------------------------
226
    void BaseInstanceBatchVTF::createVertexTexture( const SubMesh* baseSubMesh )
227
0
    {
228
        /*
229
        TODO: Find a way to retrieve max texture resolution,
230
        http://www.ogre3d.org/forums/viewtopic.php?t=38305
231
232
        Currently assuming it's 4096x4096, which is a safe bet for any hardware with decent VTF*/
233
        
234
0
        size_t uniqueAnimations = mInstancesPerBatch;
235
0
        if (useBoneMatrixLookup())
236
0
        {
237
0
            uniqueAnimations = std::min<size_t>(getMaxLookupTableInstances(), uniqueAnimations);
238
0
        }
239
0
        mMatricesPerInstance = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
240
241
0
        if(mUseBoneDualQuaternions && !mTempTransformsArray3x4)
242
0
        {
243
0
            mTempTransformsArray3x4 = new Matrix3x4f[mMatricesPerInstance];
244
0
        }
245
        
246
0
        mNumWorldMatrices = uniqueAnimations * mMatricesPerInstance;
247
248
        //Calculate the width & height required to hold all the matrices. Start by filling the width
249
        //first (i.e. 4096x1 4096x2 4096x3, etc)
250
        
251
0
        size_t texWidth         = std::min<size_t>( mNumWorldMatrices * mRowLength, c_maxTexWidth );
252
0
        size_t maxUsableWidth   = texWidth;
253
0
        if( matricesTogetherPerRow() )
254
0
        {
255
            //The technique requires all matrices from the same instance in the same row
256
            //i.e. 4094 -> 4095 -> skip 4096 -> 0 (next row) contains data from a new instance 
257
0
            mWidthFloatsPadding = texWidth % (mMatricesPerInstance * mRowLength);
258
259
0
            if( mWidthFloatsPadding )
260
0
            {
261
0
                mMaxFloatsPerLine = texWidth - mWidthFloatsPadding;
262
263
0
                maxUsableWidth = mMaxFloatsPerLine;
264
265
                //Values are in pixels, convert them to floats (1 pixel = 4 floats)
266
0
                mWidthFloatsPadding *= 4;
267
0
                mMaxFloatsPerLine       *= 4;
268
0
            }
269
0
        }
270
271
0
        size_t texHeight = mNumWorldMatrices * mRowLength / maxUsableWidth;
272
273
0
        if( (mNumWorldMatrices * mRowLength) % maxUsableWidth )
274
0
            texHeight += 1;
275
276
        //Don't use 1D textures, as OGL goes crazy because the shader should be calling texture1D()...
277
0
        TextureType texType = TEX_TYPE_2D;
278
279
0
        mMatrixTexture = TextureManager::getSingleton().createManual(
280
0
                                        mName + "/VTF", mMeshReference->getGroup(), texType,
281
0
                                        (uint)texWidth, (uint)texHeight,
282
0
                                        0, PF_FLOAT32_RGBA, TU_DYNAMIC_WRITE_ONLY_DISCARDABLE );
283
284
0
        OgreAssert(mMatrixTexture->getFormat() == PF_FLOAT32_RGBA, "float texture support required");
285
        //Set our cloned material to use this custom texture!
286
0
        setupMaterialToUseVTF( texType, mMaterial );
287
0
    }
288
289
    //-----------------------------------------------------------------------
290
    /** Converts @p numOfMatrices 3x4 matrices into dual quaternions.
    @param matrices
        Input matrices; each is turned into an Affine3 before conversion.
    @param numOfMatrices
        How many entries of @p matrices to convert.
    @param outDualQuaternions
        Destination; receives eight floats per matrix, written back to back.
    @return
        Number of floats written to @p outDualQuaternions.
    */
    size_t BaseInstanceBatchVTF::convert3x4MatricesToDualQuaternions(Matrix3x4f* matrices, size_t numOfMatrices, float* outDualQuaternions)
    {
        DualQuaternion dualQuat;
        float *writePos = outDualQuaternions;

        for( size_t matrixIdx = 0; matrixIdx < numOfMatrices; ++matrixIdx )
        {
            dualQuat.fromTransformationMatrix( Affine3( matrices[matrixIdx][0] ) );

            //Copy the 2x4 matrix
            for( int component = 0; component < 8; ++component )
                *writePos++ = static_cast<float>( dualQuat[component] );
        }

        //Everything between the start and the write position is what got written
        return static_cast<size_t>( writePos - outDualQuaternions );
    }
309
    
310
    //-----------------------------------------------------------------------
311
    void BaseInstanceBatchVTF::updateVertexTexture(void)
312
0
    {
313
        //Now lock the texture and copy the 4x3 matrices!
314
0
        HardwareBufferLockGuard matTexLock(mMatrixTexture->getBuffer(), HardwareBuffer::HBL_DISCARD);
315
0
        const PixelBox &pixelBox = mMatrixTexture->getBuffer()->getCurrentLock();
316
317
0
        float *pDest = reinterpret_cast<float*>(pixelBox.data);
318
319
0
        InstancedEntityVec::const_iterator itor = mInstancedEntities.begin();
320
0
        InstancedEntityVec::const_iterator end  = mInstancedEntities.end();
321
322
0
        Matrix3x4f* transforms;
323
324
        //If using dual quaternion skinning, write the transforms to a temporary buffer,
325
        //then convert to dual quaternions, then later write to the pixel buffer
326
        //Otherwise simply write the transforms to the pixel buffer directly
327
0
        if(mUseBoneDualQuaternions)
328
0
        {
329
0
            transforms = mTempTransformsArray3x4;
330
0
        }
331
0
        else
332
0
        {
333
0
            transforms = (Matrix3x4f*)pDest;
334
0
        }
335
336
        
337
0
        while( itor != end )
338
0
        {
339
0
            size_t floatsWritten = (*itor)->getTransforms3x4( transforms );
340
341
0
            if( mManager->getCameraRelativeRendering() )
342
0
                makeMatrixCameraRelative3x4( transforms, floatsWritten / 12 );
343
344
0
            if(mUseBoneDualQuaternions)
345
0
            {
346
0
                floatsWritten = convert3x4MatricesToDualQuaternions(transforms, floatsWritten / 12, pDest);
347
0
                pDest += floatsWritten;
348
0
            }
349
0
            else
350
0
            {
351
0
                transforms += floatsWritten / 12;
352
0
            }
353
            
354
0
            ++itor;
355
0
        }
356
0
    }
357
    /** update the lookup numbers for entities with shared transforms */
358
    void BaseInstanceBatchVTF::updateSharedLookupIndexes()
359
0
    {
360
0
        if (mTransformSharingDirty)
361
0
        {
362
0
            if (useBoneMatrixLookup())
363
0
            {
364
                //In each entity update the "transform lookup number" so that:
365
                // 1. All entities sharing the same transformation will share the same unique number
366
                // 2. "transform lookup number" will be numbered from 0 up to getMaxLookupTableInstances
367
0
                uint16 lookupCounter = 0;
368
0
                typedef std::map<Affine3*,uint16> MapTransformId;
369
0
                MapTransformId transformToId;
370
0
                InstancedEntityVec::const_iterator itEnt = mInstancedEntities.begin(),
371
0
                    itEntEnd = mInstancedEntities.end();
372
0
                for(;itEnt != itEntEnd ; ++itEnt)
373
0
                {
374
0
                    if ((*itEnt)->isInScene())
375
0
                    {
376
0
                        Affine3* transformUniqueId = (*itEnt)->mBoneMatrices;
377
0
                        MapTransformId::iterator itLu = transformToId.find(transformUniqueId);
378
0
                        if (itLu == transformToId.end())
379
0
                        {
380
0
                            itLu = transformToId.insert(std::make_pair(transformUniqueId,lookupCounter)).first;
381
0
                            ++lookupCounter;
382
0
                        }
383
0
                        (*itEnt)->setTransformLookupNumber(itLu->second);
384
0
                    }
385
0
                    else 
386
0
                    {
387
0
                        (*itEnt)->setTransformLookupNumber(0);
388
0
                    }
389
0
                }
390
391
0
                if (lookupCounter > getMaxLookupTableInstances())
392
0
                {
393
0
                    OGRE_EXCEPT(Exception::ERR_INVALID_STATE,"Number of unique bone matrix states exceeds current limitation.","BaseInstanceBatchVTF::updateSharedLookupIndexes()");
394
0
                }
395
0
            }
396
397
0
            mTransformSharingDirty = false;
398
0
        }
399
0
    }
400
401
    //-----------------------------------------------------------------------
402
    /** Creates the instanced entity with index @p num.
    @remarks
        With bone matrix lookup enabled, entities whose index is not below
        getMaxLookupTableInstances() are created sharing the transform of the entity at
        (num % getMaxLookupTableInstances()), or of the entity that one already shares
        its transform with.
    @param num
        Index of the entity inside the batch.
    @return
        The newly allocated InstancedEntity (created with OGRE_NEW).
    */
    InstancedEntity* BaseInstanceBatchVTF::generateInstancedEntity(size_t num)
    {
        InstancedEntity *transformSource = NULL;

        if( useBoneMatrixLookup() && num >= getMaxLookupTableInstances() )
        {
            InstancedEntity *candidate = mInstancedEntities[num % getMaxLookupTableInstances()];

            //Follow the link one step when the candidate is itself sharing a transform
            transformSource = candidate->mSharedTransformEntity ?
                                    candidate->mSharedTransformEntity : candidate;
        }

        return OGRE_NEW InstancedEntity( this, static_cast<uint32>(num), transformSource );
    }
416
417
418
    //-----------------------------------------------------------------------
419
    /** Reports a single identity matrix as the world transform of the batch.
    @param xform
        Destination; its first element is overwritten with Matrix4::IDENTITY.
    */
    void BaseInstanceBatchVTF::getWorldTransforms( Matrix4* xform ) const
    {
        //NOTE(review): presumably identity because the per-instance transforms are
        //supplied through the matrix texture instead - confirm against the shaders
        xform[0] = Matrix4::IDENTITY;
    }
423
    //-----------------------------------------------------------------------
424
    /** Runs the InstanceBatch render queue update and refreshes the matrix texture when
        needed.
    @remarks
        The texture is rewritten when the bounds were updated, the animation is dirty or
        camera relative rendering is active. mBoundsUpdated is cleared afterwards in
        every case.
    */
    void BaseInstanceBatchVTF::_updateRenderQueue(RenderQueue* queue)
    {
        InstanceBatch::_updateRenderQueue( queue );

        const bool textureOutdated = mBoundsUpdated || mDirtyAnimation ||
                                     mManager->getCameraRelativeRendering();
        if( textureOutdated )
        {
            updateVertexTexture();
        }

        mBoundsUpdated = false;
    }
433
    //-----------------------------------------------------------------------
434
    // InstanceBatchVTF
435
    //-----------------------------------------------------------------------
436
    /** Forwards all arguments to the BaseInstanceBatchVTF constructor; nothing else is
        initialised here.
    */
    InstanceBatchVTF::InstanceBatchVTF( 
        InstanceManager *creator, MeshPtr &meshReference, 
        const MaterialPtr &material, size_t instancesPerBatch, 
        const Mesh::IndexMap *indexToBoneMap, const String &batchName ) :
            BaseInstanceBatchVTF( creator, meshReference, material,
                                  instancesPerBatch, indexToBoneMap, batchName )
    {
    }
445
    //-----------------------------------------------------------------------
446
    InstanceBatchVTF::~InstanceBatchVTF()
    {
        //Intentionally empty: this class releases nothing itself
    }
449
    //-----------------------------------------------------------------------
450
    void InstanceBatchVTF::setupVertices( const SubMesh* baseSubMesh )
451
0
    {
452
0
        mRenderOperation.vertexData = OGRE_NEW VertexData();
453
0
        mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed)
454
455
0
        VertexData *thisVertexData = mRenderOperation.vertexData;
456
0
        VertexData *baseVertexData = baseSubMesh->vertexData;
457
458
0
        thisVertexData->vertexStart = 0;
459
0
        thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch;
460
461
0
        HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration );
462
0
        thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone();
463
464
0
        HWBoneIdxVec hwBoneIdx;
465
0
        HWBoneWgtVec hwBoneWgt;
466
467
        //Blend weights may not be present because HW_VTF does not require to be skeletally animated
468
0
        const VertexElement *veWeights = baseVertexData->vertexDeclaration->
469
0
                                                    findElementBySemantic( VES_BLEND_WEIGHTS );
470
0
        if( veWeights )
471
0
        {
472
            //One weight is recommended for VTF
473
0
            mWeightCount = (forceOneWeight() || useOneWeight()) ?
474
0
                                1 : veWeights->getSize() / sizeof(float);
475
0
        }
476
0
        else
477
0
        {
478
0
            mWeightCount = 1;
479
0
        }
480
481
0
        hwBoneIdx.resize( baseVertexData->vertexCount * mWeightCount, 0 );
482
483
0
        if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
484
0
        {
485
0
            if(mWeightCount > 1)
486
0
            {
487
0
                hwBoneWgt.resize( baseVertexData->vertexCount * mWeightCount, 0 );
488
0
                retrieveBoneIdxWithWeights(baseVertexData, hwBoneIdx, hwBoneWgt);
489
0
            }
490
0
            else
491
0
            {
492
0
                retrieveBoneIdx( baseVertexData, hwBoneIdx );
493
0
                thisVertexData->vertexDeclaration->removeElement( VES_BLEND_INDICES );
494
0
                thisVertexData->vertexDeclaration->removeElement( VES_BLEND_WEIGHTS );
495
496
0
                thisVertexData->vertexDeclaration->closeGapsInSource();
497
0
            }
498
499
0
        }
500
501
0
        for( unsigned short i=0; i<thisVertexData->vertexDeclaration->getMaxSource()+1; ++i )
502
0
        {
503
            //Create our own vertex buffer
504
0
            HardwareVertexBufferSharedPtr vertexBuffer =
505
0
                HardwareBufferManager::getSingleton().createVertexBuffer(
506
0
                thisVertexData->vertexDeclaration->getVertexSize(i),
507
0
                thisVertexData->vertexCount,
508
0
                HardwareBuffer::HBU_STATIC_WRITE_ONLY );
509
0
            thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
510
511
            //Grab the base submesh data
512
0
            HardwareVertexBufferSharedPtr baseVertexBuffer =
513
0
                baseVertexData->vertexBufferBinding->getBuffer(i);
514
515
0
            HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
516
0
            HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
517
0
            char* thisBuf = static_cast<char*>(thisLock.pData);
518
0
            char* baseBuf = static_cast<char*>(baseLock.pData);
519
520
            //Copy and repeat
521
0
            for( size_t j=0; j<mInstancesPerBatch; ++j )
522
0
            {
523
0
                const size_t sizeOfBuffer = baseVertexData->vertexCount *
524
0
                    baseVertexData->vertexDeclaration->getVertexSize(i);
525
0
                memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer );
526
0
            }
527
0
        }
528
529
0
        createVertexTexture( baseSubMesh );
530
0
        createVertexSemantics( thisVertexData, baseVertexData, hwBoneIdx, hwBoneWgt);
531
0
    }
532
    //-----------------------------------------------------------------------
533
    void InstanceBatchVTF::setupIndices( const SubMesh* baseSubMesh )
534
0
    {
535
0
        mRenderOperation.indexData = OGRE_NEW IndexData();
536
0
        mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed)
537
538
0
        IndexData *thisIndexData = mRenderOperation.indexData;
539
0
        IndexData *baseIndexData = baseSubMesh->indexData;
540
541
0
        thisIndexData->indexStart = 0;
542
0
        thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch;
543
544
        //TODO: Check numVertices is below max supported by GPU
545
0
        HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT;
546
0
        if( mRenderOperation.vertexData->vertexCount > 65535 )
547
0
            indexType = HardwareIndexBuffer::IT_32BIT;
548
0
        thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
549
0
            indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY );
550
551
0
        HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
552
0
        HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY);
553
0
        uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData);
554
0
        uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData);
555
0
        bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT;
556
557
0
        for( size_t i=0; i<mInstancesPerBatch; ++i )
558
0
        {
559
0
            const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch;
560
561
0
            const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData);
562
0
            const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData);
563
564
0
            for( size_t j=0; j<baseIndexData->indexCount; ++j )
565
0
            {
566
0
                uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++;
567
568
0
                if( indexType == HardwareIndexBuffer::IT_16BIT )
569
0
                    *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset);
570
0
                else
571
0
                    *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset);
572
0
            }
573
0
        }
574
0
    }
575
    //-----------------------------------------------------------------------
576
    void InstanceBatchVTF::createVertexSemantics( 
577
        VertexData *thisVertexData, VertexData *baseVertexData, const HWBoneIdxVec &hwBoneIdx, const HWBoneWgtVec &hwBoneWgt)
578
0
    {
579
0
        const size_t texWidth  = mMatrixTexture->getWidth();
580
0
        const size_t texHeight = mMatrixTexture->getHeight();
581
582
        //Calculate the texel offsets to correct them offline
583
        //Akwardly enough, the offset is needed in OpenGL too
584
0
        Vector2 texelOffsets;
585
        //RenderSystem *renderSystem = Root::getSingleton().getRenderSystem();
586
0
        texelOffsets.x = /*renderSystem->getHorizontalTexelOffset()*/ -0.5f / (float)texWidth;
587
0
        texelOffsets.y = /*renderSystem->getVerticalTexelOffset()*/ -0.5f / (float)texHeight;
588
589
        //Only one weight per vertex is supported. It would not only be complex, but prohibitively slow.
590
        //Put them in a new buffer, since it's 32 bytes aligned :-)
591
0
        const unsigned short newSource = thisVertexData->vertexDeclaration->getMaxSource() + 1;
592
0
        size_t maxFloatsPerVector = 4;
593
0
        size_t offset = 0;
594
595
0
        for(size_t i = 0; i < mWeightCount; i += maxFloatsPerVector / mRowLength)
596
0
        {
597
0
            offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES,
598
0
                thisVertexData->vertexDeclaration->
599
0
                getNextFreeTextureCoordinate() ).getSize();
600
0
            offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES,
601
0
                thisVertexData->vertexDeclaration->
602
0
                getNextFreeTextureCoordinate() ).getSize();
603
0
        }
604
605
        //Add the weights (supports up to four, which is Ogre's limit)
606
0
        if(mWeightCount > 1)
607
0
        {
608
0
            thisVertexData->vertexDeclaration->addElement(newSource, offset, VET_FLOAT4, VES_BLEND_WEIGHTS,
609
0
                                        thisVertexData->vertexDeclaration->getNextFreeTextureCoordinate() ).getSize();
610
0
        }
611
612
        //Create our own vertex buffer
613
0
        HardwareVertexBufferSharedPtr vertexBuffer =
614
0
            HardwareBufferManager::getSingleton().createVertexBuffer(
615
0
            thisVertexData->vertexDeclaration->getVertexSize(newSource),
616
0
            thisVertexData->vertexCount,
617
0
            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
618
0
        thisVertexData->vertexBufferBinding->setBinding( newSource, vertexBuffer );
619
620
0
        HardwareBufferLockGuard vertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
621
0
        float *thisFloat = static_cast<float*>(vertexLock.pData);
622
        
623
        //Copy and repeat
624
0
        for( size_t i=0; i<mInstancesPerBatch; ++i )
625
0
        {
626
0
            for( size_t j=0; j<baseVertexData->vertexCount * mWeightCount; j += mWeightCount )
627
0
            {
628
0
                size_t numberOfMatricesInLine = 0;
629
630
0
                for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx)
631
0
                {
632
0
                    for( size_t k=0; k < mRowLength; ++k)
633
0
                    {
634
0
                        size_t instanceIdx = (hwBoneIdx[j+wgtIdx] + i * mMatricesPerInstance) * mRowLength + k;
635
                        //x
636
0
                        *thisFloat++ = ((instanceIdx % texWidth) / (float)texWidth) - (float)texelOffsets.x;
637
                        //y
638
0
                        *thisFloat++ = ((instanceIdx / texWidth) / (float)texHeight) - (float)texelOffsets.y;
639
0
                    }
640
641
0
                    ++numberOfMatricesInLine;
642
643
                    //If another matrix can't be fit, we're on another line, or if this is the last weight
644
0
                    if((numberOfMatricesInLine + 1) * mRowLength > maxFloatsPerVector || (wgtIdx+1) == mWeightCount)
645
0
                    {
646
                        //Place zeroes in the remaining coordinates
647
0
                        for ( size_t k=mRowLength * numberOfMatricesInLine; k < maxFloatsPerVector; ++k)
648
0
                        {
649
0
                            *thisFloat++ = 0.0f;
650
0
                            *thisFloat++ = 0.0f;
651
0
                        }
652
653
0
                        numberOfMatricesInLine = 0;
654
0
                    }
655
0
                }
656
657
                //Don't need to write weights if there is only one
658
0
                if(mWeightCount > 1)
659
0
                {
660
                    //Write the weights
661
0
                    for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx)
662
0
                    {
663
0
                        *thisFloat++ = hwBoneWgt[j+wgtIdx];
664
0
                    }
665
666
                    //Fill the rest of the line with zeros
667
0
                    for(size_t wgtIdx = mWeightCount; wgtIdx < maxFloatsPerVector; ++wgtIdx)
668
0
                    {
669
0
                        *thisFloat++ = 0.0f;
670
0
                    }
671
0
                }
672
0
            }
673
0
        }
674
0
    }
675
    //-----------------------------------------------------------------------
676
    size_t InstanceBatchVTF::calculateMaxNumInstances( 
677
                    const SubMesh *baseSubMesh, uint16 flags ) const
678
0
    {
679
0
        size_t retVal = 0;
680
681
0
        RenderSystem *renderSystem = Root::getSingleton().getRenderSystem();
682
0
        const RenderSystemCapabilities *capabilities = renderSystem->getCapabilities();
683
684
        //VTF must be supported
685
0
        if( capabilities->getNumVertexTextureUnits() )
686
0
        {
687
            //TODO: Check PF_FLOAT32_RGBA is supported (should be, since it was the 1st one)
688
0
            const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
689
0
            retVal = c_maxTexWidth * c_maxTexHeight / mRowLength / numBones;
690
691
0
            if( flags & IM_USE16BIT )
692
0
            {
693
0
                if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF )
694
0
                    retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount;
695
0
            }
696
697
0
            if( flags & IM_VTFBESTFIT )
698
0
            {
699
0
                const size_t instancesPerBatch = std::min( retVal, mInstancesPerBatch );
700
                //Do the same as in createVertexTexture()
701
0
                const size_t numWorldMatrices = instancesPerBatch * numBones;
702
703
0
                size_t texWidth  = std::min<size_t>( numWorldMatrices * mRowLength, c_maxTexWidth );
704
0
                size_t texHeight = numWorldMatrices * mRowLength / c_maxTexWidth;
705
706
0
                const size_t remainder = (numWorldMatrices * mRowLength) % c_maxTexWidth;
707
708
0
                if( remainder && texHeight > 0 )
709
0
                    retVal = static_cast<size_t>(texWidth * texHeight / (float)mRowLength / (float)(numBones));
710
0
            }
711
0
        }
712
713
0
        return retVal;
714
715
0
    }
716
}