/src/ogre/OgreMain/src/OgreInstanceBatchShader.cpp

Source
/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/

Copyright (c) 2000-2014 Torus Knot Software Ltd

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-----------------------------------------------------------------------------
*/
#include "OgreStableHeaders.h"
#include "OgreInstanceBatchShader.h"
#include "OgreRenderOperation.h"
#include "OgreInstancedEntity.h"

namespace Ogre
{
    /** Creates a shader-based instance batch.
    @remarks
        Every argument is handed unchanged to the InstanceBatch base class.
        The world matrix count is seeded with one matrix per instance.
    @param creator Forwarded to InstanceBatch.
    @param meshReference Forwarded to InstanceBatch.
    @param material Forwarded to InstanceBatch.
    @param instancesPerBatch Forwarded to InstanceBatch; also the initial
        value of mNumWorldMatrices.
    @param indexToBoneMap Forwarded to InstanceBatch.
    @param batchName Forwarded to InstanceBatch.
    */
    InstanceBatchShader::InstanceBatchShader( InstanceManager *creator, MeshPtr &meshReference,
                                              const MaterialPtr &material, size_t instancesPerBatch,
                                              const Mesh::IndexMap *indexToBoneMap,
                                              const String &batchName )
        : InstanceBatch( creator, meshReference, material, instancesPerBatch, indexToBoneMap, batchName )
        , mNumWorldMatrices( instancesPerBatch )
    {
    }

    //-----------------------------------------------------------------------
    size_t InstanceBatchShader::calculateMaxNumInstances( const SubMesh *baseSubMesh, uint16 flags ) const
    {
        const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );

        mMaterial->load();
        Technique *technique = mMaterial->getBestTechnique();
        if( technique )
        {
            GpuProgramParametersSharedPtr vertexParam = technique->getPass(0)->getVertexProgramParameters();
            for(auto& it : vertexParam->getConstantDefinitions().map)
            {
                const GpuConstantDefinition &constDef = it.second;
                if(((constDef.constType == GCT_MATRIX_3X4 ||
                    constDef.constType == GCT_MATRIX_4X3 ||             //OGL GLSL bitches without this
                    constDef.constType == GCT_MATRIX_2X4 ||
                    constDef.constType == GCT_FLOAT4)                   //OGL GLSL bitches without this
                    && constDef.isFloat()) ||
                   ((constDef.constType == GCT_MATRIX_DOUBLE_3X4 ||
                    constDef.constType == GCT_MATRIX_DOUBLE_4X3 ||      //OGL GLSL bitches without this
                    constDef.constType == GCT_MATRIX_DOUBLE_2X4 ||
                    constDef.constType == GCT_DOUBLE4)                  //OGL GLSL bitches without this
                    && constDef.isDouble())
                   )
                {
                    const GpuProgramParameters::AutoConstantEntry *entry =
                                    vertexParam->_findRawAutoConstantEntryFloat( constDef.physicalIndex );
                    if( entry && (entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 || entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
                    {
                        //Material is correctly done!
                        size_t arraySize = constDef.arraySize;

                        //Deal with GL "hacky" way of doing 4x3 matrices
                        if(entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 && constDef.constType == GCT_FLOAT4)
                            arraySize /= 3;
                        else if(entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4 && constDef.constType == GCT_FLOAT4)
                            arraySize /= 2;

                        //Check the num of arrays
                        size_t retVal = arraySize / numBones;

                        if( flags & IM_USE16BIT )
                        {
                            if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF )
                                retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount;
                        }

                        if((retVal < 3 && entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4) ||
                            (retVal < 2 && entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
                        {
                            LogManager::getSingleton().logWarning( "InstanceBatchShader: Mesh '" +
                                        mMeshReference->getName() + "' using material '" +
                                        mMaterial->getName() + "'. The amount of possible "
                                        "instances per batch is very low. Performance benefits will "
                                        "be minimal, if any. It might be even slower!");
                        }

                        return retVal;
                    }
                }
            }

            OGRE_EXCEPT(Exception::ERR_INVALIDPARAMS,
                        "Material '" + mMaterial->getName() + "' does not support hardware skinning");
        }

        //Reaching here the material is just unsupported.

        return 0;
    }
    //-----------------------------------------------------------------------
    /** Builds the batch from a sub-mesh and a render operation.
    @remarks
        When the mesh has a skeleton, the world matrix count is first set to
        instances-per-batch times the number of entries in the sub-mesh's
        blend-index-to-bone map. The rest is delegated to InstanceBatch::buildFrom.
    @param baseSubMesh Sub-mesh to build from.
    @param renderOperation Passed straight to InstanceBatch::buildFrom.
    */
    void InstanceBatchShader::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation )
    {
        const bool skinned = mMeshReference->hasSkeleton() && mMeshReference->getSkeleton();
        if( skinned )
        {
            const size_t bonesPerInstance = baseSubMesh->blendIndexToBoneIndexMap.size();
            mNumWorldMatrices = mInstancesPerBatch * bonesPerInstance;
        }

        InstanceBatch::buildFrom( baseSubMesh, renderOperation );
    }
    //-----------------------------------------------------------------------
    void InstanceBatchShader::setupVertices( const SubMesh* baseSubMesh )
    {
        mRenderOperation.vertexData = OGRE_NEW VertexData();
        mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed)

        VertexData *thisVertexData = mRenderOperation.vertexData;
        VertexData *baseVertexData = baseSubMesh->vertexData;

        thisVertexData->vertexStart = 0;
        thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch;

        HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration );
        thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone();

        if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
        {
            //Building hw skinned batches follow a different path
            setupHardwareSkinned( baseSubMesh, thisVertexData, baseVertexData );
            return;
        }

        //TODO: Can't we, instead of using another source, put the index ID in the same source?
        thisVertexData->vertexDeclaration->addElement(
                                        thisVertexData->vertexDeclaration->getMaxSource() + 1, 0,
                                        VET_UBYTE4, VES_BLEND_INDICES );


        for( uint16 i=0; i<thisVertexData->vertexDeclaration->getMaxSource(); ++i )
        {
            //Create our own vertex buffer
            HardwareVertexBufferSharedPtr vertexBuffer =
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
                                            thisVertexData->vertexDeclaration->getVertexSize(i),
                                            thisVertexData->vertexCount,
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
            thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );

            //Grab the base submesh data
            HardwareVertexBufferSharedPtr baseVertexBuffer =
                                                    baseVertexData->vertexBufferBinding->getBuffer(i);

            HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
            HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
            char* thisBuf = static_cast<char*>(thisLock.pData);
            char* baseBuf = static_cast<char*>(baseLock.pData);

            //Copy and repeat
            for( size_t j=0; j<mInstancesPerBatch; ++j )
            {
                const size_t sizeOfBuffer = baseVertexData->vertexCount *
                                            baseVertexData->vertexDeclaration->getVertexSize(i);
                memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer );
            }
        }

        {
            //Now create the vertices "index ID" to individualize each instance
            const unsigned short lastSource = thisVertexData->vertexDeclaration->getMaxSource();
            HardwareVertexBufferSharedPtr vertexBuffer =
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
                                            thisVertexData->vertexDeclaration->getVertexSize( lastSource ),
                                            thisVertexData->vertexCount,
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
            thisVertexData->vertexBufferBinding->setBinding( lastSource, vertexBuffer );

            HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
            char* thisBuf = static_cast<char*>(thisLock.pData);
            for( uint8 j=0; j<uint8(mInstancesPerBatch); ++j )
            {
                for( size_t k=0; k<baseVertexData->vertexCount; ++k )
                {
                    *thisBuf++ = j;
                    *thisBuf++ = j;
                    *thisBuf++ = j;
                    *thisBuf++ = j;
                }
            }

        }
    }
    //-----------------------------------------------------------------------
    void InstanceBatchShader::setupIndices( const SubMesh* baseSubMesh )
    {
        mRenderOperation.indexData = OGRE_NEW IndexData();
        mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed)

        IndexData *thisIndexData = mRenderOperation.indexData;
        IndexData *baseIndexData = baseSubMesh->indexData;

        thisIndexData->indexStart = 0;
        thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch;

        //TODO: Check numVertices is below max supported by GPU
        HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT;
        if( mRenderOperation.vertexData->vertexCount > 65535 )
            indexType = HardwareIndexBuffer::IT_32BIT;
        thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
            indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY );

        HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
        HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY);
        uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData);
        uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData);
        bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT;

        for( size_t i=0; i<mInstancesPerBatch; ++i )
        {
            const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch;

            const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData);
            const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData);

            for( size_t j=0; j<baseIndexData->indexCount; ++j )
            {
                uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++;

                if( indexType == HardwareIndexBuffer::IT_16BIT )
                    *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset);
                else
                    *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset);
            }
        }
    }
    //-----------------------------------------------------------------------
    void InstanceBatchShader::setupHardwareSkinned( const SubMesh* baseSubMesh, VertexData *thisVertexData,
                                                    VertexData *baseVertexData )
    {
        const uint8 numBones = uint8(baseSubMesh->blendIndexToBoneIndexMap.size());
        mNumWorldMatrices = mInstancesPerBatch * numBones;

        for( uint16 i=0; i<=thisVertexData->vertexDeclaration->getMaxSource(); ++i )
        {
            //Create our own vertex buffer
            HardwareVertexBufferSharedPtr vertexBuffer =
                                            HardwareBufferManager::getSingleton().createVertexBuffer(
                                            thisVertexData->vertexDeclaration->getVertexSize(i),
                                            thisVertexData->vertexCount,
                                            HardwareBuffer::HBU_STATIC_WRITE_ONLY );
            thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );

            VertexDeclaration::VertexElementList veList =
                                            thisVertexData->vertexDeclaration->findElementsBySource(i);

            //Grab the base submesh data
            HardwareVertexBufferSharedPtr baseVertexBuffer =
                                                    baseVertexData->vertexBufferBinding->getBuffer(i);

            HardwareBufferLockGuard thisVertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
            HardwareBufferLockGuard baseVertexLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
            char* thisBuf = static_cast<char*>(thisVertexLock.pData);
            char* baseBuf = static_cast<char*>(baseVertexLock.pData);
            char *startBuf = baseBuf;

            //Copy and repeat
            for (uint8 j = 0; j < uint8(mInstancesPerBatch); ++j)
            {
                //Repeat source
                baseBuf = startBuf;

                for (size_t k = 0; k < baseVertexData->vertexCount; ++k)
                {
                    for (auto& vl : veList)
                    {
                        switch (vl.getSemantic())
                        {
                        case VES_BLEND_INDICES:
                        *(thisBuf + vl.getOffset() + 0) = *(baseBuf + vl.getOffset() + 0) + j * numBones;
                        *(thisBuf + vl.getOffset() + 1) = *(baseBuf + vl.getOffset() + 1) + j * numBones;
                        *(thisBuf + vl.getOffset() + 2) = *(baseBuf + vl.getOffset() + 2) + j * numBones;
                        *(thisBuf + vl.getOffset() + 3) = *(baseBuf + vl.getOffset() + 3) + j * numBones;
                            break;
                        default:
                            memcpy( thisBuf + vl.getOffset(), baseBuf + vl.getOffset(), vl.getSize() );
                            break;
                        }
                    }
                    thisBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
                    baseBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
                }
            }
        }
    }
    //-----------------------------------------------------------------------
    /** Writes the world transforms of the batch into the given array.
    @remarks
        When MeshManager::getBonesUseObjectSpace() is true, the first slot
        receives the identity transform. After that each instanced entity
        writes at the current position and the position advances by the
        value its getTransforms() call returns.
    @param xform Destination array.
    */
    void InstanceBatchShader::getWorldTransforms( Matrix4* xform ) const
    {
        Matrix4* cursor = xform;

        if (MeshManager::getBonesUseObjectSpace())
            *cursor++ = Affine3::IDENTITY;

        for (auto *entity : mInstancedEntities)
            cursor += entity->getTransforms(cursor);
    }
    //-----------------------------------------------------------------------
    unsigned short InstanceBatchShader::getNumWorldTransforms(void) const
    {
        return uint16(mNumWorldMatrices) + uint16(MeshManager::getBonesUseObjectSpace());
    }
}

Coverage Report

Created: 2025-11-04 07:18

Line	Count	Source
1		/*
2		-----------------------------------------------------------------------------
3		This source file is part of OGRE
4		(Object-oriented Graphics Rendering Engine)
5		For the latest info, see http://www.ogre3d.org/
6
7		Copyright (c) 2000-2014 Torus Knot Software Ltd
8
9		Permission is hereby granted, free of charge, to any person obtaining a copy
10		of this software and associated documentation files (the "Software"), to deal
11		in the Software without restriction, including without limitation the rights
12		to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13		copies of the Software, and to permit persons to whom the Software is
14		furnished to do so, subject to the following conditions:
15
16		The above copyright notice and this permission notice shall be included in
17		all copies or substantial portions of the Software.
18
19		THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20		IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21		FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22		AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23		LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24		OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25		THE SOFTWARE.
26		-----------------------------------------------------------------------------
27		*/
28		#include "OgreStableHeaders.h"
29		#include "OgreInstanceBatchShader.h"
30		#include "OgreRenderOperation.h"
31		#include "OgreInstancedEntity.h"
32
33		namespace Ogre
34		{
35		InstanceBatchShader::InstanceBatchShader( InstanceManager *creator, MeshPtr &meshReference,
36		const MaterialPtr &material, size_t instancesPerBatch,
37		const Mesh::IndexMap *indexToBoneMap, const String &batchName ) :
38	0	InstanceBatch( creator, meshReference, material, instancesPerBatch,
39	0	indexToBoneMap, batchName ),
40	0	mNumWorldMatrices( instancesPerBatch )
41	0	{
42	0	}
43
44		//-----------------------------------------------------------------------
45		size_t InstanceBatchShader::calculateMaxNumInstances( const SubMesh *baseSubMesh, uint16 flags ) const
46	0	{
47	0	const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() );
48
49	0	mMaterial->load();
50	0	Technique *technique = mMaterial->getBestTechnique();
51	0	if( technique )
52	0	{
53	0	GpuProgramParametersSharedPtr vertexParam = technique->getPass(0)->getVertexProgramParameters();
54	0	for(auto& it : vertexParam->getConstantDefinitions().map)
55	0	{
56	0	const GpuConstantDefinition &constDef = it.second;
57	0	if(((constDef.constType == GCT_MATRIX_3X4 \|\|
58	0	constDef.constType == GCT_MATRIX_4X3 \|\| //OGL GLSL bitches without this
59	0	constDef.constType == GCT_MATRIX_2X4 \|\|
60	0	constDef.constType == GCT_FLOAT4) //OGL GLSL bitches without this
61	0	&& constDef.isFloat()) \|\|
62	0	((constDef.constType == GCT_MATRIX_DOUBLE_3X4 \|\|
63	0	constDef.constType == GCT_MATRIX_DOUBLE_4X3 \|\| //OGL GLSL bitches without this
64	0	constDef.constType == GCT_MATRIX_DOUBLE_2X4 \|\|
65	0	constDef.constType == GCT_DOUBLE4) //OGL GLSL bitches without this
66	0	&& constDef.isDouble())
67	0	)
68	0	{
69	0	const GpuProgramParameters::AutoConstantEntry *entry =
70	0	vertexParam->_findRawAutoConstantEntryFloat( constDef.physicalIndex );
71	0	if( entry && (entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 \|\| entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
72	0	{
73		//Material is correctly done!
74	0	size_t arraySize = constDef.arraySize;
75
76		//Deal with GL "hacky" way of doing 4x3 matrices
77	0	if(entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 && constDef.constType == GCT_FLOAT4)
78	0	arraySize /= 3;
79	0	else if(entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4 && constDef.constType == GCT_FLOAT4)
80	0	arraySize /= 2;
81
82		//Check the num of arrays
83	0	size_t retVal = arraySize / numBones;
84
85	0	if( flags & IM_USE16BIT )
86	0	{
87	0	if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF )
88	0	retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount;
89	0	}
90
91	0	if((retVal < 3 && entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4) \|\|
92	0	(retVal < 2 && entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4))
93	0	{
94	0	LogManager::getSingleton().logWarning( "InstanceBatchShader: Mesh '" +
95	0	mMeshReference->getName() + "' using material '" +
96	0	mMaterial->getName() + "'. The amount of possible "
97	0	"instances per batch is very low. Performance benefits will "
98	0	"be minimal, if any. It might be even slower!");
99	0	}
100
101	0	return retVal;
102	0	}
103	0	}
104	0	}
105
106	0	OGRE_EXCEPT(Exception::ERR_INVALIDPARAMS,
107	0	"Material '" + mMaterial->getName() + "' does not support hardware skinning");
108	0	}
109
110		//Reaching here the material is just unsupported.
111
112	0	return 0;
113	0	}
114		//-----------------------------------------------------------------------
115		void InstanceBatchShader::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation )
116	0	{
117	0	if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
118	0	mNumWorldMatrices = mInstancesPerBatch * baseSubMesh->blendIndexToBoneIndexMap.size();
119	0	InstanceBatch::buildFrom( baseSubMesh, renderOperation );
120	0	}
121		//-----------------------------------------------------------------------
122		void InstanceBatchShader::setupVertices( const SubMesh* baseSubMesh )
123	0	{
124	0	mRenderOperation.vertexData = OGRE_NEW VertexData();
125	0	mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed)
126
127	0	VertexData *thisVertexData = mRenderOperation.vertexData;
128	0	VertexData *baseVertexData = baseSubMesh->vertexData;
129
130	0	thisVertexData->vertexStart = 0;
131	0	thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch;
132
133	0	HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration );
134	0	thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone();
135
136	0	if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() )
137	0	{
138		//Building hw skinned batches follow a different path
139	0	setupHardwareSkinned( baseSubMesh, thisVertexData, baseVertexData );
140	0	return;
141	0	}
142
143		//TODO: Can't we, instead of using another source, put the index ID in the same source?
144	0	thisVertexData->vertexDeclaration->addElement(
145	0	thisVertexData->vertexDeclaration->getMaxSource() + 1, 0,
146	0	VET_UBYTE4, VES_BLEND_INDICES );
147
148
149	0	for( uint16 i=0; i<thisVertexData->vertexDeclaration->getMaxSource(); ++i )
150	0	{
151		//Create our own vertex buffer
152	0	HardwareVertexBufferSharedPtr vertexBuffer =
153	0	HardwareBufferManager::getSingleton().createVertexBuffer(
154	0	thisVertexData->vertexDeclaration->getVertexSize(i),
155	0	thisVertexData->vertexCount,
156	0	HardwareBuffer::HBU_STATIC_WRITE_ONLY );
157	0	thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
158
159		//Grab the base submesh data
160	0	HardwareVertexBufferSharedPtr baseVertexBuffer =
161	0	baseVertexData->vertexBufferBinding->getBuffer(i);
162
163	0	HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
164	0	HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
165	0	char* thisBuf = static_cast<char*>(thisLock.pData);
166	0	char* baseBuf = static_cast<char*>(baseLock.pData);
167
168		//Copy and repeat
169	0	for( size_t j=0; j<mInstancesPerBatch; ++j )
170	0	{
171	0	const size_t sizeOfBuffer = baseVertexData->vertexCount *
172	0	baseVertexData->vertexDeclaration->getVertexSize(i);
173	0	memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer );
174	0	}
175	0	}
176
177	0	{
178		//Now create the vertices "index ID" to individualize each instance
179	0	const unsigned short lastSource = thisVertexData->vertexDeclaration->getMaxSource();
180	0	HardwareVertexBufferSharedPtr vertexBuffer =
181	0	HardwareBufferManager::getSingleton().createVertexBuffer(
182	0	thisVertexData->vertexDeclaration->getVertexSize( lastSource ),
183	0	thisVertexData->vertexCount,
184	0	HardwareBuffer::HBU_STATIC_WRITE_ONLY );
185	0	thisVertexData->vertexBufferBinding->setBinding( lastSource, vertexBuffer );
186
187	0	HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
188	0	char* thisBuf = static_cast<char*>(thisLock.pData);
189	0	for( uint8 j=0; j<uint8(mInstancesPerBatch); ++j )
190	0	{
191	0	for( size_t k=0; k<baseVertexData->vertexCount; ++k )
192	0	{
193	0	*thisBuf++ = j;
194	0	*thisBuf++ = j;
195	0	*thisBuf++ = j;
196	0	*thisBuf++ = j;
197	0	}
198	0	}
199
200	0	}
201	0	}
202		//-----------------------------------------------------------------------
203		void InstanceBatchShader::setupIndices( const SubMesh* baseSubMesh )
204	0	{
205	0	mRenderOperation.indexData = OGRE_NEW IndexData();
206	0	mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed)
207
208	0	IndexData *thisIndexData = mRenderOperation.indexData;
209	0	IndexData *baseIndexData = baseSubMesh->indexData;
210
211	0	thisIndexData->indexStart = 0;
212	0	thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch;
213
214		//TODO: Check numVertices is below max supported by GPU
215	0	HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT;
216	0	if( mRenderOperation.vertexData->vertexCount > 65535 )
217	0	indexType = HardwareIndexBuffer::IT_32BIT;
218	0	thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer(
219	0	indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY );
220
221	0	HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD);
222	0	HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY);
223	0	uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData);
224	0	uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData);
225	0	bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT;
226
227	0	for( size_t i=0; i<mInstancesPerBatch; ++i )
228	0	{
229	0	const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch;
230
231	0	const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData);
232	0	const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData);
233
234	0	for( size_t j=0; j<baseIndexData->indexCount; ++j )
235	0	{
236	0	uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++;
237
238	0	if( indexType == HardwareIndexBuffer::IT_16BIT )
239	0	*thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset);
240	0	else
241	0	*thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset);
242	0	}
243	0	}
244	0	}
245		//-----------------------------------------------------------------------
246		void InstanceBatchShader::setupHardwareSkinned( const SubMesh* baseSubMesh, VertexData *thisVertexData,
247		VertexData *baseVertexData )
248	0	{
249	0	const uint8 numBones = uint8(baseSubMesh->blendIndexToBoneIndexMap.size());
250	0	mNumWorldMatrices = mInstancesPerBatch * numBones;
251
252	0	for( uint16 i=0; i<=thisVertexData->vertexDeclaration->getMaxSource(); ++i )
253	0	{
254		//Create our own vertex buffer
255	0	HardwareVertexBufferSharedPtr vertexBuffer =
256	0	HardwareBufferManager::getSingleton().createVertexBuffer(
257	0	thisVertexData->vertexDeclaration->getVertexSize(i),
258	0	thisVertexData->vertexCount,
259	0	HardwareBuffer::HBU_STATIC_WRITE_ONLY );
260	0	thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer );
261
262	0	VertexDeclaration::VertexElementList veList =
263	0	thisVertexData->vertexDeclaration->findElementsBySource(i);
264
265		//Grab the base submesh data
266	0	HardwareVertexBufferSharedPtr baseVertexBuffer =
267	0	baseVertexData->vertexBufferBinding->getBuffer(i);
268
269	0	HardwareBufferLockGuard thisVertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD);
270	0	HardwareBufferLockGuard baseVertexLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY);
271	0	char* thisBuf = static_cast<char*>(thisVertexLock.pData);
272	0	char* baseBuf = static_cast<char*>(baseVertexLock.pData);
273	0	char *startBuf = baseBuf;
274
275		//Copy and repeat
276	0	for (uint8 j = 0; j < uint8(mInstancesPerBatch); ++j)
277	0	{
278		//Repeat source
279	0	baseBuf = startBuf;
280
281	0	for (size_t k = 0; k < baseVertexData->vertexCount; ++k)
282	0	{
283	0	for (auto& vl : veList)
284	0	{
285	0	switch (vl.getSemantic())
286	0	{
287	0	case VES_BLEND_INDICES:
288	0	*(thisBuf + vl.getOffset() + 0) = *(baseBuf + vl.getOffset() + 0) + j * numBones;
289	0	*(thisBuf + vl.getOffset() + 1) = *(baseBuf + vl.getOffset() + 1) + j * numBones;
290	0	*(thisBuf + vl.getOffset() + 2) = *(baseBuf + vl.getOffset() + 2) + j * numBones;
291	0	*(thisBuf + vl.getOffset() + 3) = *(baseBuf + vl.getOffset() + 3) + j * numBones;
292	0	break;
293	0	default:
294	0	memcpy( thisBuf + vl.getOffset(), baseBuf + vl.getOffset(), vl.getSize() );
295	0	break;
296	0	}
297	0	}
298	0	thisBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
299	0	baseBuf += baseVertexData->vertexDeclaration->getVertexSize(i);
300	0	}
301	0	}
302	0	}
303	0	}
304		//-----------------------------------------------------------------------
305		void InstanceBatchShader::getWorldTransforms( Matrix4* xform ) const
306	0	{
307	0	if (MeshManager::getBonesUseObjectSpace())
308	0	{
309	0	*xform = Affine3::IDENTITY;
310	0	xform++;
311	0	}
312
313	0	for (auto *e : mInstancedEntities)
314	0	{
315	0	xform += e->getTransforms(xform);
316	0	}
317	0	}
318		//-----------------------------------------------------------------------
319		unsigned short InstanceBatchShader::getNumWorldTransforms(void) const
320	0	{
321	0	return uint16(mNumWorldMatrices) + uint16(MeshManager::getBonesUseObjectSpace());
322	0	}
323		}