/src/ogre/OgreMain/src/OgreInstanceBatchShader.cpp
Line | Count | Source |
1 | | /* |
2 | | ----------------------------------------------------------------------------- |
3 | | This source file is part of OGRE |
4 | | (Object-oriented Graphics Rendering Engine) |
5 | | For the latest info, see http://www.ogre3d.org/ |
6 | | |
7 | | Copyright (c) 2000-2014 Torus Knot Software Ltd |
8 | | |
9 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
10 | | of this software and associated documentation files (the "Software"), to deal |
11 | | in the Software without restriction, including without limitation the rights |
12 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
13 | | copies of the Software, and to permit persons to whom the Software is |
14 | | furnished to do so, subject to the following conditions: |
15 | | |
16 | | The above copyright notice and this permission notice shall be included in |
17 | | all copies or substantial portions of the Software. |
18 | | |
19 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
20 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
21 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
22 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
23 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
24 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
25 | | THE SOFTWARE. |
26 | | ----------------------------------------------------------------------------- |
27 | | */ |
28 | | #include "OgreStableHeaders.h" |
29 | | #include "OgreInstanceBatchShader.h" |
30 | | #include "OgreRenderOperation.h" |
31 | | #include "OgreInstancedEntity.h" |
32 | | |
33 | | namespace Ogre |
34 | | { |
35 | | InstanceBatchShader::InstanceBatchShader( InstanceManager *creator, MeshPtr &meshReference, |
36 | | const MaterialPtr &material, size_t instancesPerBatch, |
37 | | const Mesh::IndexMap *indexToBoneMap, const String &batchName ) : |
38 | 0 | InstanceBatch( creator, meshReference, material, instancesPerBatch, |
39 | 0 | indexToBoneMap, batchName ), |
40 | 0 | mNumWorldMatrices( instancesPerBatch ) |
41 | 0 | { |
42 | 0 | } |
43 | | |
44 | | //----------------------------------------------------------------------- |
45 | | size_t InstanceBatchShader::calculateMaxNumInstances( const SubMesh *baseSubMesh, uint16 flags ) const |
46 | 0 | { |
47 | 0 | const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() ); |
48 | |
|
49 | 0 | mMaterial->load(); |
50 | 0 | Technique *technique = mMaterial->getBestTechnique(); |
51 | 0 | if( technique ) |
52 | 0 | { |
53 | 0 | GpuProgramParametersSharedPtr vertexParam = technique->getPass(0)->getVertexProgramParameters(); |
54 | 0 | for(auto& it : vertexParam->getConstantDefinitions().map) |
55 | 0 | { |
56 | 0 | const GpuConstantDefinition &constDef = it.second; |
57 | 0 | if(((constDef.constType == GCT_MATRIX_3X4 || |
58 | 0 | constDef.constType == GCT_MATRIX_4X3 || //OGL GLSL bitches without this |
59 | 0 | constDef.constType == GCT_MATRIX_2X4 || |
60 | 0 | constDef.constType == GCT_FLOAT4) //OGL GLSL bitches without this |
61 | 0 | && constDef.isFloat()) || |
62 | 0 | ((constDef.constType == GCT_MATRIX_DOUBLE_3X4 || |
63 | 0 | constDef.constType == GCT_MATRIX_DOUBLE_4X3 || //OGL GLSL bitches without this |
64 | 0 | constDef.constType == GCT_MATRIX_DOUBLE_2X4 || |
65 | 0 | constDef.constType == GCT_DOUBLE4) //OGL GLSL bitches without this |
66 | 0 | && constDef.isDouble()) |
67 | 0 | ) |
68 | 0 | { |
69 | 0 | const GpuProgramParameters::AutoConstantEntry *entry = |
70 | 0 | vertexParam->_findRawAutoConstantEntryFloat( constDef.physicalIndex ); |
71 | 0 | if( entry && (entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 || entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4)) |
72 | 0 | { |
73 | | //Material is correctly done! |
74 | 0 | size_t arraySize = constDef.arraySize; |
75 | | |
76 | | //Deal with GL "hacky" way of doing 4x3 matrices |
77 | 0 | if(entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4 && constDef.constType == GCT_FLOAT4) |
78 | 0 | arraySize /= 3; |
79 | 0 | else if(entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4 && constDef.constType == GCT_FLOAT4) |
80 | 0 | arraySize /= 2; |
81 | | |
82 | | //Check the num of arrays |
83 | 0 | size_t retVal = arraySize / numBones; |
84 | |
|
85 | 0 | if( flags & IM_USE16BIT ) |
86 | 0 | { |
87 | 0 | if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF ) |
88 | 0 | retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount; |
89 | 0 | } |
90 | |
|
91 | 0 | if((retVal < 3 && entry->paramType == GpuProgramParameters::ACT_WORLD_MATRIX_ARRAY_3x4) || |
92 | 0 | (retVal < 2 && entry->paramType == GpuProgramParameters::ACT_WORLD_DUALQUATERNION_ARRAY_2x4)) |
93 | 0 | { |
94 | 0 | LogManager::getSingleton().logWarning( "InstanceBatchShader: Mesh '" + |
95 | 0 | mMeshReference->getName() + "' using material '" + |
96 | 0 | mMaterial->getName() + "'. The amount of possible " |
97 | 0 | "instances per batch is very low. Performance benefits will " |
98 | 0 | "be minimal, if any. It might be even slower!"); |
99 | 0 | } |
100 | |
|
101 | 0 | return retVal; |
102 | 0 | } |
103 | 0 | } |
104 | 0 | } |
105 | | |
106 | 0 | OGRE_EXCEPT(Exception::ERR_INVALIDPARAMS, |
107 | 0 | "Material '" + mMaterial->getName() + "' does not support hardware skinning"); |
108 | 0 | } |
109 | | |
110 | | //Reaching here the material is just unsupported. |
111 | | |
112 | 0 | return 0; |
113 | 0 | } |
114 | | //----------------------------------------------------------------------- |
115 | | void InstanceBatchShader::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation ) |
116 | 0 | { |
117 | 0 | if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() ) |
118 | 0 | mNumWorldMatrices = mInstancesPerBatch * baseSubMesh->blendIndexToBoneIndexMap.size(); |
119 | 0 | InstanceBatch::buildFrom( baseSubMesh, renderOperation ); |
120 | 0 | } |
121 | | //----------------------------------------------------------------------- |
122 | | void InstanceBatchShader::setupVertices( const SubMesh* baseSubMesh ) |
123 | 0 | { |
124 | 0 | mRenderOperation.vertexData = OGRE_NEW VertexData(); |
125 | 0 | mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed) |
126 | |
|
127 | 0 | VertexData *thisVertexData = mRenderOperation.vertexData; |
128 | 0 | VertexData *baseVertexData = baseSubMesh->vertexData; |
129 | |
|
130 | 0 | thisVertexData->vertexStart = 0; |
131 | 0 | thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch; |
132 | |
|
133 | 0 | HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration ); |
134 | 0 | thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone(); |
135 | |
|
136 | 0 | if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() ) |
137 | 0 | { |
138 | | //Building hw skinned batches follow a different path |
139 | 0 | setupHardwareSkinned( baseSubMesh, thisVertexData, baseVertexData ); |
140 | 0 | return; |
141 | 0 | } |
142 | | |
143 | | //TODO: Can't we, instead of using another source, put the index ID in the same source? |
144 | 0 | thisVertexData->vertexDeclaration->addElement( |
145 | 0 | thisVertexData->vertexDeclaration->getMaxSource() + 1, 0, |
146 | 0 | VET_UBYTE4, VES_BLEND_INDICES ); |
147 | | |
148 | |
|
149 | 0 | for( uint16 i=0; i<thisVertexData->vertexDeclaration->getMaxSource(); ++i ) |
150 | 0 | { |
151 | | //Create our own vertex buffer |
152 | 0 | HardwareVertexBufferSharedPtr vertexBuffer = |
153 | 0 | HardwareBufferManager::getSingleton().createVertexBuffer( |
154 | 0 | thisVertexData->vertexDeclaration->getVertexSize(i), |
155 | 0 | thisVertexData->vertexCount, |
156 | 0 | HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
157 | 0 | thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer ); |
158 | | |
159 | | //Grab the base submesh data |
160 | 0 | HardwareVertexBufferSharedPtr baseVertexBuffer = |
161 | 0 | baseVertexData->vertexBufferBinding->getBuffer(i); |
162 | |
|
163 | 0 | HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); |
164 | 0 | HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY); |
165 | 0 | char* thisBuf = static_cast<char*>(thisLock.pData); |
166 | 0 | char* baseBuf = static_cast<char*>(baseLock.pData); |
167 | | |
168 | | //Copy and repeat |
169 | 0 | for( size_t j=0; j<mInstancesPerBatch; ++j ) |
170 | 0 | { |
171 | 0 | const size_t sizeOfBuffer = baseVertexData->vertexCount * |
172 | 0 | baseVertexData->vertexDeclaration->getVertexSize(i); |
173 | 0 | memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer ); |
174 | 0 | } |
175 | 0 | } |
176 | |
|
177 | 0 | { |
178 | | //Now create the vertices "index ID" to individualize each instance |
179 | 0 | const unsigned short lastSource = thisVertexData->vertexDeclaration->getMaxSource(); |
180 | 0 | HardwareVertexBufferSharedPtr vertexBuffer = |
181 | 0 | HardwareBufferManager::getSingleton().createVertexBuffer( |
182 | 0 | thisVertexData->vertexDeclaration->getVertexSize( lastSource ), |
183 | 0 | thisVertexData->vertexCount, |
184 | 0 | HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
185 | 0 | thisVertexData->vertexBufferBinding->setBinding( lastSource, vertexBuffer ); |
186 | |
|
187 | 0 | HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); |
188 | 0 | char* thisBuf = static_cast<char*>(thisLock.pData); |
189 | 0 | for( uint8 j=0; j<uint8(mInstancesPerBatch); ++j ) |
190 | 0 | { |
191 | 0 | for( size_t k=0; k<baseVertexData->vertexCount; ++k ) |
192 | 0 | { |
193 | 0 | *thisBuf++ = j; |
194 | 0 | *thisBuf++ = j; |
195 | 0 | *thisBuf++ = j; |
196 | 0 | *thisBuf++ = j; |
197 | 0 | } |
198 | 0 | } |
199 | |
|
200 | 0 | } |
201 | 0 | } |
202 | | //----------------------------------------------------------------------- |
203 | | void InstanceBatchShader::setupIndices( const SubMesh* baseSubMesh ) |
204 | 0 | { |
205 | 0 | mRenderOperation.indexData = OGRE_NEW IndexData(); |
206 | 0 | mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed) |
207 | |
|
208 | 0 | IndexData *thisIndexData = mRenderOperation.indexData; |
209 | 0 | IndexData *baseIndexData = baseSubMesh->indexData; |
210 | |
|
211 | 0 | thisIndexData->indexStart = 0; |
212 | 0 | thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch; |
213 | | |
214 | | //TODO: Check numVertices is below max supported by GPU |
215 | 0 | HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT; |
216 | 0 | if( mRenderOperation.vertexData->vertexCount > 65535 ) |
217 | 0 | indexType = HardwareIndexBuffer::IT_32BIT; |
218 | 0 | thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer( |
219 | 0 | indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
220 | |
|
221 | 0 | HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD); |
222 | 0 | HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY); |
223 | 0 | uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData); |
224 | 0 | uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData); |
225 | 0 | bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT; |
226 | |
|
227 | 0 | for( size_t i=0; i<mInstancesPerBatch; ++i ) |
228 | 0 | { |
229 | 0 | const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch; |
230 | |
|
231 | 0 | const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData); |
232 | 0 | const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData); |
233 | |
|
234 | 0 | for( size_t j=0; j<baseIndexData->indexCount; ++j ) |
235 | 0 | { |
236 | 0 | uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++; |
237 | |
|
238 | 0 | if( indexType == HardwareIndexBuffer::IT_16BIT ) |
239 | 0 | *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset); |
240 | 0 | else |
241 | 0 | *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset); |
242 | 0 | } |
243 | 0 | } |
244 | 0 | } |
245 | | //----------------------------------------------------------------------- |
246 | | void InstanceBatchShader::setupHardwareSkinned( const SubMesh* baseSubMesh, VertexData *thisVertexData, |
247 | | VertexData *baseVertexData ) |
248 | 0 | { |
249 | 0 | const uint8 numBones = uint8(baseSubMesh->blendIndexToBoneIndexMap.size()); |
250 | 0 | mNumWorldMatrices = mInstancesPerBatch * numBones; |
251 | |
|
252 | 0 | for( uint16 i=0; i<=thisVertexData->vertexDeclaration->getMaxSource(); ++i ) |
253 | 0 | { |
254 | | //Create our own vertex buffer |
255 | 0 | HardwareVertexBufferSharedPtr vertexBuffer = |
256 | 0 | HardwareBufferManager::getSingleton().createVertexBuffer( |
257 | 0 | thisVertexData->vertexDeclaration->getVertexSize(i), |
258 | 0 | thisVertexData->vertexCount, |
259 | 0 | HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
260 | 0 | thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer ); |
261 | |
|
262 | 0 | VertexDeclaration::VertexElementList veList = |
263 | 0 | thisVertexData->vertexDeclaration->findElementsBySource(i); |
264 | | |
265 | | //Grab the base submesh data |
266 | 0 | HardwareVertexBufferSharedPtr baseVertexBuffer = |
267 | 0 | baseVertexData->vertexBufferBinding->getBuffer(i); |
268 | |
|
269 | 0 | HardwareBufferLockGuard thisVertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); |
270 | 0 | HardwareBufferLockGuard baseVertexLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY); |
271 | 0 | char* thisBuf = static_cast<char*>(thisVertexLock.pData); |
272 | 0 | char* baseBuf = static_cast<char*>(baseVertexLock.pData); |
273 | 0 | char *startBuf = baseBuf; |
274 | | |
275 | | //Copy and repeat |
276 | 0 | for (uint8 j = 0; j < uint8(mInstancesPerBatch); ++j) |
277 | 0 | { |
278 | | //Repeat source |
279 | 0 | baseBuf = startBuf; |
280 | |
|
281 | 0 | for (size_t k = 0; k < baseVertexData->vertexCount; ++k) |
282 | 0 | { |
283 | 0 | for (auto& vl : veList) |
284 | 0 | { |
285 | 0 | switch (vl.getSemantic()) |
286 | 0 | { |
287 | 0 | case VES_BLEND_INDICES: |
288 | 0 | *(thisBuf + vl.getOffset() + 0) = *(baseBuf + vl.getOffset() + 0) + j * numBones; |
289 | 0 | *(thisBuf + vl.getOffset() + 1) = *(baseBuf + vl.getOffset() + 1) + j * numBones; |
290 | 0 | *(thisBuf + vl.getOffset() + 2) = *(baseBuf + vl.getOffset() + 2) + j * numBones; |
291 | 0 | *(thisBuf + vl.getOffset() + 3) = *(baseBuf + vl.getOffset() + 3) + j * numBones; |
292 | 0 | break; |
293 | 0 | default: |
294 | 0 | memcpy( thisBuf + vl.getOffset(), baseBuf + vl.getOffset(), vl.getSize() ); |
295 | 0 | break; |
296 | 0 | } |
297 | 0 | } |
298 | 0 | thisBuf += baseVertexData->vertexDeclaration->getVertexSize(i); |
299 | 0 | baseBuf += baseVertexData->vertexDeclaration->getVertexSize(i); |
300 | 0 | } |
301 | 0 | } |
302 | 0 | } |
303 | 0 | } |
304 | | //----------------------------------------------------------------------- |
305 | | void InstanceBatchShader::getWorldTransforms( Matrix4* xform ) const |
306 | 0 | { |
307 | 0 | if (MeshManager::getBonesUseObjectSpace()) |
308 | 0 | { |
309 | 0 | *xform = Affine3::IDENTITY; |
310 | 0 | xform++; |
311 | 0 | } |
312 | |
|
313 | 0 | for (auto *e : mInstancedEntities) |
314 | 0 | { |
315 | 0 | xform += e->getTransforms(xform); |
316 | 0 | } |
317 | 0 | } |
318 | | //----------------------------------------------------------------------- |
319 | | unsigned short InstanceBatchShader::getNumWorldTransforms(void) const |
320 | 0 | { |
321 | 0 | return uint16(mNumWorldMatrices) + uint16(MeshManager::getBonesUseObjectSpace()); |
322 | 0 | } |
323 | | } |