/src/ogre/OgreMain/src/OgreInstanceBatchVTF.cpp
Line | Count | Source |
1 | | /* |
2 | | ----------------------------------------------------------------------------- |
3 | | This source file is part of OGRE |
4 | | (Object-oriented Graphics Rendering Engine) |
5 | | For the latest info, see http://www.ogre3d.org/ |
6 | | |
7 | | Copyright (c) 2000-2014 Torus Knot Software Ltd |
8 | | |
9 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
10 | | of this software and associated documentation files (the "Software"), to deal |
11 | | in the Software without restriction, including without limitation the rights |
12 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
13 | | copies of the Software, and to permit persons to whom the Software is |
14 | | furnished to do so, subject to the following conditions: |
15 | | |
16 | | The above copyright notice and this permission notice shall be included in |
17 | | all copies or substantial portions of the Software. |
18 | | |
19 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
20 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
21 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
22 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
23 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
24 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
25 | | THE SOFTWARE. |
26 | | ----------------------------------------------------------------------------- |
27 | | */ |
28 | | #include "OgreStableHeaders.h" |
29 | | #include "OgreInstanceBatchVTF.h" |
30 | | #include "OgreHardwarePixelBuffer.h" |
31 | | #include "OgreInstancedEntity.h" |
32 | | #include "OgreMaterial.h" |
33 | | #include "OgreDualQuaternion.h" |
34 | | |
35 | | namespace Ogre |
36 | | { |
37 | | static const uint16 c_maxTexWidth = 4096; |
38 | | static const uint16 c_maxTexHeight = 4096; |
39 | | |
40 | | BaseInstanceBatchVTF::BaseInstanceBatchVTF( InstanceManager *creator, MeshPtr &meshReference, |
41 | | const MaterialPtr &material, size_t instancesPerBatch, |
42 | | const Mesh::IndexMap *indexToBoneMap, const String &batchName) : |
43 | 0 | InstanceBatch( creator, meshReference, material, instancesPerBatch, |
44 | 0 | indexToBoneMap, batchName ), |
45 | 0 | mMatricesPerInstance(0), |
46 | 0 | mNumWorldMatrices( instancesPerBatch ), |
47 | 0 | mWidthFloatsPadding( 0 ), |
48 | 0 | mMaxFloatsPerLine( std::numeric_limits<size_t>::max() ), |
49 | 0 | mRowLength(3), |
50 | 0 | mWeightCount(1), |
51 | 0 | mTempTransformsArray3x4(0), |
52 | 0 | mUseBoneMatrixLookup(false), |
53 | 0 | mMaxLookupTableInstances(16), |
54 | 0 | mUseBoneDualQuaternions(false), |
55 | 0 | mForceOneWeight(false), |
56 | 0 | mUseOneWeight(false) |
57 | 0 | { |
58 | 0 | cloneMaterial( mMaterial ); |
59 | 0 | } |
60 | | |
61 | | BaseInstanceBatchVTF::~BaseInstanceBatchVTF() |
62 | 0 | { |
63 | | //Remove cloned caster materials (if any) |
64 | 0 | Material::Techniques::const_iterator it; |
65 | 0 | for(it = mMaterial->getTechniques().begin(); it != mMaterial->getTechniques().end(); ++it) |
66 | 0 | { |
67 | 0 | Technique *technique = *it; |
68 | |
|
69 | 0 | if (technique->getShadowCasterMaterial()) |
70 | 0 | MaterialManager::getSingleton().remove( technique->getShadowCasterMaterial() ); |
71 | 0 | } |
72 | | |
73 | | //Remove cloned material |
74 | 0 | MaterialManager::getSingleton().remove( mMaterial ); |
75 | | |
76 | | //Remove the VTF texture |
77 | 0 | if( mMatrixTexture ) |
78 | 0 | TextureManager::getSingleton().remove( mMatrixTexture ); |
79 | |
|
80 | 0 | delete[] mTempTransformsArray3x4; |
81 | 0 | } |
82 | | |
83 | | //----------------------------------------------------------------------- |
84 | | void BaseInstanceBatchVTF::buildFrom( const SubMesh *baseSubMesh, const RenderOperation &renderOperation ) |
85 | 0 | { |
86 | 0 | if (useBoneMatrixLookup()) |
87 | 0 | { |
88 | | //when using bone matrix lookup resource are not shared |
89 | | // |
90 | | //Future implementation: while the instance vertex buffer can't be shared |
91 | | //The texture can be. |
92 | | // |
93 | 0 | build(baseSubMesh); |
94 | 0 | } |
95 | 0 | else |
96 | 0 | { |
97 | 0 | createVertexTexture( baseSubMesh ); |
98 | 0 | InstanceBatch::buildFrom( baseSubMesh, renderOperation ); |
99 | 0 | } |
100 | 0 | } |
101 | | //----------------------------------------------------------------------- |
102 | | void BaseInstanceBatchVTF::cloneMaterial( const MaterialPtr &material ) |
103 | 0 | { |
104 | | //Used to track down shadow casters, so the same material caster doesn't get cloned twice |
105 | 0 | typedef std::map<String, MaterialPtr> MatMap; |
106 | 0 | MatMap clonedMaterials; |
107 | | |
108 | | //We need to clone the material so we can have different textures for each batch. |
109 | 0 | mMaterial = material->clone( mName + "/VTFMaterial" ); |
110 | | |
111 | | //Now do the same with the techniques which have a material shadow caster |
112 | 0 | Material::Techniques::const_iterator it; |
113 | 0 | for(it = material->getTechniques().begin(); it != material->getTechniques().end(); ++it) |
114 | 0 | { |
115 | 0 | Technique *technique = *it; |
116 | |
|
117 | 0 | if( technique->getShadowCasterMaterial() ) |
118 | 0 | { |
119 | 0 | const MaterialPtr &casterMat = technique->getShadowCasterMaterial(); |
120 | 0 | const String &casterName = casterMat->getName(); |
121 | | |
122 | | //Was this material already cloned? |
123 | 0 | MatMap::const_iterator itor = clonedMaterials.find(casterName); |
124 | |
|
125 | 0 | if( itor == clonedMaterials.end() ) |
126 | 0 | { |
127 | | //No? Clone it and track it |
128 | 0 | MaterialPtr cloned = casterMat->clone( mName + "/VTFMaterialCaster" + |
129 | 0 | StringConverter::toString(clonedMaterials.size()) ); |
130 | 0 | technique->setShadowCasterMaterial( cloned ); |
131 | 0 | clonedMaterials[casterName] = cloned; |
132 | 0 | } |
133 | 0 | else |
134 | 0 | technique->setShadowCasterMaterial( itor->second ); //Reuse the previously cloned mat |
135 | 0 | } |
136 | 0 | } |
137 | 0 | } |
138 | | //----------------------------------------------------------------------- |
139 | | void BaseInstanceBatchVTF::retrieveBoneIdx( VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx ) |
140 | 0 | { |
141 | 0 | const VertexElement *ve = baseVertexData->vertexDeclaration-> |
142 | 0 | findElementBySemantic( VES_BLEND_INDICES ); |
143 | 0 | const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS ); |
144 | | |
145 | 0 | HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource()); |
146 | 0 | HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY); |
147 | 0 | char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData); |
148 | |
|
149 | 0 | for( size_t i=0; i<baseVertexData->vertexCount; ++i ) |
150 | 0 | { |
151 | 0 | float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset()); |
152 | |
|
153 | 0 | uint8 biggestWeightIdx = 0; |
154 | 0 | for( uint8 j=1; j< uint8(mWeightCount); ++j ) |
155 | 0 | { |
156 | 0 | biggestWeightIdx = pWeights[biggestWeightIdx] < pWeights[j] ? j : biggestWeightIdx; |
157 | 0 | } |
158 | |
|
159 | 0 | uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset()); |
160 | 0 | outBoneIdx[i] = pIndex[biggestWeightIdx]; |
161 | |
|
162 | 0 | baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource()); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | | //----------------------------------------------------------------------- |
167 | | void BaseInstanceBatchVTF::retrieveBoneIdxWithWeights(VertexData *baseVertexData, HWBoneIdxVec &outBoneIdx, HWBoneWgtVec &outBoneWgt) |
168 | 0 | { |
169 | 0 | const VertexElement *ve = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_INDICES ); |
170 | 0 | const VertexElement *veWeights = baseVertexData->vertexDeclaration->findElementBySemantic( VES_BLEND_WEIGHTS ); |
171 | | |
172 | 0 | HardwareVertexBufferSharedPtr buff = baseVertexData->vertexBufferBinding->getBuffer(ve->getSource()); |
173 | 0 | HardwareBufferLockGuard baseVertexLock(buff, HardwareBuffer::HBL_READ_ONLY); |
174 | 0 | char const *baseBuffer = static_cast<char const*>(baseVertexLock.pData); |
175 | |
|
176 | 0 | for( size_t i=0; i<baseVertexData->vertexCount * mWeightCount; i += mWeightCount) |
177 | 0 | { |
178 | 0 | float const *pWeights = reinterpret_cast<float const*>(baseBuffer + veWeights->getOffset()); |
179 | 0 | uint8 const *pIndex = reinterpret_cast<uint8 const*>(baseBuffer + ve->getOffset()); |
180 | |
|
181 | 0 | float weightMagnitude = 0.0f; |
182 | 0 | for( size_t j=0; j < mWeightCount; ++j ) |
183 | 0 | { |
184 | 0 | outBoneWgt[i+j] = pWeights[j]; |
185 | 0 | weightMagnitude += pWeights[j]; |
186 | 0 | outBoneIdx[i+j] = pIndex[j]; |
187 | 0 | } |
188 | | |
189 | | //Normalize the bone weights so they add to one |
190 | 0 | for(size_t j=0; j < mWeightCount; ++j) |
191 | 0 | { |
192 | 0 | outBoneWgt[i+j] /= weightMagnitude; |
193 | 0 | } |
194 | |
|
195 | 0 | baseBuffer += baseVertexData->vertexDeclaration->getVertexSize(ve->getSource()); |
196 | 0 | } |
197 | 0 | } |
198 | | |
199 | | //----------------------------------------------------------------------- |
200 | | void BaseInstanceBatchVTF::setupMaterialToUseVTF( TextureType textureType, MaterialPtr &material ) const |
201 | 0 | { |
202 | 0 | Material::Techniques::const_iterator t; |
203 | 0 | for(t = material->getTechniques().begin(); t != material->getTechniques().end(); ++t) |
204 | 0 | for (auto *technique : material->getTechniques()) |
205 | 0 | { |
206 | 0 | for (auto *pass : technique->getPasses()) |
207 | 0 | { |
208 | 0 | for(auto *texUnit : pass->getTextureUnitStates()) |
209 | 0 | { |
210 | 0 | if( texUnit->getName() == "InstancingVTF" ) |
211 | 0 | { |
212 | 0 | texUnit->setTextureName( mMatrixTexture->getName(), textureType ); |
213 | 0 | texUnit->setTextureFiltering( TFO_NONE ); |
214 | 0 | } |
215 | 0 | } |
216 | 0 | } |
217 | |
|
218 | 0 | if( technique->getShadowCasterMaterial() ) |
219 | 0 | { |
220 | 0 | MaterialPtr matCaster = technique->getShadowCasterMaterial(); |
221 | 0 | setupMaterialToUseVTF(textureType, matCaster ); |
222 | 0 | } |
223 | 0 | } |
224 | 0 | } |
225 | | //----------------------------------------------------------------------- |
226 | | void BaseInstanceBatchVTF::createVertexTexture( const SubMesh* baseSubMesh ) |
227 | 0 | { |
228 | | /* |
229 | | TODO: Find a way to retrieve max texture resolution, |
230 | | http://www.ogre3d.org/forums/viewtopic.php?t=38305 |
231 | | |
232 | | Currently assuming it's 4096x4096, which is a safe bet for any hardware with decent VTF*/ |
233 | | |
234 | 0 | size_t uniqueAnimations = mInstancesPerBatch; |
235 | 0 | if (useBoneMatrixLookup()) |
236 | 0 | { |
237 | 0 | uniqueAnimations = std::min<size_t>(getMaxLookupTableInstances(), uniqueAnimations); |
238 | 0 | } |
239 | 0 | mMatricesPerInstance = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() ); |
240 | |
|
241 | 0 | if(mUseBoneDualQuaternions && !mTempTransformsArray3x4) |
242 | 0 | { |
243 | 0 | mTempTransformsArray3x4 = new Matrix3x4f[mMatricesPerInstance]; |
244 | 0 | } |
245 | | |
246 | 0 | mNumWorldMatrices = uniqueAnimations * mMatricesPerInstance; |
247 | | |
248 | | //Calculate the width & height required to hold all the matrices. Start by filling the width |
249 | | //first (i.e. 4096x1 4096x2 4096x3, etc) |
250 | | |
251 | 0 | size_t texWidth = std::min<size_t>( mNumWorldMatrices * mRowLength, c_maxTexWidth ); |
252 | 0 | size_t maxUsableWidth = texWidth; |
253 | 0 | if( matricesTogetherPerRow() ) |
254 | 0 | { |
255 | | //The technique requires all matrices from the same instance in the same row |
256 | | //i.e. 4094 -> 4095 -> skip 4096 -> 0 (next row) contains data from a new instance |
257 | 0 | mWidthFloatsPadding = texWidth % (mMatricesPerInstance * mRowLength); |
258 | |
|
259 | 0 | if( mWidthFloatsPadding ) |
260 | 0 | { |
261 | 0 | mMaxFloatsPerLine = texWidth - mWidthFloatsPadding; |
262 | |
|
263 | 0 | maxUsableWidth = mMaxFloatsPerLine; |
264 | | |
265 | | //Values are in pixels, convert them to floats (1 pixel = 4 floats) |
266 | 0 | mWidthFloatsPadding *= 4; |
267 | 0 | mMaxFloatsPerLine *= 4; |
268 | 0 | } |
269 | 0 | } |
270 | |
|
271 | 0 | size_t texHeight = mNumWorldMatrices * mRowLength / maxUsableWidth; |
272 | |
|
273 | 0 | if( (mNumWorldMatrices * mRowLength) % maxUsableWidth ) |
274 | 0 | texHeight += 1; |
275 | | |
276 | | //Don't use 1D textures, as OGL goes crazy because the shader should be calling texture1D()... |
277 | 0 | TextureType texType = TEX_TYPE_2D; |
278 | |
|
279 | 0 | mMatrixTexture = TextureManager::getSingleton().createManual( |
280 | 0 | mName + "/VTF", mMeshReference->getGroup(), texType, |
281 | 0 | (uint)texWidth, (uint)texHeight, |
282 | 0 | 0, PF_FLOAT32_RGBA, TU_DYNAMIC_WRITE_ONLY_DISCARDABLE ); |
283 | |
|
284 | 0 | OgreAssert(mMatrixTexture->getFormat() == PF_FLOAT32_RGBA, "float texture support required"); |
285 | | //Set our cloned material to use this custom texture! |
286 | 0 | setupMaterialToUseVTF( texType, mMaterial ); |
287 | 0 | } |
288 | | |
289 | | //----------------------------------------------------------------------- |
290 | | size_t BaseInstanceBatchVTF::convert3x4MatricesToDualQuaternions(Matrix3x4f* matrices, size_t numOfMatrices, float* outDualQuaternions) |
291 | 0 | { |
292 | 0 | DualQuaternion dQuat; |
293 | 0 | size_t floatsWritten = 0; |
294 | |
|
295 | 0 | for (size_t m = 0; m < numOfMatrices; ++m) |
296 | 0 | { |
297 | 0 | dQuat.fromTransformationMatrix(Affine3(matrices[m][0])); |
298 | | |
299 | | //Copy the 2x4 matrix |
300 | 0 | for(int i = 0; i < 8; ++i) |
301 | 0 | { |
302 | 0 | *outDualQuaternions++ = static_cast<float>( dQuat[i] ); |
303 | 0 | ++floatsWritten; |
304 | 0 | } |
305 | 0 | } |
306 | |
|
307 | 0 | return floatsWritten; |
308 | 0 | } |
309 | | |
310 | | //----------------------------------------------------------------------- |
311 | | void BaseInstanceBatchVTF::updateVertexTexture(void) |
312 | 0 | { |
313 | | //Now lock the texture and copy the 4x3 matrices! |
314 | 0 | HardwareBufferLockGuard matTexLock(mMatrixTexture->getBuffer(), HardwareBuffer::HBL_DISCARD); |
315 | 0 | const PixelBox &pixelBox = mMatrixTexture->getBuffer()->getCurrentLock(); |
316 | |
|
317 | 0 | float *pDest = reinterpret_cast<float*>(pixelBox.data); |
318 | |
|
319 | 0 | InstancedEntityVec::const_iterator itor = mInstancedEntities.begin(); |
320 | 0 | InstancedEntityVec::const_iterator end = mInstancedEntities.end(); |
321 | |
|
322 | 0 | Matrix3x4f* transforms; |
323 | | |
324 | | //If using dual quaternion skinning, write the transforms to a temporary buffer, |
325 | | //then convert to dual quaternions, then later write to the pixel buffer |
326 | | //Otherwise simply write the transforms to the pixel buffer directly |
327 | 0 | if(mUseBoneDualQuaternions) |
328 | 0 | { |
329 | 0 | transforms = mTempTransformsArray3x4; |
330 | 0 | } |
331 | 0 | else |
332 | 0 | { |
333 | 0 | transforms = (Matrix3x4f*)pDest; |
334 | 0 | } |
335 | | |
336 | | |
337 | 0 | while( itor != end ) |
338 | 0 | { |
339 | 0 | size_t floatsWritten = (*itor)->getTransforms3x4( transforms ); |
340 | |
|
341 | 0 | if( mManager->getCameraRelativeRendering() ) |
342 | 0 | makeMatrixCameraRelative3x4( transforms, floatsWritten / 12 ); |
343 | |
|
344 | 0 | if(mUseBoneDualQuaternions) |
345 | 0 | { |
346 | 0 | floatsWritten = convert3x4MatricesToDualQuaternions(transforms, floatsWritten / 12, pDest); |
347 | 0 | pDest += floatsWritten; |
348 | 0 | } |
349 | 0 | else |
350 | 0 | { |
351 | 0 | transforms += floatsWritten / 12; |
352 | 0 | } |
353 | | |
354 | 0 | ++itor; |
355 | 0 | } |
356 | 0 | } |
357 | | /** update the lookup numbers for entities with shared transforms */ |
358 | | void BaseInstanceBatchVTF::updateSharedLookupIndexes() |
359 | 0 | { |
360 | 0 | if (mTransformSharingDirty) |
361 | 0 | { |
362 | 0 | if (useBoneMatrixLookup()) |
363 | 0 | { |
364 | | //In each entity update the "transform lookup number" so that: |
365 | | // 1. All entities sharing the same transformation will share the same unique number |
366 | | // 2. "transform lookup number" will be numbered from 0 up to getMaxLookupTableInstances |
367 | 0 | uint16 lookupCounter = 0; |
368 | 0 | typedef std::map<Affine3*,uint16> MapTransformId; |
369 | 0 | MapTransformId transformToId; |
370 | 0 | InstancedEntityVec::const_iterator itEnt = mInstancedEntities.begin(), |
371 | 0 | itEntEnd = mInstancedEntities.end(); |
372 | 0 | for(;itEnt != itEntEnd ; ++itEnt) |
373 | 0 | { |
374 | 0 | if ((*itEnt)->isInScene()) |
375 | 0 | { |
376 | 0 | Affine3* transformUniqueId = (*itEnt)->mBoneMatrices; |
377 | 0 | MapTransformId::iterator itLu = transformToId.find(transformUniqueId); |
378 | 0 | if (itLu == transformToId.end()) |
379 | 0 | { |
380 | 0 | itLu = transformToId.insert(std::make_pair(transformUniqueId,lookupCounter)).first; |
381 | 0 | ++lookupCounter; |
382 | 0 | } |
383 | 0 | (*itEnt)->setTransformLookupNumber(itLu->second); |
384 | 0 | } |
385 | 0 | else |
386 | 0 | { |
387 | 0 | (*itEnt)->setTransformLookupNumber(0); |
388 | 0 | } |
389 | 0 | } |
390 | |
|
391 | 0 | if (lookupCounter > getMaxLookupTableInstances()) |
392 | 0 | { |
393 | 0 | OGRE_EXCEPT(Exception::ERR_INVALID_STATE,"Number of unique bone matrix states exceeds current limitation.","BaseInstanceBatchVTF::updateSharedLookupIndexes()"); |
394 | 0 | } |
395 | 0 | } |
396 | | |
397 | 0 | mTransformSharingDirty = false; |
398 | 0 | } |
399 | 0 | } |
400 | | |
401 | | //----------------------------------------------------------------------- |
402 | | InstancedEntity* BaseInstanceBatchVTF::generateInstancedEntity(size_t num) |
403 | 0 | { |
404 | 0 | InstancedEntity* sharedTransformEntity = NULL; |
405 | 0 | if ((useBoneMatrixLookup()) && (num >= getMaxLookupTableInstances())) |
406 | 0 | { |
407 | 0 | sharedTransformEntity = mInstancedEntities[num % getMaxLookupTableInstances()]; |
408 | 0 | if (sharedTransformEntity->mSharedTransformEntity) |
409 | 0 | { |
410 | 0 | sharedTransformEntity = sharedTransformEntity->mSharedTransformEntity; |
411 | 0 | } |
412 | 0 | } |
413 | |
|
414 | 0 | return OGRE_NEW InstancedEntity(this, static_cast<uint32>(num), sharedTransformEntity); |
415 | 0 | } |
416 | | |
417 | | |
418 | | //----------------------------------------------------------------------- |
419 | | void BaseInstanceBatchVTF::getWorldTransforms( Matrix4* xform ) const |
420 | 0 | { |
421 | 0 | *xform = Matrix4::IDENTITY; |
422 | 0 | } |
423 | | //----------------------------------------------------------------------- |
424 | | void BaseInstanceBatchVTF::_updateRenderQueue(RenderQueue* queue) |
425 | 0 | { |
426 | 0 | InstanceBatch::_updateRenderQueue( queue ); |
427 | |
|
428 | 0 | if( mBoundsUpdated || mDirtyAnimation || mManager->getCameraRelativeRendering() ) |
429 | 0 | updateVertexTexture(); |
430 | |
|
431 | 0 | mBoundsUpdated = false; |
432 | 0 | } |
433 | | //----------------------------------------------------------------------- |
434 | | // InstanceBatchVTF |
435 | | //----------------------------------------------------------------------- |
436 | | InstanceBatchVTF::InstanceBatchVTF( |
437 | | InstanceManager *creator, MeshPtr &meshReference, |
438 | | const MaterialPtr &material, size_t instancesPerBatch, |
439 | | const Mesh::IndexMap *indexToBoneMap, const String &batchName ) |
440 | 0 | : BaseInstanceBatchVTF (creator, meshReference, material, |
441 | 0 | instancesPerBatch, indexToBoneMap, batchName) |
442 | 0 | { |
443 | |
|
444 | 0 | } |
445 | | //----------------------------------------------------------------------- |
446 | | InstanceBatchVTF::~InstanceBatchVTF() |
447 | | { |
448 | | } |
449 | | //----------------------------------------------------------------------- |
450 | | void InstanceBatchVTF::setupVertices( const SubMesh* baseSubMesh ) |
451 | 0 | { |
452 | 0 | mRenderOperation.vertexData = OGRE_NEW VertexData(); |
453 | 0 | mRemoveOwnVertexData = true; //Raise flag to remove our own vertex data in the end (not always needed) |
454 | |
|
455 | 0 | VertexData *thisVertexData = mRenderOperation.vertexData; |
456 | 0 | VertexData *baseVertexData = baseSubMesh->vertexData; |
457 | |
|
458 | 0 | thisVertexData->vertexStart = 0; |
459 | 0 | thisVertexData->vertexCount = baseVertexData->vertexCount * mInstancesPerBatch; |
460 | |
|
461 | 0 | HardwareBufferManager::getSingleton().destroyVertexDeclaration( thisVertexData->vertexDeclaration ); |
462 | 0 | thisVertexData->vertexDeclaration = baseVertexData->vertexDeclaration->clone(); |
463 | |
|
464 | 0 | HWBoneIdxVec hwBoneIdx; |
465 | 0 | HWBoneWgtVec hwBoneWgt; |
466 | | |
467 | | //Blend weights may not be present because HW_VTF does not require to be skeletally animated |
468 | 0 | const VertexElement *veWeights = baseVertexData->vertexDeclaration-> |
469 | 0 | findElementBySemantic( VES_BLEND_WEIGHTS ); |
470 | 0 | if( veWeights ) |
471 | 0 | { |
472 | | //One weight is recommended for VTF |
473 | 0 | mWeightCount = (forceOneWeight() || useOneWeight()) ? |
474 | 0 | 1 : veWeights->getSize() / sizeof(float); |
475 | 0 | } |
476 | 0 | else |
477 | 0 | { |
478 | 0 | mWeightCount = 1; |
479 | 0 | } |
480 | |
|
481 | 0 | hwBoneIdx.resize( baseVertexData->vertexCount * mWeightCount, 0 ); |
482 | |
|
483 | 0 | if( mMeshReference->hasSkeleton() && mMeshReference->getSkeleton() ) |
484 | 0 | { |
485 | 0 | if(mWeightCount > 1) |
486 | 0 | { |
487 | 0 | hwBoneWgt.resize( baseVertexData->vertexCount * mWeightCount, 0 ); |
488 | 0 | retrieveBoneIdxWithWeights(baseVertexData, hwBoneIdx, hwBoneWgt); |
489 | 0 | } |
490 | 0 | else |
491 | 0 | { |
492 | 0 | retrieveBoneIdx( baseVertexData, hwBoneIdx ); |
493 | 0 | thisVertexData->vertexDeclaration->removeElement( VES_BLEND_INDICES ); |
494 | 0 | thisVertexData->vertexDeclaration->removeElement( VES_BLEND_WEIGHTS ); |
495 | |
|
496 | 0 | thisVertexData->vertexDeclaration->closeGapsInSource(); |
497 | 0 | } |
498 | |
|
499 | 0 | } |
500 | |
|
501 | 0 | for( unsigned short i=0; i<thisVertexData->vertexDeclaration->getMaxSource()+1; ++i ) |
502 | 0 | { |
503 | | //Create our own vertex buffer |
504 | 0 | HardwareVertexBufferSharedPtr vertexBuffer = |
505 | 0 | HardwareBufferManager::getSingleton().createVertexBuffer( |
506 | 0 | thisVertexData->vertexDeclaration->getVertexSize(i), |
507 | 0 | thisVertexData->vertexCount, |
508 | 0 | HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
509 | 0 | thisVertexData->vertexBufferBinding->setBinding( i, vertexBuffer ); |
510 | | |
511 | | //Grab the base submesh data |
512 | 0 | HardwareVertexBufferSharedPtr baseVertexBuffer = |
513 | 0 | baseVertexData->vertexBufferBinding->getBuffer(i); |
514 | |
|
515 | 0 | HardwareBufferLockGuard thisLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); |
516 | 0 | HardwareBufferLockGuard baseLock(baseVertexBuffer, HardwareBuffer::HBL_READ_ONLY); |
517 | 0 | char* thisBuf = static_cast<char*>(thisLock.pData); |
518 | 0 | char* baseBuf = static_cast<char*>(baseLock.pData); |
519 | | |
520 | | //Copy and repeat |
521 | 0 | for( size_t j=0; j<mInstancesPerBatch; ++j ) |
522 | 0 | { |
523 | 0 | const size_t sizeOfBuffer = baseVertexData->vertexCount * |
524 | 0 | baseVertexData->vertexDeclaration->getVertexSize(i); |
525 | 0 | memcpy( thisBuf + j * sizeOfBuffer, baseBuf, sizeOfBuffer ); |
526 | 0 | } |
527 | 0 | } |
528 | |
|
529 | 0 | createVertexTexture( baseSubMesh ); |
530 | 0 | createVertexSemantics( thisVertexData, baseVertexData, hwBoneIdx, hwBoneWgt); |
531 | 0 | } |
532 | | //----------------------------------------------------------------------- |
533 | | void InstanceBatchVTF::setupIndices( const SubMesh* baseSubMesh ) |
534 | 0 | { |
535 | 0 | mRenderOperation.indexData = OGRE_NEW IndexData(); |
536 | 0 | mRemoveOwnIndexData = true; //Raise flag to remove our own index data in the end (not always needed) |
537 | |
|
538 | 0 | IndexData *thisIndexData = mRenderOperation.indexData; |
539 | 0 | IndexData *baseIndexData = baseSubMesh->indexData; |
540 | |
|
541 | 0 | thisIndexData->indexStart = 0; |
542 | 0 | thisIndexData->indexCount = baseIndexData->indexCount * mInstancesPerBatch; |
543 | | |
544 | | //TODO: Check numVertices is below max supported by GPU |
545 | 0 | HardwareIndexBuffer::IndexType indexType = HardwareIndexBuffer::IT_16BIT; |
546 | 0 | if( mRenderOperation.vertexData->vertexCount > 65535 ) |
547 | 0 | indexType = HardwareIndexBuffer::IT_32BIT; |
548 | 0 | thisIndexData->indexBuffer = HardwareBufferManager::getSingleton().createIndexBuffer( |
549 | 0 | indexType, thisIndexData->indexCount, HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
550 | |
|
551 | 0 | HardwareBufferLockGuard thisLock(thisIndexData->indexBuffer, HardwareBuffer::HBL_DISCARD); |
552 | 0 | HardwareBufferLockGuard baseLock(baseIndexData->indexBuffer, HardwareBuffer::HBL_READ_ONLY); |
553 | 0 | uint16 *thisBuf16 = static_cast<uint16*>(thisLock.pData); |
554 | 0 | uint32 *thisBuf32 = static_cast<uint32*>(thisLock.pData); |
555 | 0 | bool baseIndex16bit = baseIndexData->indexBuffer->getType() == HardwareIndexBuffer::IT_16BIT; |
556 | |
|
557 | 0 | for( size_t i=0; i<mInstancesPerBatch; ++i ) |
558 | 0 | { |
559 | 0 | const size_t vertexOffset = i * mRenderOperation.vertexData->vertexCount / mInstancesPerBatch; |
560 | |
|
561 | 0 | const uint16 *initBuf16 = static_cast<const uint16 *>(baseLock.pData); |
562 | 0 | const uint32 *initBuf32 = static_cast<const uint32 *>(baseLock.pData); |
563 | |
|
564 | 0 | for( size_t j=0; j<baseIndexData->indexCount; ++j ) |
565 | 0 | { |
566 | 0 | uint32 originalVal = baseIndex16bit ? *initBuf16++ : *initBuf32++; |
567 | |
|
568 | 0 | if( indexType == HardwareIndexBuffer::IT_16BIT ) |
569 | 0 | *thisBuf16++ = static_cast<uint16>(originalVal + vertexOffset); |
570 | 0 | else |
571 | 0 | *thisBuf32++ = static_cast<uint32>(originalVal + vertexOffset); |
572 | 0 | } |
573 | 0 | } |
574 | 0 | } |
575 | | //----------------------------------------------------------------------- |
576 | | void InstanceBatchVTF::createVertexSemantics( |
577 | | VertexData *thisVertexData, VertexData *baseVertexData, const HWBoneIdxVec &hwBoneIdx, const HWBoneWgtVec &hwBoneWgt) |
578 | 0 | { |
579 | 0 | const size_t texWidth = mMatrixTexture->getWidth(); |
580 | 0 | const size_t texHeight = mMatrixTexture->getHeight(); |
581 | | |
582 | | //Calculate the texel offsets to correct them offline |
583 | | //Akwardly enough, the offset is needed in OpenGL too |
584 | 0 | Vector2 texelOffsets; |
585 | | //RenderSystem *renderSystem = Root::getSingleton().getRenderSystem(); |
586 | 0 | texelOffsets.x = /*renderSystem->getHorizontalTexelOffset()*/ -0.5f / (float)texWidth; |
587 | 0 | texelOffsets.y = /*renderSystem->getVerticalTexelOffset()*/ -0.5f / (float)texHeight; |
588 | | |
589 | | //Only one weight per vertex is supported. It would not only be complex, but prohibitively slow. |
590 | | //Put them in a new buffer, since it's 32 bytes aligned :-) |
591 | 0 | const unsigned short newSource = thisVertexData->vertexDeclaration->getMaxSource() + 1; |
592 | 0 | size_t maxFloatsPerVector = 4; |
593 | 0 | size_t offset = 0; |
594 | |
|
595 | 0 | for(size_t i = 0; i < mWeightCount; i += maxFloatsPerVector / mRowLength) |
596 | 0 | { |
597 | 0 | offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES, |
598 | 0 | thisVertexData->vertexDeclaration-> |
599 | 0 | getNextFreeTextureCoordinate() ).getSize(); |
600 | 0 | offset += thisVertexData->vertexDeclaration->addElement( newSource, offset, VET_FLOAT4, VES_TEXTURE_COORDINATES, |
601 | 0 | thisVertexData->vertexDeclaration-> |
602 | 0 | getNextFreeTextureCoordinate() ).getSize(); |
603 | 0 | } |
604 | | |
605 | | //Add the weights (supports up to four, which is Ogre's limit) |
606 | 0 | if(mWeightCount > 1) |
607 | 0 | { |
608 | 0 | thisVertexData->vertexDeclaration->addElement(newSource, offset, VET_FLOAT4, VES_BLEND_WEIGHTS, |
609 | 0 | thisVertexData->vertexDeclaration->getNextFreeTextureCoordinate() ).getSize(); |
610 | 0 | } |
611 | | |
612 | | //Create our own vertex buffer |
613 | 0 | HardwareVertexBufferSharedPtr vertexBuffer = |
614 | 0 | HardwareBufferManager::getSingleton().createVertexBuffer( |
615 | 0 | thisVertexData->vertexDeclaration->getVertexSize(newSource), |
616 | 0 | thisVertexData->vertexCount, |
617 | 0 | HardwareBuffer::HBU_STATIC_WRITE_ONLY ); |
618 | 0 | thisVertexData->vertexBufferBinding->setBinding( newSource, vertexBuffer ); |
619 | |
|
620 | 0 | HardwareBufferLockGuard vertexLock(vertexBuffer, HardwareBuffer::HBL_DISCARD); |
621 | 0 | float *thisFloat = static_cast<float*>(vertexLock.pData); |
622 | | |
623 | | //Copy and repeat |
624 | 0 | for( size_t i=0; i<mInstancesPerBatch; ++i ) |
625 | 0 | { |
626 | 0 | for( size_t j=0; j<baseVertexData->vertexCount * mWeightCount; j += mWeightCount ) |
627 | 0 | { |
628 | 0 | size_t numberOfMatricesInLine = 0; |
629 | |
|
630 | 0 | for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx) |
631 | 0 | { |
632 | 0 | for( size_t k=0; k < mRowLength; ++k) |
633 | 0 | { |
634 | 0 | size_t instanceIdx = (hwBoneIdx[j+wgtIdx] + i * mMatricesPerInstance) * mRowLength + k; |
635 | | //x |
636 | 0 | *thisFloat++ = ((instanceIdx % texWidth) / (float)texWidth) - (float)texelOffsets.x; |
637 | | //y |
638 | 0 | *thisFloat++ = ((instanceIdx / texWidth) / (float)texHeight) - (float)texelOffsets.y; |
639 | 0 | } |
640 | |
|
641 | 0 | ++numberOfMatricesInLine; |
642 | | |
643 | | //If another matrix can't be fit, we're on another line, or if this is the last weight |
644 | 0 | if((numberOfMatricesInLine + 1) * mRowLength > maxFloatsPerVector || (wgtIdx+1) == mWeightCount) |
645 | 0 | { |
646 | | //Place zeroes in the remaining coordinates |
647 | 0 | for ( size_t k=mRowLength * numberOfMatricesInLine; k < maxFloatsPerVector; ++k) |
648 | 0 | { |
649 | 0 | *thisFloat++ = 0.0f; |
650 | 0 | *thisFloat++ = 0.0f; |
651 | 0 | } |
652 | |
|
653 | 0 | numberOfMatricesInLine = 0; |
654 | 0 | } |
655 | 0 | } |
656 | | |
657 | | //Don't need to write weights if there is only one |
658 | 0 | if(mWeightCount > 1) |
659 | 0 | { |
660 | | //Write the weights |
661 | 0 | for(size_t wgtIdx = 0; wgtIdx < mWeightCount; ++wgtIdx) |
662 | 0 | { |
663 | 0 | *thisFloat++ = hwBoneWgt[j+wgtIdx]; |
664 | 0 | } |
665 | | |
666 | | //Fill the rest of the line with zeros |
667 | 0 | for(size_t wgtIdx = mWeightCount; wgtIdx < maxFloatsPerVector; ++wgtIdx) |
668 | 0 | { |
669 | 0 | *thisFloat++ = 0.0f; |
670 | 0 | } |
671 | 0 | } |
672 | 0 | } |
673 | 0 | } |
674 | 0 | } |
675 | | //----------------------------------------------------------------------- |
676 | | size_t InstanceBatchVTF::calculateMaxNumInstances( |
677 | | const SubMesh *baseSubMesh, uint16 flags ) const |
678 | 0 | { |
679 | 0 | size_t retVal = 0; |
680 | |
|
681 | 0 | RenderSystem *renderSystem = Root::getSingleton().getRenderSystem(); |
682 | 0 | const RenderSystemCapabilities *capabilities = renderSystem->getCapabilities(); |
683 | | |
684 | | //VTF must be supported |
685 | 0 | if( capabilities->getNumVertexTextureUnits() ) |
686 | 0 | { |
687 | | //TODO: Check PF_FLOAT32_RGBA is supported (should be, since it was the 1st one) |
688 | 0 | const size_t numBones = std::max<size_t>( 1, baseSubMesh->blendIndexToBoneIndexMap.size() ); |
689 | 0 | retVal = c_maxTexWidth * c_maxTexHeight / mRowLength / numBones; |
690 | |
|
691 | 0 | if( flags & IM_USE16BIT ) |
692 | 0 | { |
693 | 0 | if( baseSubMesh->vertexData->vertexCount * retVal > 0xFFFF ) |
694 | 0 | retVal = 0xFFFF / baseSubMesh->vertexData->vertexCount; |
695 | 0 | } |
696 | |
|
697 | 0 | if( flags & IM_VTFBESTFIT ) |
698 | 0 | { |
699 | 0 | const size_t instancesPerBatch = std::min( retVal, mInstancesPerBatch ); |
700 | | //Do the same as in createVertexTexture() |
701 | 0 | const size_t numWorldMatrices = instancesPerBatch * numBones; |
702 | |
|
703 | 0 | size_t texWidth = std::min<size_t>( numWorldMatrices * mRowLength, c_maxTexWidth ); |
704 | 0 | size_t texHeight = numWorldMatrices * mRowLength / c_maxTexWidth; |
705 | |
|
706 | 0 | const size_t remainder = (numWorldMatrices * mRowLength) % c_maxTexWidth; |
707 | |
|
708 | 0 | if( remainder && texHeight > 0 ) |
709 | 0 | retVal = static_cast<size_t>(texWidth * texHeight / (float)mRowLength / (float)(numBones)); |
710 | 0 | } |
711 | 0 | } |
712 | |
|
713 | 0 | return retVal; |
714 | |
|
715 | 0 | } |
716 | | } |