/src/ogre/OgreMain/src/OgreOptimisedUtilGeneral.cpp
Line | Count | Source |
1 | | /* |
2 | | ----------------------------------------------------------------------------- |
3 | | This source file is part of OGRE |
4 | | (Object-oriented Graphics Rendering Engine) |
5 | | For the latest info, see http://www.ogre3d.org/ |
6 | | |
7 | | Copyright (c) 2000-2014 Torus Knot Software Ltd |
8 | | |
9 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
10 | | of this software and associated documentation files (the "Software"), to deal |
11 | | in the Software without restriction, including without limitation the rights |
12 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
13 | | copies of the Software, and to permit persons to whom the Software is |
14 | | furnished to do so, subject to the following conditions: |
15 | | |
16 | | The above copyright notice and this permission notice shall be included in |
17 | | all copies or substantial portions of the Software. |
18 | | |
19 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
20 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
21 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
22 | | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
23 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
24 | | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
25 | | THE SOFTWARE. |
26 | | ----------------------------------------------------------------------------- |
27 | | */ |
28 | | #include "OgreStableHeaders.h" |
29 | | |
30 | | #include "OgreOptimisedUtil.h" |
31 | | |
32 | | namespace Ogre { |
33 | | |
34 | | //------------------------------------------------------------------------- |
35 | | // Local classes |
36 | | //------------------------------------------------------------------------- |
37 | | |
38 | | /** General implementation of OptimisedUtil. |
39 | | @note |
40 | | Don't use this class directly, use OptimisedUtil instead. |
41 | | */ |
42 | | class _OgrePrivate OptimisedUtilGeneral : public OptimisedUtil |
43 | | { |
44 | | public: |
45 | | /// @copydoc OptimisedUtil::softwareVertexSkinning |
46 | | void softwareVertexSkinning( |
47 | | const float *srcPosPtr, float *destPosPtr, |
48 | | const float *srcNormPtr, float *destNormPtr, |
49 | | const float *blendWeightPtr, const unsigned char* blendIndexPtr, |
50 | | const Affine3* const* blendMatrices, |
51 | | size_t srcPosStride, size_t destPosStride, |
52 | | size_t srcNormStride, size_t destNormStride, |
53 | | size_t blendWeightStride, size_t blendIndexStride, |
54 | | size_t numWeightsPerVertex, |
55 | | size_t numVertices) override; |
56 | | |
57 | | /// @copydoc OptimisedUtil::softwareVertexMorph |
58 | | void softwareVertexMorph( |
59 | | float t, |
60 | | const float *srcPos1, const float *srcPos2, |
61 | | float *dstPos, |
62 | | size_t pos1VSize, size_t pos2VSize, size_t dstVSize, |
63 | | size_t numVertices, |
64 | | bool morphNormals) override; |
65 | | |
66 | | /// @copydoc OptimisedUtil::concatenateAffineMatrices |
67 | | void concatenateAffineMatrices( |
68 | | const Affine3& baseMatrix, |
69 | | const Affine3* srcMatrices, |
70 | | Affine3* dstMatrices, |
71 | | size_t numMatrices) override; |
72 | | |
73 | | /// @copydoc OptimisedUtil::calculateFaceNormals |
74 | | void calculateFaceNormals( |
75 | | const float *positions, |
76 | | const EdgeData::Triangle *triangles, |
77 | | Vector4 *faceNormals, |
78 | | size_t numTriangles) override; |
79 | | |
80 | | /// @copydoc OptimisedUtil::calculateLightFacing |
81 | | void calculateLightFacing( |
82 | | const Vector4& lightPos, |
83 | | const Vector4* faceNormals, |
84 | | char* lightFacings, |
85 | | size_t numFaces) override; |
86 | | |
87 | | /// @copydoc OptimisedUtil::extrudeVertices |
88 | | void extrudeVertices( |
89 | | const Vector4& lightPos, |
90 | | Real extrudeDist, |
91 | | const float* srcPositions, |
92 | | float* destPositions, |
93 | | size_t numVertices) override; |
94 | | }; |
95 | | //--------------------------------------------------------------------- |
96 | | //--------------------------------------------------------------------- |
97 | | //--------------------------------------------------------------------- |
98 | | void OptimisedUtilGeneral::softwareVertexSkinning( |
99 | | const float *pSrcPos, float *pDestPos, |
100 | | const float *pSrcNorm, float *pDestNorm, |
101 | | const float *pBlendWeight, const unsigned char* pBlendIndex, |
102 | | const Affine3* const* blendMatrices, |
103 | | size_t srcPosStride, size_t destPosStride, |
104 | | size_t srcNormStride, size_t destNormStride, |
105 | | size_t blendWeightStride, size_t blendIndexStride, |
106 | | size_t numWeightsPerVertex, |
107 | | size_t numVertices) |
108 | 0 | { |
109 | | // Source vectors |
110 | 0 | Vector3 sourceVec = Vector3::ZERO, sourceNorm = Vector3::ZERO; |
111 | | // Accumulation vectors |
112 | 0 | Vector3 accumVecPos, accumVecNorm; |
113 | | |
114 | | // Loop per vertex |
115 | 0 | for (size_t vertIdx = 0; vertIdx < numVertices; ++vertIdx) |
116 | 0 | { |
117 | | // Load source vertex elements |
118 | 0 | sourceVec.x = pSrcPos[0]; |
119 | 0 | sourceVec.y = pSrcPos[1]; |
120 | 0 | sourceVec.z = pSrcPos[2]; |
121 | |
|
122 | 0 | if (pSrcNorm) |
123 | 0 | { |
124 | 0 | sourceNorm.x = pSrcNorm[0]; |
125 | 0 | sourceNorm.y = pSrcNorm[1]; |
126 | 0 | sourceNorm.z = pSrcNorm[2]; |
127 | 0 | } |
128 | | |
129 | | // Load accumulators |
130 | 0 | accumVecPos = Vector3::ZERO; |
131 | 0 | accumVecNorm = Vector3::ZERO; |
132 | | |
133 | | // Loop per blend weight |
134 | | // |
135 | | // Note: Don't change "unsigned short" here!!! If use "size_t" instead, |
136 | | // VC7.1 unroll this loop to four blend weights pre-iteration, and then |
137 | | // loss performance 10% in this function. Ok, this give a hint that we |
138 | | // should unroll this loop manually for better performance, will do that |
139 | | // later. |
140 | | // |
141 | 0 | for (unsigned short blendIdx = 0; blendIdx < numWeightsPerVertex; ++blendIdx) |
142 | 0 | { |
143 | | // Blend by multiplying source by blend matrix and scaling by weight |
144 | | // Add to accumulator |
145 | | // NB weights must be normalised!! |
146 | 0 | Real weight = pBlendWeight[blendIdx]; |
147 | 0 | if (weight) |
148 | 0 | { |
149 | | // Blend position, use 3x4 matrix |
150 | 0 | const Affine3& mat = *blendMatrices[pBlendIndex[blendIdx]]; |
151 | 0 | accumVecPos.x += |
152 | 0 | (mat[0][0] * sourceVec.x + |
153 | 0 | mat[0][1] * sourceVec.y + |
154 | 0 | mat[0][2] * sourceVec.z + |
155 | 0 | mat[0][3]) |
156 | 0 | * weight; |
157 | 0 | accumVecPos.y += |
158 | 0 | (mat[1][0] * sourceVec.x + |
159 | 0 | mat[1][1] * sourceVec.y + |
160 | 0 | mat[1][2] * sourceVec.z + |
161 | 0 | mat[1][3]) |
162 | 0 | * weight; |
163 | 0 | accumVecPos.z += |
164 | 0 | (mat[2][0] * sourceVec.x + |
165 | 0 | mat[2][1] * sourceVec.y + |
166 | 0 | mat[2][2] * sourceVec.z + |
167 | 0 | mat[2][3]) |
168 | 0 | * weight; |
169 | 0 | if (pSrcNorm) |
170 | 0 | { |
171 | | // Blend normal |
172 | | // We should blend by inverse transpose here, but because we're assuming the 3x3 |
173 | | // aspect of the matrix is orthogonal (no non-uniform scaling), the inverse transpose |
174 | | // is equal to the main 3x3 matrix |
175 | | // Note because it's a normal we just extract the rotational part, saves us renormalising here |
176 | 0 | accumVecNorm.x += |
177 | 0 | (mat[0][0] * sourceNorm.x + |
178 | 0 | mat[0][1] * sourceNorm.y + |
179 | 0 | mat[0][2] * sourceNorm.z) |
180 | 0 | * weight; |
181 | 0 | accumVecNorm.y += |
182 | 0 | (mat[1][0] * sourceNorm.x + |
183 | 0 | mat[1][1] * sourceNorm.y + |
184 | 0 | mat[1][2] * sourceNorm.z) |
185 | 0 | * weight; |
186 | 0 | accumVecNorm.z += |
187 | 0 | (mat[2][0] * sourceNorm.x + |
188 | 0 | mat[2][1] * sourceNorm.y + |
189 | 0 | mat[2][2] * sourceNorm.z) |
190 | 0 | * weight; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | } |
194 | | |
195 | | // Stored blended vertex in hardware buffer |
196 | 0 | pDestPos[0] = accumVecPos.x; |
197 | 0 | pDestPos[1] = accumVecPos.y; |
198 | 0 | pDestPos[2] = accumVecPos.z; |
199 | | |
200 | | // Stored blended vertex in temp buffer |
201 | 0 | if (pSrcNorm) |
202 | 0 | { |
203 | | // Normalise |
204 | 0 | accumVecNorm.normalise(); |
205 | 0 | pDestNorm[0] = accumVecNorm.x; |
206 | 0 | pDestNorm[1] = accumVecNorm.y; |
207 | 0 | pDestNorm[2] = accumVecNorm.z; |
208 | | // Advance pointers |
209 | 0 | advanceRawPointer(pSrcNorm, srcNormStride); |
210 | 0 | advanceRawPointer(pDestNorm, destNormStride); |
211 | 0 | } |
212 | | |
213 | | // Advance pointers |
214 | 0 | advanceRawPointer(pSrcPos, srcPosStride); |
215 | 0 | advanceRawPointer(pDestPos, destPosStride); |
216 | 0 | advanceRawPointer(pBlendWeight, blendWeightStride); |
217 | 0 | advanceRawPointer(pBlendIndex, blendIndexStride); |
218 | 0 | } |
219 | 0 | } |
220 | | //--------------------------------------------------------------------- |
221 | | void OptimisedUtilGeneral::concatenateAffineMatrices( |
222 | | const Affine3& baseMatrix, |
223 | | const Affine3* pSrcMat, |
224 | | Affine3* pDstMat, |
225 | | size_t numMatrices) |
226 | 0 | { |
227 | 0 | for (size_t i = 0; i < numMatrices; ++i) |
228 | 0 | { |
229 | 0 | *pDstMat = baseMatrix * *pSrcMat ; |
230 | |
|
231 | 0 | ++pSrcMat; |
232 | 0 | ++pDstMat; |
233 | 0 | } |
234 | 0 | } |
235 | | //--------------------------------------------------------------------- |
236 | | void OptimisedUtilGeneral::softwareVertexMorph( |
237 | | float t, |
238 | | const float *pSrc1, const float *pSrc2, |
239 | | float *pDst, |
240 | | size_t pos1VSize, size_t pos2VSize, size_t dstVSize, |
241 | | size_t numVertices, |
242 | | bool morphNormals) |
243 | 0 | { |
244 | 0 | size_t src1Skip = pos1VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); |
245 | 0 | size_t src2Skip = pos2VSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); |
246 | 0 | size_t dstSkip = dstVSize/sizeof(float) - 3 - (morphNormals ? 3 : 0); |
247 | | |
248 | 0 | Vector3f nlerpNormal; |
249 | 0 | for (size_t i = 0; i < numVertices; ++i) |
250 | 0 | { |
251 | | // x |
252 | 0 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
253 | 0 | ++pSrc1; ++pSrc2; |
254 | | // y |
255 | 0 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
256 | 0 | ++pSrc1; ++pSrc2; |
257 | | // z |
258 | 0 | *pDst++ = *pSrc1 + t * (*pSrc2 - *pSrc1) ; |
259 | 0 | ++pSrc1; ++pSrc2; |
260 | | |
261 | 0 | if (morphNormals) |
262 | 0 | { |
263 | | // normals must be in the same buffer as pos |
264 | | // perform an nlerp |
265 | | // we don't have enough information for a spherical interp |
266 | 0 | nlerpNormal[0] = *pSrc1 + t * (*pSrc2 - *pSrc1); |
267 | 0 | ++pSrc1; ++pSrc2; |
268 | 0 | nlerpNormal[1] = *pSrc1 + t * (*pSrc2 - *pSrc1); |
269 | 0 | ++pSrc1; ++pSrc2; |
270 | 0 | nlerpNormal[2] = *pSrc1 + t * (*pSrc2 - *pSrc1); |
271 | 0 | ++pSrc1; ++pSrc2; |
272 | 0 | nlerpNormal.normalise(); |
273 | 0 | *pDst++ = nlerpNormal[0]; |
274 | 0 | *pDst++ = nlerpNormal[1]; |
275 | 0 | *pDst++ = nlerpNormal[2]; |
276 | 0 | } |
277 | | |
278 | 0 | pSrc1 += src1Skip; |
279 | 0 | pSrc2 += src2Skip; |
280 | 0 | pDst += dstSkip; |
281 | | |
282 | 0 | } |
283 | 0 | } |
284 | | //--------------------------------------------------------------------- |
285 | | void OptimisedUtilGeneral::calculateFaceNormals( |
286 | | const float *positions, |
287 | | const EdgeData::Triangle *triangles, |
288 | | Vector4 *faceNormals, |
289 | | size_t numTriangles) |
290 | 0 | { |
291 | 0 | for ( ; numTriangles; --numTriangles) |
292 | 0 | { |
293 | 0 | const EdgeData::Triangle& t = *triangles++; |
294 | 0 | size_t offset; |
295 | |
|
296 | 0 | offset = t.vertIndex[0] * 3; |
297 | 0 | Vector3 v1(positions[offset+0], positions[offset+1], positions[offset+2]); |
298 | |
|
299 | 0 | offset = t.vertIndex[1] * 3; |
300 | 0 | Vector3 v2(positions[offset+0], positions[offset+1], positions[offset+2]); |
301 | |
|
302 | 0 | offset = t.vertIndex[2] * 3; |
303 | 0 | Vector3 v3(positions[offset+0], positions[offset+1], positions[offset+2]); |
304 | |
|
305 | 0 | *faceNormals++ = Math::calculateFaceNormalWithoutNormalize(v1, v2, v3); |
306 | 0 | } |
307 | 0 | } |
308 | | //--------------------------------------------------------------------- |
309 | | void OptimisedUtilGeneral::calculateLightFacing( |
310 | | const Vector4& lightPos, |
311 | | const Vector4* faceNormals, |
312 | | char* lightFacings, |
313 | | size_t numFaces) |
314 | 0 | { |
315 | 0 | for (size_t i = 0; i < numFaces; ++i) |
316 | 0 | { |
317 | 0 | *lightFacings++ = (lightPos.dotProduct(*faceNormals++) > 0); |
318 | 0 | } |
319 | 0 | } |
320 | | //--------------------------------------------------------------------- |
321 | | void OptimisedUtilGeneral::extrudeVertices( |
322 | | const Vector4& lightPos, |
323 | | Real extrudeDist, |
324 | | const float* pSrcPos, |
325 | | float* pDestPos, |
326 | | size_t numVertices) |
327 | 0 | { |
328 | 0 | if (lightPos.w == 0.0f) |
329 | 0 | { |
330 | | // Directional light, extrusion is along light direction |
331 | |
|
332 | 0 | Vector3 extrusionDir( |
333 | 0 | -lightPos.x, |
334 | 0 | -lightPos.y, |
335 | 0 | -lightPos.z); |
336 | 0 | extrusionDir.normalise(); |
337 | 0 | extrusionDir *= extrudeDist; |
338 | |
|
339 | 0 | for (size_t vert = 0; vert < numVertices; ++vert) |
340 | 0 | { |
341 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.x; |
342 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.y; |
343 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.z; |
344 | 0 | } |
345 | 0 | } |
346 | 0 | else |
347 | 0 | { |
348 | | // Point light, calculate extrusionDir for every vertex |
349 | 0 | assert(lightPos.w == 1.0f); |
350 | |
|
351 | 0 | for (size_t vert = 0; vert < numVertices; ++vert) |
352 | 0 | { |
353 | 0 | Vector3 extrusionDir( |
354 | 0 | pSrcPos[0] - lightPos.x, |
355 | 0 | pSrcPos[1] - lightPos.y, |
356 | 0 | pSrcPos[2] - lightPos.z); |
357 | 0 | extrusionDir.normalise(); |
358 | 0 | extrusionDir *= extrudeDist; |
359 | |
|
360 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.x; |
361 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.y; |
362 | 0 | *pDestPos++ = *pSrcPos++ + extrusionDir.z; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | | //--------------------------------------------------------------------- |
367 | | //--------------------------------------------------------------------- |
368 | | //--------------------------------------------------------------------- |
369 | | extern OptimisedUtil* _getOptimisedUtilGeneral(void); |
370 | | extern OptimisedUtil* _getOptimisedUtilGeneral(void) |
371 | 0 | { |
372 | 0 | static OptimisedUtilGeneral msOptimisedUtilGeneral; |
373 | 0 | return &msOptimisedUtilGeneral; |
374 | 0 | } |
375 | | |
376 | | } |