/src/vvenc/source/Lib/CommonLib/RdCost.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file RdCost.cpp |
45 | | \brief RD cost computation class |
46 | | */ |
47 | | |
48 | | #define DONT_UNDEF_SIZE_AWARE_PER_EL_OP |
49 | | |
50 | | #include "RdCost.h" |
51 | | #include "Rom.h" |
52 | | #include "UnitPartitioner.h" |
53 | | #include "SearchSpaceCounter.h" |
54 | | |
55 | | |
56 | | //! \ingroup CommonLib |
57 | | //! \{ |
58 | | |
59 | | namespace vvenc { |
60 | | |
61 | | |
62 | | template<int csx> |
63 | | static Distortion lumaWeightedSSE_Core( const DistParam& rcDtParam, ChromaFormat chmFmt, const uint32_t* lumaWeights ); |
64 | | |
65 | | static Distortion fixWeightedSSE_Core( const DistParam& rcDtParam, uint32_t fixedWeight ); |
66 | | |
67 | | RdCost::RdCost() |
68 | 0 | : m_afpDistortFunc{ { nullptr, }, { nullptr, } } |
69 | 0 | { |
70 | 0 | } |
71 | | |
72 | | RdCost::~RdCost() |
73 | 0 | { |
74 | 0 | } |
75 | | |
76 | | void RdCost::setLambda( double dLambda, const BitDepths &bitDepths ) |
77 | 0 | { |
78 | 0 | m_dLambda = dLambda; |
79 | 0 | m_DistScale = double(1<<SCALE_BITS) / m_dLambda; |
80 | 0 | m_dLambdaMotionSAD = sqrt(m_dLambda); |
81 | 0 | } |
82 | | |
83 | | |
84 | | // Initialize Function Pointer by [eDFunc] |
85 | | void RdCost::create( bool enableOpt ) |
86 | 0 | { |
87 | 0 | m_signalType = RESHAPE_SIGNAL_NULL; |
88 | 0 | m_chromaWeight = 1.0; |
89 | 0 | m_lumaBD = 10; |
90 | 0 | m_afpDistortFunc[0][DF_SSE ] = RdCost::xGetSSE; |
91 | 0 | m_afpDistortFunc[0][DF_SSE2 ] = RdCost::xGetSSE; |
92 | 0 | m_afpDistortFunc[0][DF_SSE4 ] = RdCost::xGetSSE4; |
93 | 0 | m_afpDistortFunc[0][DF_SSE8 ] = RdCost::xGetSSE8; |
94 | 0 | m_afpDistortFunc[0][DF_SSE16 ] = RdCost::xGetSSE16; |
95 | 0 | m_afpDistortFunc[0][DF_SSE32 ] = RdCost::xGetSSE32; |
96 | 0 | m_afpDistortFunc[0][DF_SSE64 ] = RdCost::xGetSSE64; |
97 | 0 | m_afpDistortFunc[0][DF_SSE128 ] = RdCost::xGetSSE128; |
98 | |
|
99 | 0 | m_afpDistortFunc[0][DF_SAD ] = RdCost::xGetSAD; |
100 | 0 | m_afpDistortFunc[0][DF_SAD2 ] = RdCost::xGetSAD; |
101 | 0 | m_afpDistortFunc[0][DF_SAD4 ] = RdCost::xGetSAD4; |
102 | 0 | m_afpDistortFunc[0][DF_SAD8 ] = RdCost::xGetSAD8; |
103 | 0 | m_afpDistortFunc[0][DF_SAD16 ] = RdCost::xGetSAD16; |
104 | 0 | m_afpDistortFunc[0][DF_SAD32 ] = RdCost::xGetSAD32; |
105 | 0 | m_afpDistortFunc[0][DF_SAD64 ] = RdCost::xGetSAD64; |
106 | 0 | m_afpDistortFunc[0][DF_SAD128 ] = RdCost::xGetSAD128; |
107 | |
|
108 | 0 | m_afpDistortFunc[0][DF_HAD ] = RdCost::xGetHADs<false>; |
109 | 0 | m_afpDistortFunc[0][DF_HAD2 ] = RdCost::xGetHADs<false>; |
110 | 0 | m_afpDistortFunc[0][DF_HAD4 ] = RdCost::xGetHADs<false>; |
111 | 0 | m_afpDistortFunc[0][DF_HAD8 ] = RdCost::xGetHADs<false>; |
112 | 0 | m_afpDistortFunc[0][DF_HAD16 ] = RdCost::xGetHADs<false>; |
113 | 0 | m_afpDistortFunc[0][DF_HAD32 ] = RdCost::xGetHADs<false>; |
114 | 0 | m_afpDistortFunc[0][DF_HAD64 ] = RdCost::xGetHADs<false>; |
115 | 0 | m_afpDistortFunc[0][DF_HAD128 ] = RdCost::xGetHADs<false>; |
116 | |
|
117 | 0 | m_afpDistortFunc[0][DF_HAD_fast ] = RdCost::xGetHADs<true>; |
118 | 0 | m_afpDistortFunc[0][DF_HAD2_fast ] = RdCost::xGetHADs<true>; |
119 | 0 | m_afpDistortFunc[0][DF_HAD4_fast ] = RdCost::xGetHADs<true>; |
120 | 0 | m_afpDistortFunc[0][DF_HAD8_fast ] = RdCost::xGetHADs<true>; |
121 | 0 | m_afpDistortFunc[0][DF_HAD16_fast ] = RdCost::xGetHADs<true>; |
122 | 0 | m_afpDistortFunc[0][DF_HAD32_fast ] = RdCost::xGetHADs<true>; |
123 | 0 | m_afpDistortFunc[0][DF_HAD64_fast ] = RdCost::xGetHADs<true>; |
124 | 0 | m_afpDistortFunc[0][DF_HAD128_fast ] = RdCost::xGetHADs<true>; |
125 | | |
126 | | // m_afpDistortFunc[0][DF_SAD_INTERMEDIATE_BITDEPTH] = RdCost::xGetSAD; |
127 | 0 | m_afpDistortFunc[0][DF_HAD_2SAD ] = RdCost::xGetHAD2SADs; |
128 | |
|
129 | 0 | m_afpDistortFunc[0][DF_SAD_WITH_MASK] = RdCost::xGetSADwMask; |
130 | | // m_afpDistortFunc[1] can be used in any case |
131 | 0 | memcpy( m_afpDistortFunc[1], m_afpDistortFunc[0], sizeof(m_afpDistortFunc)/2); |
132 | |
|
133 | 0 | m_wtdPredPtr[0] = lumaWeightedSSE_Core<0>; |
134 | 0 | m_wtdPredPtr[1] = lumaWeightedSSE_Core<1>; |
135 | 0 | m_fxdWtdPredPtr = fixWeightedSSE_Core; |
136 | |
|
137 | 0 | m_afpDistortFuncX5[0] = RdCost::xGetSAD8X5; |
138 | 0 | m_afpDistortFuncX5[1] = RdCost::xGetSAD16X5; |
139 | |
|
140 | 0 | #if ENABLE_SIMD_OPT_DIST |
141 | 0 | if( enableOpt ) |
142 | 0 | { |
143 | 0 | #ifdef TARGET_SIMD_X86 |
144 | 0 | initRdCostX86(); |
145 | 0 | #endif |
146 | | #ifdef TARGET_SIMD_ARM |
147 | | initRdCostARM(); |
148 | | #endif |
149 | 0 | } |
150 | 0 | #endif |
151 | |
|
152 | 0 | m_costMode = VVENC_COST_STANDARD_LOSSY; |
153 | 0 | m_motionLambda = 0; |
154 | 0 | m_iCostScale = 0; |
155 | 0 | } |
156 | | |
157 | | #if ENABLE_MEASURE_SEARCH_SPACE |
158 | | static Distortion xMeasurePredSearchSpaceInterceptor( const DistParam& dp ) |
159 | | { |
160 | | g_searchSpaceAcc.addPrediction( dp.cur.width, dp.cur.height, toChannelType( dp.compID ) ); |
161 | | return dp.xDistFunc( dp ); |
162 | | } |
163 | | |
164 | | #endif |
165 | | void RdCost::setDistParam( DistParam &rcDP, const CPelBuf& org, const Pel* piRefY, int iRefStride, int bitDepth, ComponentID compID, int subShiftMode, int useHadamard ) |
166 | 0 | { |
167 | 0 | rcDP.bitDepth = bitDepth; |
168 | 0 | rcDP.compID = compID; |
169 | | |
170 | | // set Original & Curr Pointer / Stride |
171 | 0 | rcDP.org = org; |
172 | |
|
173 | 0 | rcDP.cur.buf = piRefY; |
174 | 0 | rcDP.cur.stride = iRefStride; |
175 | | |
176 | | // set Block Width / Height |
177 | 0 | rcDP.cur.width = org.width; |
178 | 0 | rcDP.cur.height = org.height; |
179 | 0 | rcDP.maximumDistortionForEarlyExit = MAX_DISTORTION; |
180 | |
|
181 | 0 | const int base = (rcDP.bitDepth > 10 || rcDP.applyWeight) ? 1 : 0; |
182 | 0 | if( !useHadamard ) |
183 | 0 | { |
184 | 0 | rcDP.distFunc = m_afpDistortFunc[base][ DF_SAD + Log2( org.width ) ]; |
185 | 0 | } |
186 | 0 | else |
187 | 0 | { |
188 | 0 | rcDP.distFunc = m_afpDistortFunc[base][( useHadamard == 1 ? DF_HAD : DF_HAD_fast ) + Log2( org.width ) ]; |
189 | 0 | } |
190 | | |
191 | | // initialize |
192 | 0 | rcDP.subShift = 0; |
193 | |
|
194 | 0 | if( subShiftMode == 1 ) |
195 | 0 | { |
196 | 0 | if( rcDP.org.height > 8 && rcDP.org.width <= 128 ) |
197 | 0 | { |
198 | 0 | rcDP.subShift = 1; |
199 | 0 | } |
200 | 0 | } |
201 | 0 | else if( subShiftMode == 2 ) |
202 | 0 | { |
203 | 0 | if (rcDP.org.height > 8) |
204 | 0 | { |
205 | 0 | rcDP.subShift = 1; |
206 | 0 | } |
207 | 0 | } |
208 | |
|
209 | | #if ENABLE_MEASURE_SEARCH_SPACE |
210 | | rcDP.xDistFunc = rcDP.distFunc; |
211 | | rcDP.distFunc = xMeasurePredSearchSpaceInterceptor; |
212 | | #endif |
213 | 0 | } |
214 | | |
215 | | |
216 | | DistParam RdCost::setDistParam( const CPelBuf& org, const CPelBuf& cur, int bitDepth, DFunc dfunc ) |
217 | 0 | { |
218 | 0 | int index = dfunc; |
219 | 0 | if( dfunc != DF_HAD && dfunc != DF_HAD_fast && dfunc != DF_HAD_2SAD ) |
220 | 0 | { |
221 | 0 | index += Log2(org.width); |
222 | 0 | } |
223 | |
|
224 | 0 | const int base = bitDepth > 10 ? 1:0; //TBD: check does SDA ever overflow |
225 | | #if ENABLE_MEASURE_SEARCH_SPACE |
226 | | DistParam rcDP( org, cur, m_afpDistortFunc[base][index], bitDepth, 0, COMP_Y ); |
227 | | rcDP.xDistFunc = rcDP.distFunc; |
228 | | rcDP.distFunc = xMeasurePredSearchSpaceInterceptor; |
229 | | return rcDP; |
230 | | #else |
231 | 0 | return DistParam( org, cur, m_afpDistortFunc[base][index], bitDepth, 0, COMP_Y ); |
232 | 0 | #endif |
233 | 0 | } |
234 | | |
235 | | DistParam RdCost::setDistParam( const Pel* pOrg, const Pel* piRefY, int iOrgStride, int iRefStride, int bitDepth, ComponentID compID, int width, int height, int subShift, bool isDMVR ) |
236 | 0 | { |
237 | 0 | DistParam rcDP; |
238 | 0 | rcDP.bitDepth = bitDepth; |
239 | 0 | rcDP.compID = compID; |
240 | |
|
241 | 0 | rcDP.org.buf = pOrg; |
242 | 0 | rcDP.org.stride = iOrgStride; |
243 | 0 | rcDP.org.width = width; |
244 | 0 | rcDP.org.height = height; |
245 | |
|
246 | 0 | rcDP.cur.buf = piRefY; |
247 | 0 | rcDP.cur.stride = iRefStride; |
248 | 0 | rcDP.cur.width = width; |
249 | 0 | rcDP.cur.height = height; |
250 | 0 | rcDP.subShift = subShift; |
251 | | |
252 | | // CHECK( useHadamard || rcDP.useMR, "only used in xDMVRCost with these default parameters (so far...)" ); |
253 | 0 | const int base = (rcDP.bitDepth > 10) ? 1 : 0; |
254 | |
|
255 | 0 | rcDP.distFunc = m_afpDistortFunc[base][ DF_SAD + Log2( width ) ]; |
256 | | |
257 | 0 | if( isDMVR ) |
258 | 0 | { |
259 | 0 | rcDP.dmvrSadX5 = m_afpDistortFuncX5[Log2( width ) - 3]; |
260 | 0 | } |
261 | |
|
262 | | #if ENABLE_MEASURE_SEARCH_SPACE |
263 | | if( !isDMVR ) |
264 | | { |
265 | | // DMVT is part of the decoder complexity |
266 | | rcDP.xDistFunc = rcDP.distFunc; |
267 | | rcDP.distFunc = xMeasurePredSearchSpaceInterceptor; |
268 | | } |
269 | | |
270 | | #endif |
271 | 0 | return rcDP; |
272 | 0 | } |
273 | | |
274 | | Distortion RdCost::getDistPart( const CPelBuf& org, const CPelBuf& cur, int bitDepth, const ComponentID compId, DFunc eDFunc, const CPelBuf* orgLuma ) |
275 | 0 | { |
276 | 0 | DistParam dp( org, cur, nullptr, bitDepth, 0, compId ); |
277 | | # if ENABLE_MEASURE_SEARCH_SPACE |
278 | | g_searchSpaceAcc.addPrediction( dp.cur.width, dp.cur.height, toChannelType( dp.compID ) ); |
279 | | #endif |
280 | 0 | Distortion dist; |
281 | 0 | if( orgLuma ) |
282 | 0 | { |
283 | 0 | CHECKD( eDFunc != DF_SSE_WTD, "mismatch func and parameter") |
284 | 0 | dp.orgLuma = orgLuma; |
285 | 0 | dist = RdCost::xGetSSE_WTD( dp ); |
286 | 0 | } |
287 | 0 | else |
288 | 0 | { |
289 | 0 | if( ( org.width == 1 ) ) |
290 | 0 | { |
291 | 0 | dist = xGetSSE( dp ); |
292 | 0 | } |
293 | 0 | else |
294 | 0 | { |
295 | 0 | const int base = (bitDepth > 10) ? 1 : 0; |
296 | 0 | dist = m_afpDistortFunc[base][eDFunc + Log2(org.width)](dp); |
297 | 0 | } |
298 | 0 | } |
299 | 0 | if (isChroma(compId)) |
300 | 0 | { |
301 | 0 | return ((Distortion) (m_distortionWeight[ compId ] * dist)); |
302 | 0 | } |
303 | 0 | else |
304 | 0 | { |
305 | 0 | return dist; |
306 | 0 | } |
307 | 0 | } |
308 | | |
309 | | // ==================================================================================================================== |
310 | | // Distortion functions |
311 | | // ==================================================================================================================== |
312 | | |
313 | | // -------------------------------------------------------------------------------------------------------------------- |
314 | | // SAD |
315 | | // -------------------------------------------------------------------------------------------------------------------- |
316 | | |
317 | | Distortion RdCost::xGetSAD( const DistParam& rcDtParam ) |
318 | 0 | { |
319 | 0 | if ( rcDtParam.applyWeight ) |
320 | 0 | { |
321 | 0 | THROW(" no support"); |
322 | 0 | } |
323 | | |
324 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
325 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
326 | 0 | const int iCols = rcDtParam.org.width; |
327 | 0 | int iRows = rcDtParam.org.height; |
328 | 0 | const int iSubShift = rcDtParam.subShift; |
329 | 0 | const int iSubStep = ( 1 << iSubShift ); |
330 | 0 | const int iStrideCur = rcDtParam.cur.stride * iSubStep; |
331 | 0 | const int iStrideOrg = rcDtParam.org.stride * iSubStep; |
332 | 0 | const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth); |
333 | |
|
334 | 0 | Distortion uiSum = 0; |
335 | |
|
336 | 0 | for( ; iRows != 0; iRows -= iSubStep ) |
337 | 0 | { |
338 | 0 | for (int n = 0; n < iCols; n++ ) |
339 | 0 | { |
340 | 0 | uiSum += abs( piOrg[n] - piCur[n] ); |
341 | 0 | } |
342 | 0 | if (rcDtParam.maximumDistortionForEarlyExit < ( uiSum >> distortionShift )) |
343 | 0 | { |
344 | 0 | return ( uiSum >> distortionShift ); |
345 | 0 | } |
346 | 0 | piOrg += iStrideOrg; |
347 | 0 | piCur += iStrideCur; |
348 | 0 | } |
349 | | |
350 | 0 | uiSum <<= iSubShift; |
351 | 0 | return ( uiSum >> distortionShift ); |
352 | 0 | } |
353 | | |
354 | | Distortion RdCost::xGetSAD4( const DistParam& rcDtParam ) |
355 | 0 | { |
356 | 0 | if ( rcDtParam.applyWeight ) |
357 | 0 | { |
358 | 0 | THROW(" no support"); |
359 | 0 | } |
360 | | |
361 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
362 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
363 | 0 | int iRows = rcDtParam.org.height; |
364 | 0 | int iSubShift = rcDtParam.subShift; |
365 | 0 | int iSubStep = ( 1 << iSubShift ); |
366 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
367 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
368 | |
|
369 | 0 | Distortion uiSum = 0; |
370 | |
|
371 | 0 | for( ; iRows != 0; iRows -= iSubStep ) |
372 | 0 | { |
373 | 0 | uiSum += abs( piOrg[0] - piCur[0] ); |
374 | 0 | uiSum += abs( piOrg[1] - piCur[1] ); |
375 | 0 | uiSum += abs( piOrg[2] - piCur[2] ); |
376 | 0 | uiSum += abs( piOrg[3] - piCur[3] ); |
377 | |
|
378 | 0 | piOrg += iStrideOrg; |
379 | 0 | piCur += iStrideCur; |
380 | 0 | } |
381 | |
|
382 | 0 | uiSum <<= iSubShift; |
383 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
384 | 0 | } |
385 | | |
386 | | Distortion RdCost::xGetSAD8( const DistParam& rcDtParam ) |
387 | 0 | { |
388 | 0 | if ( rcDtParam.applyWeight ) |
389 | 0 | { |
390 | 0 | THROW(" no support"); |
391 | 0 | } |
392 | | |
393 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
394 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
395 | 0 | int iRows = rcDtParam.org.height; |
396 | 0 | int iSubShift = rcDtParam.subShift; |
397 | 0 | int iSubStep = ( 1 << iSubShift ); |
398 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
399 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
400 | |
|
401 | 0 | Distortion uiSum = 0; |
402 | |
|
403 | 0 | for( ; iRows != 0; iRows-=iSubStep ) |
404 | 0 | { |
405 | 0 | uiSum += abs( piOrg[0] - piCur[0] ); |
406 | 0 | uiSum += abs( piOrg[1] - piCur[1] ); |
407 | 0 | uiSum += abs( piOrg[2] - piCur[2] ); |
408 | 0 | uiSum += abs( piOrg[3] - piCur[3] ); |
409 | 0 | uiSum += abs( piOrg[4] - piCur[4] ); |
410 | 0 | uiSum += abs( piOrg[5] - piCur[5] ); |
411 | 0 | uiSum += abs( piOrg[6] - piCur[6] ); |
412 | 0 | uiSum += abs( piOrg[7] - piCur[7] ); |
413 | |
|
414 | 0 | piOrg += iStrideOrg; |
415 | 0 | piCur += iStrideCur; |
416 | 0 | } |
417 | |
|
418 | 0 | uiSum <<= iSubShift; |
419 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
420 | 0 | } |
421 | | |
422 | | Distortion RdCost::xGetSAD16( const DistParam& rcDtParam ) |
423 | 0 | { |
424 | 0 | if ( rcDtParam.applyWeight ) |
425 | 0 | { |
426 | 0 | THROW(" no support"); |
427 | 0 | } |
428 | | |
429 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
430 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
431 | 0 | int iRows = rcDtParam.org.height; |
432 | 0 | int iSubShift = rcDtParam.subShift; |
433 | 0 | int iSubStep = ( 1 << iSubShift ); |
434 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
435 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
436 | |
|
437 | 0 | Distortion uiSum = 0; |
438 | |
|
439 | 0 | for( ; iRows != 0; iRows -= iSubStep ) |
440 | 0 | { |
441 | 0 | uiSum += abs( piOrg[0] - piCur[0] ); |
442 | 0 | uiSum += abs( piOrg[1] - piCur[1] ); |
443 | 0 | uiSum += abs( piOrg[2] - piCur[2] ); |
444 | 0 | uiSum += abs( piOrg[3] - piCur[3] ); |
445 | 0 | uiSum += abs( piOrg[4] - piCur[4] ); |
446 | 0 | uiSum += abs( piOrg[5] - piCur[5] ); |
447 | 0 | uiSum += abs( piOrg[6] - piCur[6] ); |
448 | 0 | uiSum += abs( piOrg[7] - piCur[7] ); |
449 | 0 | uiSum += abs( piOrg[8] - piCur[8] ); |
450 | 0 | uiSum += abs( piOrg[9] - piCur[9] ); |
451 | 0 | uiSum += abs( piOrg[10] - piCur[10] ); |
452 | 0 | uiSum += abs( piOrg[11] - piCur[11] ); |
453 | 0 | uiSum += abs( piOrg[12] - piCur[12] ); |
454 | 0 | uiSum += abs( piOrg[13] - piCur[13] ); |
455 | 0 | uiSum += abs( piOrg[14] - piCur[14] ); |
456 | 0 | uiSum += abs( piOrg[15] - piCur[15] ); |
457 | |
|
458 | 0 | piOrg += iStrideOrg; |
459 | 0 | piCur += iStrideCur; |
460 | 0 | } |
461 | |
|
462 | 0 | uiSum <<= iSubShift; |
463 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
464 | 0 | } |
465 | | |
466 | | |
467 | | Distortion RdCost::xGetSAD128( const DistParam &rcDtParam ) |
468 | 0 | { |
469 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
470 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
471 | 0 | int iRows = rcDtParam.org.height; |
472 | 0 | int iCols = rcDtParam.org.width; |
473 | 0 | int iSubShift = rcDtParam.subShift; |
474 | 0 | int iSubStep = ( 1 << iSubShift ); |
475 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
476 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
477 | |
|
478 | 0 | Distortion uiSum = 0; |
479 | |
|
480 | 0 | for( ; iRows != 0; iRows-=iSubStep ) |
481 | 0 | { |
482 | 0 | for (int n = 0; n < iCols; n+=16 ) |
483 | 0 | { |
484 | 0 | uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] ); |
485 | 0 | uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] ); |
486 | 0 | uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] ); |
487 | 0 | uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] ); |
488 | 0 | uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] ); |
489 | 0 | uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] ); |
490 | 0 | uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] ); |
491 | 0 | uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] ); |
492 | 0 | uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] ); |
493 | 0 | uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] ); |
494 | 0 | uiSum += abs( piOrg[n+10] - piCur[n+10] ); |
495 | 0 | uiSum += abs( piOrg[n+11] - piCur[n+11] ); |
496 | 0 | uiSum += abs( piOrg[n+12] - piCur[n+12] ); |
497 | 0 | uiSum += abs( piOrg[n+13] - piCur[n+13] ); |
498 | 0 | uiSum += abs( piOrg[n+14] - piCur[n+14] ); |
499 | 0 | uiSum += abs( piOrg[n+15] - piCur[n+15] ); |
500 | 0 | } |
501 | 0 | piOrg += iStrideOrg; |
502 | 0 | piCur += iStrideCur; |
503 | 0 | } |
504 | |
|
505 | 0 | uiSum <<= iSubShift; |
506 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
507 | 0 | } |
508 | | |
509 | | Distortion RdCost::xGetSAD32( const DistParam &rcDtParam ) |
510 | 0 | { |
511 | 0 | if ( rcDtParam.applyWeight ) |
512 | 0 | { |
513 | 0 | THROW(" no support"); |
514 | 0 | } |
515 | | |
516 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
517 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
518 | 0 | int iRows = rcDtParam.org.height; |
519 | 0 | int iSubShift = rcDtParam.subShift; |
520 | 0 | int iSubStep = ( 1 << iSubShift ); |
521 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
522 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
523 | |
|
524 | 0 | Distortion uiSum = 0; |
525 | |
|
526 | 0 | for( ; iRows != 0; iRows-=iSubStep ) |
527 | 0 | { |
528 | 0 | uiSum += abs( piOrg[0] - piCur[0] ); |
529 | 0 | uiSum += abs( piOrg[1] - piCur[1] ); |
530 | 0 | uiSum += abs( piOrg[2] - piCur[2] ); |
531 | 0 | uiSum += abs( piOrg[3] - piCur[3] ); |
532 | 0 | uiSum += abs( piOrg[4] - piCur[4] ); |
533 | 0 | uiSum += abs( piOrg[5] - piCur[5] ); |
534 | 0 | uiSum += abs( piOrg[6] - piCur[6] ); |
535 | 0 | uiSum += abs( piOrg[7] - piCur[7] ); |
536 | 0 | uiSum += abs( piOrg[8] - piCur[8] ); |
537 | 0 | uiSum += abs( piOrg[9] - piCur[9] ); |
538 | 0 | uiSum += abs( piOrg[10] - piCur[10] ); |
539 | 0 | uiSum += abs( piOrg[11] - piCur[11] ); |
540 | 0 | uiSum += abs( piOrg[12] - piCur[12] ); |
541 | 0 | uiSum += abs( piOrg[13] - piCur[13] ); |
542 | 0 | uiSum += abs( piOrg[14] - piCur[14] ); |
543 | 0 | uiSum += abs( piOrg[15] - piCur[15] ); |
544 | 0 | uiSum += abs( piOrg[16] - piCur[16] ); |
545 | 0 | uiSum += abs( piOrg[17] - piCur[17] ); |
546 | 0 | uiSum += abs( piOrg[18] - piCur[18] ); |
547 | 0 | uiSum += abs( piOrg[19] - piCur[19] ); |
548 | 0 | uiSum += abs( piOrg[20] - piCur[20] ); |
549 | 0 | uiSum += abs( piOrg[21] - piCur[21] ); |
550 | 0 | uiSum += abs( piOrg[22] - piCur[22] ); |
551 | 0 | uiSum += abs( piOrg[23] - piCur[23] ); |
552 | 0 | uiSum += abs( piOrg[24] - piCur[24] ); |
553 | 0 | uiSum += abs( piOrg[25] - piCur[25] ); |
554 | 0 | uiSum += abs( piOrg[26] - piCur[26] ); |
555 | 0 | uiSum += abs( piOrg[27] - piCur[27] ); |
556 | 0 | uiSum += abs( piOrg[28] - piCur[28] ); |
557 | 0 | uiSum += abs( piOrg[29] - piCur[29] ); |
558 | 0 | uiSum += abs( piOrg[30] - piCur[30] ); |
559 | 0 | uiSum += abs( piOrg[31] - piCur[31] ); |
560 | |
|
561 | 0 | piOrg += iStrideOrg; |
562 | 0 | piCur += iStrideCur; |
563 | 0 | } |
564 | |
|
565 | 0 | uiSum <<= iSubShift; |
566 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
567 | 0 | } |
568 | | |
569 | | |
570 | | Distortion RdCost::xGetSAD64( const DistParam &rcDtParam ) |
571 | 0 | { |
572 | 0 | if ( rcDtParam.applyWeight ) |
573 | 0 | { |
574 | 0 | THROW(" no support"); |
575 | 0 | } |
576 | | |
577 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
578 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
579 | 0 | int iRows = rcDtParam.org.height; |
580 | 0 | int iSubShift = rcDtParam.subShift; |
581 | 0 | int iSubStep = ( 1 << iSubShift ); |
582 | 0 | int iStrideCur = rcDtParam.cur.stride * iSubStep; |
583 | 0 | int iStrideOrg = rcDtParam.org.stride * iSubStep; |
584 | |
|
585 | 0 | Distortion uiSum = 0; |
586 | |
|
587 | 0 | for( ; iRows != 0; iRows-=iSubStep ) |
588 | 0 | { |
589 | 0 | uiSum += abs( piOrg[0] - piCur[0] ); |
590 | 0 | uiSum += abs( piOrg[1] - piCur[1] ); |
591 | 0 | uiSum += abs( piOrg[2] - piCur[2] ); |
592 | 0 | uiSum += abs( piOrg[3] - piCur[3] ); |
593 | 0 | uiSum += abs( piOrg[4] - piCur[4] ); |
594 | 0 | uiSum += abs( piOrg[5] - piCur[5] ); |
595 | 0 | uiSum += abs( piOrg[6] - piCur[6] ); |
596 | 0 | uiSum += abs( piOrg[7] - piCur[7] ); |
597 | 0 | uiSum += abs( piOrg[8] - piCur[8] ); |
598 | 0 | uiSum += abs( piOrg[9] - piCur[9] ); |
599 | 0 | uiSum += abs( piOrg[10] - piCur[10] ); |
600 | 0 | uiSum += abs( piOrg[11] - piCur[11] ); |
601 | 0 | uiSum += abs( piOrg[12] - piCur[12] ); |
602 | 0 | uiSum += abs( piOrg[13] - piCur[13] ); |
603 | 0 | uiSum += abs( piOrg[14] - piCur[14] ); |
604 | 0 | uiSum += abs( piOrg[15] - piCur[15] ); |
605 | 0 | uiSum += abs( piOrg[16] - piCur[16] ); |
606 | 0 | uiSum += abs( piOrg[17] - piCur[17] ); |
607 | 0 | uiSum += abs( piOrg[18] - piCur[18] ); |
608 | 0 | uiSum += abs( piOrg[19] - piCur[19] ); |
609 | 0 | uiSum += abs( piOrg[20] - piCur[20] ); |
610 | 0 | uiSum += abs( piOrg[21] - piCur[21] ); |
611 | 0 | uiSum += abs( piOrg[22] - piCur[22] ); |
612 | 0 | uiSum += abs( piOrg[23] - piCur[23] ); |
613 | 0 | uiSum += abs( piOrg[24] - piCur[24] ); |
614 | 0 | uiSum += abs( piOrg[25] - piCur[25] ); |
615 | 0 | uiSum += abs( piOrg[26] - piCur[26] ); |
616 | 0 | uiSum += abs( piOrg[27] - piCur[27] ); |
617 | 0 | uiSum += abs( piOrg[28] - piCur[28] ); |
618 | 0 | uiSum += abs( piOrg[29] - piCur[29] ); |
619 | 0 | uiSum += abs( piOrg[30] - piCur[30] ); |
620 | 0 | uiSum += abs( piOrg[31] - piCur[31] ); |
621 | 0 | uiSum += abs( piOrg[32] - piCur[32] ); |
622 | 0 | uiSum += abs( piOrg[33] - piCur[33] ); |
623 | 0 | uiSum += abs( piOrg[34] - piCur[34] ); |
624 | 0 | uiSum += abs( piOrg[35] - piCur[35] ); |
625 | 0 | uiSum += abs( piOrg[36] - piCur[36] ); |
626 | 0 | uiSum += abs( piOrg[37] - piCur[37] ); |
627 | 0 | uiSum += abs( piOrg[38] - piCur[38] ); |
628 | 0 | uiSum += abs( piOrg[39] - piCur[39] ); |
629 | 0 | uiSum += abs( piOrg[40] - piCur[40] ); |
630 | 0 | uiSum += abs( piOrg[41] - piCur[41] ); |
631 | 0 | uiSum += abs( piOrg[42] - piCur[42] ); |
632 | 0 | uiSum += abs( piOrg[43] - piCur[43] ); |
633 | 0 | uiSum += abs( piOrg[44] - piCur[44] ); |
634 | 0 | uiSum += abs( piOrg[45] - piCur[45] ); |
635 | 0 | uiSum += abs( piOrg[46] - piCur[46] ); |
636 | 0 | uiSum += abs( piOrg[47] - piCur[47] ); |
637 | 0 | uiSum += abs( piOrg[48] - piCur[48] ); |
638 | 0 | uiSum += abs( piOrg[49] - piCur[49] ); |
639 | 0 | uiSum += abs( piOrg[50] - piCur[50] ); |
640 | 0 | uiSum += abs( piOrg[51] - piCur[51] ); |
641 | 0 | uiSum += abs( piOrg[52] - piCur[52] ); |
642 | 0 | uiSum += abs( piOrg[53] - piCur[53] ); |
643 | 0 | uiSum += abs( piOrg[54] - piCur[54] ); |
644 | 0 | uiSum += abs( piOrg[55] - piCur[55] ); |
645 | 0 | uiSum += abs( piOrg[56] - piCur[56] ); |
646 | 0 | uiSum += abs( piOrg[57] - piCur[57] ); |
647 | 0 | uiSum += abs( piOrg[58] - piCur[58] ); |
648 | 0 | uiSum += abs( piOrg[59] - piCur[59] ); |
649 | 0 | uiSum += abs( piOrg[60] - piCur[60] ); |
650 | 0 | uiSum += abs( piOrg[61] - piCur[61] ); |
651 | 0 | uiSum += abs( piOrg[62] - piCur[62] ); |
652 | 0 | uiSum += abs( piOrg[63] - piCur[63] ); |
653 | |
|
654 | 0 | piOrg += iStrideOrg; |
655 | 0 | piCur += iStrideCur; |
656 | 0 | } |
657 | |
|
658 | 0 | uiSum <<= iSubShift; |
659 | 0 | return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
660 | 0 | } |
661 | | |
662 | | |
663 | | // -------------------------------------------------------------------------------------------------------------------- |
664 | | // SSE |
665 | | // -------------------------------------------------------------------------------------------------------------------- |
666 | | |
667 | | Distortion RdCost::xGetSSE( const DistParam &rcDtParam ) |
668 | 0 | { |
669 | 0 | if ( rcDtParam.applyWeight ) |
670 | 0 | { |
671 | 0 | THROW(" no support"); |
672 | 0 | } |
673 | | |
674 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
675 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
676 | 0 | int iRows = rcDtParam.org.height; |
677 | 0 | int iCols = rcDtParam.org.width; |
678 | 0 | int iStrideCur = rcDtParam.cur.stride; |
679 | 0 | int iStrideOrg = rcDtParam.org.stride; |
680 | |
|
681 | 0 | Distortion uiSum = 0; |
682 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
683 | |
|
684 | 0 | Intermediate_Int iTemp; |
685 | |
|
686 | 0 | for( ; iRows != 0; iRows-- ) |
687 | 0 | { |
688 | 0 | for (int n = 0; n < iCols; n++ ) |
689 | 0 | { |
690 | 0 | iTemp = piOrg[n ] - piCur[n ]; |
691 | 0 | uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
692 | 0 | } |
693 | 0 | piOrg += iStrideOrg; |
694 | 0 | piCur += iStrideCur; |
695 | 0 | } |
696 | |
|
697 | 0 | return ( uiSum ); |
698 | 0 | } |
699 | | |
700 | | Distortion RdCost::xGetSSE4( const DistParam &rcDtParam ) |
701 | 0 | { |
702 | 0 | if ( rcDtParam.applyWeight ) |
703 | 0 | { |
704 | 0 | CHECK( rcDtParam.org.width != 4, "Invalid size" ); |
705 | 0 | THROW(" no support"); |
706 | 0 | } |
707 | | |
708 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
709 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
710 | 0 | int iRows = rcDtParam.org.height; |
711 | 0 | int iStrideOrg = rcDtParam.org.stride; |
712 | 0 | int iStrideCur = rcDtParam.cur.stride; |
713 | |
|
714 | 0 | Distortion uiSum = 0; |
715 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
716 | |
|
717 | 0 | Intermediate_Int iTemp; |
718 | |
|
719 | 0 | for( ; iRows != 0; iRows-- ) |
720 | 0 | { |
721 | |
|
722 | 0 | iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
723 | 0 | iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
724 | 0 | iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
725 | 0 | iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
726 | |
|
727 | 0 | piOrg += iStrideOrg; |
728 | 0 | piCur += iStrideCur; |
729 | 0 | } |
730 | |
|
731 | 0 | return ( uiSum ); |
732 | 0 | } |
733 | | |
734 | | Distortion RdCost::xGetSSE8( const DistParam &rcDtParam ) |
735 | 0 | { |
736 | 0 | if ( rcDtParam.applyWeight ) |
737 | 0 | { |
738 | 0 | CHECK( rcDtParam.org.width != 8, "Invalid size" ); |
739 | 0 | THROW(" no support"); |
740 | 0 | } |
741 | | |
742 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
743 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
744 | 0 | int iRows = rcDtParam.org.height; |
745 | 0 | int iStrideOrg = rcDtParam.org.stride; |
746 | 0 | int iStrideCur = rcDtParam.cur.stride; |
747 | |
|
748 | 0 | Distortion uiSum = 0; |
749 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
750 | |
|
751 | 0 | Intermediate_Int iTemp; |
752 | |
|
753 | 0 | for( ; iRows != 0; iRows-- ) |
754 | 0 | { |
755 | 0 | iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
756 | 0 | iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
757 | 0 | iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
758 | 0 | iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
759 | 0 | iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
760 | 0 | iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
761 | 0 | iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
762 | 0 | iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
763 | |
|
764 | 0 | piOrg += iStrideOrg; |
765 | 0 | piCur += iStrideCur; |
766 | 0 | } |
767 | |
|
768 | 0 | return ( uiSum ); |
769 | 0 | } |
770 | | |
771 | | Distortion RdCost::xGetSSE16( const DistParam &rcDtParam ) |
772 | 0 | { |
773 | 0 | if ( rcDtParam.applyWeight ) |
774 | 0 | { |
775 | 0 | CHECK( rcDtParam.org.width != 16, "Invalid size" ); |
776 | 0 | THROW(" no support"); |
777 | 0 | } |
778 | | |
779 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
780 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
781 | 0 | int iRows = rcDtParam.org.height; |
782 | 0 | int iStrideOrg = rcDtParam.org.stride; |
783 | 0 | int iStrideCur = rcDtParam.cur.stride; |
784 | |
|
785 | 0 | Distortion uiSum = 0; |
786 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
787 | |
|
788 | 0 | Intermediate_Int iTemp; |
789 | |
|
790 | 0 | for( ; iRows != 0; iRows-- ) |
791 | 0 | { |
792 | |
|
793 | 0 | iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
794 | 0 | iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
795 | 0 | iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
796 | 0 | iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
797 | 0 | iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
798 | 0 | iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
799 | 0 | iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
800 | 0 | iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
801 | 0 | iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
802 | 0 | iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
803 | 0 | iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
804 | 0 | iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
805 | 0 | iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
806 | 0 | iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
807 | 0 | iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
808 | 0 | iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
809 | |
|
810 | 0 | piOrg += iStrideOrg; |
811 | 0 | piCur += iStrideCur; |
812 | 0 | } |
813 | |
|
814 | 0 | return ( uiSum ); |
815 | 0 | } |
816 | | |
817 | | Distortion RdCost::xGetSSE128( const DistParam &rcDtParam ) |
818 | 0 | { |
819 | 0 | if ( rcDtParam.applyWeight ) |
820 | 0 | { |
821 | 0 | THROW(" no support"); |
822 | 0 | } |
823 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
824 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
825 | 0 | int iRows = rcDtParam.org.height; |
826 | 0 | int iCols = rcDtParam.org.width; |
827 | 0 | int iStrideOrg = rcDtParam.org.stride; |
828 | 0 | int iStrideCur = rcDtParam.cur.stride; |
829 | |
|
830 | 0 | Distortion uiSum = 0; |
831 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
832 | |
|
833 | 0 | Intermediate_Int iTemp; |
834 | |
|
835 | 0 | for( ; iRows != 0; iRows-- ) |
836 | 0 | { |
837 | 0 | for (int n = 0; n < iCols; n+=16 ) |
838 | 0 | { |
839 | |
|
840 | 0 | iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
841 | 0 | iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
842 | 0 | iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
843 | 0 | iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
844 | 0 | iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
845 | 0 | iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
846 | 0 | iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
847 | 0 | iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
848 | 0 | iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
849 | 0 | iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
850 | 0 | iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
851 | 0 | iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
852 | 0 | iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
853 | 0 | iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
854 | 0 | iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
855 | 0 | iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
856 | |
|
857 | 0 | } |
858 | 0 | piOrg += iStrideOrg; |
859 | 0 | piCur += iStrideCur; |
860 | 0 | } |
861 | |
|
862 | 0 | return ( uiSum ); |
863 | 0 | } |
864 | | |
865 | | Distortion RdCost::xGetSSE32( const DistParam &rcDtParam ) |
866 | 0 | { |
867 | 0 | if ( rcDtParam.applyWeight ) |
868 | 0 | { |
869 | 0 | THROW(" no support"); |
870 | 0 | } |
871 | | |
872 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
873 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
874 | 0 | int iRows = rcDtParam.org.height; |
875 | 0 | int iStrideOrg = rcDtParam.org.stride; |
876 | 0 | int iStrideCur = rcDtParam.cur.stride; |
877 | |
|
878 | 0 | Distortion uiSum = 0; |
879 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
880 | |
|
881 | 0 | Intermediate_Int iTemp; |
882 | |
|
883 | 0 | for( ; iRows != 0; iRows-- ) |
884 | 0 | { |
885 | |
|
886 | 0 | iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
887 | 0 | iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
888 | 0 | iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
889 | 0 | iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
890 | 0 | iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
891 | 0 | iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
892 | 0 | iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
893 | 0 | iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
894 | 0 | iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
895 | 0 | iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
896 | 0 | iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
897 | 0 | iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
898 | 0 | iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
899 | 0 | iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
900 | 0 | iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
901 | 0 | iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
902 | 0 | iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
903 | 0 | iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
904 | 0 | iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
905 | 0 | iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
906 | 0 | iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
907 | 0 | iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
908 | 0 | iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
909 | 0 | iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
910 | 0 | iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
911 | 0 | iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
912 | 0 | iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
913 | 0 | iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
914 | 0 | iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
915 | 0 | iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
916 | 0 | iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
917 | 0 | iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
918 | |
|
919 | 0 | piOrg += iStrideOrg; |
920 | 0 | piCur += iStrideCur; |
921 | 0 | } |
922 | |
|
923 | 0 | return ( uiSum ); |
924 | 0 | } |
925 | | |
926 | | Distortion RdCost::xGetSSE64( const DistParam &rcDtParam ) |
927 | 0 | { |
928 | 0 | if ( rcDtParam.applyWeight ) |
929 | 0 | { |
930 | 0 | THROW(" no support"); |
931 | 0 | } |
932 | | |
933 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
934 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
935 | 0 | int iRows = rcDtParam.org.height; |
936 | 0 | int iStrideOrg = rcDtParam.org.stride; |
937 | 0 | int iStrideCur = rcDtParam.cur.stride; |
938 | |
|
939 | 0 | Distortion uiSum = 0; |
940 | 0 | uint32_t uiShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1; |
941 | |
|
942 | 0 | Intermediate_Int iTemp; |
943 | |
|
944 | 0 | for( ; iRows != 0; iRows-- ) |
945 | 0 | { |
946 | 0 | iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
947 | 0 | iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
948 | 0 | iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
949 | 0 | iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
950 | 0 | iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
951 | 0 | iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
952 | 0 | iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
953 | 0 | iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
954 | 0 | iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
955 | 0 | iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
956 | 0 | iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
957 | 0 | iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
958 | 0 | iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
959 | 0 | iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
960 | 0 | iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
961 | 0 | iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
962 | 0 | iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
963 | 0 | iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
964 | 0 | iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
965 | 0 | iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
966 | 0 | iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
967 | 0 | iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
968 | 0 | iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
969 | 0 | iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
970 | 0 | iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
971 | 0 | iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
972 | 0 | iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
973 | 0 | iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
974 | 0 | iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
975 | 0 | iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
976 | 0 | iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
977 | 0 | iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
978 | 0 | iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
979 | 0 | iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
980 | 0 | iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
981 | 0 | iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
982 | 0 | iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
983 | 0 | iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
984 | 0 | iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
985 | 0 | iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
986 | 0 | iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
987 | 0 | iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
988 | 0 | iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
989 | 0 | iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
990 | 0 | iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
991 | 0 | iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
992 | 0 | iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
993 | 0 | iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
994 | 0 | iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
995 | 0 | iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
996 | 0 | iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
997 | 0 | iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
998 | 0 | iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
999 | 0 | iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1000 | 0 | iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1001 | 0 | iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1002 | 0 | iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1003 | 0 | iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1004 | 0 | iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1005 | 0 | iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1006 | 0 | iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1007 | 0 | iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1008 | 0 | iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1009 | 0 | iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift); |
1010 | |
|
1011 | 0 | piOrg += iStrideOrg; |
1012 | 0 | piCur += iStrideCur; |
1013 | 0 | } |
1014 | |
|
1015 | 0 | return ( uiSum ); |
1016 | 0 | } |
1017 | | |
1018 | | // -------------------------------------------------------------------------------------------------------------------- |
1019 | | // HADAMARD with step (used in fractional search) |
1020 | | // -------------------------------------------------------------------------------------------------------------------- |
1021 | | |
1022 | | Distortion RdCost::xCalcHADs2x2( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur ) |
1023 | 0 | { |
1024 | 0 | Distortion satd = 0; |
1025 | 0 | TCoeff diff[4], m[4]; |
1026 | |
|
1027 | 0 | diff[0] = piOrg[0 ] - piCur[0]; |
1028 | 0 | diff[1] = piOrg[1 ] - piCur[1]; |
1029 | 0 | diff[2] = piOrg[iStrideOrg ] - piCur[0 + iStrideCur]; |
1030 | 0 | diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur]; |
1031 | 0 | m[0] = diff[0] + diff[2]; |
1032 | 0 | m[1] = diff[1] + diff[3]; |
1033 | 0 | m[2] = diff[0] - diff[2]; |
1034 | 0 | m[3] = diff[1] - diff[3]; |
1035 | | |
1036 | 0 | satd += abs(m[0] + m[1]) >> 2; |
1037 | 0 | satd += abs(m[0] - m[1]); |
1038 | 0 | satd += abs(m[2] + m[3]); |
1039 | 0 | satd += abs(m[2] - m[3]); |
1040 | |
|
1041 | 0 | return satd; |
1042 | 0 | } |
1043 | | |
// 4x4 Hadamard-transformed SAD (SATD).
// Computes the 4x4 residual block, applies a 2-D 4-point Hadamard transform
// (two vertical butterfly stages followed by two horizontal stages, written
// out as a hand-scheduled butterfly network), and sums the absolute values of
// the transform coefficients.  The DC coefficient d[0] is re-weighted to a
// quarter of its magnitude, and the total is halved with rounding.
static Distortion xCalcHADs4x4( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur )
{
  int k;
  Distortion satd = 0;
  TCoeff diff[16], m[16], d[16];

  // Residual: 4 rows of 4 pixel differences, stored row-major in diff[].
  for( k = 0; k < 16; k+=4 )
  {
    diff[k+0] = piOrg[0] - piCur[0];
    diff[k+1] = piOrg[1] - piCur[1];
    diff[k+2] = piOrg[2] - piCur[2];
    diff[k+3] = piOrg[3] - piCur[3];

    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  /*===== hadamard transform =====*/
  // Vertical stage 1: combine row 0 with row 3 and row 1 with row 2.
  m[ 0] = diff[ 0] + diff[12];
  m[ 1] = diff[ 1] + diff[13];
  m[ 2] = diff[ 2] + diff[14];
  m[ 3] = diff[ 3] + diff[15];
  m[ 4] = diff[ 4] + diff[ 8];
  m[ 5] = diff[ 5] + diff[ 9];
  m[ 6] = diff[ 6] + diff[10];
  m[ 7] = diff[ 7] + diff[11];
  m[ 8] = diff[ 4] - diff[ 8];
  m[ 9] = diff[ 5] - diff[ 9];
  m[10] = diff[ 6] - diff[10];
  m[11] = diff[ 7] - diff[11];
  m[12] = diff[ 0] - diff[12];
  m[13] = diff[ 1] - diff[13];
  m[14] = diff[ 2] - diff[14];
  m[15] = diff[ 3] - diff[15];

  // Vertical stage 2.
  d[ 0] = m[ 0] + m[ 4];
  d[ 1] = m[ 1] + m[ 5];
  d[ 2] = m[ 2] + m[ 6];
  d[ 3] = m[ 3] + m[ 7];
  d[ 4] = m[ 8] + m[12];
  d[ 5] = m[ 9] + m[13];
  d[ 6] = m[10] + m[14];
  d[ 7] = m[11] + m[15];
  d[ 8] = m[ 0] - m[ 4];
  d[ 9] = m[ 1] - m[ 5];
  d[10] = m[ 2] - m[ 6];
  d[11] = m[ 3] - m[ 7];
  d[12] = m[12] - m[ 8];
  d[13] = m[13] - m[ 9];
  d[14] = m[14] - m[10];
  d[15] = m[15] - m[11];

  // Horizontal stage 1: within each row, combine column 0 with 3 and 1 with 2.
  m[ 0] = d[ 0] + d[ 3];
  m[ 1] = d[ 1] + d[ 2];
  m[ 2] = d[ 1] - d[ 2];
  m[ 3] = d[ 0] - d[ 3];
  m[ 4] = d[ 4] + d[ 7];
  m[ 5] = d[ 5] + d[ 6];
  m[ 6] = d[ 5] - d[ 6];
  m[ 7] = d[ 4] - d[ 7];
  m[ 8] = d[ 8] + d[11];
  m[ 9] = d[ 9] + d[10];
  m[10] = d[ 9] - d[10];
  m[11] = d[ 8] - d[11];
  m[12] = d[12] + d[15];
  m[13] = d[13] + d[14];
  m[14] = d[13] - d[14];
  m[15] = d[12] - d[15];

  // Horizontal stage 2: final transform coefficients land in d[].
  d[ 0] = m[ 0] + m[ 1];
  d[ 1] = m[ 0] - m[ 1];
  d[ 2] = m[ 2] + m[ 3];
  d[ 3] = m[ 3] - m[ 2];
  d[ 4] = m[ 4] + m[ 5];
  d[ 5] = m[ 4] - m[ 5];
  d[ 6] = m[ 6] + m[ 7];
  d[ 7] = m[ 7] - m[ 6];
  d[ 8] = m[ 8] + m[ 9];
  d[ 9] = m[ 8] - m[ 9];
  d[10] = m[10] + m[11];
  d[11] = m[11] - m[10];
  d[12] = m[12] + m[13];
  d[13] = m[12] - m[13];
  d[14] = m[14] + m[15];
  d[15] = m[15] - m[14];

  // Sum of absolute transform coefficients.
  for (k=0; k<16; ++k)
  {
    satd += abs(d[k]);
  }

  // Re-weight the DC coefficient to a quarter of its magnitude, then halve
  // the total with rounding to normalize the transform gain.
  satd -= abs( d[0] );
  satd += abs( d[0] ) >> 2;
  satd = ((satd+1)>>1);

  return satd;
}
1141 | | |
// Fast SATD approximation for a 16x16 block.
// The 16x16 input is first downsampled to 8x8 by averaging each 2x2
// neighborhood (with rounding) in both the original and the current block;
// the residual of the downsampled blocks then goes through the same 8x8
// Hadamard butterfly network as xCalcHADs8x8.  The final sum is scaled back
// up (<< 2) to approximate the cost of the full-resolution block.
static Distortion xCalcHADs16x16_fast( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur )
{
  int k, i, j, jj;
  Distortion sad = 0;
  TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];

  // Residual of the 2x2-averaged (downsampled) blocks: 8 rows of 8 values.
  // Each term averages a 2x2 neighborhood with rounding (+2 >> 2) before
  // differencing; source pointers advance by two rows per iteration.
  for( k = 0; k < 64; k += 8 )
  {
    diff[k+0] = ( ( piOrg[ 0] + piOrg[ 0+1] + piOrg[ 0+iStrideOrg] + piOrg[ 0+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[ 0] + piCur[ 0+1] + piCur[ 0+iStrideCur] + piCur[ 0+1+iStrideCur] + 2 ) >> 2 );
    diff[k+1] = ( ( piOrg[ 2] + piOrg[ 2+1] + piOrg[ 2+iStrideOrg] + piOrg[ 2+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[ 2] + piCur[ 2+1] + piCur[ 2+iStrideCur] + piCur[ 2+1+iStrideCur] + 2 ) >> 2 );
    diff[k+2] = ( ( piOrg[ 4] + piOrg[ 4+1] + piOrg[ 4+iStrideOrg] + piOrg[ 4+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[ 4] + piCur[ 4+1] + piCur[ 4+iStrideCur] + piCur[ 4+1+iStrideCur] + 2 ) >> 2 );
    diff[k+3] = ( ( piOrg[ 6] + piOrg[ 6+1] + piOrg[ 6+iStrideOrg] + piOrg[ 6+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[ 6] + piCur[ 6+1] + piCur[ 6+iStrideCur] + piCur[ 6+1+iStrideCur] + 2 ) >> 2 );
    diff[k+4] = ( ( piOrg[ 8] + piOrg[ 8+1] + piOrg[ 8+iStrideOrg] + piOrg[ 8+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[ 8] + piCur[ 8+1] + piCur[ 8+iStrideCur] + piCur[ 8+1+iStrideCur] + 2 ) >> 2 );
    diff[k+5] = ( ( piOrg[10] + piOrg[10+1] + piOrg[10+iStrideOrg] + piOrg[10+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[10] + piCur[10+1] + piCur[10+iStrideCur] + piCur[10+1+iStrideCur] + 2 ) >> 2 );
    diff[k+6] = ( ( piOrg[12] + piOrg[12+1] + piOrg[12+iStrideOrg] + piOrg[12+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[12] + piCur[12+1] + piCur[12+iStrideCur] + piCur[12+1+iStrideCur] + 2 ) >> 2 );
    diff[k+7] = ( ( piOrg[14] + piOrg[14+1] + piOrg[14+iStrideOrg] + piOrg[14+1+iStrideOrg] + 2 ) >> 2 ) - ( ( piCur[14] + piCur[14+1] + piCur[14+iStrideCur] + piCur[14+1+iStrideCur] + 2 ) >> 2 );

    piCur += 2 * iStrideCur;
    piOrg += 2 * iStrideOrg;
  }

  //horizontal
  // 8-point Hadamard butterflies applied to each row (three stages).
  for (j=0; j < 8; j++)
  {
    jj = j << 3;
    m2[j][0] = diff[jj  ] + diff[jj+4];
    m2[j][1] = diff[jj+1] + diff[jj+5];
    m2[j][2] = diff[jj+2] + diff[jj+6];
    m2[j][3] = diff[jj+3] + diff[jj+7];
    m2[j][4] = diff[jj  ] - diff[jj+4];
    m2[j][5] = diff[jj+1] - diff[jj+5];
    m2[j][6] = diff[jj+2] - diff[jj+6];
    m2[j][7] = diff[jj+3] - diff[jj+7];

    m1[j][0] = m2[j][0] + m2[j][2];
    m1[j][1] = m2[j][1] + m2[j][3];
    m1[j][2] = m2[j][0] - m2[j][2];
    m1[j][3] = m2[j][1] - m2[j][3];
    m1[j][4] = m2[j][4] + m2[j][6];
    m1[j][5] = m2[j][5] + m2[j][7];
    m1[j][6] = m2[j][4] - m2[j][6];
    m1[j][7] = m2[j][5] - m2[j][7];

    m2[j][0] = m1[j][0] + m1[j][1];
    m2[j][1] = m1[j][0] - m1[j][1];
    m2[j][2] = m1[j][2] + m1[j][3];
    m2[j][3] = m1[j][2] - m1[j][3];
    m2[j][4] = m1[j][4] + m1[j][5];
    m2[j][5] = m1[j][4] - m1[j][5];
    m2[j][6] = m1[j][6] + m1[j][7];
    m2[j][7] = m1[j][6] - m1[j][7];
  }

  //vertical
  // 8-point Hadamard butterflies applied to each column (three stages).
  for (i=0; i < 8; i++)
  {
    m3[0][i] = m2[0][i] + m2[4][i];
    m3[1][i] = m2[1][i] + m2[5][i];
    m3[2][i] = m2[2][i] + m2[6][i];
    m3[3][i] = m2[3][i] + m2[7][i];
    m3[4][i] = m2[0][i] - m2[4][i];
    m3[5][i] = m2[1][i] - m2[5][i];
    m3[6][i] = m2[2][i] - m2[6][i];
    m3[7][i] = m2[3][i] - m2[7][i];

    m1[0][i] = m3[0][i] + m3[2][i];
    m1[1][i] = m3[1][i] + m3[3][i];
    m1[2][i] = m3[0][i] - m3[2][i];
    m1[3][i] = m3[1][i] - m3[3][i];
    m1[4][i] = m3[4][i] + m3[6][i];
    m1[5][i] = m3[5][i] + m3[7][i];
    m1[6][i] = m3[4][i] - m3[6][i];
    m1[7][i] = m3[5][i] - m3[7][i];

    m2[0][i] = m1[0][i] + m1[1][i];
    m2[1][i] = m1[0][i] - m1[1][i];
    m2[2][i] = m1[2][i] + m1[3][i];
    m2[3][i] = m1[2][i] - m1[3][i];
    m2[4][i] = m1[4][i] + m1[5][i];
    m2[5][i] = m1[4][i] - m1[5][i];
    m2[6][i] = m1[6][i] + m1[7][i];
    m2[7][i] = m1[6][i] - m1[7][i];
  }

  // Sum of absolute transform coefficients.
  for (i = 0; i < 8; i++)
  {
    for (j = 0; j < 8; j++)
    {
      sad += abs(m2[i][j]);
    }
  }

  // DC re-weighting (quarter weight) and transform-gain normalization, then
  // scale back up to the 16x16 block size.
  sad -= abs( m2[0][0] );
  sad += abs( m2[0][0] ) >> 2;
  sad=((sad+2)>>2);

  return (sad << 2);
}
1240 | | |
// 8x8 Hadamard-transformed SAD (SATD).
// Computes the 8x8 residual block, applies a 2-D 8-point Hadamard transform
// (three horizontal butterfly stages per row, then three vertical stages per
// column, written as a hand-scheduled butterfly network), and sums the
// absolute transform coefficients.  The DC coefficient m2[0][0] is
// re-weighted to a quarter of its magnitude, and the total is divided by 4
// with rounding to normalize the transform gain.
static Distortion xCalcHADs8x8( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur )
{
  int k, i, j, jj;
  Distortion sad = 0;
  TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];

  // Residual: 8 rows of 8 pixel differences, stored row-major in diff[].
  for( k = 0; k < 64; k += 8 )
  {
    diff[k+0] = piOrg[0] - piCur[0];
    diff[k+1] = piOrg[1] - piCur[1];
    diff[k+2] = piOrg[2] - piCur[2];
    diff[k+3] = piOrg[3] - piCur[3];
    diff[k+4] = piOrg[4] - piCur[4];
    diff[k+5] = piOrg[5] - piCur[5];
    diff[k+6] = piOrg[6] - piCur[6];
    diff[k+7] = piOrg[7] - piCur[7];

    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  //horizontal
  // 8-point Hadamard butterflies applied to each row (three stages).
  for (j=0; j < 8; j++)
  {
    jj = j << 3;
    m2[j][0] = diff[jj  ] + diff[jj+4];
    m2[j][1] = diff[jj+1] + diff[jj+5];
    m2[j][2] = diff[jj+2] + diff[jj+6];
    m2[j][3] = diff[jj+3] + diff[jj+7];
    m2[j][4] = diff[jj  ] - diff[jj+4];
    m2[j][5] = diff[jj+1] - diff[jj+5];
    m2[j][6] = diff[jj+2] - diff[jj+6];
    m2[j][7] = diff[jj+3] - diff[jj+7];

    m1[j][0] = m2[j][0] + m2[j][2];
    m1[j][1] = m2[j][1] + m2[j][3];
    m1[j][2] = m2[j][0] - m2[j][2];
    m1[j][3] = m2[j][1] - m2[j][3];
    m1[j][4] = m2[j][4] + m2[j][6];
    m1[j][5] = m2[j][5] + m2[j][7];
    m1[j][6] = m2[j][4] - m2[j][6];
    m1[j][7] = m2[j][5] - m2[j][7];

    m2[j][0] = m1[j][0] + m1[j][1];
    m2[j][1] = m1[j][0] - m1[j][1];
    m2[j][2] = m1[j][2] + m1[j][3];
    m2[j][3] = m1[j][2] - m1[j][3];
    m2[j][4] = m1[j][4] + m1[j][5];
    m2[j][5] = m1[j][4] - m1[j][5];
    m2[j][6] = m1[j][6] + m1[j][7];
    m2[j][7] = m1[j][6] - m1[j][7];
  }

  //vertical
  // 8-point Hadamard butterflies applied to each column (three stages).
  for (i=0; i < 8; i++)
  {
    m3[0][i] = m2[0][i] + m2[4][i];
    m3[1][i] = m2[1][i] + m2[5][i];
    m3[2][i] = m2[2][i] + m2[6][i];
    m3[3][i] = m2[3][i] + m2[7][i];
    m3[4][i] = m2[0][i] - m2[4][i];
    m3[5][i] = m2[1][i] - m2[5][i];
    m3[6][i] = m2[2][i] - m2[6][i];
    m3[7][i] = m2[3][i] - m2[7][i];

    m1[0][i] = m3[0][i] + m3[2][i];
    m1[1][i] = m3[1][i] + m3[3][i];
    m1[2][i] = m3[0][i] - m3[2][i];
    m1[3][i] = m3[1][i] - m3[3][i];
    m1[4][i] = m3[4][i] + m3[6][i];
    m1[5][i] = m3[5][i] + m3[7][i];
    m1[6][i] = m3[4][i] - m3[6][i];
    m1[7][i] = m3[5][i] - m3[7][i];

    m2[0][i] = m1[0][i] + m1[1][i];
    m2[1][i] = m1[0][i] - m1[1][i];
    m2[2][i] = m1[2][i] + m1[3][i];
    m2[3][i] = m1[2][i] - m1[3][i];
    m2[4][i] = m1[4][i] + m1[5][i];
    m2[5][i] = m1[4][i] - m1[5][i];
    m2[6][i] = m1[6][i] + m1[7][i];
    m2[7][i] = m1[6][i] - m1[7][i];
  }

  // Sum of absolute transform coefficients.
  for (i = 0; i < 8; i++)
  {
    for (j = 0; j < 8; j++)
    {
      sad += abs(m2[i][j]);
    }
  }

  // DC re-weighting (quarter weight) and transform-gain normalization.
  sad -= abs( m2[0][0] );
  sad += abs( m2[0][0] ) >> 2;
  sad=((sad+2)>>2);

  return sad;
}
1339 | | |
1340 | | static Distortion xCalcHADs16x8( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur ) |
1341 | 0 | { //need to add SIMD implementation ,JCA |
1342 | 0 | int k, i, j, jj, sad = 0; |
1343 | 0 | int diff[128], m1[8][16], m2[8][16]; |
1344 | 0 | for( k = 0; k < 128; k += 16 ) |
1345 | 0 | { |
1346 | 0 | diff[k + 0] = piOrg[0] - piCur[0]; |
1347 | 0 | diff[k + 1] = piOrg[1] - piCur[1]; |
1348 | 0 | diff[k + 2] = piOrg[2] - piCur[2]; |
1349 | 0 | diff[k + 3] = piOrg[3] - piCur[3]; |
1350 | 0 | diff[k + 4] = piOrg[4] - piCur[4]; |
1351 | 0 | diff[k + 5] = piOrg[5] - piCur[5]; |
1352 | 0 | diff[k + 6] = piOrg[6] - piCur[6]; |
1353 | 0 | diff[k + 7] = piOrg[7] - piCur[7]; |
1354 | |
|
1355 | 0 | diff[k + 8] = piOrg[8] - piCur[8]; |
1356 | 0 | diff[k + 9] = piOrg[9] - piCur[9]; |
1357 | 0 | diff[k + 10] = piOrg[10] - piCur[10]; |
1358 | 0 | diff[k + 11] = piOrg[11] - piCur[11]; |
1359 | 0 | diff[k + 12] = piOrg[12] - piCur[12]; |
1360 | 0 | diff[k + 13] = piOrg[13] - piCur[13]; |
1361 | 0 | diff[k + 14] = piOrg[14] - piCur[14]; |
1362 | 0 | diff[k + 15] = piOrg[15] - piCur[15]; |
1363 | |
|
1364 | 0 | piCur += iStrideCur; |
1365 | 0 | piOrg += iStrideOrg; |
1366 | 0 | } |
1367 | | |
1368 | | //horizontal |
1369 | 0 | for( j = 0; j < 8; j++ ) |
1370 | 0 | { |
1371 | 0 | jj = j << 4; |
1372 | |
|
1373 | 0 | m2[j][0] = diff[jj ] + diff[jj + 8]; |
1374 | 0 | m2[j][1] = diff[jj + 1] + diff[jj + 9]; |
1375 | 0 | m2[j][2] = diff[jj + 2] + diff[jj + 10]; |
1376 | 0 | m2[j][3] = diff[jj + 3] + diff[jj + 11]; |
1377 | 0 | m2[j][4] = diff[jj + 4] + diff[jj + 12]; |
1378 | 0 | m2[j][5] = diff[jj + 5] + diff[jj + 13]; |
1379 | 0 | m2[j][6] = diff[jj + 6] + diff[jj + 14]; |
1380 | 0 | m2[j][7] = diff[jj + 7] + diff[jj + 15]; |
1381 | 0 | m2[j][8] = diff[jj ] - diff[jj + 8]; |
1382 | 0 | m2[j][9] = diff[jj + 1] - diff[jj + 9]; |
1383 | 0 | m2[j][10] = diff[jj + 2] - diff[jj + 10]; |
1384 | 0 | m2[j][11] = diff[jj + 3] - diff[jj + 11]; |
1385 | 0 | m2[j][12] = diff[jj + 4] - diff[jj + 12]; |
1386 | 0 | m2[j][13] = diff[jj + 5] - diff[jj + 13]; |
1387 | 0 | m2[j][14] = diff[jj + 6] - diff[jj + 14]; |
1388 | 0 | m2[j][15] = diff[jj + 7] - diff[jj + 15]; |
1389 | |
|
1390 | 0 | m1[j][0] = m2[j][0] + m2[j][4]; |
1391 | 0 | m1[j][1] = m2[j][1] + m2[j][5]; |
1392 | 0 | m1[j][2] = m2[j][2] + m2[j][6]; |
1393 | 0 | m1[j][3] = m2[j][3] + m2[j][7]; |
1394 | 0 | m1[j][4] = m2[j][0] - m2[j][4]; |
1395 | 0 | m1[j][5] = m2[j][1] - m2[j][5]; |
1396 | 0 | m1[j][6] = m2[j][2] - m2[j][6]; |
1397 | 0 | m1[j][7] = m2[j][3] - m2[j][7]; |
1398 | 0 | m1[j][8] = m2[j][8] + m2[j][12]; |
1399 | 0 | m1[j][9] = m2[j][9] + m2[j][13]; |
1400 | 0 | m1[j][10] = m2[j][10] + m2[j][14]; |
1401 | 0 | m1[j][11] = m2[j][11] + m2[j][15]; |
1402 | 0 | m1[j][12] = m2[j][8] - m2[j][12]; |
1403 | 0 | m1[j][13] = m2[j][9] - m2[j][13]; |
1404 | 0 | m1[j][14] = m2[j][10] - m2[j][14]; |
1405 | 0 | m1[j][15] = m2[j][11] - m2[j][15]; |
1406 | |
|
1407 | 0 | m2[j][0] = m1[j][0] + m1[j][2]; |
1408 | 0 | m2[j][1] = m1[j][1] + m1[j][3]; |
1409 | 0 | m2[j][2] = m1[j][0] - m1[j][2]; |
1410 | 0 | m2[j][3] = m1[j][1] - m1[j][3]; |
1411 | 0 | m2[j][4] = m1[j][4] + m1[j][6]; |
1412 | 0 | m2[j][5] = m1[j][5] + m1[j][7]; |
1413 | 0 | m2[j][6] = m1[j][4] - m1[j][6]; |
1414 | 0 | m2[j][7] = m1[j][5] - m1[j][7]; |
1415 | 0 | m2[j][8] = m1[j][8] + m1[j][10]; |
1416 | 0 | m2[j][9] = m1[j][9] + m1[j][11]; |
1417 | 0 | m2[j][10] = m1[j][8] - m1[j][10]; |
1418 | 0 | m2[j][11] = m1[j][9] - m1[j][11]; |
1419 | 0 | m2[j][12] = m1[j][12] + m1[j][14]; |
1420 | 0 | m2[j][13] = m1[j][13] + m1[j][15]; |
1421 | 0 | m2[j][14] = m1[j][12] - m1[j][14]; |
1422 | 0 | m2[j][15] = m1[j][13] - m1[j][15]; |
1423 | |
|
1424 | 0 | m1[j][0] = m2[j][0] + m2[j][1]; |
1425 | 0 | m1[j][1] = m2[j][0] - m2[j][1]; |
1426 | 0 | m1[j][2] = m2[j][2] + m2[j][3]; |
1427 | 0 | m1[j][3] = m2[j][2] - m2[j][3]; |
1428 | 0 | m1[j][4] = m2[j][4] + m2[j][5]; |
1429 | 0 | m1[j][5] = m2[j][4] - m2[j][5]; |
1430 | 0 | m1[j][6] = m2[j][6] + m2[j][7]; |
1431 | 0 | m1[j][7] = m2[j][6] - m2[j][7]; |
1432 | 0 | m1[j][8] = m2[j][8] + m2[j][9]; |
1433 | 0 | m1[j][9] = m2[j][8] - m2[j][9]; |
1434 | 0 | m1[j][10] = m2[j][10] + m2[j][11]; |
1435 | 0 | m1[j][11] = m2[j][10] - m2[j][11]; |
1436 | 0 | m1[j][12] = m2[j][12] + m2[j][13]; |
1437 | 0 | m1[j][13] = m2[j][12] - m2[j][13]; |
1438 | 0 | m1[j][14] = m2[j][14] + m2[j][15]; |
1439 | 0 | m1[j][15] = m2[j][14] - m2[j][15]; |
1440 | 0 | } |
1441 | | |
1442 | | //vertical |
1443 | 0 | for( i = 0; i < 16; i++ ) |
1444 | 0 | { |
1445 | 0 | m2[0][i] = m1[0][i] + m1[4][i]; |
1446 | 0 | m2[1][i] = m1[1][i] + m1[5][i]; |
1447 | 0 | m2[2][i] = m1[2][i] + m1[6][i]; |
1448 | 0 | m2[3][i] = m1[3][i] + m1[7][i]; |
1449 | 0 | m2[4][i] = m1[0][i] - m1[4][i]; |
1450 | 0 | m2[5][i] = m1[1][i] - m1[5][i]; |
1451 | 0 | m2[6][i] = m1[2][i] - m1[6][i]; |
1452 | 0 | m2[7][i] = m1[3][i] - m1[7][i]; |
1453 | |
|
1454 | 0 | m1[0][i] = m2[0][i] + m2[2][i]; |
1455 | 0 | m1[1][i] = m2[1][i] + m2[3][i]; |
1456 | 0 | m1[2][i] = m2[0][i] - m2[2][i]; |
1457 | 0 | m1[3][i] = m2[1][i] - m2[3][i]; |
1458 | 0 | m1[4][i] = m2[4][i] + m2[6][i]; |
1459 | 0 | m1[5][i] = m2[5][i] + m2[7][i]; |
1460 | 0 | m1[6][i] = m2[4][i] - m2[6][i]; |
1461 | 0 | m1[7][i] = m2[5][i] - m2[7][i]; |
1462 | |
|
1463 | 0 | m2[0][i] = m1[0][i] + m1[1][i]; |
1464 | 0 | m2[1][i] = m1[0][i] - m1[1][i]; |
1465 | 0 | m2[2][i] = m1[2][i] + m1[3][i]; |
1466 | 0 | m2[3][i] = m1[2][i] - m1[3][i]; |
1467 | 0 | m2[4][i] = m1[4][i] + m1[5][i]; |
1468 | 0 | m2[5][i] = m1[4][i] - m1[5][i]; |
1469 | 0 | m2[6][i] = m1[6][i] + m1[7][i]; |
1470 | 0 | m2[7][i] = m1[6][i] - m1[7][i]; |
1471 | 0 | } |
1472 | |
|
1473 | 0 | for( i = 0; i < 8; i++ ) |
1474 | 0 | { |
1475 | 0 | for( j = 0; j < 16; j++ ) |
1476 | 0 | { |
1477 | 0 | sad += abs( m2[i][j] ); |
1478 | 0 | } |
1479 | 0 | } |
1480 | | |
1481 | 0 | sad -= abs( m2[0][0] ); |
1482 | 0 | sad += abs( m2[0][0] ) >> 2; |
1483 | 0 | sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); |
1484 | |
|
1485 | 0 | return sad; |
1486 | 0 | } |
1487 | | |
1488 | | static Distortion xCalcHADs8x16( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur ) |
1489 | 0 | { |
1490 | 0 | int k, i, j, jj, sad = 0; |
1491 | 0 | int diff[128], m1[16][8], m2[16][8]; |
1492 | 0 | for( k = 0; k < 128; k += 8 ) |
1493 | 0 | { |
1494 | 0 | diff[k + 0] = piOrg[0] - piCur[0]; |
1495 | 0 | diff[k + 1] = piOrg[1] - piCur[1]; |
1496 | 0 | diff[k + 2] = piOrg[2] - piCur[2]; |
1497 | 0 | diff[k + 3] = piOrg[3] - piCur[3]; |
1498 | 0 | diff[k + 4] = piOrg[4] - piCur[4]; |
1499 | 0 | diff[k + 5] = piOrg[5] - piCur[5]; |
1500 | 0 | diff[k + 6] = piOrg[6] - piCur[6]; |
1501 | 0 | diff[k + 7] = piOrg[7] - piCur[7]; |
1502 | |
|
1503 | 0 | piCur += iStrideCur; |
1504 | 0 | piOrg += iStrideOrg; |
1505 | 0 | } |
1506 | | |
1507 | | //horizontal |
1508 | 0 | for( j = 0; j < 16; j++ ) |
1509 | 0 | { |
1510 | 0 | jj = j << 3; |
1511 | |
|
1512 | 0 | m2[j][0] = diff[jj] + diff[jj + 4]; |
1513 | 0 | m2[j][1] = diff[jj + 1] + diff[jj + 5]; |
1514 | 0 | m2[j][2] = diff[jj + 2] + diff[jj + 6]; |
1515 | 0 | m2[j][3] = diff[jj + 3] + diff[jj + 7]; |
1516 | 0 | m2[j][4] = diff[jj] - diff[jj + 4]; |
1517 | 0 | m2[j][5] = diff[jj + 1] - diff[jj + 5]; |
1518 | 0 | m2[j][6] = diff[jj + 2] - diff[jj + 6]; |
1519 | 0 | m2[j][7] = diff[jj + 3] - diff[jj + 7]; |
1520 | |
|
1521 | 0 | m1[j][0] = m2[j][0] + m2[j][2]; |
1522 | 0 | m1[j][1] = m2[j][1] + m2[j][3]; |
1523 | 0 | m1[j][2] = m2[j][0] - m2[j][2]; |
1524 | 0 | m1[j][3] = m2[j][1] - m2[j][3]; |
1525 | 0 | m1[j][4] = m2[j][4] + m2[j][6]; |
1526 | 0 | m1[j][5] = m2[j][5] + m2[j][7]; |
1527 | 0 | m1[j][6] = m2[j][4] - m2[j][6]; |
1528 | 0 | m1[j][7] = m2[j][5] - m2[j][7]; |
1529 | |
|
1530 | 0 | m2[j][0] = m1[j][0] + m1[j][1]; |
1531 | 0 | m2[j][1] = m1[j][0] - m1[j][1]; |
1532 | 0 | m2[j][2] = m1[j][2] + m1[j][3]; |
1533 | 0 | m2[j][3] = m1[j][2] - m1[j][3]; |
1534 | 0 | m2[j][4] = m1[j][4] + m1[j][5]; |
1535 | 0 | m2[j][5] = m1[j][4] - m1[j][5]; |
1536 | 0 | m2[j][6] = m1[j][6] + m1[j][7]; |
1537 | 0 | m2[j][7] = m1[j][6] - m1[j][7]; |
1538 | 0 | } |
1539 | | |
1540 | | //vertical |
1541 | 0 | for( i = 0; i < 8; i++ ) |
1542 | 0 | { |
1543 | 0 | m1[0][i] = m2[0][i] + m2[8][i]; |
1544 | 0 | m1[1][i] = m2[1][i] + m2[9][i]; |
1545 | 0 | m1[2][i] = m2[2][i] + m2[10][i]; |
1546 | 0 | m1[3][i] = m2[3][i] + m2[11][i]; |
1547 | 0 | m1[4][i] = m2[4][i] + m2[12][i]; |
1548 | 0 | m1[5][i] = m2[5][i] + m2[13][i]; |
1549 | 0 | m1[6][i] = m2[6][i] + m2[14][i]; |
1550 | 0 | m1[7][i] = m2[7][i] + m2[15][i]; |
1551 | 0 | m1[8][i] = m2[0][i] - m2[8][i]; |
1552 | 0 | m1[9][i] = m2[1][i] - m2[9][i]; |
1553 | 0 | m1[10][i] = m2[2][i] - m2[10][i]; |
1554 | 0 | m1[11][i] = m2[3][i] - m2[11][i]; |
1555 | 0 | m1[12][i] = m2[4][i] - m2[12][i]; |
1556 | 0 | m1[13][i] = m2[5][i] - m2[13][i]; |
1557 | 0 | m1[14][i] = m2[6][i] - m2[14][i]; |
1558 | 0 | m1[15][i] = m2[7][i] - m2[15][i]; |
1559 | |
|
1560 | 0 | m2[0][i] = m1[0][i] + m1[4][i]; |
1561 | 0 | m2[1][i] = m1[1][i] + m1[5][i]; |
1562 | 0 | m2[2][i] = m1[2][i] + m1[6][i]; |
1563 | 0 | m2[3][i] = m1[3][i] + m1[7][i]; |
1564 | 0 | m2[4][i] = m1[0][i] - m1[4][i]; |
1565 | 0 | m2[5][i] = m1[1][i] - m1[5][i]; |
1566 | 0 | m2[6][i] = m1[2][i] - m1[6][i]; |
1567 | 0 | m2[7][i] = m1[3][i] - m1[7][i]; |
1568 | 0 | m2[8][i] = m1[8][i] + m1[12][i]; |
1569 | 0 | m2[9][i] = m1[9][i] + m1[13][i]; |
1570 | 0 | m2[10][i] = m1[10][i] + m1[14][i]; |
1571 | 0 | m2[11][i] = m1[11][i] + m1[15][i]; |
1572 | 0 | m2[12][i] = m1[8][i] - m1[12][i]; |
1573 | 0 | m2[13][i] = m1[9][i] - m1[13][i]; |
1574 | 0 | m2[14][i] = m1[10][i] - m1[14][i]; |
1575 | 0 | m2[15][i] = m1[11][i] - m1[15][i]; |
1576 | |
|
1577 | 0 | m1[0][i] = m2[0][i] + m2[2][i]; |
1578 | 0 | m1[1][i] = m2[1][i] + m2[3][i]; |
1579 | 0 | m1[2][i] = m2[0][i] - m2[2][i]; |
1580 | 0 | m1[3][i] = m2[1][i] - m2[3][i]; |
1581 | 0 | m1[4][i] = m2[4][i] + m2[6][i]; |
1582 | 0 | m1[5][i] = m2[5][i] + m2[7][i]; |
1583 | 0 | m1[6][i] = m2[4][i] - m2[6][i]; |
1584 | 0 | m1[7][i] = m2[5][i] - m2[7][i]; |
1585 | 0 | m1[8][i] = m2[8][i] + m2[10][i]; |
1586 | 0 | m1[9][i] = m2[9][i] + m2[11][i]; |
1587 | 0 | m1[10][i] = m2[8][i] - m2[10][i]; |
1588 | 0 | m1[11][i] = m2[9][i] - m2[11][i]; |
1589 | 0 | m1[12][i] = m2[12][i] + m2[14][i]; |
1590 | 0 | m1[13][i] = m2[13][i] + m2[15][i]; |
1591 | 0 | m1[14][i] = m2[12][i] - m2[14][i]; |
1592 | 0 | m1[15][i] = m2[13][i] - m2[15][i]; |
1593 | |
|
1594 | 0 | m2[0][i] = m1[0][i] + m1[1][i]; |
1595 | 0 | m2[1][i] = m1[0][i] - m1[1][i]; |
1596 | 0 | m2[2][i] = m1[2][i] + m1[3][i]; |
1597 | 0 | m2[3][i] = m1[2][i] - m1[3][i]; |
1598 | 0 | m2[4][i] = m1[4][i] + m1[5][i]; |
1599 | 0 | m2[5][i] = m1[4][i] - m1[5][i]; |
1600 | 0 | m2[6][i] = m1[6][i] + m1[7][i]; |
1601 | 0 | m2[7][i] = m1[6][i] - m1[7][i]; |
1602 | 0 | m2[8][i] = m1[8][i] + m1[9][i]; |
1603 | 0 | m2[9][i] = m1[8][i] - m1[9][i]; |
1604 | 0 | m2[10][i] = m1[10][i] + m1[11][i]; |
1605 | 0 | m2[11][i] = m1[10][i] - m1[11][i]; |
1606 | 0 | m2[12][i] = m1[12][i] + m1[13][i]; |
1607 | 0 | m2[13][i] = m1[12][i] - m1[13][i]; |
1608 | 0 | m2[14][i] = m1[14][i] + m1[15][i]; |
1609 | 0 | m2[15][i] = m1[14][i] - m1[15][i]; |
1610 | 0 | } |
1611 | |
|
1612 | 0 | for( i = 0; i < 16; i++ ) |
1613 | 0 | { |
1614 | 0 | for( j = 0; j < 8; j++ ) |
1615 | 0 | { |
1616 | 0 | sad += abs( m2[i][j] ); |
1617 | 0 | } |
1618 | 0 | } |
1619 | | |
1620 | 0 | sad -= abs( m2[0][0] ); |
1621 | 0 | sad += abs( m2[0][0] ) >> 2; |
1622 | 0 | sad = ( int ) ( sad / sqrt( 16.0 * 8 ) * 2 ); |
1623 | |
|
1624 | 0 | return sad; |
1625 | 0 | } |
1626 | | |
1627 | | static Distortion xCalcHADs4x8( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur ) |
1628 | 0 | { |
1629 | 0 | int k, i, j, jj, sad = 0; |
1630 | 0 | int diff[32], m1[8][4], m2[8][4]; |
1631 | 0 | for( k = 0; k < 32; k += 4 ) |
1632 | 0 | { |
1633 | 0 | diff[k + 0] = piOrg[0] - piCur[0]; |
1634 | 0 | diff[k + 1] = piOrg[1] - piCur[1]; |
1635 | 0 | diff[k + 2] = piOrg[2] - piCur[2]; |
1636 | 0 | diff[k + 3] = piOrg[3] - piCur[3]; |
1637 | |
|
1638 | 0 | piCur += iStrideCur; |
1639 | 0 | piOrg += iStrideOrg; |
1640 | 0 | } |
1641 | | |
1642 | | //horizontal |
1643 | 0 | for( j = 0; j < 8; j++ ) |
1644 | 0 | { |
1645 | 0 | jj = j << 2; |
1646 | 0 | m2[j][0] = diff[jj] + diff[jj + 2]; |
1647 | 0 | m2[j][1] = diff[jj + 1] + diff[jj + 3]; |
1648 | 0 | m2[j][2] = diff[jj] - diff[jj + 2]; |
1649 | 0 | m2[j][3] = diff[jj + 1] - diff[jj + 3]; |
1650 | |
|
1651 | 0 | m1[j][0] = m2[j][0] + m2[j][1]; |
1652 | 0 | m1[j][1] = m2[j][0] - m2[j][1]; |
1653 | 0 | m1[j][2] = m2[j][2] + m2[j][3]; |
1654 | 0 | m1[j][3] = m2[j][2] - m2[j][3]; |
1655 | 0 | } |
1656 | | |
1657 | | //vertical |
1658 | 0 | for( i = 0; i < 4; i++ ) |
1659 | 0 | { |
1660 | 0 | m2[0][i] = m1[0][i] + m1[4][i]; |
1661 | 0 | m2[1][i] = m1[1][i] + m1[5][i]; |
1662 | 0 | m2[2][i] = m1[2][i] + m1[6][i]; |
1663 | 0 | m2[3][i] = m1[3][i] + m1[7][i]; |
1664 | 0 | m2[4][i] = m1[0][i] - m1[4][i]; |
1665 | 0 | m2[5][i] = m1[1][i] - m1[5][i]; |
1666 | 0 | m2[6][i] = m1[2][i] - m1[6][i]; |
1667 | 0 | m2[7][i] = m1[3][i] - m1[7][i]; |
1668 | |
|
1669 | 0 | m1[0][i] = m2[0][i] + m2[2][i]; |
1670 | 0 | m1[1][i] = m2[1][i] + m2[3][i]; |
1671 | 0 | m1[2][i] = m2[0][i] - m2[2][i]; |
1672 | 0 | m1[3][i] = m2[1][i] - m2[3][i]; |
1673 | 0 | m1[4][i] = m2[4][i] + m2[6][i]; |
1674 | 0 | m1[5][i] = m2[5][i] + m2[7][i]; |
1675 | 0 | m1[6][i] = m2[4][i] - m2[6][i]; |
1676 | 0 | m1[7][i] = m2[5][i] - m2[7][i]; |
1677 | |
|
1678 | 0 | m2[0][i] = m1[0][i] + m1[1][i]; |
1679 | 0 | m2[1][i] = m1[0][i] - m1[1][i]; |
1680 | 0 | m2[2][i] = m1[2][i] + m1[3][i]; |
1681 | 0 | m2[3][i] = m1[2][i] - m1[3][i]; |
1682 | 0 | m2[4][i] = m1[4][i] + m1[5][i]; |
1683 | 0 | m2[5][i] = m1[4][i] - m1[5][i]; |
1684 | 0 | m2[6][i] = m1[6][i] + m1[7][i]; |
1685 | 0 | m2[7][i] = m1[6][i] - m1[7][i]; |
1686 | 0 | } |
1687 | |
|
1688 | 0 | for( i = 0; i < 8; i++ ) |
1689 | 0 | { |
1690 | 0 | for( j = 0; j < 4; j++ ) |
1691 | 0 | { |
1692 | 0 | sad += abs( m2[i][j] ); |
1693 | 0 | } |
1694 | 0 | } |
1695 | | |
1696 | 0 | sad -= abs( m2[0][0] ); |
1697 | 0 | sad += abs( m2[0][0] ) >> 2; |
1698 | 0 | sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); |
1699 | |
|
1700 | 0 | return sad; |
1701 | 0 | } |
1702 | | |
1703 | | static Distortion xCalcHADs8x4( const Pel* piOrg, const Pel* piCur, int iStrideOrg, int iStrideCur ) |
1704 | 0 | { |
1705 | 0 | int k, i, j, jj, sad = 0; |
1706 | 0 | int diff[32], m1[4][8], m2[4][8]; |
1707 | 0 | for( k = 0; k < 32; k += 8 ) |
1708 | 0 | { |
1709 | 0 | diff[k + 0] = piOrg[0] - piCur[0]; |
1710 | 0 | diff[k + 1] = piOrg[1] - piCur[1]; |
1711 | 0 | diff[k + 2] = piOrg[2] - piCur[2]; |
1712 | 0 | diff[k + 3] = piOrg[3] - piCur[3]; |
1713 | 0 | diff[k + 4] = piOrg[4] - piCur[4]; |
1714 | 0 | diff[k + 5] = piOrg[5] - piCur[5]; |
1715 | 0 | diff[k + 6] = piOrg[6] - piCur[6]; |
1716 | 0 | diff[k + 7] = piOrg[7] - piCur[7]; |
1717 | |
|
1718 | 0 | piCur += iStrideCur; |
1719 | 0 | piOrg += iStrideOrg; |
1720 | 0 | } |
1721 | | |
1722 | | //horizontal |
1723 | 0 | for( j = 0; j < 4; j++ ) |
1724 | 0 | { |
1725 | 0 | jj = j << 3; |
1726 | |
|
1727 | 0 | m2[j][0] = diff[jj] + diff[jj + 4]; |
1728 | 0 | m2[j][1] = diff[jj + 1] + diff[jj + 5]; |
1729 | 0 | m2[j][2] = diff[jj + 2] + diff[jj + 6]; |
1730 | 0 | m2[j][3] = diff[jj + 3] + diff[jj + 7]; |
1731 | 0 | m2[j][4] = diff[jj] - diff[jj + 4]; |
1732 | 0 | m2[j][5] = diff[jj + 1] - diff[jj + 5]; |
1733 | 0 | m2[j][6] = diff[jj + 2] - diff[jj + 6]; |
1734 | 0 | m2[j][7] = diff[jj + 3] - diff[jj + 7]; |
1735 | |
|
1736 | 0 | m1[j][0] = m2[j][0] + m2[j][2]; |
1737 | 0 | m1[j][1] = m2[j][1] + m2[j][3]; |
1738 | 0 | m1[j][2] = m2[j][0] - m2[j][2]; |
1739 | 0 | m1[j][3] = m2[j][1] - m2[j][3]; |
1740 | 0 | m1[j][4] = m2[j][4] + m2[j][6]; |
1741 | 0 | m1[j][5] = m2[j][5] + m2[j][7]; |
1742 | 0 | m1[j][6] = m2[j][4] - m2[j][6]; |
1743 | 0 | m1[j][7] = m2[j][5] - m2[j][7]; |
1744 | |
|
1745 | 0 | m2[j][0] = m1[j][0] + m1[j][1]; |
1746 | 0 | m2[j][1] = m1[j][0] - m1[j][1]; |
1747 | 0 | m2[j][2] = m1[j][2] + m1[j][3]; |
1748 | 0 | m2[j][3] = m1[j][2] - m1[j][3]; |
1749 | 0 | m2[j][4] = m1[j][4] + m1[j][5]; |
1750 | 0 | m2[j][5] = m1[j][4] - m1[j][5]; |
1751 | 0 | m2[j][6] = m1[j][6] + m1[j][7]; |
1752 | 0 | m2[j][7] = m1[j][6] - m1[j][7]; |
1753 | 0 | } |
1754 | | |
1755 | | //vertical |
1756 | 0 | for( i = 0; i < 8; i++ ) |
1757 | 0 | { |
1758 | 0 | m1[0][i] = m2[0][i] + m2[2][i]; |
1759 | 0 | m1[1][i] = m2[1][i] + m2[3][i]; |
1760 | 0 | m1[2][i] = m2[0][i] - m2[2][i]; |
1761 | 0 | m1[3][i] = m2[1][i] - m2[3][i]; |
1762 | |
|
1763 | 0 | m2[0][i] = m1[0][i] + m1[1][i]; |
1764 | 0 | m2[1][i] = m1[0][i] - m1[1][i]; |
1765 | 0 | m2[2][i] = m1[2][i] + m1[3][i]; |
1766 | 0 | m2[3][i] = m1[2][i] - m1[3][i]; |
1767 | 0 | } |
1768 | |
|
1769 | 0 | for( i = 0; i < 4; i++ ) |
1770 | 0 | { |
1771 | 0 | for( j = 0; j < 8; j++ ) |
1772 | 0 | { |
1773 | 0 | sad += abs( m2[i][j] ); |
1774 | 0 | } |
1775 | 0 | } |
1776 | | |
1777 | 0 | sad -= abs( m2[0][0] ); |
1778 | 0 | sad += abs( m2[0][0] ) >> 2; |
1779 | 0 | sad = ( int ) ( sad / sqrt( 4.0 * 8 ) * 2 ); |
1780 | |
|
1781 | 0 | return sad; |
1782 | 0 | } |
1783 | | |
1784 | | Distortion RdCost::xGetHAD2SADs( const DistParam &rcDtParam ) |
1785 | 0 | { |
1786 | 0 | if( rcDtParam.applyWeight ) |
1787 | 0 | { |
1788 | 0 | THROW(" no support"); |
1789 | 0 | } |
1790 | | |
1791 | 0 | Distortion distHad = xGetHADs<false>( rcDtParam ); |
1792 | 0 | Distortion distSad = 0; |
1793 | 0 | { |
1794 | 0 | CHECKD( (rcDtParam.org.width != rcDtParam.org.stride) || (rcDtParam.cur.stride != rcDtParam.org.stride) , "this functions assumes compact, aligned buffering"); |
1795 | |
|
1796 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
1797 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
1798 | 0 | int iRows = rcDtParam.org.height>>2; |
1799 | 0 | int iCols = rcDtParam.org.width<<2; |
1800 | |
|
1801 | 0 | Distortion uiSum = 0; |
1802 | |
|
1803 | 0 | for( int y = 0; y < iRows; y++ ) |
1804 | 0 | { |
1805 | 0 | for (int n = 0; n < iCols; n+=16 ) |
1806 | 0 | { |
1807 | 0 | uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] ); |
1808 | 0 | uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] ); |
1809 | 0 | uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] ); |
1810 | 0 | uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] ); |
1811 | 0 | uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] ); |
1812 | 0 | uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] ); |
1813 | 0 | uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] ); |
1814 | 0 | uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] ); |
1815 | 0 | uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] ); |
1816 | 0 | uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] ); |
1817 | 0 | uiSum += abs( piOrg[n+10] - piCur[n+10] ); |
1818 | 0 | uiSum += abs( piOrg[n+11] - piCur[n+11] ); |
1819 | 0 | uiSum += abs( piOrg[n+12] - piCur[n+12] ); |
1820 | 0 | uiSum += abs( piOrg[n+13] - piCur[n+13] ); |
1821 | 0 | uiSum += abs( piOrg[n+14] - piCur[n+14] ); |
1822 | 0 | uiSum += abs( piOrg[n+15] - piCur[n+15] ); |
1823 | 0 | } |
1824 | 0 | piOrg += iCols; |
1825 | 0 | piCur += iCols; |
1826 | 0 | } |
1827 | |
|
1828 | 0 | distSad = (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth)); |
1829 | 0 | } |
1830 | | |
1831 | 0 | return std::min( distHad, 2*distSad); |
1832 | 0 | } |
1833 | | |
// Dispatches the Hadamard (SATD) distortion computation to the largest
// sub-block kernel that tiles the given block. The ladder order matters:
// wide/tall rectangular kernels are preferred over square ones, and the
// optional fast 16x16 path (template parameter 'fastHad') is only used for
// large square blocks. Weighted distortion is not supported on this path.
template<bool fastHad>
Distortion RdCost::xGetHADs( const DistParam &rcDtParam )
{
  if( rcDtParam.applyWeight )
  {
    THROW(" no support");
  }
  const Pel* piOrg = rcDtParam.org.buf;
  const Pel* piCur = rcDtParam.cur.buf;
  const int iRows = rcDtParam.org.height;
  const int iCols = rcDtParam.org.width;
  const int iStrideCur = rcDtParam.cur.stride;
  const int iStrideOrg = rcDtParam.org.stride;

  int x = 0, y = 0;

  Distortion uiSum = 0;

  // wide blocks (width > height) tileable by 16x8
  if( iCols > iRows && ( iRows & 7 ) == 0 && ( iCols & 15 ) == 0 )
  {
    for( y = 0; y < iRows; y += 8 )
    {
      for( x = 0; x < iCols; x += 16 )
      {
        uiSum += xCalcHADs16x8( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += iStrideOrg * 8;
      piCur += iStrideCur * 8;
    }
  }
  // tall blocks (height > width) tileable by 8x16
  else if( iCols < iRows && ( iCols & 7 ) == 0 && ( iRows & 15 ) == 0 )
  {
    for( y = 0; y < iRows; y += 16 )
    {
      for( x = 0; x < iCols; x += 8 )
      {
        uiSum += xCalcHADs8x16( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += iStrideOrg * 16;
      piCur += iStrideCur * 16;
    }
  }
  // wide blocks tileable by 8x4
  else if( iCols > iRows && ( iRows & 3 ) == 0 && ( iCols & 7 ) == 0 )
  {
    for( y = 0; y < iRows; y += 4 )
    {
      for( x = 0; x < iCols; x += 8 )
      {
        uiSum += xCalcHADs8x4( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += iStrideOrg * 4;
      piCur += iStrideCur * 4;
    }
  }
  // tall blocks tileable by 4x8
  else if( iCols < iRows && ( iCols & 3 ) == 0 && ( iRows & 7 ) == 0 )
  {
    for( y = 0; y < iRows; y += 8 )
    {
      for( x = 0; x < iCols; x += 4 )
      {
        uiSum += xCalcHADs4x8( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += iStrideOrg * 8;
      piCur += iStrideCur * 8;
    }
  }
  // fast approximate path for large square blocks (32x32 and up), opt-in via template flag
  else if( fastHad && ( ( iRows % 32 == 0 ) && ( iCols % 32 == 0 ) ) && iRows == iCols )
  {
    for( y = 0; y < iRows; y += 16 )
    {
      for( x = 0; x < iCols; x += 16 )
      {
        uiSum += xCalcHADs16x16_fast( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += 16 * iStrideOrg;
      piCur += 16 * iStrideCur;
    }
  }
  // square-ish blocks tileable by 8x8
  else if( ( iRows % 8 == 0 ) && ( iCols % 8 == 0 ) )
  {
    for( y = 0; y < iRows; y += 8 )
    {
      for( x = 0; x < iCols; x += 8 )
      {
        uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += 8*iStrideOrg;
      piCur += 8*iStrideCur;
    }
  }
  // fall back to 4x4 tiles
  else if( ( iRows % 4 == 0 ) && ( iCols % 4 == 0 ) )
  {
    for( y = 0; y < iRows; y += 4 )
    {
      for( x = 0; x < iCols; x += 4 )
      {
        uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += 4*iStrideOrg;
      piCur += 4*iStrideCur;
    }
  }
  // last resort: 2x2 tiles
  else if( ( iRows % 2 == 0 ) && ( iCols % 2 == 0 ) )
  {
    for( y = 0; y < iRows; y += 2 )
    {
      for( x = 0; x < iCols; x += 2 )
      {
        uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x], iStrideOrg, iStrideCur );
      }
      piOrg += 2*iStrideOrg;
      piCur += 2*iStrideCur;
    }
  }
  else
  {
    // odd dimensions cannot be tiled by any kernel
    THROW( "Invalid size" );
  }

  // scale down to the common distortion precision for this bit depth
  return (uiSum >> DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth));
}
1955 | | |
1956 | | |
1957 | | void RdCost::saveUnadjustedLambda() |
1958 | 0 | { |
1959 | 0 | m_dLambda_unadjusted = m_dLambda; |
1960 | 0 | m_DistScaleUnadjusted = m_DistScale; |
1961 | 0 | } |
1962 | | |
1963 | | |
1964 | | inline Distortion getWeightedMSE(const Pel org, const Pel cur, const int64_t fixedPTweight, unsigned uiShift) |
1965 | 0 | { |
1966 | 0 | const Intermediate_Int iTemp = org - cur; |
1967 | 0 | return Intermediate_Int((fixedPTweight*(iTemp*iTemp) + (1 << 15)) >> uiShift); |
1968 | 0 | } |
1969 | | |
// Luma-weighted SSE: each sample's squared error is scaled by a weight
// looked up from the co-located original luma sample value ('lumaWeights',
// the reshaper LUT). The template parameter 'csx' is the chroma horizontal
// subsampling shift, used to map chroma columns to luma columns.
template<int csx>
static Distortion lumaWeightedSSE_Core( const DistParam& rcDtParam, ChromaFormat chmFmt, const uint32_t* lumaWeights )
{
  int iRows = rcDtParam.org.height;
  const Pel* piOrg = rcDtParam.org.buf;
  const Pel* piCur = rcDtParam.cur.buf;
  const int iCols = rcDtParam.org.width;
  const int iStrideCur = rcDtParam.cur.stride;
  const int iStrideOrg = rcDtParam.org.stride;
  const Pel* piOrgLuma = rcDtParam.orgLuma->buf;
  const int iStrideOrgLuma = rcDtParam.orgLuma->stride;

  Distortion uiSum = 0;
  // shift combines the 16 fractional weight bits with the squared bit-depth adjustment
  uint32_t uiShift = 16 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1);

  // cf, column factor, offset of the second column, to be set to '0' for width of '1'
  // For width 1 (cf == 0) the single column is accumulated twice and the
  // result is halved by the final '>> (1 - cf)'.
  const int cf = 1 - ( iCols & 1 );
  CHECK( ( iCols & 1 ) && iCols != 1, "Width can only be even or equal to '1'!" );
  const ComponentID compId = rcDtParam.compID;
  // vertical subsampling shift to step the luma buffer in sync with chroma rows
  const size_t cShiftY = getComponentScaleY(compId, chmFmt);

  for( ; iRows != 0; iRows-- )
  {
    // process two columns per iteration; '<<csx' maps to the luma column
    for (int n = 0; n < iCols; n+=2 )
    {
      uiSum += getWeightedMSE( piOrg[n ], piCur[n ], lumaWeights[piOrgLuma[(n )<<csx]], uiShift );
      uiSum += getWeightedMSE( piOrg[n+cf], piCur[n+cf], lumaWeights[piOrgLuma[(n+cf)<<csx]], uiShift );
    }

    piOrg += iStrideOrg;
    piCur += iStrideCur;
    piOrgLuma += iStrideOrgLuma<<cShiftY;
  }

  // undo the double counting of the width-1 special case (no-op for cf == 1)
  return ( uiSum >> ( 1 - cf ) );
}
2006 | | |
2007 | | static Distortion fixWeightedSSE_Core( const DistParam& rcDtParam, uint32_t fixedPTweight ) |
2008 | 0 | { |
2009 | 0 | int iRows = rcDtParam.org.height; |
2010 | 0 | const Pel* piOrg = rcDtParam.org.buf; |
2011 | 0 | const Pel* piCur = rcDtParam.cur.buf; |
2012 | 0 | const int iCols = rcDtParam.org.width; |
2013 | 0 | const int iStrideCur = rcDtParam.cur.stride; |
2014 | 0 | const int iStrideOrg = rcDtParam.org.stride; |
2015 | |
|
2016 | 0 | Distortion uiSum = 0; |
2017 | 0 | uint32_t uiShift = 16 + (DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth) << 1); |
2018 | | |
2019 | | // cf, column factor, offset of the second column, to be set to '0' for width of '1' |
2020 | 0 | const int cf = 1 - ( iCols & 1 ); |
2021 | 0 | CHECK( ( iCols & 1 ) && iCols != 1, "Width can only be even or equal to '1'!" ); |
2022 | | |
2023 | 0 | for( ; iRows != 0; iRows-- ) |
2024 | 0 | { |
2025 | 0 | for (int n = 0; n < iCols; n+=2 ) |
2026 | 0 | { |
2027 | 0 | uiSum += getWeightedMSE( piOrg[n ], piCur[n ], fixedPTweight, uiShift ); |
2028 | 0 | uiSum += getWeightedMSE( piOrg[n+cf], piCur[n+cf], fixedPTweight, uiShift ); |
2029 | 0 | } |
2030 | 0 | piOrg += iStrideOrg; |
2031 | 0 | piCur += iStrideCur; |
2032 | 0 | } |
2033 | |
|
2034 | 0 | return ( uiSum >> ( 1 - cf ) ); |
2035 | 0 | } |
2036 | | |
2037 | | Distortion RdCost::xGetSSE_WTD( const DistParam &rcDtParam ) const |
2038 | 0 | { |
2039 | 0 | if( rcDtParam.applyWeight ) |
2040 | 0 | { |
2041 | 0 | THROW("no support"); |
2042 | 0 | } |
2043 | | |
2044 | 0 | if ((m_signalType == RESHAPE_SIGNAL_SDR || m_signalType == RESHAPE_SIGNAL_HLG) && rcDtParam.compID != COMP_Y) |
2045 | 0 | { |
2046 | 0 | const uint32_t fixedPTweight = ( uint32_t ) ( m_chromaWeight * ( double ) ( 1 << 16 ) ); |
2047 | |
|
2048 | 0 | return m_fxdWtdPredPtr( rcDtParam, fixedPTweight ); |
2049 | 0 | } |
2050 | 0 | else |
2051 | 0 | { |
2052 | 0 | return m_wtdPredPtr[getComponentScaleX(rcDtParam.compID, m_cf)]( rcDtParam, m_cf, m_reshapeLumaLevelToWeightPLUT ); |
2053 | 0 | } |
2054 | | |
2055 | 0 | return 0; |
2056 | 0 | } |
2057 | | |
2058 | 0 | void RdCost::xGetSAD8X5(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) { |
2059 | 0 | DistParam rcDtParamTmp0 = rcDtParam; |
2060 | |
|
2061 | 0 | DistParam rcDtParamTmp1 = rcDtParam; |
2062 | 0 | rcDtParamTmp1.org.buf += 1; |
2063 | 0 | rcDtParamTmp1.cur.buf -= 1; |
2064 | |
|
2065 | 0 | DistParam rcDtParamTmp2 = rcDtParam; |
2066 | 0 | rcDtParamTmp2.org.buf += 2; |
2067 | 0 | rcDtParamTmp2.cur.buf -= 2; |
2068 | |
|
2069 | 0 | DistParam rcDtParamTmp3 = rcDtParam; |
2070 | 0 | rcDtParamTmp3.org.buf += 3; |
2071 | 0 | rcDtParamTmp3.cur.buf -= 3; |
2072 | |
|
2073 | 0 | DistParam rcDtParamTmp4 = rcDtParam; |
2074 | 0 | rcDtParamTmp4.org.buf += 4; |
2075 | 0 | rcDtParamTmp4.cur.buf -= 4; |
2076 | | |
2077 | 0 | cost[0] = (RdCost::xGetSAD8(rcDtParamTmp0)) >> 1; |
2078 | 0 | cost[1] = (RdCost::xGetSAD8(rcDtParamTmp1)) >> 1; |
2079 | 0 | if (isCalCentrePos) cost[2] = (RdCost::xGetSAD8(rcDtParamTmp2)) >> 1; |
2080 | 0 | cost[3] = (RdCost::xGetSAD8(rcDtParamTmp3)) >> 1; |
2081 | 0 | cost[4] = (RdCost::xGetSAD8(rcDtParamTmp4)) >> 1; |
2082 | 0 | } |
2083 | | |
2084 | 0 | void RdCost::xGetSAD16X5(const DistParam& rcDtParam, Distortion* cost, bool isCalCentrePos) { |
2085 | 0 | DistParam rcDtParamTmp0 = rcDtParam; |
2086 | |
|
2087 | 0 | DistParam rcDtParamTmp1 = rcDtParam; |
2088 | 0 | rcDtParamTmp1.org.buf += 1; |
2089 | 0 | rcDtParamTmp1.cur.buf -= 1; |
2090 | |
|
2091 | 0 | DistParam rcDtParamTmp2 = rcDtParam; |
2092 | 0 | rcDtParamTmp2.org.buf += 2; |
2093 | 0 | rcDtParamTmp2.cur.buf -= 2; |
2094 | |
|
2095 | 0 | DistParam rcDtParamTmp3 = rcDtParam; |
2096 | 0 | rcDtParamTmp3.org.buf += 3; |
2097 | 0 | rcDtParamTmp3.cur.buf -= 3; |
2098 | |
|
2099 | 0 | DistParam rcDtParamTmp4 = rcDtParam; |
2100 | 0 | rcDtParamTmp4.org.buf += 4; |
2101 | 0 | rcDtParamTmp4.cur.buf -= 4; |
2102 | | |
2103 | 0 | cost[0] = (RdCost::xGetSAD16(rcDtParamTmp0)) >> 1; |
2104 | 0 | cost[1] = (RdCost::xGetSAD16(rcDtParamTmp1)) >> 1; |
2105 | 0 | if (isCalCentrePos) cost[2] = (RdCost::xGetSAD16(rcDtParamTmp2)) >> 1; |
2106 | 0 | cost[3] = (RdCost::xGetSAD16(rcDtParamTmp3)) >> 1; |
2107 | 0 | cost[4] = (RdCost::xGetSAD16(rcDtParamTmp4)) >> 1; |
2108 | 0 | } |
2109 | | |
2110 | | void RdCost::setDistParamGeo(DistParam &rcDP, const CPelBuf &org, const Pel *piRefY, int iRefStride, const Pel *mask, |
2111 | | int iMaskStride, int stepX, int iMaskStride2, int bitDepth, ComponentID compID) |
2112 | 0 | { |
2113 | 0 | rcDP.bitDepth = bitDepth; |
2114 | 0 | rcDP.compID = compID; |
2115 | | |
2116 | | // set Original & Curr Pointer / Stride |
2117 | 0 | rcDP.org = org; |
2118 | 0 | rcDP.cur.buf = piRefY; |
2119 | 0 | rcDP.cur.stride = iRefStride; |
2120 | | |
2121 | | // set Mask |
2122 | 0 | rcDP.mask = mask; |
2123 | 0 | rcDP.maskStride = iMaskStride; |
2124 | 0 | rcDP.stepX = stepX; |
2125 | 0 | rcDP.maskStride2 = iMaskStride2; |
2126 | | |
2127 | | // set Block Width / Height |
2128 | 0 | rcDP.cur.width = org.width; |
2129 | 0 | rcDP.cur.height = org.height; |
2130 | 0 | rcDP.maximumDistortionForEarlyExit = MAX_DISTORTION; |
2131 | | |
2132 | | // set Cost function for motion estimation with Mask |
2133 | 0 | rcDP.distFunc = m_afpDistortFunc[0][DF_SAD_WITH_MASK]; |
2134 | 0 | } |
2135 | | |
// SAD with per-sample mask weighting (geometric partitioning).
// Rows may be subsampled by 'subShift' (every subStep-th row is visited and
// the partial sum is scaled back up at the end). The mask pointer advances
// by 'stepX' per sample inside a row, then by the (already subsampled)
// mask row stride plus a second stride term after each row.
Distortion RdCost::xGetSADwMask(const DistParam &rcDtParam)
{
  const Pel * org = rcDtParam.org.buf;
  const Pel * cur = rcDtParam.cur.buf;
  const Pel * mask = rcDtParam.mask;
  const int cols = rcDtParam.org.width;
  int rows = rcDtParam.org.height;
  const int subShift = rcDtParam.subShift;
  const int subStep = (1 << subShift);
  // strides are pre-multiplied so each iteration skips the unvisited rows
  const int strideCur = rcDtParam.cur.stride * subStep;
  const int strideOrg = rcDtParam.org.stride * subStep;
  const int strideMask = rcDtParam.maskStride * subStep;
  const int stepX = rcDtParam.stepX;
  const int strideMask2 = rcDtParam.maskStride2;
  const uint32_t distortionShift = DISTORTION_PRECISION_ADJUSTMENT(rcDtParam.bitDepth);

  Distortion sum = 0;
  for (; rows != 0; rows -= subStep)
  {
    for (int n = 0; n < cols; n++)
    {
      // absolute difference weighted by the current mask sample
      sum += abs(org[n] - cur[n]) * *mask;
      mask += stepX;
    }
    org += strideOrg;
    cur += strideCur;
    // row advance for the mask: regular stride plus the secondary stride
    // (the in-row stepX advances have already moved the pointer by cols*stepX)
    mask += strideMask;
    mask += strideMask2;
  }
  // compensate for the skipped rows, then normalize to distortion precision
  sum <<= subShift;
  return (sum >> distortionShift);
}
2168 | | |
2169 | | Distortion RdCost::getBvCostMultiplePredsIBC(int x, int y, bool useIMV) |
2170 | 0 | { |
2171 | 0 | return Distortion(m_dCostIBC * getBitsMultiplePredsIBC(x, y, useIMV)); |
2172 | 0 | } |
2173 | | |
2174 | | static inline unsigned getIComponentBitsIBC( int val ) |
2175 | 0 | { |
2176 | 0 | if( !val ) return 1; |
2177 | | |
2178 | 0 | const unsigned int l2 = floorLog2( (val <= 0) ? (-val << 1) + 1 : (val << 1) ); |
2179 | |
|
2180 | 0 | return (l2 << 1) + 1; |
2181 | 0 | } |
2182 | | |
2183 | | unsigned int RdCost::getBitsMultiplePredsIBC(int x, int y, bool useIMV) |
2184 | 0 | { |
2185 | 0 | int rmvH[2]; |
2186 | 0 | int rmvV[2]; |
2187 | 0 | rmvH[0] = x - m_bvPredictors[0].hor; |
2188 | 0 | rmvH[1] = x - m_bvPredictors[1].hor; |
2189 | |
|
2190 | 0 | rmvV[0] = y - m_bvPredictors[0].ver; |
2191 | 0 | rmvV[1] = y - m_bvPredictors[1].ver; |
2192 | 0 | int absCand[2]; |
2193 | 0 | absCand[0] = abs(rmvH[0]) + abs(rmvV[0]); |
2194 | 0 | absCand[1] = abs(rmvH[1]) + abs(rmvV[1]); |
2195 | |
|
2196 | 0 | if (useIMV && x % 4 == 0 && y % 4 == 0) |
2197 | 0 | { |
2198 | 0 | int rmvHQP[2]; |
2199 | 0 | int rmvVQP[2]; |
2200 | |
|
2201 | 0 | int imvShift = 2; |
2202 | 0 | int offset = 1 << (imvShift - 1); |
2203 | |
|
2204 | 0 | rmvHQP[0] = (x >> 2) - ((m_bvPredictors[0].hor + offset) >> 2); |
2205 | 0 | rmvHQP[1] = (x >> 2) - ((m_bvPredictors[1].hor + offset) >> 2); |
2206 | 0 | rmvVQP[0] = (y >> 2) - ((m_bvPredictors[0].ver + offset) >> 2); |
2207 | 0 | rmvVQP[1] = (y >> 2) - ((m_bvPredictors[1].ver + offset) >> 2); |
2208 | |
|
2209 | 0 | int absCandQP[2]; |
2210 | 0 | absCandQP[0] = abs(rmvHQP[0]) + abs(rmvVQP[0]); |
2211 | 0 | absCandQP[1] = abs(rmvHQP[1]) + abs(rmvVQP[1]); |
2212 | 0 | unsigned int candBits0QP, candBits1QP; |
2213 | 0 | if (absCand[0] < absCand[1]) |
2214 | 0 | { |
2215 | 0 | unsigned int candBits0 = getIComponentBitsIBC(rmvH[0]) + getIComponentBitsIBC(rmvV[0]); |
2216 | 0 | if (absCandQP[0] < absCandQP[1]) |
2217 | 0 | { |
2218 | 0 | candBits0QP = getIComponentBitsIBC(rmvHQP[0]) + getIComponentBitsIBC(rmvVQP[0]); |
2219 | 0 | return candBits0QP < candBits0 ? candBits0QP : candBits0; |
2220 | 0 | } |
2221 | 0 | else |
2222 | 0 | { |
2223 | 0 | candBits1QP = getIComponentBitsIBC(rmvHQP[1]) + getIComponentBitsIBC(rmvVQP[1]); |
2224 | 0 | return candBits1QP < candBits0 ? candBits1QP : candBits0; |
2225 | 0 | } |
2226 | 0 | } |
2227 | 0 | else |
2228 | 0 | { |
2229 | 0 | unsigned int candBits1 = getIComponentBitsIBC(rmvH[1]) + getIComponentBitsIBC(rmvV[1]); |
2230 | 0 | if (absCandQP[0] < absCandQP[1]) |
2231 | 0 | { |
2232 | 0 | candBits0QP = getIComponentBitsIBC(rmvHQP[0]) + getIComponentBitsIBC(rmvVQP[0]); |
2233 | 0 | return candBits0QP < candBits1 ? candBits0QP : candBits1; |
2234 | 0 | } |
2235 | 0 | else |
2236 | 0 | { |
2237 | 0 | candBits1QP = getIComponentBitsIBC(rmvHQP[1]) + getIComponentBitsIBC(rmvVQP[1]); |
2238 | 0 | return candBits1QP < candBits1 ? candBits1QP : candBits1; |
2239 | 0 | } |
2240 | 0 | } |
2241 | 0 | } |
2242 | 0 | else |
2243 | 0 | { |
2244 | 0 | if (absCand[0] < absCand[1]) |
2245 | 0 | { |
2246 | 0 | return getIComponentBitsIBC(rmvH[0]) + getIComponentBitsIBC(rmvV[0]); |
2247 | 0 | } |
2248 | 0 | else |
2249 | 0 | { |
2250 | 0 | return getIComponentBitsIBC(rmvH[1]) + getIComponentBitsIBC(rmvV[1]); |
2251 | 0 | } |
2252 | 0 | } |
2253 | 0 | } |
2254 | | |
2255 | | } // namespace vvenc |
2256 | | |
2257 | | //! \} |
2258 | | |