Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/EncoderLib/InterSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     InterSearch.cpp
45
 *  \brief    encoder inter search class
46
 */
47
48
#include "InterSearch.h"
49
#include "EncModeCtrl.h"
50
#include "EncLib.h"
51
#include "CommonLib/CommonDef.h"
52
#include "CommonLib/Rom.h"
53
#include "CommonLib/MotionInfo.h"
54
#include "CommonLib/Picture.h"
55
#include "CommonLib/UnitTools.h"
56
#include "CommonLib/Reshape.h"
57
#include "CommonLib/dtrace_next.h"
58
#include "CommonLib/dtrace_buffer.h"
59
#include "CommonLib/TimeProfiler.h"
60
61
#include <math.h>
62
63
 //! \ingroup EncoderLib
64
 //! \{
65
66
namespace vvenc {
67
68
// Nine refinement offsets: every combination of {-1, 0, +1} in both Mv
// components. Entry 0 is the zero offset, entries 1-4 change exactly one
// component and entries 5-8 change both.
// NOTE(review): the 'H' suffix presumably stands for half-pel refinement -
// confirm against the code that indexes this table.
static const Mv s_acMvRefineH[9] =
{
  Mv(  0,  0 ), // 0 : no displacement
  Mv(  0, -1 ), // 1 : second component -1
  Mv(  0,  1 ), // 2 : second component +1
  Mv( -1,  0 ), // 3 : first component -1
  Mv(  1,  0 ), // 4 : first component +1
  Mv( -1, -1 ), // 5 : both components -1
  Mv(  1, -1 ), // 6 : (+1, -1)
  Mv( -1,  1 ), // 7 : (-1, +1)
  Mv(  1,  1 )  // 8 : both components +1
};
80
81
// Same nine offsets as s_acMvRefineH, but stored in a different order:
// positions 3-6 hold the offsets that s_acMvRefineH keeps at 5, 6, 3, 4.
// The trailing "H:n" note on each row is the index of the identical offset
// in s_acMvRefineH.
// NOTE(review): the 'Q' suffix presumably stands for quarter-pel refinement -
// confirm against the code that indexes this table.
static const Mv s_acMvRefineQ[9] =
{
  Mv(  0,  0 ), // [0]  H:0
  Mv(  0, -1 ), // [1]  H:1
  Mv(  0,  1 ), // [2]  H:2
  Mv( -1, -1 ), // [3]  H:5
  Mv(  1, -1 ), // [4]  H:6
  Mv( -1,  0 ), // [5]  H:3
  Mv(  1,  0 ), // [6]  H:4
  Mv( -1,  1 ), // [7]  H:7
  Mv(  1,  1 )  // [8]  H:8
};
93
94
// Skip mask with 42 rows of 9 flags each. The second dimension matches the
// nine entries of the s_acMvRefineQ / s_acMvRefineH offset tables; a 'true'
// flag presumably means "do not evaluate this refinement position" - confirm
// against the consumer. Row 0 keeps only column 0 enabled, the final row (41)
// enables every column.
// NOTE(review): the row index has the same extent (42) as s_doInterpQ, so both
// tables are presumably addressed by the same pattern id - confirm.
static const bool s_skipQpelPosition[ 42 ][ 9 ] =
{
  { false, true,  true,  true,  true,  true,  true,  true,  true  }, //  0
  { true,  true,  true,  true,  true,  false, true,  true,  true  }, //  1
  { true,  true,  true,  true,  true,  true,  false, true,  true  }, //  2
  { true,  false, true,  true,  true,  true,  true,  true,  true  }, //  3
  { true,  false, true,  false, true,  false, true,  true,  true  }, //  4
  { true,  false, true,  true,  false, true,  false, true,  true  }, //  5
  { true,  true,  false, true,  true,  true,  true,  true,  true  }, //  6
  { true,  true,  false, true,  true,  false, true,  false, true  }, //  7
  { true,  true,  false, true,  true,  true,  false, true,  false }, //  8
  { true,  true,  false, true,  true,  true,  true,  false, false }, //  9
  { true,  true,  true,  true,  true,  false, true,  true,  true  }, // 10
  { true,  true,  false, true,  true,  false, true,  false, true  }, // 11
  { true,  true,  true,  true,  true,  true,  false, true,  true  }, // 12
  { true,  true,  false, true,  true,  true,  false, true,  false }, // 13
  { true,  false, true,  false, false, true,  true,  true,  true  }, // 14
  { true,  true,  true,  true,  true,  false, true,  true,  true  }, // 15
  { true,  false, true,  false, true,  false, true,  true,  true  }, // 16
  { true,  true,  true,  true,  true,  true,  false, true,  true  }, // 17
  { true,  false, true,  true,  false, true,  false, true,  true  }, // 18
  { true,  true,  true,  true,  false, true,  false, true,  false }, // 19
  { true,  false, true,  true,  true,  true,  true,  true,  true  }, // 20
  { true,  false, true,  true,  false, true,  false, true,  true  }, // 21
  { true,  true,  false, true,  true,  true,  true,  true,  true  }, // 22
  { true,  true,  false, true,  true,  true,  false, true,  false }, // 23
  { true,  true,  true,  false, true,  false, true,  false, true  }, // 24
  { true,  false, true,  true,  true,  true,  true,  true,  true  }, // 25
  { true,  false, true,  false, true,  false, true,  true,  true  }, // 26
  { true,  true,  false, true,  true,  true,  true,  true,  true  }, // 27
  { true,  true,  false, true,  true,  false, true,  false, true  }, // 28
  { true,  true,  true,  true,  true,  true,  false, true,  true  }, // 29
  { true,  true,  false, true,  true,  true,  true,  true,  true  }, // 30
  { true,  true,  false, true,  true,  true,  false, true,  false }, // 31
  { true,  true,  true,  true,  true,  false, true,  true,  true  }, // 32
  { true,  true,  false, true,  true,  true,  true,  true,  true  }, // 33
  { true,  true,  false, true,  true,  false, true,  false, true  }, // 34
  { true,  true,  true,  true,  true,  true,  false, true,  true  }, // 35
  { true,  false, true,  true,  true,  true,  true,  true,  true  }, // 36
  { true,  false, true,  true,  false, true,  false, true,  true  }, // 37
  { true,  true,  true,  true,  true,  false, true,  true,  true  }, // 38
  { true,  false, true,  true,  true,  true,  true,  true,  true  }, // 39
  { true,  false, true,  false, true,  false, true,  true,  true  }, // 40
  { false, false, false, false, false, false, false, false, false }, // 41 : nothing skipped
};
139
140
// Interpolation mask with 42 rows of 14 flags each. The header line below
// labels the 14 columns; the labels are presumably fractional sample positions
// (with the last two, H1,0 and H3,0, being a separate pair) and 'true'
// presumably requests interpolation of that position - confirm against the
// consumer. Row 0 requests nothing, the final row (41) requests every column.
// NOTE(review): the row index has the same extent (42) as s_skipQpelPosition,
// so both tables are presumably addressed by the same pattern id - confirm.
//
//   1,0    3,0    0,1    1,1    2,1    3,1    1,2    3,2    0,3    1,3    2,3    3,3    H1,0   H3,0
static const bool s_doInterpQ[ 42 ][ 14 ] =
{
  { false, false, false, false, false, false, false, false, false, false, false, false, false, false }, //  0 : nothing to interpolate
  { false, false, false, false, false, false, false, false, true,  false, false, false, false, true  }, //  1
  { false, false, true,  false, false, false, false, false, false, false, false, false, true,  false }, //  2
  { false, true,  false, false, false, false, false, false, false, false, false, false, false, false }, //  3
  { false, true,  false, false, false, false, false, false, true,  false, false, true,  false, true  }, //  4
  { false, true,  true,  false, false, true,  false, false, false, false, false, false, true,  false }, //  5
  { true,  false, false, false, false, false, false, false, false, false, false, false, false, false }, //  6
  { true,  false, false, false, false, false, false, false, true,  true,  false, false, false, true  }, //  7
  { true,  false, true,  true,  false, false, false, false, false, false, false, false, true,  false }, //  8
  { false, true,  false, false, false, true,  false, false, false, false, false, true,  true,  true  }, //  9
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  }, // 10
  { false, true,  false, false, false, false, false, false, false, false, true,  true,  false, true  }, // 11
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false }, // 12
  { false, true,  false, false, true,  true,  false, false, false, false, false, false, true,  false }, // 13
  { true,  false, false, true,  false, false, false, false, false, true,  false, false, true,  true  }, // 14
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  }, // 15
  { true,  false, false, false, false, false, false, false, false, true,  true,  false, false, true  }, // 16
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false }, // 17
  { true,  false, false, true,  true,  false, false, false, false, false, false, false, true,  false }, // 18
  { false, false, false, false, false, false, false, false, true,  true,  false, true,  false, true  }, // 19
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false }, // 20
  { false, false, false, false, false, false, false, true,  true,  false, false, true,  false, true  }, // 21
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false }, // 22
  { false, false, false, false, false, false, true,  false, true,  true,  false, false, false, true  }, // 23
  { false, false, true,  true,  false, true,  false, false, false, false, false, false, true,  false }, // 24
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false }, // 25
  { false, false, true,  false, false, true,  false, true,  false, false, false, false, true,  false }, // 26
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false }, // 27
  { false, false, true,  true,  false, false, true,  false, false, false, false, false, true,  false }, // 28
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  }, // 29
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false }, // 30
  { false, false, false, false, false, false, false, true,  false, false, true,  true,  false, true  }, // 31
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false }, // 32
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false }, // 33
  { false, false, false, false, true,  true,  false, true,  false, false, false, false, true,  false }, // 34
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  }, // 35
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false }, // 36
  { false, false, false, false, false, false, true,  false, false, true,  true,  false, false, true  }, // 37
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false }, // 38
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false }, // 39
  { false, false, false, true,  true,  false, true,  false, false, false, false, false, true,  false }, // 40
  { true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true  }, // 41 : interpolate everything
};
186
187
// Namespace-scope definition of the static data member. It carries no
// initializer here, so the value has to come from the in-class declaration
// (not visible in this chunk).
const int BlkUniMvInfoBuffer::m_uniMvListMaxSize;
188
189
/**
 * Default-construct an InterSearch in an "not yet initialised" state:
 * collaborator pointers are null, search ranges are zero and the adaptive
 * search-range and MVP-index-cost tables are zero-filled. No buffers are
 * allocated here; init() does that.
 */
InterSearch::InterSearch()
  : m_modeCtrl                    (nullptr)
  , m_defaultCachedBvs            (nullptr)
  , m_pcEncCfg                    (nullptr)
  , m_pcTrQuant                   (nullptr)
  , m_iSearchRange                (0)
  , m_bipredSearchRange           (0)
  , m_motionEstimationSearchMethod(VVENC_MESEARCH_FULL)
  , m_motionEstimationSearchMethodSCC( 0 )
  , m_CABACEstimator              (nullptr)
  , m_CtxCache                    (nullptr)
  , m_pTempPel                    (nullptr)
{
  // destroy() runs from the destructor and delete[]s these raw buffers after a
  // null test, so they must hold a defined value even when init() was never
  // called. They are assigned in the body (not the initializer list) so this
  // does not depend on the member declaration order in the header.
  m_tmpAffiError   = nullptr;
  m_tmpAffiDeri[0] = nullptr;
  m_tmpAffiDeri[1] = nullptr;
  m_pSaveCS        = nullptr;

  // zero the adaptive search range table, one row per reference list
  for (int i=0; i<MAX_NUM_REF_LIST_ADAPT_SR; i++)
  {
    memset (m_aaiAdaptSR[i], 0, MAX_IDX_ADAPT_SR * sizeof (int));
  }
  // zero the MVP index cost table; init() fills in the real costs
  for (int i=0; i<AMVP_MAX_NUM_CANDS+1; i++)
  {
    memset (m_auiMVPIdxCost[i], 0, (AMVP_MAX_NUM_CANDS+1) * sizeof (uint32_t) );
  }
}
211
212
213
// Releases everything init() allocated by delegating to destroy().
InterSearch::~InterSearch()
{
  this->destroy();
}
217
218
void InterSearch::init( const VVEncCfg& encCfg, TrQuant* pTrQuant, RdCost* pRdCost, EncModeCtrl* pModeCtrl, CodingStructure **pSaveCS )
219
0
{
220
0
  InterPrediction::init( pRdCost, encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_ifpLines );
221
0
  m_numBVs                       = 0;
222
0
  m_pcEncCfg                     = &encCfg;
223
0
  m_pcTrQuant                    = pTrQuant;
224
0
  m_pcRdCost                     = pRdCost;
225
0
  m_modeCtrl                     = pModeCtrl;
226
0
  m_pSaveCS                      = pSaveCS;
227
228
0
  m_iSearchRange                    = encCfg.m_SearchRange;
229
0
  m_bipredSearchRange               = encCfg.m_bipredSearchRange;
230
0
  m_motionEstimationSearchMethod    = vvencMESearchMethod( encCfg.m_motionEstimationSearchMethod );
231
0
  m_motionEstimationSearchMethodSCC = encCfg.m_motionEstimationSearchMethodSCC;
232
233
0
  for( uint32_t iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++ )
234
0
  {
235
0
    for( uint32_t iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++ )
236
0
    {
237
0
      m_aaiAdaptSR[iDir][iRefIdx] = m_iSearchRange;
238
0
    }
239
0
  }
240
241
  // initialize motion cost
242
0
  for( int iNum = 0; iNum < AMVP_MAX_NUM_CANDS + 1; iNum++ )
243
0
  {
244
0
    for( int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++ )
245
0
    {
246
0
      if( iIdx < iNum )
247
0
      {
248
0
        m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits( iIdx, iNum );
249
0
      }
250
0
      else
251
0
      {
252
0
        m_auiMVPIdxCost[iIdx][iNum] = MAX_UINT;
253
0
      }
254
0
    }
255
0
  }
256
257
0
  const ChromaFormat cform   = encCfg.m_internChromaFormat;
258
0
  const int          ctuSize = encCfg.m_CTUSize;
259
0
  for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
260
0
  {
261
0
    m_tmpPredStorage[i].create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
262
0
  }
263
0
  m_tmpStorageLCU.create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
264
0
  m_pTempPel = new Pel[ctuSize * ctuSize];
265
0
  m_tmpAffiStorage.create(UnitArea(cform, Area(0, 0, ctuSize, ctuSize + 2)));  // allow overread by 2 samples
266
0
  m_tmpAffiError = new Pel[ctuSize * ctuSize];
267
0
  m_tmpAffiDeri[0] = new Pel[ctuSize * ctuSize];
268
0
  m_tmpAffiDeri[1] = new Pel[ctuSize * ctuSize];
269
270
0
  CompArea chromaArea( COMP_Cb, cform, Area( 0, 0, encCfg.m_CTUSize, encCfg.m_CTUSize ), true );
271
0
  for( int i = 0; i < 4; i++ )
272
0
  {
273
0
    m_orgResiCb[i].create( chromaArea );
274
0
    m_orgResiCr[i].create( chromaArea );
275
0
  }
276
0
}
277
278
void InterSearch::destroy()
279
0
{
280
0
  if ( m_pTempPel )
281
0
  {
282
0
    delete [] m_pTempPel;
283
0
    m_pTempPel = nullptr;
284
0
  }
285
286
0
  for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
287
0
  {
288
0
    m_tmpPredStorage[i].destroy();
289
0
  }
290
0
  m_tmpStorageLCU.destroy();
291
0
  m_tmpAffiStorage.destroy();
292
0
  if (m_tmpAffiError != NULL)
293
0
  {
294
0
    delete[] m_tmpAffiError;
295
0
    m_tmpAffiError = nullptr;
296
0
  }
297
0
  if (m_tmpAffiDeri[0] != NULL)
298
0
  {
299
0
    delete[] m_tmpAffiDeri[0];
300
0
    m_tmpAffiDeri[0] = nullptr;
301
0
  }
302
0
  if (m_tmpAffiDeri[1] != NULL)
303
0
  {
304
0
    delete[] m_tmpAffiDeri[1];
305
0
    m_tmpAffiDeri[1] = nullptr;
306
0
  }
307
308
0
  m_pSaveCS  = nullptr;
309
0
}
310
311
/**
 * Store the resource pointers handed in by the caller. Nothing is copied,
 * allocated or validated here; each argument simply replaces the matching
 * member pointer.
 */
void InterSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache, ReuseUniMv* pReuseUniMv, BlkUniMvInfoBuffer* pBlkUniMvInfoBuffer, AffineProfList* pAffineProfList, IbcBvCand* pCachedBvs )
{
  // entropy-coding estimation resources
  m_CABACEstimator = cabacEstimator;
  m_CtxCache       = ctxCache;

  // motion-vector reuse caches
  m_ReuseUniMv         = pReuseUniMv;
  m_BlkUniMvInfoBuffer = pBlkUniMvInfoBuffer;

  // remaining per-mode caches
  m_AffineProfList   = pAffineProfList;
  m_defaultCachedBvs = pCachedBvs;
}
320
321
/**
 * Start with an empty cache: every slot of m_reusedUniMVs in the index range
 * [0, MAX_CU_SIZE_IDX-2) x [0, MAX_CU_SIZE_IDX-2) x [0, numPos) x [0, numPos)
 * is set to nullptr, with numPos = MAX_CU_SIZE >> MIN_CU_LOG2.
 */
ReuseUniMv::ReuseUniMv()
{
  const int sizeIdxEnd = MAX_CU_SIZE_IDX-2;
  const int posEnd     = MAX_CU_SIZE >> MIN_CU_LOG2;

  for( int w = 0; w < sizeIdxEnd; w++ )
  {
    for( int h = 0; h < sizeIdxEnd; h++ )
    {
      for( int px = 0; px < posEnd; px++ )
      {
        for( int py = 0; py < posEnd; py++ )
        {
          m_reusedUniMVs[ w ][ h ][ px ][ py ] = nullptr;
        }
      }
    }
  }
}
339
340
// Frees every cached array by delegating to resetReusedUniMvs().
ReuseUniMv::~ReuseUniMv()
{
  this->resetReusedUniMvs();
}
344
345
void ReuseUniMv::resetReusedUniMvs()
346
0
{
347
0
  const int numPos     = MAX_CU_SIZE >> MIN_CU_LOG2;
348
0
  const int maxSizeIdx = MAX_CU_SIZE_IDX-2;
349
0
  for ( int wIdx = 0; wIdx < maxSizeIdx; wIdx++ )
350
0
  {
351
0
    for ( int hIdx = 0; hIdx < maxSizeIdx; hIdx++ )
352
0
    {
353
0
      for ( int y = 0; y < numPos; y++ )
354
0
      {
355
0
        for ( int x = 0; x < numPos; x++ )
356
0
        {
357
0
          if ( m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] )
358
0
          {
359
0
            delete [] m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ];
360
0
            m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] = nullptr;
361
0
          }
362
0
        }
363
0
      }
364
0
    }
365
0
  }
366
0
}
367
368
void InterSearch::loadGlobalUniMvs( const Area& lumaArea, const PreCalcValues& pcv)
369
0
{
370
0
  unsigned idx1, idx2, idx3, idx4;
371
0
  getAreaIdxNew(lumaArea, pcv, idx1, idx2, idx3, idx4);
372
0
  if( m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4])
373
0
  {
374
//    DTRACE( g_trace_ctx, D_TMP, "%d unimv load %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
375
0
    m_BlkUniMvInfoBuffer->insertUniMvCands(lumaArea, m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4]);
376
0
  }
377
0
}
378
379
/**
 * Estimate the residual distortion for the CU and, when pre-analysis is
 * requested, choose the SBT mode to start from.
 *
 * \param tempCS            coding structure passed to xCalcMinDistSbt()
 * \param cu                coding unit under test
 * \param histBestSbt       [out] written only when doPreAnalyzeResi is true:
 *                          0 (DCT2) or the value returned by
 *                          m_modeCtrl->findBestSbt()
 * \param curPuSse          [out] result of getEstDistSbt( NUMBER_SBT_MODE )
 * \param sbtAllowed        passed to xCalcMinDistSbt()
 * \param doPreAnalyzeResi  run the residual pre-analysis and the mode selection
 * \param mtsAllowed        when false and all SBT modes are to be skipped,
 *                          DCT2 is chosen directly
 */
void InterSearch::getBestSbt( CodingStructure* tempCS, CodingUnit* cu, uint8_t& histBestSbt, Distortion& curPuSse, uint8_t sbtAllowed, bool doPreAnalyzeResi, bool mtsAllowed )
{
  // reset the state that xCalcMinDistSbt() may update
  m_skipSbtAll                     = false;
  m_estMinDistSbt[NUMBER_SBT_MODE] = MAX_DISTORTION;

  if( doPreAnalyzeResi )
  {
    xCalcMinDistSbt( *tempCS, *cu, sbtAllowed );
  }

  curPuSse = getEstDistSbt( NUMBER_SBT_MODE );

  if( !doPreAnalyzeResi )
  {
    return; // no mode selection without pre-analysis
  }

  if( m_skipSbtAll && !mtsAllowed )
  {
    histBestSbt = 0; //try DCT2
    return;
  }

  // scale the SSE down before the lookup; the area term is capped at 9
  const int slShift = 4 + std::min( Log2( cu->lwidth() * cu->lheight() ), 9 );
  assert( curPuSse != MAX_DISTORTION );
  histBestSbt = m_modeCtrl->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );

  // special case: the lookup returned an SBT mode although SBT is to be skipped
  if( m_skipSbtAll && CU::isSbtMode( histBestSbt ) )
  {
    histBestSbt = 0; //try DCT2
  }
}
409
410
411
/**
 * Evaluate one candidate position and record it in rcStruct if it beats the
 * current best cost.
 *
 * \param rcStruct    search state; best cost / position / distance / point
 *                    number are updated on improvement and uiBestRound is
 *                    reset to 0
 * \param iSearchX    candidate horizontal position (offset into the reference)
 * \param iSearchY    candidate vertical position (multiplied by the stride)
 * \param ucPointNr   value stored in rcStruct.ucPointNr on improvement
 * \param uiDistance  value stored in rcStruct.uiBestDistance on improvement
 */
inline void InterSearch::xTZSearchHelp( TZSearchStruct& rcStruct, const int iSearchX, const int iSearchY, const uint8_t ucPointNr, const uint32_t uiDistance )
{
  // aim the distortion parameters at the candidate block in the reference
  m_cDistParam.cur.buf = rcStruct.piRefY + iSearchY * rcStruct.iRefStride + iSearchX;

  Distortion cost = m_cDistParam.distFunc( m_cDistParam );

  // the motion cost can only increase the total, so a distortion that is
  // already no better than the best cannot win
  if( cost >= rcStruct.uiBestSad )
  {
    return;
  }

  cost += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY, rcStruct.imvShift );

  if( cost >= rcStruct.uiBestSad )
  {
    return;
  }

  // new best candidate
  rcStruct.iBestX         = iSearchX;
  rcStruct.iBestY         = iSearchY;
  rcStruct.ucPointNr      = ucPointNr;
  rcStruct.uiBestDistance = uiDistance;
  rcStruct.uiBestRound    = 0;
  rcStruct.uiBestSad      = cost;
  m_cDistParam.maximumDistortionForEarlyExit = cost;
}
440
441
442
443
/**
 * Two-point refinement around the current best position.
 *
 * Point numbering of the 3x3 neighbourhood:
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * rcStruct.ucPointNr selects, via the offset tables below, the two neighbours
 * of the best position that are evaluated (the two still-untested points
 * around the start point). Each is probed only if it lies inside the search
 * range.
 *
 * NOTE(review): ucPointNr indexes tables with 9 entries and is not range
 * checked here; callers must keep it in [0, 8].
 */
inline void InterSearch::xTZ2PointSearch( TZSearchStruct& rcStruct )
{
  static const int dx[2][9] = { {  0, -1, -1,  0, -1, +1, -1, -1, +1 }, {  0,  0, +1, +1, -1, +1,  0, +1,  0 } };
  static const int dy[2][9] = { {  0,  0, -1, -1, +1, -1,  0, +1,  0 }, {  0, -1, -1,  0, -1, +1, +1, +1, +1 } };

  const SearchRange& range = rcStruct.searchRange;

  // Both candidates are derived up front: the first probe may overwrite
  // iBestX / iBestY / ucPointNr, and the second candidate must still be
  // relative to the position that was best on entry.
  const int pt       = rcStruct.ucPointNr;
  const int candX[2] = { rcStruct.iBestX + dx[0][pt], rcStruct.iBestX + dx[1][pt] };
  const int candY[2] = { rcStruct.iBestY + dy[0][pt], rcStruct.iBestY + dy[1][pt] };

  for( int k = 0; k < 2; k++ )
  {
    const bool inside = candX[k] >= range.left && candX[k] <= range.right
                     && candY[k] >= range.top  && candY[k] <= range.bottom;
    if( inside )
    {
      xTZSearchHelp( rcStruct, candX[k], candY[k], 0, 2 );
    }
  }
}
469
470
/**
 * Probe the four corners of the square at distance iDist around the start
 * point, skipping corners outside the search range.
 *
 * Point numbering:
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * Only 1, 3, 6 and 8 are visited, in that order. rcStruct.uiBestRound is
 * incremented once per call (xTZSearchHelp resets it on improvement).
 *
 * \param iDist  must be 1 or 2 (enforced by CHECK)
 */
inline void InterSearch::xTZ4PointSquareSearch( TZSearchStruct & rcStruct, const int iStartX, const int iStartY, const int iDist )
{
  CHECK( iDist == 0 || iDist > 2, "Invalid distance" );

  const SearchRange& range = rcStruct.searchRange;

  const int yTop    = iStartY - iDist;
  const int yBottom = iStartY + iDist;
  const int xLeft   = iStartX - iDist;
  const int xRight  = iStartX + iDist;

  // the search range is not touched by the probes, so test it once
  const bool leftIn  = xLeft  >= range.left;
  const bool rightIn = xRight <= range.right;

  rcStruct.uiBestRound += 1;

  if( yTop >= range.top )
  {
    if( leftIn )  // top left
    {
      xTZSearchHelp( rcStruct, xLeft, yTop, 1, iDist );
    }
    if( rightIn ) // top right
    {
      xTZSearchHelp( rcStruct, xRight, yTop, 3, iDist );
    }
  }

  if( yBottom <= range.bottom )
  {
    if( leftIn )  // bottom left
    {
      xTZSearchHelp( rcStruct, xLeft, yBottom, 6, iDist );
    }
    if( rightIn ) // bottom right
    {
      xTZSearchHelp( rcStruct, xRight, yBottom, 8, iDist );
    }
  }
}
506
507
/**
 * Probe the eight points of the square at distance iDist around the start
 * point, row by row, skipping points outside the search range.
 *
 * Point numbering (also the probe order):
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * The middle points of the top and bottom rows (2 and 7) are only guarded by
 * the vertical range test; the start column itself is not re-checked.
 * rcStruct.uiBestRound is incremented once per call.
 *
 * \param iDist  must be non-zero (enforced by CHECK)
 */
inline void InterSearch::xTZ8PointSquareSearch( TZSearchStruct& rcStruct, const int iStartX, const int iStartY, const int iDist )
{
  CHECK( iDist == 0 , "Invalid distance");

  const SearchRange& range = rcStruct.searchRange;

  const int yTop    = iStartY - iDist;
  const int yBottom = iStartY + iDist;
  const int xLeft   = iStartX - iDist;
  const int xRight  = iStartX + iDist;

  // the search range is not touched by the probes, so test it once
  const bool leftIn  = xLeft  >= range.left;
  const bool rightIn = xRight <= range.right;

  rcStruct.uiBestRound += 1;

  // top row: 1, 2, 3
  if( yTop >= range.top )
  {
    if( leftIn )
    {
      xTZSearchHelp( rcStruct, xLeft, yTop, 1, iDist );
    }
    xTZSearchHelp( rcStruct, iStartX, yTop, 2, iDist );
    if( rightIn )
    {
      xTZSearchHelp( rcStruct, xRight, yTop, 3, iDist );
    }
  }

  // middle row: 4, 5
  if( leftIn )
  {
    xTZSearchHelp( rcStruct, xLeft, iStartY, 4, iDist );
  }
  if( rightIn )
  {
    xTZSearchHelp( rcStruct, xRight, iStartY, 5, iDist );
  }

  // bottom row: 6, 7, 8
  if( yBottom <= range.bottom )
  {
    if( leftIn )
    {
      xTZSearchHelp( rcStruct, xLeft, yBottom, 6, iDist );
    }
    xTZSearchHelp( rcStruct, iStartX, yBottom, 7, iDist );
    if( rightIn )
    {
      xTZSearchHelp( rcStruct, xRight, yBottom, 8, iDist );
    }
  }
}
557
558
inline void InterSearch::xTZ8PointDiamondSearch( TZSearchStruct& rcStruct,
559
                                                 const int iStartX,
560
                                                 const int iStartY,
561
                                                 const int iDist,
562
                                                 const bool bCheckCornersAtDist1 )
563
0
{
564
0
  const SearchRange& sr = rcStruct.searchRange;
565
  // 8 point search,                   //   1 2 3
566
  // search around the start point     //   4 0 5
567
  // with the required  distance       //   6 7 8
568
0
  CHECK( iDist == 0, "Invalid distance" );
569
0
  const int iTop        = iStartY - iDist;
570
0
  const int iBottom     = iStartY + iDist;
571
0
  const int iLeft       = iStartX - iDist;
572
0
  const int iRight      = iStartX + iDist;
573
0
  rcStruct.uiBestRound += 1;
574
575
0
  if ( iDist == 1 )
576
0
  {
577
0
    if ( iTop >= sr.top ) // check top
578
0
    {
579
0
      if (bCheckCornersAtDist1)
580
0
      {
581
0
        if ( iLeft >= sr.left) // check top-left
582
0
        {
583
0
          xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
584
0
        }
585
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
586
0
        if ( iRight <= sr.right ) // check middle right
587
0
        {
588
0
          xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
589
0
        }
590
0
      }
591
0
      else
592
0
      {
593
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
594
0
      }
595
0
    }
596
0
    if ( iLeft >= sr.left ) // check middle left
597
0
    {
598
0
      xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
599
0
    }
600
0
    if ( iRight <= sr.right ) // check middle right
601
0
    {
602
0
      xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
603
0
    }
604
0
    if ( iBottom <= sr.bottom ) // check bottom
605
0
    {
606
0
      if (bCheckCornersAtDist1)
607
0
      {
608
0
        if ( iLeft >= sr.left) // check top-left
609
0
        {
610
0
          xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
611
0
        }
612
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
613
0
        if ( iRight <= sr.right ) // check middle right
614
0
        {
615
0
          xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
616
0
        }
617
0
      }
618
0
      else
619
0
      {
620
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
621
0
      }
622
0
    }
623
0
  }
624
0
  else
625
0
  {
626
0
    if ( iDist <= 8 )
627
0
    {
628
0
      const int iTop_2      = iStartY - (iDist>>1);
629
0
      const int iBottom_2   = iStartY + (iDist>>1);
630
0
      const int iLeft_2     = iStartX - (iDist>>1);
631
0
      const int iRight_2    = iStartX + (iDist>>1);
632
633
0
      if (  iTop >= sr.top && iLeft >= sr.left &&
634
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
635
0
      {
636
0
        xTZSearchHelp( rcStruct, iStartX,  iTop,      2, iDist    );
637
0
        xTZSearchHelp( rcStruct, iLeft_2,  iTop_2,    1, iDist>>1 );
638
0
        xTZSearchHelp( rcStruct, iRight_2, iTop_2,    3, iDist>>1 );
639
0
        xTZSearchHelp( rcStruct, iLeft,    iStartY,   4, iDist    );
640
0
        xTZSearchHelp( rcStruct, iRight,   iStartY,   5, iDist    );
641
0
        xTZSearchHelp( rcStruct, iLeft_2,  iBottom_2, 6, iDist>>1 );
642
0
        xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
643
0
        xTZSearchHelp( rcStruct, iStartX,  iBottom,   7, iDist    );
644
0
      }
645
0
      else // check border
646
0
      {
647
0
        if ( iTop >= sr.top ) // check top
648
0
        {
649
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
650
0
        }
651
0
        if ( iTop_2 >= sr.top ) // check half top
652
0
        {
653
0
          if ( iLeft_2 >= sr.left ) // check half left
654
0
          {
655
0
            xTZSearchHelp( rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
656
0
          }
657
0
          if ( iRight_2 <= sr.right ) // check half right
658
0
          {
659
0
            xTZSearchHelp( rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
660
0
          }
661
0
        } // check half top
662
0
        if ( iLeft >= sr.left ) // check left
663
0
        {
664
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
665
0
        }
666
0
        if ( iRight <= sr.right ) // check right
667
0
        {
668
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
669
0
        }
670
0
        if ( iBottom_2 <= sr.bottom ) // check half bottom
671
0
        {
672
0
          if ( iLeft_2 >= sr.left ) // check half left
673
0
          {
674
0
            xTZSearchHelp( rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
675
0
          }
676
0
          if ( iRight_2 <= sr.right ) // check half right
677
0
          {
678
0
            xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
679
0
          }
680
0
        } // check half bottom
681
0
        if ( iBottom <= sr.bottom ) // check bottom
682
0
        {
683
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
684
0
        }
685
0
      } // check border
686
0
    }
687
0
    else // iDist > 8
688
0
    {
689
0
      if ( iTop >= sr.top && iLeft >= sr.left &&
690
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
691
0
      {
692
0
        xTZSearchHelp( rcStruct, iStartX, iTop,    0, iDist );
693
0
        xTZSearchHelp( rcStruct, iLeft,   iStartY, 0, iDist );
694
0
        xTZSearchHelp( rcStruct, iRight,  iStartY, 0, iDist );
695
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
696
0
        for ( int index = 1; index < 4; index++ )
697
0
        {
698
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
699
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
700
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
701
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
702
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
703
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
704
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
705
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
706
0
        }
707
0
      }
708
0
      else // check border
709
0
      {
710
0
        if ( iTop >= sr.top ) // check top
711
0
        {
712
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 0, iDist );
713
0
        }
714
0
        if ( iLeft >= sr.left ) // check left
715
0
        {
716
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 0, iDist );
717
0
        }
718
0
        if ( iRight <= sr.right ) // check right
719
0
        {
720
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 0, iDist );
721
0
        }
722
0
        if ( iBottom <= sr.bottom ) // check bottom
723
0
        {
724
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
725
0
        }
726
0
        for ( int index = 1; index < 4; index++ )
727
0
        {
728
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
729
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
730
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
731
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
732
733
0
          if ( iPosYT >= sr.top ) // check top
734
0
          {
735
0
            if ( iPosXL >= sr.left ) // check left
736
0
            {
737
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
738
0
            }
739
0
            if ( iPosXR <= sr.right ) // check right
740
0
            {
741
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
742
0
            }
743
0
          } // check top
744
0
          if ( iPosYB <= sr.bottom ) // check bottom
745
0
          {
746
0
            if ( iPosXL >= sr.left ) // check left
747
0
            {
748
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
749
0
            }
750
0
            if ( iPosXR <= sr.right ) // check right
751
0
            {
752
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
753
0
            }
754
0
          } // check bottom
755
0
        } // for ...
756
0
      } // check border
757
0
    } // iDist <= 8
758
0
  } // iDist == 1
759
0
}
760
761
// Evaluates the nine refinement offsets of one sub-pel stage and returns the
// lowest cost (distortion + MV rate) that was found.
//
//   pcPatternKey  buffer handed to setDistParam as the block to compare against;
//                 its width also defines the stride of the interpolated planes
//   baseRefMv     added to each table offset to choose the interpolated plane in
//                 m_filteredBlock
//   iFrac         step multiplier; 2 selects s_acMvRefineH, any other value
//                 s_acMvRefineQ (presumably half-pel vs. quarter-pel -- confirm
//                 against the callers)
//   rcMvFrac      in : added to each table offset for the MV rate term
//                 out: the winning entry of the offset table
//   uiDistBest    in/out best cost; the incoming value is only kept when
//                 m_fastSubPel == 1
//   patternId     in : row of s_skipQpelPosition used to skip candidates (fast mode)
//                 out: incremented after the iFrac == 2 pass of the fast mode
//   pattern       source samples for the on-demand interpolation in the fast mode
//   useAltHpelIf  forwarded to the interpolation filter and used for the tap count
Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
762
                                            Mv baseRefMv,
763
                                            int iFrac, Mv& rcMvFrac,
764
                                            Distortion& uiDistBest,
765
                                            int& patternId,
766
                                            CPelBuf* pattern,
767
                                            bool useAltHpelIf )
768
0
{
769
0
  Distortion  uiDist;
770
0
  // Only the fast sub-pel mode carries the caller's best cost in; otherwise the
  // search starts from MAX_DISTORTION.
  uiDistBest = m_pcEncCfg->m_fastSubPel == 1 ? uiDistBest : MAX_DISTORTION;
771
0
  uint32_t        uiDirecBest = 0;
772
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
773
774
0
  Pel*  piRefPos;
775
0
  // The interpolated planes are addressed with a stride of block width + 1.
  int iRefStride = pcPatternKey->width + 1;
776
0
  m_pcRdCost->setDistParam( m_cDistParam, *pcPatternKey, m_filteredBlock[0][0][0], iRefStride, m_lumaClpRng.bd, COMP_Y, 0, m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0 );
777
778
0
  const ClpRng& clpRng = m_lumaClpRng;
779
0
  int width = pattern->width;
780
0
  int height = pattern->height;
781
0
  int srcStride = pattern->stride;
782
783
0
  int intStride = width + 1;
784
0
  int dstStride = width + 1;
785
0
  Pel* intPtr;
786
0
  Pel* dstPtr;
787
0
  // Tap count that matches the filter selected by useAltHpelIf / reduceTap.
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
788
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
789
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
790
0
  int halfFilterSize = ( filterSize >> 1 );
791
0
  // Start halfFilterSize rows above and one column left of the pattern origin.
  const Pel* srcPtr  = pattern->buf - halfFilterSize*srcStride - 1;
792
793
0
  const ChromaFormat chFmt = m_currChromaFormat;
794
795
0
  // Per-candidate costs, pre-filled with the starting best cost; candidates that
  // are skipped below keep this value.
  Distortion distH[ 9 ] = { uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest };
796
0
  // Used after the loop: (distH[a] << shift) is compared with TH * distH[b] and
  // TL * distH[b], i.e. the ratio a/b is tested against 17/16 and 15/16.
  const int TH = 17, TL = 15, shift = 4;
797
798
0
  const Mv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
799
0
  for (uint32_t i = 0; i < 9; i++)
800
0
  {
801
0
    if( m_pcEncCfg->m_fastSubPel == 1 )
802
0
    {
803
0
      // Skip candidates that the table marks for the current patternId.
      if( s_skipQpelPosition[ patternId ][ i ] )
804
0
      {
805
0
        continue;
806
0
      }
807
808
0
      if( 2 == iFrac )
809
0
      {
810
0
        // Stop the whole search early, depending on which candidate is best so far.
        if ( ( 5 == i && 0 == uiDirecBest ) || ( 7 == i && 1 == uiDirecBest ) || ( 8 == i && ( 1 == uiDirecBest || 3 == uiDirecBest || 5 == uiDirecBest ) ) )
811
0
        {
812
0
          break;
813
0
        }
814
815
0
        // The interpolated planes are produced on demand: i == 0 runs both
        // horizontal passes (fraction 0 and 2) plus the first vertical pass,
        // i == 1, 3 and 5 add the remaining vertical passes.
        if( 0 == i )
816
0
        {
817
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
818
0
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 0 ][ 0 ], intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
819
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 0 ][ 0 ] + width, intStride, 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
820
821
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
822
0
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 2 ][ 0 ], intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
823
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 2 ][ 0 ] + width, intStride, 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
824
825
0
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + halfFilterSize * intStride + 1;
826
0
          dstPtr = m_filteredBlock[ 0 ][ 0 ][ 0 ];
827
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
828
0
        }
829
0
        else if( 1 == i )
830
0
        {
831
0
          // Vertical fraction 2 on the horizontal-fraction-0 plane; one extra row is produced.
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
832
0
          dstPtr = m_filteredBlock[ 2 ][ 0 ][ 0 ];
833
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
834
0
        }
835
0
        else if( 3 == i )
836
0
        {
837
0
          // Vertical fraction 0 on the horizontal-fraction-2 plane; width + 1 columns are produced.
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + halfFilterSize * intStride;
838
0
          dstPtr = m_filteredBlock[ 0 ][ 2 ][ 0 ];
839
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
840
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
841
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
842
0
        }
843
0
        else if( 5 == i )
844
0
        {
845
0
          // Vertical fraction 2 on the horizontal-fraction-2 plane; width + 1 columns and one extra row.
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
846
0
          dstPtr = m_filteredBlock[ 2 ][ 2 ][ 0 ];
847
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
848
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
849
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
850
0
        }
851
0
      }
852
0
    }
853
0
    // Candidate relative to baseRefMv: used only to locate the interpolated samples.
    Mv cMvTest = pcMvRefine[ i ];
854
0
    cMvTest += baseRefMv;
855
856
0
    int horVal = cMvTest.hor * iFrac;
857
0
    int verVal = cMvTest.ver * iFrac;
858
0
    // The low two bits of the scaled offsets select the plane.
    piRefPos = m_filteredBlock[verVal & 3][horVal & 3][0];
859
860
0
    // NOTE(review): the two adjustments below step one column / one row further
    // into the plane for an offset of exactly +2; presumably this accounts for
    // the extra column/row interpolated on the left/top -- confirm.
    if ( horVal == 2 && ( verVal & 1 ) == 0 )
861
0
    {
862
0
      piRefPos += 1;
863
0
    }
864
0
    if ( ( horVal & 1 ) == 0 && verVal == 2 )
865
0
    {
866
0
      piRefPos += iRefStride;
867
0
    }
868
0
    // Candidate relative to rcMvFrac: used for the MV rate term below.
    cMvTest = pcMvRefine[i];
869
0
    cMvTest += rcMvFrac;
870
871
872
0
    m_cDistParam.cur.buf   = piRefPos;
873
0
    uiDist = m_cDistParam.distFunc( m_cDistParam );
874
0
    uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.hor, cMvTest.ver, 0 );
875
876
0
    distH[ i ] = uiDist;
877
0
    if ( uiDist < uiDistBest )
878
0
    {
879
0
      uiDistBest  = uiDist;
880
0
      uiDirecBest = i;
881
0
      // Later candidates get the new best cost as their early-exit limit.
      m_cDistParam.maximumDistortionForEarlyExit = uiDist;
882
0
    }
883
0
  }
884
885
0
  // Report the winning table offset to the caller.
  rcMvFrac = pcMvRefine[uiDirecBest];
886
887
0
  // Fast mode, iFrac == 2 pass only: update patternId from cost comparisons
  // between candidates, with a different rule per winning candidate. In cases
  // 1-8 a fixed per-case offset is added at the end unless patternId equals 41.
  // NOTE(review): the updated patternId presumably selects the s_skipQpelPosition
  // row for a following call -- confirm against the callers.
  // NOTE(review): the "a - b > c - d" comparisons operate on Distortion values;
  // if Distortion is an unsigned type a negative difference wraps around --
  // confirm this is intended.
  if( m_pcEncCfg->m_fastSubPel == 1 && iFrac == 2 )
888
0
  {
889
0
    switch ( uiDirecBest )
890
0
    {
891
0
    case 0:
892
      // hor
893
0
      distH[ 3 ] <<= shift;
894
0
      patternId += ( distH[ 3 ] > TH * distH[ 4 ] ? 2 : ( distH[ 3 ] < TL * distH[ 4 ] ? 1 : 0 ) );
895
      // ver
896
0
      distH[ 1 ] <<= shift;
897
0
      patternId += ( distH[ 1 ] > TH * distH[ 2 ] ? 6 : ( distH[ 1 ] < TL * distH[ 2 ] ? 3 : 0 ) );
898
0
      break;
899
0
    case 1:
900
      // hor
901
0
      distH[ 5 ] <<= shift;
902
0
      patternId += ( distH[ 5 ] > TH * distH[ 6 ] ? 4 : ( distH[ 5 ] < TL * distH[ 6 ] ? 2 : 0 ) );
903
      // ver
904
0
      patternId += ( distH[ 2 ] - distH[ 0 ] > distH[ 0 ] - distH[ 1 ] ? 1 : 0 );
905
906
0
      patternId += ( 41 == patternId ? 0 : 8 );
907
0
      break;
908
0
    case 2:
909
      // hor
910
0
      distH[ 7 ] <<= shift;
911
0
      patternId += ( distH[ 7 ] > TH * distH[ 8 ] ? 4 : ( distH[ 7 ] < TL * distH[ 8 ] ? 2 : 0 ) );
912
      // ver
913
0
      patternId += ( distH[ 1 ] - distH[ 0 ] > distH[ 0 ] - distH[ 2 ] ? 1 : 0 );
914
915
0
      patternId += ( 41 == patternId ? 0 : 13 );
916
0
      break;
917
0
    case 3:
918
      // hor
919
0
      patternId += ( distH[ 4 ] - distH[ 0 ] > distH[ 0 ] - distH[ 3 ] ? 1 : 0 );
920
      // ver
921
0
      distH[ 5 ] <<= shift;
922
0
      patternId += ( distH[ 5 ] > TH * distH[ 7 ] ? 4 : ( distH[ 5 ] < TL * distH[ 7 ] ? 2 : 0 ) );
923
924
0
      patternId += ( 41 == patternId ? 0 : 18 );
925
0
      break;
926
0
    case 4:
927
      // hor
928
0
      patternId += ( distH[ 3 ] - distH[ 0 ] > distH[ 0 ] - distH[ 4 ] ? 1 : 0 );
929
      // ver
930
0
      distH[ 6 ] <<= shift;
931
0
      patternId += ( distH[ 6 ] > TH * distH[ 8 ] ? 4 : ( distH[ 6 ] < TL * distH[ 8 ] ? 2 : 0 ) );
932
933
0
      patternId += ( 41 == patternId ? 0 : 23 );
934
0
      break;
935
0
    case 5:
936
      // hor
937
0
      patternId += ( distH[ 6 ] - distH[ 1 ] > distH[ 1 ] - distH[ 5 ] ? 1 : 0 );
938
      // ver
939
0
      patternId += ( distH[ 7 ] - distH[ 3 ] > distH[ 3 ] - distH[ 5 ] ? 2 : 0 );
940
941
0
      patternId += ( 41 == patternId ? 0 : 28 );
942
0
      break;
943
0
    case 6:
944
      // hor
945
0
      patternId += ( distH[ 5 ] - distH[ 1 ] > distH[ 1 ] - distH[ 6 ] ? 1 : 0 );
946
      // ver
947
0
      patternId += ( distH[ 8 ] - distH[ 4 ] > distH[ 4 ] - distH[ 6 ] ? 2 : 0 );
948
949
0
      patternId += ( 41 == patternId ? 0 : 31 );
950
0
      break;
951
0
    case 7:
952
      // hor
953
0
      patternId += ( distH[ 8 ] - distH[ 2 ] > distH[ 2 ] - distH[ 7 ] ? 1 : 0 );
954
      // ver
955
0
      patternId += ( distH[ 5 ] - distH[ 3 ] > distH[ 3 ] - distH[ 7 ] ? 2 : 0 );
956
957
0
      patternId += ( 41 == patternId ? 0 : 34 );
958
0
      break;
959
0
    case 8:
960
      // hor
961
0
      patternId += ( distH[ 7 ] - distH[ 2 ] > distH[ 2 ] - distH[ 8 ] ? 1 : 0 );
962
      // ver
963
0
      patternId += ( distH[ 6 ] - distH[ 4 ] > distH[ 4 ] - distH[ 8 ] ? 2 : 0 );
964
965
0
      patternId += ( 41 == patternId ? 0 : 37 );
966
0
      break;
967
0
    default:
968
0
      break;
969
0
    }
970
0
  }
971
972
0
  return uiDistBest;
973
0
}
974
975
//! search of the best candidate for inter prediction
976
bool InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner, double& bestCostInter)
977
0
{
978
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH, cu.cs, partitioner.chType );
979
0
  CodingStructure& cs = *cu.cs;
980
981
0
  AMVPInfo     amvp[2];
982
0
  Mv           cMvSrchRngLT;
983
0
  Mv           cMvSrchRngRB;
984
0
  Mv           cMvZero;
985
0
  Mv           cMv[2];
986
0
  Mv           cMvBi[2];
987
0
  Mv           cMvTemp[2][MAX_REF_PICS];
988
0
  Mv           cMvHevcTemp[2][MAX_REF_PICS];
989
0
  int          iNumPredDir = cs.slice->isInterP() ? 1 : 2;
990
991
0
  Mv           cMvPred[2][MAX_REF_PICS];
992
993
0
  Mv           cMvPredBi[2][MAX_REF_PICS];
994
0
  int          aaiMvpIdxBi[2][MAX_REF_PICS];
995
996
0
  int          aaiMvpIdx[2][MAX_REF_PICS];
997
0
  int          aaiMvpNum[2][MAX_REF_PICS];
998
999
0
  AMVPInfo     aacAMVPInfo[2][MAX_REF_PICS];
1000
1001
0
  int          iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
1002
0
  int          iRefIdxBi[2] = { -1, -1 };
1003
1004
0
  uint32_t     uiMbBits[3] = {1, 1, 0};
1005
1006
0
  uint32_t     uiLastMode = 0;
1007
0
  int          iRefStart, iRefEnd;
1008
1009
0
  int          symMode = 0;
1010
1011
0
  int          bestBiPRefIdxL1 = 0;
1012
0
  int          bestBiPMvpL1    = 0;
1013
0
  Distortion   biPDistTemp     = MAX_DISTORTION;
1014
1015
0
  uint8_t      BcwIdx          = (cu.cs->slice->isInterB() ? cu.BcwIdx : BCW_DEFAULT);
1016
0
  bool         enforceBcwPred = false;
1017
1018
  // Loop over Prediction Units
1019
0
  uint32_t     puIdx = 0;
1020
0
  uint32_t     uiLastModeTemp = 0;
1021
0
  Distortion   uiAffineCost = MAX_DISTORTION;
1022
0
  Distortion   uiHevcCost = MAX_DISTORTION;
1023
0
  bool checkAffine = (cu.imv == IMV_OFF);
1024
0
  if (cu.cs->bestParent != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D) != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D)->affine == false)
1025
0
  {
1026
0
    m_skipPROF = true;
1027
0
  }
1028
1029
0
  m_encOnly = true;
1030
0
  {
1031
0
    CU::spanMotionInfo( cu );
1032
0
    Distortion   uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
1033
0
    Distortion   uiCostBi  =   MAX_DISTORTION;
1034
0
    Distortion   uiCostTemp;
1035
1036
0
    uint32_t         uiBits[3];
1037
0
    uint32_t         uiBitsTemp;
1038
0
    Distortion   bestBiPDist = MAX_DISTORTION;
1039
1040
0
    Distortion   uiCostTempL0[MAX_NUM_REF];
1041
0
    for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
1042
0
    {
1043
0
      uiCostTempL0[iNumRef] = MAX_DISTORTION;
1044
0
    }
1045
0
    uint32_t         uiBitsTempL0[MAX_NUM_REF];
1046
1047
0
    Mv           mvValidList1;
1048
0
    int          refIdxValidList1 = 0;
1049
0
    uint32_t         bitsValidList1   = MAX_UINT;
1050
0
    Distortion   costValidList1   = MAX_DISTORTION;
1051
1052
0
    CPelUnitBuf origBuf = cu.cs->getOrgBuf( cu );
1053
1054
0
    xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits );
1055
1056
0
    m_pcRdCost->selectMotionLambda();
1057
1058
0
    unsigned imvShift = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
1059
1060
    //  Uni-directional prediction
1061
0
    for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
1062
0
    {
1063
0
      RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1064
0
      for (int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->numRefIdx[ refPicList ]; iRefIdxTemp++)
1065
0
      {
1066
0
        uiBitsTemp = uiMbBits[iRefList];
1067
0
        if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1068
0
        {
1069
0
          uiBitsTemp += iRefIdxTemp+1;
1070
0
          if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1071
0
          {
1072
0
            uiBitsTemp--;
1073
0
          }
1074
0
        }
1075
0
        xEstimateMvPredAMVP( cu, origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], amvp[refPicList], biPDistTemp);
1076
1077
0
        aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
1078
0
        aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];
1079
1080
0
        if(cs.picHeader->mvdL1Zero && iRefList==1 && biPDistTemp < bestBiPDist)
1081
0
        {
1082
0
          bestBiPDist = biPDistTemp;
1083
0
          bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
1084
0
          bestBiPRefIdxL1 = iRefIdxTemp;
1085
0
        }
1086
1087
0
        uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1088
1089
0
        if ( m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1 )    // list 1
1090
0
        {
1091
0
          if ( cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] >= 0 )
1092
0
          {
1093
0
            cMvTemp[1][iRefIdxTemp] = cMvTemp[0][cs.slice->list1IdxToList0Idx[iRefIdxTemp ]];
1094
0
            uiCostTemp = uiCostTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]];
1095
            /*first subtract the bit-rate part of the cost of the other list*/
1096
0
            uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]] );
1097
            /*correct the bit-rate part of the current ref*/
1098
0
            m_pcRdCost->setPredictor  ( cMvPred[iRefList][iRefIdxTemp] );
1099
0
            uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].hor, cMvTemp[1][iRefIdxTemp].ver, imvShift + MV_FRACTIONAL_BITS_DIFF );
1100
            /*calculate the correct cost*/
1101
0
            uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
1102
0
          }
1103
0
          else
1104
0
          {
1105
0
            xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1106
0
          }
1107
0
        }
1108
0
        else
1109
0
        {
1110
0
          xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1111
0
        }
1112
          
1113
0
        if( cs.slice->sps->BCW && cu.BcwIdx == BCW_DEFAULT && cs.slice->isInterB() )
1114
0
        {
1115
0
          m_uniMotions.setReadMode( true, (uint32_t)iRefList, (uint32_t)iRefIdxTemp) ;
1116
0
          m_uniMotions.copyFrom( cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp );
1117
0
        }
1118
1119
0
        xCopyAMVPInfo( &amvp[refPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
1120
0
        xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv );
1121
1122
0
        if ( iRefList == 0 )
1123
0
        {
1124
0
          uiCostTempL0[iRefIdxTemp] = uiCostTemp;
1125
0
          uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
1126
0
        }
1127
0
        if ( uiCostTemp < uiCost[iRefList] )
1128
0
        {
1129
0
          uiCost[iRefList] = uiCostTemp;
1130
0
          uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
1131
1132
          // set motion
1133
0
          cMv    [iRefList] = cMvTemp[iRefList][iRefIdxTemp];
1134
0
          iRefIdx[iRefList] = iRefIdxTemp;
1135
0
        }
1136
1137
0
        if ( iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] < 0 )
1138
0
        {
1139
0
          costValidList1 = uiCostTemp;
1140
0
          bitsValidList1 = uiBitsTemp;
1141
1142
          // set motion
1143
0
          mvValidList1     = cMvTemp[iRefList][iRefIdxTemp];
1144
0
          refIdxValidList1 = iRefIdxTemp;
1145
0
        }
1146
0
      }
1147
0
    }
1148
1149
0
    ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp));
1150
0
    if (cu.imv == IMV_OFF && (!cu.slice->sps->BCW || BcwIdx == BCW_DEFAULT))
1151
0
    {
1152
0
      m_BlkUniMvInfoBuffer->insertUniMvCands(cu.Y(), &cMvTemp[0][0]);
1153
1154
0
      unsigned idx1, idx2, idx3, idx4;
1155
0
      getAreaIdxNew(cu.Y(), *cs.pcv, idx1, idx2, idx3, idx4);
1156
0
      if( ! m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] )
1157
0
      {
1158
0
        m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] = new Mv[ 2 * MAX_REF_PICS ];
1159
//          DTRACE( g_trace_ctx, D_TMP, "%d unimv first reuse %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
1160
0
      }
1161
0
      ::memcpy(m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4], cMvTemp, 2 * MAX_REF_PICS * sizeof(Mv));
1162
0
    }
1163
0
    if (bestCostInter != MAX_DOUBLE)
1164
0
    {
1165
0
      int L = (cu.slice->TLayer <= 2) ? 0 : (cu.slice->TLayer - 2);
1166
0
      double besCostMerge = bestCostInter;
1167
0
      bestCostInter = (uiCost[0] < uiCost[1]) ? uiCost[0] : uiCost[1];
1168
0
      if ((cu.slice->TLayer > (m_pcEncCfg->m_maxTLayer - (m_pcEncCfg->m_FastInferMerge & 7))) && bestCostInter > MRG_FAST_RATIOMYV[L] * besCostMerge)
1169
0
      {
1170
0
        m_skipPROF = false;
1171
0
        m_encOnly = false;
1172
0
        return true;
1173
0
      }
1174
0
    }
1175
    //  Bi-predictive Motion estimation
1176
0
    if( cs.slice->isInterB() && !CU::isBipredRestriction( cu ) && (cu.slice->checkLDC || BcwIdx == BCW_DEFAULT  || !m_affineModeSelected || m_pcEncCfg->m_BCW != 2 ) )
1177
0
    {
1178
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_B, &cs, partitioner.chType );
1179
0
      bool doBiPred = true;
1180
0
      cMvBi[0] = cMv[0];
1181
0
      cMvBi[1] = cMv[1];
1182
0
      iRefIdxBi[0] = iRefIdx[0];
1183
0
      iRefIdxBi[1] = iRefIdx[1];
1184
1185
0
      ::memcpy( cMvPredBi,   cMvPred,   sizeof( cMvPred   ) );
1186
0
      ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof( aaiMvpIdx ) );
1187
1188
0
      uint32_t uiMotBits[2];
1189
1190
0
      if(cs.picHeader->mvdL1Zero)
1191
0
      {
1192
        // case: no mvd for L1
1193
        // note: mv = mvp + mvd
1194
        // mv for L1 is equal to mvp(L1) and the mvd search is only performed for L0
1195
0
        xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]);
1196
0
        aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
1197
0
        cMvPredBi  [1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvCand[bestBiPMvpL1];
1198
0
        if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cMvPredBi[1][bestBiPRefIdxL1].ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
1199
0
        {
1200
          // this mvp cannot be used for mv, skip Bi-pred
1201
0
          uiCostBi = std::numeric_limits<Distortion>::max();
1202
0
          doBiPred = false;
1203
0
        }
1204
1205
0
        if( doBiPred )
1206
0
        {
1207
0
          cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1];
1208
0
          iRefIdxBi[1] = bestBiPRefIdxL1;
1209
0
          cu.mv[REF_PIC_LIST_1][0] = cMvBi[1];
1210
0
          cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1211
0
          cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
1212
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
1213
0
          motionCompensation( cu, predBufTmp, REF_PIC_LIST_1 );
1214
1215
0
          uiMotBits[0] = uiBits[0] - uiMbBits[0];
1216
0
          uiMotBits[1] = uiMbBits[1];
1217
1218
0
          if(cs.slice->numRefIdx[REF_PIC_LIST_1] > 1)
1219
0
          {
1220
0
            uiMotBits[1] += bestBiPRefIdxL1 + 1;
1221
0
            if(bestBiPRefIdxL1 == cs.slice->numRefIdx[REF_PIC_LIST_1] - 1)
1222
0
            {
1223
0
              uiMotBits[1]--;
1224
0
            }
1225
0
          }
1226
1227
0
          uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
1228
1229
0
          uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1230
1231
0
          cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
1232
0
        }
1233
0
      }
1234
0
      else
1235
0
      {
1236
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
1237
0
        uiMotBits[1] = uiBits[1] - uiMbBits[1];
1238
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1239
0
      }
1240
1241
0
      if( doBiPred )
1242
0
      {
1243
        // 4-times iteration (default)
1244
0
        int iNumIter = 4;
1245
1246
        // fast encoder setting: only one iteration
1247
0
        if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 || cs.picHeader->mvdL1Zero )
1248
0
        {
1249
0
          iNumIter = 1;
1250
0
        }
1251
1252
0
        enforceBcwPred = (BcwIdx != BCW_DEFAULT);
1253
1254
0
        for ( int iIter = 0; iIter < iNumIter; iIter++ )
1255
0
        {
1256
0
          int         iRefList    = iIter % 2;
1257
1258
0
          if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 )
1259
0
          {
1260
0
            if( uiCost[0] <= uiCost[1] )
1261
0
            {
1262
0
              iRefList = 1;
1263
0
            }
1264
0
            else
1265
0
            {
1266
0
              iRefList = 0;
1267
0
            }
1268
0
          }
1269
0
          else if ( iIter == 0 )
1270
0
          {
1271
0
            iRefList = 0;
1272
0
          }
1273
0
          if ( iIter == 0 && !cs.picHeader->mvdL1Zero)
1274
0
          {
1275
0
            cu.mv    [1 - iRefList][0] = cMv    [1 - iRefList];
1276
0
            cu.refIdx[1 - iRefList]    = iRefIdx[1 - iRefList];
1277
1278
0
            PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
1279
0
            motionCompensation( cu, predBufTmp, RefPicList(1 - iRefList) );
1280
0
          }
1281
1282
0
          RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1283
1284
0
          if(cs.picHeader->mvdL1Zero)
1285
0
          {
1286
0
            iRefList = 0;
1287
0
            refPicList = REF_PIC_LIST_0;
1288
0
          }
1289
1290
0
          bool bChanged = false;
1291
1292
0
          iRefStart = 0;
1293
0
          iRefEnd   = cs.slice->numRefIdx[ refPicList ]-1;
1294
0
          for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
1295
0
          {
1296
0
            uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
1297
0
            uiBitsTemp += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1298
0
            if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1299
0
            {
1300
0
              uiBitsTemp += iRefIdxTemp+1;
1301
0
              if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1302
0
              {
1303
0
                uiBitsTemp--;
1304
0
              }
1305
0
            }
1306
0
            uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1307
0
            if ( cs.slice->biDirPred )
1308
0
            {
1309
0
              uiBitsTemp += 1; // add one bit for symmetrical MVD mode
1310
0
            }
1311
            // call ME
1312
0
            xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[refPicList] );
1313
0
            xMotionEstimation ( cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList], true );
1314
0
            xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv);
1315
0
            if ( uiCostTemp < uiCostBi )
1316
0
            {
1317
0
              bChanged = true;
1318
1319
0
              cMvBi[iRefList]     = cMvTemp[iRefList][iRefIdxTemp];
1320
0
              iRefIdxBi[iRefList] = iRefIdxTemp;
1321
1322
0
              uiCostBi            = uiCostTemp;
1323
0
              uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
1324
0
              uiMotBits[iRefList] -= ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1325
0
              uiBits[2]           = uiBitsTemp;
1326
1327
0
              if(iNumIter!=1)
1328
0
              {
1329
                //  Set motion
1330
0
                cu.mv    [refPicList][0] = cMvBi    [iRefList];
1331
0
                cu.refIdx[refPicList]    = iRefIdxBi[iRefList];
1332
1333
0
                PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
1334
0
                motionCompensation( cu, predBufTmp, refPicList );
1335
0
              }
1336
0
            }
1337
0
          } // for loop-iRefIdxTemp
1338
1339
0
          if( !bChanged )
1340
0
          {
1341
0
            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
1342
0
            {
1343
0
              xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
1344
0
              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, cu.imv);
1345
0
              if(!cs.picHeader->mvdL1Zero)
1346
0
              {
1347
0
                xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
1348
0
                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, cu.imv);
1349
0
              }
1350
0
            }
1351
0
            break;
1352
0
          }
1353
0
        } // for loop-iter
1354
0
      }
1355
1356
      // SMVD
1357
0
      if( cs.slice->biDirPred )
1358
0
      {
1359
0
        double th1 = 1.02;
1360
0
        bool testSME = true;
1361
0
        int numStartCand = m_pcEncCfg->m_SMVD > 1 ? 1 : 5;
1362
0
        Distortion symCost;
1363
0
        Mv cMvPredSym[2];
1364
0
        int mvpIdxSym[2];
1365
1366
0
        int curRefList = REF_PIC_LIST_0;
1367
0
        int tarRefList = 1 - curRefList;
1368
0
        RefPicList eCurRefList = (curRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
1369
0
        int refIdxCur = cs.slice->symRefIdx[ curRefList ];
1370
0
        int refIdxTar = cs.slice->symRefIdx[ tarRefList ];
1371
0
        if( aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 0 ] == aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 1 ] )
1372
0
        {
1373
0
          aacAMVPInfo[ curRefList ][ refIdxCur ].numCand = 1;
1374
0
        }
1375
0
        if( aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 0 ] == aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 1 ] )
1376
0
        {
1377
0
          aacAMVPInfo[ tarRefList ][ refIdxTar ].numCand = 1;
1378
0
        }
1379
1380
0
        MvField cCurMvField, cTarMvField;
1381
0
        Distortion costStart = MAX_DISTORTION;
1382
0
        for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ )
1383
0
        {
1384
0
          for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ )
1385
0
          {
1386
            GCC_WARNING_DISABLE_array_bounds // probably a bug in gcc-10 static analyzer: It thinks the indices are -1 and therefore triggers -Werror=array-bounds
1387
0
            cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur );
1388
0
            cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar );
1389
0
            GCC_WARNING_RESET
1390
0
            if( m_pcEncCfg->m_ifpLines )
1391
0
            {
1392
0
              xCheckAndClipMvToFppLine( cCurMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1393
0
              xCheckAndClipMvToFppLine( cTarMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1394
0
            }
1395
0
            Distortion cost = xGetSymCost( cu, origBuf, eCurRefList, cCurMvField, cTarMvField, BcwIdx );
1396
0
            if ( cost < costStart )
1397
0
            {
1398
0
              costStart = cost;
1399
0
              cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i];
1400
0
              cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j];
1401
0
              mvpIdxSym[curRefList] = i;
1402
0
              mvpIdxSym[tarRefList] = j;
1403
0
            }
1404
0
          }
1405
0
        }
1406
0
        cCurMvField.mv = cMvPredSym[curRefList];
1407
0
        cTarMvField.mv = cMvPredSym[tarRefList];
1408
1409
0
        m_pcRdCost->setCostScale(0);
1410
0
        Mv pred = cMvPredSym[curRefList];
1411
0
        pred.changeTransPrecInternal2Amvr(cu.imv);
1412
0
        m_pcRdCost->setPredictor(pred);
1413
0
        Mv mv = cCurMvField.mv;
1414
0
        mv.changeTransPrecInternal2Amvr(cu.imv);
1415
0
        uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
1416
0
        bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS];
1417
0
        bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS];
1418
0
        costStart += m_pcRdCost->getCost(bits);
1419
1420
0
        std::vector<Mv> symmvdCands;
1421
0
        auto smmvdCandsGen = [&](Mv mvCand, bool mvPrecAdj)
1422
0
        {
1423
0
          if (mvPrecAdj && cu.imv)
1424
0
          {
1425
0
            mvCand.roundTransPrecInternal2Amvr(cu.imv);
1426
0
          }
1427
1428
0
          bool toAddMvCand = true;
1429
0
          for (std::vector<Mv>::iterator pos = symmvdCands.begin(); pos != symmvdCands.end(); pos++)
1430
0
          {
1431
0
            if (*pos == mvCand)
1432
0
            {
1433
0
              toAddMvCand = false;
1434
0
              break;
1435
0
            }
1436
0
          }
1437
1438
0
          if (toAddMvCand)
1439
0
          {
1440
0
            symmvdCands.push_back(mvCand);
1441
0
          }
1442
0
        };
1443
1444
0
        smmvdCandsGen(cMvHevcTemp[curRefList][refIdxCur], false);
1445
0
        smmvdCandsGen(cMvTemp[curRefList][refIdxCur], false);
1446
0
        if (iRefIdxBi[curRefList] == refIdxCur)
1447
0
        {
1448
0
          smmvdCandsGen(cMvBi[curRefList], false);
1449
0
        }
1450
0
        for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
1451
0
        {
1452
0
          if( symmvdCands.size() >= numStartCand )
1453
0
          {
1454
0
            break;
1455
0
          }
1456
0
          BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
1457
0
          smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true);
1458
0
        }
1459
1460
0
        for (auto mvStart : symmvdCands)
1461
0
        {
1462
0
          bool checked = false; //if it has been checkin in the mvPred.
1463
0
          for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++)
1464
0
          {
1465
0
            checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]);
1466
0
          }
1467
0
          if (checked)
1468
0
          {
1469
0
            continue;
1470
0
          }
1471
1472
0
          Distortion bestCost = costStart;
1473
0
          xSymMvdCheckBestMvp(cu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, costStart, false);
1474
0
          if (costStart < bestCost)
1475
0
          {
1476
0
            cCurMvField.setMvField(mvStart, refIdxCur);
1477
0
            cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1478
0
          }
1479
0
        }
1480
0
        Mv startPtMv = cCurMvField.mv;
1481
1482
0
        Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]);
1483
0
        symCost = costStart - mvpCost;
1484
1485
        // ME
1486
0
        testSME = m_pcEncCfg->m_SMVD <= 2 || ( symCost < uiCostBi * th1 && uiCostBi < uiCost[ 0 ] && uiCostBi < uiCost[ 1 ] );
1487
0
        if( testSME )
1488
0
        {
1489
0
          xSymMotionEstimation( cu, origBuf, cMvPredSym[ curRefList ], cMvPredSym[ tarRefList ], eCurRefList, cCurMvField, cTarMvField, symCost, BcwIdx );
1490
0
        }
1491
1492
0
        symCost += mvpCost;
1493
1494
0
        if (startPtMv != cCurMvField.mv)
1495
0
        { // if ME change MV, run a final check for best MVP.
1496
0
          xSymMvdCheckBestMvp(cu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, symCost, true);
1497
0
        }
1498
1499
0
        bits = uiMbBits[2];
1500
0
        bits += 1; // add one bit for #symmetrical MVD mode
1501
0
        bits += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1502
0
        symCost += m_pcRdCost->getCost(bits);
1503
0
        cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1504
1505
        // save results
1506
0
        if ( symCost < uiCostBi  
1507
0
          && ( !m_pcEncCfg->m_ifpLines || 
1508
0
          ( CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cCurMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) &&
1509
0
            CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cTarMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) ) )          
1510
0
          )
1511
0
        {
1512
0
          uiCostBi = symCost;
1513
0
          symMode = 1 + curRefList;
1514
1515
0
          cMvBi[curRefList] = cCurMvField.mv;
1516
0
          iRefIdxBi[curRefList] = cCurMvField.refIdx;
1517
0
          aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList];
1518
0
          cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList];
1519
1520
0
          cMvBi[tarRefList] = cTarMvField.mv;
1521
0
          iRefIdxBi[tarRefList] = cTarMvField.refIdx;
1522
0
          aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList];
1523
0
          cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList];
1524
0
        }
1525
0
      }
1526
0
    } // if (B_SLICE)
1527
1528
      //  Clear Motion Field
1529
0
    cu.mv [REF_PIC_LIST_0][0] = Mv();
1530
0
    cu.mv [REF_PIC_LIST_1][0] = Mv();
1531
0
    cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
1532
0
    cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
1533
0
    cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
1534
0
    cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
1535
0
    cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
1536
0
    cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
1537
0
    cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
1538
0
    cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
1539
1540
    // Set Motion Field
1541
0
    cMv    [1] = mvValidList1;
1542
0
    iRefIdx[1] = refIdxValidList1;
1543
0
    uiBits [1] = bitsValidList1;
1544
0
    uiCost [1] = costValidList1;
1545
0
    if( enforceBcwPred )
1546
0
    {
1547
0
      uiCost[0] = uiCost[1] = MAX_UINT;
1548
0
    }
1549
1550
0
    uiLastModeTemp = uiLastMode;
1551
0
    if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
1552
0
    {
1553
0
      bestCostInter = uiCostBi;
1554
0
      uiLastMode = 2;
1555
0
      cu.mv [REF_PIC_LIST_0][0] = cMvBi[0];
1556
0
      cu.mv [REF_PIC_LIST_1][0] = cMvBi[1];
1557
0
      cu.mvd[REF_PIC_LIST_0][0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
1558
0
      cu.mvd[REF_PIC_LIST_1][0] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
1559
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
1560
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1561
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
1562
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
1563
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
1564
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
1565
0
      cu.interDir = 3;
1566
1567
0
      cu.smvdMode = symMode;
1568
0
    }
1569
0
    else if ( uiCost[0] <= uiCost[1] )
1570
0
    {
1571
0
      bestCostInter = uiCost[0];
1572
0
      uiLastMode = 0;
1573
0
      cu.mv [REF_PIC_LIST_0][0] = cMv[0];
1574
0
      cu.mvd[REF_PIC_LIST_0][0] = cMv[0] - cMvPred[0][iRefIdx[0]];
1575
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
1576
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
1577
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
1578
0
      cu.interDir = 1;
1579
0
    }
1580
0
    else
1581
0
    {
1582
0
      bestCostInter = uiCost[1];
1583
0
      uiLastMode = 1;
1584
0
      cu.mv [REF_PIC_LIST_1][0] = cMv[1];
1585
0
      cu.mvd[REF_PIC_LIST_1][0] = cMv[1] - cMvPred[1][iRefIdx[1]];
1586
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
1587
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
1588
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
1589
0
      cu.interDir = 2;
1590
0
    }
1591
1592
0
    if( BcwIdx != BCW_DEFAULT )
1593
0
    {
1594
0
      cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes.
1595
0
    }
1596
0
    uiHevcCost = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) ? uiCostBi : ((uiCost[0] <= uiCost[1]) ? uiCost[0] : uiCost[1]);
1597
0
    if (m_pcEncCfg->m_Affine > 2)
1598
0
    {
1599
0
      if (cu.slice->TLayer > 3)
1600
0
      {
1601
0
        checkAffine = false;
1602
0
      }
1603
0
      else
1604
0
      {
1605
0
        if( m_pcEncCfg->m_Affine >= 4 && cu.slice->TLayer >= 2 )
1606
0
        {
1607
0
          checkAffine = m_modeCtrl->comprCUCtx->bestCU ? (checkAffine && m_modeCtrl->comprCUCtx->bestCU->affine) : checkAffine;
1608
0
        }
1609
0
      }
1610
0
    }
1611
0
    if( checkAffine && cu.Y().width > 8 && cu.Y().height > 8 && m_pcEncCfg->m_Affine > 0 )
1612
0
    {
1613
      // Based on:
1614
      // H. Pejman*, S. Coulombe*, C. Vazquez*, M. Jamali° and A. Vakili°
1615
      // *École de technologie supérieure, °Summit Tech Multimedia
1616
      // "An Adjustable Fast Decision Method for Affine Motion Estimation in VVC,"
1617
      // ICIP, Kuala Lumpur, Malaysia, 2023, pp. 2695-2699, doi: 10.1109/ICIP49359.2023.10222750.
1618
      // https://ieeexplore.ieee.org/document/10222750
1619
1620
0
      static const double affine_thr_coffs[3] = { 2.534229853866437, 0.05173246 ,0.87650414 };
1621
0
      static const double affine_thr_param[5] = { 1, 1, 1, 1.3, 2.3 }; // TODO: Adapt if extending m_Affine range!
1622
0
      const int qp         = cu.qp;
1623
0
      const int blk_area   = cu.Y().area();
1624
0
      const double threshold  = affine_thr_param[m_pcEncCfg->m_Affine - 1];
1625
1626
      //Multiple linear regression (MLR):
1627
      //Y = b0 + b1*(QP) + b2*(LOG2(BLK_AREA))
1628
0
      double log_affine_thr =
1629
0
        affine_thr_coffs[0] +
1630
0
        qp * affine_thr_coffs[1] +
1631
0
        log2(blk_area) * affine_thr_coffs[2];
1632
1633
      //log_affine_thr is LOG 2 of estimated thr
1634
0
      double affine_thr = pow(2, log_affine_thr) * threshold;
1635
1636
0
      double scaled_uiHevcCost = (double)uiHevcCost;
1637
1638
      //The trained coefficients are based on the cost of internal 10 BitDepth. So, the cost should be scaled if the internal BitDepth is not 10.
1639
0
      if (m_pcEncCfg->m_internalBitDepth[0] !=10)
1640
0
      {
1641
        //Based on the CTC document, to convert 8-bit video to 10-bit or vice versa, the VTM only multiplies (8 to 10 bits) or divides (10 to 8 bits) pixel values by 4.
1642
        //In this case, the cost values are approximately scaled by 4.
1643
        //The trained data acquired from internal 10 bit data. So, if internal bit depth is 8, the conversion into 10-bit cost can be done as follows:
1644
0
        scaled_uiHevcCost = uiHevcCost * (pow(2.0, 10-m_pcEncCfg->m_internalBitDepth[0]));
1645
0
      }
1646
0
      if( scaled_uiHevcCost < affine_thr )
1647
0
      {
1648
0
        checkAffine = false;
1649
0
      }
1650
0
    }
1651
0
    if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->sps->Affine && checkAffine)
1652
0
    {
1653
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_AFFINE, &cs, partitioner.chType );
1654
0
      m_hevcCost = uiHevcCost;
1655
      // save normal hevc result
1656
0
      uint32_t uiMRGIndex = cu.mergeIdx;
1657
0
      bool bMergeFlag = cu.mergeFlag;
1658
0
      uint32_t uiInterDir = cu.interDir;
1659
0
      int  iSymMode = cu.smvdMode;
1660
1661
0
      Mv cMvd[2];
1662
0
      uint32_t uiMvpIdx[2], uiMvpNum[2];
1663
0
      uiMvpIdx[0] = cu.mvpIdx[REF_PIC_LIST_0];
1664
0
      uiMvpIdx[1] = cu.mvpIdx[REF_PIC_LIST_1];
1665
0
      uiMvpNum[0] = cu.mvpNum[REF_PIC_LIST_0];
1666
0
      uiMvpNum[1] = cu.mvpNum[REF_PIC_LIST_1];
1667
0
      cMvd[0] = cu.mvd[REF_PIC_LIST_0][0];
1668
0
      cMvd[1] = cu.mvd[REF_PIC_LIST_1][0];
1669
1670
0
      MvField cHevcMvField[2];
1671
0
      cHevcMvField[0].setMvField(cu.mv[REF_PIC_LIST_0][0], cu.refIdx[REF_PIC_LIST_0]);
1672
0
      cHevcMvField[1].setMvField(cu.mv[REF_PIC_LIST_1][0], cu.refIdx[REF_PIC_LIST_1]);
1673
1674
      // do affine ME & Merge
1675
0
      cu.affineType = AFFINEMODEL_4PARAM;
1676
0
      Mv acMvAffine4Para[2][MAX_REF_PICS][3];
1677
0
      int refIdx4Para[2] = { -1, -1 };
1678
1679
0
      xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1680
1681
0
      if (cu.imv == IMV_OFF)
1682
0
      {
1683
0
        storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_4PARAM, BcwIdx);
1684
0
      }
1685
0
      if (cu.slice->sps->AffineType && uiAffineCost != MAX_DISTORTION)
1686
0
      {
1687
0
        if (uiAffineCost < uiHevcCost * 1.05) ///< condition for 6 parameter affine ME
1688
0
        {
1689
          // save 4 parameter results
1690
0
          Mv bestMv[2][3], bestMvd[2][3];
1691
0
          int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2];
1692
0
          uint8_t bestInterDir;
1693
1694
0
          bestInterDir = cu.interDir;
1695
0
          bestRefIdx[0] = cu.refIdx[0];
1696
0
          bestRefIdx[1] = cu.refIdx[1];
1697
0
          bestMvpIdx[0] = cu.mvpIdx[0];
1698
0
          bestMvpIdx[1] = cu.mvpIdx[1];
1699
0
          bestMvpNum[0] = cu.mvpNum[0];
1700
0
          bestMvpNum[1] = cu.mvpNum[1];
1701
1702
0
          for (int refList = 0; refList < 2; refList++)
1703
0
          {
1704
0
            bestMv[refList][0] = cu.mv[refList][0];
1705
0
            bestMv[refList][1] = cu.mv[refList][1];
1706
0
            bestMv[refList][2] = cu.mv[refList][2];
1707
0
            bestMvd[refList][0] = cu.mvd[refList][0];
1708
0
            bestMvd[refList][1] = cu.mvd[refList][1];
1709
0
            bestMvd[refList][2] = cu.mvd[refList][2];
1710
0
          }
1711
1712
0
          refIdx4Para[0] = bestRefIdx[0];
1713
0
          refIdx4Para[1] = bestRefIdx[1];
1714
1715
0
          Distortion uiAffine6Cost = MAX_DISTORTION;
1716
0
          cu.affineType = AFFINEMODEL_6PARAM;
1717
0
          xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1718
1719
0
          if (cu.imv == IMV_OFF)
1720
0
          {
1721
0
            storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_6PARAM, BcwIdx);
1722
0
          }
1723
1724
          // reset to 4 parameter affine inter mode
1725
0
          if (uiAffineCost <= uiAffine6Cost)
1726
0
          {
1727
0
            cu.affineType = AFFINEMODEL_4PARAM;
1728
0
            cu.interDir = bestInterDir;
1729
0
            cu.refIdx[0] = bestRefIdx[0];
1730
0
            cu.refIdx[1] = bestRefIdx[1];
1731
0
            cu.mvpIdx[0] = bestMvpIdx[0];
1732
0
            cu.mvpIdx[1] = bestMvpIdx[1];
1733
0
            cu.mvpNum[0] = bestMvpNum[0];
1734
0
            cu.mvpNum[1] = bestMvpNum[1];
1735
1736
0
            for (int verIdx = 0; verIdx < 3; verIdx++)
1737
0
            {
1738
0
              cu.mvd[REF_PIC_LIST_0][verIdx] = bestMvd[0][verIdx];
1739
0
              cu.mvd[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx];
1740
0
            }
1741
1742
0
            CU::setAllAffineMv(cu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0);
1743
0
            CU::setAllAffineMv(cu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1);
1744
0
          }
1745
0
          else
1746
0
          {
1747
0
            uiAffineCost = uiAffine6Cost;
1748
0
          }
1749
0
        }
1750
1751
0
        uiAffineCost += m_pcRdCost->getCost(1); // add one bit for affine_type
1752
0
      }
1753
1754
0
      if (uiHevcCost <= uiAffineCost)
1755
0
      {
1756
        // set hevc me result
1757
0
        cu.affine = false;
1758
0
        cu.mergeFlag = bMergeFlag;
1759
0
        cu.mergeIdx = uiMRGIndex;
1760
0
        cu.interDir = uiInterDir;
1761
0
        cu.smvdMode = iSymMode;
1762
0
        cu.mv[REF_PIC_LIST_0][0]  = cHevcMvField[0].mv;
1763
0
        cu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx;
1764
0
        cu.mv[REF_PIC_LIST_1][0]  = cHevcMvField[1].mv;
1765
0
        cu.refIdx[REF_PIC_LIST_1] = cHevcMvField[1].refIdx;
1766
0
        cu.mvpIdx[REF_PIC_LIST_0] = uiMvpIdx[0];
1767
0
        cu.mvpIdx[REF_PIC_LIST_1] = uiMvpIdx[1];
1768
0
        cu.mvpNum[REF_PIC_LIST_0] = uiMvpNum[0];
1769
0
        cu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1];
1770
0
        cu.mvd[REF_PIC_LIST_0][0] = cMvd[0];
1771
0
        cu.mvd[REF_PIC_LIST_1][0] = cMvd[1];
1772
0
      }
1773
0
      else
1774
0
      {
1775
0
        cu.smvdMode = 0;
1776
0
        CHECK(!cu.affine, "Wrong.");
1777
0
        uiLastMode = uiLastModeTemp;
1778
0
      }
1779
0
    }
1780
1781
0
    if( cu.interDir == 3 && !cu.mergeFlag )
1782
0
    {
1783
0
      if (BcwIdx != BCW_DEFAULT)
1784
0
      {
1785
0
        cu.BcwIdx = BcwIdx;
1786
0
      }
1787
0
    }
1788
1789
0
    CU::spanMotionInfo( cu );
1790
1791
0
    m_skipPROF = false;
1792
0
    m_encOnly  = false;
1793
    //  MC
1794
0
    PelUnitBuf predBuf = cu.cs->getPredBuf(cu);
1795
0
    motionCompensation( cu, predBuf, REF_PIC_LIST_X );
1796
0
    puIdx++;
1797
0
  }
1798
1799
0
  return false;
1800
0
}
1801
1802
// AMVP
1803
// Fills the AMVP candidate list for (refPicList, iRefIdx) and selects the
// candidate with the lowest template cost.
//
// Outputs:
//   rcMvPred               - the selected candidate (candidate 0 if none improves on MAX_DISTORTION)
//   rAMVPInfo              - filled by CU::fillMvpCand
//   distBiP                - template cost of the selected candidate; left untouched if no
//                            candidate improves on MAX_DISTORTION
//   cu.mvpIdx / cu.mvpNum  - selected index and candidate count for refPicList
void InterSearch::xEstimateMvPredAMVP( CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv& rcMvPred, AMVPInfo& rAMVPInfo, Distortion& distBiP )
{
  // Build the candidate list first; everything below reads from it.
  CU::fillMvpCand( cu, refPicList, iRefIdx, rAMVPInfo );

  // Start from candidate 0 so there is always a valid result.
  int        bestIdx  = 0;
  Mv         bestMv   = rAMVPInfo.mvCand[0];
  Distortion bestCost = MAX_DISTORTION;

  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );

  for( int candIdx = 0; candIdx < rAMVPInfo.numCand; ++candIdx )
  {
    // The cost is evaluated on a (possibly clipped) copy; the stored result is the unclipped candidate.
    Mv clippedCand = rAMVPInfo.mvCand[candIdx];
    if( m_pcEncCfg->m_ifpLines )
    {
      // NOTE(review): the last argument is the int parameter 'ifpLines'; the literal 'true' is
      // passed here (i.e. 1) while other callers pass m_pcEncCfg->m_ifpLines - confirm intent.
      xClipMvSearch( clippedCand, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, true );
    }

    const Distortion candCost = xGetTemplateCost( cu, origBuf, predBuf, clippedCand, candIdx, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx );
    if( candCost < bestCost )
    {
      bestCost = candCost;
      bestMv   = rAMVPInfo.mvCand[candIdx];
      bestIdx  = candIdx;
      distBiP  = candCost;
    }
  }

  // Publish the winner.
  rcMvPred              = bestMv;
  cu.mvpIdx[refPicList] = bestIdx;
  cu.mvpNum[refPicList] = rAMVPInfo.numCand;
}
1845
1846
uint32_t InterSearch::xGetMvpIdxBits(int iIdx, int iNum)
1847
0
{
1848
0
  CHECK(iIdx < 0 || iNum < 0 || iIdx >= iNum, "Invalid parameters");
1849
1850
0
  if (iNum == 1)
1851
0
  {
1852
0
    return 0;
1853
0
  }
1854
1855
0
  uint32_t uiLength = 1;
1856
0
  int iTemp = iIdx;
1857
0
  if ( iTemp == 0 )
1858
0
  {
1859
0
    return uiLength;
1860
0
  }
1861
1862
0
  bool bCodeLast = ( iNum-1 > iTemp );
1863
1864
0
  uiLength += (iTemp-1);
1865
1866
0
  if( bCodeLast )
1867
0
  {
1868
0
    uiLength++;
1869
0
  }
1870
1871
0
  return uiLength;
1872
0
}
1873
1874
// Writes three fixed bit counts into uiBlkBit:
//   [0] = 1 when bPSlice is set, 3 otherwise; [1] = 3; [2] = 5.
// iPartIdx and uiLastMode are accepted but not used.
// NOTE(review): the entries presumably correspond to list-0, list-1 and bi-prediction - confirm with callers.
void InterSearch::xGetBlkBits( bool bPSlice, int iPartIdx, uint32_t uiLastMode, uint32_t uiBlkBit[3])
{
  uiBlkBit[0] = bPSlice ? 1 : 3;
  uiBlkBit[1] = 3;
  uiBlkBit[2] = 5;
}
1880
1881
// Copies the candidate count and the first numCand motion vector candidates from pSrc to pDst.
// Entries of pDst->mvCand beyond numCand are left as they were.
void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
{
  pDst->numCand = pSrc->numCand;

  int candIdx = 0;
  while( candIdx < pSrc->numCand )
  {
    pDst->mvCand[candIdx] = pSrc->mvCand[candIdx];
    ++candIdx;
  }
}
1889
1890
// Re-selects the MVP candidate for an already estimated motion vector cMv: every candidate in
// amvpInfo is tried as predictor and the one needing the fewest bits (MVD bits + MVP index bits) wins.
//
// If a cheaper candidate is found, rcMvPred and riMVPIdx are switched to it and ruiBits / ruiCost are
// adjusted by the bit difference. Nothing is done when imv is 1 or 2, or when there are fewer than
// two candidates. refPicList is not used by this function.
//
// Side effect: the predictor and cost scale stored in m_pcRdCost are overwritten.
void InterSearch::xCheckBestMVP ( RefPicList refPicList, const Mv& cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
{
  const bool skipForImv = ( imv > 0 && imv < 3 );
  if( skipForImv )
  {
    return;
  }

  CHECK(amvpInfo.mvCand[riMVPIdx] != rcMvPred, "Invalid MV prediction candidate");

  if( amvpInfo.numCand < 2 )
  {
    return;
  }

  m_pcRdCost->setCostScale( 0 );

  // Bits of the currently selected predictor.
  Mv predAmvr = rcMvPred;
  predAmvr.changeTransPrecInternal2Amvr( imv );
  m_pcRdCost->setPredictor( predAmvr );

  // The MV in AMVR precision is the same for every predictor tried below.
  Mv mvAmvr = cMv;
  mvAmvr.changeTransPrecInternal2Amvr( imv );

  int curMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvAmvr.hor, mvAmvr.ver, 0 );
  curMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

  int bestIdx    = riMVPIdx;
  int bestMvBits = curMvBits;

  // Try all other candidates as predictor.
  for( int candIdx = 0; candIdx < amvpInfo.numCand; candIdx++ )
  {
    if( candIdx != riMVPIdx )
    {
      predAmvr = amvpInfo.mvCand[candIdx];
      predAmvr.changeTransPrecInternal2Amvr( imv );
      m_pcRdCost->setPredictor( predAmvr );

      int candBits = m_pcRdCost->getBitsOfVectorWithPredictor( mvAmvr.hor, mvAmvr.ver, 0 );
      candBits += m_auiMVPIdxCost[candIdx][AMVP_MAX_NUM_CANDS];

      if( candBits < bestMvBits )
      {
        bestMvBits = candBits;
        bestIdx    = candIdx;
      }
    }
  }

  if( bestIdx == riMVPIdx )
  {
    return; // current predictor is already the cheapest
  }

  // Switch to the cheaper predictor and swap the old bit cost for the new one.
  rcMvPred = amvpInfo.mvCand[bestIdx];
  riMVPIdx = bestIdx;

  const uint32_t prevBits = ruiBits;
  ruiBits = prevBits - curMvBits + bestMvBits;
  ruiCost = (ruiCost - m_pcRdCost->getCost( prevBits ))  + m_pcRdCost->getCost( ruiBits );
}
1949
1950
1951
// Computes the cost of using cMvCand as motion vector predictor: the luma block is predicted into
// predBuf with the (clipped) candidate, the SAD against origBuf is taken, and the cost of the
// m_auiMVPIdxCost[iMVPIdx][iMVPNum] index bits is added.
//
// cMvCand is taken by value, so the clipping does not affect the caller's vector.
// predBuf is overwritten with the luma prediction.
Distortion InterSearch::xGetTemplateCost( const CodingUnit& cu,
                                          CPelUnitBuf& origBuf,
                                          PelUnitBuf&  predBuf,
                                          Mv           cMvCand,
                                          int          iMVPIdx,
                                          int          iMVPNum,
                                          RefPicList   refPicList,
                                          int          iRefIdx
)
{
  const Picture* refPicture = cu.slice->getRefPic( refPicList, iRefIdx );
  clipMv( cMvCand, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );

  // Luma-only prediction with the candidate vector.
  xPredInterBlk( COMP_Y, cu, refPicture, cMvCand, predBuf, false, cu.slice->clpRngs[ COMP_Y ], false, false);

  // Distortion plus the cost of the MVP index bits.
  Distortion templateCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_SAD);
  templateCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );

  return templateCost;
}
1976
1977
// Motion estimation for one reference picture (refPicList / iRefIdxPred).
//
// Stages, in order:
//   1. Optional shortcut: for a uni-directional search with a non-default BCW index, a buffered
//      result is used if xReadBufferedUniMv provides one.
//   2. For bi-prediction (bBi), the search target is a copy of origBuf processed with
//      removeHighFreq against the prediction stored for the other list.
//   3. Integer search: pattern search (full-search method or bBi), TZ search seeded with an MV
//      cached for this block, or fast pattern search (whose result is then cached).
//   4. Refinement: fractional search for IMV_OFF / IMV_HPEL, integer refinement otherwise.
//
// rcMv, ruiBits and ruiCost receive the result; rcMvPred and riMVPIdx are passed on to the
// integer refinement. m_iSearchRange, m_lumaClpRng, m_cDistParam and the state of m_pcRdCost
// are modified along the way.
void InterSearch::xMotionEstimation(CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
{
  // Stage 1: buffered uni-directional result (only queried when all preconditions hold).
  const bool tryBufferedUniMv = cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi;
  if( tryBufferedUniMv && xReadBufferedUniMv( cu, refPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost ) )
  {
    return;
  }

  CHECK(refPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
  m_iSearchRange = m_aaiAdaptSR[refPicList][iRefIdxPred];

  const int searchRange = bBi ? m_bipredSearchRange : m_iSearchRange;
  double    distWeight  = 1.0;

  // Stage 2: choose the buffer the search is matched against.
  CPelUnitBuf  biTargetBuf;
  CPelUnitBuf* searchTarget = &origBuf;

  if( bBi ) // Bi-predictive ME
  {
    PelUnitBuf biTargetTmp = m_tmpStorageLCU.getCompactBuf( cu );
    // NOTE: Other buf contains predicted signal from another direction
    PelUnitBuf otherDirPred = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
    biTargetTmp.copyFrom(origBuf);
    biTargetTmp.removeHighFreq( otherDirPred, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs );

    biTargetBuf  = biTargetTmp;
    searchTarget = &biTargetBuf;
    distWeight   = xGetMEDistortionWeight( cu.BcwIdx, refPicList );
  }

  //  Search key pattern initialization
  CPelBuf lumaPattern = searchTarget->Y();

  m_lumaClpRng = cu.cs->slice->clpRngs[ COMP_Y ];

  const Picture* refPic  = cu.slice->getRefPic(refPicList, iRefIdxPred);
  CPelBuf        refLuma = refPic->getRecoBuf(cu.blocks[COMP_Y]);

  TZSearchStruct cStruct;
  cStruct.pcPatternKey  = &lumaPattern;
  cStruct.iRefStride    = refLuma.stride;
  cStruct.piRefY        = refLuma.buf;
  cStruct.imvShift      = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
  cStruct.useAltHpelIf  = cu.imv == IMV_HPEL;
  cStruct.zeroMV        = false;
  cStruct.uiBestSad     = MAX_DISTORTION;

  CodedCUInfo &relatedCU = m_modeCtrl->getBlkInfo( cu );

  // An MV cached for this block is only looked up for uni-directional search.
  Mv cachedIntMv;
  const bool hasCachedMv = !bBi && relatedCU.getMv( refPicList, iRefIdxPred, cachedIntMv );
  if( hasCachedMv )
  {
    cachedIntMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL);
  }

  Mv predQuarter = rcMvPred;
  predQuarter.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
  m_pcRdCost->setPredictor( predQuarter );
  m_pcRdCost->setCostScale(2);

  // Every integer search variant below uses the same sub-sampling mode.
  cStruct.subShiftMode = ( m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE1 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 ) ? 1 : 0;

  // Stage 3: integer search
  if( m_motionEstimationSearchMethod == VVENC_MESEARCH_FULL || bBi )
  {
    m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );

    // Cost of starting the search at 'startMv': distortion at the clipped integer position
    // plus the cost of coding that vector against the current predictor.
    auto evalStartMv = [&]( Mv startMv ) -> Distortion
    {
      xClipMvSearch(startMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines );
      startMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
      m_cDistParam.cur.buf = cStruct.piRefY + (startMv.ver * cStruct.iRefStride) + startMv.hor;

      Distortion startSad = m_cDistParam.distFunc(m_cDistParam);
      startSad += m_pcRdCost->getCostOfVectorWithPredictor(startMv.hor, startMv.ver, cStruct.imvShift);
      return startSad;
    };

    Mv         bestInitMv  = (bBi ? rcMv : rcMvPred);
    Distortion bestInitSad = evalStartMv( bestInitMv );

    // Also try the buffered uni-directional MVs as start points, skipping repeated vectors.
    Mv seenMv[BlkUniMvInfoBuffer::m_uniMvListMaxSize];

    for( int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++ )
    {
      const BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo( i );
      const Mv            candMv    = curMvInfo->uniMvs[refPicList][iRefIdxPred];
      seenMv[i] = candMv;

      bool alreadyTested = false;
      for( int j = 0; j < i && !alreadyTested; j++ )
      {
        alreadyTested = ( candMv == seenMv[j] );
      }
      if( alreadyTested )
      {
        continue;
      }

      const Distortion candSad = evalStartMv( candMv );
      if( candSad < bestInitSad )
      {
        bestInitSad = candSad;
        bestInitMv  = curMvInfo->uniMvs[refPicList][iRefIdxPred];
        m_cDistParam.maximumDistortionForEarlyExit = candSad;
      }
    }

    xSetSearchRange( cu, bestInitMv, searchRange, cStruct.searchRange );
    xPatternSearch ( cStruct, rcMv, ruiCost);
  }
  else if( hasCachedMv )
  {
    rcMv = cachedIntMv;
    xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiCost, false, true );
  }
  else
  {
    rcMv = rcMvPred;
    xPatternSearchFast(cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiCost );
    relatedCU.setMv( refPicList, iRefIdxPred, rcMv );
  }

  DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), cu.slice->poc, 0, ( int ) refPicList, ( int ) bBi, cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, ruiCost );

  // Stage 4: refinement
  if( cu.imv == IMV_OFF || cu.imv == IMV_HPEL )
  {
    // sub-pel refinement for sub-pel resolution
    Mv halfPelMv, qterPelMv;
    if( m_pcEncCfg->m_fastSubPel != 2 )
    {
      xPatternSearchFracDIF( cu, refPicList, iRefIdxPred, cStruct, rcMv, halfPelMv, qterPelMv, ruiCost );
    }
    m_pcRdCost->setCostScale( 0 );

    // Combine integer, half and quarter offsets into one vector.
    rcMv <<= 2;
    rcMv  += ( halfPelMv <<= 1 );
    rcMv  += qterPelMv;

    // Replace the MV bit cost contained in ruiCost by the cost of all bits, weighting only the distortion part.
    const uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.hor, rcMv.ver, cStruct.imvShift );
    ruiBits += mvBits;
    ruiCost = ( Distortion ) ( floor( distWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( mvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) );
    rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
  }
  else // integer refinement for integer-pel and 4-pel resolution
  {
    rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
    xPatternSearchIntRefine( cu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, distWeight);
  }
  DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)refPicList, (int)bBi, ruiCost, ruiBits, rcMv.hor << 2, rcMv.ver << 2);
}
2134
2135
// Clamps rcMv (internal MV precision) to the window allowed for a block at 'pos' of size 'size'.
//
// Horizontally and at the top, the window reaches maxCUSize + 8 samples beyond the picture origin
// side and 8 samples beyond the opposite picture edge. At the bottom, when ifpLines is non-zero and
// the CTU row (current row + ifpLines + 1) still lies inside the picture, the limit is derived from
// that CTU row instead of the picture height.
void InterSearch::xClipMvSearch( Mv& rcMv, const Position& pos, const struct Size& size, const PreCalcValues& pcv, const int ifpLines )
{
  const int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int margin  = 8;

  // Horizontal limits.
  const int horMax = ( pcv.lumaWidth + margin - ( int ) pos.x - 1 ) << mvShift;
  const int horMin = ( -( int ) pcv.maxCUSize   - margin - ( int ) pos.x + 1 ) * (1 << mvShift);

  // First CTU row (index) that must not be referenced when ifpLines is active.
  const auto fppCtuRowEnd = (pos.y >> pcv.maxCUSizeLog2) + ifpLines + 1;

  const int maxLumaHeight = ifpLines && (fppCtuRowEnd < pcv.heightInCtus) ?
    (fppCtuRowEnd << pcv.maxCUSizeLog2 ) - size.height - 4  // 4 samples from DCTIF vertical bottom part
    : pcv.lumaHeight + margin;

  // Vertical limits.
  const int verMax = ( maxLumaHeight - ( int ) pos.y - 1 ) << mvShift;
  const int verMin = ( -( int ) pcv.maxCUSize   - margin - ( int ) pos.y + 1 ) * (1 << mvShift);

  rcMv.hor = std::min( horMax, std::max( horMin, rcMv.hor ) );
  rcMv.ver = std::min( verMax, std::max( verMin, rcMv.ver ) );
}
2154
2155
// Unconditionally moves mv.ver up so that the lowest referenced row
// (yB + nH + 4 + integer part of mv.ver - 1) lands on the last row of the CTU row
// (current CTU row + ifpLines). The caller is expected to call this only when the
// reference actually exceeds that row; CHECKD flags the opposite case.
void InterSearch::xClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
{
  const int yCompScale  = 0;
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;

  // Last allowed reference row and the lowest row actually referenced.
  const int lastAllowedRow = ( ( ( yB >> ctuLogScale ) + ifpLines + 1 ) << ctuLogScale ) - 1;
  const int lowestRefRow   = yB + nH + ( 4 >> yCompScale ) + (mv.ver >> mvPrecShift) - 1;
  CHECKD( lowestRefRow <= lastAllowedRow, "Not expected" );

  const int overshootRows = lowestRefRow - lastAllowedRow;
  mv.ver -= overshootRows << mvPrecShift;
}
2165
2166
/**
 * \brief Check a vertical MV component against the allowed CTU line and clip it if it exceeds it.
 *
 * Nothing is done for blocks whose y position is at or beyond
 * (heightInCtus - 1 - ifpLines) << log2CtuSize. Otherwise, if the last referenced row
 * (yB + nH + 4 + integer part of mv.ver - 1) lies below the last row of CTU line
 * (yB >> log2CtuSize) + ifpLines, mv.ver is reduced by the excess (internal precision).
 *
 * \param mv       vector whose vertical component may be adjusted
 * \param yB       luma y position of the block
 * \param nH       luma height of the block
 * \param ifpLines number of CTU lines below the current one that may be referenced
 * \param pcv      pre-calculated picture/CTU dimensions
 */
void InterSearch::xCheckAndClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
{
  const int yCompScale  = 0;
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;
  const int yBMax       = ( pcv.heightInCtus - 1 - ifpLines ) << ctuLogScale;

  if( yB >= yBMax )
  {
    // block is outside the region in which the restriction is applied
    return;
  }

  const int lastAllowedRow = ( ( ( yB >> ctuLogScale ) + ifpLines + 1 ) << ctuLogScale ) - 1;
  const int lastUsedRow    = yB + nH + ( 4 >> yCompScale ) + ( mv.ver >> mvPrecShift ) - 1;

  if( lastUsedRow <= lastAllowedRow )
  {
    return;
  }

  // clip MV
  mv.ver -= ( lastUsedRow - lastAllowedRow ) << mvPrecShift;
}
2183
2184
void InterSearch::xSetSearchRange ( const CodingUnit& cu,
2185
                                    const Mv& cMvPred,
2186
                                    const int iSrchRng,
2187
                                    SearchRange& sr )
2188
0
{
2189
0
  const PreCalcValues& pcv = *cu.cs->pcv;
2190
0
  const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
2191
0
  Mv cFPMvPred = cMvPred;
2192
0
  clipMv( cFPMvPred, cu.lumaPos(), cu.lumaSize(), pcv );
2193
2194
0
  Mv mvTL(cFPMvPred.hor - (iSrchRng << iMvShift), cFPMvPred.ver - (iSrchRng << iMvShift));
2195
0
  Mv mvBR(cFPMvPred.hor + (iSrchRng << iMvShift), cFPMvPred.ver + (iSrchRng << iMvShift));
2196
2197
0
  clipMv( mvTL, cu.lumaPos(), cu.lumaSize(), pcv);
2198
0
  xClipMvSearch( mvBR, cu.lumaPos(), cu.lumaSize(), pcv, m_pcEncCfg->m_ifpLines );
2199
2200
0
  mvTL.divideByPowerOf2( iMvShift );
2201
0
  mvBR.divideByPowerOf2( iMvShift );
2202
2203
0
  sr.left   = mvTL.hor;
2204
0
  sr.top    = mvTL.ver;
2205
0
  sr.right  = mvBR.hor;
2206
0
  sr.bottom = mvBR.ver;
2207
0
}
2208
2209
2210
void InterSearch::xPatternSearch( TZSearchStruct&  cStruct,
2211
                                  Mv&                 rcMv,
2212
                                  Distortion&         ruiSAD )
2213
0
{
2214
0
  Distortion  uiSad;
2215
0
  Distortion  uiSadBest = MAX_DISTORTION;
2216
0
  int         iBestX = 0;
2217
0
  int         iBestY = 0;
2218
2219
  //-- jclee for using the SAD function pointer
2220
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2221
2222
0
  const SearchRange& sr = cStruct.searchRange;
2223
2224
0
  const Pel* piRef = cStruct.piRefY + (sr.top * cStruct.iRefStride);
2225
0
  for ( int y = sr.top; y <= sr.bottom; y++ )
2226
0
  {
2227
0
    for ( int x = sr.left; x <= sr.right; x++ )
2228
0
    {
2229
      //  find min. distortion position
2230
0
      m_cDistParam.cur.buf = piRef + x;
2231
2232
0
      uiSad = m_cDistParam.distFunc( m_cDistParam );
2233
2234
      // motion cost
2235
0
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y, cStruct.imvShift );
2236
2237
0
      if ( uiSad < uiSadBest )
2238
0
      {
2239
0
        uiSadBest = uiSad;
2240
0
        iBestX    = x;
2241
0
        iBestY    = y;
2242
0
        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2243
0
      }
2244
0
    }
2245
0
    piRef += cStruct.iRefStride;
2246
0
  }
2247
0
  rcMv.set( iBestX, iBestY );
2248
2249
0
  cStruct.uiBestSad = uiSadBest; // th for testing
2250
0
  ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY, cStruct.imvShift );
2251
0
  return;
2252
0
}
2253
2254
2255
void InterSearch::xPatternSearchFast( const CodingUnit& cu,
2256
                                      RefPicList            refPicList,
2257
                                      int                   iRefIdxPred,
2258
                                      TZSearchStruct&       cStruct,
2259
                                      Mv&                   rcMv,
2260
                                      Distortion&           ruiSAD )
2261
0
{
2262
0
  if( cu.cs->picture->useME )
2263
0
  {
2264
0
    switch ( m_motionEstimationSearchMethodSCC )
2265
0
    {
2266
0
      case 3: //VVENC_MESEARCH_DIAMOND_FAST:
2267
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true, true );
2268
0
        break;
2269
0
      case 2: //VVENC_MESEARCH_DIAMOND:
2270
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2271
0
        break;
2272
0
      default:
2273
0
        THROW("shouldn't get here");
2274
0
        break;
2275
0
    }
2276
0
    return;
2277
0
  }
2278
2279
0
  switch ( m_motionEstimationSearchMethod )
2280
0
  {
2281
0
    case VVENC_MESEARCH_DIAMOND_FAST:
2282
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false, true );
2283
0
      break;
2284
0
    case VVENC_MESEARCH_DIAMOND:
2285
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false );
2286
0
      break;
2287
0
    case VVENC_MESEARCH_DIAMOND_ENHANCED:
2288
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2289
0
      break;
2290
0
    case VVENC_MESEARCH_FULL:
2291
0
    default:
2292
0
      THROW("shouldn't get here");
2293
0
      break;
2294
0
  }
2295
0
}
2296
2297
2298
void InterSearch::xTZSearch( const CodingUnit& cu,
2299
                             RefPicList            refPicList,
2300
                             int                   iRefIdxPred,
2301
                             TZSearchStruct&       cStruct,
2302
                             Mv&                   rcMv,
2303
                             Distortion&           ruiSAD,
2304
                             const bool            bExtendedSettings,
2305
                             const bool            bFastSettings)
2306
0
{
2307
0
  const bool bUseRasterInFastMode                    = true; //toggle this to further reduce runtime
2308
0
  const bool bUseAdaptiveRaster                      = bExtendedSettings;
2309
0
  const int  iRaster                                 = (bFastSettings && bUseRasterInFastMode) ? 8 : 5;
2310
0
  const bool bTestZeroVector                         = true && !bFastSettings;
2311
0
  const bool bTestZeroVectorStart                    = bExtendedSettings;
2312
0
  const bool bTestZeroVectorStop                     = false;
2313
0
  const bool bFirstSearchDiamond                     = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2314
0
  const bool bFirstCornersForDiamondDist1            = bExtendedSettings;
2315
0
  const bool bFirstSearchStop                        = m_pcEncCfg->m_bFastMEAssumingSmootherMVEnabled;
2316
0
  const uint32_t uiFirstSearchRounds                 = bFastSettings ? (bUseRasterInFastMode?3:2) : 3;     // first search stop X rounds after best match (must be >=1)
2317
0
  const bool bEnableRasterSearch                     = bFastSettings ? bUseRasterInFastMode : true;
2318
0
  const bool bAlwaysRasterSearch                     = bExtendedSettings;  // true: BETTER but factor 2 slower
2319
0
  const bool bRasterRefinementEnable                 = false; // enable either raster refinement or star refinement
2320
0
  const bool bRasterRefinementDiamond                = false; // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2321
0
  const bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings;
2322
0
  const bool bStarRefinementEnable                   = true;  // enable either star refinement or raster refinement
2323
0
  const bool bStarRefinementDiamond                  = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2324
0
  const bool bStarRefinementCornersForDiamondDist1   = bExtendedSettings;
2325
0
  const bool bStarRefinementStop                     = bFastSettings;
2326
0
  const uint32_t uiStarRefinementRounds              = 2;  // star refinement stop X rounds after best match (must be >=1)
2327
0
  const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
2328
2329
0
  int iSearchRange = m_iSearchRange;
2330
0
  xClipMvSearch( rcMv, cu.lumaPos(), cu.lumaSize(),*cu.cs->pcv, m_pcEncCfg->m_ifpLines );
2331
0
  rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
2332
0
  rcMv.divideByPowerOf2(2);
2333
2334
  //
2335
0
  m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad;
2336
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2337
2338
  // set rcMv (Median predictor) as start point and as best point
2339
0
  xTZSearchHelp( cStruct, rcMv.hor, rcMv.ver, 0, 0 );
2340
2341
  // test whether zero Mv is better start point than Median predictor
2342
0
  if ( bTestZeroVector )
2343
0
  {
2344
0
    if( ( rcMv.hor != 0 || rcMv.ver != 0 ) && ( 0 != cStruct.iBestX || 0 != cStruct.iBestY ) )
2345
0
    {
2346
      // only test 0-vector if not obviously previously tested.
2347
0
      xTZSearchHelp( cStruct, 0, 0, 0, 0 );
2348
0
    }
2349
0
  }
2350
2351
0
  SearchRange& sr = cStruct.searchRange;
2352
2353
0
  for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
2354
0
  {
2355
0
    const BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
2356
0
    Mv cTmpMv = curMvInfo->uniMvs[refPicList][iRefIdxPred];
2357
2358
0
    xClipMvSearch(cTmpMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines);
2359
0
    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
2360
0
    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
2361
2362
0
    Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
2363
0
    uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
2364
0
    if (uiSad < cStruct.uiBestSad)
2365
0
    {
2366
0
      cStruct.uiBestSad = uiSad;
2367
0
      cStruct.iBestX = cTmpMv.hor;
2368
0
      cStruct.iBestY = cTmpMv.ver;
2369
0
      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2370
0
    }
2371
0
  }
2372
2373
0
  {
2374
    // set search range
2375
0
    Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
2376
0
    currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL;
2377
0
    xSetSearchRange(cu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr );
2378
0
  }
2379
2380
  // starting point after initial examination
2381
0
  int  iDist = 0;
2382
0
  int  iStartX = cStruct.iBestX;
2383
0
  int  iStartY = cStruct.iBestY;
2384
2385
  // Early termination of motion search after selection of starting candidate
2386
0
  if( m_pcEncCfg->m_bIntegerET )
2387
0
  {
2388
0
    bool isLargeBlock = cu.lumaSize().area() > 64;
2389
0
    xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, 1, false ); // 4-point small diamond search
2390
0
    if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2391
0
    {
2392
0
      if ( isLargeBlock )
2393
0
      {
2394
0
        xTZ4PointSquareSearch( cStruct, iStartX, iStartY, 1 );
2395
0
        if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2396
0
        {
2397
          // write out best match
2398
0
          rcMv.set( cStruct.iBestX, cStruct.iBestY );
2399
0
          ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2400
0
          return;
2401
0
        }
2402
0
      }
2403
0
      else
2404
0
      {
2405
        // write out best match
2406
0
        rcMv.set( cStruct.iBestX, cStruct.iBestY );
2407
0
        ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2408
0
        return;
2409
0
      }
2410
0
    }
2411
0
  }
2412
2413
  // start search
2414
0
  iDist = 0;
2415
0
  iStartX = cStruct.iBestX;
2416
0
  iStartY = cStruct.iBestY;
2417
2418
0
  const bool bBestCandidateZero = ( cStruct.iBestX == 0 ) && ( cStruct.iBestY == 0 );
2419
2420
  // first search around best position up to now.
2421
  // The following works as a "subsampled/log" window search around the best candidate
2422
0
  for( iDist = 1; iDist <= iSearchRange; iDist *= 2 )
2423
0
  {
2424
0
    if( bFirstSearchDiamond == 1 )
2425
0
    {
2426
0
      xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 );
2427
0
    }
2428
0
    else
2429
0
    {
2430
0
      xTZ8PointSquareSearch( cStruct, iStartX, iStartY, iDist );
2431
0
    }
2432
2433
0
    if( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
2434
0
    {
2435
0
      break;
2436
0
    }
2437
0
  }
2438
2439
0
  if( bNewZeroNeighbourhoodTest )
2440
0
  {
2441
0
    if( bTestZeroVectorStart && !bBestCandidateZero )
2442
0
    {
2443
0
      for( iDist = 1; iDist <= ( iSearchRange >> 1 ); iDist *= 2 )
2444
0
      {
2445
0
        xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false );
2446
0
        if( bTestZeroVectorStop && ( cStruct.uiBestRound > 2 ) ) // stop criterion
2447
0
        {
2448
0
          break;
2449
0
        }
2450
0
      }
2451
0
    }
2452
0
  }
2453
2454
  // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2455
0
  if ( cStruct.uiBestDistance == 1 )
2456
0
  {
2457
0
    cStruct.uiBestDistance = 0;
2458
0
    xTZ2PointSearch( cStruct );
2459
0
  }
2460
2461
  // raster search if distance is too big
2462
0
  if( bUseAdaptiveRaster )
2463
0
  {
2464
0
    int iWindowSize     = iRaster;
2465
0
    SearchRange localsr = sr;
2466
2467
0
    if( !( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) ) ) )
2468
0
    {
2469
0
      iWindowSize++;
2470
0
      localsr.left    /= 2;
2471
0
      localsr.right   /= 2;
2472
0
      localsr.top     /= 2;
2473
0
      localsr.bottom  /= 2;
2474
0
    }
2475
2476
0
    cStruct.uiBestDistance = iWindowSize;
2477
2478
0
    for( iStartY = localsr.top; iStartY <= localsr.bottom; iStartY += iWindowSize )
2479
0
    {
2480
0
      for( iStartX = localsr.left; iStartX <= localsr.right; iStartX += iWindowSize )
2481
0
      {
2482
0
        xTZSearchHelp( cStruct, iStartX, iStartY, 0, iWindowSize );
2483
0
      }
2484
0
    }
2485
0
  }
2486
0
  else
2487
0
  {
2488
0
    if( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) || bAlwaysRasterSearch ) )
2489
0
    {
2490
0
      cStruct.uiBestDistance = iRaster;
2491
2492
0
      for( iStartY = sr.top; iStartY <= sr.bottom; iStartY += iRaster )
2493
0
      {
2494
0
        for( iStartX = sr.left; iStartX <= sr.right; iStartX += iRaster )
2495
0
        {
2496
0
          xTZSearchHelp( cStruct, iStartX, iStartY, 0, iRaster );
2497
0
        }
2498
0
      }
2499
0
    }
2500
0
  }
2501
2502
  // raster refinement
2503
2504
0
  if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
2505
0
  {
2506
0
    while ( cStruct.uiBestDistance > 0 )
2507
0
    {
2508
0
      iStartX = cStruct.iBestX;
2509
0
      iStartY = cStruct.iBestY;
2510
0
      if ( cStruct.uiBestDistance > 1 )
2511
0
      {
2512
0
        iDist = cStruct.uiBestDistance >>= 1;
2513
0
        if ( bRasterRefinementDiamond == 1 )
2514
0
        {
2515
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 );
2516
0
        }
2517
0
        else
2518
0
        {
2519
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2520
0
        }
2521
0
      }
2522
2523
      // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2524
0
      if ( cStruct.uiBestDistance == 1 )
2525
0
      {
2526
0
        cStruct.uiBestDistance = 0;
2527
0
        if ( cStruct.ucPointNr != 0 )
2528
0
        {
2529
0
          xTZ2PointSearch( cStruct );
2530
0
        }
2531
0
      }
2532
0
    }
2533
0
  }
2534
2535
  // star refinement
2536
0
  if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
2537
0
  {
2538
0
    while ( cStruct.uiBestDistance > 0 )
2539
0
    {
2540
0
      iStartX = cStruct.iBestX;
2541
0
      iStartY = cStruct.iBestY;
2542
0
      cStruct.uiBestDistance = 0;
2543
0
      cStruct.ucPointNr = 0;
2544
0
      for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 )
2545
0
      {
2546
0
        if ( bStarRefinementDiamond == 1 )
2547
0
        {
2548
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 );
2549
0
        }
2550
0
        else
2551
0
        {
2552
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2553
0
        }
2554
0
        if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
2555
0
        {
2556
0
          break;
2557
0
        }
2558
0
      }
2559
2560
      // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1
2561
0
      if ( cStruct.uiBestDistance == 1 )
2562
0
      {
2563
0
        cStruct.uiBestDistance = 0;
2564
0
        if ( cStruct.ucPointNr != 0 )
2565
0
        {
2566
0
          xTZ2PointSearch( cStruct );
2567
0
        }
2568
0
      }
2569
0
    }
2570
0
  }
2571
2572
  // write out best match
2573
0
  rcMv.set( cStruct.iBestX, cStruct.iBestY );
2574
0
  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2575
0
}
2576
2577
/**
 * \brief Refine an integer-precision (AMVR) motion vector jointly with the AMVP candidate choice.
 *
 * For the current position and its 8 neighbours (in units of the active AMVR precision) and for
 * every AMVP candidate, the distortion (scaled by \p fWeight) plus the cost of the MVP index and
 * MVD bits is evaluated. The cheapest combination is written back.
 *
 * \param cu        current coding unit; imv must not be IMV_OFF or IMV_HPEL
 * \param cStruct   search state providing original block and reference buffer
 * \param rcMv      vector to refine (internal precision), updated with the best vector
 * \param rcMvPred  predictor matching \p riMVPIdx on entry, updated with the chosen candidate
 * \param riMVPIdx  AMVP index, updated with the chosen candidate index
 * \param ruiBits   bit count; the old MVP index cost is removed and the new best bits are added
 * \param ruiCost   receives the resulting cost, or MAX_DISTORTION if no position was accepted
 * \param amvpInfo  AMVP candidate list
 * \param fWeight   weight applied to the measured distortion
 */
void InterSearch::xPatternSearchIntRefine(CodingUnit& cu, TZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
{
  CHECK( cu.imv == IMV_OFF || cu.imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used.");
  CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");

  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, 0, m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0 );

  // -> set MV scale for cost calculation to QPEL (0)
  m_pcRdCost->setCostScale ( 0 );

  // subtract old MVP costs because costs for all newly tested MVPs are added in here
  ruiBits -= m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

  // centre position followed by its 8 neighbours
  const Mv neighbourOffsets[9] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };

  // MVD of the incoming vector relative to both AMVP candidates
  Mv cBaseMvd[2];
  cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]);
  cBaseMvd[1] = (rcMv - amvpInfo.mvCand[1]);
  CHECK( (cBaseMvd[0].hor & 0x03) != 0 || (cBaseMvd[0].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue.");
  CHECK( (cBaseMvd[1].hor & 0x03) != 0 || (cBaseMvd[1].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue.");

  cBaseMvd[0].roundTransPrecInternal2Amvr(cu.imv);
  cBaseMvd[1].roundTransPrecInternal2Amvr(cu.imv);

  Distortion lastSatd = 0;                 // distortion of the most recently measured position
  Distortion bestCost = MAX_DISTORTION;
  Mv         bestMv   = rcMv;
  int        bestBits = 0;
  int        bestIdx  = riMVPIdx;

  // test best integer position and all 8 neighboring positions
  for( int pos = 0; pos < 9; pos++ )
  {
    Mv cTestMv[2];

    // test both AMVP candidates for each position
    for( int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++ )
    {
      Mv& testMv = cTestMv[iMVPIdx];

      testMv = neighbourOffsets[pos];
      testMv.changeTransPrecAmvr2Internal(cu.imv);
      testMv += cBaseMvd[iMVPIdx];
      testMv += amvpInfo.mvCand[iMVPIdx];

      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), testMv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        xClipMvToFppLine( testMv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
        testMv.roundTransPrecInternal2AmvrVertical(cu.imv);
      }

      // the distortion is only measured again when the second candidate leads to a different vector
      if( iMVPIdx == 0 || cTestMv[0] != cTestMv[1] )
      {
        Mv clippedMv = testMv;
        clipMv(clippedMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
        m_cDistParam.cur.buf = cStruct.piRefY  + cStruct.iRefStride * (clippedMv.ver >>  MV_FRACTIONAL_BITS_INTERNAL) + (clippedMv.hor >> MV_FRACTIONAL_BITS_INTERNAL);
        lastSatd = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight);
      }
      Distortion cost = lastSatd;

      // rate: MVP index plus MVD at the active AMVR precision
      int mvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];

      Mv pred = amvpInfo.mvCand[iMVPIdx];
      pred.changeTransPrecInternal2Amvr(cu.imv);
      m_pcRdCost->setPredictor( pred );

      Mv mv = testMv;
      mv.changeTransPrecInternal2Amvr(cu.imv);
      mvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );

      cost += m_pcRdCost->getCost(mvBits);

      if( cost < bestCost )
      {
        bestCost = cost;
        bestMv   = testMv;
        bestIdx  = iMVPIdx;
        bestBits = mvBits;
      }
    }
  }

  if( bestCost == MAX_DISTORTION )
  {
    ruiCost = MAX_DISTORTION;
    return;
  }

  rcMv     = bestMv;
  rcMvPred = amvpInfo.mvCand[bestIdx];
  riMVPIdx = bestIdx;
  m_pcRdCost->setPredictor( rcMvPred );

  ruiBits += bestBits;
  // taken from JEM 5.0
  // verify since it makes no sense to subtract Lambda*(Rmvd+Rmvpidx) from D+Lambda(Rmvd)
  // this would take the rate for the MVP idx out of the cost calculation
  // however this rate is always 1 so impact is small
  ruiCost = bestCost - m_pcRdCost->getCost(bestBits) + m_pcRdCost->getCost(ruiBits);
  // taken from JEM 5.0
  // verify since it makes no sense to add rate for MVDs twice
}
2678
2679
void InterSearch::xPatternSearchFracDIF(
2680
  const CodingUnit& cu,
2681
  RefPicList            refPicList,
2682
  int                   iRefIdx,
2683
  TZSearchStruct&    cStruct,
2684
  const Mv&             rcMvInt,
2685
  Mv&                   rcMvHalf,
2686
  Mv&                   rcMvQter,
2687
  Distortion&           ruiCost
2688
)
2689
0
{
2690
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_FRAC_PEL );
2691
2692
  //  Reference pattern initialization (integer scale)
2693
0
  int         iOffset    = rcMvInt.hor + rcMvInt.ver * cStruct.iRefStride;
2694
0
  CPelBuf cPatternRoi(cStruct.piRefY + iOffset, cStruct.iRefStride, *cStruct.pcPatternKey);
2695
2696
  //  Half-pel refinement
2697
0
  m_pcRdCost->setCostScale(1);
2698
0
  if( 0 == m_pcEncCfg->m_fastSubPel )
2699
0
  {
2700
0
    xExtDIFUpSamplingH( &cPatternRoi, cStruct.useAltHpelIf );
2701
0
  }
2702
2703
0
  rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
2704
0
  Mv baseRefMv(0, 0);
2705
0
  Distortion  uiDistBest = MAX_DISTORTION;
2706
0
  int patternId = 41;
2707
0
  ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2708
0
  patternId -= ( m_pcEncCfg->m_fastSubPel == 1 ? 41 : 0 );
2709
2710
2711
  //  quarter-pel refinement
2712
0
  if( cStruct.imvShift == IMV_OFF && 0 != patternId )
2713
0
  {
2714
0
    PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL );
2715
0
    m_pcRdCost->setCostScale( 0 );
2716
0
    xExtDIFUpSamplingQ( &cPatternRoi, rcMvHalf, patternId );
2717
0
    baseRefMv = rcMvHalf;
2718
0
    baseRefMv <<= 1;
2719
2720
0
    rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
2721
0
    rcMvQter += rcMvHalf;  rcMvQter <<= 1;
2722
0
    ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2723
0
  }
2724
2725
0
}
2726
2727
/**
 * \brief Distortion of a symmetric MVD pair.
 *
 * Luma predictions are generated for the current list (\p cCurMvField) and for the opposite list
 * (\p cTarMvField), both with clipped vectors. A copy of the original has the current-list
 * prediction removed via removeHighFreq(), and the Hadamard distortion between that and the
 * target-list prediction is weighted with xGetMEDistortionWeight().
 *
 * \param cu             current coding unit
 * \param origBuf        original samples of the block
 * \param eCurRefPicList list of \p cCurMvField; the target list is the other one
 * \param cCurMvField    motion of the current list
 * \param cTarMvField    motion of the target list
 * \param BcwIdx         not read here; the weight is derived from cu.BcwIdx
 * \return weighted distortion, rounded down
 */
Distortion InterSearch::xGetSymCost( const CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int BcwIdx )
{
  const RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList);

  // get prediction of eCurRefPicList
  PelUnitBuf     predCur   = m_tmpPredStorage[eCurRefPicList].getCompactBuf( cu );
  const Picture* refPicCur = cu.slice->getRefPic( eCurRefPicList, cCurMvField.refIdx );
  Mv             mvCur     = cCurMvField.mv;
  clipMv( mvCur, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
  xPredInterBlk( COMP_Y, cu, refPicCur, mvCur, predCur, false, cu.slice->clpRngs[ COMP_Y ], false, false );

  // get prediction of eTarRefPicList
  PelUnitBuf     predTar   = m_tmpPredStorage[eTarRefPicList].getCompactBuf( cu );
  const Picture* refPicTar = cu.slice->getRefPic( eTarRefPicList, cTarMvField.refIdx );
  Mv             mvTar     = cTarMvField.mv;
  clipMv( mvTar, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
  xPredInterBlk( COMP_Y, cu, refPicTar, mvTar, predTar, false, cu.slice->clpRngs[ COMP_Y ], false, false );

  // original with the current-list prediction taken out
  PelUnitBuf residualTarget = m_tmpStorageLCU.getCompactBuf( cu );
  residualTarget.copyFrom( origBuf );
  residualTarget.removeHighFreq( predCur, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, eTarRefPicList )*/ );

  const double fWeight = xGetMEDistortionWeight( cu.BcwIdx, eTarRefPicList );

  // calc distortion
  return ( Distortion ) floor( fWeight * ( double ) m_pcRdCost->getDistPart( residualTarget.Y(), predTar.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD ) );
}
2756
2757
/**
 * \brief Iterative pattern search for a symmetric MVD pair.
 *
 * In every round the points of the selected pattern around the current centre are tested. For
 * each point the mirrored target-list vector is derived, and the MVD rate cost plus xGetSymCost()
 * is compared with the best cost so far. The next round only tests the directions next to the
 * best one. The search stops when a round brings no improvement or after \p uiMaxSearchRounds.
 *
 * \param cu                current coding unit
 * \param origBuf           original samples of the block
 * \param rcMvCurPred       MV predictor of the current list
 * \param rcMvTarPred       MV predictor of the target list
 * \param refPicList        current reference list
 * \param rCurMvField       in: search centre; out: best current-list motion
 * \param rTarMvField       in: target reference index; out: best target-list motion
 * \param uiMinCost         cost to beat
 * \param SearchPattern     0 = cross, 1 = square, 2 = diamond, 3 = hexagon; anything else throws
 * \param nSearchStepShift  left shift applied to the pattern offsets
 * \param uiMaxSearchRounds maximum number of rounds
 * \param BcwIdx            forwarded to xGetSymCost()
 * \return best cost found (or \p uiMinCost if nothing better was found)
 */
Distortion InterSearch::xSymRefineMvSearch( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, 
                                            MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int BcwIdx )
{
  const Mv mvSearchOffsetCross[4]   = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
  const Mv mvSearchOffsetSquare[8]  = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
  const Mv mvSearchOffsetDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 ,  0 ) , Mv( -1 , 1 ) };
  const Mv mvSearchOffsetHexagon[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 ,  2 ) , Mv( -2 ,  0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) };

  // order in which the diamond points are visited in the first round of the fast mode
  const int positionLut[ 8 ] = { 0, 2, 4, 6, 1, 3, 5, 7 };

  int nDirectStart = 0, nDirectEnd = 0, nDirectRounding = 0, nDirectMask = 0;
  const Mv* pSearchOffset = nullptr;

  switch( SearchPattern )
  {
    case 0:
      nDirectEnd      = 3;
      nDirectRounding = 4;
      nDirectMask     = 0x03;
      pSearchOffset   = mvSearchOffsetCross;
      break;
    case 1:
      nDirectEnd      = 7;
      nDirectRounding = 8;
      nDirectMask     = 0x07;
      pSearchOffset   = mvSearchOffsetSquare;
      break;
    case 2:
      nDirectEnd      = 7;
      nDirectRounding = 8;
      nDirectMask     = 0x07;
      pSearchOffset   = mvSearchOffsetDiamond;
      break;
    case 3:
      nDirectEnd      = 5;
      pSearchOffset   = mvSearchOffsetHexagon;
      break;
    default:
      THROW( "Invalid search pattern" );
      break;
  }

  for( uint32_t uiRound = 0; uiRound < uiMaxSearchRounds; uiRound++ )
  {
    // special handling of the first diamond round when m_SMVD > 1
    const bool    reorderedFirstRound = m_pcEncCfg->m_SMVD > 1 && 2 == SearchPattern && 0 == uiRound;
    const MvField mvCurCenter         = rCurMvField;
    Distortion    roundZeroBestCost   = MAX_DISTORTION;
    int           nBestDirect         = -1;

    for( int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++ )
    {
      // terminate the search if none of the first four tested points has provided an improvement
      if( reorderedFirstRound && 4 == nIdx && roundZeroBestCost > uiMinCost )
      {
        break;
      }

      // map the running index to a direction of the pattern (with wrap-around)
      int nDirect;
      if( SearchPattern == 3 )
      {
        if( nIdx < 0 )
        {
          nDirect = nIdx + 6;
        }
        else if( nIdx >= 6 )
        {
          nDirect = nIdx - 6;
        }
        else
        {
          nDirect = nIdx;
        }
      }
      else
      {
        const int wrapped = ( nIdx + nDirectRounding ) & nDirectMask;
        nDirect = reorderedFirstRound ? positionLut[ wrapped ] : wrapped;
      }

      Mv mvOffset = pSearchOffset[nDirect];
      mvOffset <<= nSearchStepShift;

      MvField mvCand = mvCurCenter;
      MvField mvPair;
      mvCand.mv += mvOffset;
      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvCand.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        continue; // Skip this pos
      }

      // get MVD cost
      Mv pred = rcMvCurPred;
      pred.changeTransPrecInternal2Amvr(cu.imv);
      m_pcRdCost->setPredictor( pred );
      m_pcRdCost->setCostScale( 0 );
      Mv mv = mvCand.mv;
      mv.changeTransPrecInternal2Amvr(cu.imv);
      const uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
      Distortion     uiCost   = m_pcRdCost->getCost( uiMvBits );

      // get MVD pair and set target MV
      mvPair.refIdx = rTarMvField.refIdx;
      mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) );

      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvPair.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        continue; // Skip this pos
      }

      uiCost += xGetSymCost( cu, origBuf, refPicList, mvCand, mvPair, BcwIdx );
      if( uiCost < uiMinCost )
      {
        uiMinCost   = uiCost;
        rCurMvField = mvCand;
        rTarMvField = mvPair;
        nBestDirect = nDirect;
      }

      // remember the best cost among the first four points of the reordered first round
      if( reorderedFirstRound && 4 > nIdx && uiCost < roundZeroBestCost )
      {
        roundZeroBestCost = uiCost;
      }
    }

    if( nBestDirect == -1 )
    {
      // no improvement in this round
      break;
    }

    int nStep = 1;
    if( (SearchPattern == 1 || SearchPattern == 2) && m_pcEncCfg->m_SMVD <= 1 )
    {
      // test at most 3 points in fast presets
      nStep = 2 - ( nBestDirect & 0x01 );
    }
    nDirectStart = nBestDirect - nStep;
    nDirectEnd   = nBestDirect + nStep;
  }

  return(uiMinCost);
}
2887
2888
2889
/**
 * \brief Symmetric-MVD motion estimation: diamond refinement, optionally followed by one cross round.
 *
 * The step shift starts at MV_FRACTIONAL_BITS_DIFF and is increased by 1 for IMV_HPEL, otherwise by
 * 2 * cu.imv. The number of diamond rounds is 8 >> cu.imv. The cross refinement is only run while
 * m_SMVD is below 3.
 *
 * \param cu          current coding unit
 * \param origBuf     original samples of the block
 * \param rcMvCurPred MV predictor of the current list
 * \param rcMvTarPred MV predictor of the target list
 * \param refPicList  current reference list
 * \param rCurMvField in/out: current-list motion
 * \param rTarMvField in/out: target-list motion
 * \param ruiCost     in: cost to beat; out: best cost after refinement
 * \param BcwIdx      forwarded to xSymRefineMvSearch()
 */
void InterSearch::xSymMotionEstimation( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int BcwIdx )
{
  // Refine Search
  const int stepShift     = MV_FRACTIONAL_BITS_DIFF + ( cu.imv == IMV_HPEL ? 1 : ( cu.imv << 1 ) );
  const int diamondRounds = 8 >> cu.imv;
  const int crossRounds   = 1;

  // pattern 2: diamond
  ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 2, stepShift, diamondRounds, BcwIdx );

  if( m_pcEncCfg->m_SMVD >= 3 )
  {
    return;
  }

  // pattern 0: cross
  ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 0, stepShift, crossRounds, BcwIdx );
}
2905
2906
2907
/**
2908
* \brief Generate half-sample interpolated block
2909
*
2910
* \param pattern Reference picture ROI
2911
* \param useAltHpelIf Flag forwarded to the interpolation filter; it also selects the filter length used here
2912
*/
2913
void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
2914
0
{
2915
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_HPEL_INTERP );
2916
0
  const ClpRng& clpRng = m_lumaClpRng;
2917
0
  int width            = pattern->width;
2918
0
  int height           = pattern->height;
2919
0
  int srcStride        = pattern->stride;
2920
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2921
2922
0
  int intStride = width + 1;
2923
0
  int dstStride = width + 1;
2924
0
  Pel* intPtr;
2925
0
  Pel* dstPtr;
2926
0
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
2927
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
2928
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
2929
0
  int halfFilterSize = ( filterSize >> 1 );
2930
0
  const Pel *srcPtr  = pattern->buf - halfFilterSize * srcStride - 1;
2931
2932
0
  const ChromaFormat chFmt = m_currChromaFormat;
2933
2934
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2935
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[0][0]        , intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2936
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[0][0] + width, intStride,     1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2937
2938
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2939
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[2][0],         intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2940
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[2][0] + width, intStride,     1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2941
2942
0
  intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
2943
0
  dstPtr = m_filteredBlock[0][0][0];
2944
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2945
2946
0
  intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
2947
0
  dstPtr = m_filteredBlock[2][0][0];
2948
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2949
2950
0
  intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
2951
0
  dstPtr = m_filteredBlock[0][2][0];
2952
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2953
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2954
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2955
2956
0
  intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
2957
0
  dstPtr = m_filteredBlock[2][2][0];
2958
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2959
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2960
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2961
0
}
2962
2963
2964
2965
2966
2967
/**
2968
* \brief Generate quarter-sample interpolated blocks
2969
*
2970
* \param pattern    Reference picture ROI
2971
* \param halfPelRef Half-pel mv
2972
* \param biPred     Flag indicating whether block is for biprediction
2973
*/
2974
void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef, int& patternId )
2975
0
{
2976
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL_INTERP );
2977
0
  const ClpRng& clpRng = m_lumaClpRng;
2978
0
  int width      = pattern->width;
2979
0
  int height     = pattern->height;
2980
0
  int srcStride  = pattern->stride;
2981
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2982
2983
0
  Pel const* srcPtr;
2984
0
  int intStride = width + 1;
2985
0
  int dstStride = width + 1;
2986
0
  Pel* intPtr;
2987
0
  Pel* dstPtr;
2988
2989
0
  int filterSize     = reduceTap == 1 ? NTAPS_AFFINE
2990
0
                   : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA );
2991
2992
0
  int halfFilterSize = (filterSize>>1);
2993
2994
0
  int extHeight = (halfPelRef.ver == 0) ? height + filterSize : height + filterSize-1;
2995
2996
0
  const ChromaFormat chFmt = m_currChromaFormat;
2997
2998
0
  if( s_doInterpQ[ patternId ][ 12 ] )
2999
0
  {
3000
    // Horizontal filter 1/4
3001
0
    srcPtr = pattern->buf - halfFilterSize * srcStride - 1;
3002
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ];
3003
0
    if( halfPelRef.ver > 0 )
3004
0
    {
3005
0
      srcPtr += srcStride;
3006
0
    }
3007
0
    if( halfPelRef.hor >= 0 )
3008
0
    {
3009
0
      srcPtr += 1;
3010
0
    }
3011
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3012
0
  }
3013
3014
0
  if( s_doInterpQ[ patternId ][ 13 ] )
3015
0
  {
3016
    // Horizontal filter 3/4
3017
0
    srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
3018
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ];
3019
0
    if( halfPelRef.ver > 0 )
3020
0
    {
3021
0
      srcPtr += srcStride;
3022
0
    }
3023
0
    if( halfPelRef.hor > 0 )
3024
0
    {
3025
0
      srcPtr += 1;
3026
0
    }
3027
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3028
0
  }
3029
3030
0
  if( s_doInterpQ[ patternId ][ 3 ] )
3031
0
  {
3032
    // Generate @ 1,1
3033
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3034
0
    dstPtr = m_filteredBlock[ 1 ][ 1 ][ 0 ];
3035
0
    if( halfPelRef.ver == 0 )
3036
0
    {
3037
0
      intPtr += intStride;
3038
0
    }
3039
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3040
0
  }
3041
3042
0
  if( s_doInterpQ[ patternId ][ 11 ] )
3043
0
  {
3044
    // Generate @ 3,3
3045
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3046
0
    dstPtr = m_filteredBlock[ 3 ][ 3 ][ 0 ];
3047
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3048
0
  }
3049
3050
0
  if( s_doInterpQ[ patternId ][ 5 ] )
3051
0
  {
3052
    // Generate @ 3,1
3053
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3054
0
    dstPtr = m_filteredBlock[ 3 ][ 1 ][ 0 ];
3055
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3056
0
  }
3057
3058
0
  if( s_doInterpQ[ patternId ][ 9 ] )
3059
0
  {
3060
    // Generate @ 1,3
3061
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3062
0
    dstPtr = m_filteredBlock[ 1 ][ 3 ][ 0 ];
3063
0
    if( halfPelRef.ver == 0 )
3064
0
    {
3065
0
      intPtr += intStride;
3066
0
    }
3067
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3068
0
  }
3069
3070
0
  if (halfPelRef.ver != 0)
3071
0
  {
3072
0
    if( s_doInterpQ[ patternId ][ 4 ] )
3073
0
    {
3074
      // Generate @ 2,1
3075
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3076
0
      dstPtr = m_filteredBlock[ 2 ][ 1 ][ 0 ];
3077
0
      if( halfPelRef.ver == 0 )
3078
0
      {
3079
0
        intPtr += intStride;
3080
0
      }
3081
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3082
0
    }
3083
3084
0
    if( s_doInterpQ[ patternId ][ 10 ] )
3085
0
    {
3086
      // Generate @ 2,3
3087
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3088
0
      dstPtr = m_filteredBlock[ 2 ][ 3 ][ 0 ];
3089
0
      if( halfPelRef.ver == 0 )
3090
0
      {
3091
0
        intPtr += intStride;
3092
0
      }
3093
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3094
0
    }
3095
0
  }
3096
0
  else
3097
0
  {
3098
0
    if( s_doInterpQ[ patternId ][ 2 ] )
3099
0
    {
3100
      // Generate @ 0,1
3101
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + halfFilterSize * intStride;
3102
0
      dstPtr = m_filteredBlock[ 0 ][ 1 ][ 0 ];
3103
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3104
0
    }
3105
3106
0
    if( s_doInterpQ[ patternId ][ 8 ] )
3107
0
    {
3108
      // Generate @ 0,3
3109
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + halfFilterSize * intStride;
3110
0
      dstPtr = m_filteredBlock[ 0 ][ 3 ][ 0 ];
3111
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3112
0
    }
3113
0
  }
3114
3115
0
  if (halfPelRef.hor != 0)
3116
0
  {
3117
0
    if( s_doInterpQ[ patternId ][ 6 ] )
3118
0
    {
3119
      // Generate @ 1,2
3120
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3121
0
      dstPtr = m_filteredBlock[ 1 ][ 2 ][ 0 ];
3122
0
      if( halfPelRef.hor > 0 )
3123
0
      {
3124
0
        intPtr += 1;
3125
0
      }
3126
0
      if( halfPelRef.ver >= 0 )
3127
0
      {
3128
0
        intPtr += intStride;
3129
0
      }
3130
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3131
0
    }
3132
3133
0
    if( s_doInterpQ[ patternId ][ 7 ] )
3134
0
    {
3135
      // Generate @ 3,2
3136
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3137
0
      dstPtr = m_filteredBlock[ 3 ][ 2 ][ 0 ];
3138
0
      if( halfPelRef.hor > 0 )
3139
0
      {
3140
0
        intPtr += 1;
3141
0
      }
3142
0
      if( halfPelRef.ver > 0 )
3143
0
      {
3144
0
        intPtr += intStride;
3145
0
      }
3146
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3147
0
    }
3148
0
  }
3149
0
  else
3150
0
  {
3151
0
    if( s_doInterpQ[ patternId ][ 0 ] )
3152
0
    {
3153
      // Generate @ 1,0
3154
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3155
0
      dstPtr = m_filteredBlock[ 1 ][ 0 ][ 0 ];
3156
0
      if( halfPelRef.ver >= 0 )
3157
0
      {
3158
0
        intPtr += intStride;
3159
0
      }
3160
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3161
0
    }
3162
3163
0
    if( s_doInterpQ[ patternId ][ 1 ] )
3164
0
    {
3165
      // Generate @ 3,0
3166
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3167
0
      dstPtr = m_filteredBlock[ 3 ][ 0 ][ 0 ];
3168
0
      if( halfPelRef.ver > 0 )
3169
0
      {
3170
0
        intPtr += intStride;
3171
0
      }
3172
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3173
0
    }
3174
0
  }
3175
0
}
3176
3177
3178
/**
* \brief Feed the CBFs or the residual of the current partitioner area to the CABAC estimator
*
* Called with compID == MAX_NUM_TBLOCKS the function codes CBF flags only;
* with a real component it codes that component's coefficients (plus the
* joint Cb-Cr syntax when compID is COMP_Cr). If the TU found at the current
* position is deeper than the partitioner, the area is split (maximum-size
* split or SBT split) and the function recurses over all parts.
*
* \param cs          Coding structure holding the TUs
* \param partitioner Partitioner positioned on the area to code; split state is restored on return
* \param compID      Component to code, or MAX_NUM_TBLOCKS for the CBF pass
*/
void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID compID)
{
  const UnitArea&      area    = partitioner.currArea();
  const TransformUnit& tu      = *cs.getTU(isLuma(partitioner.chType) ? area.lumaPos() : area.chromaPos(), partitioner.chType);
  const CodingUnit&    cu      = *tu.cu;
  const unsigned       depth   = partitioner.currTrDepth;

  const bool           isSplit = depth != tu.depth;
  const bool           cbfPass = ( compID == MAX_NUM_TBLOCKS );

  if( cbfPass )  // we are not processing a channel, instead we always recurse and code the CBFs
  {
    // the split state of the TU must match what the partitioner can (or must) do
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      CHECK( !isSplit, "Not performing the implicit TU split" );
    }
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
    {
      CHECK( !isSplit, "Not performing the implicit TU split - sbt" );
    }
    else
    {
      CHECK( isSplit, "transformsplit not supported" );
    }

    CHECK(CU::isIntra(cu), "Inter search provided with intra CU");

    const bool chromaHere = cu.chromaFormat != CHROMA_400
                         && ( !CU::isSepTree(cu) || isChroma(partitioner.chType) );
    if( chromaHere )
    {
      // with SBT the chroma CBFs are not coded at depth 0, nor at depth 1 for a no-residual TU
      const bool omitChromaCbf = cu.sbtInfo && ( depth == 0 || ( depth == 1 && tu.noResidual ) );

      const bool cbCbf = TU::getCbfAtDepth( tu, COMP_Cb, depth );
      if( !omitChromaCbf )
      {
        m_CABACEstimator->cbf_comp( cu, cbCbf, area.blocks[COMP_Cb], depth );
      }

      const bool crCbf = TU::getCbfAtDepth( tu, COMP_Cr, depth );
      if( !omitChromaCbf )
      {
        m_CABACEstimator->cbf_comp( cu, crCbf, area.blocks[COMP_Cr], depth, TU::getCbfAtDepth( tu, COMP_Cb, depth ) );
      }
    }

    // luma CBF only on leaf TUs that are not the SBT no-residual part, and not for a chroma channel
    if( !isSplit && ( !cu.sbtInfo || !tu.noResidual )
      && !isChroma(partitioner.chType) )
    {
      m_CABACEstimator->cbf_comp( cu, TU::getCbfAtDepth( tu, COMP_Y, depth ), area.Y(), depth );
    }
  }

  if( !isSplit )
  {
    // leaf: we have already coded the CBFs, so now we code coefficients
    if( !cbfPass && area.blocks[compID].valid() )
    {
      if( compID == COMP_Cr )
      {
        const int cbfMask = ( TU::getCbf( tu, COMP_Cb ) ? 2 : 0) + ( TU::getCbf( tu, COMP_Cr ) ? 1 : 0 );
        m_CABACEstimator->joint_cb_cr( tu, cbfMask );
      }
      if( TU::getCbf( tu, compID ) )
      {
        m_CABACEstimator->residual_coding( tu, compID );
      }
    }
    return;
  }

  // split TU: a component pass only descends when that component has a CBF at this depth
  if( !cbfPass && !TU::getCbfAtDepth( tu, compID, depth ) )
  {
    return;
  }

  if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
  {
    partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
  }
  else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
  {
    partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
  }
  else
  {
    THROW( "Implicit TU split not available!" );
  }

  do
  {
    xEncodeInterResidualQT( cs, partitioner, compID );
  } while( partitioner.nextPart( cs ) );

  partitioner.exitCurrSplit();
}
3272
3273
/**
* \brief Estimate a lower bound of the distortion for every SBT mode and derive the SBT RDO order
*
* Results are stored in members:
*  - m_estMinDistSbt[NUMBER_SBT_MODE] : SSE between original and prediction over all valid components
*  - m_estMinDistSbt[mode]            : estimated minimum distortion of each allowed SBT mode
*                                       (MAX_DISTORTION for modes that are not allowed)
*  - m_skipSbtAll                     : set to true when the residual is too small for SBT to pay off
*  - m_sbtRdoOrder                    : modes sorted by estimated distortion (255 = unused slot)
*
* \param cs         Coding structure providing original and prediction buffers
* \param cu         Coding unit under test
* \param sbtAllowed Bit mask of allowed SBT types; 0 means only the whole-CU SSE is computed
*/
void InterSearch::xCalcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed )
{
  if( !sbtAllowed )
  {
    // no SBT at all: only the whole-CU SSE is needed
    m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
    for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
    {
      const ComponentID compID = ComponentID( comp );
      CPelBuf pred = cs.getPredBuf( compID );
      CPelBuf org  = cs.getOrgBuf( compID );
      m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
    }
    return;
  }

  //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the distortion of uncoded part and coded part
  //                         (the coded part's distortion is assumed to shrink by a factor of 2^shift, see below);
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
  int cuWidth  = cu.lwidth();
  int cuHeight = cu.lheight();
  // the CU is divided into up to 4x4 sub parts (fewer for luma sizes below 16)
  int numPartX = cuWidth  >= 16 ? 4 : ( cuWidth  == 4 ? 1 : 2 );
  int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 );
  Distortion dist[4][4];
  memset( dist, 0, sizeof( dist ) );

  for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
  {
    const ComponentID compID   = ComponentID( c );
    const CompArea&   compArea = cu.blocks[compID];
    const CPelBuf orgPel  = cs.getOrgBuf( compArea );
    const CPelBuf predPel = cs.getPredBuf( compArea );
    int lengthX = compArea.width / numPartX;
    int lengthY = compArea.height / numPartY;
    int strideOrg  = orgPel.stride;
    int stridePred = predPel.stride;
    // cs.sps is a pointer (see the getDistPart call above), so the bit depth is read through '->'
    uint32_t   uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( cs.sps->bitDepths[ toChannelType( compID ) ] - 8 ) << 1 );
    Intermediate_Int iTemp;

    //calc SSE of each sub part, accumulated over all components
    for( int j = 0; j < numPartY; j++ )
    {
      for( int i = 0; i < numPartX; i++ )
      {
        int posX = i * lengthX;
        int posY = j * lengthY;
        const Pel* ptrOrg  = orgPel.bufAt( posX, posY );
        const Pel* ptrPred = predPel.bufAt( posX, posY );
        Distortion uiSum = 0;
        for( int n = 0; n < lengthY; n++ )
        {
          for( int m = 0; m < lengthX; m++ )
          {
            iTemp = ptrOrg[m] - ptrPred[m];
            uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
          }
          ptrOrg += strideOrg;
          ptrPred += stridePred;
        }
        if( isChroma( compID ) )
        {
          uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
        }
        dist[j][i] += uiSum;
      }
    }
  }

  //SSE of a CU
  m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
  for( int j = 0; j < numPartY; j++ )
  {
    for( int i = 0; i < numPartX; i++ )
    {
      m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
    }
  }
  //init per-mode dist
  for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
  {
    m_estMinDistSbt[i] = MAX_DISTORTION;
  }

  //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
  uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
  if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
  {
    m_skipSbtAll = true;
    return;
  }

  //derive estimated minDist of SBT = zero-residual part distortion + ( non-zero residual part distortion >> shift )
  int shift = 5;
  Distortion distResiPart = 0, distNoResiPart = 0;

  if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
  {
    // left half vs. right half
    int offsetResiPart = 0;
    int offsetNoResiPart = numPartX / 2;
    distResiPart = distNoResiPart = 0;
    assert( numPartX >= 2 );
    for( int j = 0; j < numPartY; j++ )
    {
      for( int i = 0; i < numPartX / 2; i++ )
      {
        distResiPart   += dist[j][i + offsetResiPart];
        distNoResiPart += dist[j][i + offsetNoResiPart];
      }
    }
    m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
    m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
  }

  if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
  {
    // top half vs. bottom half
    int offsetResiPart = 0;
    int offsetNoResiPart = numPartY / 2;
    assert( numPartY >= 2 );
    distResiPart = distNoResiPart = 0;
    for( int j = 0; j < numPartY / 2; j++ )
    {
      for( int i = 0; i < numPartX; i++ )
      {
        distResiPart   += dist[j + offsetResiPart][i];
        distNoResiPart += dist[j + offsetNoResiPart][i];
      }
    }
    m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
    m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
  }

  if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
  {
    // first / last column carries the residual; the other three columns are pre-scaled by
    // 2^shift so that the final right shift only reduces the residual column
    assert( numPartX == 4 );
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
    for( int j = 0; j < numPartY; j++ )
    {
      m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
      m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
    }
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
    m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
  }

  if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
  {
    // same scheme as above with the first / last row carrying the residual
    assert( numPartY == 4 );
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
    for( int i = 0; i < numPartX; i++ )
    {
      m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
      m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
    }
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
    m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
  }

  //SBT fast algorithm 5: try N SBT modes with the lowest distortion
  // selection sort on a scratch copy: half modes fill the first slots, quad modes the following ones
  Distortion temp[NUMBER_SBT_MODE];
  memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
  memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE );
  int startIdx = 0, numRDO;
  numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
  for( int i = startIdx; i < startIdx + numRDO; i++ )
  {
    Distortion minDist = MAX_DISTORTION;
    for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
    {
      if( temp[n] < minDist )
      {
        minDist = temp[n];
        m_sbtRdoOrder[i] = n;
      }
    }
    // take the selected mode out of the running for the next slot
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
  }

  startIdx += numRDO;
  numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
  for( int i = startIdx; i < startIdx + numRDO; i++ )
  {
    Distortion minDist = MAX_DISTORTION;
    for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
    {
      if( temp[n] < minDist )
      {
        minDist = temp[n];
        m_sbtRdoOrder[i] = n;
      }
    }
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
  }
}
3466
3467
uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
3468
0
{
3469
0
  int sbtMode = CU::getSbtMode( sbtIdx, sbtPos );
3470
3471
  //SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
3472
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
3473
0
  if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost )
3474
0
  {
3475
0
    return 0; //early skip type 0
3476
0
  }
3477
3478
0
  if( costSbtOff != MAX_DOUBLE )
3479
0
  {
3480
0
    if( !rootCbfSbtOff )
3481
0
    {
3482
      //SBT fast algorithm 3: skip SBT when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits)
3483
0
      uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS;
3484
0
      Distortion distResiPart;
3485
0
      if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF )
3486
0
      {
3487
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 );
3488
0
      }
3489
0
      else
3490
0
      {
3491
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 );
3492
0
      }
3493
3494
0
      double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart );
3495
0
      if( estCost > costSbtOff )
3496
0
      {
3497
0
        return 1;
3498
0
      }
3499
0
      if( estCost > bestCost )
3500
0
      {
3501
0
        return 2;
3502
0
      }
3503
0
    }
3504
0
    else
3505
0
    {
3506
      //SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost
3507
0
      double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6;
3508
0
      double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] );
3509
0
      if( estCost > bestCost )
3510
0
      {
3511
0
        return 3;
3512
0
      }
3513
0
    }
3514
0
  }
3515
0
  return MAX_UCHAR;
3516
0
}
3517
3518
// Estimates the rate-distortion cost of coding the inter residual of the area currently selected by the partitioner.
//
// Two mutually exclusive paths exist (bCheckSplit is the negation of bCheckFull):
//  - "full":  the area is coded as one TU. Every valid component is transformed/quantized (DCT2 and, when allowed,
//             transform skip), the cheaper of "coded" and "all-zero" is kept per component, and afterwards the
//             joint Cb-Cr candidates are tested against the sum of the separate chroma costs.
//  - "split": the area is split (implicit max-TR split or the SBT split of the CU), this function is called
//             recursively for every part, and the bits of the resulting TU tree are re-estimated.
//
// \param cs           coding structure holding the residual; receives the TUs, the reconstructed residual and the
//                     accumulated fracBits / dist / cost
// \param partitioner  provides the current area, transform depth and split decisions
// \param puiZeroDist  optional accumulator; if non-null, the distortion of an all-zero residual is added per component
void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/)
{
  const UnitArea& currArea = partitioner.currArea();
  const SPS &sps           = *cs.sps;

  const uint32_t numValidComp  = getNumberValidComponents( sps.chromaFormatIdc );
  const uint32_t numTBlocks    = getNumberValidTBlocks   ( *cs.pcv );
  CodingUnit& cu               = *cs.getCU(partitioner.chType, partitioner.treeType);
  const unsigned currDepth = partitioner.currTrDepth;
  const bool useTS = cs.picture->useTS;

  // the area is coded as a single TU unless an implicit or an SBT split is possible
  bool bCheckFull  = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
  if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
  {
    bCheckFull = false;
  }
  bool bCheckSplit = !bCheckFull;

  // get temporary data
  CodingStructure *csSplit = nullptr;
  CodingStructure *csFull  = nullptr;
  if (bCheckSplit)
  {
    csSplit = &cs;
  }
  else if (bCheckFull)
  {
    csFull = &cs;
  }

  Distortion uiSingleDist         = 0;
  Distortion uiSingleDistComp [3] = { 0, 0, 0 };

  const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
  TempCtx       ctxBest   ( m_CtxCache );

  // keep a copy of the untouched residual; the residual buffer of cs is overwritten during the search
  PelUnitBuf    orgResiBuf;
  orgResiBuf = m_tmpStorageLCU.getCompactBuf( currArea );
  orgResiBuf.copyFrom(cs.getResiBuf(currArea));

  if (bCheckFull)
  {
    ReshapeData& reshapeData = cs.picture->reshapeData;

    TransformUnit& tu = csFull->addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, &cu);
    tu.depth          = currDepth;
    tu.mtsIdx[COMP_Y] = MTS_DCT2_DCT2;
    tu.checkTuNoResidual( partitioner.currPartIdx() );
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && cs.picHeader->lmcsChromaResidualScale && !(CS::isDualITree(cs) && cs.slice->isIntra() && tu.cu->predMode == MODE_IBC))
    {
      tu.chromaAdj = reshapeData.calculateChromaAdjVpduNei(tu, tu.blocks[COMP_Y], tu.cu->treeType);
    }

    double minCost [MAX_NUM_TBLOCKS];

    m_CABACEstimator->resetBits();

    memset(m_pTempPel, 0, sizeof(Pel) * tu.Y().area()); // not necessary needed for inside of recursion (only at the beginning)

    for (uint32_t i = 0; i < numTBlocks; i++)
    {
      minCost[i] = MAX_DOUBLE;
    }

    // scratch coding structure / TU used to remember the best candidate found so far
    CodingStructure &saveCS = *m_pSaveCS[1];
    saveCS.pcv     = cs.pcv;
    saveCS.picture = cs.picture;
    saveCS.area.repositionTo( currArea );

    TransformUnit& bestTU = saveCS.tus.empty() ? saveCS.addTU( currArea, partitioner.chType, nullptr ) : *saveCS.tus.front();
    bestTU.initData();
    bestTU.UnitArea::operator=( currArea );

    for( uint32_t c = 0; c < numTBlocks; c++ )
    {
      const ComponentID compID    = ComponentID(c);
      const CompArea&   compArea  = tu.blocks[compID];
      const int channelBitDepth   = sps.bitDepths[toChannelType(compID)];

      if( !tu.blocks[compID].valid() )
      {
        continue;
      }
      bool tsAllowed = useTS && TU::isTSAllowed(tu, compID) && (isLuma(compID) || (isChroma(compID) && m_pcEncCfg->m_useChromaTS));
      if (isChroma(compID) && tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
      {
        // chroma transform skip is only tested when luma selected transform skip
        tsAllowed = false;
      }
      uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
      std::vector<TrMode> trModes;

      if (nNumTransformCands > 1)
      {
        trModes.push_back(TrMode(0, true)); //DCT2
        //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost
        if ( !tu.noResidual )
        {
          trModes.push_back(TrMode(1, true));
        }
        else
        {
          nNumTransformCands--;
        }
      }

      // isLast stays true unless the final tested mode was accepted as best; only then does tu already hold
      // the best state and no restore from bestTU is required after the loop
      bool isLast = true;
      for (int transformMode = 0; transformMode < nNumTransformCands; transformMode++)
      {
        const bool isFirstMode = transformMode == 0;

        // copy the original residual into the residual buffer
        csFull->getResiBuf(compArea).copyFrom(orgResiBuf.get(compID));

        m_CABACEstimator->getCtx() = ctxStart;
        m_CABACEstimator->resetBits();

        if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->m_TS)
        {
          continue;
        }
        tu.mtsIdx[compID] = transformMode ? trModes[transformMode].first : 0;

        const QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
        m_pcTrQuant->selectLambda(compID);

        const Slice& slice = *tu.cu->slice;

        // LMCS chroma residual scaling is active for this component; the same guard (including the CTU flag)
        // is used for lambda scaling, forward scaling and inverse scaling so that they can never get out of sync
        const bool lmcsChromaScale = slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale;
        const bool scaleChromaResi = lmcsChromaScale && tu.blocks[compID].width*tu.blocks[compID].height > 4;

        if (lmcsChromaScale)
        {
          double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
          m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
        }

        if ( sps.jointCbCr && isChroma( compID ) && ( tu.cu->cs->slice->sliceQp > 18 ) )
        {
          m_pcTrQuant->scaleLambda( 1.05 );
        }
        TCoeff     currAbsSum = 0;
        uint64_t   currCompFracBits = 0;
        Distortion currCompDist = 0;
        double     currCompCost = 0;
        uint64_t   nonCoeffFracBits = 0;
        Distortion nonCoeffDist = 0;
        double     nonCoeffCost = 0;

        if (scaleChromaResi)
        {
          PelBuf resiBuf = csFull->getResiBuf(compArea);
          resiBuf.scaleSignal(tu.chromaAdj, 1, slice.clpRngs[compID]);
        }

        if (nNumTransformCands > 1)
        {
          if (transformMode == 0)
          {
            m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, compID);
            tu.mtsIdx[compID] = trModes[0].first;
            if (!trModes[transformMode + 1].second)
            {
              // second candidate was disabled by the pre-check, test only the first one
              nNumTransformCands = 1;
            }
          }
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true);
        }
        else
        {
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx());
        }

        // cost of signalling "no coefficients" for this component
        if (isFirstMode || (currAbsSum == 0))
        {
          const CPelBuf zeroBuf(m_pTempPel, compArea);
          const CPelBuf& orgResi = orgResiBuf.get(compID);

          nonCoeffDist = m_pcRdCost->getDistPart(zeroBuf, orgResi, channelBitDepth, compID, DF_SSE); // initialized with zero residual distortion

          if (!tu.noResidual)
          {
            const bool prevCbf = (compID == COMP_Cr ? tu.cbf[COMP_Cb] : false);
            m_CABACEstimator->cbf_comp(*tu.cu, false, compArea, currDepth, prevCbf);
          }

          nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
          nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
        }

        if ((puiZeroDist != NULL) && isFirstMode)
        {
          *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
        }

        if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction
        {
          if (isFirstMode)
          {
            // discard the bits spent on the "cbf = 0" estimate above
            m_CABACEstimator->getCtx() = ctxStart;
            m_CABACEstimator->resetBits();
          }

          const bool prevCbf = ( compID == COMP_Cr ? tu.cbf[COMP_Cb] : false );
          m_CABACEstimator->cbf_comp( *tu.cu, true, compArea, currDepth, prevCbf );
          if( compID == COMP_Cr )
          {
            const int cbfMask = ( tu.cbf[COMP_Cb] ? 2 : 0 ) + 1;
            m_CABACEstimator->joint_cb_cr( tu, cbfMask );
          }
          CUCtx cuCtx;
          cuCtx.isDQPCoded = true;
          cuCtx.isChromaQpAdjCoded = true;
          m_CABACEstimator->residual_coding(tu, compID, &cuCtx);
          m_CABACEstimator->mts_idx(cu, &cuCtx);

          currCompFracBits = m_CABACEstimator->getEstFracBits();

          PelBuf resiBuf  = csFull->getResiBuf(compArea);
          CPelBuf orgResi = orgResiBuf.get(compID);

          m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP);
          // undo the chroma residual scaling only if it was applied before the forward transform
          if (scaleChromaResi)
          {
            resiBuf.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]);
          }

          currCompDist = m_pcRdCost->getDistPart(orgResi, resiBuf, channelBitDepth, compID, DF_SSE);
          currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist, false);
        }
        else if (transformMode > 0)
        {
          // an all-zero result of a later candidate can never beat the first mode
          currCompCost = MAX_DOUBLE;
        }
        else
        {
          currCompFracBits = nonCoeffFracBits;
          currCompDist     = nonCoeffDist;
          currCompCost     = nonCoeffCost;

          tu.cbf[compID] = 0;
        }

        // evaluate
        if ((currCompCost < minCost[compID]) || (transformMode == 1 && currCompCost == minCost[compID]))
        {
          // copy component
          if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null
          {
            tu.getCoeffs( compID ).fill( 0 );
            csFull->getResiBuf( compArea ).fill( 0 );
            tu.cbf[compID]   = 0;

            currAbsSum       = 0;
            currCompFracBits = nonCoeffFracBits;
            currCompDist     = nonCoeffDist;
            currCompCost     = nonCoeffCost;
          }

          uiSingleDistComp[compID] = currCompDist;
          minCost[compID]          = currCompCost;
          if (transformMode != (nNumTransformCands - 1))
          {
            bestTU.copyComponentFrom(tu, compID);
            saveCS.getResiBuf(compArea).copyFrom(csFull->getResiBuf(compArea));
          }
          else
          {
            isLast = false;
          }
        }
        if( tu.noResidual )
        {
          CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" );
        }
      }
      if (isLast)
      {
        tu.copyComponentFrom(bestTU, compID);
        csFull->getResiBuf(compArea).copyFrom(saveCS.getResiBuf(compArea));
      }
    } // component loop

    // joint Cb-Cr coding: tested against the sum of the separately coded chroma components
    if ( tu.blocks.size()>2 && tu.blocks[COMP_Cb].valid() )
    {
      const CompArea& cbArea = tu.blocks[COMP_Cb];
      const CompArea& crArea = tu.blocks[COMP_Cr];
      bool checkJointCbCr = (sps.jointCbCr) && (!tu.noResidual) && (TU::getCbf(tu, COMP_Cb) || TU::getCbf(tu, COMP_Cr));
      const int channelBitDepth = sps.bitDepths[toChannelType(COMP_Cb)];
      const Slice& slice = *tu.cu->slice;
      bool      reshape         = slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && slice.picHeader->lmcsChromaResidualScale
                               && tu.blocks[COMP_Cb].width * tu.blocks[COMP_Cb].height > 4;
      double minCostCbCr = minCost[COMP_Cb] + minCost[COMP_Cr];
      bool   isLastBest  = false;

      // restrict the joint search to the transform type chosen by the separately coded chroma components
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cr)) ||
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cb)) ||
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tu, COMP_Cr)) ||
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tu, COMP_Cb)) ||
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP));

      std::vector<int> jointCbfMasksToTest;
      if ( checkJointCbCr )
      {
        for( int i = 0; i < 4; i++ )
        {
          m_orgResiCb[i].compactResize(cbArea);
          m_orgResiCr[i].compactResize(crArea);
        }
        m_orgResiCb[0].copyFrom(orgResiBuf.Cb());
        m_orgResiCr[0].copyFrom(orgResiBuf.Cr());
        if (reshape)
        {
          m_orgResiCb[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cb]);
          m_orgResiCr[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cr]);
        }

        jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, m_orgResiCb, m_orgResiCr);

        // remember the separately coded result as the candidate to beat
        bestTU.copyComponentFrom(tu, COMP_Cb);
        bestTU.copyComponentFrom(tu, COMP_Cr);
        saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
        saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
      }

      for (int cbfMask: jointCbfMasksToTest)
      {
        // the component carrying the joint residual and the one derived from it; tu.jointCbCr is set to
        // cbfMask inside the mode loop, so these stay valid for every tested transform mode
        const ComponentID codeCompId = (cbfMask >> 1 ? COMP_Cb : COMP_Cr);
        const ComponentID otherCompId = (codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr);
        bool tsAllowed = useTS && TU::isTSAllowed(tu, codeCompId) && (m_pcEncCfg->m_useChromaTS);
        if (tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
        {
          tsAllowed = false;
        }
        if (!tsAllowed)
        {
          checkTSOnly = false;
        }
        uint8_t     numTransformCands = 1 + (tsAllowed && (!(checkDCTOnly || checkTSOnly)) ? 1 : 0); // DCT + TS = 2 tests
        std::vector<TrMode> trModes;
        if (numTransformCands > 1)
        {
          trModes.push_back(TrMode(0, true)); // DCT2
          trModes.push_back(TrMode(1, true));//TS
        }
        else
        {
          tu.mtsIdx[codeCompId] = checkTSOnly ? 1 : 0;
        }
        for (int modeId = 0; modeId < numTransformCands; modeId++)
        {
          TCoeff     currAbsSum = 0;
          uint64_t   currCompFracBits = 0;
          Distortion currCompDistCb = 0;
          Distortion currCompDistCr = 0;
          double     currCompCost = 0;

          tu.jointCbCr = (uint8_t)cbfMask;
          if (numTransformCands > 1)
          {
            tu.mtsIdx[codeCompId] = trModes[modeId].first;
          }
          tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
          const QpParam cQP(tu, COMP_Cb);  // note: uses tu.transformSkip[compID]
          m_pcTrQuant->selectLambda(COMP_Cb);

          // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
          const int    absIct = abs(TU::getICTMode(tu));
          const double lfact = (absIct == 1 || absIct == 3 ? 0.8 : 0.5);
          m_pcTrQuant->scaleLambda(lfact);
          if (checkJointCbCr && (tu.cu->cs->slice->sliceQp > 18))
          {
            m_pcTrQuant->scaleLambda(1.05);
          }

          m_CABACEstimator->getCtx() = ctxStart;
          m_CABACEstimator->resetBits();

          PelBuf cbResi = csFull->getResiBuf(cbArea);
          PelBuf crResi = csFull->getResiBuf(crArea);
          cbResi.copyFrom(m_orgResiCb[cbfMask]);
          crResi.copyFrom(m_orgResiCr[cbfMask]);

          if (reshape)
          {
            double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
            m_pcTrQuant->scaleLambda(1.0 / (cRescale * cRescale));
          }

          int         codedCbfMask = 0;
          const QpParam qpCbCr(tu, codeCompId);

          tu.getCoeffs(otherCompId).fill(0);   // do we need that?
          TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);

          PelBuf& codeResi = (codeCompId == COMP_Cr ? crResi : cbResi);
          TCoeff  compAbsSum = 0;
          if (numTransformCands > 1)
          {
            if (modeId == 0)
            {
              m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, codeCompId);
              tu.mtsIdx[codeCompId] = trModes[modeId].first;
              tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
              if (!trModes[modeId + 1].second)
              {
                numTransformCands = 1;
              }
            }
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx(), true);
          }
          else
          {
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
          }
          if (compAbsSum > 0)
          {
            m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
            codedCbfMask += (codeCompId == COMP_Cb ? 2 : 1);
          }
          else
          {
            codeResi.fill(0);
          }

          if (tu.jointCbCr == 3 && codedCbfMask == 2)
          {
            codedCbfMask = 3;
            TU::setCbfAtDepth(tu, COMP_Cr, tu.depth, true);
          }
          if (codedCbfMask && tu.jointCbCr != codedCbfMask)
          {
            // the quantized result does not match the tested joint mode, treat it as not coded
            codedCbfMask = 0;
          }
          currAbsSum = codedCbfMask;
          if (!tu.mtsIdx[codeCompId])
          {
            numTransformCands = (currAbsSum <= 0) ? 1 : numTransformCands;
          }
          if (currAbsSum > 0)
          {
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask >> 1, cbArea, currDepth, false);
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
            m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
            if (codedCbfMask >> 1)
            {
              m_CABACEstimator->residual_coding(tu, COMP_Cb);
            }
            if (codedCbfMask & 1)
            {
              m_CABACEstimator->residual_coding(tu, COMP_Cr);
            }
            currCompFracBits = m_CABACEstimator->getEstFracBits();

            m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
            if (reshape)
            {
              cbResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cb]);
              crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]);
            }

            currCompDistCb = m_pcRdCost->getDistPart(orgResiBuf.Cb(), cbResi, channelBitDepth, COMP_Cb, DF_SSE);
            currCompDistCr = m_pcRdCost->getDistPart(orgResiBuf.Cr(), crResi, channelBitDepth, COMP_Cr, DF_SSE);
            currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
          }
          else
          {
            currCompCost = MAX_DOUBLE;
          }

          // evaluate
          if (currCompCost < minCostCbCr)
          {
            uiSingleDistComp[COMP_Cb] = currCompDistCb;
            uiSingleDistComp[COMP_Cr] = currCompDistCr;
            minCostCbCr = currCompCost;
            isLastBest = (cbfMask == jointCbfMasksToTest.back()) && (modeId == (numTransformCands - 1));
            if (!isLastBest)
            {
              bestTU.copyComponentFrom(tu, COMP_Cb);
              bestTU.copyComponentFrom(tu, COMP_Cr);
              saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
              saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
            }
          }
        }

        if( !isLastBest )
        {
          // copy component
          tu.copyComponentFrom( bestTU, COMP_Cb );
          tu.copyComponentFrom( bestTU, COMP_Cr );
          csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) );
          csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea ) );
        }
      }
    }

    // all per-component decisions are made: estimate the bits of the final TU from the start context
    m_CABACEstimator->getCtx() = ctxStart;
    m_CABACEstimator->resetBits();
    if( !tu.noResidual )
    {
      // cbf flags are estimated in the order Cb, Cr, Y
      static const ComponentID cbf_getComp[3] = { COMP_Cb, COMP_Cr, COMP_Y };
      for( unsigned c = 0; c < numTBlocks; c++)
      {
        const ComponentID compID = numTBlocks>1 ? cbf_getComp[c] : COMP_Y;
        if( tu.blocks[compID].valid() )
        {
          const bool prevCbf = ( compID == COMP_Cr ? TU::getCbfAtDepth( tu, COMP_Cb, currDepth ) : false );
          m_CABACEstimator->cbf_comp( *tu.cu, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf );
        }
      }
    }

    for (uint32_t ch = 0; ch < numValidComp; ch++)
    {
      const ComponentID compID = ComponentID(ch);
      if (tu.blocks[compID].valid())
      {
        if( compID == COMP_Cr )
        {
          const int cbfMask = ( TU::getCbf( tu, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( tu, COMP_Cr ) ? 1 : 0 );
          m_CABACEstimator->joint_cb_cr(tu, cbfMask);
        }
        if( TU::getCbf( tu, compID ) )
        {
          m_CABACEstimator->residual_coding( tu, compID );
        }
        uiSingleDist += uiSingleDistComp[compID];
      }
    }
    if( tu.noResidual )
    {
      CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" );
    }

    csFull->fracBits += m_CABACEstimator->getEstFracBits();
    csFull->dist     += uiSingleDist;
    csFull->cost      = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
  } // check full

  // code sub-blocks
  if( bCheckSplit )
  {
    if( bCheckFull )
    {
      m_CABACEstimator->getCtx() = ctxStart;
    }

    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
    {
      partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
    }
    else
    {
      THROW( "Implicit TU split not available!" );
    }

    // estimate every part of the split recursively
    do
    {
      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist );

      csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
    } while( partitioner.nextPart( *csSplit ) );

    partitioner.exitCurrSplit();

    unsigned        compCbf[3]  = { 0, 0, 0 };

    if( !bCheckFull )
    {
      // a component has a cbf at this depth if any of the sub-TUs has one at the next depth
      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
      {
        for( unsigned ch = 0; ch < numTBlocks; ch++ )
        {
          compCbf[ ch ] |= ( TU::getCbfAtDepth( currTU, ComponentID(ch), currDepth + 1 ) ? 1 : 0 );
        }
      }

      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
      {
        TU::setCbfAtDepth   ( currTU, COMP_Y,  currDepth, compCbf[ COMP_Y  ] );
        if( currArea.chromaFormat != CHROMA_400 )
        {
          TU::setCbfAtDepth ( currTU, COMP_Cb, currDepth, compCbf[ COMP_Cb ] );
          TU::setCbfAtDepth ( currTU, COMP_Cr, currDepth, compCbf[ COMP_Cr ] );
        }
      }

      // re-estimate the bits of the complete TU tree from the start context
      m_CABACEstimator->getCtx() = ctxStart;
      m_CABACEstimator->resetBits();

      // when compID isn't a channel, code Cbfs:
      xEncodeInterResidualQT( *csSplit, partitioner, MAX_NUM_TBLOCKS );

      for (uint32_t ch = 0; ch < numValidComp; ch++)
      {
        const ComponentID compID = ComponentID(ch);
        xEncodeInterResidualQT( *csSplit, partitioner, compID );
      }

      csSplit->fracBits = m_CABACEstimator->getEstFracBits();
      csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
    }
  }
}
4117
4118
void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool skipResidual )
4119
0
{
4120
0
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );
4121
0
  bool luma      = true;
4122
0
  bool chroma    = cs.pcv->chrFormat != VVENC_CHROMA_400;
4123
0
  if( cu.predMode == MODE_IBC )
4124
0
  {
4125
0
    luma    = !cu.mccNoLuma  ();
4126
0
    chroma &= !cu.mccNoChroma();
4127
0
  }
4128
0
  if( cu.predMode == MODE_INTER )
4129
0
    CHECK( CU::isSepTree(cu), "CU with Inter mode must be in single tree" );
4130
4131
0
  const ChromaFormat format      = cs.area.chromaFormat;;
4132
0
  const int  numValidComponents  = getNumberValidComponents(format);
4133
0
  const SPS &sps                 = *cs.sps;
4134
0
  const ReshapeData& reshapeData = cs.picture->reshapeData;
4135
4136
0
  if( skipResidual ) //  No residual coding : SKIP mode
4137
0
  {
4138
0
    cu.skip    = true;
4139
0
    cu.rootCbf = false;
4140
0
    CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
4141
0
    cs.getResiBuf().fill(0);
4142
0
    cs.getRecoBuf().copyFrom(cs.getPredBuf() );
4143
0
    if( cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
4144
0
    {
4145
0
      cs.getRecoBuf().Y().rspSignal( reshapeData.getFwdLUT());
4146
0
    }
4147
4148
    // add new "empty" TU(s) spanning the whole CU
4149
0
    cs.addEmptyTUs( partitioner, &cu );
4150
0
    Distortion distortion = 0;
4151
4152
0
    for (int comp = 0; comp < numValidComponents; comp++)
4153
0
    {
4154
0
      const ComponentID compID = ComponentID(comp);
4155
0
      if (compID == COMP_Y && !luma)
4156
0
        continue;
4157
0
      if (compID != COMP_Y && !chroma)
4158
0
        continue;
4159
0
      CPelBuf reco = cs.getRecoBuf (compID);
4160
0
      CPelBuf org  = cs.getOrgBuf  (compID);
4161
0
      if ((cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4162
0
      {
4163
0
        const CompArea& areaY = cu.Y();
4164
0
        const CPelBuf orgLuma = cs.getOrgBuf( areaY );
4165
0
        if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4166
0
        {
4167
0
          PelBuf tmpRecLuma = cs.getRspRecoBuf();
4168
0
          tmpRecLuma.rspSignal(reco, reshapeData.getInvLUT());
4169
0
          distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[ CH_L ], compID, DF_SSE_WTD, &orgLuma);
4170
0
        }
4171
0
        else
4172
0
          distortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ CH_C ], compID, DF_SSE_WTD, &orgLuma );
4173
0
      }
4174
0
      else
4175
0
      {
4176
0
        distortion  += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
4177
0
      }
4178
0
    }
4179
4180
0
    CodingUnit& cu = *cs.getCU(partitioner.chType, TREE_D);
4181
0
    m_CABACEstimator->resetBits();
4182
0
    m_CABACEstimator->cu_skip_flag  ( cu );
4183
0
    m_CABACEstimator->merge_data(cu);
4184
0
    cs.fracBits = m_CABACEstimator->getEstFracBits();
4185
0
    cs.dist     = distortion;
4186
0
    cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
4187
4188
0
    return;
4189
0
  }
4190
4191
  //  Residual coding.
4192
0
  if (luma)
4193
0
  {
4194
0
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
4195
0
    {
4196
0
      if (!cu.ciip && !CU::isIBC(cu))
4197
0
      {
4198
0
        const CompArea& areaY = cu.Y();
4199
0
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(areaY);
4200
0
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
4201
0
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), tmpPred);
4202
0
      }
4203
0
      else
4204
0
      {
4205
0
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), cs.getPredBuf(COMP_Y));
4206
0
      }
4207
0
    }
4208
0
    else
4209
0
    {
4210
0
      cs.getResiBuf(COMP_Y).subtract(cs.getOrgBuf(COMP_Y), cs.getPredBuf(COMP_Y));
4211
0
    }
4212
0
  }
4213
0
  if (chroma)
4214
0
  {
4215
0
    cs.getResiBuf(COMP_Cb).subtract(cs.getOrgBuf(COMP_Cb), cs.getPredBuf(COMP_Cb));
4216
0
    cs.getResiBuf(COMP_Cr).subtract(cs.getOrgBuf(COMP_Cr), cs.getPredBuf(COMP_Cr));
4217
0
  }
4218
4219
0
  Distortion zeroDistortion = 0;
4220
4221
0
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
4222
4223
0
  xEstimateInterResidualQT(cs, partitioner, &zeroDistortion );
4224
0
  TransformUnit& firstTU = *cs.getTU( partitioner.chType );
4225
4226
0
  cu.rootCbf = false;
4227
0
  m_CABACEstimator->resetBits();
4228
0
  m_CABACEstimator->rqt_root_cbf( cu );
4229
0
  const uint64_t  zeroFracBits = m_CABACEstimator->getEstFracBits();
4230
0
  double zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled );
4231
4232
0
  const int  numValidTBlocks   = getNumberValidTBlocks( *cs.pcv );
4233
0
  for (uint32_t i = 0; i < numValidTBlocks; i++)
4234
0
  {
4235
0
    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
4236
0
  }
4237
4238
  // -------------------------------------------------------
4239
  // If a block full of 0's is efficient, then just use 0's.
4240
  // The costs at this point do not include header bits.
4241
4242
0
  if (zeroCost < cs.cost || !cu.rootCbf)
4243
0
  {
4244
0
    cu.sbtInfo = 0;
4245
0
    cu.rootCbf = false;
4246
4247
0
    cs.clearTUs();
4248
4249
    // add a new "empty" TU spanning the whole CU
4250
0
    cs.addEmptyTUs( partitioner, &cu );
4251
0
  }
4252
4253
  // all decisions now made. Fully encode the CU, including the headers:
4254
0
  m_CABACEstimator->getCtx() = ctxStart;
4255
4256
0
  uint64_t finalFracBits = xGetSymbolFracBitsInter( cs, partitioner );
4257
  // we've now encoded the CU, and so have a valid bit cost
4258
0
  if (!cu.rootCbf)
4259
0
  {
4260
0
    if (luma)
4261
0
    {
4262
0
      cs.getResiBuf().bufs[0].fill(0); // Clear the residual image, if we didn't code it.
4263
0
    }
4264
0
    if (chroma && isChromaEnabled(cs.pcv->chrFormat))
4265
0
    {
4266
0
      cs.getResiBuf().bufs[1].fill(0); // Clear the residual image, if we didn't code it.
4267
0
      cs.getResiBuf().bufs[2].fill(0); // Clear the residual image, if we didn't code it.
4268
0
    }
4269
0
  }
4270
0
  if (luma)
4271
0
  {
4272
0
    if (cu.rootCbf && cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
4273
0
    {
4274
0
      if (!cu.ciip && !CU::isIBC(cu))
4275
0
      {
4276
0
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(cu.Y());
4277
0
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
4278
0
        cs.getRecoBuf(COMP_Y).reconstruct(tmpPred, cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
4279
0
      }
4280
0
      else
4281
0
      {
4282
0
        cs.getRecoBuf(COMP_Y).reconstruct(cs.getPredBuf(COMP_Y), cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
4283
0
      }
4284
0
    }
4285
0
    else
4286
0
    {
4287
0
      cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs[COMP_Y]);
4288
0
      if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
4289
0
      {
4290
0
        cs.getRecoBuf().bufs[0].rspSignal(reshapeData.getFwdLUT());
4291
0
      }
4292
0
    }
4293
0
  }
4294
0
  if (chroma)
4295
0
  {
4296
0
    cs.getRecoBuf().bufs[1].reconstruct(cs.getPredBuf().bufs[1], cs.getResiBuf().bufs[1], cs.slice->clpRngs[COMP_Cb]);
4297
0
    cs.getRecoBuf().bufs[2].reconstruct(cs.getPredBuf().bufs[2], cs.getResiBuf().bufs[2], cs.slice->clpRngs[COMP_Cr]);
4298
0
  }
4299
  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
4300
0
  Distortion finalDistortion = 0;
4301
4302
0
  for (int comp = 0; comp < numValidComponents; comp++)
4303
0
  {
4304
0
    const ComponentID compID = ComponentID(comp);
4305
0
    if (compID == COMP_Y && !luma)
4306
0
      continue;
4307
0
    if (compID != COMP_Y && !chroma)
4308
0
      continue;
4309
0
    CPelBuf reco = cs.getRecoBuf (compID);
4310
0
    CPelBuf org  = cs.getOrgBuf  (compID);
4311
4312
0
    if( (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4313
0
    {
4314
0
      const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
4315
0
      if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4316
0
      {
4317
0
        PelBuf tmpRecLuma = cs.getRspRecoBuf();
4318
0
        tmpRecLuma.rspSignal( reco, reshapeData.getInvLUT());
4319
0
        finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
4320
0
      }
4321
0
      else
4322
0
      {
4323
0
        finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
4324
0
      }
4325
0
    }
4326
0
    else
4327
0
    {
4328
0
      finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE );
4329
0
    }
4330
0
  }
4331
4332
0
  cs.dist     = finalDistortion;
4333
0
  cs.fracBits = finalFracBits;
4334
0
  cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
4335
4336
0
  CHECK(cs.tus.size() == 0, "No TUs present");
4337
0
}
4338
4339
// Estimates the fractional bit count for signalling the CU addressed by the
// partitioner, using the CABAC estimator (which is reset first).
//
// A merge CU without a coded residual is marked as skip and only the skip flag
// plus (for non-CIIP CUs) the merge data are counted. Every other CU is
// counted in full: skip flag (only when the luma block is valid), prediction
// mode, prediction data and residual.
//
// \param cs           coding structure the CU is fetched from
// \param partitioner  supplies channel type / tree type and is forwarded to cu_residual
// \return             estimated fractional bits reported by the CABAC estimator
uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &partitioner)
{
  CodingUnit &codingUnit = *cs.getCU( partitioner.chType, partitioner.treeType );

  m_CABACEstimator->resetBits();

  const bool mergeWithoutResidual = codingUnit.mergeFlag && !codingUnit.rootCbf;

  if( mergeWithoutResidual )
  {
    // Side effect: the CU is turned into a skip CU here.
    codingUnit.skip = true;

    m_CABACEstimator->cu_skip_flag( codingUnit );
    if( !codingUnit.ciip )
    {
      m_CABACEstimator->merge_data( codingUnit );
    }
    return m_CABACEstimator->getEstFracBits();
  }

  CHECK( codingUnit.skip, "Skip flag has to be off at this point!" );

  // Only the skip flag depends on a valid luma block; everything below is
  // always coded.
  if( codingUnit.Y().valid() )
  {
    m_CABACEstimator->cu_skip_flag( codingUnit );
  }
  m_CABACEstimator->pred_mode   ( codingUnit );
  m_CABACEstimator->cu_pred_data( codingUnit );

  // Both flags are preset to true before the residual is estimated.
  CUCtx residualCtx;
  residualCtx.isDQPCoded         = true;
  residualCtx.isChromaQpAdjCoded = true;
  m_CABACEstimator->cu_residual( codingUnit, partitioner, residualCtx );

  return m_CABACEstimator->getEstFracBits();
}
4374
4375
double InterSearch::xGetMEDistortionWeight(uint8_t BcwIdx, RefPicList refPicList)
4376
0
{
4377
0
  if( BcwIdx != BCW_DEFAULT )
4378
0
  {
4379
0
    return fabs( (double)getBcwWeight( BcwIdx, refPicList ) / (double)g_BcwWeightBase );
4380
0
  }
4381
0
  else
4382
0
  {
4383
0
    return 0.5;
4384
0
  }
4385
0
}
4386
4387
// Tries to reuse a buffered uni-prediction motion vector for the given
// reference list / reference index instead of running a new search.
//
// When m_uniMotions is in read mode for that entry, the stored MV and cost are
// copied out, the MV bits relative to pcMvPred (both converted to the CU's AMVR
// precision) are added to ruiBits, and the cost of the updated ruiBits is added
// to ruiCost.
//
// \param cu           CU whose imv setting selects the AMVR precision
// \param eRefPicList  reference picture list of the buffered entry
// \param iRefIdx      reference index of the buffered entry
// \param pcMvPred     motion vector predictor (not modified; a copy is converted)
// \param rcMv         receives the buffered motion vector
// \param ruiBits      in/out: incremented by the MV bits
// \param ruiCost      in/out: overwritten by copyTo, then incremented by the bit cost
// \return             true if a buffered entry was used, false otherwise
bool InterSearch::xReadBufferedUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost )
{
  const uint32_t listIdx = (uint32_t)eRefPicList;
  const uint32_t refIdx  = (uint32_t)iRefIdx;

  if( !m_uniMotions.isReadMode( listIdx, refIdx ) )
  {
    return false;
  }

  m_uniMotions.copyTo( rcMv, ruiCost, listIdx, refIdx );

  // The predictor is handed to the cost model in AMVR precision.
  Mv amvrPred = pcMvPred;
  amvrPred.changeTransPrecInternal2Amvr( cu.imv );
  m_pcRdCost->setPredictor( amvrPred );
  m_pcRdCost->setCostScale( 0 );

  // The buffered MV is converted on a copy, so rcMv keeps its precision.
  Mv amvrMv = rcMv;
  amvrMv.changeTransPrecInternal2Amvr( cu.imv );
  const uint32_t vectorBits = m_pcRdCost->getBitsOfVectorWithPredictor( amvrMv.hor, amvrMv.ver, 0 );

  ruiBits += vectorBits;
  ruiCost += m_pcRdCost->getCost( ruiBits );
  return true;
}
4408
4409
// Affine counterpart of the buffered uni-prediction lookup: reuses stored
// control-point MVs for the given list / reference index / affine type.
//
// On a hit the stored MVs, cost and MVP index are copied out, acMvPred is
// filled from the AMVP candidates selected by mvpIdx, the MV bits of all
// control points are added to ruiBits and the cost of the updated ruiBits is
// added to ruiCost.
//
// \param cu           CU whose affineType selects 2 or 3 control points
// \param eRefPicList  reference picture list of the buffered entry
// \param iRefIdx      reference index of the buffered entry
// \param acMvPred     receives the LT/RT/LB predictors for mvpIdx
// \param acMv         receives the buffered control-point MVs
// \param ruiBits      in/out: incremented by the control-point MV bits
// \param ruiCost      in/out: set by copyAffineMvTo, then incremented by the bit cost
// \param mvpIdx       in/out: set by copyAffineMvTo, then used to index aamvpi
// \param aamvpi       affine AMVP candidate lists
// \return             true if a buffered entry was used, false otherwise
bool InterSearch::xReadBufferedAffineUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost, int& mvpIdx, const AffineAMVPInfo& aamvpi )
{
  const uint32_t listIdx = (uint32_t)eRefPicList;
  const uint32_t refIdx  = (uint32_t)iRefIdx;

  if( !m_uniMotions.isReadModeAffine( listIdx, refIdx, cu.affineType ) )
  {
    return false;
  }

  m_uniMotions.copyAffineMvTo( acMv, ruiCost, listIdx, refIdx, cu.affineType, mvpIdx );
  m_pcRdCost->setCostScale( 0 );

  acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
  acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
  acMvPred[2] = aamvpi.mvCandLB[mvpIdx];

  // A non-zero affine type uses three control points, otherwise two.
  const int numCtrlPoints = cu.affineType ? 3 : 2;

  uint32_t vectorBits = 0;
  for( int cp = 0; cp < numCtrlPoints; cp++ )
  {
    // Control points other than the first are predicted with the first
    // control point's MV difference (acMv[0] - acMvPred[0]) added on top.
    Mv predictor;
    if( cp == 0 )
    {
      predictor = acMvPred[cp];
    }
    else
    {
      predictor = acMvPred[cp] + acMv[0] - acMvPred[0];
    }
    predictor.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
    m_pcRdCost->setPredictor( predictor );

    Mv ctrlPointMv = acMv[cp];
    ctrlPointMv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
    vectorBits += m_pcRdCost->getBitsOfVectorWithPredictor( ctrlPointMv.hor, ctrlPointMv.ver, 0 );
  }

  ruiBits += vectorBits;
  ruiCost += m_pcRdCost->getCost( ruiBits );
  return true;
}
4435
4436
void InterSearch::xSymMvdCheckBestMvp(
4437
  CodingUnit& cu,
4438
  CPelUnitBuf& origBuf,
4439
  Mv curMv,
4440
  RefPicList curRefList,
4441
  AMVPInfo amvpInfo[2][MAX_REF_PICS],
4442
  int32_t BcwIdx,
4443
  Mv cMvPredSym[2],
4444
  int32_t mvpIdxSym[2],
4445
  Distortion& bestCost,
4446
  bool skip
4447
)
4448
0
{
4449
0
  RefPicList tarRefList = (RefPicList)(1 - curRefList);
4450
0
  int32_t refIdxCur = cu.slice->symRefIdx[curRefList];
4451
0
  int32_t refIdxTar = cu.slice->symRefIdx[tarRefList];
4452
4453
0
  MvField cCurMvField, cTarMvField;
4454
0
  cCurMvField.setMvField(curMv, refIdxCur);
4455
0
  AMVPInfo& amvpCur = amvpInfo[curRefList][refIdxCur];
4456
0
  AMVPInfo& amvpTar = amvpInfo[tarRefList][refIdxTar];
4457
0
  m_pcRdCost->setCostScale(0);
4458
4459
0
  double fWeight = 0.0;
4460
0
  PelUnitBuf bufTmp;
4461
4462
  // get prediction of eCurRefPicList
4463
0
  PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getCompactBuf( cu );
4464
0
  const Picture* picRefA = cu.slice->getRefPic(curRefList, cCurMvField.refIdx);
4465
0
  Mv mvA = cCurMvField.mv;
4466
0
  xClipMvSearch( mvA, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4467
0
  xPredInterBlk( COMP_Y, cu, picRefA, mvA, predBufA, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4468
4469
0
  bufTmp = m_tmpStorageLCU.getCompactBuf( cu );
4470
0
  bufTmp.copyFrom( origBuf );
4471
0
  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, tarRefList )*/ );
4472
0
  fWeight = xGetMEDistortionWeight( cu.BcwIdx, tarRefList );
4473
4474
0
  int32_t skipMvpIdx[2];
4475
0
  skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1;
4476
0
  skipMvpIdx[1] = skip ? mvpIdxSym[1] : -1;
4477
4478
0
  for (int i = 0; i < amvpCur.numCand; i++)
4479
0
  {
4480
0
    for (int j = 0; j < amvpTar.numCand; j++)
4481
0
    {
4482
0
      if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j)
4483
0
        continue;
4484
4485
0
      Distortion cost = MAX_DISTORTION;
4486
0
      cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar);
4487
4488
      // get prediction of eTarRefPicList
4489
0
      PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getCompactBuf( cu );
4490
0
      const Picture* picRefB = cu.slice->getRefPic(tarRefList, cTarMvField.refIdx);
4491
0
      Mv mvB = cTarMvField.mv;
4492
0
      xClipMvSearch( mvB, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4493
0
      xPredInterBlk( COMP_Y, cu, picRefB, mvB, predBufB, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4494
4495
      // calc distortion
4496
0
      cost = ( Distortion ) floor( fWeight * ( double ) m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD ) );
4497
4498
0
      Mv pred = amvpCur.mvCand[i];
4499
0
      pred.changeTransPrecInternal2Amvr(cu.imv);
4500
0
      m_pcRdCost->setPredictor(pred);
4501
0
      Mv mv = curMv;
4502
0
      mv.changeTransPrecInternal2Amvr(cu.imv);
4503
0
      uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
4504
0
      bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS];
4505
0
      bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS];
4506
0
      cost += m_pcRdCost->getCost(bits);
4507
0
      if (cost < bestCost)
4508
0
      {
4509
0
        bestCost = cost;
4510
0
        cMvPredSym[curRefList] = amvpCur.mvCand[i];
4511
0
        cMvPredSym[tarRefList] = amvpTar.mvCand[j];
4512
0
        mvpIdxSym[curRefList] = i;
4513
0
        mvpIdxSym[tarRefList] = j;
4514
0
      }
4515
0
    }
4516
0
  }
4517
0
}
4518
4519
void InterSearch::resetSavedAffineMotion()
4520
0
{
4521
0
  for (int i = 0; i < 2; i++)
4522
0
  {
4523
0
    for (int j = 0; j < 2; j++)
4524
0
    {
4525
0
      m_affineMotion.acMvAffine4Para[i][j] = Mv(0, 0);
4526
0
      m_affineMotion.acMvAffine6Para[i][j] = Mv(0, 0);
4527
0
    }
4528
0
    m_affineMotion.acMvAffine6Para[i][2] = Mv(0, 0);
4529
4530
0
    m_affineMotion.affine4ParaRefIdx[i] = -1;
4531
0
    m_affineMotion.affine6ParaRefIdx[i] = -1;
4532
0
  }
4533
0
  m_affineMotion.affine4ParaAvail = false;
4534
0
  m_affineMotion.affine6ParaAvail = false;
4535
0
}
4536
4537
// Saves affine control-point MVs and reference indices of both lists into
// m_affineMotion, in the slot matching affineType.
//
// A slot is written when BcwIdx is BCW_DEFAULT, or when that slot has not been
// filled yet; otherwise the existing content is kept.
//
// \param acAffineMv    control-point MVs per list (3 used for 6-param, 2 for 4-param)
// \param affineRefIdx  reference index per list
// \param affineType    AFFINEMODEL_6PARAM or AFFINEMODEL_4PARAM; other values store nothing
// \param BcwIdx        BCW index of the motion being stored
void InterSearch::storeAffineMotion(Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int BcwIdx)
{
  const bool isDefaultBcw = ( BcwIdx == BCW_DEFAULT );

  if( affineType == AFFINEMODEL_6PARAM && ( isDefaultBcw || !m_affineMotion.affine6ParaAvail ) )
  {
    for( int list = 0; list < 2; list++ )
    {
      for( int cp = 0; cp < 3; cp++ )
      {
        m_affineMotion.acMvAffine6Para[list][cp] = acAffineMv[list][cp];
      }
      m_affineMotion.affine6ParaRefIdx[list] = affineRefIdx[list];
    }
    m_affineMotion.affine6ParaAvail = true;
  }

  if( affineType == AFFINEMODEL_4PARAM && ( isDefaultBcw || !m_affineMotion.affine4ParaAvail ) )
  {
    for( int list = 0; list < 2; list++ )
    {
      for( int cp = 0; cp < 2; cp++ )
      {
        m_affineMotion.acMvAffine4Para[list][cp] = acAffineMv[list][cp];
      }
      m_affineMotion.affine4ParaRefIdx[list] = affineRefIdx[list];
    }
    m_affineMotion.affine4ParaAvail = true;
  }
}
4565
4566
void InterSearch::xPredAffineInterSearch( CodingUnit& cu,
4567
                                          CPelUnitBuf&    origBuf,
4568
                                          int             puIdx,
4569
                                          uint32_t&       lastMode,
4570
                                          Distortion&     affineCost,
4571
                                          Mv              hevcMv[2][MAX_REF_PICS],
4572
                                          Mv              mvAffine4Para[2][MAX_REF_PICS][3],
4573
                                          int             refIdx4Para[2],
4574
                                          uint8_t         BcwIdx,
4575
                                          bool            enforceBcwPred,
4576
                                          uint32_t        BcwIdxBits )
4577
0
{
4578
0
  const Slice &slice = *cu.slice;
4579
4580
0
  affineCost = MAX_DISTORTION;
4581
4582
0
  Mv        cMvZero;
4583
0
  Mv        aacMv[2][3];
4584
0
  Mv        cMvBi[2][3];
4585
0
  AffineMVInfo tmp;
4586
4587
0
  int       iNumPredDir = slice.isInterP() ? 1 : 2;
4588
4589
0
  int mvNum = 2;
4590
0
  mvNum = cu.affineType ? 3 : 2;
4591
4592
  // Mvp
4593
0
  Mv        cMvPred[2][MAX_REF_PICS][3];
4594
0
  Mv        cMvPredBi[2][MAX_REF_PICS][3];
4595
0
  int       aaiMvpIdxBi[2][MAX_REF_PICS];
4596
0
  int       aaiMvpIdx[2][MAX_REF_PICS];
4597
0
  int       aaiMvpNum[2][MAX_REF_PICS];
4598
4599
0
  AffineAMVPInfo aacAffineAMVPInfo[2][MAX_REF_PICS];
4600
0
  AffineAMVPInfo affiAMVPInfoTemp[2];
4601
4602
0
  uint32_t      uiMbBits[3] = { 1, 1, 0 };
4603
0
  int           iRefIdx[2] = { 0,0 }; // If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
4604
0
  int           iRefIdxBi[2];
4605
0
  int           iRefStart, iRefEnd;
4606
0
  int           bestBiPRefIdxL1 = 0;
4607
0
  int           bestBiPMvpL1 = 0;
4608
0
  Distortion    biPDistTemp = MAX_DISTORTION;
4609
4610
0
  Distortion    uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
4611
0
  Distortion    uiCostBi = MAX_DISTORTION;
4612
0
  Distortion    uiCostTemp;
4613
4614
0
  uint32_t      uiBits[3] = { 0 };
4615
0
  uint32_t      uiBitsTemp;
4616
0
  Distortion    bestBiPDist = MAX_DISTORTION;
4617
4618
0
  Distortion    uiCostTempL0[MAX_NUM_REF];
4619
0
  for (int iNumRef = 0; iNumRef < MAX_NUM_REF; iNumRef++)
4620
0
  {
4621
0
    uiCostTempL0[iNumRef] = MAX_DISTORTION;
4622
0
  }
4623
0
  uint32_t      uiBitsTempL0[MAX_NUM_REF];
4624
4625
0
  Mv            mvValidList1[4];
4626
0
  int           refIdxValidList1 = 0;
4627
0
  uint32_t      bitsValidList1 = MAX_UINT;
4628
0
  Distortion    costValidList1 = MAX_DISTORTION;
4629
0
  Mv            mvHevc[3];
4630
0
  const bool    affineAmvrEnabled = false;
4631
4632
0
  xGetBlkBits(slice.isInterP(), puIdx, lastMode, uiMbBits);
4633
4634
0
  cu.affine = true;
4635
0
  cu.mergeFlag = false;
4636
0
  if (BcwIdx != BCW_DEFAULT)
4637
0
  {
4638
0
    cu.BcwIdx = BcwIdx;
4639
0
  }
4640
4641
  // Uni-directional prediction
4642
0
  for (int iRefList = 0; iRefList < iNumPredDir; iRefList++)
4643
0
  {
4644
0
    RefPicList  refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
4645
0
    cu.interDir = (iRefList ? 2 : 1);
4646
0
    for (int iRefIdxTemp = 0; iRefIdxTemp < slice.numRefIdx[refPicList]; iRefIdxTemp++)
4647
0
    {
4648
      // Get RefIdx bits
4649
0
      uiBitsTemp = uiMbBits[iRefList];
4650
0
      if (slice.numRefIdx[refPicList] > 1)
4651
0
      {
4652
0
        uiBitsTemp += iRefIdxTemp + 1;
4653
0
        if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
4654
0
        {
4655
0
          uiBitsTemp--;
4656
0
        }
4657
0
      }
4658
4659
      // Do Affine AMVP
4660
0
      bool foundPred = xEstimateAffineAMVP(cu, affiAMVPInfoTemp[refPicList], origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], biPDistTemp);
4661
0
      if( !foundPred )
4662
0
        return;
4663
4664
0
      if (affineAmvrEnabled)
4665
0
      {
4666
0
        biPDistTemp += m_pcRdCost->getCost(xCalcAffineMVBits(cu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]));
4667
0
      }
4668
0
      aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
4669
0
      aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];;
4670
0
      if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
4671
0
      {
4672
0
        xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4673
0
        continue;
4674
0
      }
4675
4676
      // set hevc ME result as start search position when it is best than mvp
4677
0
      for (int i = 0; i<3; i++)
4678
0
      {
4679
0
        mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
4680
0
        mvHevc[i].roundAffinePrecInternal2Amvr(cu.imv);
4681
0
      }
4682
0
      PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf(cu);
4683
4684
0
      Distortion uiCandCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
4685
0
        AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4686
4687
0
      if (affineAmvrEnabled)
4688
0
      {
4689
0
        uiCandCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvHevc, cMvPred[iRefList][iRefIdxTemp]));
4690
0
      }
4691
4692
      //check stored affine motion
4693
0
      bool affine4Para = cu.affineType == AFFINEMODEL_4PARAM;
4694
0
      bool savedParaAvail = cu.imv && ((m_affineMotion.affine4ParaRefIdx[iRefList] == iRefIdxTemp && affine4Para && m_affineMotion.affine4ParaAvail) ||
4695
0
        (m_affineMotion.affine6ParaRefIdx[iRefList] == iRefIdxTemp && !affine4Para && m_affineMotion.affine6ParaAvail));
4696
4697
0
      if (savedParaAvail)
4698
0
      {
4699
0
        Mv mvFour[3];
4700
0
        for (int i = 0; i < mvNum; i++)
4701
0
        {
4702
0
          mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
4703
0
          mvFour[i].roundAffinePrecInternal2Amvr(cu.imv);
4704
0
        }
4705
4706
0
        Distortion candCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4707
0
        candCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4708
4709
0
        if (candCostInherit < uiCandCost)
4710
0
        {
4711
0
          uiCandCost = candCostInherit;
4712
0
          memcpy(mvHevc, mvFour, 3 * sizeof(Mv));
4713
0
        }
4714
0
      }
4715
4716
0
      if( cu.affineType == AFFINEMODEL_4PARAM && m_AffineProfList->m_affMVListSize && (!cu.cs->sps->BCW || BcwIdx == BCW_DEFAULT ) )
4717
0
      {
4718
0
        int shift = MAX_CU_DEPTH;
4719
0
        for (int i = 0; i < m_AffineProfList->m_affMVListSize; i++)
4720
0
        {
4721
0
          AffineMVInfo *mvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - i - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4722
          //check;
4723
0
          int j = 0;
4724
0
          for (; j < i; j++)
4725
0
          {
4726
0
            AffineMVInfo *prevMvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - j - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4727
0
            if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) &&
4728
0
              (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1])
4729
0
              && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y)
4730
0
              && (mvInfo->w == prevMvInfo->w)
4731
0
              )
4732
0
            {
4733
0
              break;
4734
0
            }
4735
0
          }
4736
0
          if (j < i)
4737
0
            continue;
4738
4739
0
          Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp];
4740
0
          int vx, vy;
4741
0
          int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
4742
0
          int mvScaleHor = nbMv[0].hor * (1<< shift);
4743
0
          int mvScaleVer = nbMv[0].ver * (1<< shift);
4744
0
          Mv dMv = nbMv[1] - nbMv[0];
4745
0
          dMvHorX = dMv.hor *(1<<(shift - Log2(mvInfo->w)));
4746
0
          dMvHorY = dMv.ver *(1<< (shift - Log2(mvInfo->w)));
4747
0
          dMvVerX = -dMvHorY;
4748
0
          dMvVerY = dMvHorX;
4749
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4750
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4751
0
          roundAffineMv(vx, vy, shift);
4752
0
          mvTmp[0] = Mv(vx, vy);
4753
0
          mvTmp[0].clipToStorageBitDepth();
4754
0
          clipMv(mvTmp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4755
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4756
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4757
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4758
0
          roundAffineMv(vx, vy, shift);
4759
0
          mvTmp[1] = Mv(vx, vy);
4760
0
          mvTmp[1].clipToStorageBitDepth();
4761
0
          clipMv(mvTmp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4762
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4763
0
          mvTmp[1].roundAffinePrecInternal2Amvr(cu.imv);
4764
0
          Distortion tmpCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4765
0
          if (affineAmvrEnabled)
4766
0
          {
4767
0
            tmpCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvTmp, cMvPred[iRefList][iRefIdxTemp]));
4768
0
          }
4769
0
          if (tmpCost < uiCandCost)
4770
0
          {
4771
0
            uiCandCost = tmpCost;
4772
0
            std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv));
4773
0
          }
4774
0
        }
4775
0
      }
4776
0
      if (cu.affineType == AFFINEMODEL_6PARAM)
4777
0
      {
4778
0
        Mv mvFour[3];
4779
0
        mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
4780
0
        mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
4781
0
        mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(cu.imv);
4782
0
        mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(cu.imv);
4783
4784
0
        int shift = MAX_CU_DEPTH;
4785
0
        int vx2 = (mvFour[0].hor * (1<< shift)) - ((mvFour[1].ver - mvFour[0].ver) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4786
0
        int vy2 = (mvFour[0].ver * (1<< shift)) + ((mvFour[1].hor - mvFour[0].hor) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4787
0
        int offset = (1 << (shift - 1));
4788
0
        vx2 = (vx2 + offset - (vx2 >= 0)) >> shift;
4789
0
        vy2 = (vy2 + offset - (vy2 >= 0)) >> shift;
4790
0
        mvFour[2].hor = vx2;
4791
0
        mvFour[2].ver = vy2;
4792
0
        mvFour[2].clipToStorageBitDepth();
4793
0
        mvFour[0].roundAffinePrecInternal2Amvr(cu.imv);
4794
0
        mvFour[1].roundAffinePrecInternal2Amvr(cu.imv);
4795
0
        mvFour[2].roundAffinePrecInternal2Amvr(cu.imv);
4796
0
        Distortion uiCandCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4797
0
        if (affineAmvrEnabled)
4798
0
        {
4799
0
          uiCandCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4800
0
        }
4801
0
        if (uiCandCostInherit < uiCandCost)
4802
0
        {
4803
0
          uiCandCost = uiCandCostInherit;
4804
0
          for (int i = 0; i < 3; i++)
4805
0
          {
4806
0
            mvHevc[i] = mvFour[i];
4807
0
          }
4808
0
        }
4809
0
      }
4810
4811
0
      if (uiCandCost < biPDistTemp)
4812
0
      {
4813
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv) * 3);
4814
0
      }
4815
0
      else
4816
0
      {
4817
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4818
0
      }
4819
4820
      // GPB list 1, save the best MvpIdx, RefIdx and Cost
4821
0
      if (slice.picHeader->mvdL1Zero && iRefList == 1 && biPDistTemp < bestBiPDist)
4822
0
      {
4823
0
        bestBiPDist = biPDistTemp;
4824
0
        bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
4825
0
        bestBiPRefIdxL1 = iRefIdxTemp;
4826
0
      }
4827
4828
      // Update bits
4829
0
      uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
4830
4831
0
      if (m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1)   // list 1
4832
0
      {
4833
0
        if (slice.list1IdxToList0Idx[iRefIdxTemp] >= 0 && (cu.affineType != AFFINEMODEL_6PARAM || slice.list1IdxToList0Idx[iRefIdxTemp] == refIdx4Para[0]))
4834
0
        {
4835
0
          int iList1ToList0Idx = slice.list1IdxToList0Idx[iRefIdxTemp];
4836
0
          ::memcpy(tmp.affMVs[1][iRefIdxTemp], tmp.affMVs[0][iList1ToList0Idx], sizeof(Mv) * 3);
4837
0
          uiCostTemp = uiCostTempL0[iList1ToList0Idx];
4838
4839
0
          uiCostTemp -= m_pcRdCost->getCost(uiBitsTempL0[iList1ToList0Idx]);
4840
0
          uiBitsTemp += xCalcAffineMVBits(cu, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]);
4841
          /*calculate the correct cost*/
4842
0
          uiCostTemp += m_pcRdCost->getCost(uiBitsTemp);
4843
0
          DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
4844
0
        }
4845
0
        else
4846
0
        {
4847
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4848
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4849
0
        }
4850
0
      }
4851
0
      else
4852
0
      {
4853
0
        xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4854
0
                                uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4855
0
      }
4856
      
4857
0
      if( slice.sps->BCW && cu.BcwIdx == BCW_DEFAULT && slice.isInterB() )
4858
0
      {
4859
0
        m_uniMotions.setReadModeAffine( true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType );
4860
0
        m_uniMotions.copyAffineMvFrom( tmp.affMVs[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType,
4861
0
                                       aaiMvpIdx[iRefList][iRefIdxTemp] );
4862
0
      }
4863
4864
      // Set best AMVP Index
4865
0
      xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4866
0
      if (cu.imv != 2)//|| !m_pcEncCfg->getUseAffineAmvrEncOpt())
4867
0
        xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
4868
4869
0
      if (iRefList == 0)
4870
0
      {
4871
0
        uiCostTempL0[iRefIdxTemp] = uiCostTemp;
4872
0
        uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
4873
0
      }
4874
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp, uiCost[iRefList]);
4875
0
      if (uiCostTemp < uiCost[iRefList])
4876
0
      {
4877
0
        uiCost[iRefList] = uiCostTemp;
4878
0
        uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
4879
4880
                                       // set best motion
4881
0
        ::memcpy(aacMv[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4882
0
        iRefIdx[iRefList] = iRefIdxTemp;
4883
0
      }
4884
4885
0
      if (iRefList == 1 && uiCostTemp < costValidList1 && slice.list1IdxToList0Idx[iRefIdxTemp] < 0)
4886
0
      {
4887
0
        costValidList1 = uiCostTemp;
4888
0
        bitsValidList1 = uiBitsTemp;
4889
4890
        // set motion
4891
0
        memcpy(mvValidList1, tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4892
0
        refIdxValidList1 = iRefIdxTemp;
4893
0
      }
4894
0
    } // End refIdx loop
4895
0
  } // end Uni-prediction
4896
4897
0
  if (cu.affineType == AFFINEMODEL_4PARAM)
4898
0
  {
4899
0
    ::memcpy(mvAffine4Para, tmp.affMVs, sizeof(tmp.affMVs));
4900
0
    if (cu.imv == IMV_OFF)
4901
0
    {
4902
0
      m_AffineProfList->insert( tmp, cu.Y());
4903
0
    }
4904
0
  }
4905
4906
  // Bi-directional prediction
4907
0
  if (slice.isInterB() && !CU::isBipredRestriction(cu))
4908
0
  {
4909
0
    cu.interDir = 3;
4910
0
    m_isBi = true;
4911
4912
    // Set as best list0 and list1
4913
0
    iRefIdxBi[0] = iRefIdx[0];
4914
0
    iRefIdxBi[1] = iRefIdx[1];
4915
4916
0
    ::memcpy(cMvBi, aacMv, sizeof(aacMv));
4917
0
    ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred));
4918
0
    ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx));
4919
4920
0
    uint32_t uiMotBits[2];
4921
0
    bool doBiPred = true;
4922
4923
0
    if (slice.picHeader->mvdL1Zero) // GPB, list 1 only use Mvp
4924
0
    {
4925
0
      xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1]);
4926
0
      cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
4927
0
      aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
4928
4929
      // Set Mv for list1
4930
0
      Mv pcMvTemp[3] = { affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLT[bestBiPMvpL1],
4931
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandRT[bestBiPMvpL1],
4932
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLB[bestBiPMvpL1] };
4933
0
      ::memcpy(cMvPredBi[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4934
0
      ::memcpy(cMvBi[1], pcMvTemp, sizeof(Mv) * 3);
4935
0
      ::memcpy(tmp.affMVs[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4936
0
      iRefIdxBi[1] = bestBiPRefIdxL1;
4937
4938
0
      if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, pcMvTemp, m_pcEncCfg->m_ifpLines ) )
4939
0
      {
4940
        // this mvp cannot be used for mv, skip Bi-pred
4941
0
        uiCostBi = MAX_DISTORTION;
4942
0
        doBiPred = false;
4943
0
      }
4944
0
      else
4945
0
      {
4946
4947
        // Get list1 prediction block
4948
0
        CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
4949
0
        cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
4950
4951
0
        PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
4952
0
        motionCompensation(cu, predBufTmp, REF_PIC_LIST_1);
4953
4954
        // Update bits
4955
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
4956
0
        uiMotBits[1] = uiMbBits[1];
4957
4958
0
        if (slice.numRefIdx[REF_PIC_LIST_1] > 1)
4959
0
        {
4960
0
          uiMotBits[1] += bestBiPRefIdxL1 + 1;
4961
0
          if (bestBiPRefIdxL1 == slice.numRefIdx[REF_PIC_LIST_1] - 1)
4962
0
          {
4963
0
            uiMotBits[1]--;
4964
0
          }
4965
0
        }
4966
0
        uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
4967
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4968
0
      }
4969
0
    }
4970
0
    else
4971
0
    {
4972
0
      uiMotBits[0] = uiBits[0] - uiMbBits[0];
4973
0
      uiMotBits[1] = uiBits[1] - uiMbBits[1];
4974
0
      uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4975
0
    }
4976
4977
0
    if (doBiPred)
4978
0
    {
4979
      // 4-times iteration (default)
4980
0
      int iNumIter = 4;
4981
      // fast encoder setting or GPB: only one iteration
4982
0
      if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2 || slice.picHeader->mvdL1Zero)
4983
0
      {
4984
0
        iNumIter = 1;
4985
0
      }
4986
4987
0
      for (int iIter = 0; iIter < iNumIter; iIter++)
4988
0
      {
4989
        // Set RefList
4990
0
        int iRefList = iIter % 2;
4991
0
        if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2)
4992
0
        {
4993
0
          if (uiCost[0] <= uiCost[1])
4994
0
          {
4995
0
            iRefList = 1;
4996
0
          }
4997
0
          else
4998
0
          {
4999
0
            iRefList = 0;
5000
0
          }
5001
0
        }
5002
0
        else if (iIter == 0)
5003
0
        {
5004
0
          iRefList = 0;
5005
0
        }
5006
5007
        // First iterate, get prediction block of opposite direction
5008
0
        if (iIter == 0 && !slice.picHeader->mvdL1Zero)
5009
0
        {
5010
0
          if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, aacMv[1 - iRefList], m_pcEncCfg->m_ifpLines ) )
5011
0
          {
5012
0
            continue;
5013
0
          }
5014
5015
0
          CU::setAllAffineMv(cu, aacMv[1 - iRefList][0], aacMv[1 - iRefList][1], aacMv[1 - iRefList][2], RefPicList(1 - iRefList));
5016
0
          cu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
5017
5018
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
5019
0
          motionCompensation(cu, predBufTmp, RefPicList(1 - iRefList));
5020
0
        }
5021
5022
0
        RefPicList refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
5023
5024
0
        if (slice.picHeader->mvdL1Zero) // GPB, fix List 1, search List 0
5025
0
        {
5026
0
          iRefList = 0;
5027
0
          refPicList = REF_PIC_LIST_0;
5028
0
        }
5029
5030
0
        bool bChanged = false;
5031
5032
0
        iRefStart = 0;
5033
0
        iRefEnd = slice.numRefIdx[refPicList] - 1;
5034
0
        for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
5035
0
        {
5036
0
          if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
5037
0
          {
5038
0
            continue;
5039
0
          }
5040
          // update bits
5041
0
          uiBitsTemp = uiMbBits[2] + uiMotBits[1 - iRefList];
5042
0
          uiBitsTemp += ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5043
0
          if (slice.numRefIdx[refPicList] > 1)
5044
0
          {
5045
0
            uiBitsTemp += iRefIdxTemp + 1;
5046
0
            if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
5047
0
            {
5048
0
              uiBitsTemp--;
5049
0
            }
5050
0
          }
5051
0
          uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
5052
5053
          // call Affine ME
5054
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
5055
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp], true);
5056
0
          xCopyAffineAMVPInfo(aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
5057
0
          if (cu.imv != 2)
5058
0
          {
5059
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
5060
0
          }
5061
5062
0
          if (uiCostTemp < uiCostBi)
5063
0
          {
5064
0
            bChanged = true;
5065
0
            ::memcpy(cMvBi[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
5066
0
            iRefIdxBi[iRefList] = iRefIdxTemp;
5067
5068
0
            uiCostBi = uiCostTemp;
5069
0
            uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1 - iRefList];
5070
0
            uiMotBits[iRefList] -= ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5071
0
            uiBits[2] = uiBitsTemp;
5072
5073
0
            if (iNumIter != 1) // MC for next iter
5074
0
            {
5075
              //  Set motion
5076
0
              CU::setAllAffineMv(cu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], refPicList);
5077
0
              cu.refIdx[refPicList] = iRefIdxBi[refPicList];
5078
0
              PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
5079
0
              motionCompensation(cu, predBufTmp, refPicList);
5080
0
            }
5081
0
          }
5082
0
        } // for loop-iRefIdxTemp
5083
5084
0
        if (!bChanged)
5085
0
        {
5086
0
          if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
5087
0
          {
5088
0
            xCopyAffineAMVPInfo(aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0]);
5089
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi);
5090
5091
0
            if (!slice.picHeader->mvdL1Zero)
5092
0
            {
5093
0
              xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1]);
5094
0
              xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi);
5095
0
            }
5096
0
          }
5097
0
          break;
5098
0
        }
5099
0
      } // for loop-iter
5100
0
    }
5101
0
    m_isBi = false;
5102
0
  } // if (B_SLICE)
5103
5104
0
  cu.mv [REF_PIC_LIST_0][0] = Mv();
5105
0
  cu.mv [REF_PIC_LIST_1][0] = Mv();
5106
0
  cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
5107
0
  cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
5108
0
  cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
5109
0
  cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
5110
0
  cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
5111
0
  cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
5112
0
  cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
5113
0
  cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
5114
5115
0
  for (int verIdx = 0; verIdx < 3; verIdx++)
5116
0
  {
5117
0
    cu.mvd[REF_PIC_LIST_0][verIdx] = cMvZero;
5118
0
    cu.mvd[REF_PIC_LIST_1][verIdx] = cMvZero;
5119
0
  }
5120
5121
  // Set Motion Field
5122
0
  memcpy(aacMv[1], mvValidList1, sizeof(Mv) * 3);
5123
0
  iRefIdx[1] = refIdxValidList1;
5124
0
  uiBits[1] = bitsValidList1;
5125
0
  uiCost[1] = costValidList1;
5126
5127
0
  if (enforceBcwPred)
5128
0
  {
5129
0
    uiCost[0] = uiCost[1] = MAX_UINT;
5130
0
  }
5131
5132
  // Affine ME result set
5133
0
  if (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) // Bi
5134
0
  {
5135
0
    lastMode = 2;
5136
0
    affineCost = uiCostBi;
5137
0
    cu.interDir = 3;
5138
0
    CU::setAllAffineMv(cu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0);
5139
0
    CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
5140
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
5141
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
5142
5143
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5144
0
    {
5145
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx];
5146
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = cMvBi[1][verIdx] - cMvPredBi[1][iRefIdxBi[1]][verIdx];
5147
0
      if (verIdx != 0)
5148
0
      {
5149
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5150
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5151
0
      }
5152
0
    }
5153
5154
5155
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
5156
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
5157
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
5158
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
5159
0
  }
5160
0
  else if (uiCost[0] <= uiCost[1]) // List 0
5161
0
  {
5162
0
    lastMode = 0;
5163
0
    affineCost = uiCost[0];
5164
0
    cu.interDir = 1;
5165
0
    CU::setAllAffineMv(cu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0);
5166
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
5167
5168
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5169
0
    {
5170
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx];
5171
0
      if (verIdx != 0)
5172
0
      {
5173
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5174
0
      }
5175
0
    }
5176
5177
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
5178
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
5179
0
  }
5180
0
  else
5181
0
  {
5182
0
    lastMode = 1;
5183
0
    affineCost = uiCost[1];
5184
0
    cu.interDir = 2;
5185
0
    CU::setAllAffineMv(cu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1);
5186
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
5187
5188
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5189
0
    {
5190
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx];
5191
0
      if (verIdx != 0)
5192
0
      {
5193
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5194
0
      }
5195
0
    }
5196
5197
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
5198
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
5199
0
  }
5200
0
  if (BcwIdx != BCW_DEFAULT)
5201
0
  {
5202
0
    cu.BcwIdx = BCW_DEFAULT;
5203
0
  }
5204
0
}
5205
5206
// Cost of using one affine MVP candidate directly as the motion of the CU.
//
// The three control-point MVs in acMvCand are copied, the luma block is predicted
// from reference picture (refPicList, iRefIdx) into predBuf, and the Hadamard
// distortion (DF_HAD) against origBuf is combined with the cost of signalling
// MVP index iMVPIdx out of iMVPNum candidates.
//
// Returns MAX_DISTORTION >> 1 without predicting when m_ifpLines is set and the
// candidate is rejected by xIsAffineMvInRangeFPP.
Distortion InterSearch::xGetAffineTemplateCost(CodingUnit& cu, CPelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList refPicList, int iRefIdx)
{
  const Picture* refPicture = cu.slice->getRefPic(refPicList, iRefIdx);

  // work on a private copy of the candidate control-point MVs
  Mv ctrlPtMv[3];
  memcpy(ctrlPtMv, acMvCand, sizeof(ctrlPtMv));

  if( m_pcEncCfg->m_ifpLines )
  {
    if( !xIsAffineMvInRangeFPP( cu, ctrlPtMv, m_pcEncCfg->m_ifpLines ) )
    {
      // candidate not usable under the FPP restriction
      return MAX_DISTORTION>>1;
    }
  }

  // luma prediction with the candidate MVs
  xPredAffineBlk(COMP_Y, cu, refPicture, ctrlPtMv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);

  // distortion plus MVP index signalling cost
  Distortion templateCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD );
  templateCost += m_pcRdCost->getCost(m_auiMVPIdxCost[iMVPIdx][iMVPNum]);

  DTRACE(g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), templateCost);
  return templateCost;
}
5230
5231
// Solves a linear system of iOrder equations by Gaussian elimination with
// partial (column-max) pivoting followed by back substitution.
//
// dEqualCoeff : (iOrder + 1) x (iOrder + 1) matrix, modified in place.
//               Rows 1..iOrder hold the equations; row 0 is used as scratch
//               space while swapping rows. In each equation row, columns
//               0..iOrder-1 are the coefficients and column iOrder is the
//               right-hand side.
// iOrder      : number of unknowns; values <= 0 make the call a no-op.
// dAffinePara : receives the iOrder solution values. It is left all-zero when
//               a zero pivot is encountered (singular system).
void solveEqual(double** dEqualCoeff, int iOrder, double* dAffinePara)
{
  // Nothing to solve. Without this guard the code below would index
  // dEqualCoeff[iOrder][iOrder - 1] and dAffinePara[iOrder - 1] out of bounds.
  if (iOrder <= 0)
  {
    return;
  }

  for (int k = 0; k < iOrder; k++)
  {
    dAffinePara[k] = 0.;
  }

  // row echelon
  for (int i = 1; i < iOrder; i++)
  {
    // find column max
    double temp = fabs(dEqualCoeff[i][i - 1]);
    int tempIdx = i;
    for (int j = i + 1; j < iOrder + 1; j++)
    {
      if (fabs(dEqualCoeff[j][i - 1]) > temp)
      {
        temp = fabs(dEqualCoeff[j][i - 1]);
        tempIdx = j;
      }
    }

    // swap line (row 0 serves as the temporary)
    if (tempIdx != i)
    {
      for (int j = 0; j < iOrder + 1; j++)
      {
        dEqualCoeff[0][j] = dEqualCoeff[i][j];
        dEqualCoeff[i][j] = dEqualCoeff[tempIdx][j];
        dEqualCoeff[tempIdx][j] = dEqualCoeff[0][j];
      }
    }

    // elimination first column
    if (dEqualCoeff[i][i - 1] == 0.)
    {
      // singular: the solution stays at the zeros written above
      return;
    }
    for (int j = i + 1; j < iOrder + 1; j++)
    {
      for (int k = i; k < iOrder + 1; k++)
      {
        // evaluation order kept as-is so results stay bit-identical
        dEqualCoeff[j][k] = dEqualCoeff[j][k] - dEqualCoeff[i][k] * dEqualCoeff[j][i - 1] / dEqualCoeff[i][i - 1];
      }
    }
  }

  if (dEqualCoeff[iOrder][iOrder - 1] == 0.)
  {
    return;
  }
  dAffinePara[iOrder - 1] = dEqualCoeff[iOrder][iOrder] / dEqualCoeff[iOrder][iOrder - 1];

  // back substitution, last unknown first
  for (int i = iOrder - 2; i >= 0; i--)
  {
    if (dEqualCoeff[i + 1][i] == 0.)
    {
      // zero diagonal element: discard the partial solution
      for (int k = 0; k < iOrder; k++)
      {
        dAffinePara[k] = 0.;
      }
      return;
    }
    double temp = 0;
    for (int j = i + 1; j < iOrder; j++)
    {
      temp += dEqualCoeff[i + 1][j] * dAffinePara[j];
    }
    dAffinePara[i] = (dEqualCoeff[i + 1][iOrder] - temp) / dEqualCoeff[i + 1][i];
  }
}
5301
5302
// Re-selects the affine MVP candidate that makes the given control-point MVs
// cheapest to code.
//
// For every candidate in affineAMVPInfo other than the current one (riMVPIdx),
// the bits from xCalcAffineMVBits(acMv, candidate) plus the MVP index bits are
// compared against the bits of the current predictor. If a strictly cheaper
// candidate exists, acMvPred and riMVPIdx are switched to it and ruiBits /
// ruiCost are adjusted by the bit difference. Nothing is changed when fewer
// than two candidates are available or no candidate is cheaper.
void InterSearch::xCheckBestAffineMVP(CodingUnit& cu, AffineAMVPInfo &affineAMVPInfo, RefPicList refPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost)
{
  // a single candidate leaves nothing to choose from
  if (affineAMVPInfo.numCand < 2)
  {
    return;
  }

  // the third control point is only evaluated when affineType is non-zero
  const bool useThirdCtrlPt = cu.affineType ? true : false;

  m_pcRdCost->selectMotionLambda();
  m_pcRdCost->setCostScale(0);

  // bits spent with the predictor that is currently selected
  int currentBits = xCalcAffineMVBits(cu, acMv, acMvPred);
  currentBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

  int cheapestBits = currentBits;
  int cheapestIdx  = riMVPIdx;

  Mv candPred[3];
  for (int cand = 0; cand < affineAMVPInfo.numCand; cand++)
  {
    if (cand != riMVPIdx)
    {
      candPred[0] = affineAMVPInfo.mvCandLT[cand];
      candPred[1] = affineAMVPInfo.mvCandRT[cand];
      if (useThirdCtrlPt)
      {
        candPred[2] = affineAMVPInfo.mvCandLB[cand];
      }

      int candBits = xCalcAffineMVBits(cu, acMv, candPred);
      candBits += m_auiMVPIdxCost[cand][AMVP_MAX_NUM_CANDS];

      if (candBits < cheapestBits)
      {
        cheapestBits = candBits;
        cheapestIdx  = cand;
      }
    }
  }

  // keep everything untouched when the current predictor is still the best
  if (cheapestIdx == riMVPIdx)
  {
    return;
  }

  acMvPred[0] = affineAMVPInfo.mvCandLT[cheapestIdx];
  acMvPred[1] = affineAMVPInfo.mvCandRT[cheapestIdx];
  acMvPred[2] = affineAMVPInfo.mvCandLB[cheapestIdx];
  riMVPIdx = cheapestIdx;

  // swap the old predictor's bit share for the new one in bits and cost
  const uint32_t previousBits = ruiBits;
  ruiBits = previousBits - currentBits + cheapestBits;
  ruiCost = (ruiCost - m_pcRdCost->getCost(previousBits)) + m_pcRdCost->getCost(ruiBits);
}
5355
5356
void InterSearch::xAffineMotionEstimation(CodingUnit& cu,
5357
  CPelUnitBuf&    origBuf,
5358
  RefPicList      refPicList,
5359
  Mv              acMvPred[3],
5360
  int             iRefIdxPred,
5361
  Mv              acMv[3],
5362
  uint32_t&       ruiBits,
5363
  Distortion&     ruiCost,
5364
  int&            mvpIdx,
5365
  const AffineAMVPInfo& aamvpi,
5366
  bool            bBi)
5367
0
{
5368
0
  if( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv( cu, refPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost, mvpIdx, aamvpi ) )
5369
0
  {
5370
0
    return;
5371
0
  }
5372
5373
0
  int bestMvpIdx = mvpIdx;
5374
0
  const int width = cu.Y().width;
5375
0
  const int height = cu.Y().height;
5376
5377
0
  const Picture* refPic = cu.slice->getRefPic(refPicList, iRefIdxPred);
5378
5379
  // Set Origin YUV: pcYuv
5380
0
  CPelUnitBuf*   pBuf = &origBuf;
5381
0
  double        fWeight = 1.0;
5382
5383
0
  CPelUnitBuf  origBufTmpCnst;
5384
5385
  // if Bi, set to ( 2 * Org - ListX )
5386
0
  if (bBi)
5387
0
  {
5388
0
    PelUnitBuf  origBufTmp = m_tmpStorageLCU.getCompactBuf(cu);
5389
    // NOTE: Other buf contains predicted signal from another direction
5390
0
    PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
5391
0
    origBufTmp.copyFrom(origBuf);
5392
0
    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs);
5393
5394
0
    origBufTmpCnst = origBufTmp;
5395
0
    pBuf           = &origBufTmpCnst;
5396
0
    fWeight        = xGetMEDistortionWeight(cu.BcwIdx, refPicList);
5397
0
  }
5398
5399
  // pred YUV
5400
0
  PelUnitBuf  predBuf = m_tmpAffiStorage.getCompactBuf(cu);
5401
5402
  // Set start Mv position, use input mv as started search mv
5403
0
  Mv acMvTemp[3];
5404
0
  ::memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5405
  // Set delta mv
5406
  // malloc buffer
5407
0
  int iParaNum = cu.affineType ? 7 : 5;
5408
0
  int affineParaNum = iParaNum - 1;
5409
0
  int mvNum = cu.affineType ? 3 : 2;
5410
0
  double **pdEqualCoeff;
5411
0
  pdEqualCoeff = new double *[iParaNum];
5412
0
  for (int i = 0; i < iParaNum; i++)
5413
0
  {
5414
0
    pdEqualCoeff[i] = new double[iParaNum];
5415
0
  }
5416
5417
0
  int64_t  i64EqualCoeff[7][7];
5418
0
  Pel    *piError = m_tmpAffiError;
5419
0
  Pel    *pdDerivate[2];
5420
0
  pdDerivate[0] = m_tmpAffiDeri[0];
5421
0
  pdDerivate[1] = m_tmpAffiDeri[1];
5422
5423
0
  Distortion uiCostBest = MAX_DISTORTION;
5424
0
  uint32_t uiBitsBest = 0;
5425
5426
  // do motion compensation with origin mv
5427
5428
0
  clipMv(acMvTemp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5429
0
  clipMv(acMvTemp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5430
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5431
0
  {
5432
0
    clipMv(acMvTemp[2], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5433
0
  }
5434
5435
0
  acMvTemp[0].roundAffinePrecInternal2Amvr(cu.imv);
5436
0
  acMvTemp[1].roundAffinePrecInternal2Amvr(cu.imv);
5437
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5438
0
  {
5439
0
    acMvTemp[2].roundAffinePrecInternal2Amvr(cu.imv);
5440
0
  }
5441
0
  if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5442
0
  {
5443
0
    xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.cs->slice->clpRngs[COMP_Y], refPicList);
5444
5445
    // get error
5446
0
    uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5447
5448
    // get cost with mv
5449
0
    m_pcRdCost->setCostScale(0);
5450
0
    uiBitsBest = ruiBits;
5451
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5452
0
    uiBitsBest += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5453
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5454
0
    uiCostBest = (Distortion)(floor(fWeight * (double)uiCostBest) + (double)m_pcRdCost->getCost(uiBitsBest));
5455
5456
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5457
5458
0
    ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5459
0
  }
5460
0
  const int predBufStride = predBuf.Y().stride;
5461
0
  Mv prevIterMv[7][3];
5462
0
  int iIterTime;
5463
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5464
0
  {
5465
0
    iIterTime = bBi ? 3 : 4;
5466
0
  }
5467
0
  else
5468
0
  {
5469
0
    iIterTime = bBi ? 3 : 5;
5470
0
  }
5471
5472
0
  if (!cu.cs->sps->AffineType)// getUseAffineType())
5473
0
  {
5474
0
    iIterTime = bBi ? 5 : 7;
5475
0
  }
5476
5477
0
  for (int iter = 0; iter<iIterTime; iter++)    // iterate loop
5478
0
  {
5479
0
    memcpy(prevIterMv[iter], acMvTemp, sizeof(Mv) * 3);
5480
    /*********************************************************************************
5481
    *                         use gradient to update mv
5482
    *********************************************************************************/
5483
    // get Error Matrix
5484
0
    PelBuf( piError, width, height ).subtract( pBuf->Y(), predBuf.Y() );
5485
5486
    // sobel x direction
5487
    // -1 0 1
5488
    // -2 0 2
5489
    // -1 0 1
5490
0
    Pel* pPred = predBuf.Y().buf;
5491
0
    m_HorizontalSobelFilter(pPred, predBufStride, pdDerivate[0], width, width, height);
5492
5493
    // sobel y direction
5494
    // -1 -2 -1
5495
    //  0  0  0
5496
    //  1  2  1
5497
0
    m_VerticalSobelFilter(pPred, predBufStride, pdDerivate[1], width, width, height);
5498
5499
    // solve delta x and y
5500
0
    for (int row = 0; row < iParaNum; row++)
5501
0
    {
5502
0
      memset(&i64EqualCoeff[row][0], 0, iParaNum * sizeof(int64_t));
5503
0
    }
5504
5505
0
    m_EqualCoeffComputer[cu.affineType]( piError, width, pdDerivate, width, width, height, i64EqualCoeff );
5506
5507
0
    for (int row = 0; row < iParaNum; row++)
5508
0
    {
5509
0
      for (int i = 0; i < iParaNum; i++)
5510
0
      {
5511
0
        pdEqualCoeff[row][i] = (double)i64EqualCoeff[row][i];
5512
0
      }
5513
0
    }
5514
5515
0
    double dAffinePara[6];
5516
0
    double dDeltaMv[6];
5517
0
    Mv acDeltaMv[3];
5518
5519
0
    solveEqual(pdEqualCoeff, affineParaNum, dAffinePara);
5520
5521
    // convert to delta mv
5522
0
    dDeltaMv[0] = dAffinePara[0];
5523
0
    dDeltaMv[2] = dAffinePara[2];
5524
0
    const bool extParams = cu.affineType == AFFINEMODEL_6PARAM;
5525
0
    if (extParams)
5526
0
    {
5527
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5528
0
      dDeltaMv[3] = dAffinePara[3] * width + dAffinePara[2];
5529
0
      dDeltaMv[4] = dAffinePara[4] * height + dAffinePara[0];
5530
0
      dDeltaMv[5] = dAffinePara[5] * height + dAffinePara[2];
5531
0
    }
5532
0
    else
5533
0
    {
5534
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5535
0
      dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2];
5536
0
    }
5537
5538
0
    const int normShiftTab[3] = { MV_PRECISION_QUARTER - MV_PRECISION_INT, MV_PRECISION_SIXTEENTH - MV_PRECISION_INT, MV_PRECISION_QUARTER - MV_PRECISION_INT };
5539
0
    const int stepShiftTab[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER };
5540
0
    const int multiShift = 1 << normShiftTab[cu.imv];
5541
0
    const int mvShift = stepShiftTab[cu.imv];
5542
5543
0
    acDeltaMv[0] = Mv((int)(dDeltaMv[0] * multiShift + SIGN(dDeltaMv[0]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[2] * multiShift + SIGN(dDeltaMv[2]) * 0.5) * (1<< mvShift));
5544
0
    acDeltaMv[1] = Mv((int)(dDeltaMv[1] * multiShift + SIGN(dDeltaMv[1]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[3] * multiShift + SIGN(dDeltaMv[3]) * 0.5) * (1<< mvShift));
5545
0
    if (extParams)
5546
0
    {
5547
0
      acDeltaMv[2] = Mv((int)(dDeltaMv[4] * multiShift + SIGN(dDeltaMv[4]) * 0.5) *  (1<< mvShift), (int)(dDeltaMv[5] * multiShift + SIGN(dDeltaMv[5]) * 0.5) *  (1<< mvShift));
5548
0
    }
5549
0
    bool bAllZero = false;
5550
0
    for (int i = 0; i < mvNum; i++)
5551
0
    {
5552
0
      Mv deltaMv = acDeltaMv[i];
5553
0
      if (cu.imv == IMV_4PEL)
5554
0
      {
5555
0
        deltaMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_HALF);
5556
0
      }
5557
0
      if (deltaMv.hor != 0 || deltaMv.ver != 0)
5558
0
      {
5559
0
        bAllZero = false;
5560
0
        break;
5561
0
      }
5562
0
      bAllZero = true;
5563
0
    }
5564
5565
0
    if (bAllZero)
5566
0
      break;
5567
5568
    // do motion compensation with updated mv
5569
0
    for (int i = 0; i < mvNum; i++)
5570
0
    {
5571
0
      acMvTemp[i] += acDeltaMv[i];
5572
0
      acMvTemp[i].hor = Clip3(MV_MIN, MV_MAX, acMvTemp[i].hor);
5573
0
      acMvTemp[i].ver = Clip3(MV_MIN, MV_MAX, acMvTemp[i].ver);
5574
0
      acMvTemp[i].roundAffinePrecInternal2Amvr(cu.imv);
5575
5576
0
      clipMv(acMvTemp[i], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5577
0
    }
5578
5579
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5580
0
    {
5581
0
      xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5582
5583
      // get error
5584
0
      Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5585
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
5586
5587
      // get cost with mv
5588
0
      m_pcRdCost->setCostScale(0);
5589
0
      uint32_t uiBitsTemp = ruiBits;
5590
0
      uiBitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5591
0
      uiCostTemp = (Distortion)(floor(fWeight * (double)uiCostTemp) + (double)m_pcRdCost->getCost(uiBitsTemp));
5592
5593
      // store best cost and mv
5594
0
      if (uiCostTemp < uiCostBest)
5595
0
      {
5596
0
        uiCostBest = uiCostTemp;
5597
0
        uiBitsBest = uiBitsTemp;
5598
0
        memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5599
0
        mvpIdx = bestMvpIdx;
5600
0
      }
5601
0
      else if(m_pcEncCfg->m_Affine > 1)
5602
0
      {
5603
0
        break;
5604
0
      }
5605
0
    }
5606
0
  }
5607
5608
0
  auto checkCPMVRdCost = [&](Mv ctrlPtMv[3])
5609
0
  {
5610
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, ctrlPtMv, m_pcEncCfg->m_ifpLines ) )
5611
0
    {
5612
0
      xPredAffineBlk(COMP_Y, cu, refPic, ctrlPtMv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5613
      // get error
5614
0
      Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5615
      // get cost with mv
5616
0
      m_pcRdCost->setCostScale(0);
5617
0
      uint32_t bitsTemp = ruiBits;
5618
0
      bitsTemp += xCalcAffineMVBits(cu, ctrlPtMv, acMvPred);
5619
0
      costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5620
      // store best cost and mv
5621
0
      if (costTemp < uiCostBest)
5622
0
      {
5623
0
        uiCostBest = costTemp;
5624
0
        uiBitsBest = bitsTemp;
5625
0
        ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3);
5626
0
      }
5627
0
    }
5628
0
  };
5629
5630
0
  const uint32_t mvShiftTable[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_INTERNAL, MV_PRECISION_INTERNAL - MV_PRECISION_INT };
5631
0
  const uint32_t mvShift = mvShiftTable[cu.imv];
5632
0
  if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost)
5633
0
  {
5634
0
    Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] };
5635
0
    Mv mvME[3];
5636
0
    ::memcpy(mvME, acMv, sizeof(Mv) * 3);
5637
0
    Mv dMv = mvME[0] - mvPredTmp[0];
5638
5639
0
    for (int j = 0; j < mvNum; j++)
5640
0
    {
5641
0
      if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv)))
5642
0
      {
5643
0
        ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5644
0
        acMvTemp[j] = mvPredTmp[j];
5645
5646
0
        if (j)
5647
0
          acMvTemp[j] += dMv;
5648
5649
0
        checkCPMVRdCost(acMvTemp);
5650
0
      }
5651
0
    }
5652
5653
    //keep the rotation/zoom;
5654
0
    if (mvME[0] != mvPredTmp[0])
5655
0
    {
5656
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5657
0
      for (int i = 1; i < mvNum; i++)
5658
0
      {
5659
0
        acMvTemp[i] -= dMv;
5660
0
      }
5661
0
      acMvTemp[0] = mvPredTmp[0];
5662
5663
0
      checkCPMVRdCost(acMvTemp);
5664
0
    }
5665
5666
    //keep the translation;
5667
0
    if (cu.affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) && mvME[2] != (mvPredTmp[2] + dMv))
5668
0
    {
5669
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5670
5671
0
      acMvTemp[1] = mvPredTmp[1] + dMv;
5672
0
      acMvTemp[2] = mvPredTmp[2] + dMv;
5673
5674
0
      checkCPMVRdCost(acMvTemp);
5675
0
    }
5676
5677
    // 8 nearest neighbor search
5678
0
    int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } };
5679
0
    const int maxSearchRound = 3;
5680
5681
0
    for (int rnd = 0; rnd < maxSearchRound; rnd++)
5682
0
    {
5683
0
      bool modelChange = false;
5684
      //search the model parameters with finear granularity;
5685
0
      for (int j = 0; j < mvNum; j++)
5686
0
      {
5687
0
        bool loopChange = false;
5688
0
        for (int iter = 0; iter < 2; iter++)
5689
0
        {
5690
0
          if (iter == 1 && !loopChange)
5691
0
          {
5692
0
            break;
5693
0
          }
5694
0
          Mv centerMv[3];
5695
0
          memcpy(centerMv, acMv, sizeof(Mv) * 3);
5696
0
          memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5697
5698
0
          for (int i = ((iter == 0) ? 0 : 4); i < ((iter == 0) ? 4 : 8); i++)
5699
0
          {
5700
0
            acMvTemp[j].set(centerMv[j].hor + (testPos[i][0] * (1 << mvShift)), centerMv[j].ver + (testPos[i][1] * (1 << mvShift)));
5701
0
            clipMv(acMvTemp[j], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5702
5703
0
            if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5704
0
            {
5705
0
              xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5706
5707
0
              Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5708
0
              uint32_t bitsTemp = ruiBits;
5709
0
              bitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5710
0
              costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5711
5712
0
              if (costTemp < uiCostBest)
5713
0
              {
5714
0
                uiCostBest = costTemp;
5715
0
                uiBitsBest = bitsTemp;
5716
0
                ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5717
0
                modelChange = true;
5718
0
                loopChange = true;
5719
0
              }
5720
0
            }
5721
0
          }
5722
0
        }
5723
0
      }
5724
5725
0
      if (!modelChange)
5726
0
      {
5727
0
        break;
5728
0
      }
5729
0
    }
5730
0
  }
5731
0
  acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
5732
0
  acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
5733
0
  acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
5734
5735
  // free buffer
5736
0
  for (int i = 0; i<iParaNum; i++)
5737
0
    delete[]pdEqualCoeff[i];
5738
0
  delete[]pdEqualCoeff;
5739
5740
0
  ruiBits = uiBitsBest;
5741
0
  ruiCost = uiCostBest;
5742
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5743
0
}
5744
5745
/**
 * Builds the affine AMVP candidate list for (refPicList, iRefIdx) and selects the
 * candidate with the lowest template cost as predictor.
 *
 * \param cu              coding unit; on success mvpIdx/mvpNum of refPicList are written
 * \param affineAMVPInfo  filled by CU::fillAffineMvpCand
 * \param origBuf         original samples handed to xGetAffineTemplateCost
 * \param acMvPred        out: the three control-point predictors (LT, RT, LB) of the winner
 * \param distBiP         out: cost of the winning candidate (untouched if none wins)
 * \return false when no candidate had a cost below MAX_DISTORTION, true otherwise
 */
bool InterSearch::xEstimateAffineAMVP(CodingUnit& cu, AffineAMVPInfo& affineAMVPInfo, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv acMvPred[3], Distortion& distBiP)
{
  // Fill the MV candidates
  CU::fillAffineMvpCand(cu, refPicList, iRefIdx, affineAMVPInfo);
  CHECK(affineAMVPInfo.numCand == 0, "Assertion failed.");

  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );

  // When candidates 0 and 1 agree in all three control-point MVs, only candidate 0 is evaluated.
  const bool firstTwoIdentical = (affineAMVPInfo.mvCandLT[0] == affineAMVPInfo.mvCandLT[1])
                              && (affineAMVPInfo.mvCandRT[0] == affineAMVPInfo.mvCandRT[1])
                              && (affineAMVPInfo.mvCandLB[0] == affineAMVPInfo.mvCandLB[1]);

  int        bestIdx  = -1;
  Distortion bestCost = MAX_DISTORTION;
  Mv         bestMv[3];

  for (int candIdx = 0; candIdx < affineAMVPInfo.numCand; candIdx++)
  {
    if (firstTwoIdentical && candIdx > 0)
    {
      // every index after the first is skipped in this case, so the loop can end here
      break;
    }

    Mv candMv[3] = { affineAMVPInfo.mvCandLT[candIdx], affineAMVPInfo.mvCandRT[candIdx], affineAMVPInfo.mvCandLB[candIdx] };
    const Distortion candCost = xGetAffineTemplateCost(cu, origBuf, predBuf, candMv, candIdx, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx);

    if (candCost < bestCost)
    {
      bestCost  = candCost;
      bestMv[0] = affineAMVPInfo.mvCandLT[candIdx];
      bestMv[1] = affineAMVPInfo.mvCandRT[candIdx];
      bestMv[2] = affineAMVPInfo.mvCandLB[candIdx];
      bestIdx   = candIdx;
      distBiP   = candCost;
    }
  }

  if( bestIdx < 0 )
  {
    return false;
  }

  // Publish the best MVP
  acMvPred[0] = bestMv[0];
  acMvPred[1] = bestMv[1];
  acMvPred[2] = bestMv[2];

  cu.mvpIdx[refPicList] = bestIdx;
  cu.mvpNum[refPicList] = affineAMVPInfo.numCand;
  DTRACE(g_trace_ctx, D_COMMON, "#estAffi=%d \n", affineAMVPInfo.numCand);
  return true;
}
5801
5802
/**
 * Copies the candidate count and the first src.numCand entries of each
 * control-point candidate list (LT, RT, LB) from src to dst.
 */
void InterSearch::xCopyAffineAMVPInfo(AffineAMVPInfo& src, AffineAMVPInfo& dst)
{
  dst.numCand = src.numCand;
  DTRACE(g_trace_ctx, D_COMMON, " (%d) #copyAffi=%d \n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), src.numCand);

  // Same byte count for all three lists.
  const size_t usedBytes = sizeof(Mv)*src.numCand;
  ::memcpy(dst.mvCandLT, src.mvCandLT, usedBytes);
  ::memcpy(dst.mvCandRT, src.mvCandRT, usedBytes);
  ::memcpy(dst.mvCandLB, src.mvCandLB, usedBytes);
}
5810
5811
/**
 * Sums the bits reported by the RD-cost object for coding the affine control-point MVs.
 *
 * Two control points are counted when cu.affineType is zero, three otherwise.
 * For control points after the first, the predictor is shifted by the difference
 * between the first MV and its predictor (acMvTemp[0] - acMvPred[0]).
 * Side effect: resets the RD-cost scale to 0 and overwrites its current predictor.
 */
uint32_t InterSearch::xCalcAffineMVBits(CodingUnit& cu, Mv acMvTemp[3], Mv acMvPred[3])
{
  const int numCtrlPts = cu.affineType ? 3 : 2;
  m_pcRdCost->setCostScale(0);

  uint32_t totalBits = 0;
  for (int cp = 0; cp < numCtrlPts; cp++)
  {
    Mv predictor = acMvPred[cp];
    if (cp != 0)
    {
      predictor = acMvPred[cp] + acMvTemp[0] - acMvPred[0];
    }
    predictor.changeAffinePrecInternal2Amvr(cu.imv);
    m_pcRdCost->setPredictor(predictor);

    Mv codedMv = acMvTemp[cp];
    codedMv.changeAffinePrecInternal2Amvr(cu.imv);

    totalBits += m_pcRdCost->getBitsOfVectorWithPredictor(codedMv.hor, codedMv.ver, 0);
  }

  return totalBits;
}
5830
5831
5832
//! set adaptive search range based on poc difference
5833
void InterSearch::setSearchRange( const Slice* slice, const VVEncCfg& encCfg )
5834
0
{
5835
0
  if( !encCfg.m_bUseASR || slice->isIRAP() )
5836
0
  {
5837
0
    return;
5838
0
  }
5839
5840
0
  int iCurrPOC = slice->poc;
5841
0
  int iRefPOC;
5842
0
  int iGOPSize = encCfg.m_GOPSize;
5843
0
  int iOffset = (iGOPSize >> 1);
5844
0
  int iMaxSR = encCfg.m_SearchRange;
5845
0
  int iNumPredDir = slice->isInterP() ? 1 : 2;
5846
5847
0
  for (int iDir = 0; iDir < iNumPredDir; iDir++)
5848
0
  {
5849
0
    RefPicList  e = ( iDir ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
5850
0
    for (int iRefIdx = 0; iRefIdx < slice->numRefIdx[e]; iRefIdx++)
5851
0
    {
5852
0
      iRefPOC = slice->getRefPic(e, iRefIdx)->getPOC();
5853
0
      int newSearchRange = Clip3(encCfg.m_minSearchWindow, iMaxSR, (iMaxSR*ADAPT_SR_SCALE*abs(iCurrPOC - iRefPOC)+iOffset)/iGOPSize);
5854
0
      m_aaiAdaptSR[iDir][iRefIdx] = newSearchRange;
5855
0
    }
5856
0
  }
5857
0
}
5858
5859
/**
 * Inserts (sad, x, y) into the candidate lists of length CHROMA_REFINEMENT_CANDIDATES.
 *
 * The new entry is placed at the lowest index whose stored cost is larger than sad;
 * entries from that index on move one slot towards the end and the last one is dropped.
 * Nothing happens when sad is not smaller than the cost in the last slot.
 */
void InterSearch::xIBCSearchMVCandUpdate(Distortion  sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand)
{
  const int lastIdx = CHROMA_REFINEMENT_CANDIDATES - 1;

  if (!(sad < sadBestCand[lastIdx]))
  {
    return;
  }

  // lowest slot that the new cost beats; the last slot is known to qualify
  int insertPos = lastIdx;
  for (int idx = 0; idx < lastIdx; idx++)
  {
    if (sad < sadBestCand[idx])
    {
      insertPos = idx;
      break;
    }
  }

  // make room by moving the tail one slot down
  for (int idx = lastIdx; idx > insertPos; idx--)
  {
    sadBestCand[idx] = sadBestCand[idx - 1];
    cMVCand[idx].set(cMVCand[idx - 1].hor, cMVCand[idx - 1].ver);
  }

  sadBestCand[insertPos] = sad;
  cMVCand[insertPos].set(x, y);
}
5881
5882
int InterSearch::xIBCSearchMVChromaRefine(CodingUnit& cu,
5883
  int         roiWidth,
5884
  int         roiHeight,
5885
  int         cuPelX,
5886
  int         cuPelY,
5887
  Distortion* sadBestCand,
5888
  Mv* cMVCand
5889
5890
)
5891
0
{
5892
0
  if ((!isChromaEnabled(cu.chromaFormat)) || (!cu.Cb().valid()))
5893
0
  {
5894
0
    return 0;
5895
0
  }
5896
5897
0
  int bestCandIdx = 0;
5898
0
  Distortion  sadBest = std::numeric_limits<Distortion>::max();
5899
0
  Distortion  tempSad;
5900
5901
0
  Pel* pRef;
5902
0
  Pel* pOrg;
5903
0
  int refStride, orgStride;
5904
0
  int width, height;
5905
5906
0
  int picWidth = cu.cs->slice->pps->picWidthInLumaSamples;
5907
0
  int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
5908
5909
0
  UnitArea allCompBlocks(cu.chromaFormat, (Area)cu.block(COMP_Y));
5910
0
  for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
5911
0
  {
5912
0
    if (sadBestCand[cand] == std::numeric_limits<Distortion>::max())
5913
0
    {
5914
0
      continue;
5915
0
    }
5916
5917
0
    if ((!cMVCand[cand].hor) && (!cMVCand[cand].ver))
5918
0
      continue;
5919
5920
0
    if (((int)(cuPelY + cMVCand[cand].ver + roiHeight) >= picHeight) || ((cuPelY + cMVCand[cand].ver) < 0))
5921
0
      continue;
5922
5923
0
    if (((int)(cuPelX + cMVCand[cand].hor + roiWidth) >= picWidth) || ((cuPelX + cMVCand[cand].hor) < 0))
5924
0
      continue;
5925
5926
0
    tempSad = sadBestCand[cand];
5927
5928
0
    cu.mv[0][0] = cMVCand[cand];
5929
0
    cu.mv[0][0].changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
5930
0
    cu.interDir = 1;
5931
0
    cu.refIdx[0] = cu.cs->slice->numRefIdx[REF_PIC_LIST_0]; // last idx in the list
5932
5933
0
    PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_0].getCompactBuf(cu);
5934
0
    motionCompensation(cu, predBufTmp, REF_PIC_LIST_0);
5935
5936
0
    for (unsigned int ch = COMP_Cb; ch < getNumberValidComponents(cu.cs->sps->chromaFormatIdc); ch++)
5937
0
    {
5938
0
      width = roiWidth >> getComponentScaleX(ComponentID(ch), cu.chromaFormat);
5939
0
      height = roiHeight >> getComponentScaleY(ComponentID(ch), cu.chromaFormat);
5940
5941
0
      PelUnitBuf origBuf = cu.cs->getOrgBuf(allCompBlocks);
5942
0
      PelUnitBuf* pBuf = &origBuf;
5943
0
      CPelBuf  tmpPattern = pBuf->get(ComponentID(ch));
5944
0
      pOrg = (Pel*)tmpPattern.buf;
5945
5946
0
      Picture* refPic = cu.slice->pic;
5947
0
      const CPelBuf refBuf = refPic->getRecoBuf(allCompBlocks.blocks[ComponentID(ch)]);
5948
0
      pRef = (Pel*)refBuf.buf;
5949
5950
0
      refStride = refBuf.stride;
5951
0
      orgStride = tmpPattern.stride;
5952
5953
      //ComponentID compID = (ComponentID)ch;
5954
0
      PelUnitBuf* pBufRef = &predBufTmp;
5955
0
      CPelBuf  tmpPatternRef = pBufRef->get(ComponentID(ch));
5956
0
      pRef = (Pel*)tmpPatternRef.buf;
5957
0
      refStride = tmpPatternRef.stride;
5958
5959
5960
0
      for (int row = 0; row < height; row++)
5961
0
      {
5962
0
        for (int col = 0; col < width; col++)
5963
0
        {
5964
0
          tempSad += ((abs(pRef[col] - pOrg[col])) >> (cu.cs->sps->bitDepths[CH_C] - 8));
5965
0
        }
5966
0
        pRef += refStride;
5967
0
        pOrg += orgStride;
5968
0
      }
5969
0
    }
5970
5971
0
    if (tempSad < sadBest)
5972
0
    {
5973
0
      sadBest = tempSad;
5974
0
      bestCandIdx = cand;
5975
0
    }
5976
0
  }
5977
5978
0
  return bestCandIdx;
5979
0
}
5980
/**
 * Appends to dst every vector of src that is non-zero and not already in dst.
 *
 * \param dst             destination list, currently holding dn entries
 * \param dn              number of valid entries in dst
 * \param dstTotalLength  capacity of dst; appending stops once it is reached
 * \param src             source list
 * \param sn              number of entries in src to consider
 * \return new number of valid entries in dst
 */
static unsigned int xMergeCandLists(Mv* dst, unsigned int dn, unsigned int dstTotalLength, Mv* src, unsigned int sn)
{
  for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++)
  {
    // default-constructed (zero) vectors are never stored
    if (src[cand] == Mv())
    {
      continue;
    }

    bool found = false;
    // index type matches dn to avoid a signed/unsigned comparison
    for (unsigned int j = 0; j < dn; j++)
    {
      if (src[cand] == dst[j])
      {
        found = true;
        break;
      }
    }

    if (!found)
    {
      dst[dn] = src[cand];
      dn++;
    }
  }

  return dn;
}
6007
/**
 * Integer block-vector search for IBC.
 *
 * Stages, each of which may finish the search early via "goto end":
 *   1. cached block vectors (m_defaultCachedBvs) plus encoder-only predictors,
 *   2. purely vertical vectors (x == 0) above the CU,
 *   3. purely horizontal vectors (y == 0) left of the CU,
 *   4. for CUs smaller than 16x16: up to three sub-sampled 2-D passes whose step
 *      and window depend on m_IBCFastMethod.
 * The best CHROMA_REFINEMENT_CANDIDATES vectors are tracked in sadBestCand/cMVCand;
 * unless an early exit on a very small cost is taken, the winner is chosen by
 * xIBCSearchMVChromaRefine.
 *
 * On exit the cached BV list and the per-CU BV record (m_ctuRecord) are refreshed
 * from the candidates found.
 *
 * \param rcMv     out: selected block vector
 * \param ruiCost  out: cost stored for the selected vector
 * \param pcMvPred not used by this function
 */
void InterSearch::xIntraPatternSearchIBC(CodingUnit& cu, TZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* pcMvSrchRngLT, Mv* pcMvSrchRngRB, Mv* pcMvPred)
{
  const int   srchRngHorLeft   = pcMvSrchRngLT->hor;
  const int   srchRngHorRight  = pcMvSrchRngRB->hor;
  const int   srchRngVerTop    = pcMvSrchRngLT->ver;
  const int   srchRngVerBottom = pcMvSrchRngRB->ver;

  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
  const int   puPelOffsetX = 0;
  const int   puPelOffsetY = 0;
  const int   cuPelX = cu.Y().x;
  const int   cuPelY = cu.Y().y;

  int         roiWidth  = cu.lwidth();
  int         roiHeight = cu.lheight();

  Distortion  sadBest = std::numeric_limits<Distortion>::max();
  int         bestX = 0;
  int         bestY = 0;
  int         bestCandIdx = 0;

  const Pel*  piRefSrch = cStruct.piRefY;
  const bool  useAmvr   = cu.cs->sps->AMVR;

  Distortion  sadBestCand[CHROMA_REFINEMENT_CANDIDATES];
  Mv          cMVCand[CHROMA_REFINEMENT_CANDIDATES];
  for (int idx = 0; idx < CHROMA_REFINEMENT_CANDIDATES; idx++)
  {
    sadBestCand[idx] = std::numeric_limits<Distortion>::max();
    cMVCand[idx].set(0, 0);
  }

  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);

  const int picWidth  = cu.cs->slice->pps->picWidthInLumaSamples;
  const int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;

  // Validity test of a block vector for this CU.
  auto isValidBv = [&](int x, int y) -> bool
  {
    return searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth);
  };

  // Cost of vector (x, y) = BV signalling cost + distortion; result is merged into the candidate lists.
  auto evalBv = [&](int x, int y)
  {
    Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(x, y, useAmvr);
    m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
    sad += m_cDistParam.distFunc(m_cDistParam);
    xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand);
  };

  // Make candidate 'idx' the result of the search.
  auto commitCand = [&](int idx)
  {
    bestX = cMVCand[idx].hor;
    bestY = cMVCand[idx].ver;
    sadBest = sadBestCand[idx];
    rcMv.set(bestX, bestY);
    ruiCost = sadBest;
  };

  // Pick the winner by chroma refinement and make it the result.
  auto refineAndCommit = [&]()
  {
    bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
    commitCand(bestCandIdx);
  };

  // The extra scope keeps all locals below out of the way of the jumps to 'end'.
  {
    m_cDistParam.subShift = 0;
    Distortion tempSadBest = 0;

    // --- stage 1: cached vectors and encoder-only predictors ---
    m_numBVs = xMergeCandLists(m_acBVs, 0, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);

    Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
    int nbPreds = 0;
    CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds);

    for (unsigned int cand = 0; cand < m_numBVs; cand++)
    {
      const int xPred = m_acBVs[cand].hor;
      const int yPred = m_acBVs[cand].ver;

      const bool isZeroBv = (xPred == 0 && yPred == 0);
      const bool inRange  = (yPred >= srchRngVerTop) && (yPred <= srchRngVerBottom)
                         && (xPred >= srchRngHorLeft) && (xPred <= srchRngHorRight);
      if (isZeroBv || !inRange)
      {
        continue;
      }

      if (isValidBv(xPred, yPred))
      {
        evalBv(xPred, yPred);
      }
    }

    bestX = cMVCand[0].hor;
    bestY = cMVCand[0].ver;
    rcMv.set(bestX, bestY);
    sadBest = sadBestCand[0];

    // --- stage 2: vertical vectors (x == 0) ---
    const int boundY = (0 - roiHeight - puPelOffsetY);
    for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y)
    {
      if (!isValidBv(0, y))
      {
        continue;
      }

      evalBv(0, y);
      tempSadBest = sadBestCand[0];
      if (sadBestCand[0] <= 3)
      {
        // cost is low enough to stop right away, without chroma refinement
        commitCand(0);
        goto end;
      }
    }

    // --- stage 3: horizontal vectors (y == 0) ---
    const int boundX = std::max(srchRngHorLeft, -cuPelX);
    for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x)
    {
      if (!isValidBv(x, 0))
      {
        continue;
      }

      evalBv(x, 0);
      tempSadBest = sadBestCand[0];
      if (sadBestCand[0] <= 3)
      {
        commitCand(0);
        goto end;
      }
    }

    bestX = cMVCand[0].hor;
    bestY = cMVCand[0].ver;
    sadBest = sadBestCand[0];
    if ((!bestX && !bestY) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= 32))
    {
      //chroma refine
      refineAndCommit();
      goto end;
    }

    // --- stage 4: sub-sampled 2-D search, small CUs only ---
    if (cu.lwidth() < 16 && cu.lheight() < 16)
    {
      int stepS = 2;
      if (m_pcEncCfg->m_IBCFastMethod > 2)
      {
        if (m_pcEncCfg->m_IBCFastMethod == 5)
        {
          stepS = 8;
        }
        else if ((cu.lwidth() > 4) || (cu.lheight() > 4))
        {
          stepS = 4;
        }
      }

      const int minCuLog2 = m_pcEncCfg->m_log2MinCodingBlockSize;
      const int minCuMask = (1 << minCuLog2) - 1;
      bool lastDec = false;

      for (int searchStep = 0; searchStep < 3; searchStep++)
      {
        // later passes start one sample further in, first in y and then also in x
        int startY = (std::max(srchRngVerTop, -cuPelY) + (searchStep ? 1 : 0));
        int startX = (std::max(srchRngHorLeft, -cuPelX) + (searchStep > 1 ? 1 : 0));
        int endY = srchRngVerBottom;
        int endX = srchRngHorRight;

        if (m_pcEncCfg->m_IBCFastMethod > 5)
        {
          // single dense pass in a +-4 window around the current best vector
          startY = bestY - 4;
          endY = bestY + 4;
          startX = bestX - 4;
          endX = bestX + 4;
          stepS = 1;
          if (searchStep)
          {
            break;
          }
        }

        for (int y = startY; y <= endY; y += stepS)
        {
          if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
          {
            continue;
          }

          bool firstX = true;
          const int stepSx = searchStep ? stepS : 1;
          for (int x = startX; x <= endX; firstX = false, x += stepSx)
          {
            if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
            {
              continue;
            }

            const bool isSameAsLast = !firstX && ((cuPelX + x) & minCuMask) > 1;
            if (searchStep || (m_pcEncCfg->m_IBCFastMethod > 5))
            {
              if (!isValidBv(x, y))
              {
                continue;
              }
            }
            else if ((isSameAsLast && !lastDec) || (!isSameAsLast && !isValidBv(x, y)))
            {
              // in the first pass the previous validity decision is reused where isSameAsLast holds
              lastDec = false;
              continue;
            }
            lastDec = true;

            evalBv(x, y);

            if (searchStep && sadBestCand[0] <= 5)
            {
              //chroma refine & return
              refineAndCommit();
              goto end;
            }
          }
        }

        if ((searchStep < 2) && (m_pcEncCfg->m_IBCFastMethod < 6))
        {
          if ((m_pcEncCfg->m_IBCFastMethod > 2) && (m_pcEncCfg->m_IBCFastMethod < 5))
          {
            // this pass did not change the best vector: stop without chroma refinement
            if ((bestX == cMVCand[0].hor) && (bestY == cMVCand[0].ver))
            {
              sadBest = sadBestCand[bestCandIdx];
              rcMv.set(bestX, bestY);
              ruiCost = sadBest;
              goto end;
            }
          }
          bestX = cMVCand[0].hor;
          bestY = cMVCand[0].ver;
          sadBest = sadBestCand[0];

          const int StopSearch = searchStep ? 32 : 16;
          if ((searchStep && (sadBest >= tempSadBest)) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= StopSearch))
          {
            //chroma refine
            refineAndCommit();
            goto end;
          }
        }
      }
    }
  }

  refineAndCommit();

end:
  // Rebuild the cached BV list: candidates of this search first, previously cached vectors after.
  m_numBVs = xMergeCandLists(m_acBVs, 0, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);

  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, 0, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs);

  // Remember every non-zero candidate with its cost for this CU position and size.
  for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
  {
    if (cMVCand[cand].hor == 0 && cMVCand[cand].ver == 0)
    {
      continue;
    }
    m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand];
  }

  return;
}
6305
6306
6307
6308
// based on xMotionEstimation
6309
void InterSearch::xIBCEstimation(CodingUnit& cu, PelUnitBuf& origBuf, Mv* pcMvPred, Mv& rcMv, Distortion& ruiCost )
6310
0
{
6311
0
  const int iPicWidth = cu.cs->slice->pps->picWidthInLumaSamples;
6312
0
  const int iPicHeight = cu.cs->slice->pps->picHeightInLumaSamples;
6313
0
  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
6314
0
  const int           cuPelX = cu.Y().x;
6315
0
  const int           cuPelY = cu.Y().y;
6316
0
  int                 iRoiWidth = cu.lwidth();
6317
0
  int                 iRoiHeight = cu.lheight();
6318
6319
0
  PelUnitBuf* pBuf = &origBuf;
6320
6321
  //  Search key pattern initialization
6322
0
  CPelBuf  tmpPattern = pBuf->Y();
6323
0
  CPelBuf* pcPatternKey = &tmpPattern;
6324
0
  PelBuf tmpOrgLuma;
6325
0
  ReshapeData& reshapeData = cu.cs->picture->reshapeData;
6326
0
  if ((cu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag()))
6327
0
  {
6328
0
    tmpOrgLuma = m_tmpStorageLCU.getCompactBuf(cu.Y());
6329
0
    tmpOrgLuma.rspSignal(tmpPattern, reshapeData.getInvLUT());
6330
0
    pcPatternKey = (CPelBuf*)&tmpOrgLuma;
6331
0
  }
6332
0
  m_lumaClpRng = cu.cs->slice->clpRngs[COMP_Y];
6333
0
  Picture* refPic = cu.slice->pic;
6334
0
  const CPelBuf refBuf = refPic->getRecoBuf(cu.blocks[COMP_Y]);
6335
6336
0
  TZSearchStruct cStruct; 
6337
0
  cStruct.pcPatternKey  = pcPatternKey;
6338
0
  cStruct.iRefStride    = refBuf.stride;
6339
0
  cStruct.piRefY        = refBuf.buf;
6340
0
  CHECK( cu.imv == IMV_HPEL, "IF_IBC" );
6341
0
  cStruct.imvShift      = cu.imv << 1;
6342
0
  cStruct.subShiftMode  = 0;
6343
0
  cStruct.uiBestSad     = MAX_DISTORTION;
6344
6345
0
  m_pcRdCost->getMotionCostIBC(0);
6346
0
  m_pcRdCost->setPredictorsIBC(pcMvPred);
6347
0
  m_pcRdCost->setCostScale(0);
6348
6349
0
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);
6350
0
  bool buffered = false;
6351
0
  if (m_pcEncCfg->m_IBCFastMethod)// IBC_FAST_METHOD_BUFFERBV
6352
0
  {
6353
0
    ruiCost = MAX_UINT;
6354
0
    std::unordered_map<Mv, Distortion>& history = m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord;
6355
0
    for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++)
6356
0
    {
6357
0
      const Mv& bv = p->first;
6358
6359
0
      int xBv = bv.hor;
6360
0
      int yBv = bv.ver;
6361
0
      if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
6362
0
      {
6363
0
        buffered = true;
6364
0
        Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xBv, yBv, cu.cs->sps->AMVR);
6365
0
        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv;
6366
0
        sad += m_cDistParam.distFunc(m_cDistParam);
6367
0
        if (sad < ruiCost)
6368
0
        {
6369
0
          rcMv = bv;
6370
0
          ruiCost = sad;
6371
0
        }
6372
0
        else if (sad == ruiCost)
6373
0
        {
6374
          // stabilise the search through the unordered list
6375
0
          if (bv.hor < rcMv.hor
6376
0
            || (bv.hor == rcMv.hor && bv.ver < rcMv.ver))
6377
0
          {
6378
            // update the vector.
6379
0
            rcMv = bv;
6380
0
          }
6381
0
        }
6382
0
      }
6383
0
    }
6384
6385
0
    if (buffered)
6386
0
    {
6387
0
      Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
6388
0
      int nbPreds = 0;
6389
0
      CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
6390
6391
0
      for (unsigned int cand = 0; cand < nbPreds; cand++)
6392
0
      {
6393
0
        int xPred = cMvPredEncOnly[cand].hor;
6394
0
        int yPred = cMvPredEncOnly[cand].ver;
6395
6396
0
        if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
6397
0
        {
6398
0
          Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xPred, yPred, cu.cs->sps->AMVR);
6399
0
          m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
6400
0
          sad += m_cDistParam.distFunc(m_cDistParam);
6401
0
          if (sad < ruiCost)
6402
0
          {
6403
0
            rcMv.set(xPred, yPred);
6404
0
            ruiCost = sad;
6405
0
          }
6406
0
          else if (sad == ruiCost)
6407
0
          {
6408
            // stabilise the search through the unordered list
6409
0
            if (xPred < rcMv.hor
6410
0
              || (xPred == rcMv.hor && yPred < rcMv.ver))
6411
0
            {
6412
              // update the vector.
6413
0
              rcMv.set(xPred, yPred);
6414
0
            }
6415
0
          }
6416
0
          m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad;
6417
0
        }
6418
0
      }
6419
0
    }
6420
0
  }
6421
6422
0
  if (!buffered)
6423
0
  {
6424
0
    Mv        cMvSrchRngLT;
6425
0
    Mv        cMvSrchRngRB;
6426
6427
    // assume that intra BV is integer-pel precision
6428
0
    xSetIntraSearchRangeIBC(cu, cu.lwidth(), cu.lheight(), cMvSrchRngLT, cMvSrchRngRB);
6429
6430
    //  Do integer search
6431
0
    xIntraPatternSearchIBC(cu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred);
6432
0
  }
6433
0
}
6434
// based on xSetSearchRange
6435
void InterSearch::xSetIntraSearchRangeIBC(CodingUnit& cu, int iRoiWidth, int iRoiHeight, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB)
6436
0
{
6437
 // const SPS& sps = *cu.cs->sps;
6438
6439
0
  int srLeft, srRight, srTop, srBottom;
6440
6441
0
  const int cuPelX = cu.Y().x;
6442
0
  const int cuPelY = cu.Y().y;
6443
6444
0
  const int lcuWidth = cu.cs->slice->sps->CTUSize;
6445
0
  const int ctuSizeLog2 = floorLog2(lcuWidth);
6446
0
  int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
6447
6448
0
  srLeft = -(numLeftCTUs * lcuWidth + (cuPelX % lcuWidth));
6449
0
  srTop = -(cuPelY % lcuWidth);
6450
6451
0
  srRight = lcuWidth - (cuPelX % lcuWidth) - iRoiWidth;
6452
0
  srBottom = lcuWidth - (cuPelY % lcuWidth) - iRoiHeight;
6453
6454
0
  rcMvSrchRngLT.hor=srLeft;
6455
0
  rcMvSrchRngLT.ver=srTop;
6456
0
  rcMvSrchRngRB.hor=srRight;
6457
0
  rcMvSrchRngRB.ver=srBottom;
6458
6459
0
  rcMvSrchRngLT <<= 2;
6460
0
  rcMvSrchRngRB <<= 2;
6461
0
  bool temp = m_clipMvInSubPic;
6462
0
  m_clipMvInSubPic = true;
6463
0
  clipMv(rcMvSrchRngLT,cu.lumaPos(),cu.lumaSize(), *cu.cs->pcv, *cu.cs->pps, m_clipMvInSubPic);
6464
0
  clipMv(rcMvSrchRngRB, cu.lumaPos(),cu.lumaSize(), *cu.cs->pcv, * cu.cs->pps, m_clipMvInSubPic);
6465
0
  m_clipMvInSubPic = temp;
6466
0
  rcMvSrchRngLT >>= 2;
6467
0
  rcMvSrchRngRB >>= 2;
6468
0
}
6469
6470
/**
 * IBC prediction search for one CU.
 *
 * Runs the block-vector estimation, then selects the predictor index and the AMVR
 * mode (cu.imv) that need the fewest bits for the vector found, and stores the
 * vector, predictor index and vector difference in the CU.
 *
 * \param cu          coding unit; mv, mvd, mvpIdx, refIdx of list 0 and imv are written
 * \param partitioner not used by this function
 * \return false when the estimation returns a zero block vector, true otherwise
 */
bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner)
{
  // Predictor candidates are collected twice: once with imv set to 4-pel, once with imv off.
  cu.imv = IMV_4PEL;
  AMVPInfo amvpInfo4Pel;
  CU::fillIBCMvpCand(cu, amvpInfo4Pel);

  cu.imv = IMV_OFF;// (Int)cu.cs->sps->getUseIMV(); // set as IMV=0 initially
  Mv    cMv, cMvPred[2];
  AMVPInfo amvpInfo;
  CU::fillIBCMvpCand(cu, amvpInfo);
  // store in full pel accuracy, shift before use in search
  cMvPred[0] = amvpInfo.mvCand[0];
  cMvPred[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
  cMvPred[1] = amvpInfo.mvCand[1];
  cMvPred[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);

  int iBvpNum = 2;
  int bvpIdxBest = 0;
  cMv.setZero();
  Distortion cost = 0;
  if (cu.cs->sps->maxNumIBCMergeCand == 1)
  {
    iBvpNum = 1;
    cMvPred[1] = cMvPred[0];
  }

  // cMv was reset just above and nothing ran in between, so this guard is expected to always hold.
  if (cMv.hor == 0 && cMv.ver == 0)
  {
    // if hash search does not work or is not enabled
    PelUnitBuf origBuf = cu.cs->getOrgBuf(cu);
    xIBCEstimation(cu, origBuf, cMvPred, cMv, cost );
  }

  if (cMv.hor == 0 && cMv.ver == 0)
  {
    return false;
  }

  /// ibc search
  /////////////////////////////////////////////////////////
  unsigned int bitsBVPBest, bitsBVPTemp;
  bitsBVPBest = MAX_INT;
  m_pcRdCost->setCostScale(0);

  for (int bvpIdxTemp = 0; bvpIdxTemp < iBvpNum; bvpIdxTemp++)
  {
    // bits with the integer-sample predictor
    m_pcRdCost->setPredictor(cMvPred[bvpIdxTemp]);

    bitsBVPTemp = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor, cMv.ver, 0);

    if (bitsBVPTemp < bitsBVPBest)
    {
      bitsBVPBest = bitsBVPTemp;
      bvpIdxBest = bvpIdxTemp;

      if (cu.cs->sps->AMVR && cMv != cMvPred[bvpIdxTemp])
      {
        cu.imv = IMV_FPEL; // set as full-pel
      }
      else
      {
        cu.imv = IMV_OFF; // set as fractional-pel
      }
    }

    // bits with the 4-pel predictor, only when both components are multiples of 4 and AMVR is on
    unsigned int bitsBVPQP = MAX_UINT;

    Mv mvPredQuadPel;
    if ((cMv.hor % 4 == 0) && (cMv.ver % 4 == 0) && (cu.cs->sps->AMVR))
    {
      mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];// cMvPred[bvpIdxTemp];

      mvPredQuadPel.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_4PEL);

      m_pcRdCost->setPredictor(mvPredQuadPel);

      bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor >> 2, cMv.ver >> 2, 0);
    }
    mvPredQuadPel.changePrecision(MV_PRECISION_4PEL, MV_PRECISION_INT);
    if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel)
    {
      bitsBVPBest = bitsBVPQP;
      bvpIdxBest = bvpIdxTemp;

      if (cu.cs->sps->AMVR)
      {
        cu.imv = IMV_4PEL;
      }
    }
  }

  cMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL );
  cu.mv[REF_PIC_LIST_0][0] = cMv; // store in fractional pel accuracy

  cu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest;

  if (cu.imv == IMV_4PEL && cMv != amvpInfo4Pel.mvCand[bvpIdxBest])
  {
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo4Pel.mvCand[bvpIdxBest];
  }
  else
  {
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo.mvCand[bvpIdxBest];
  }

  // a zero difference is stored with imv off
  if (cu.mvd[REF_PIC_LIST_0][0] == Mv(0, 0))
  {
    cu.imv = IMV_OFF;
  }
  if (cu.imv == IMV_4PEL)
  {
    assert((cMv.hor % 16 == 0) && (cMv.ver % 16 == 0));
  }
  if (cu.cs->sps->AMVR)
  {
    assert(cu.imv > 0 || cu.mvd[REF_PIC_LIST_0][0] == Mv());
  }

  cu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;

  return true;
}
6581
6582
6583
/**
 * Walks the split series of a CTU, following at each level the part that contains
 * 'pos', and compares 'refPos' against that part.
 *
 * At each level, for the direction(s) that were split:
 *   - refPos at or beyond the far edge of the part  -> returns true
 *   - refPos before the near edge of the part       -> returns false
 *   - otherwise descend to the next split in the series.
 * Vertical-only splits (binary/ternary) compare x only; all other splits compare
 * y first and then x. Returns false when the series holds no further known split.
 *
 * Both positions must lie in the same CTU (checked in debug builds only).
 */
static inline bool isYPartBefore( SplitSeries series, const int ctuSizeLog2, const Position& refPos, const Position& pos )
{
#ifndef NDEBUG
  const int refCtuX = refPos.x >> ctuSizeLog2;
  const int refCtuY = refPos.y >> ctuSizeLog2;
  const int posCtuX = pos.x >> ctuSizeLog2;
  const int posCtuY = pos.y >> ctuSizeLog2;

  CHECK( refCtuX != posCtuX || refCtuY != posCtuY, "This method can only be applied for positions within the same CTU" );

#endif
  const int ctuSize = 1 << ctuSizeLog2;
  const int ctuMask = ctuSize - 1;

  // coordinates relative to the CTU origin
  const int refX = refPos.x & ctuMask;
  const int refY = refPos.y & ctuMask;
  const int posX = pos.x & ctuMask;
  const int posY = pos.y & ctuMask;

  // current part, starting with the whole CTU
  int x = 0, y = 0, w = ctuSize, h = ctuSize;

  for( ;; )
  {
    const PartSplit split = PartSplit( series & SPLIT_MASK );
    bool compareY = true;   // false for splits that only divide horizontally

    switch( split )
    {
    case CU_QUAD_SPLIT:
      w >>= 1;
      if( posX >= x + w ) { x += w; }
      // fall through: a quad split also halves the height
    case CU_HORZ_SPLIT:
      h >>= 1;
      if( posY >= y + h ) { y += h; }
      break;

    case CU_VERT_SPLIT:
      w >>= 1;
      if( posX >= x + w ) { x += w; }
      compareY = false;
      break;

    case CU_TRIH_SPLIT:
      // parts are 1/4, 1/2, 1/4 of the height
      h >>= 2;
      if( posY >= y + h ) { y += h; h <<= 1; }
      if( posY >= y + h ) { y += h; h >>= 1; }
      break;

    case CU_TRIV_SPLIT:
      // parts are 1/4, 1/2, 1/4 of the width
      w >>= 2;
      if( posX >= x + w ) { x += w; w <<= 1; }
      if( posX >= x + w ) { x += w; w >>= 1; }
      compareY = false;
      break;

    default:
      return false;
    }

    if( compareY )
    {
      if( refY >= y + h ) { return true; }
      if( refY < y )      { return false; }
    }

    if( refX >= x + w ) { return true; }
    if( refX < x )      { return false; }

    series >>= SPLIT_DMULT;
  }

  return false;
}
6650
6651
bool InterSearch::searchBvIBC(const CodingUnit& cu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize) const
6652
0
{
6653
0
  const int ctuSizeLog2 = Log2(ctuSize);
6654
6655
0
  int refRightX  = xPos + xBv + width  - 1;
6656
0
  int refBottomY = yPos + yBv + height - 1;
6657
6658
0
  int refLeftX = xPos + xBv;
6659
0
  int refTopY  = yPos + yBv;
6660
6661
0
  if ((xPos + xBv) < 0)
6662
0
  {
6663
0
    return false;
6664
0
  }
6665
0
  if (refRightX >= picWidth)
6666
0
  {
6667
0
    return false;
6668
0
  }
6669
6670
0
  if ((yPos + yBv) < 0)
6671
0
  {
6672
0
    return false;
6673
0
  }
6674
0
  if (refBottomY >= picHeight)
6675
0
  {
6676
0
    return false;
6677
0
  }
6678
0
  if ((xBv + width) > 0 && (yBv + height) > 0)
6679
0
  {
6680
0
    return false;
6681
0
  }
6682
6683
  // Don't search the above CTU row
6684
0
  if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
6685
0
    return false;
6686
6687
  // Don't search the below CTU row
6688
0
  if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
6689
0
  {
6690
0
    return false;
6691
0
  }
6692
6693
0
  unsigned curTileIdx = cu.cs->pps->getTileIdx(cu.lumaPos());
6694
0
  unsigned refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refTopY));
6695
0
  if (curTileIdx != refTileIdx)
6696
0
  {
6697
0
    return false;
6698
0
  }
6699
0
  refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refBottomY));
6700
0
  if (curTileIdx != refTileIdx)
6701
0
  {
6702
0
    return false;
6703
0
  }
6704
0
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refTopY));
6705
0
  if (curTileIdx != refTileIdx)
6706
0
  {
6707
0
    return false;
6708
0
  }
6709
0
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refBottomY));
6710
0
  if (curTileIdx != refTileIdx)
6711
0
  {
6712
0
    return false;
6713
0
  }
6714
6715
0
  const Position cuPos{ xPos, yPos };
6716
6717
  //int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
6718
0
  static const int numLeftCTUsLUT[3] = { 15, 3, 1 };
6719
6720
  // in the same CTU line
6721
0
  const int numLeftCTUs = numLeftCTUsLUT[ctuSizeLog2 - 5];
6722
6723
0
  if( ( refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2 ) && ( refLeftX >> ctuSizeLog2 >= ( xPos >> ctuSizeLog2 ) - numLeftCTUs ) )
6724
0
  {
6725
    // in the same CTU, or left CTU
6726
    // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer
6727
0
    if( ( ctuSizeLog2 == 7 ) && ( ( refLeftX >> ctuSizeLog2 ) == ( ( xPos >> ctuSizeLog2 ) - 1 ) ) )
6728
0
    {
6729
      // ref block's collocated block in current CTU
6730
0
      const Position refPosCol64x64{ ( refLeftX + ctuSize ) & ~63, refTopY & ~63 };
6731
0
      if( refPosCol64x64 == Position{ xPos & ~63, yPos & ~63 } )
6732
0
        return false;
6733
6734
      //CodingUnit* curef = cu.cs->getCU(refPosCol64x64, CH_L, cu.treeType);
6735
      //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6736
0
      bool isDecomp = isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosCol64x64 );
6737
0
      if( isDecomp )
6738
0
      {
6739
0
        return false;
6740
0
      }
6741
0
    }
6742
0
  }
6743
0
  else
6744
0
    return false;
6745
6746
  // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
6747
0
  const Position refPosBR{ refRightX, refBottomY };
6748
  //CodingUnit* curef = cu.cs->getCU(refPosBR, CH_L, cu.treeType);
6749
  //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6750
0
  bool isDecomp = ( ( refPosBR.x >> ctuSizeLog2 ) < ( cuPos.x >> ctuSizeLog2 ) ) || ( refRightX < xPos && refBottomY < yPos ) || isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosBR );
6751
6752
0
  return isDecomp;
6753
0
}
6754
6755
} // namespace vvenc
6756
6757
//! \}
6758