Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/InterSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     EncSearch.cpp
45
 *  \brief    encoder inter search class
46
 */
47
48
#include "InterSearch.h"
49
#include "EncModeCtrl.h"
50
#include "EncLib.h"
51
#include "CommonLib/CommonDef.h"
52
#include "CommonLib/Rom.h"
53
#include "CommonLib/MotionInfo.h"
54
#include "CommonLib/Picture.h"
55
#include "CommonLib/UnitTools.h"
56
#include "CommonLib/Reshape.h"
57
#include "CommonLib/dtrace_next.h"
58
#include "CommonLib/dtrace_buffer.h"
59
#include "CommonLib/TimeProfiler.h"
60
61
#include <math.h>
62
63
 //! \ingroup EncoderLib
64
 //! \{
65
66
namespace vvenc {
67
68
static const Mv s_acMvRefineH[9] =
69
{
70
  Mv(  0,  0 ), // 0
71
  Mv(  0, -1 ), // 1
72
  Mv(  0,  1 ), // 2
73
  Mv( -1,  0 ), // 3
74
  Mv(  1,  0 ), // 4
75
  Mv( -1, -1 ), // 5
76
  Mv(  1, -1 ), // 6
77
  Mv( -1,  1 ), // 7
78
  Mv(  1,  1 )  // 8
79
};
80
81
static const Mv s_acMvRefineQ[9] =
82
{
83
  Mv(  0,  0 ), // 0
84
  Mv(  0, -1 ), // 1
85
  Mv(  0,  1 ), // 2
86
  Mv( -1, -1 ), // 5
87
  Mv(  1, -1 ), // 6
88
  Mv( -1,  0 ), // 3
89
  Mv(  1,  0 ), // 4
90
  Mv( -1,  1 ), // 7
91
  Mv(  1,  1 )  // 8
92
};
93
94
static const bool s_skipQpelPosition[ 42 ][ 9 ] =
95
{
96
  { false, true,  true,  true,  true,  true,  true,  true,  true  },
97
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
98
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
99
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
100
  { true,  false, true,  false, true,  false, true,  true,  true  },
101
  { true,  false, true,  true,  false, true,  false, true,  true  },
102
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
103
  { true,  true,  false, true,  true,  false, true,  false, true  },
104
  { true,  true,  false, true,  true,  true,  false, true,  false },
105
  { true,  true,  false, true,  true,  true,  true,  false, false },
106
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
107
  { true,  true,  false, true,  true,  false, true,  false, true  },
108
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
109
  { true,  true,  false, true,  true,  true,  false, true,  false },
110
  { true,  false, true,  false, false, true,  true,  true,  true  },
111
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
112
  { true,  false, true,  false, true,  false, true,  true,  true  },
113
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
114
  { true,  false, true,  true,  false, true,  false, true,  true  },
115
  { true,  true,  true,  true,  false, true,  false, true,  false },
116
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
117
  { true,  false, true,  true,  false, true,  false, true,  true  },
118
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
119
  { true,  true,  false, true,  true,  true,  false, true,  false },
120
  { true,  true,  true,  false, true,  false, true,  false, true  },
121
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
122
  { true,  false, true,  false, true,  false, true,  true,  true  },
123
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
124
  { true,  true,  false, true,  true,  false, true,  false, true  },
125
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
126
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
127
  { true,  true,  false, true,  true,  true,  false, true,  false },
128
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
129
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
130
  { true,  true,  false, true,  true,  false, true,  false, true  },
131
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
132
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
133
  { true,  false, true,  true,  false, true,  false, true,  true  },
134
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
135
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
136
  { true,  false, true,  false, true,  false, true,  true,  true  },
137
  { false, false, false, false, false, false, false, false, false },
138
};
139
140
//   1,0    3,0    0,1    1,1    2,1    3,1    1,2    3,2    0,3    1,3    2,3    3,3    H1,0   H3,0
141
static const bool s_doInterpQ[ 42 ][ 14 ] =
142
{
143
  { false, false, false, false, false, false, false, false, false, false, false, false, false, false },
144
  { false, false, false, false, false, false, false, false, true,  false, false, false, false, true  },
145
  { false, false, true,  false, false, false, false, false, false, false, false, false, true,  false },
146
  { false, true,  false, false, false, false, false, false, false, false, false, false, false, false },
147
  { false, true,  false, false, false, false, false, false, true,  false, false, true,  false, true  },
148
  { false, true,  true,  false, false, true,  false, false, false, false, false, false, true,  false },
149
  { true,  false, false, false, false, false, false, false, false, false, false, false, false, false },
150
  { true,  false, false, false, false, false, false, false, true,  true,  false, false, false, true  },
151
  { true,  false, true,  true,  false, false, false, false, false, false, false, false, true,  false },
152
  { false, true,  false, false, false, true,  false, false, false, false, false, true,  true,  true  },
153
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
154
  { false, true,  false, false, false, false, false, false, false, false, true,  true,  false, true  },
155
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
156
  { false, true,  false, false, true,  true,  false, false, false, false, false, false, true,  false },
157
  { true,  false, false, true,  false, false, false, false, false, true,  false, false, true,  true  },
158
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
159
  { true,  false, false, false, false, false, false, false, false, true,  true,  false, false, true  },
160
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
161
  { true,  false, false, true,  true,  false, false, false, false, false, false, false, true,  false },
162
  { false, false, false, false, false, false, false, false, true,  true,  false, true,  false, true  },
163
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
164
  { false, false, false, false, false, false, false, true,  true,  false, false, true,  false, true  },
165
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
166
  { false, false, false, false, false, false, true,  false, true,  true,  false, false, false, true  },
167
  { false, false, true,  true,  false, true,  false, false, false, false, false, false, true,  false },
168
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
169
  { false, false, true,  false, false, true,  false, true,  false, false, false, false, true,  false },
170
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
171
  { false, false, true,  true,  false, false, true,  false, false, false, false, false, true,  false },
172
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
173
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
174
  { false, false, false, false, false, false, false, true,  false, false, true,  true,  false, true  },
175
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
176
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
177
  { false, false, false, false, true,  true,  false, true,  false, false, false, false, true,  false },
178
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
179
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
180
  { false, false, false, false, false, false, true,  false, false, true,  true,  false, false, true  },
181
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
182
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
183
  { false, false, false, true,  true,  false, true,  false, false, false, false, false, true,  false },
184
  { true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true  },
185
};
186
187
const int BlkUniMvInfoBuffer::m_uniMvListMaxSize;
188
189
InterSearch::InterSearch()
190
20.7k
  : m_modeCtrl                    (nullptr)
191
20.7k
  , m_defaultCachedBvs            (nullptr)
192
20.7k
  , m_pcEncCfg                    (nullptr)
193
20.7k
  , m_pcTrQuant                   (nullptr)
194
20.7k
  , m_iSearchRange                (0)
195
20.7k
  , m_bipredSearchRange           (0)
196
20.7k
  , m_motionEstimationSearchMethod(VVENC_MESEARCH_FULL)
197
20.7k
  , m_motionEstimationSearchMethodSCC( 0 )
198
20.7k
  , m_CABACEstimator              (nullptr)
199
20.7k
  , m_CtxCache                    (nullptr)
200
20.7k
  , m_pTempPel                    (nullptr)
201
20.7k
{
202
62.3k
  for (int i=0; i<MAX_NUM_REF_LIST_ADAPT_SR; i++)
203
41.5k
  {
204
41.5k
    memset (m_aaiAdaptSR[i], 0, MAX_IDX_ADAPT_SR * sizeof (int));
205
41.5k
  }
206
83.0k
  for (int i=0; i<AMVP_MAX_NUM_CANDS+1; i++)
207
62.3k
  {
208
62.3k
    memset (m_auiMVPIdxCost[i], 0, (AMVP_MAX_NUM_CANDS+1) * sizeof (uint32_t) );
209
62.3k
  }
210
20.7k
}
211
212
213
InterSearch::~InterSearch()
214
20.7k
{
215
20.7k
  destroy();
216
20.7k
}
217
218
void InterSearch::init( const VVEncCfg& encCfg, TrQuant* pTrQuant, RdCost* pRdCost, EncModeCtrl* pModeCtrl, CodingStructure **pSaveCS )
219
20.7k
{
220
20.7k
  InterPrediction::init( pRdCost, encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_ifpLines );
221
20.7k
  m_numBVs                       = 0;
222
20.7k
  m_pcEncCfg                     = &encCfg;
223
20.7k
  m_pcTrQuant                    = pTrQuant;
224
20.7k
  m_pcRdCost                     = pRdCost;
225
20.7k
  m_modeCtrl                     = pModeCtrl;
226
20.7k
  m_pSaveCS                      = pSaveCS;
227
228
20.7k
  m_iSearchRange                    = encCfg.m_SearchRange;
229
20.7k
  m_bipredSearchRange               = encCfg.m_bipredSearchRange;
230
20.7k
  m_motionEstimationSearchMethod    = vvencMESearchMethod( encCfg.m_motionEstimationSearchMethod );
231
20.7k
  m_motionEstimationSearchMethodSCC = encCfg.m_motionEstimationSearchMethodSCC;
232
233
62.3k
  for( uint32_t iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++ )
234
41.5k
  {
235
290k
    for( uint32_t iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++ )
236
249k
    {
237
249k
      m_aaiAdaptSR[iDir][iRefIdx] = m_iSearchRange;
238
249k
    }
239
41.5k
  }
240
241
  // initialize motion cost
242
83.0k
  for( int iNum = 0; iNum < AMVP_MAX_NUM_CANDS + 1; iNum++ )
243
62.3k
  {
244
186k
    for( int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++ )
245
124k
    {
246
124k
      if( iIdx < iNum )
247
62.3k
      {
248
62.3k
        m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits( iIdx, iNum );
249
62.3k
      }
250
62.3k
      else
251
62.3k
      {
252
62.3k
        m_auiMVPIdxCost[iIdx][iNum] = MAX_UINT;
253
62.3k
      }
254
124k
    }
255
62.3k
  }
256
257
20.7k
  const ChromaFormat cform   = encCfg.m_internChromaFormat;
258
20.7k
  const int          ctuSize = encCfg.m_CTUSize;
259
62.3k
  for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
260
41.5k
  {
261
41.5k
    m_tmpPredStorage[i].create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
262
41.5k
  }
263
20.7k
  m_tmpStorageLCU.create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
264
20.7k
  m_pTempPel = new Pel[ctuSize * ctuSize];
265
20.7k
  m_tmpAffiStorage.create(UnitArea(cform, Area(0, 0, ctuSize, ctuSize + 2)));  // allow overread by 2 samples
266
20.7k
  m_tmpAffiError = new Pel[ctuSize * ctuSize];
267
20.7k
  m_tmpAffiDeri[0] = new Pel[ctuSize * ctuSize];
268
20.7k
  m_tmpAffiDeri[1] = new Pel[ctuSize * ctuSize];
269
270
20.7k
  CompArea chromaArea( COMP_Cb, cform, Area( 0, 0, encCfg.m_CTUSize, encCfg.m_CTUSize ), true );
271
103k
  for( int i = 0; i < 4; i++ )
272
83.0k
  {
273
83.0k
    m_orgResiCb[i].create( chromaArea );
274
83.0k
    m_orgResiCr[i].create( chromaArea );
275
83.0k
  }
276
20.7k
}
277
278
void InterSearch::destroy()
279
20.7k
{
280
20.7k
  if ( m_pTempPel )
281
20.7k
  {
282
20.7k
    delete [] m_pTempPel;
283
20.7k
    m_pTempPel = nullptr;
284
20.7k
  }
285
286
62.3k
  for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
287
41.5k
  {
288
41.5k
    m_tmpPredStorage[i].destroy();
289
41.5k
  }
290
20.7k
  m_tmpStorageLCU.destroy();
291
20.7k
  m_tmpAffiStorage.destroy();
292
20.7k
  if (m_tmpAffiError != NULL)
293
20.7k
  {
294
20.7k
    delete[] m_tmpAffiError;
295
20.7k
    m_tmpAffiError = nullptr;
296
20.7k
  }
297
20.7k
  if (m_tmpAffiDeri[0] != NULL)
298
20.7k
  {
299
20.7k
    delete[] m_tmpAffiDeri[0];
300
20.7k
    m_tmpAffiDeri[0] = nullptr;
301
20.7k
  }
302
20.7k
  if (m_tmpAffiDeri[1] != NULL)
303
20.7k
  {
304
20.7k
    delete[] m_tmpAffiDeri[1];
305
20.7k
    m_tmpAffiDeri[1] = nullptr;
306
20.7k
  }
307
308
20.7k
  m_pSaveCS  = nullptr;
309
20.7k
}
310
311
void InterSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache, ReuseUniMv* pReuseUniMv, BlkUniMvInfoBuffer* pBlkUniMvInfoBuffer, AffineProfList* pAffineProfList, IbcBvCand* pCachedBvs )
312
4.05k
{
313
4.05k
  m_CABACEstimator     = cabacEstimator;
314
4.05k
  m_CtxCache           = ctxCache;
315
4.05k
  m_ReuseUniMv         = pReuseUniMv;
316
4.05k
  m_BlkUniMvInfoBuffer = pBlkUniMvInfoBuffer;
317
4.05k
  m_AffineProfList     = pAffineProfList;
318
4.05k
  m_defaultCachedBvs   = pCachedBvs;
319
4.05k
}
320
321
ReuseUniMv::ReuseUniMv()
322
9.06k
{
323
9.06k
  const int numPos     = MAX_CU_SIZE >> MIN_CU_LOG2;
324
9.06k
  const int maxSizeIdx = MAX_CU_SIZE_IDX-2;
325
63.4k
  for( int wIdx = 0; wIdx < maxSizeIdx; wIdx++ )
326
54.3k
  {
327
380k
    for( int hIdx = 0; hIdx < maxSizeIdx; hIdx++ )
328
326k
    {
329
10.7M
      for( int y = 0; y < numPos; y++ )
330
10.4M
      {
331
344M
        for( int x = 0; x < numPos; x++ )
332
333M
        {
333
333M
          m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] = nullptr;
334
333M
        }
335
10.4M
      }
336
326k
    }
337
54.3k
  }
338
9.06k
}
339
340
ReuseUniMv::~ReuseUniMv()
341
9.06k
{
342
9.06k
  resetReusedUniMvs();
343
9.06k
}
344
345
void ReuseUniMv::resetReusedUniMvs()
346
13.5k
{
347
13.5k
  const int numPos     = MAX_CU_SIZE >> MIN_CU_LOG2;
348
13.5k
  const int maxSizeIdx = MAX_CU_SIZE_IDX-2;
349
95.1k
  for ( int wIdx = 0; wIdx < maxSizeIdx; wIdx++ )
350
81.5k
  {
351
570k
    for ( int hIdx = 0; hIdx < maxSizeIdx; hIdx++ )
352
489k
    {
353
16.1M
      for ( int y = 0; y < numPos; y++ )
354
15.6M
      {
355
516M
        for ( int x = 0; x < numPos; x++ )
356
500M
        {
357
500M
          if ( m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] )
358
0
          {
359
0
            delete [] m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ];
360
0
            m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] = nullptr;
361
0
          }
362
500M
        }
363
15.6M
      }
364
489k
    }
365
81.5k
  }
366
13.5k
}
367
368
void InterSearch::loadGlobalUniMvs( const Area& lumaArea, const PreCalcValues& pcv)
369
0
{
370
0
  unsigned idx1, idx2, idx3, idx4;
371
0
  getAreaIdxNew(lumaArea, pcv, idx1, idx2, idx3, idx4);
372
0
  if( m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4])
373
0
  {
374
//    DTRACE( g_trace_ctx, D_TMP, "%d unimv load %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
375
0
    m_BlkUniMvInfoBuffer->insertUniMvCands(lumaArea, m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4]);
376
0
  }
377
0
}
378
379
void InterSearch::getBestSbt( CodingStructure* tempCS, CodingUnit* cu, uint8_t& histBestSbt, Distortion& curPuSse, uint8_t sbtAllowed, bool doPreAnalyzeResi, bool mtsAllowed )
380
0
{
381
0
  m_estMinDistSbt[NUMBER_SBT_MODE] = MAX_DISTORTION;
382
0
  m_skipSbtAll = false;
383
384
0
  if( doPreAnalyzeResi )
385
0
  {
386
0
    xCalcMinDistSbt( *tempCS, *cu, sbtAllowed );
387
0
  }
388
389
0
  curPuSse = getEstDistSbt( NUMBER_SBT_MODE );
390
391
0
  if( doPreAnalyzeResi )
392
0
  {
393
0
    if( m_skipSbtAll && !mtsAllowed )
394
0
    {
395
0
      histBestSbt = 0; //try DCT2
396
0
    }
397
0
    else
398
0
    {
399
0
      int  slShift = 4 + std::min( Log2( cu->lwidth() * cu->lheight() ), 9 );
400
0
      assert( curPuSse != MAX_DISTORTION );
401
0
      histBestSbt = m_modeCtrl->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );
402
0
      if( m_skipSbtAll && CU::isSbtMode( histBestSbt ) ) //special case, skip SBT when loading SBT
403
0
      {
404
0
        histBestSbt = 0; //try DCT2
405
0
      }
406
0
    }
407
0
  }
408
0
}
409
410
411
inline void InterSearch::xTZSearchHelp( TZSearchStruct& rcStruct, const int iSearchX, const int iSearchY, const uint8_t ucPointNr, const uint32_t uiDistance )
412
0
{
413
0
  Distortion  uiSad = 0;
414
415
0
  const Pel* const  piRefSrch = rcStruct.piRefY + iSearchY * rcStruct.iRefStride + iSearchX;
416
417
0
  m_cDistParam.cur.buf = piRefSrch;
418
419
0
  uiSad = m_cDistParam.distFunc( m_cDistParam );
420
421
  // only add motion cost if uiSad is smaller than best. Otherwise pointless
422
  // to add motion cost.
423
0
  if( uiSad < rcStruct.uiBestSad )
424
0
  {
425
    // motion cost
426
0
    uiSad += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY, rcStruct.imvShift );
427
428
0
    if( uiSad < rcStruct.uiBestSad )
429
0
    {
430
0
      rcStruct.uiBestSad      = uiSad;
431
0
      rcStruct.iBestX         = iSearchX;
432
0
      rcStruct.iBestY         = iSearchY;
433
0
      rcStruct.uiBestDistance = uiDistance;
434
0
      rcStruct.uiBestRound    = 0;
435
0
      rcStruct.ucPointNr      = ucPointNr;
436
0
      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
437
0
    }
438
0
  }
439
0
}
440
441
442
443
inline void InterSearch::xTZ2PointSearch( TZSearchStruct& rcStruct )
444
0
{
445
0
  const SearchRange& sr = rcStruct.searchRange;
446
447
0
  static const int xOffset[2][9] = { {  0, -1, -1,  0, -1, +1, -1, -1, +1 }, {  0,  0, +1, +1, -1, +1,  0, +1,  0 } };
448
0
  static const int yOffset[2][9] = { {  0,  0, -1, -1, +1, -1,  0, +1,  0 }, {  0, -1, -1,  0, -1, +1, +1, +1, +1 } };
449
450
  // 2 point search,                   //   1 2 3
451
  // check only the 2 untested points  //   4 0 5
452
  // around the start point            //   6 7 8
453
0
  const int iX1 = rcStruct.iBestX + xOffset[0][rcStruct.ucPointNr];
454
0
  const int iX2 = rcStruct.iBestX + xOffset[1][rcStruct.ucPointNr];
455
456
0
  const int iY1 = rcStruct.iBestY + yOffset[0][rcStruct.ucPointNr];
457
0
  const int iY2 = rcStruct.iBestY + yOffset[1][rcStruct.ucPointNr];
458
459
0
  if( iX1 >= sr.left && iX1 <= sr.right && iY1 >= sr.top && iY1 <= sr.bottom )
460
0
  {
461
0
    xTZSearchHelp( rcStruct, iX1, iY1, 0, 2 );
462
0
  }
463
464
0
  if( iX2 >= sr.left && iX2 <= sr.right && iY2 >= sr.top && iY2 <= sr.bottom )
465
0
  {
466
0
    xTZSearchHelp( rcStruct, iX2, iY2, 0, 2 );
467
0
  }
468
0
}
469
470
inline void InterSearch::xTZ4PointSquareSearch( TZSearchStruct & rcStruct, const int iStartX, const int iStartY, const int iDist )
471
0
{
472
0
  const SearchRange& sr = rcStruct.searchRange;
473
0
  CHECK( iDist == 0 || iDist > 2, "Invalid distance" );
474
  // 4 point search,                   //     1 2 3
475
  // search around the start point     //     4 0 5
476
  // with the required  distance       //     6 7 8
477
0
  const int iTop = iStartY - iDist;
478
0
  const int iBottom = iStartY + iDist;
479
0
  const int iLeft = iStartX - iDist;
480
0
  const int iRight = iStartX + iDist;
481
0
  rcStruct.uiBestRound += 1;
482
483
0
  if ( iTop >= sr.top )
484
0
  {
485
0
    if ( iLeft >= sr.left ) // check top left
486
0
    {
487
0
      xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
488
0
    }
489
0
    if ( iRight <= sr.right ) // check top right
490
0
    {
491
0
      xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
492
0
    }
493
0
  }
494
0
  if ( iBottom <= sr.bottom )
495
0
  {
496
0
    if ( iLeft >= sr.left ) // check bottom left
497
0
    {
498
0
      xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
499
0
    }
500
0
    if ( iRight <= sr.right ) // check bottom right
501
0
    {
502
0
      xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
503
0
    }
504
0
  }
505
0
}
506
507
inline void InterSearch::xTZ8PointSquareSearch( TZSearchStruct& rcStruct, const int iStartX, const int iStartY, const int iDist )
508
0
{
509
0
  const SearchRange& sr = rcStruct.searchRange;
510
0
  // 8 point search,                   //   1 2 3
511
0
  // search around the start point     //   4 0 5
512
0
  // with the required  distance       //   6 7 8
513
0
  CHECK( iDist == 0 , "Invalid distance");
514
0
  const int iTop        = iStartY - iDist;
515
0
  const int iBottom     = iStartY + iDist;
516
0
  const int iLeft       = iStartX - iDist;
517
0
  const int iRight      = iStartX + iDist;
518
0
  rcStruct.uiBestRound += 1;
519
0
520
0
  if ( iTop >= sr.top ) // check top
521
0
  {
522
0
    if ( iLeft >= sr.left ) // check top left
523
0
    {
524
0
      xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
525
0
    }
526
0
    // top middle
527
0
    xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
528
0
529
0
    if ( iRight <= sr.right ) // check top right
530
0
    {
531
0
      xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
532
0
    }
533
0
  } // check top
534
0
  if ( iLeft >= sr.left ) // check middle left
535
0
  {
536
0
    xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
537
0
  }
538
0
  if ( iRight <= sr.right ) // check middle right
539
0
  {
540
0
    xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
541
0
  }
542
0
  if ( iBottom <= sr.bottom ) // check bottom
543
0
  {
544
0
    if ( iLeft >= sr.left ) // check bottom left
545
0
    {
546
0
      xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
547
0
    }
548
0
    // check bottom middle
549
0
    xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
550
0
551
0
    if ( iRight <= sr.right ) // check bottom right
552
0
    {
553
0
      xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
554
0
    }
555
0
  } // check bottom
556
0
}
557
558
inline void InterSearch::xTZ8PointDiamondSearch( TZSearchStruct& rcStruct,
559
                                                 const int iStartX,
560
                                                 const int iStartY,
561
                                                 const int iDist,
562
                                                 const bool bCheckCornersAtDist1 )
563
0
{
564
0
  const SearchRange& sr = rcStruct.searchRange;
565
  // 8 point search,                   //   1 2 3
566
  // search around the start point     //   4 0 5
567
  // with the required  distance       //   6 7 8
568
0
  CHECK( iDist == 0, "Invalid distance" );
569
0
  const int iTop        = iStartY - iDist;
570
0
  const int iBottom     = iStartY + iDist;
571
0
  const int iLeft       = iStartX - iDist;
572
0
  const int iRight      = iStartX + iDist;
573
0
  rcStruct.uiBestRound += 1;
574
575
0
  if ( iDist == 1 )
576
0
  {
577
0
    if ( iTop >= sr.top ) // check top
578
0
    {
579
0
      if (bCheckCornersAtDist1)
580
0
      {
581
0
        if ( iLeft >= sr.left) // check top-left
582
0
        {
583
0
          xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
584
0
        }
585
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
586
0
        if ( iRight <= sr.right ) // check middle right
587
0
        {
588
0
          xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
589
0
        }
590
0
      }
591
0
      else
592
0
      {
593
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
594
0
      }
595
0
    }
596
0
    if ( iLeft >= sr.left ) // check middle left
597
0
    {
598
0
      xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
599
0
    }
600
0
    if ( iRight <= sr.right ) // check middle right
601
0
    {
602
0
      xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
603
0
    }
604
0
    if ( iBottom <= sr.bottom ) // check bottom
605
0
    {
606
0
      if (bCheckCornersAtDist1)
607
0
      {
608
0
        if ( iLeft >= sr.left) // check top-left
609
0
        {
610
0
          xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
611
0
        }
612
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
613
0
        if ( iRight <= sr.right ) // check middle right
614
0
        {
615
0
          xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
616
0
        }
617
0
      }
618
0
      else
619
0
      {
620
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
621
0
      }
622
0
    }
623
0
  }
624
0
  else
625
0
  {
626
0
    if ( iDist <= 8 )
627
0
    {
628
0
      const int iTop_2      = iStartY - (iDist>>1);
629
0
      const int iBottom_2   = iStartY + (iDist>>1);
630
0
      const int iLeft_2     = iStartX - (iDist>>1);
631
0
      const int iRight_2    = iStartX + (iDist>>1);
632
633
0
      if (  iTop >= sr.top && iLeft >= sr.left &&
634
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
635
0
      {
636
0
        xTZSearchHelp( rcStruct, iStartX,  iTop,      2, iDist    );
637
0
        xTZSearchHelp( rcStruct, iLeft_2,  iTop_2,    1, iDist>>1 );
638
0
        xTZSearchHelp( rcStruct, iRight_2, iTop_2,    3, iDist>>1 );
639
0
        xTZSearchHelp( rcStruct, iLeft,    iStartY,   4, iDist    );
640
0
        xTZSearchHelp( rcStruct, iRight,   iStartY,   5, iDist    );
641
0
        xTZSearchHelp( rcStruct, iLeft_2,  iBottom_2, 6, iDist>>1 );
642
0
        xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
643
0
        xTZSearchHelp( rcStruct, iStartX,  iBottom,   7, iDist    );
644
0
      }
645
0
      else // check border
646
0
      {
647
0
        if ( iTop >= sr.top ) // check top
648
0
        {
649
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
650
0
        }
651
0
        if ( iTop_2 >= sr.top ) // check half top
652
0
        {
653
0
          if ( iLeft_2 >= sr.left ) // check half left
654
0
          {
655
0
            xTZSearchHelp( rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
656
0
          }
657
0
          if ( iRight_2 <= sr.right ) // check half right
658
0
          {
659
0
            xTZSearchHelp( rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
660
0
          }
661
0
        } // check half top
662
0
        if ( iLeft >= sr.left ) // check left
663
0
        {
664
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
665
0
        }
666
0
        if ( iRight <= sr.right ) // check right
667
0
        {
668
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
669
0
        }
670
0
        if ( iBottom_2 <= sr.bottom ) // check half bottom
671
0
        {
672
0
          if ( iLeft_2 >= sr.left ) // check half left
673
0
          {
674
0
            xTZSearchHelp( rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
675
0
          }
676
0
          if ( iRight_2 <= sr.right ) // check half right
677
0
          {
678
0
            xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
679
0
          }
680
0
        } // check half bottom
681
0
        if ( iBottom <= sr.bottom ) // check bottom
682
0
        {
683
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
684
0
        }
685
0
      } // check border
686
0
    }
687
0
    else // iDist > 8
688
0
    {
689
0
      if ( iTop >= sr.top && iLeft >= sr.left &&
690
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
691
0
      {
692
0
        xTZSearchHelp( rcStruct, iStartX, iTop,    0, iDist );
693
0
        xTZSearchHelp( rcStruct, iLeft,   iStartY, 0, iDist );
694
0
        xTZSearchHelp( rcStruct, iRight,  iStartY, 0, iDist );
695
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
696
0
        for ( int index = 1; index < 4; index++ )
697
0
        {
698
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
699
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
700
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
701
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
702
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
703
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
704
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
705
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
706
0
        }
707
0
      }
708
0
      else // check border
709
0
      {
710
0
        if ( iTop >= sr.top ) // check top
711
0
        {
712
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 0, iDist );
713
0
        }
714
0
        if ( iLeft >= sr.left ) // check left
715
0
        {
716
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 0, iDist );
717
0
        }
718
0
        if ( iRight <= sr.right ) // check right
719
0
        {
720
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 0, iDist );
721
0
        }
722
0
        if ( iBottom <= sr.bottom ) // check bottom
723
0
        {
724
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
725
0
        }
726
0
        for ( int index = 1; index < 4; index++ )
727
0
        {
728
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
729
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
730
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
731
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
732
733
0
          if ( iPosYT >= sr.top ) // check top
734
0
          {
735
0
            if ( iPosXL >= sr.left ) // check left
736
0
            {
737
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
738
0
            }
739
0
            if ( iPosXR <= sr.right ) // check right
740
0
            {
741
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
742
0
            }
743
0
          } // check top
744
0
          if ( iPosYB <= sr.bottom ) // check bottom
745
0
          {
746
0
            if ( iPosXL >= sr.left ) // check left
747
0
            {
748
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
749
0
            }
750
0
            if ( iPosXR <= sr.right ) // check right
751
0
            {
752
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
753
0
            }
754
0
          } // check bottom
755
0
        } // for ...
756
0
      } // check border
757
0
    } // iDist <= 8
758
0
  } // iDist == 1
759
0
}
760
761
Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
762
                                            Mv baseRefMv,
763
                                            int iFrac, Mv& rcMvFrac,
764
                                            Distortion& uiDistBest,
765
                                            int& patternId,
766
                                            CPelBuf* pattern,
767
                                            bool useAltHpelIf )
768
0
{
769
0
  Distortion  uiDist;
770
0
  uiDistBest = m_pcEncCfg->m_fastSubPel == 1 ? uiDistBest : MAX_DISTORTION;
771
0
  uint32_t        uiDirecBest = 0;
772
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
773
774
0
  Pel*  piRefPos;
775
0
  int iRefStride = pcPatternKey->width + 1;
776
0
  m_pcRdCost->setDistParam( m_cDistParam, *pcPatternKey, m_filteredBlock[0][0][0], iRefStride, m_lumaClpRng.bd, COMP_Y, 0, m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0 );
777
778
0
  const ClpRng& clpRng = m_lumaClpRng;
779
0
  int width = pattern->width;
780
0
  int height = pattern->height;
781
0
  int srcStride = pattern->stride;
782
783
0
  int intStride = width + 1;
784
0
  int dstStride = width + 1;
785
0
  Pel* intPtr;
786
0
  Pel* dstPtr;
787
0
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
788
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
789
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
790
0
  int halfFilterSize = ( filterSize >> 1 );
791
0
  const Pel* srcPtr  = pattern->buf - halfFilterSize*srcStride - 1;
792
793
0
  const ChromaFormat chFmt = m_currChromaFormat;
794
795
0
  Distortion distH[ 9 ] = { uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest };
796
0
  const int TH = 17, TL = 15, shift = 4;
797
798
0
  const Mv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
799
0
  for (uint32_t i = 0; i < 9; i++)
800
0
  {
801
0
    if( m_pcEncCfg->m_fastSubPel == 1 )
802
0
    {
803
0
      if( s_skipQpelPosition[ patternId ][ i ] )
804
0
      {
805
0
        continue;
806
0
      }
807
808
0
      if( 2 == iFrac )
809
0
      {
810
0
        if ( ( 5 == i && 0 == uiDirecBest ) || ( 7 == i && 1 == uiDirecBest ) || ( 8 == i && ( 1 == uiDirecBest || 3 == uiDirecBest || 5 == uiDirecBest ) ) )
811
0
        {
812
0
          break;
813
0
        }
814
815
0
        if( 0 == i )
816
0
        {
817
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
818
0
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 0 ][ 0 ], intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
819
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 0 ][ 0 ] + width, intStride, 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
820
821
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
822
0
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 2 ][ 0 ], intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
823
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 2 ][ 0 ] + width, intStride, 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
824
825
0
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + halfFilterSize * intStride + 1;
826
0
          dstPtr = m_filteredBlock[ 0 ][ 0 ][ 0 ];
827
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
828
0
        }
829
0
        else if( 1 == i )
830
0
        {
831
0
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
832
0
          dstPtr = m_filteredBlock[ 2 ][ 0 ][ 0 ];
833
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
834
0
        }
835
0
        else if( 3 == i )
836
0
        {
837
0
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + halfFilterSize * intStride;
838
0
          dstPtr = m_filteredBlock[ 0 ][ 2 ][ 0 ];
839
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
840
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
841
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
842
0
        }
843
0
        else if( 5 == i )
844
0
        {
845
0
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
846
0
          dstPtr = m_filteredBlock[ 2 ][ 2 ][ 0 ];
847
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
848
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
849
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
850
0
        }
851
0
      }
852
0
    }
853
0
    Mv cMvTest = pcMvRefine[ i ];
854
0
    cMvTest += baseRefMv;
855
856
0
    int horVal = cMvTest.hor * iFrac;
857
0
    int verVal = cMvTest.ver * iFrac;
858
0
    piRefPos = m_filteredBlock[verVal & 3][horVal & 3][0];
859
860
0
    if ( horVal == 2 && ( verVal & 1 ) == 0 )
861
0
    {
862
0
      piRefPos += 1;
863
0
    }
864
0
    if ( ( horVal & 1 ) == 0 && verVal == 2 )
865
0
    {
866
0
      piRefPos += iRefStride;
867
0
    }
868
0
    cMvTest = pcMvRefine[i];
869
0
    cMvTest += rcMvFrac;
870
871
872
0
    m_cDistParam.cur.buf   = piRefPos;
873
0
    uiDist = m_cDistParam.distFunc( m_cDistParam );
874
0
    uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.hor, cMvTest.ver, 0 );
875
876
0
    distH[ i ] = uiDist;
877
0
    if ( uiDist < uiDistBest )
878
0
    {
879
0
      uiDistBest  = uiDist;
880
0
      uiDirecBest = i;
881
0
      m_cDistParam.maximumDistortionForEarlyExit = uiDist;
882
0
    }
883
0
  }
884
885
0
  rcMvFrac = pcMvRefine[uiDirecBest];
886
887
0
  if( m_pcEncCfg->m_fastSubPel == 1 && iFrac == 2 )
888
0
  {
889
0
    switch ( uiDirecBest )
890
0
    {
891
0
    case 0:
892
      // hor
893
0
      distH[ 3 ] <<= shift;
894
0
      patternId += ( distH[ 3 ] > TH * distH[ 4 ] ? 2 : ( distH[ 3 ] < TL * distH[ 4 ] ? 1 : 0 ) );
895
      // ver
896
0
      distH[ 1 ] <<= shift;
897
0
      patternId += ( distH[ 1 ] > TH * distH[ 2 ] ? 6 : ( distH[ 1 ] < TL * distH[ 2 ] ? 3 : 0 ) );
898
0
      break;
899
0
    case 1:
900
      // hor
901
0
      distH[ 5 ] <<= shift;
902
0
      patternId += ( distH[ 5 ] > TH * distH[ 6 ] ? 4 : ( distH[ 5 ] < TL * distH[ 6 ] ? 2 : 0 ) );
903
      // ver
904
0
      patternId += ( distH[ 2 ] - distH[ 0 ] > distH[ 0 ] - distH[ 1 ] ? 1 : 0 );
905
906
0
      patternId += ( 41 == patternId ? 0 : 8 );
907
0
      break;
908
0
    case 2:
909
      // hor
910
0
      distH[ 7 ] <<= shift;
911
0
      patternId += ( distH[ 7 ] > TH * distH[ 8 ] ? 4 : ( distH[ 7 ] < TL * distH[ 8 ] ? 2 : 0 ) );
912
      // ver
913
0
      patternId += ( distH[ 1 ] - distH[ 0 ] > distH[ 0 ] - distH[ 2 ] ? 1 : 0 );
914
915
0
      patternId += ( 41 == patternId ? 0 : 13 );
916
0
      break;
917
0
    case 3:
918
      // hor
919
0
      patternId += ( distH[ 4 ] - distH[ 0 ] > distH[ 0 ] - distH[ 3 ] ? 1 : 0 );
920
      // ver
921
0
      distH[ 5 ] <<= shift;
922
0
      patternId += ( distH[ 5 ] > TH * distH[ 7 ] ? 4 : ( distH[ 5 ] < TL * distH[ 7 ] ? 2 : 0 ) );
923
924
0
      patternId += ( 41 == patternId ? 0 : 18 );
925
0
      break;
926
0
    case 4:
927
      // hor
928
0
      patternId += ( distH[ 3 ] - distH[ 0 ] > distH[ 0 ] - distH[ 4 ] ? 1 : 0 );
929
      // ver
930
0
      distH[ 6 ] <<= shift;
931
0
      patternId += ( distH[ 6 ] > TH * distH[ 8 ] ? 4 : ( distH[ 6 ] < TL * distH[ 8 ] ? 2 : 0 ) );
932
933
0
      patternId += ( 41 == patternId ? 0 : 23 );
934
0
      break;
935
0
    case 5:
936
      // hor
937
0
      patternId += ( distH[ 6 ] - distH[ 1 ] > distH[ 1 ] - distH[ 5 ] ? 1 : 0 );
938
      // ver
939
0
      patternId += ( distH[ 7 ] - distH[ 3 ] > distH[ 3 ] - distH[ 5 ] ? 2 : 0 );
940
941
0
      patternId += ( 41 == patternId ? 0 : 28 );
942
0
      break;
943
0
    case 6:
944
      // hor
945
0
      patternId += ( distH[ 5 ] - distH[ 1 ] > distH[ 1 ] - distH[ 6 ] ? 1 : 0 );
946
      // ver
947
0
      patternId += ( distH[ 8 ] - distH[ 4 ] > distH[ 4 ] - distH[ 6 ] ? 2 : 0 );
948
949
0
      patternId += ( 41 == patternId ? 0 : 31 );
950
0
      break;
951
0
    case 7:
952
      // hor
953
0
      patternId += ( distH[ 8 ] - distH[ 2 ] > distH[ 2 ] - distH[ 7 ] ? 1 : 0 );
954
      // ver
955
0
      patternId += ( distH[ 5 ] - distH[ 3 ] > distH[ 3 ] - distH[ 7 ] ? 2 : 0 );
956
957
0
      patternId += ( 41 == patternId ? 0 : 34 );
958
0
      break;
959
0
    case 8:
960
      // hor
961
0
      patternId += ( distH[ 7 ] - distH[ 2 ] > distH[ 2 ] - distH[ 8 ] ? 1 : 0 );
962
      // ver
963
0
      patternId += ( distH[ 6 ] - distH[ 4 ] > distH[ 4 ] - distH[ 8 ] ? 2 : 0 );
964
965
0
      patternId += ( 41 == patternId ? 0 : 37 );
966
0
      break;
967
0
    default:
968
0
      break;
969
0
    }
970
0
  }
971
972
0
  return uiDistBest;
973
0
}
974
975
//! search of the best candidate for inter prediction
976
bool InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner, double& bestCostInter)
977
0
{
978
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH, cu.cs, partitioner.chType );
979
0
  CodingStructure& cs = *cu.cs;
980
981
0
  AMVPInfo     amvp[2];
982
0
  Mv           cMvSrchRngLT;
983
0
  Mv           cMvSrchRngRB;
984
0
  Mv           cMvZero;
985
0
  Mv           cMv[2];
986
0
  Mv           cMvBi[2];
987
0
  Mv           cMvTemp[2][MAX_REF_PICS];
988
0
  Mv           cMvHevcTemp[2][MAX_REF_PICS];
989
0
  int          iNumPredDir = cs.slice->isInterP() ? 1 : 2;
990
991
0
  Mv           cMvPred[2][MAX_REF_PICS];
992
993
0
  Mv           cMvPredBi[2][MAX_REF_PICS];
994
0
  int          aaiMvpIdxBi[2][MAX_REF_PICS];
995
996
0
  int          aaiMvpIdx[2][MAX_REF_PICS];
997
0
  int          aaiMvpNum[2][MAX_REF_PICS];
998
999
0
  AMVPInfo     aacAMVPInfo[2][MAX_REF_PICS];
1000
1001
0
  int          iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
1002
0
  int          iRefIdxBi[2] = { -1, -1 };
1003
1004
0
  uint32_t     uiMbBits[3] = {1, 1, 0};
1005
1006
0
  uint32_t     uiLastMode = 0;
1007
0
  int          iRefStart, iRefEnd;
1008
1009
0
  int          symMode = 0;
1010
1011
0
  int          bestBiPRefIdxL1 = 0;
1012
0
  int          bestBiPMvpL1    = 0;
1013
0
  Distortion   biPDistTemp     = MAX_DISTORTION;
1014
1015
0
  uint8_t      BcwIdx          = (cu.cs->slice->isInterB() ? cu.BcwIdx : BCW_DEFAULT);
1016
0
  bool         enforceBcwPred = false;
1017
1018
  // Loop over Prediction Units
1019
0
  uint32_t     puIdx = 0;
1020
0
  uint32_t     uiLastModeTemp = 0;
1021
0
  Distortion   uiAffineCost = MAX_DISTORTION;
1022
0
  Distortion   uiHevcCost = MAX_DISTORTION;
1023
0
  bool checkAffine = (cu.imv == IMV_OFF);
1024
0
  if (cu.cs->bestParent != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D) != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D)->affine == false)
1025
0
  {
1026
0
    m_skipPROF = true;
1027
0
  }
1028
1029
0
  m_encOnly = true;
1030
0
  {
1031
0
    CU::spanMotionInfo( cu );
1032
0
    Distortion   uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
1033
0
    Distortion   uiCostBi  =   MAX_DISTORTION;
1034
0
    Distortion   uiCostTemp;
1035
1036
0
    uint32_t         uiBits[3];
1037
0
    uint32_t         uiBitsTemp;
1038
0
    Distortion   bestBiPDist = MAX_DISTORTION;
1039
1040
0
    Distortion   uiCostTempL0[MAX_NUM_REF];
1041
0
    for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
1042
0
    {
1043
0
      uiCostTempL0[iNumRef] = MAX_DISTORTION;
1044
0
    }
1045
0
    uint32_t         uiBitsTempL0[MAX_NUM_REF];
1046
1047
0
    Mv           mvValidList1;
1048
0
    int          refIdxValidList1 = 0;
1049
0
    uint32_t         bitsValidList1   = MAX_UINT;
1050
0
    Distortion   costValidList1   = MAX_DISTORTION;
1051
1052
0
    CPelUnitBuf origBuf = cu.cs->getOrgBuf( cu );
1053
1054
0
    xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits );
1055
1056
0
    m_pcRdCost->selectMotionLambda();
1057
1058
0
    unsigned imvShift = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
1059
1060
    //  Uni-directional prediction
1061
0
    for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
1062
0
    {
1063
0
      RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1064
0
      for (int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->numRefIdx[ refPicList ]; iRefIdxTemp++)
1065
0
      {
1066
0
        uiBitsTemp = uiMbBits[iRefList];
1067
0
        if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1068
0
        {
1069
0
          uiBitsTemp += iRefIdxTemp+1;
1070
0
          if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1071
0
          {
1072
0
            uiBitsTemp--;
1073
0
          }
1074
0
        }
1075
0
        xEstimateMvPredAMVP( cu, origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], amvp[refPicList], biPDistTemp);
1076
1077
0
        aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
1078
0
        aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];
1079
1080
0
        if(cs.picHeader->mvdL1Zero && iRefList==1 && biPDistTemp < bestBiPDist)
1081
0
        {
1082
0
          bestBiPDist = biPDistTemp;
1083
0
          bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
1084
0
          bestBiPRefIdxL1 = iRefIdxTemp;
1085
0
        }
1086
1087
0
        uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1088
1089
0
        if ( m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1 )    // list 1
1090
0
        {
1091
0
          if ( cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] >= 0 )
1092
0
          {
1093
0
            cMvTemp[1][iRefIdxTemp] = cMvTemp[0][cs.slice->list1IdxToList0Idx[iRefIdxTemp ]];
1094
0
            uiCostTemp = uiCostTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]];
1095
            /*first subtract the bit-rate part of the cost of the other list*/
1096
0
            uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]] );
1097
            /*correct the bit-rate part of the current ref*/
1098
0
            m_pcRdCost->setPredictor  ( cMvPred[iRefList][iRefIdxTemp] );
1099
0
            uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].hor, cMvTemp[1][iRefIdxTemp].ver, imvShift + MV_FRACTIONAL_BITS_DIFF );
1100
            /*calculate the correct cost*/
1101
0
            uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
1102
0
          }
1103
0
          else
1104
0
          {
1105
0
            xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1106
0
          }
1107
0
        }
1108
0
        else
1109
0
        {
1110
0
          xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1111
0
        }
1112
          
1113
0
        if( cs.slice->sps->BCW && cu.BcwIdx == BCW_DEFAULT && cs.slice->isInterB() )
1114
0
        {
1115
0
          m_uniMotions.setReadMode( true, (uint32_t)iRefList, (uint32_t)iRefIdxTemp) ;
1116
0
          m_uniMotions.copyFrom( cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp );
1117
0
        }
1118
1119
0
        xCopyAMVPInfo( &amvp[refPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
1120
0
        xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv );
1121
1122
0
        if ( iRefList == 0 )
1123
0
        {
1124
0
          uiCostTempL0[iRefIdxTemp] = uiCostTemp;
1125
0
          uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
1126
0
        }
1127
0
        if ( uiCostTemp < uiCost[iRefList] )
1128
0
        {
1129
0
          uiCost[iRefList] = uiCostTemp;
1130
0
          uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
1131
1132
          // set motion
1133
0
          cMv    [iRefList] = cMvTemp[iRefList][iRefIdxTemp];
1134
0
          iRefIdx[iRefList] = iRefIdxTemp;
1135
0
        }
1136
1137
0
        if ( iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] < 0 )
1138
0
        {
1139
0
          costValidList1 = uiCostTemp;
1140
0
          bitsValidList1 = uiBitsTemp;
1141
1142
          // set motion
1143
0
          mvValidList1     = cMvTemp[iRefList][iRefIdxTemp];
1144
0
          refIdxValidList1 = iRefIdxTemp;
1145
0
        }
1146
0
      }
1147
0
    }
1148
1149
0
    ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp));
1150
0
    if (cu.imv == IMV_OFF && (!cu.slice->sps->BCW || BcwIdx == BCW_DEFAULT))
1151
0
    {
1152
0
      m_BlkUniMvInfoBuffer->insertUniMvCands(cu.Y(), &cMvTemp[0][0]);
1153
1154
0
      unsigned idx1, idx2, idx3, idx4;
1155
0
      getAreaIdxNew(cu.Y(), *cs.pcv, idx1, idx2, idx3, idx4);
1156
0
      if( ! m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] )
1157
0
      {
1158
0
        m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] = new Mv[ 2 * MAX_REF_PICS ];
1159
//          DTRACE( g_trace_ctx, D_TMP, "%d unimv first reuse %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
1160
0
      }
1161
0
      ::memcpy(m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4], cMvTemp, 2 * MAX_REF_PICS * sizeof(Mv));
1162
0
    }
1163
0
    if (bestCostInter != MAX_DOUBLE)
1164
0
    {
1165
0
      int L = (cu.slice->TLayer <= 2) ? 0 : (cu.slice->TLayer - 2);
1166
0
      double besCostMerge = bestCostInter;
1167
0
      bestCostInter = (uiCost[0] < uiCost[1]) ? uiCost[0] : uiCost[1];
1168
0
      if ((cu.slice->TLayer > (m_pcEncCfg->m_maxTLayer - (m_pcEncCfg->m_FastInferMerge & 7))) && bestCostInter > MRG_FAST_RATIOMYV[L] * besCostMerge)
1169
0
      {
1170
0
        m_skipPROF = false;
1171
0
        m_encOnly = false;
1172
0
        return true;
1173
0
      }
1174
0
    }
1175
    //  Bi-predictive Motion estimation
1176
0
    if( cs.slice->isInterB() && !CU::isBipredRestriction( cu ) && (cu.slice->checkLDC || BcwIdx == BCW_DEFAULT  || !m_affineModeSelected || m_pcEncCfg->m_BCW != 2 ) )
1177
0
    {
1178
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_B, &cs, partitioner.chType );
1179
0
      bool doBiPred = true;
1180
0
      cMvBi[0] = cMv[0];
1181
0
      cMvBi[1] = cMv[1];
1182
0
      iRefIdxBi[0] = iRefIdx[0];
1183
0
      iRefIdxBi[1] = iRefIdx[1];
1184
1185
0
      ::memcpy( cMvPredBi,   cMvPred,   sizeof( cMvPred   ) );
1186
0
      ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof( aaiMvpIdx ) );
1187
1188
0
      uint32_t uiMotBits[2];
1189
1190
0
      if(cs.picHeader->mvdL1Zero)
1191
0
      {
1192
        // case: no mvd for L1
1193
        // note: mv = mvp + mvd
1194
        // mv for L1 is equal to mvp(L1) and the mvd search is only performed for L0
1195
0
        xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]);
1196
0
        aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
1197
0
        cMvPredBi  [1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvCand[bestBiPMvpL1];
1198
0
        if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cMvPredBi[1][bestBiPRefIdxL1].ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
1199
0
        {
1200
          // this mvp cannot be used for mv, skip Bi-pred
1201
0
          uiCostBi = std::numeric_limits<Distortion>::max();
1202
0
          doBiPred = false;
1203
0
        }
1204
1205
0
        if( doBiPred )
1206
0
        {
1207
0
          cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1];
1208
0
          iRefIdxBi[1] = bestBiPRefIdxL1;
1209
0
          cu.mv[REF_PIC_LIST_1][0] = cMvBi[1];
1210
0
          cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1211
0
          cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
1212
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
1213
0
          motionCompensation( cu, predBufTmp, REF_PIC_LIST_1 );
1214
1215
0
          uiMotBits[0] = uiBits[0] - uiMbBits[0];
1216
0
          uiMotBits[1] = uiMbBits[1];
1217
1218
0
          if(cs.slice->numRefIdx[REF_PIC_LIST_1] > 1)
1219
0
          {
1220
0
            uiMotBits[1] += bestBiPRefIdxL1 + 1;
1221
0
            if(bestBiPRefIdxL1 == cs.slice->numRefIdx[REF_PIC_LIST_1] - 1)
1222
0
            {
1223
0
              uiMotBits[1]--;
1224
0
            }
1225
0
          }
1226
1227
0
          uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
1228
1229
0
          uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1230
1231
0
          cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
1232
0
        }
1233
0
      }
1234
0
      else
1235
0
      {
1236
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
1237
0
        uiMotBits[1] = uiBits[1] - uiMbBits[1];
1238
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1239
0
      }
1240
1241
0
      if( doBiPred )
1242
0
      {
1243
        // 4-times iteration (default)
1244
0
        int iNumIter = 4;
1245
1246
        // fast encoder setting: only one iteration
1247
0
        if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 || cs.picHeader->mvdL1Zero )
1248
0
        {
1249
0
          iNumIter = 1;
1250
0
        }
1251
1252
0
        enforceBcwPred = (BcwIdx != BCW_DEFAULT);
1253
1254
0
        for ( int iIter = 0; iIter < iNumIter; iIter++ )
1255
0
        {
1256
0
          int         iRefList    = iIter % 2;
1257
1258
0
          if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 )
1259
0
          {
1260
0
            if( uiCost[0] <= uiCost[1] )
1261
0
            {
1262
0
              iRefList = 1;
1263
0
            }
1264
0
            else
1265
0
            {
1266
0
              iRefList = 0;
1267
0
            }
1268
0
          }
1269
0
          else if ( iIter == 0 )
1270
0
          {
1271
0
            iRefList = 0;
1272
0
          }
1273
0
          if ( iIter == 0 && !cs.picHeader->mvdL1Zero)
1274
0
          {
1275
0
            cu.mv    [1 - iRefList][0] = cMv    [1 - iRefList];
1276
0
            cu.refIdx[1 - iRefList]    = iRefIdx[1 - iRefList];
1277
1278
0
            PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
1279
0
            motionCompensation( cu, predBufTmp, RefPicList(1 - iRefList) );
1280
0
          }
1281
1282
0
          RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1283
1284
0
          if(cs.picHeader->mvdL1Zero)
1285
0
          {
1286
0
            iRefList = 0;
1287
0
            refPicList = REF_PIC_LIST_0;
1288
0
          }
1289
1290
0
          bool bChanged = false;
1291
1292
0
          iRefStart = 0;
1293
0
          iRefEnd   = cs.slice->numRefIdx[ refPicList ]-1;
1294
0
          for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
1295
0
          {
1296
0
            uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
1297
0
            uiBitsTemp += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1298
0
            if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1299
0
            {
1300
0
              uiBitsTemp += iRefIdxTemp+1;
1301
0
              if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1302
0
              {
1303
0
                uiBitsTemp--;
1304
0
              }
1305
0
            }
1306
0
            uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1307
0
            if ( cs.slice->biDirPred )
1308
0
            {
1309
0
              uiBitsTemp += 1; // add one bit for symmetrical MVD mode
1310
0
            }
1311
            // call ME
1312
0
            xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[refPicList] );
1313
0
            xMotionEstimation ( cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList], true );
1314
0
            xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv);
1315
0
            if ( uiCostTemp < uiCostBi )
1316
0
            {
1317
0
              bChanged = true;
1318
1319
0
              cMvBi[iRefList]     = cMvTemp[iRefList][iRefIdxTemp];
1320
0
              iRefIdxBi[iRefList] = iRefIdxTemp;
1321
1322
0
              uiCostBi            = uiCostTemp;
1323
0
              uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
1324
0
              uiMotBits[iRefList] -= ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1325
0
              uiBits[2]           = uiBitsTemp;
1326
1327
0
              if(iNumIter!=1)
1328
0
              {
1329
                //  Set motion
1330
0
                cu.mv    [refPicList][0] = cMvBi    [iRefList];
1331
0
                cu.refIdx[refPicList]    = iRefIdxBi[iRefList];
1332
1333
0
                PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
1334
0
                motionCompensation( cu, predBufTmp, refPicList );
1335
0
              }
1336
0
            }
1337
0
          } // for loop-iRefIdxTemp
1338
1339
0
          if( !bChanged )
1340
0
          {
1341
0
            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
1342
0
            {
1343
0
              xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
1344
0
              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, cu.imv);
1345
0
              if(!cs.picHeader->mvdL1Zero)
1346
0
              {
1347
0
                xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
1348
0
                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, cu.imv);
1349
0
              }
1350
0
            }
1351
0
            break;
1352
0
          }
1353
0
        } // for loop-iter
1354
0
      }
1355
1356
      // SMVD
1357
0
      if( cs.slice->biDirPred )
1358
0
      {
1359
0
        double th1 = 1.02;
1360
0
        bool testSME = true;
1361
0
        int numStartCand = m_pcEncCfg->m_SMVD > 1 ? 1 : 5;
1362
0
        Distortion symCost;
1363
0
        Mv cMvPredSym[2];
1364
0
        int mvpIdxSym[2];
1365
1366
0
        int curRefList = REF_PIC_LIST_0;
1367
0
        int tarRefList = 1 - curRefList;
1368
0
        RefPicList eCurRefList = (curRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
1369
0
        int refIdxCur = cs.slice->symRefIdx[ curRefList ];
1370
0
        int refIdxTar = cs.slice->symRefIdx[ tarRefList ];
1371
0
        if( aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 0 ] == aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 1 ] )
1372
0
        {
1373
0
          aacAMVPInfo[ curRefList ][ refIdxCur ].numCand = 1;
1374
0
        }
1375
0
        if( aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 0 ] == aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 1 ] )
1376
0
        {
1377
0
          aacAMVPInfo[ tarRefList ][ refIdxTar ].numCand = 1;
1378
0
        }
1379
1380
0
        MvField cCurMvField, cTarMvField;
1381
0
        Distortion costStart = MAX_DISTORTION;
1382
0
        for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ )
1383
0
        {
1384
0
          for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ )
1385
0
          {
1386
            GCC_WARNING_DISABLE_array_bounds // probably a bug in gcc-10 static analyzer: It thinks the indices are -1 and therefore triggers -Werror=array-bounds
1387
0
            cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur );
1388
0
            cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar );
1389
0
            GCC_WARNING_RESET
1390
0
            if( m_pcEncCfg->m_ifpLines )
1391
0
            {
1392
0
              xCheckAndClipMvToFppLine( cCurMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1393
0
              xCheckAndClipMvToFppLine( cTarMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1394
0
            }
1395
0
            Distortion cost = xGetSymCost( cu, origBuf, eCurRefList, cCurMvField, cTarMvField, BcwIdx );
1396
0
            if ( cost < costStart )
1397
0
            {
1398
0
              costStart = cost;
1399
0
              cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i];
1400
0
              cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j];
1401
0
              mvpIdxSym[curRefList] = i;
1402
0
              mvpIdxSym[tarRefList] = j;
1403
0
            }
1404
0
          }
1405
0
        }
1406
0
        cCurMvField.mv = cMvPredSym[curRefList];
1407
0
        cTarMvField.mv = cMvPredSym[tarRefList];
1408
1409
0
        m_pcRdCost->setCostScale(0);
1410
0
        Mv pred = cMvPredSym[curRefList];
1411
0
        pred.changeTransPrecInternal2Amvr(cu.imv);
1412
0
        m_pcRdCost->setPredictor(pred);
1413
0
        Mv mv = cCurMvField.mv;
1414
0
        mv.changeTransPrecInternal2Amvr(cu.imv);
1415
0
        uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
1416
0
        bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS];
1417
0
        bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS];
1418
0
        costStart += m_pcRdCost->getCost(bits);
1419
1420
0
        std::vector<Mv> symmvdCands;
1421
0
        auto smmvdCandsGen = [&](Mv mvCand, bool mvPrecAdj)
1422
0
        {
1423
0
          if (mvPrecAdj && cu.imv)
1424
0
          {
1425
0
            mvCand.roundTransPrecInternal2Amvr(cu.imv);
1426
0
          }
1427
1428
0
          bool toAddMvCand = true;
1429
0
          for (std::vector<Mv>::iterator pos = symmvdCands.begin(); pos != symmvdCands.end(); pos++)
1430
0
          {
1431
0
            if (*pos == mvCand)
1432
0
            {
1433
0
              toAddMvCand = false;
1434
0
              break;
1435
0
            }
1436
0
          }
1437
1438
0
          if (toAddMvCand)
1439
0
          {
1440
0
            symmvdCands.push_back(mvCand);
1441
0
          }
1442
0
        };
1443
1444
0
        smmvdCandsGen(cMvHevcTemp[curRefList][refIdxCur], false);
1445
0
        smmvdCandsGen(cMvTemp[curRefList][refIdxCur], false);
1446
0
        if (iRefIdxBi[curRefList] == refIdxCur)
1447
0
        {
1448
0
          smmvdCandsGen(cMvBi[curRefList], false);
1449
0
        }
1450
0
        for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
1451
0
        {
1452
0
          if( symmvdCands.size() >= numStartCand )
1453
0
          {
1454
0
            break;
1455
0
          }
1456
0
          BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
1457
0
          smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true);
1458
0
        }
1459
1460
0
        for (auto mvStart : symmvdCands)
1461
0
        {
1462
0
          bool checked = false; //if it has been checkin in the mvPred.
1463
0
          for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++)
1464
0
          {
1465
0
            checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]);
1466
0
          }
1467
0
          if (checked)
1468
0
          {
1469
0
            continue;
1470
0
          }
1471
1472
0
          Distortion bestCost = costStart;
1473
0
          xSymMvdCheckBestMvp(cu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, costStart, false);
1474
0
          if (costStart < bestCost)
1475
0
          {
1476
0
            cCurMvField.setMvField(mvStart, refIdxCur);
1477
0
            cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1478
0
          }
1479
0
        }
1480
0
        Mv startPtMv = cCurMvField.mv;
1481
1482
0
        Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]);
1483
0
        symCost = costStart - mvpCost;
1484
1485
        // ME
1486
0
        testSME = m_pcEncCfg->m_SMVD <= 2 || ( symCost < uiCostBi * th1 && uiCostBi < uiCost[ 0 ] && uiCostBi < uiCost[ 1 ] );
1487
0
        if( testSME )
1488
0
        {
1489
0
          xSymMotionEstimation( cu, origBuf, cMvPredSym[ curRefList ], cMvPredSym[ tarRefList ], eCurRefList, cCurMvField, cTarMvField, symCost, BcwIdx );
1490
0
        }
1491
1492
0
        symCost += mvpCost;
1493
1494
0
        if (startPtMv != cCurMvField.mv)
1495
0
        { // if ME change MV, run a final check for best MVP.
1496
0
          xSymMvdCheckBestMvp(cu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, symCost, true);
1497
0
        }
1498
1499
0
        bits = uiMbBits[2];
1500
0
        bits += 1; // add one bit for #symmetrical MVD mode
1501
0
        bits += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1502
0
        symCost += m_pcRdCost->getCost(bits);
1503
0
        cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1504
1505
        // save results
1506
0
        if ( symCost < uiCostBi  
1507
0
          && ( !m_pcEncCfg->m_ifpLines || 
1508
0
          ( CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cCurMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) &&
1509
0
            CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cTarMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) ) )          
1510
0
          )
1511
0
        {
1512
0
          uiCostBi = symCost;
1513
0
          symMode = 1 + curRefList;
1514
1515
0
          cMvBi[curRefList] = cCurMvField.mv;
1516
0
          iRefIdxBi[curRefList] = cCurMvField.refIdx;
1517
0
          aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList];
1518
0
          cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList];
1519
1520
0
          cMvBi[tarRefList] = cTarMvField.mv;
1521
0
          iRefIdxBi[tarRefList] = cTarMvField.refIdx;
1522
0
          aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList];
1523
0
          cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList];
1524
0
        }
1525
0
      }
1526
0
    } // if (B_SLICE)
1527
1528
      //  Clear Motion Field
1529
0
    cu.mv [REF_PIC_LIST_0][0] = Mv();
1530
0
    cu.mv [REF_PIC_LIST_1][0] = Mv();
1531
0
    cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
1532
0
    cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
1533
0
    cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
1534
0
    cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
1535
0
    cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
1536
0
    cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
1537
0
    cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
1538
0
    cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
1539
1540
    // Set Motion Field
1541
0
    cMv    [1] = mvValidList1;
1542
0
    iRefIdx[1] = refIdxValidList1;
1543
0
    uiBits [1] = bitsValidList1;
1544
0
    uiCost [1] = costValidList1;
1545
0
    if( enforceBcwPred )
1546
0
    {
1547
0
      uiCost[0] = uiCost[1] = MAX_UINT;
1548
0
    }
1549
1550
0
    uiLastModeTemp = uiLastMode;
1551
0
    if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
1552
0
    {
1553
0
      bestCostInter = uiCostBi;
1554
0
      uiLastMode = 2;
1555
0
      cu.mv [REF_PIC_LIST_0][0] = cMvBi[0];
1556
0
      cu.mv [REF_PIC_LIST_1][0] = cMvBi[1];
1557
0
      cu.mvd[REF_PIC_LIST_0][0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
1558
0
      cu.mvd[REF_PIC_LIST_1][0] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
1559
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
1560
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1561
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
1562
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
1563
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
1564
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
1565
0
      cu.interDir = 3;
1566
1567
0
      cu.smvdMode = symMode;
1568
0
    }
1569
0
    else if ( uiCost[0] <= uiCost[1] )
1570
0
    {
1571
0
      bestCostInter = uiCost[0];
1572
0
      uiLastMode = 0;
1573
0
      cu.mv [REF_PIC_LIST_0][0] = cMv[0];
1574
0
      cu.mvd[REF_PIC_LIST_0][0] = cMv[0] - cMvPred[0][iRefIdx[0]];
1575
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
1576
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
1577
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
1578
0
      cu.interDir = 1;
1579
0
    }
1580
0
    else
1581
0
    {
1582
0
      bestCostInter = uiCost[1];
1583
0
      uiLastMode = 1;
1584
0
      cu.mv [REF_PIC_LIST_1][0] = cMv[1];
1585
0
      cu.mvd[REF_PIC_LIST_1][0] = cMv[1] - cMvPred[1][iRefIdx[1]];
1586
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
1587
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
1588
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
1589
0
      cu.interDir = 2;
1590
0
    }
1591
1592
0
    if( BcwIdx != BCW_DEFAULT )
1593
0
    {
1594
0
      cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes.
1595
0
    }
1596
0
    uiHevcCost = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) ? uiCostBi : ((uiCost[0] <= uiCost[1]) ? uiCost[0] : uiCost[1]);
1597
0
    if (m_pcEncCfg->m_Affine > 2)
1598
0
    {
1599
0
      if (cu.slice->TLayer > 3)
1600
0
      {
1601
0
        checkAffine = false;
1602
0
      }
1603
0
      else
1604
0
      {
1605
0
        if( m_pcEncCfg->m_Affine >= 4 && cu.slice->TLayer >= 2 )
1606
0
        {
1607
0
          checkAffine = m_modeCtrl->comprCUCtx->bestCU ? (checkAffine && m_modeCtrl->comprCUCtx->bestCU->affine) : checkAffine;
1608
0
        }
1609
0
      }
1610
0
    }
1611
0
    if( checkAffine && cu.Y().width > 8 && cu.Y().height > 8 && m_pcEncCfg->m_Affine > 0 )
1612
0
    {
1613
      // Based on:
1614
      // H. Pejman*, S. Coulombe*, C. Vazquez*, M. Jamali° and A. Vakili°
1615
      // *École de technologie supérieure, °Summit Tech Multimedia
1616
      // "An Adjustable Fast Decision Method for Affine Motion Estimation in VVC,"
1617
      // ICIP, Kuala Lumpur, Malaysia, 2023, pp. 2695-2699, doi: 10.1109/ICIP49359.2023.10222750.
1618
      // https://ieeexplore.ieee.org/document/10222750
1619
1620
0
      static const double affine_thr_coffs[3] = { 2.534229853866437, 0.05173246 ,0.87650414 };
1621
0
      static const double affine_thr_param[5] = { 1, 1, 1, 1.3, 2.3 }; // TODO: Adapt if extending m_Affine range!
1622
0
      const int qp         = cu.qp;
1623
0
      const int blk_area   = cu.Y().area();
1624
0
      const double threshold  = affine_thr_param[m_pcEncCfg->m_Affine - 1];
1625
1626
      //Multiple linear regression (MLR):
1627
      //Y = b0 + b1*(QP) + b2*(LOG2(BLK_AREA))
1628
0
      double log_affine_thr =
1629
0
        affine_thr_coffs[0] +
1630
0
        qp * affine_thr_coffs[1] +
1631
0
        log2(blk_area) * affine_thr_coffs[2];
1632
1633
      //log_affine_thr is LOG 2 of estimated thr
1634
0
      double affine_thr = pow(2, log_affine_thr) * threshold;
1635
1636
0
      double scaled_uiHevcCost = (double)uiHevcCost;
1637
1638
      //The trained coefficients are based on the cost of internal 10 BitDepth. So, the cost should be scaled if the internal BitDepth is not 10.
1639
0
      if (m_pcEncCfg->m_internalBitDepth[0] !=10)
1640
0
      {
1641
        //Based on the CTC documnet to convert 8 bit to 10 bit video or vice versa, the VTM only multiply (8 to 10 bits) or divide (10 to 8 bits) pixel values to 4.
1642
        //In this case, the cost values are approximately scaled by 4.
1643
        //The trained data acquired from internal 10 bit data. So, if internal bit depth is 8, the conversion into 10-bit cost can be done as follows:
1644
0
        scaled_uiHevcCost = uiHevcCost * (pow(2.0, 10-m_pcEncCfg->m_internalBitDepth[0]));
1645
0
      }
1646
0
      if( scaled_uiHevcCost < affine_thr )
1647
0
      {
1648
0
        checkAffine = false;
1649
0
      }
1650
0
    }
1651
0
    if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->sps->Affine && checkAffine)
1652
0
    {
1653
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_AFFINE, &cs, partitioner.chType );
1654
0
      m_hevcCost = uiHevcCost;
1655
      // save normal hevc result
1656
0
      uint32_t uiMRGIndex = cu.mergeIdx;
1657
0
      bool bMergeFlag = cu.mergeFlag;
1658
0
      uint32_t uiInterDir = cu.interDir;
1659
0
      int  iSymMode = cu.smvdMode;
1660
1661
0
      Mv cMvd[2];
1662
0
      uint32_t uiMvpIdx[2], uiMvpNum[2];
1663
0
      uiMvpIdx[0] = cu.mvpIdx[REF_PIC_LIST_0];
1664
0
      uiMvpIdx[1] = cu.mvpIdx[REF_PIC_LIST_1];
1665
0
      uiMvpNum[0] = cu.mvpNum[REF_PIC_LIST_0];
1666
0
      uiMvpNum[1] = cu.mvpNum[REF_PIC_LIST_1];
1667
0
      cMvd[0] = cu.mvd[REF_PIC_LIST_0][0];
1668
0
      cMvd[1] = cu.mvd[REF_PIC_LIST_1][0];
1669
1670
0
      MvField cHevcMvField[2];
1671
0
      cHevcMvField[0].setMvField(cu.mv[REF_PIC_LIST_0][0], cu.refIdx[REF_PIC_LIST_0]);
1672
0
      cHevcMvField[1].setMvField(cu.mv[REF_PIC_LIST_1][0], cu.refIdx[REF_PIC_LIST_1]);
1673
1674
      // do affine ME & Merge
1675
0
      cu.affineType = AFFINEMODEL_4PARAM;
1676
0
      Mv acMvAffine4Para[2][MAX_REF_PICS][3];
1677
0
      int refIdx4Para[2] = { -1, -1 };
1678
1679
0
      xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1680
1681
0
      if (cu.imv == IMV_OFF)
1682
0
      {
1683
0
        storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_4PARAM, BcwIdx);
1684
0
      }
1685
0
      if (cu.slice->sps->AffineType && uiAffineCost != MAX_DISTORTION)
1686
0
      {
1687
0
        if (uiAffineCost < uiHevcCost * 1.05) ///< condition for 6 parameter affine ME
1688
0
        {
1689
          // save 4 parameter results
1690
0
          Mv bestMv[2][3], bestMvd[2][3];
1691
0
          int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2];
1692
0
          uint8_t bestInterDir;
1693
1694
0
          bestInterDir = cu.interDir;
1695
0
          bestRefIdx[0] = cu.refIdx[0];
1696
0
          bestRefIdx[1] = cu.refIdx[1];
1697
0
          bestMvpIdx[0] = cu.mvpIdx[0];
1698
0
          bestMvpIdx[1] = cu.mvpIdx[1];
1699
0
          bestMvpNum[0] = cu.mvpNum[0];
1700
0
          bestMvpNum[1] = cu.mvpNum[1];
1701
1702
0
          for (int refList = 0; refList < 2; refList++)
1703
0
          {
1704
0
            bestMv[refList][0] = cu.mv[refList][0];
1705
0
            bestMv[refList][1] = cu.mv[refList][1];
1706
0
            bestMv[refList][2] = cu.mv[refList][2];
1707
0
            bestMvd[refList][0] = cu.mvd[refList][0];
1708
0
            bestMvd[refList][1] = cu.mvd[refList][1];
1709
0
            bestMvd[refList][2] = cu.mvd[refList][2];
1710
0
          }
1711
1712
0
          refIdx4Para[0] = bestRefIdx[0];
1713
0
          refIdx4Para[1] = bestRefIdx[1];
1714
1715
0
          Distortion uiAffine6Cost = MAX_DISTORTION;
1716
0
          cu.affineType = AFFINEMODEL_6PARAM;
1717
0
          xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1718
1719
0
          if (cu.imv == IMV_OFF)
1720
0
          {
1721
0
            storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_6PARAM, BcwIdx);
1722
0
          }
1723
1724
          // reset to 4 parameter affine inter mode
1725
0
          if (uiAffineCost <= uiAffine6Cost)
1726
0
          {
1727
0
            cu.affineType = AFFINEMODEL_4PARAM;
1728
0
            cu.interDir = bestInterDir;
1729
0
            cu.refIdx[0] = bestRefIdx[0];
1730
0
            cu.refIdx[1] = bestRefIdx[1];
1731
0
            cu.mvpIdx[0] = bestMvpIdx[0];
1732
0
            cu.mvpIdx[1] = bestMvpIdx[1];
1733
0
            cu.mvpNum[0] = bestMvpNum[0];
1734
0
            cu.mvpNum[1] = bestMvpNum[1];
1735
1736
0
            for (int verIdx = 0; verIdx < 3; verIdx++)
1737
0
            {
1738
0
              cu.mvd[REF_PIC_LIST_0][verIdx] = bestMvd[0][verIdx];
1739
0
              cu.mvd[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx];
1740
0
            }
1741
1742
0
            CU::setAllAffineMv(cu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0);
1743
0
            CU::setAllAffineMv(cu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1);
1744
0
          }
1745
0
          else
1746
0
          {
1747
0
            uiAffineCost = uiAffine6Cost;
1748
0
          }
1749
0
        }
1750
1751
0
        uiAffineCost += m_pcRdCost->getCost(1); // add one bit for affine_type
1752
0
      }
1753
1754
0
      if (uiHevcCost <= uiAffineCost)
1755
0
      {
1756
        // set hevc me result
1757
0
        cu.affine = false;
1758
0
        cu.mergeFlag = bMergeFlag;
1759
0
        cu.mergeIdx = uiMRGIndex;
1760
0
        cu.interDir = uiInterDir;
1761
0
        cu.smvdMode = iSymMode;
1762
0
        cu.mv[REF_PIC_LIST_0][0]  = cHevcMvField[0].mv;
1763
0
        cu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx;
1764
0
        cu.mv[REF_PIC_LIST_1][0]  = cHevcMvField[1].mv;
1765
0
        cu.refIdx[REF_PIC_LIST_1] = cHevcMvField[1].refIdx;
1766
0
        cu.mvpIdx[REF_PIC_LIST_0] = uiMvpIdx[0];
1767
0
        cu.mvpIdx[REF_PIC_LIST_1] = uiMvpIdx[1];
1768
0
        cu.mvpNum[REF_PIC_LIST_0] = uiMvpNum[0];
1769
0
        cu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1];
1770
0
        cu.mvd[REF_PIC_LIST_0][0] = cMvd[0];
1771
0
        cu.mvd[REF_PIC_LIST_1][0] = cMvd[1];
1772
0
      }
1773
0
      else
1774
0
      {
1775
0
        cu.smvdMode = 0;
1776
0
        CHECK(!cu.affine, "Wrong.");
1777
0
        uiLastMode = uiLastModeTemp;
1778
0
      }
1779
0
    }
1780
1781
0
    if( cu.interDir == 3 && !cu.mergeFlag )
1782
0
    {
1783
0
      if (BcwIdx != BCW_DEFAULT)
1784
0
      {
1785
0
        cu.BcwIdx = BcwIdx;
1786
0
      }
1787
0
    }
1788
1789
0
    CU::spanMotionInfo( cu );
1790
1791
0
    m_skipPROF = false;
1792
0
    m_encOnly  = false;
1793
    //  MC
1794
0
    PelUnitBuf predBuf = cu.cs->getPredBuf(cu);
1795
0
    motionCompensation( cu, predBuf, REF_PIC_LIST_X );
1796
0
    puIdx++;
1797
0
  }
1798
1799
0
  return false;
1800
0
}
1801
1802
// AMVP
1803
void InterSearch::xEstimateMvPredAMVP( CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv& rcMvPred, AMVPInfo& rAMVPInfo, Distortion& distBiP )
1804
0
{
1805
0
  Mv         cBestMv;
1806
0
  int        iBestIdx   = 0;
1807
0
  Distortion uiBestCost = MAX_DISTORTION;
1808
0
  int        i;
1809
1810
0
  AMVPInfo*  pcAMVPInfo = &rAMVPInfo;
1811
1812
  // Fill the MV Candidates
1813
0
  CU::fillMvpCand( cu, refPicList, iRefIdx, *pcAMVPInfo );
1814
1815
  // initialize Mvp index & Mvp
1816
0
  iBestIdx = 0;
1817
0
  cBestMv  = pcAMVPInfo->mvCand[0];
1818
1819
0
  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );
1820
1821
  //-- Check Minimum Cost.
1822
0
  for( i = 0 ; i < pcAMVPInfo->numCand; i++)
1823
0
  {
1824
0
    Mv mvCand = pcAMVPInfo->mvCand[i];
1825
0
    if( m_pcEncCfg->m_ifpLines )
1826
0
      xClipMvSearch( mvCand, cu.lumaPos(), cu.lumaSize(),*cu.cs->pcv, true );
1827
1828
0
    Distortion uiTmpCost = xGetTemplateCost( cu, origBuf, predBuf, mvCand, i, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx );
1829
0
    if( uiBestCost > uiTmpCost )
1830
0
    {
1831
0
      uiBestCost  = uiTmpCost;
1832
0
      cBestMv     = pcAMVPInfo->mvCand[i];
1833
0
      iBestIdx    = i;
1834
0
      distBiP     = uiTmpCost;
1835
0
    }
1836
0
  }
1837
1838
  // Setting Best MVP
1839
0
  rcMvPred = cBestMv;
1840
0
  cu.mvpIdx[refPicList] = iBestIdx;
1841
0
  cu.mvpNum[refPicList] = pcAMVPInfo->numCand;
1842
1843
0
  return;
1844
0
}
1845
1846
uint32_t InterSearch::xGetMvpIdxBits(int iIdx, int iNum)
1847
62.3k
{
1848
62.3k
  CHECK(iIdx < 0 || iNum < 0 || iIdx >= iNum, "Invalid parameters");
1849
1850
62.3k
  if (iNum == 1)
1851
20.7k
  {
1852
20.7k
    return 0;
1853
20.7k
  }
1854
1855
41.5k
  uint32_t uiLength = 1;
1856
41.5k
  int iTemp = iIdx;
1857
41.5k
  if ( iTemp == 0 )
1858
20.7k
  {
1859
20.7k
    return uiLength;
1860
20.7k
  }
1861
1862
20.7k
  bool bCodeLast = ( iNum-1 > iTemp );
1863
1864
20.7k
  uiLength += (iTemp-1);
1865
1866
20.7k
  if( bCodeLast )
1867
0
  {
1868
0
    uiLength++;
1869
0
  }
1870
1871
20.7k
  return uiLength;
1872
41.5k
}
1873
1874
void InterSearch::xGetBlkBits( bool bPSlice, int iPartIdx, uint32_t uiLastMode, uint32_t uiBlkBit[3])
1875
0
{
1876
0
  uiBlkBit[0] = (! bPSlice) ? 3 : 1;
1877
0
  uiBlkBit[1] = 3;
1878
0
  uiBlkBit[2] = 5;
1879
0
}
1880
1881
void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
1882
0
{
1883
0
  pDst->numCand = pSrc->numCand;
1884
0
  for (int i = 0; i < pSrc->numCand; i++)
1885
0
  {
1886
0
    pDst->mvCand[i] = pSrc->mvCand[i];
1887
0
  }
1888
0
}
1889
1890
void InterSearch::xCheckBestMVP ( RefPicList refPicList, const Mv& cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
1891
0
{
1892
0
  if ( imv > 0 && imv < 3 )
1893
0
  {
1894
0
    return;
1895
0
  }
1896
1897
0
  AMVPInfo* pcAMVPInfo = &amvpInfo;
1898
1899
0
  CHECK(pcAMVPInfo->mvCand[riMVPIdx] != rcMvPred, "Invalid MV prediction candidate");
1900
1901
0
  if (pcAMVPInfo->numCand < 2)
1902
0
  {
1903
0
    return;
1904
0
  }
1905
1906
0
  m_pcRdCost->setCostScale ( 0    );
1907
1908
0
  int iBestMVPIdx = riMVPIdx;
1909
1910
0
  Mv pred = rcMvPred;
1911
0
  pred.changeTransPrecInternal2Amvr(imv);
1912
0
  m_pcRdCost->setPredictor( pred );
1913
0
  Mv mv = cMv;
1914
0
  mv.changeTransPrecInternal2Amvr(imv);
1915
0
  int iOrgMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
1916
0
  iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
1917
0
  int iBestMvBits = iOrgMvBits;
1918
1919
0
  for (int iMVPIdx = 0; iMVPIdx < pcAMVPInfo->numCand; iMVPIdx++)
1920
0
  {
1921
0
    if (iMVPIdx == riMVPIdx)
1922
0
    {
1923
0
      continue;
1924
0
    }
1925
1926
0
    pred = pcAMVPInfo->mvCand[iMVPIdx];
1927
0
    pred.changeTransPrecInternal2Amvr(imv);
1928
0
    m_pcRdCost->setPredictor( pred );
1929
0
    int iMvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
1930
0
    iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
1931
1932
0
    if (iMvBits < iBestMvBits)
1933
0
    {
1934
0
      iBestMvBits = iMvBits;
1935
0
      iBestMVPIdx = iMVPIdx;
1936
0
    }
1937
0
  }
1938
1939
0
  if (iBestMVPIdx != riMVPIdx)  //if changed
1940
0
  {
1941
0
    rcMvPred = pcAMVPInfo->mvCand[iBestMVPIdx];
1942
1943
0
    riMVPIdx = iBestMVPIdx;
1944
0
    uint32_t uiOrgBits = ruiBits;
1945
0
    ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits;
1946
0
    ruiCost = (ruiCost - m_pcRdCost->getCost( uiOrgBits ))  + m_pcRdCost->getCost( ruiBits );
1947
0
  }
1948
0
}
1949
1950
1951
Distortion InterSearch::xGetTemplateCost( const CodingUnit& cu,
1952
                                          CPelUnitBuf& origBuf,
1953
                                          PelUnitBuf&  predBuf,
1954
                                          Mv           cMvCand,
1955
                                          int          iMVPIdx,
1956
                                          int          iMVPNum,
1957
                                          RefPicList   refPicList,
1958
                                          int          iRefIdx
1959
)
1960
0
{
1961
0
  Distortion uiCost = MAX_DISTORTION;
1962
1963
0
  const Picture* picRef = cu.slice->getRefPic( refPicList, iRefIdx );
1964
0
  clipMv( cMvCand, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
1965
1966
  // prediction pattern
1967
0
  xPredInterBlk( COMP_Y, cu, picRef, cMvCand, predBuf, false, cu.slice->clpRngs[ COMP_Y ], false, false);
1968
1969
  // calc distortion
1970
1971
0
  uiCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_SAD);
1972
0
  uiCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );
1973
1974
0
  return uiCost;
1975
0
}
1976
1977
void InterSearch::xMotionEstimation(CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
1978
0
{
1979
0
  if( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedUniMv( cu, refPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost ) )
1980
0
  {
1981
0
    return;
1982
0
  }
1983
1984
0
  Mv cMvHalf, cMvQter;
1985
1986
0
  CHECK(refPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
1987
0
  m_iSearchRange = m_aaiAdaptSR[refPicList][iRefIdxPred];
1988
1989
0
  int    iSrchRng   = (bBi ? m_bipredSearchRange : m_iSearchRange);
1990
0
  double fWeight    = 1.0;
1991
1992
0
  CPelUnitBuf  origBufTmpCnst;
1993
0
  CPelUnitBuf* pBuf      = &origBuf;
1994
1995
0
  if(bBi) // Bi-predictive ME
1996
0
  {
1997
0
    PelUnitBuf  origBufTmp = m_tmpStorageLCU.getCompactBuf( cu );
1998
    // NOTE: Other buf contains predicted signal from another direction
1999
0
    PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
2000
0
    origBufTmp.copyFrom(origBuf);
2001
0
    origBufTmp.removeHighFreq( otherBuf, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs );
2002
   
2003
0
    origBufTmpCnst = origBufTmp;
2004
0
    pBuf           = &origBufTmpCnst;
2005
0
    fWeight        = xGetMEDistortionWeight( cu.BcwIdx, refPicList );
2006
0
  }
2007
2008
  //  Search key pattern initialization
2009
0
  CPelBuf  tmpPattern   = pBuf->Y();
2010
0
  CPelBuf* pcPatternKey = &tmpPattern;
2011
2012
0
  m_lumaClpRng = cu.cs->slice->clpRngs[ COMP_Y ];
2013
2014
0
  const Picture* refPic = cu.slice->getRefPic(refPicList, iRefIdxPred);
2015
0
  CPelBuf buf = refPic->getRecoBuf(cu.blocks[COMP_Y]);
2016
2017
0
  TZSearchStruct cStruct;
2018
0
  cStruct.pcPatternKey  = pcPatternKey;
2019
0
  cStruct.iRefStride    = buf.stride;
2020
0
  cStruct.piRefY        = buf.buf;
2021
0
  cStruct.imvShift      = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
2022
0
  cStruct.useAltHpelIf  = cu.imv == IMV_HPEL;
2023
0
  cStruct.zeroMV        = false;
2024
0
  cStruct.uiBestSad     = MAX_DISTORTION;
2025
2026
2027
0
  CodedCUInfo &relatedCU = m_modeCtrl->getBlkInfo( cu );
2028
2029
0
  bool bQTBTMV = false;
2030
0
  Mv cIntMv;
2031
0
  if( !bBi )
2032
0
  {
2033
0
    bool bValid = relatedCU.getMv( refPicList, iRefIdxPred, cIntMv );
2034
0
    if( bValid )
2035
0
    {
2036
0
      bQTBTMV = true;
2037
0
      cIntMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL);
2038
0
    }
2039
0
  }
2040
2041
0
  Mv predQuarter = rcMvPred;
2042
0
  predQuarter.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
2043
0
  m_pcRdCost->setPredictor( predQuarter );
2044
0
  m_pcRdCost->setCostScale(2);
2045
2046
  //  Do integer search
2047
0
  if( m_motionEstimationSearchMethod == VVENC_MESEARCH_FULL || bBi )
2048
0
  {
2049
0
    cStruct.subShiftMode = m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE1 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 ? 1 : 0;
2050
0
    m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2051
2052
0
    Mv bestInitMv = (bBi ? rcMv : rcMvPred);
2053
0
    Mv cTmpMv     = bestInitMv;
2054
0
    xClipMvSearch(cTmpMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines );
2055
0
    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
2056
0
    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
2057
0
    Distortion uiBestSad = m_cDistParam.distFunc(m_cDistParam);
2058
0
    uiBestSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
2059
2060
0
    Mv prevMv[BlkUniMvInfoBuffer::m_uniMvListMaxSize];
2061
2062
0
    for( int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++ )
2063
0
    {
2064
0
      const BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo( i );
2065
0
      cTmpMv = curMvInfo->uniMvs[refPicList][iRefIdxPred];
2066
0
      prevMv[i] = cTmpMv;
2067
2068
0
      int j = 0;
2069
0
      for( ; j < i; j++ )
2070
0
      {
2071
0
        if( cTmpMv == prevMv[j] )
2072
0
        {
2073
0
          break;
2074
0
        }
2075
0
      }
2076
0
      if( j < i )
2077
0
        continue;
2078
2079
0
      xClipMvSearch(cTmpMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines);
2080
0
      cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
2081
0
      m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
2082
2083
0
      Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
2084
0
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
2085
0
      if( uiSad < uiBestSad )
2086
0
      {
2087
0
        uiBestSad = uiSad;
2088
0
        bestInitMv = curMvInfo->uniMvs[refPicList][iRefIdxPred];
2089
0
        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2090
0
      }
2091
0
    }
2092
2093
0
    xSetSearchRange( cu, bestInitMv, iSrchRng, cStruct.searchRange );
2094
0
    xPatternSearch ( cStruct, rcMv, ruiCost);
2095
0
  }
2096
0
  else if( bQTBTMV )
2097
0
  {
2098
0
    rcMv = cIntMv;
2099
0
    cStruct.subShiftMode = ( m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE1 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 ) ? 1 : 0;
2100
0
    xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiCost, false, true );
2101
0
  }
2102
0
  else
2103
0
  {
2104
0
    cStruct.subShiftMode = ( m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE1 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 ) ? 1 : 0;
2105
0
    rcMv = rcMvPred;
2106
0
    xPatternSearchFast(cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiCost );
2107
0
    relatedCU.setMv( refPicList, iRefIdxPred, rcMv );
2108
0
  }
2109
2110
0
  DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), cu.slice->poc, 0, ( int ) refPicList, ( int ) bBi, cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, ruiCost );
2111
  // sub-pel refinement for sub-pel resolution
2112
0
  if ( cu.imv == IMV_OFF || cu.imv == IMV_HPEL )
2113
0
  {
2114
0
    if ( m_pcEncCfg->m_fastSubPel != 2 )
2115
0
    {
2116
0
      xPatternSearchFracDIF( cu, refPicList, iRefIdxPred, cStruct, rcMv, cMvHalf, cMvQter, ruiCost );
2117
0
    }
2118
0
    m_pcRdCost->setCostScale( 0 );
2119
0
    rcMv <<= 2;
2120
0
    rcMv  += ( cMvHalf <<= 1 );
2121
0
    rcMv  += cMvQter;
2122
0
    uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.hor, rcMv.ver, cStruct.imvShift );
2123
0
    ruiBits += uiMvBits;
2124
0
    ruiCost = ( Distortion ) ( floor( fWeight * ( ( double ) ruiCost - ( double ) m_pcRdCost->getCost( uiMvBits ) ) ) + ( double ) m_pcRdCost->getCost( ruiBits ) );
2125
0
    rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
2126
0
  }
2127
0
  else // integer refinement for integer-pel and 4-pel resolution
2128
0
  {
2129
0
    rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
2130
0
    xPatternSearchIntRefine( cu, cStruct, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, fWeight);
2131
0
  }
2132
0
  DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)refPicList, (int)bBi, ruiCost, ruiBits, rcMv.hor << 2, rcMv.ver << 2);
2133
0
}
2134
2135
void InterSearch::xClipMvSearch( Mv& rcMv, const Position& pos, const struct Size& size, const PreCalcValues& pcv, const int ifpLines )
2136
0
{
2137
0
  int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
2138
0
  int iOffset = 8;
2139
0
  int iHorMax = ( pcv.lumaWidth + iOffset - ( int ) pos.x - 1 ) << iMvShift;
2140
0
  int iHorMin = ( -( int ) pcv.maxCUSize   - iOffset - ( int ) pos.x + 1 ) * (1 << iMvShift);
2141
2142
0
  int maxLumaHeight = ifpLines && ((pos.y >> pcv.maxCUSizeLog2) + ifpLines + 1 < pcv.heightInCtus) ? 
2143
    
2144
0
    (((pos.y >> pcv.maxCUSizeLog2) + ifpLines + 1) << pcv.maxCUSizeLog2 ) - size.height - 4  // 4 samples from DCTIF vertical bottom part
2145
2146
0
    : pcv.lumaHeight + iOffset;
2147
2148
0
  int iVerMax = ( maxLumaHeight - ( int ) pos.y - 1 ) << iMvShift;
2149
0
  int iVerMin = ( -( int ) pcv.maxCUSize   - iOffset - ( int ) pos.y + 1 ) * (1 << iMvShift);
2150
2151
0
  rcMv.hor = ( std::min( iHorMax, std::max( iHorMin, rcMv.hor ) ) );
2152
0
  rcMv.ver = ( std::min( iVerMax, std::max( iVerMin, rcMv.ver ) ) );
2153
0
}
2154
2155
void InterSearch::xClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
2156
0
{
2157
0
  const int yCompScale = 0;
2158
0
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
2159
0
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;
2160
0
  const int yRefMax     = ( ( ( yB >> ctuLogScale ) + ifpLines + 1 ) << ctuLogScale ) - 1;
2161
0
  const int yRefMv      = yB + nH + ( 4 >> yCompScale ) + (mv.ver >> mvPrecShift) - 1;
2162
0
  CHECKD( yRefMv <= yRefMax, "Not expected" );
2163
0
  mv.ver -= ( yRefMv - yRefMax ) << mvPrecShift;
2164
0
}
2165
2166
void InterSearch::xCheckAndClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
2167
0
{
2168
0
  const int yCompScale  = 0;
2169
0
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
2170
0
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;
2171
0
  const int yBMax       = ( pcv.heightInCtus - 1 - ifpLines ) << ctuLogScale;
2172
0
  if( yB < yBMax )
2173
0
  {
2174
0
    const int yRefMax = ( ( ( yB >> ctuLogScale ) + ifpLines + 1 ) << ctuLogScale ) - 1;
2175
0
    const int yRefMv  = yB + nH + ( 4 >> yCompScale ) + (mv.ver >> mvPrecShift) - 1;
2176
0
    if( yRefMv > yRefMax )
2177
0
    {
2178
      // clip MV
2179
0
      mv.ver -= (yRefMv - yRefMax) << mvPrecShift;
2180
0
    }
2181
0
  }
2182
0
}
2183
2184
void InterSearch::xSetSearchRange ( const CodingUnit& cu,
2185
                                    const Mv& cMvPred,
2186
                                    const int iSrchRng,
2187
                                    SearchRange& sr )
2188
0
{
2189
0
  const PreCalcValues& pcv = *cu.cs->pcv;
2190
0
  const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
2191
0
  Mv cFPMvPred = cMvPred;
2192
0
  clipMv( cFPMvPred, cu.lumaPos(), cu.lumaSize(), pcv );
2193
2194
0
  Mv mvTL(cFPMvPred.hor - (iSrchRng << iMvShift), cFPMvPred.ver - (iSrchRng << iMvShift));
2195
0
  Mv mvBR(cFPMvPred.hor + (iSrchRng << iMvShift), cFPMvPred.ver + (iSrchRng << iMvShift));
2196
2197
0
  clipMv( mvTL, cu.lumaPos(), cu.lumaSize(), pcv);
2198
0
  xClipMvSearch( mvBR, cu.lumaPos(), cu.lumaSize(), pcv, m_pcEncCfg->m_ifpLines );
2199
2200
0
  mvTL.divideByPowerOf2( iMvShift );
2201
0
  mvBR.divideByPowerOf2( iMvShift );
2202
2203
0
  sr.left   = mvTL.hor;
2204
0
  sr.top    = mvTL.ver;
2205
0
  sr.right  = mvBR.hor;
2206
0
  sr.bottom = mvBR.ver;
2207
0
}
2208
2209
2210
void InterSearch::xPatternSearch( TZSearchStruct&  cStruct,
2211
                                  Mv&                 rcMv,
2212
                                  Distortion&         ruiSAD )
2213
0
{
2214
0
  Distortion  uiSad;
2215
0
  Distortion  uiSadBest = MAX_DISTORTION;
2216
0
  int         iBestX = 0;
2217
0
  int         iBestY = 0;
2218
2219
  //-- jclee for using the SAD function pointer
2220
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2221
2222
0
  const SearchRange& sr = cStruct.searchRange;
2223
2224
0
  const Pel* piRef = cStruct.piRefY + (sr.top * cStruct.iRefStride);
2225
0
  for ( int y = sr.top; y <= sr.bottom; y++ )
2226
0
  {
2227
0
    for ( int x = sr.left; x <= sr.right; x++ )
2228
0
    {
2229
      //  find min. distortion position
2230
0
      m_cDistParam.cur.buf = piRef + x;
2231
2232
0
      uiSad = m_cDistParam.distFunc( m_cDistParam );
2233
2234
      // motion cost
2235
0
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y, cStruct.imvShift );
2236
2237
0
      if ( uiSad < uiSadBest )
2238
0
      {
2239
0
        uiSadBest = uiSad;
2240
0
        iBestX    = x;
2241
0
        iBestY    = y;
2242
0
        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2243
0
      }
2244
0
    }
2245
0
    piRef += cStruct.iRefStride;
2246
0
  }
2247
0
  rcMv.set( iBestX, iBestY );
2248
2249
0
  cStruct.uiBestSad = uiSadBest; // th for testing
2250
0
  ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY, cStruct.imvShift );
2251
0
  return;
2252
0
}
2253
2254
2255
void InterSearch::xPatternSearchFast( const CodingUnit& cu,
2256
                                      RefPicList            refPicList,
2257
                                      int                   iRefIdxPred,
2258
                                      TZSearchStruct&       cStruct,
2259
                                      Mv&                   rcMv,
2260
                                      Distortion&           ruiSAD )
2261
0
{
2262
0
  if( cu.cs->picture->useME )
2263
0
  {
2264
0
    switch ( m_motionEstimationSearchMethodSCC )
2265
0
    {
2266
0
      case 3: //VVENC_MESEARCH_DIAMOND_FAST:
2267
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true, true );
2268
0
        break;
2269
0
      case 2: //VVENC_MESEARCH_DIAMOND:
2270
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2271
0
        break;
2272
0
      default:
2273
0
        THROW("shouldn't get here");
2274
0
        break;
2275
0
    }
2276
0
    return;
2277
0
  }
2278
2279
0
  switch ( m_motionEstimationSearchMethod )
2280
0
  {
2281
0
    case VVENC_MESEARCH_DIAMOND_FAST:
2282
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false, true );
2283
0
      break;
2284
0
    case VVENC_MESEARCH_DIAMOND:
2285
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false );
2286
0
      break;
2287
0
    case VVENC_MESEARCH_DIAMOND_ENHANCED:
2288
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2289
0
      break;
2290
0
    case VVENC_MESEARCH_FULL:
2291
0
    default:
2292
0
      THROW("shouldn't get here");
2293
0
      break;
2294
0
  }
2295
0
}
2296
2297
2298
void InterSearch::xTZSearch( const CodingUnit& cu,
2299
                             RefPicList            refPicList,
2300
                             int                   iRefIdxPred,
2301
                             TZSearchStruct&       cStruct,
2302
                             Mv&                   rcMv,
2303
                             Distortion&           ruiSAD,
2304
                             const bool            bExtendedSettings,
2305
                             const bool            bFastSettings)
2306
0
{
2307
0
  const bool bUseRasterInFastMode                    = true; //toggle this to further reduce runtime
2308
0
  const bool bUseAdaptiveRaster                      = bExtendedSettings;
2309
0
  const int  iRaster                                 = (bFastSettings && bUseRasterInFastMode) ? 8 : 5;
2310
0
  const bool bTestZeroVector                         = true && !bFastSettings;
2311
0
  const bool bTestZeroVectorStart                    = bExtendedSettings;
2312
0
  const bool bTestZeroVectorStop                     = false;
2313
0
  const bool bFirstSearchDiamond                     = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2314
0
  const bool bFirstCornersForDiamondDist1            = bExtendedSettings;
2315
0
  const bool bFirstSearchStop                        = m_pcEncCfg->m_bFastMEAssumingSmootherMVEnabled;
2316
0
  const uint32_t uiFirstSearchRounds                 = bFastSettings ? (bUseRasterInFastMode?3:2) : 3;     // first search stop X rounds after best match (must be >=1)
2317
0
  const bool bEnableRasterSearch                     = bFastSettings ? bUseRasterInFastMode : true;
2318
0
  const bool bAlwaysRasterSearch                     = bExtendedSettings;  // true: BETTER but factor 2 slower
2319
0
  const bool bRasterRefinementEnable                 = false; // enable either raster refinement or star refinement
2320
0
  const bool bRasterRefinementDiamond                = false; // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2321
0
  const bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings;
2322
0
  const bool bStarRefinementEnable                   = true;  // enable either star refinement or raster refinement
2323
0
  const bool bStarRefinementDiamond                  = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2324
0
  const bool bStarRefinementCornersForDiamondDist1   = bExtendedSettings;
2325
0
  const bool bStarRefinementStop                     = bFastSettings;
2326
0
  const uint32_t uiStarRefinementRounds              = 2;  // star refinement stop X rounds after best match (must be >=1)
2327
0
  const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
2328
2329
0
  int iSearchRange = m_iSearchRange;
2330
0
  xClipMvSearch( rcMv, cu.lumaPos(), cu.lumaSize(),*cu.cs->pcv, m_pcEncCfg->m_ifpLines );
2331
0
  rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
2332
0
  rcMv.divideByPowerOf2(2);
2333
2334
  //
2335
0
  m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad;
2336
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2337
2338
  // set rcMv (Median predictor) as start point and as best point
2339
0
  xTZSearchHelp( cStruct, rcMv.hor, rcMv.ver, 0, 0 );
2340
2341
  // test whether zero Mv is better start point than Median predictor
2342
0
  if ( bTestZeroVector )
2343
0
  {
2344
0
    if( ( rcMv.hor != 0 || rcMv.ver != 0 ) && ( 0 != cStruct.iBestX || 0 != cStruct.iBestY ) )
2345
0
    {
2346
      // only test 0-vector if not obviously previously tested.
2347
0
      xTZSearchHelp( cStruct, 0, 0, 0, 0 );
2348
0
    }
2349
0
  }
2350
2351
0
  SearchRange& sr = cStruct.searchRange;
2352
2353
0
  for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
2354
0
  {
2355
0
    const BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
2356
0
    Mv cTmpMv = curMvInfo->uniMvs[refPicList][iRefIdxPred];
2357
2358
0
    xClipMvSearch(cTmpMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines);
2359
0
    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
2360
0
    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
2361
2362
0
    Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
2363
0
    uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
2364
0
    if (uiSad < cStruct.uiBestSad)
2365
0
    {
2366
0
      cStruct.uiBestSad = uiSad;
2367
0
      cStruct.iBestX = cTmpMv.hor;
2368
0
      cStruct.iBestY = cTmpMv.ver;
2369
0
      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2370
0
    }
2371
0
  }
2372
2373
0
  {
2374
    // set search range
2375
0
    Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
2376
0
    currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL;
2377
0
    xSetSearchRange(cu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr );
2378
0
  }
2379
2380
  // starting point after initial examination
2381
0
  int  iDist = 0;
2382
0
  int  iStartX = cStruct.iBestX;
2383
0
  int  iStartY = cStruct.iBestY;
2384
2385
  // Early termination of motion search after selection of starting candidate
2386
0
  if( m_pcEncCfg->m_bIntegerET )
2387
0
  {
2388
0
    bool isLargeBlock = cu.lumaSize().area() > 64;
2389
0
    xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, 1, false ); // 4-point small diamond search
2390
0
    if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2391
0
    {
2392
0
      if ( isLargeBlock )
2393
0
      {
2394
0
        xTZ4PointSquareSearch( cStruct, iStartX, iStartY, 1 );
2395
0
        if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2396
0
        {
2397
          // write out best match
2398
0
          rcMv.set( cStruct.iBestX, cStruct.iBestY );
2399
0
          ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2400
0
          return;
2401
0
        }
2402
0
      }
2403
0
      else
2404
0
      {
2405
        // write out best match
2406
0
        rcMv.set( cStruct.iBestX, cStruct.iBestY );
2407
0
        ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2408
0
        return;
2409
0
      }
2410
0
    }
2411
0
  }
2412
2413
  // start search
2414
0
  iDist = 0;
2415
0
  iStartX = cStruct.iBestX;
2416
0
  iStartY = cStruct.iBestY;
2417
2418
0
  const bool bBestCandidateZero = ( cStruct.iBestX == 0 ) && ( cStruct.iBestY == 0 );
2419
2420
  // first search around best position up to now.
2421
  // The following works as a "subsampled/log" window search around the best candidate
2422
0
  for( iDist = 1; iDist <= iSearchRange; iDist *= 2 )
2423
0
  {
2424
0
    if( bFirstSearchDiamond == 1 )
2425
0
    {
2426
0
      xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 );
2427
0
    }
2428
0
    else
2429
0
    {
2430
0
      xTZ8PointSquareSearch( cStruct, iStartX, iStartY, iDist );
2431
0
    }
2432
2433
0
    if( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
2434
0
    {
2435
0
      break;
2436
0
    }
2437
0
  }
2438
2439
0
  if( bNewZeroNeighbourhoodTest )
2440
0
  {
2441
0
    if( bTestZeroVectorStart && !bBestCandidateZero )
2442
0
    {
2443
0
      for( iDist = 1; iDist <= ( iSearchRange >> 1 ); iDist *= 2 )
2444
0
      {
2445
0
        xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false );
2446
0
        if( bTestZeroVectorStop && ( cStruct.uiBestRound > 2 ) ) // stop criterion
2447
0
        {
2448
0
          break;
2449
0
        }
2450
0
      }
2451
0
    }
2452
0
  }
2453
2454
  // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2455
0
  if ( cStruct.uiBestDistance == 1 )
2456
0
  {
2457
0
    cStruct.uiBestDistance = 0;
2458
0
    xTZ2PointSearch( cStruct );
2459
0
  }
2460
2461
  // raster search if distance is too big
2462
0
  if( bUseAdaptiveRaster )
2463
0
  {
2464
0
    int iWindowSize     = iRaster;
2465
0
    SearchRange localsr = sr;
2466
2467
0
    if( !( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) ) ) )
2468
0
    {
2469
0
      iWindowSize++;
2470
0
      localsr.left    /= 2;
2471
0
      localsr.right   /= 2;
2472
0
      localsr.top     /= 2;
2473
0
      localsr.bottom  /= 2;
2474
0
    }
2475
2476
0
    cStruct.uiBestDistance = iWindowSize;
2477
2478
0
    for( iStartY = localsr.top; iStartY <= localsr.bottom; iStartY += iWindowSize )
2479
0
    {
2480
0
      for( iStartX = localsr.left; iStartX <= localsr.right; iStartX += iWindowSize )
2481
0
      {
2482
0
        xTZSearchHelp( cStruct, iStartX, iStartY, 0, iWindowSize );
2483
0
      }
2484
0
    }
2485
0
  }
2486
0
  else
2487
0
  {
2488
0
    if( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) || bAlwaysRasterSearch ) )
2489
0
    {
2490
0
      cStruct.uiBestDistance = iRaster;
2491
2492
0
      for( iStartY = sr.top; iStartY <= sr.bottom; iStartY += iRaster )
2493
0
      {
2494
0
        for( iStartX = sr.left; iStartX <= sr.right; iStartX += iRaster )
2495
0
        {
2496
0
          xTZSearchHelp( cStruct, iStartX, iStartY, 0, iRaster );
2497
0
        }
2498
0
      }
2499
0
    }
2500
0
  }
2501
2502
  // raster refinement
2503
2504
0
  if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
2505
0
  {
2506
0
    while ( cStruct.uiBestDistance > 0 )
2507
0
    {
2508
0
      iStartX = cStruct.iBestX;
2509
0
      iStartY = cStruct.iBestY;
2510
0
      if ( cStruct.uiBestDistance > 1 )
2511
0
      {
2512
0
        iDist = cStruct.uiBestDistance >>= 1;
2513
0
        if ( bRasterRefinementDiamond == 1 )
2514
0
        {
2515
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 );
2516
0
        }
2517
0
        else
2518
0
        {
2519
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2520
0
        }
2521
0
      }
2522
2523
      // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2524
0
      if ( cStruct.uiBestDistance == 1 )
2525
0
      {
2526
0
        cStruct.uiBestDistance = 0;
2527
0
        if ( cStruct.ucPointNr != 0 )
2528
0
        {
2529
0
          xTZ2PointSearch( cStruct );
2530
0
        }
2531
0
      }
2532
0
    }
2533
0
  }
2534
2535
  // star refinement
2536
0
  if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
2537
0
  {
2538
0
    while ( cStruct.uiBestDistance > 0 )
2539
0
    {
2540
0
      iStartX = cStruct.iBestX;
2541
0
      iStartY = cStruct.iBestY;
2542
0
      cStruct.uiBestDistance = 0;
2543
0
      cStruct.ucPointNr = 0;
2544
0
      for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 )
2545
0
      {
2546
0
        if ( bStarRefinementDiamond == 1 )
2547
0
        {
2548
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 );
2549
0
        }
2550
0
        else
2551
0
        {
2552
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2553
0
        }
2554
0
        if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
2555
0
        {
2556
0
          break;
2557
0
        }
2558
0
      }
2559
2560
      // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1
2561
0
      if ( cStruct.uiBestDistance == 1 )
2562
0
      {
2563
0
        cStruct.uiBestDistance = 0;
2564
0
        if ( cStruct.ucPointNr != 0 )
2565
0
        {
2566
0
          xTZ2PointSearch( cStruct );
2567
0
        }
2568
0
      }
2569
0
    }
2570
0
  }
2571
2572
  // write out best match
2573
0
  rcMv.set( cStruct.iBestX, cStruct.iBestY );
2574
0
  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2575
0
}
2576
2577
void InterSearch::xPatternSearchIntRefine(CodingUnit& cu, TZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
2578
0
{
2579
2580
0
  CHECK( cu.imv == IMV_OFF || cu.imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used.");
2581
0
  CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");
2582
2583
0
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, 0, m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0 );
2584
2585
  // -> set MV scale for cost calculation to QPEL (0)
2586
0
  m_pcRdCost->setCostScale ( 0 );
2587
2588
0
  Distortion  uiDist, uiSATD = 0;
2589
0
  Distortion  uiBestDist  = MAX_DISTORTION;
2590
  // subtract old MVP costs because costs for all newly tested MVPs are added in here
2591
0
  ruiBits -= m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
2592
2593
0
  Mv cBestMv = rcMv;
2594
0
  Mv cBaseMvd[2];
2595
0
  int iBestBits = 0;
2596
0
  int iBestMVPIdx = riMVPIdx;
2597
0
  Mv testPos[9] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };
2598
2599
2600
0
  cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]);
2601
0
  cBaseMvd[1] = (rcMv - amvpInfo.mvCand[1]);
2602
0
  CHECK( (cBaseMvd[0].hor & 0x03) != 0 || (cBaseMvd[0].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue.");
2603
0
  CHECK( (cBaseMvd[1].hor & 0x03) != 0 || (cBaseMvd[1].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue.");
2604
2605
0
  cBaseMvd[0].roundTransPrecInternal2Amvr(cu.imv);
2606
0
  cBaseMvd[1].roundTransPrecInternal2Amvr(cu.imv);
2607
2608
  // test best integer position and all 8 neighboring positions
2609
0
  for (int pos = 0; pos < 9; pos ++)
2610
0
  {
2611
0
    Mv cTestMv[2];
2612
    // test both AMVP candidates for each position
2613
0
    for (int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++)
2614
0
    {
2615
0
      cTestMv[iMVPIdx] = testPos[pos];
2616
0
      cTestMv[iMVPIdx].changeTransPrecAmvr2Internal(cu.imv);
2617
0
      cTestMv[iMVPIdx] += cBaseMvd[iMVPIdx];
2618
0
      cTestMv[iMVPIdx] += amvpInfo.mvCand[iMVPIdx];
2619
2620
0
      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cTestMv[iMVPIdx].ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
2621
0
      {
2622
0
        xClipMvToFppLine( cTestMv[iMVPIdx], cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
2623
0
        cTestMv[iMVPIdx].roundTransPrecInternal2AmvrVertical(cu.imv);
2624
0
      }
2625
2626
0
      if ( iMVPIdx == 0 || cTestMv[0] != cTestMv[1])
2627
0
      {
2628
0
        Mv cTempMV = cTestMv[iMVPIdx];
2629
0
        clipMv(cTempMV, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
2630
0
        m_cDistParam.cur.buf = cStruct.piRefY  + cStruct.iRefStride * (cTempMV.ver >>  MV_FRACTIONAL_BITS_INTERNAL) + (cTempMV.hor >> MV_FRACTIONAL_BITS_INTERNAL);
2631
0
        uiDist = uiSATD = (Distortion) (m_cDistParam.distFunc( m_cDistParam ) * fWeight);
2632
0
      }
2633
0
      else
2634
0
      {
2635
0
        uiDist = uiSATD;
2636
0
      }
2637
2638
0
      int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
2639
0
      Mv pred = amvpInfo.mvCand[iMVPIdx];
2640
0
      pred.changeTransPrecInternal2Amvr(cu.imv);
2641
0
      m_pcRdCost->setPredictor( pred );
2642
0
      Mv mv = cTestMv[iMVPIdx];
2643
0
      mv.changeTransPrecInternal2Amvr(cu.imv);
2644
0
      iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
2645
0
      uiDist += m_pcRdCost->getCost(iMvBits);
2646
2647
0
      if (uiDist < uiBestDist)
2648
0
      {
2649
0
        uiBestDist = uiDist;
2650
0
        cBestMv = cTestMv[iMVPIdx];
2651
0
        iBestMVPIdx = iMVPIdx;
2652
0
        iBestBits = iMvBits;
2653
0
      }
2654
0
    }
2655
0
  }
2656
0
  if( uiBestDist == MAX_DISTORTION )
2657
0
  {
2658
0
    ruiCost = MAX_DISTORTION;
2659
0
    return;
2660
0
  }
2661
2662
0
  rcMv = cBestMv;
2663
0
  rcMvPred = amvpInfo.mvCand[iBestMVPIdx];
2664
0
  riMVPIdx = iBestMVPIdx;
2665
0
  m_pcRdCost->setPredictor( rcMvPred );
2666
2667
0
  ruiBits += iBestBits;
2668
  // taken from JEM 5.0
2669
  // verify since it makes no sence to subtract Lamda*(Rmvd+Rmvpidx) from D+Lamda(Rmvd)
2670
  // this would take the rate for the MVP idx out of the cost calculation
2671
  // however this rate is always 1 so impact is small
2672
0
  ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits);
2673
  // taken from JEM 5.0
2674
  // verify since it makes no sense to add rate for MVDs twicce
2675
2676
0
  return;
2677
0
}
2678
2679
void InterSearch::xPatternSearchFracDIF(
2680
  const CodingUnit& cu,
2681
  RefPicList            refPicList,
2682
  int                   iRefIdx,
2683
  TZSearchStruct&    cStruct,
2684
  const Mv&             rcMvInt,
2685
  Mv&                   rcMvHalf,
2686
  Mv&                   rcMvQter,
2687
  Distortion&           ruiCost
2688
)
2689
0
{
2690
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_FRAC_PEL );
2691
2692
  //  Reference pattern initialization (integer scale)
2693
0
  int         iOffset    = rcMvInt.hor + rcMvInt.ver * cStruct.iRefStride;
2694
0
  CPelBuf cPatternRoi(cStruct.piRefY + iOffset, cStruct.iRefStride, *cStruct.pcPatternKey);
2695
2696
  //  Half-pel refinement
2697
0
  m_pcRdCost->setCostScale(1);
2698
0
  if( 0 == m_pcEncCfg->m_fastSubPel )
2699
0
  {
2700
0
    xExtDIFUpSamplingH( &cPatternRoi, cStruct.useAltHpelIf );
2701
0
  }
2702
2703
0
  rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
2704
0
  Mv baseRefMv(0, 0);
2705
0
  Distortion  uiDistBest = MAX_DISTORTION;
2706
0
  int patternId = 41;
2707
0
  ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2708
0
  patternId -= ( m_pcEncCfg->m_fastSubPel == 1 ? 41 : 0 );
2709
2710
2711
  //  quarter-pel refinement
2712
0
  if( cStruct.imvShift == IMV_OFF && 0 != patternId )
2713
0
  {
2714
0
    PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL );
2715
0
    m_pcRdCost->setCostScale( 0 );
2716
0
    xExtDIFUpSamplingQ( &cPatternRoi, rcMvHalf, patternId );
2717
0
    baseRefMv = rcMvHalf;
2718
0
    baseRefMv <<= 1;
2719
2720
0
    rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
2721
0
    rcMvQter += rcMvHalf;  rcMvQter <<= 1;
2722
0
    ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2723
0
  }
2724
2725
0
}
2726
2727
Distortion InterSearch::xGetSymCost( const CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int BcwIdx )
2728
0
{
2729
0
  Distortion cost = MAX_DISTORTION;
2730
0
  RefPicList eTarRefPicList = (RefPicList)(1 - (int)eCurRefPicList);
2731
2732
  // get prediction of eCurRefPicList
2733
0
  PelUnitBuf  predBufA  = m_tmpPredStorage[eCurRefPicList].getCompactBuf( cu );
2734
0
  const Picture* picRefA = cu.slice->getRefPic( eCurRefPicList, cCurMvField.refIdx );
2735
0
  Mv mvA = cCurMvField.mv;
2736
0
  clipMv( mvA, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
2737
0
  xPredInterBlk( COMP_Y, cu, picRefA, mvA, predBufA, false, cu.slice->clpRngs[ COMP_Y ], false, false );
2738
2739
  // get prediction of eTarRefPicList
2740
0
  PelUnitBuf predBufB = m_tmpPredStorage[eTarRefPicList].getCompactBuf( cu );
2741
0
  const Picture* picRefB = cu.slice->getRefPic( eTarRefPicList, cTarMvField.refIdx );
2742
0
  Mv mvB = cTarMvField.mv;
2743
0
  clipMv( mvB, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
2744
0
  xPredInterBlk( COMP_Y, cu, picRefB, mvB, predBufB, false, cu.slice->clpRngs[ COMP_Y ], false, false );
2745
2746
0
  PelUnitBuf bufTmp = m_tmpStorageLCU.getCompactBuf( cu );
2747
0
  bufTmp.copyFrom( origBuf );
2748
0
  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, eTarRefPicList )*/ );
2749
0
  double fWeight = xGetMEDistortionWeight( cu.BcwIdx, eTarRefPicList );
2750
2751
  // calc distortion
2752
0
  cost = ( Distortion ) floor( fWeight * ( double ) m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD ) );
2753
2754
0
  return(cost);
2755
0
}
2756
2757
Distortion InterSearch::xSymRefineMvSearch( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, 
2758
                                            MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int BcwIdx )
2759
0
{
2760
0
  const Mv mvSearchOffsetCross[4] = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
2761
0
  const Mv mvSearchOffsetSquare[8] = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
2762
0
  const Mv mvSearchOffsetDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 ,  0 ) , Mv( -1 , 1 ) };
2763
0
  const Mv mvSearchOffsetHexagon[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 ,  2 ) , Mv( -2 ,  0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) };
2764
2765
0
  int nDirectStart = 0, nDirectEnd = 0, nDirectRounding = 0, nDirectMask = 0;
2766
0
  const Mv * pSearchOffset;
2767
0
  if ( SearchPattern == 0 )
2768
0
  {
2769
0
    nDirectEnd = 3;
2770
0
    nDirectRounding = 4;
2771
0
    nDirectMask = 0x03;
2772
0
    pSearchOffset = mvSearchOffsetCross;
2773
0
  }
2774
0
  else if ( SearchPattern == 1 )
2775
0
  {
2776
0
    nDirectEnd = 7;
2777
0
    nDirectRounding = 8;
2778
0
    nDirectMask = 0x07;
2779
0
    pSearchOffset = mvSearchOffsetSquare;
2780
0
  }
2781
0
  else if ( SearchPattern == 2 )
2782
0
  {
2783
0
    nDirectEnd = 7;
2784
0
    nDirectRounding = 8;
2785
0
    nDirectMask = 0x07;
2786
0
    pSearchOffset = mvSearchOffsetDiamond;
2787
0
  }
2788
0
  else if ( SearchPattern == 3 )
2789
0
  {
2790
0
    nDirectEnd = 5;
2791
0
    pSearchOffset = mvSearchOffsetHexagon;
2792
0
  }
2793
0
  else
2794
0
  {
2795
0
    THROW( "Invalid search pattern" );
2796
0
  }
2797
2798
0
  int nBestDirect;
2799
0
  for ( uint32_t uiRound = 0; uiRound < uiMaxSearchRounds; uiRound++ )
2800
0
  {
2801
0
    Distortion roundZeroBestCost = MAX_DISTORTION;
2802
0
    const int positionLut[ 8 ] = { 0, 2, 4, 6, 1, 3, 5, 7 };
2803
0
    nBestDirect = -1;
2804
0
    MvField mvCurCenter = rCurMvField;
2805
0
    for ( int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++ )
2806
0
    {
2807
      // terminate the search if none of the first four tested points hasn't provided improvement
2808
0
      if( m_pcEncCfg->m_SMVD > 1 && 2 == SearchPattern && 0 == uiRound && 4 == nIdx && roundZeroBestCost > uiMinCost )
2809
0
      {
2810
0
        break;
2811
0
      }
2812
0
      int nDirect;
2813
0
      if ( SearchPattern == 3 )
2814
0
      {
2815
0
        nDirect = nIdx < 0 ? nIdx + 6 : nIdx >= 6 ? nIdx - 6 : nIdx;
2816
0
      }
2817
0
      else
2818
0
      {
2819
0
        if( m_pcEncCfg->m_SMVD > 1 && 2 == SearchPattern && 0 == uiRound )
2820
0
        {
2821
0
          nDirect = positionLut[ ( nIdx + nDirectRounding ) & nDirectMask ];
2822
0
        }
2823
0
        else
2824
0
        {
2825
0
          nDirect = ( nIdx + nDirectRounding ) & nDirectMask;
2826
0
        }
2827
0
      }
2828
2829
0
      Mv mvOffset = pSearchOffset[nDirect];
2830
0
      mvOffset <<= nSearchStepShift;
2831
0
      MvField mvCand = mvCurCenter, mvPair;
2832
0
      mvCand.mv += mvOffset;
2833
0
      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvCand.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
2834
0
      {
2835
0
        continue; // Skip this pos
2836
0
      }
2837
2838
      // get MVD cost
2839
0
      Mv pred = rcMvCurPred;
2840
0
      pred.changeTransPrecInternal2Amvr(cu.imv);
2841
0
      m_pcRdCost->setPredictor( pred );
2842
0
      m_pcRdCost->setCostScale( 0 );
2843
0
      Mv mv = mvCand.mv;
2844
0
      mv.changeTransPrecInternal2Amvr(cu.imv);
2845
0
      uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
2846
0
      Distortion uiCost = m_pcRdCost->getCost( uiMvBits );
2847
2848
      // get MVD pair and set target MV
2849
0
      mvPair.refIdx = rTarMvField.refIdx;
2850
0
      mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) );
2851
2852
0
      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvPair.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
2853
0
      {
2854
0
        continue; // Skip this pos
2855
0
      }
2856
2857
0
      uiCost += xGetSymCost( cu, origBuf, refPicList, mvCand, mvPair, BcwIdx );
2858
0
      if ( uiCost < uiMinCost )
2859
0
      {
2860
0
        uiMinCost = uiCost;
2861
0
        rCurMvField = mvCand;
2862
0
        rTarMvField = mvPair;
2863
0
        nBestDirect = nDirect;
2864
0
      }
2865
0
      if ( m_pcEncCfg->m_SMVD > 1 && 2 == SearchPattern && 0 == uiRound && 4 > nIdx && uiCost < roundZeroBestCost)
2866
0
      {
2867
0
        roundZeroBestCost = uiCost;
2868
0
      }
2869
0
    }
2870
2871
0
    if ( nBestDirect == -1 )
2872
0
    {
2873
0
      break;
2874
0
    }
2875
0
    int nStep = 1;
2876
0
    if( (SearchPattern == 1 || SearchPattern == 2) && m_pcEncCfg->m_SMVD <= 1 )
2877
0
    {
2878
      // test at most 3 points in fast presets
2879
0
      nStep = 2 - ( nBestDirect & 0x01 );
2880
0
    }
2881
0
    nDirectStart = nBestDirect - nStep;
2882
0
    nDirectEnd = nBestDirect + nStep;
2883
0
  }
2884
2885
0
  return(uiMinCost);
2886
0
}
2887
2888
2889
void InterSearch::xSymMotionEstimation( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int BcwIdx )
2890
0
{
2891
  // Refine Search
2892
0
  int nSearchStepShift = MV_FRACTIONAL_BITS_DIFF;
2893
0
  int nDiamondRound = 8;
2894
0
  int nCrossRound = 1;
2895
2896
0
  nSearchStepShift += cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
2897
0
  nDiamondRound >>= cu.imv;
2898
2899
0
  ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 2, nSearchStepShift, nDiamondRound, BcwIdx );
2900
0
  if( m_pcEncCfg->m_SMVD < 3 )
2901
0
  {
2902
0
    ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 0, nSearchStepShift, nCrossRound, BcwIdx );
2903
0
  }
2904
0
}
2905
2906
2907
/**
2908
* \brief Generate half-sample interpolated block
2909
*
2910
* \param pattern Reference picture ROI
2911
* \param biPred    Flag indicating whether block is for biprediction
2912
*/
2913
void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
2914
0
{
2915
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_HPEL_INTERP );
2916
0
  const ClpRng& clpRng = m_lumaClpRng;
2917
0
  int width            = pattern->width;
2918
0
  int height           = pattern->height;
2919
0
  int srcStride        = pattern->stride;
2920
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2921
2922
0
  int intStride = width + 1;
2923
0
  int dstStride = width + 1;
2924
0
  Pel* intPtr;
2925
0
  Pel* dstPtr;
2926
0
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
2927
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
2928
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
2929
0
  int halfFilterSize = ( filterSize >> 1 );
2930
0
  const Pel *srcPtr  = pattern->buf - halfFilterSize * srcStride - 1;
2931
2932
0
  const ChromaFormat chFmt = m_currChromaFormat;
2933
2934
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2935
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[0][0]        , intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2936
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[0][0] + width, intStride,     1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2937
2938
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2939
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[2][0],         intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2940
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[2][0] + width, intStride,     1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2941
2942
0
  intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
2943
0
  dstPtr = m_filteredBlock[0][0][0];
2944
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2945
2946
0
  intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
2947
0
  dstPtr = m_filteredBlock[2][0][0];
2948
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2949
2950
0
  intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
2951
0
  dstPtr = m_filteredBlock[0][2][0];
2952
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2953
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2954
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2955
2956
0
  intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
2957
0
  dstPtr = m_filteredBlock[2][2][0];
2958
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2959
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2960
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2961
0
}
2962
2963
2964
2965
2966
2967
/**
2968
* \brief Generate quarter-sample interpolated blocks
2969
*
2970
* \param pattern    Reference picture ROI
2971
* \param halfPelRef Half-pel mv
2972
* \param biPred     Flag indicating whether block is for biprediction
2973
*/
2974
void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef, int& patternId )
2975
0
{
2976
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL_INTERP );
2977
0
  const ClpRng& clpRng = m_lumaClpRng;
2978
0
  int width      = pattern->width;
2979
0
  int height     = pattern->height;
2980
0
  int srcStride  = pattern->stride;
2981
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2982
2983
0
  Pel const* srcPtr;
2984
0
  int intStride = width + 1;
2985
0
  int dstStride = width + 1;
2986
0
  Pel* intPtr;
2987
0
  Pel* dstPtr;
2988
2989
0
  int filterSize     = reduceTap == 1 ? NTAPS_AFFINE
2990
0
                   : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA );
2991
2992
0
  int halfFilterSize = (filterSize>>1);
2993
2994
0
  int extHeight = (halfPelRef.ver == 0) ? height + filterSize : height + filterSize-1;
2995
2996
0
  const ChromaFormat chFmt = m_currChromaFormat;
2997
2998
0
  if( s_doInterpQ[ patternId ][ 12 ] )
2999
0
  {
3000
    // Horizontal filter 1/4
3001
0
    srcPtr = pattern->buf - halfFilterSize * srcStride - 1;
3002
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ];
3003
0
    if( halfPelRef.ver > 0 )
3004
0
    {
3005
0
      srcPtr += srcStride;
3006
0
    }
3007
0
    if( halfPelRef.hor >= 0 )
3008
0
    {
3009
0
      srcPtr += 1;
3010
0
    }
3011
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3012
0
  }
3013
3014
0
  if( s_doInterpQ[ patternId ][ 13 ] )
3015
0
  {
3016
    // Horizontal filter 3/4
3017
0
    srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
3018
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ];
3019
0
    if( halfPelRef.ver > 0 )
3020
0
    {
3021
0
      srcPtr += srcStride;
3022
0
    }
3023
0
    if( halfPelRef.hor > 0 )
3024
0
    {
3025
0
      srcPtr += 1;
3026
0
    }
3027
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3028
0
  }
3029
3030
0
  if( s_doInterpQ[ patternId ][ 3 ] )
3031
0
  {
3032
    // Generate @ 1,1
3033
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3034
0
    dstPtr = m_filteredBlock[ 1 ][ 1 ][ 0 ];
3035
0
    if( halfPelRef.ver == 0 )
3036
0
    {
3037
0
      intPtr += intStride;
3038
0
    }
3039
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3040
0
  }
3041
3042
0
  if( s_doInterpQ[ patternId ][ 11 ] )
3043
0
  {
3044
    // Generate @ 3,3
3045
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3046
0
    dstPtr = m_filteredBlock[ 3 ][ 3 ][ 0 ];
3047
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3048
0
  }
3049
3050
0
  if( s_doInterpQ[ patternId ][ 5 ] )
3051
0
  {
3052
    // Generate @ 3,1
3053
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3054
0
    dstPtr = m_filteredBlock[ 3 ][ 1 ][ 0 ];
3055
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3056
0
  }
3057
3058
0
  if( s_doInterpQ[ patternId ][ 9 ] )
3059
0
  {
3060
    // Generate @ 1,3
3061
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3062
0
    dstPtr = m_filteredBlock[ 1 ][ 3 ][ 0 ];
3063
0
    if( halfPelRef.ver == 0 )
3064
0
    {
3065
0
      intPtr += intStride;
3066
0
    }
3067
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3068
0
  }
3069
3070
0
  if (halfPelRef.ver != 0)
3071
0
  {
3072
0
    if( s_doInterpQ[ patternId ][ 4 ] )
3073
0
    {
3074
      // Generate @ 2,1
3075
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3076
0
      dstPtr = m_filteredBlock[ 2 ][ 1 ][ 0 ];
3077
0
      if( halfPelRef.ver == 0 )
3078
0
      {
3079
0
        intPtr += intStride;
3080
0
      }
3081
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3082
0
    }
3083
3084
0
    if( s_doInterpQ[ patternId ][ 10 ] )
3085
0
    {
3086
      // Generate @ 2,3
3087
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3088
0
      dstPtr = m_filteredBlock[ 2 ][ 3 ][ 0 ];
3089
0
      if( halfPelRef.ver == 0 )
3090
0
      {
3091
0
        intPtr += intStride;
3092
0
      }
3093
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3094
0
    }
3095
0
  }
3096
0
  else
3097
0
  {
3098
0
    if( s_doInterpQ[ patternId ][ 2 ] )
3099
0
    {
3100
      // Generate @ 0,1
3101
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + halfFilterSize * intStride;
3102
0
      dstPtr = m_filteredBlock[ 0 ][ 1 ][ 0 ];
3103
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3104
0
    }
3105
3106
0
    if( s_doInterpQ[ patternId ][ 8 ] )
3107
0
    {
3108
      // Generate @ 0,3
3109
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + halfFilterSize * intStride;
3110
0
      dstPtr = m_filteredBlock[ 0 ][ 3 ][ 0 ];
3111
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3112
0
    }
3113
0
  }
3114
3115
0
  if (halfPelRef.hor != 0)
3116
0
  {
3117
0
    if( s_doInterpQ[ patternId ][ 6 ] )
3118
0
    {
3119
      // Generate @ 1,2
3120
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3121
0
      dstPtr = m_filteredBlock[ 1 ][ 2 ][ 0 ];
3122
0
      if( halfPelRef.hor > 0 )
3123
0
      {
3124
0
        intPtr += 1;
3125
0
      }
3126
0
      if( halfPelRef.ver >= 0 )
3127
0
      {
3128
0
        intPtr += intStride;
3129
0
      }
3130
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3131
0
    }
3132
3133
0
    if( s_doInterpQ[ patternId ][ 7 ] )
3134
0
    {
3135
      // Generate @ 3,2
3136
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3137
0
      dstPtr = m_filteredBlock[ 3 ][ 2 ][ 0 ];
3138
0
      if( halfPelRef.hor > 0 )
3139
0
      {
3140
0
        intPtr += 1;
3141
0
      }
3142
0
      if( halfPelRef.ver > 0 )
3143
0
      {
3144
0
        intPtr += intStride;
3145
0
      }
3146
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3147
0
    }
3148
0
  }
3149
0
  else
3150
0
  {
3151
0
    if( s_doInterpQ[ patternId ][ 0 ] )
3152
0
    {
3153
      // Generate @ 1,0
3154
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3155
0
      dstPtr = m_filteredBlock[ 1 ][ 0 ][ 0 ];
3156
0
      if( halfPelRef.ver >= 0 )
3157
0
      {
3158
0
        intPtr += intStride;
3159
0
      }
3160
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3161
0
    }
3162
3163
0
    if( s_doInterpQ[ patternId ][ 1 ] )
3164
0
    {
3165
      // Generate @ 3,0
3166
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3167
0
      dstPtr = m_filteredBlock[ 3 ][ 0 ][ 0 ];
3168
0
      if( halfPelRef.ver > 0 )
3169
0
      {
3170
0
        intPtr += intStride;
3171
0
      }
3172
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3173
0
    }
3174
0
  }
3175
0
}
3176
3177
3178
void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID compID)
3179
0
{
3180
0
  const UnitArea& currArea    = partitioner.currArea();
3181
0
  const TransformUnit& currTU = *cs.getTU(isLuma(partitioner.chType) ? currArea.lumaPos() : currArea.chromaPos(), partitioner.chType);
3182
0
  const CodingUnit &cu        = *currTU.cu;
3183
0
  const unsigned currDepth    = partitioner.currTrDepth;
3184
3185
0
  const bool bSubdiv          = currDepth != currTU.depth;
3186
3187
0
  if (compID == MAX_NUM_TBLOCKS)  // we are not processing a channel, instead we always recurse and code the CBFs
3188
0
  {
3189
0
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
3190
0
    {
3191
0
      CHECK( !bSubdiv, "Not performing the implicit TU split" );
3192
0
    }
3193
0
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
3194
0
    {
3195
0
      CHECK( !bSubdiv, "Not performing the implicit TU split - sbt" );
3196
0
    }
3197
0
    else
3198
0
    {
3199
0
      CHECK( bSubdiv, "transformsplit not supported" );
3200
0
    }
3201
3202
0
    CHECK(CU::isIntra(cu), "Inter search provided with intra CU");
3203
3204
0
    if( cu.chromaFormat != CHROMA_400
3205
0
      && (!CU::isSepTree(cu) || isChroma(partitioner.chType))
3206
0
      )
3207
0
    {
3208
0
      {
3209
0
        {
3210
0
          const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMP_Cb, currDepth );
3211
0
          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
3212
0
          m_CABACEstimator->cbf_comp( cu, chroma_cbf, currArea.blocks[COMP_Cb], currDepth );
3213
0
        }
3214
0
        {
3215
0
          const bool  chroma_cbf = TU::getCbfAtDepth( currTU, COMP_Cr, currDepth );
3216
0
          if (!(cu.sbtInfo && (currDepth == 0 || (currDepth == 1 && currTU.noResidual))))
3217
0
          m_CABACEstimator->cbf_comp( cu, chroma_cbf, currArea.blocks[COMP_Cr], currDepth, TU::getCbfAtDepth( currTU, COMP_Cb, currDepth ) );
3218
0
        }
3219
0
      }
3220
0
    }
3221
3222
0
    if( !bSubdiv && !( cu.sbtInfo && currTU.noResidual )
3223
0
      && !isChroma(partitioner.chType)
3224
0
      )
3225
0
    {
3226
0
      m_CABACEstimator->cbf_comp( cu, TU::getCbfAtDepth( currTU, COMP_Y, currDepth ), currArea.Y(), currDepth );
3227
0
    }
3228
0
  }
3229
3230
0
  if (!bSubdiv)
3231
0
  {
3232
0
    if (compID != MAX_NUM_TBLOCKS) // we have already coded the CBFs, so now we code coefficients
3233
0
    {
3234
0
      if( currArea.blocks[compID].valid() )
3235
0
      {
3236
0
        if( compID == COMP_Cr )
3237
0
        {
3238
0
          const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
3239
0
          m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
3240
0
        }
3241
0
        if( TU::getCbf( currTU, compID ) )
3242
0
        {
3243
0
          m_CABACEstimator->residual_coding( currTU, compID );
3244
0
        }
3245
0
      }
3246
0
    }
3247
0
  }
3248
0
  else
3249
0
  {
3250
0
    if( compID == MAX_NUM_TBLOCKS || TU::getCbfAtDepth( currTU, compID, currDepth ) )
3251
0
    {
3252
0
      if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
3253
0
      {
3254
0
        partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
3255
0
      }
3256
0
      else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
3257
0
      {
3258
0
        partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
3259
0
      }
3260
0
      else
3261
0
        THROW( "Implicit TU split not available!" );
3262
3263
0
      do
3264
0
      {
3265
0
        xEncodeInterResidualQT( cs, partitioner, compID );
3266
0
      } while( partitioner.nextPart( cs ) );
3267
3268
0
      partitioner.exitCurrSplit();
3269
0
    }
3270
0
  }
3271
0
}
3272
3273
void InterSearch::xCalcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed )
3274
0
{
3275
0
  if( !sbtAllowed )
3276
0
  {
3277
0
    m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
3278
0
    for( int comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
3279
0
    {
3280
0
      const ComponentID compID = ComponentID( comp );
3281
0
      CPelBuf pred = cs.getPredBuf( compID );
3282
0
      CPelBuf org  = cs.getOrgBuf( compID );
3283
0
      m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
3284
0
    }
3285
0
    return;
3286
0
  }
3287
3288
  //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
3289
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
3290
0
  int cuWidth  = cu.lwidth();
3291
0
  int cuHeight = cu.lheight();
3292
0
  int numPartX = cuWidth  >= 16 ? 4 : ( cuWidth  == 4 ? 1 : 2 );
3293
0
  int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 );
3294
0
  Distortion dist[4][4];
3295
0
  memset( dist, 0, sizeof( Distortion ) * 16 );
3296
3297
0
  for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
3298
0
  {
3299
0
    const ComponentID compID   = ComponentID( c );
3300
0
    const CompArea&   compArea = cu.blocks[compID];
3301
0
    const CPelBuf orgPel  = cs.getOrgBuf( compArea );
3302
0
    const CPelBuf predPel = cs.getPredBuf( compArea );
3303
0
    int lengthX = compArea.width / numPartX;
3304
0
    int lengthY = compArea.height / numPartY;
3305
0
    int strideOrg  = orgPel.stride;
3306
0
    int stridePred = predPel.stride;
3307
0
    uint32_t   uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( *cs.sps.bitDepths[ toChannelType( compID ) ] - 8 ) << 1 );
3308
0
    Intermediate_Int iTemp;
3309
3310
    //calc distY of 16 sub parts
3311
0
    for( int j = 0; j < numPartY; j++ )
3312
0
    {
3313
0
      for( int i = 0; i < numPartX; i++ )
3314
0
      {
3315
0
        int posX = i * lengthX;
3316
0
        int posY = j * lengthY;
3317
0
        const Pel* ptrOrg  = orgPel.bufAt( posX, posY );
3318
0
        const Pel* ptrPred = predPel.bufAt( posX, posY );
3319
0
        Distortion uiSum = 0;
3320
0
        for( int n = 0; n < lengthY; n++ )
3321
0
        {
3322
0
          for( int m = 0; m < lengthX; m++ )
3323
0
          {
3324
0
            iTemp = ptrOrg[m] - ptrPred[m];
3325
0
            uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
3326
0
          }
3327
0
          ptrOrg += strideOrg;
3328
0
          ptrPred += stridePred;
3329
0
        }
3330
0
        if( isChroma( compID ) )
3331
0
        {
3332
0
          uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
3333
0
        }
3334
0
        dist[j][i] += uiSum;
3335
0
      }
3336
0
    }
3337
0
  }
3338
3339
  //SSE of a CU
3340
0
  m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
3341
0
  for( int j = 0; j < numPartY; j++ )
3342
0
  {
3343
0
    for( int i = 0; i < numPartX; i++ )
3344
0
    {
3345
0
      m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
3346
0
    }
3347
0
  }
3348
  //init per-mode dist
3349
0
  for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
3350
0
  {
3351
0
    m_estMinDistSbt[i] = MAX_DISTORTION;
3352
0
  }
3353
3354
  //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
3355
0
  uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
3356
0
  if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
3357
0
  {
3358
0
    m_skipSbtAll = true;
3359
0
    return;
3360
0
  }
3361
3362
  //derive estimated minDist of SBT = zero-residual part distortion + non-zero residual part distortion / 16
3363
0
  int shift = 5;
3364
0
  Distortion distResiPart = 0, distNoResiPart = 0;
3365
3366
0
  if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
3367
0
  {
3368
0
    int offsetResiPart = 0;
3369
0
    int offsetNoResiPart = numPartX / 2;
3370
0
    distResiPart = distNoResiPart = 0;
3371
0
    assert( numPartX >= 2 );
3372
0
    for( int j = 0; j < numPartY; j++ )
3373
0
    {
3374
0
      for( int i = 0; i < numPartX / 2; i++ )
3375
0
      {
3376
0
        distResiPart   += dist[j][i + offsetResiPart];
3377
0
        distNoResiPart += dist[j][i + offsetNoResiPart];
3378
0
      }
3379
0
    }
3380
0
    m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
3381
0
    m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
3382
0
  }
3383
3384
0
  if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
3385
0
  {
3386
0
    int offsetResiPart = 0;
3387
0
    int offsetNoResiPart = numPartY / 2;
3388
0
    assert( numPartY >= 2 );
3389
0
    distResiPart = distNoResiPart = 0;
3390
0
    for( int j = 0; j < numPartY / 2; j++ )
3391
0
    {
3392
0
      for( int i = 0; i < numPartX; i++ )
3393
0
      {
3394
0
        distResiPart   += dist[j + offsetResiPart][i];
3395
0
        distNoResiPart += dist[j + offsetNoResiPart][i];
3396
0
      }
3397
0
    }
3398
0
    m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
3399
0
    m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
3400
0
  }
3401
3402
0
  if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
3403
0
  {
3404
0
    assert( numPartX == 4 );
3405
0
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
3406
0
    for( int j = 0; j < numPartY; j++ )
3407
0
    {
3408
0
      m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
3409
0
      m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
3410
0
    }
3411
0
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
3412
0
    m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
3413
0
  }
3414
3415
0
  if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
3416
0
  {
3417
0
    assert( numPartY == 4 );
3418
0
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
3419
0
    for( int i = 0; i < numPartX; i++ )
3420
0
    {
3421
0
      m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
3422
0
      m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
3423
0
    }
3424
0
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
3425
0
    m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
3426
0
  }
3427
3428
  //SBT fast algorithm 5: try N SBT modes with the lowest distortion
3429
0
  Distortion temp[NUMBER_SBT_MODE];
3430
0
  memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
3431
0
  memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE );
3432
0
  int startIdx = 0, numRDO;
3433
0
  numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
3434
0
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
3435
0
  for( int i = startIdx; i < startIdx + numRDO; i++ )
3436
0
  {
3437
0
    Distortion minDist = MAX_DISTORTION;
3438
0
    for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
3439
0
    {
3440
0
      if( temp[n] < minDist )
3441
0
      {
3442
0
        minDist = temp[n];
3443
0
        m_sbtRdoOrder[i] = n;
3444
0
      }
3445
0
    }
3446
0
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
3447
0
  }
3448
3449
0
  startIdx += numRDO;
3450
0
  numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
3451
0
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
3452
0
  for( int i = startIdx; i < startIdx + numRDO; i++ )
3453
0
  {
3454
0
    Distortion minDist = MAX_DISTORTION;
3455
0
    for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
3456
0
    {
3457
0
      if( temp[n] < minDist )
3458
0
      {
3459
0
        minDist = temp[n];
3460
0
        m_sbtRdoOrder[i] = n;
3461
0
      }
3462
0
    }
3463
0
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
3464
0
  }
3465
0
}
3466
3467
uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
3468
0
{
3469
0
  int sbtMode = CU::getSbtMode( sbtIdx, sbtPos );
3470
3471
  //SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
3472
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
3473
0
  if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost )
3474
0
  {
3475
0
    return 0; //early skip type 0
3476
0
  }
3477
3478
0
  if( costSbtOff != MAX_DOUBLE )
3479
0
  {
3480
0
    if( !rootCbfSbtOff )
3481
0
    {
3482
      //SBT fast algorithm 3: skip SBT when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits)
3483
0
      uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS;
3484
0
      Distortion distResiPart;
3485
0
      if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF )
3486
0
      {
3487
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 );
3488
0
      }
3489
0
      else
3490
0
      {
3491
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 );
3492
0
      }
3493
3494
0
      double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart );
3495
0
      if( estCost > costSbtOff )
3496
0
      {
3497
0
        return 1;
3498
0
      }
3499
0
      if( estCost > bestCost )
3500
0
      {
3501
0
        return 2;
3502
0
      }
3503
0
    }
3504
0
    else
3505
0
    {
3506
      //SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost
3507
0
      double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6;
3508
0
      double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] );
3509
0
      if( estCost > bestCost )
3510
0
      {
3511
0
        return 3;
3512
0
      }
3513
0
    }
3514
0
  }
3515
0
  return MAX_UCHAR;
3516
0
}
3517
3518
void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/)
3519
24.3k
{
3520
24.3k
  const UnitArea& currArea = partitioner.currArea();
3521
24.3k
  const SPS &sps           = *cs.sps;
3522
3523
24.3k
  const uint32_t numValidComp  = getNumberValidComponents( sps.chromaFormatIdc );
3524
24.3k
  const uint32_t numTBlocks    = getNumberValidTBlocks   ( *cs.pcv );
3525
24.3k
  CodingUnit& cu               = *cs.getCU(partitioner.chType, partitioner.treeType);
3526
24.3k
  const unsigned currDepth = partitioner.currTrDepth;
3527
24.3k
  const bool useTS = cs.picture->useTS;
3528
3529
24.3k
  bool bCheckFull  = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
3530
24.3k
  if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
3531
0
  {
3532
0
    bCheckFull = false;
3533
0
  }
3534
24.3k
  bool bCheckSplit = !bCheckFull;
3535
3536
  // get temporary data
3537
24.3k
  CodingStructure *csSplit = nullptr;
3538
24.3k
  CodingStructure *csFull  = nullptr;
3539
24.3k
  if (bCheckSplit)
3540
0
  {
3541
0
    csSplit = &cs;
3542
0
  }
3543
24.3k
  else if (bCheckFull)
3544
24.3k
  {
3545
24.3k
    csFull = &cs;
3546
24.3k
  }
3547
3548
24.3k
  Distortion uiSingleDist         = 0;
3549
24.3k
  Distortion uiSingleDistComp [3] = { 0, 0, 0 };
3550
3551
24.3k
  const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
3552
24.3k
  TempCtx       ctxBest   ( m_CtxCache );
3553
3554
24.3k
  PelUnitBuf    orgResiBuf;
3555
24.3k
  orgResiBuf = m_tmpStorageLCU.getCompactBuf( currArea );
3556
24.3k
  orgResiBuf.copyFrom(cs.getResiBuf(currArea));
3557
3558
24.3k
  if (bCheckFull)
3559
24.3k
  {
3560
24.3k
    ReshapeData& reshapeData = cs.picture->reshapeData;
3561
3562
24.3k
    TransformUnit& tu = csFull->addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, &cu);
3563
24.3k
    tu.depth          = currDepth;
3564
24.3k
    tu.mtsIdx[COMP_Y] = MTS_DCT2_DCT2;
3565
24.3k
    tu.checkTuNoResidual( partitioner.currPartIdx() );
3566
24.3k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && cs.picHeader->lmcsChromaResidualScale && !(CS::isDualITree(cs) && cs.slice->isIntra() && tu.cu->predMode == MODE_IBC))
3567
0
    {
3568
0
      tu.chromaAdj = reshapeData.calculateChromaAdjVpduNei(tu, tu.blocks[COMP_Y], tu.cu->treeType);
3569
0
    }
3570
3571
24.3k
    double minCost [MAX_NUM_TBLOCKS];
3572
3573
24.3k
    m_CABACEstimator->resetBits();
3574
3575
24.3k
    memset(m_pTempPel, 0, sizeof(Pel) * tu.Y().area()); // not necessary needed for inside of recursion (only at the beginning)
3576
3577
97.3k
    for (uint32_t i = 0; i < numTBlocks; i++)
3578
73.0k
    {
3579
73.0k
      minCost[i] = MAX_DOUBLE;
3580
73.0k
    }
3581
3582
24.3k
    CodingStructure &saveCS = *m_pSaveCS[1];
3583
24.3k
    saveCS.pcv     = cs.pcv;
3584
24.3k
    saveCS.picture = cs.picture;
3585
24.3k
    saveCS.area.repositionTo( currArea );
3586
3587
24.3k
    TransformUnit& bestTU = saveCS.tus.empty() ? saveCS.addTU( currArea, partitioner.chType, nullptr ) : *saveCS.tus.front();
3588
24.3k
    bestTU.initData();
3589
24.3k
    bestTU.UnitArea::operator=( currArea );
3590
3591
97.3k
    for( uint32_t c = 0; c < numTBlocks; c++ )
3592
73.0k
    {
3593
73.0k
      const ComponentID compID    = ComponentID(c);
3594
73.0k
      const CompArea&   compArea  = tu.blocks[compID];
3595
73.0k
      const int channelBitDepth   = sps.bitDepths[toChannelType(compID)];
3596
3597
73.0k
      if( !tu.blocks[compID].valid() )
3598
48.6k
      {
3599
48.6k
        continue;
3600
48.6k
      }
3601
24.3k
      bool tsAllowed = useTS && TU::isTSAllowed(tu, compID) && (isLuma(compID) || (isChroma(compID) && m_pcEncCfg->m_useChromaTS));
3602
24.3k
      if (isChroma(compID) && tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
3603
0
      {
3604
0
        tsAllowed = false;
3605
0
      }
3606
24.3k
      uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
3607
24.3k
      std::vector<TrMode> trModes;
3608
3609
24.3k
      if (nNumTransformCands > 1)
3610
3.25k
      {
3611
3.25k
        trModes.push_back(TrMode(0, true)); //DCT2
3612
        //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost
3613
3.25k
        if ( !tu.noResidual )
3614
3.25k
        {
3615
3.25k
          trModes.push_back(TrMode(1, true));
3616
3.25k
        }
3617
0
        else
3618
0
        {
3619
0
          nNumTransformCands--;
3620
0
        }
3621
3.25k
      }
3622
24.3k
      bool isLast = true;
3623
51.7k
      for (int transformMode = 0; transformMode < nNumTransformCands; transformMode++)
3624
27.3k
      {
3625
27.3k
        const bool isFirstMode = transformMode == 0;
3626
3627
        // copy the original residual into the residual buffer
3628
27.3k
        csFull->getResiBuf(compArea).copyFrom(orgResiBuf.get(compID));
3629
3630
3631
27.3k
        m_CABACEstimator->getCtx() = ctxStart;
3632
27.3k
        m_CABACEstimator->resetBits();
3633
3634
27.3k
        if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->m_TS)
3635
0
        {
3636
0
          continue;
3637
0
        }
3638
27.3k
        tu.mtsIdx[compID] = transformMode ? trModes[transformMode].first : 0;
3639
3640
27.3k
        const QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
3641
27.3k
        m_pcTrQuant->selectLambda(compID);
3642
3643
27.3k
        const Slice& slice = *tu.cu->slice;
3644
27.3k
        if (slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale )
3645
0
        {
3646
0
          double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
3647
0
          m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
3648
0
        }
3649
3650
27.3k
        if ( sps.jointCbCr && isChroma( compID ) && ( tu.cu->cs->slice->sliceQp > 18 ) )
3651
0
        {
3652
0
          m_pcTrQuant->scaleLambda( 1.05 );
3653
0
        }
3654
27.3k
        TCoeff     currAbsSum = 0;
3655
27.3k
        uint64_t   currCompFracBits = 0;
3656
27.3k
        Distortion currCompDist = 0;
3657
27.3k
        double     currCompCost = 0;
3658
27.3k
        uint64_t   nonCoeffFracBits = 0;
3659
27.3k
        Distortion nonCoeffDist = 0;
3660
27.3k
        double     nonCoeffCost = 0;
3661
3662
27.3k
        if (slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale && tu.blocks[compID].width*tu.blocks[compID].height > 4 )
3663
0
        {
3664
0
          PelBuf resiBuf = csFull->getResiBuf(compArea);
3665
0
          resiBuf.scaleSignal(tu.chromaAdj, 1, slice.clpRngs[compID]);
3666
0
        }
3667
3668
27.3k
        if (nNumTransformCands > 1)
3669
6.31k
        {
3670
6.31k
          if (transformMode == 0)
3671
3.25k
          {
3672
3.25k
            m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, compID);
3673
3.25k
            tu.mtsIdx[compID] = trModes[0].first;
3674
3.25k
            if (!trModes[transformMode + 1].second)
3675
195
            {
3676
195
              nNumTransformCands = 1;
3677
195
            }
3678
3.25k
          }
3679
6.31k
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true);
3680
6.31k
        }
3681
21.0k
        else
3682
21.0k
        {
3683
21.0k
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx());
3684
21.0k
        }
3685
27.3k
        if (isFirstMode || (currAbsSum == 0))
3686
27.3k
        {
3687
27.3k
          const CPelBuf zeroBuf(m_pTempPel, compArea);
3688
27.3k
          const CPelBuf& orgResi = orgResiBuf.get(compID);
3689
3690
27.3k
          nonCoeffDist = m_pcRdCost->getDistPart(zeroBuf, orgResi, channelBitDepth, compID, DF_SSE); // initialized with zero residual distortion
3691
3692
27.3k
          if (!tu.noResidual)
3693
27.3k
          {
3694
27.3k
            const bool prevCbf = (compID == COMP_Cr ? tu.cbf[COMP_Cb] : false);
3695
27.3k
            m_CABACEstimator->cbf_comp(*tu.cu, false, compArea, currDepth, prevCbf);
3696
27.3k
          }
3697
3698
27.3k
          nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
3699
27.3k
          nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
3700
27.3k
        }
3701
3702
27.3k
        if ((puiZeroDist != NULL) && isFirstMode)
3703
24.3k
        {
3704
24.3k
          *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
3705
24.3k
        }
3706
3707
27.3k
        if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction
3708
227
        {
3709
227
          if (isFirstMode)
3710
227
          {
3711
227
            m_CABACEstimator->getCtx() = ctxStart;
3712
227
            m_CABACEstimator->resetBits();
3713
227
          }
3714
3715
227
          const bool prevCbf = ( compID == COMP_Cr ? tu.cbf[COMP_Cb] : false );
3716
227
          m_CABACEstimator->cbf_comp( *tu.cu, true, compArea, currDepth, prevCbf );
3717
227
          if( compID == COMP_Cr )
3718
0
          {
3719
0
            const int cbfMask = ( tu.cbf[COMP_Cb] ? 2 : 0 ) + 1;
3720
0
            m_CABACEstimator->joint_cb_cr( tu, cbfMask );
3721
0
          }
3722
227
          CUCtx cuCtx;
3723
227
          cuCtx.isDQPCoded = true;
3724
227
          cuCtx.isChromaQpAdjCoded = true;
3725
227
          m_CABACEstimator->residual_coding(tu, compID, &cuCtx);
3726
227
          m_CABACEstimator->mts_idx(cu, &cuCtx);
3727
3728
227
          currCompFracBits = m_CABACEstimator->getEstFracBits();
3729
3730
227
          PelBuf resiBuf  = csFull->getResiBuf(compArea);
3731
227
          CPelBuf orgResi = orgResiBuf.get(compID);
3732
3733
227
          m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP);
3734
227
          if (slice.picHeader->lmcsEnabled && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale && tu.blocks[compID].width*tu.blocks[compID].height > 4)
3735
0
          {
3736
0
            resiBuf.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]);
3737
0
          }
3738
3739
227
          currCompDist = m_pcRdCost->getDistPart(orgResi, resiBuf, channelBitDepth, compID, DF_SSE);
3740
227
          currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist, false);
3741
227
        }
3742
27.1k
        else if (transformMode > 0)
3743
3.06k
        {
3744
3.06k
          currCompCost = MAX_DOUBLE;
3745
3.06k
        }
3746
24.1k
        else
3747
24.1k
        {
3748
24.1k
          currCompFracBits = nonCoeffFracBits;
3749
24.1k
          currCompDist     = nonCoeffDist;
3750
24.1k
          currCompCost     = nonCoeffCost;
3751
3752
24.1k
          tu.cbf[compID] = 0;
3753
24.1k
        }
3754
3755
        // evaluate
3756
27.3k
        if ((currCompCost < minCost[compID]) || (transformMode == 1 && currCompCost == minCost[compID]))
3757
24.3k
        {
3758
          // copy component
3759
24.3k
          if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null
3760
24.1k
          {
3761
24.1k
            tu.getCoeffs( compID ).fill( 0 );
3762
24.1k
            csFull->getResiBuf( compArea ).fill( 0 );
3763
24.1k
            tu.cbf[compID]   = 0;
3764
3765
24.1k
            currAbsSum       = 0;
3766
24.1k
            currCompFracBits = nonCoeffFracBits;
3767
24.1k
            currCompDist     = nonCoeffDist;
3768
24.1k
            currCompCost     = nonCoeffCost;
3769
24.1k
          }
3770
3771
24.3k
          uiSingleDistComp[compID] = currCompDist;
3772
24.3k
          minCost[compID]          = currCompCost;
3773
24.3k
          if (transformMode != (nNumTransformCands - 1))
3774
3.06k
          {
3775
3.06k
            bestTU.copyComponentFrom(tu, compID);
3776
3.06k
            saveCS.getResiBuf(compArea).copyFrom(csFull->getResiBuf(compArea));
3777
3.06k
          }
3778
21.2k
          else
3779
21.2k
          {
3780
21.2k
            isLast = false;
3781
21.2k
          }
3782
24.3k
        }
3783
27.3k
        if( tu.noResidual )
3784
0
        {
3785
0
          CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" );
3786
0
        }
3787
27.3k
      }
3788
24.3k
      if (isLast)
3789
3.06k
      {
3790
3.06k
        tu.copyComponentFrom(bestTU, compID);
3791
3.06k
        csFull->getResiBuf(compArea).copyFrom(saveCS.getResiBuf(compArea));
3792
3.06k
      }
3793
24.3k
    } // component loop
3794
3795
24.3k
    if ( tu.blocks.size()>2 && tu.blocks[COMP_Cb].valid() )
3796
0
    {
3797
0
      const CompArea& cbArea = tu.blocks[COMP_Cb];
3798
0
      const CompArea& crArea = tu.blocks[COMP_Cr];
3799
0
      bool checkJointCbCr = (sps.jointCbCr) && (!tu.noResidual) && (TU::getCbf(tu, COMP_Cb) || TU::getCbf(tu, COMP_Cr));
3800
0
      const int channelBitDepth = sps.bitDepths[toChannelType(COMP_Cb)];
3801
0
      const Slice& slice = *tu.cu->slice;
3802
0
      bool      reshape         = slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && slice.picHeader->lmcsChromaResidualScale
3803
0
                               && tu.blocks[COMP_Cb].width * tu.blocks[COMP_Cb].height > 4;
3804
0
      double minCostCbCr = minCost[COMP_Cb] + minCost[COMP_Cr];
3805
0
      bool   isLastBest  = false;
3806
3807
0
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cr)) ||
3808
0
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cb)) ||
3809
0
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
3810
0
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tu, COMP_Cr)) ||
3811
0
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tu, COMP_Cb)) ||
3812
0
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP));
3813
3814
0
      std::vector<int> jointCbfMasksToTest;
3815
0
      if ( checkJointCbCr )
3816
0
      {
3817
0
        for( int i = 0; i < 4; i++ )
3818
0
        {
3819
0
          m_orgResiCb[i].compactResize(cbArea);
3820
0
          m_orgResiCr[i].compactResize(crArea);
3821
0
        }
3822
0
        m_orgResiCb[0].copyFrom(orgResiBuf.Cb());
3823
0
        m_orgResiCr[0].copyFrom(orgResiBuf.Cr());
3824
0
        if (reshape)
3825
0
        {
3826
0
          m_orgResiCb[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cb]);
3827
0
          m_orgResiCr[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cr]);
3828
0
        }
3829
3830
0
        jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, m_orgResiCb, m_orgResiCr);
3831
3832
0
        bestTU.copyComponentFrom(tu, COMP_Cb);
3833
0
        bestTU.copyComponentFrom(tu, COMP_Cr);
3834
0
        saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
3835
0
        saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
3836
0
      }
3837
3838
0
      for (int cbfMask: jointCbfMasksToTest)
3839
0
      {
3840
0
        ComponentID codeCompId = (cbfMask >> 1 ? COMP_Cb : COMP_Cr);
3841
0
        ComponentID otherCompId = (codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr);
3842
0
        bool tsAllowed = useTS && TU::isTSAllowed(tu, codeCompId) && (m_pcEncCfg->m_useChromaTS);
3843
0
        if (tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
3844
0
        {
3845
0
          tsAllowed = false;
3846
0
        }
3847
0
        if (!tsAllowed)
3848
0
        {
3849
0
          checkTSOnly = false;
3850
0
        }
3851
0
        uint8_t     numTransformCands = 1 + (tsAllowed && (!(checkDCTOnly || checkTSOnly)) ? 1 : 0); // DCT + TS = 2 tests
3852
0
        std::vector<TrMode> trModes;
3853
0
        if (numTransformCands > 1)
3854
0
        {
3855
0
          trModes.push_back(TrMode(0, true)); // DCT2
3856
0
          trModes.push_back(TrMode(1, true));//TS
3857
0
        }
3858
0
        else
3859
0
        {
3860
0
          tu.mtsIdx[codeCompId] = checkTSOnly ? 1 : 0;
3861
0
        }
3862
0
        for (int modeId = 0; modeId < numTransformCands; modeId++)
3863
0
        {
3864
0
          TCoeff     currAbsSum = 0;
3865
0
          uint64_t   currCompFracBits = 0;
3866
0
          Distortion currCompDistCb = 0;
3867
0
          Distortion currCompDistCr = 0;
3868
0
          double     currCompCost = 0;
3869
3870
0
          tu.jointCbCr = (uint8_t)cbfMask;
3871
0
          if (numTransformCands > 1)
3872
0
          {
3873
0
            tu.mtsIdx[codeCompId] = trModes[modeId].first;
3874
0
          }
3875
0
          tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
3876
0
          const QpParam cQP(tu, COMP_Cb);  // note: uses tu.transformSkip[compID]
3877
0
          m_pcTrQuant->selectLambda(COMP_Cb);
3878
3879
          // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
3880
0
          const int    absIct = abs(TU::getICTMode(tu));
3881
0
          const double lfact = (absIct == 1 || absIct == 3 ? 0.8 : 0.5);
3882
0
          m_pcTrQuant->scaleLambda(lfact);
3883
0
          if (checkJointCbCr && (tu.cu->cs->slice->sliceQp > 18))
3884
0
          {
3885
0
            m_pcTrQuant->scaleLambda(1.05);
3886
0
          }
3887
3888
0
          m_CABACEstimator->getCtx() = ctxStart;
3889
0
          m_CABACEstimator->resetBits();
3890
3891
0
          PelBuf cbResi = csFull->getResiBuf(cbArea);
3892
0
          PelBuf crResi = csFull->getResiBuf(crArea);
3893
0
          cbResi.copyFrom(m_orgResiCb[cbfMask]);
3894
0
          crResi.copyFrom(m_orgResiCr[cbfMask]);
3895
3896
0
          if (reshape)
3897
0
          {
3898
0
            double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
3899
0
            m_pcTrQuant->scaleLambda(1.0 / (cRescale * cRescale));
3900
0
          }
3901
3902
0
          int         codedCbfMask = 0;
3903
0
          ComponentID codeCompId = (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr);
3904
0
          ComponentID otherCompId = (codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr);
3905
0
          const QpParam qpCbCr(tu, codeCompId);
3906
3907
0
          tu.getCoeffs(otherCompId).fill(0);   // do we need that?
3908
0
          TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
3909
3910
0
          PelBuf& codeResi = (codeCompId == COMP_Cr ? crResi : cbResi);
3911
0
          TCoeff  compAbsSum = 0;
3912
0
          if (numTransformCands > 1)
3913
0
          {
3914
0
            if (modeId == 0)
3915
0
            {
3916
0
              m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, codeCompId);
3917
0
              tu.mtsIdx[codeCompId] = trModes[modeId].first;
3918
0
              tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
3919
0
              if (!trModes[modeId + 1].second)
3920
0
              {
3921
0
                numTransformCands = 1;
3922
0
              }
3923
0
            }
3924
0
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx(), true);
3925
0
          }
3926
0
          else
3927
0
          {
3928
0
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
3929
0
          }
3930
0
          if (compAbsSum > 0)
3931
0
          {
3932
0
            m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
3933
0
            codedCbfMask += (codeCompId == COMP_Cb ? 2 : 1);
3934
0
          }
3935
0
          else
3936
0
          {
3937
0
            codeResi.fill(0);
3938
0
          }
3939
3940
0
          if (tu.jointCbCr == 3 && codedCbfMask == 2)
3941
0
          {
3942
0
            codedCbfMask = 3;
3943
0
            TU::setCbfAtDepth(tu, COMP_Cr, tu.depth, true);
3944
0
          }
3945
0
          if (codedCbfMask && tu.jointCbCr != codedCbfMask)
3946
0
          {
3947
0
            codedCbfMask = 0;
3948
0
          }
3949
0
          currAbsSum = codedCbfMask;
3950
0
          if (!tu.mtsIdx[codeCompId])
3951
0
          {
3952
0
            numTransformCands = (currAbsSum <= 0) ? 1 : numTransformCands;
3953
0
          }
3954
0
          if (currAbsSum > 0)
3955
0
          {
3956
0
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask >> 1, cbArea, currDepth, false);
3957
0
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
3958
0
            m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
3959
0
            if (codedCbfMask >> 1)
3960
0
              m_CABACEstimator->residual_coding(tu, COMP_Cb);
3961
0
            if (codedCbfMask & 1)
3962
0
              m_CABACEstimator->residual_coding(tu, COMP_Cr);
3963
0
            currCompFracBits = m_CABACEstimator->getEstFracBits();
3964
3965
0
            m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
3966
0
            if (reshape)
3967
0
            {
3968
0
              cbResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cb]);
3969
0
              crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]);
3970
0
            }
3971
3972
0
            currCompDistCb = m_pcRdCost->getDistPart(orgResiBuf.Cb(), cbResi, channelBitDepth, COMP_Cb, DF_SSE);
3973
0
            currCompDistCr = m_pcRdCost->getDistPart(orgResiBuf.Cr(), crResi, channelBitDepth, COMP_Cr, DF_SSE);
3974
0
            currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
3975
0
          }
3976
0
          else
3977
0
            currCompCost = MAX_DOUBLE;
3978
3979
          // evaluate
3980
0
          if (currCompCost < minCostCbCr)
3981
0
          {
3982
0
            uiSingleDistComp[COMP_Cb] = currCompDistCb;
3983
0
            uiSingleDistComp[COMP_Cr] = currCompDistCr;
3984
0
            minCostCbCr = currCompCost;
3985
0
            isLastBest = (cbfMask == jointCbfMasksToTest.back()) && (modeId == (numTransformCands - 1));
3986
0
            if (!isLastBest)
3987
0
            {
3988
0
              bestTU.copyComponentFrom(tu, COMP_Cb);
3989
0
              bestTU.copyComponentFrom(tu, COMP_Cr);
3990
0
              saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
3991
0
              saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
3992
0
            }
3993
0
          }
3994
0
        }
3995
3996
0
        if( !isLastBest )
3997
0
        {
3998
          // copy component
3999
0
          tu.copyComponentFrom( bestTU, COMP_Cb );
4000
0
          tu.copyComponentFrom( bestTU, COMP_Cr );
4001
0
          csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) );
4002
0
          csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea ) );
4003
0
        }
4004
0
      }
4005
0
    }
4006
4007
24.3k
    m_CABACEstimator->getCtx() = ctxStart;
4008
24.3k
    m_CABACEstimator->resetBits();
4009
24.3k
    if( !tu.noResidual )
4010
24.3k
    {
4011
24.3k
      static const ComponentID cbf_getComp[3] = { COMP_Cb, COMP_Cr, COMP_Y };
4012
97.3k
      for( unsigned c = 0; c < numTBlocks; c++)
4013
73.0k
      {
4014
73.0k
        const ComponentID compID = numTBlocks>1 ? cbf_getComp[c] : COMP_Y;
4015
73.0k
        if( tu.blocks[compID].valid() )
4016
24.3k
        {
4017
24.3k
          const bool prevCbf = ( compID == COMP_Cr ? TU::getCbfAtDepth( tu, COMP_Cb, currDepth ) : false );
4018
24.3k
          m_CABACEstimator->cbf_comp( *tu.cu, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf );
4019
24.3k
        }
4020
73.0k
      }
4021
24.3k
    }
4022
4023
97.3k
    for (uint32_t ch = 0; ch < numValidComp; ch++)
4024
73.0k
    {
4025
73.0k
      const ComponentID compID = ComponentID(ch);
4026
73.0k
      if (tu.blocks[compID].valid())
4027
24.3k
      {
4028
24.3k
        if( compID == COMP_Cr )
4029
0
        {
4030
0
          const int cbfMask = ( TU::getCbf( tu, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( tu, COMP_Cr ) ? 1 : 0 );
4031
0
          m_CABACEstimator->joint_cb_cr(tu, cbfMask);
4032
0
        }
4033
24.3k
        if( TU::getCbf( tu, compID ) )
4034
146
        {
4035
146
          m_CABACEstimator->residual_coding( tu, compID );
4036
146
        }
4037
24.3k
        uiSingleDist += uiSingleDistComp[compID];
4038
24.3k
      }
4039
73.0k
    }
4040
24.3k
    if( tu.noResidual )
4041
0
    {
4042
0
      CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" );
4043
0
    }
4044
4045
24.3k
    csFull->fracBits += m_CABACEstimator->getEstFracBits();
4046
24.3k
    csFull->dist     += uiSingleDist;
4047
24.3k
    csFull->cost      = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
4048
24.3k
  } // check full
4049
4050
  // code sub-blocks
4051
24.3k
  if( bCheckSplit )
4052
0
  {
4053
0
    if( bCheckFull )
4054
0
    {
4055
0
      m_CABACEstimator->getCtx() = ctxStart;
4056
0
    }
4057
4058
0
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
4059
0
    {
4060
0
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
4061
0
    }
4062
0
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
4063
0
    {
4064
0
      partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
4065
0
    }
4066
0
    else
4067
0
      THROW( "Implicit TU split not available!" );
4068
4069
0
    do
4070
0
    {
4071
0
      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist );
4072
4073
0
      csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
4074
0
    } while( partitioner.nextPart( *csSplit ) );
4075
4076
0
    partitioner.exitCurrSplit();
4077
4078
0
    unsigned        compCbf[3]  = { 0, 0, 0 };
4079
4080
0
    if( !bCheckFull )
4081
0
    {
4082
0
      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
4083
0
      {
4084
0
        for( unsigned ch = 0; ch < numTBlocks; ch++ )
4085
0
        {
4086
0
          compCbf[ ch ] |= ( TU::getCbfAtDepth( currTU, ComponentID(ch), currDepth + 1 ) ? 1 : 0 );
4087
0
        }
4088
0
      }
4089
4090
0
      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
4091
0
      {
4092
0
        TU::setCbfAtDepth   ( currTU, COMP_Y,  currDepth, compCbf[ COMP_Y  ] );
4093
0
        if( currArea.chromaFormat != CHROMA_400 )
4094
0
        {
4095
0
          TU::setCbfAtDepth ( currTU, COMP_Cb, currDepth, compCbf[ COMP_Cb ] );
4096
0
          TU::setCbfAtDepth ( currTU, COMP_Cr, currDepth, compCbf[ COMP_Cr ] );
4097
0
        }
4098
0
      }
4099
4100
0
      m_CABACEstimator->getCtx() = ctxStart;
4101
0
      m_CABACEstimator->resetBits();
4102
4103
      // when compID isn't a channel, code Cbfs:
4104
0
      xEncodeInterResidualQT( *csSplit, partitioner, MAX_NUM_TBLOCKS );
4105
4106
0
      for (uint32_t ch = 0; ch < numValidComp; ch++)
4107
0
      {
4108
0
        const ComponentID compID = ComponentID(ch);
4109
0
        xEncodeInterResidualQT( *csSplit, partitioner, compID );
4110
0
      }
4111
4112
0
      csSplit->fracBits = m_CABACEstimator->getEstFracBits();
4113
0
      csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
4114
0
    }
4115
0
  }
4116
24.3k
}
4117
4118
void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool skipResidual )
4119
24.3k
{
4120
24.3k
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );
4121
24.3k
  bool luma      = true;
4122
24.3k
  bool chroma    = cs.pcv->chrFormat != VVENC_CHROMA_400;
4123
24.3k
  if( cu.predMode == MODE_IBC )
4124
24.3k
  {
4125
24.3k
    luma    = !cu.mccNoLuma  ();
4126
24.3k
    chroma &= !cu.mccNoChroma();
4127
24.3k
  }
4128
24.3k
  if( cu.predMode == MODE_INTER )
4129
24.3k
    CHECK( CU::isSepTree(cu), "CU with Inter mode must be in single tree" );
4130
4131
24.3k
  const ChromaFormat format      = cs.area.chromaFormat;;
4132
24.3k
  const int  numValidComponents  = getNumberValidComponents(format);
4133
24.3k
  const SPS &sps                 = *cs.sps;
4134
24.3k
  const ReshapeData& reshapeData = cs.picture->reshapeData;
4135
4136
24.3k
  if( skipResidual ) //  No residual coding : SKIP mode
4137
0
  {
4138
0
    cu.skip    = true;
4139
0
    cu.rootCbf = false;
4140
0
    CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
4141
0
    cs.getResiBuf().fill(0);
4142
0
    cs.getRecoBuf().copyFrom(cs.getPredBuf() );
4143
0
    if( cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
4144
0
    {
4145
0
      cs.getRecoBuf().Y().rspSignal( reshapeData.getFwdLUT());
4146
0
    }
4147
4148
    // add new "empty" TU(s) spanning the whole CU
4149
0
    cs.addEmptyTUs( partitioner, &cu );
4150
0
    Distortion distortion = 0;
4151
4152
0
    for (int comp = 0; comp < numValidComponents; comp++)
4153
0
    {
4154
0
      const ComponentID compID = ComponentID(comp);
4155
0
      if (compID == COMP_Y && !luma)
4156
0
        continue;
4157
0
      if (compID != COMP_Y && !chroma)
4158
0
        continue;
4159
0
      CPelBuf reco = cs.getRecoBuf (compID);
4160
0
      CPelBuf org  = cs.getOrgBuf  (compID);
4161
0
      if ((cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4162
0
      {
4163
0
        const CompArea& areaY = cu.Y();
4164
0
        const CPelBuf orgLuma = cs.getOrgBuf( areaY );
4165
0
        if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4166
0
        {
4167
0
          PelBuf tmpRecLuma = cs.getRspRecoBuf();
4168
0
          tmpRecLuma.rspSignal(reco, reshapeData.getInvLUT());
4169
0
          distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[ CH_L ], compID, DF_SSE_WTD, &orgLuma);
4170
0
        }
4171
0
        else
4172
0
          distortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ CH_C ], compID, DF_SSE_WTD, &orgLuma );
4173
0
      }
4174
0
      else
4175
0
      {
4176
0
        distortion  += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
4177
0
      }
4178
0
    }
4179
4180
0
    CodingUnit& cu = *cs.getCU(partitioner.chType, TREE_D);
4181
0
    m_CABACEstimator->resetBits();
4182
0
    m_CABACEstimator->cu_skip_flag  ( cu );
4183
0
    m_CABACEstimator->merge_data(cu);
4184
0
    cs.fracBits = m_CABACEstimator->getEstFracBits();
4185
0
    cs.dist     = distortion;
4186
0
    cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
4187
4188
0
    return;
4189
0
  }
4190
4191
  //  Residual coding.
4192
24.3k
  if (luma)
4193
24.3k
  {
4194
24.3k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
4195
0
    {
4196
0
      if (!cu.ciip && !CU::isIBC(cu))
4197
0
      {
4198
0
        const CompArea& areaY = cu.Y();
4199
0
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(areaY);
4200
0
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
4201
0
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), tmpPred);
4202
0
      }
4203
0
      else
4204
0
      {
4205
0
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), cs.getPredBuf(COMP_Y));
4206
0
      }
4207
0
    }
4208
24.3k
    else
4209
24.3k
    {
4210
24.3k
      cs.getResiBuf(COMP_Y).subtract(cs.getOrgBuf(COMP_Y), cs.getPredBuf(COMP_Y));
4211
24.3k
    }
4212
24.3k
  }
4213
24.3k
  if (chroma)
4214
0
  {
4215
0
    cs.getResiBuf(COMP_Cb).subtract(cs.getOrgBuf(COMP_Cb), cs.getPredBuf(COMP_Cb));
4216
0
    cs.getResiBuf(COMP_Cr).subtract(cs.getOrgBuf(COMP_Cr), cs.getPredBuf(COMP_Cr));
4217
0
  }
4218
4219
24.3k
  Distortion zeroDistortion = 0;
4220
4221
24.3k
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
4222
4223
24.3k
  xEstimateInterResidualQT(cs, partitioner, &zeroDistortion );
4224
24.3k
  TransformUnit& firstTU = *cs.getTU( partitioner.chType );
4225
4226
24.3k
  cu.rootCbf = false;
4227
24.3k
  m_CABACEstimator->resetBits();
4228
24.3k
  m_CABACEstimator->rqt_root_cbf( cu );
4229
24.3k
  const uint64_t  zeroFracBits = m_CABACEstimator->getEstFracBits();
4230
24.3k
  double zeroCost = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled );
4231
4232
24.3k
  const int  numValidTBlocks   = getNumberValidTBlocks( *cs.pcv );
4233
97.3k
  for (uint32_t i = 0; i < numValidTBlocks; i++)
4234
73.0k
  {
4235
73.0k
    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
4236
73.0k
  }
4237
4238
  // -------------------------------------------------------
4239
  // If a block full of 0's is efficient, then just use 0's.
4240
  // The costs at this point do not include header bits.
4241
4242
24.3k
  if (zeroCost < cs.cost || !cu.rootCbf)
4243
24.1k
  {
4244
24.1k
    cu.sbtInfo = 0;
4245
24.1k
    cu.rootCbf = false;
4246
4247
24.1k
    cs.clearTUs();
4248
4249
    // add a new "empty" TU spanning the whole CU
4250
24.1k
    cs.addEmptyTUs( partitioner, &cu );
4251
24.1k
  }
4252
4253
  // all decisions now made. Fully encode the CU, including the headers:
4254
24.3k
  m_CABACEstimator->getCtx() = ctxStart;
4255
4256
24.3k
  uint64_t finalFracBits = xGetSymbolFracBitsInter( cs, partitioner );
4257
  // we've now encoded the CU, and so have a valid bit cost
4258
24.3k
  if (!cu.rootCbf)
4259
24.1k
  {
4260
24.1k
    if (luma)
4261
24.1k
    {
4262
24.1k
      cs.getResiBuf().bufs[0].fill(0); // Clear the residual image, if we didn't code it.
4263
24.1k
    }
4264
24.1k
    if (chroma && isChromaEnabled(cs.pcv->chrFormat))
4265
0
    {
4266
0
      cs.getResiBuf().bufs[1].fill(0); // Clear the residual image, if we didn't code it.
4267
0
      cs.getResiBuf().bufs[2].fill(0); // Clear the residual image, if we didn't code it.
4268
0
    }
4269
24.1k
  }
4270
24.3k
  if (luma)
4271
24.3k
  {
4272
24.3k
    if (cu.rootCbf && cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
4273
0
    {
4274
0
      if (!cu.ciip && !CU::isIBC(cu))
4275
0
      {
4276
0
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(cu.Y());
4277
0
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
4278
0
        cs.getRecoBuf(COMP_Y).reconstruct(tmpPred, cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
4279
0
      }
4280
0
      else
4281
0
      {
4282
0
        cs.getRecoBuf(COMP_Y).reconstruct(cs.getPredBuf(COMP_Y), cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
4283
0
      }
4284
0
    }
4285
24.3k
    else
4286
24.3k
    {
4287
24.3k
      cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs[COMP_Y]);
4288
24.3k
      if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
4289
0
      {
4290
0
        cs.getRecoBuf().bufs[0].rspSignal(reshapeData.getFwdLUT());
4291
0
      }
4292
24.3k
    }
4293
24.3k
  }
4294
24.3k
  if (chroma)
4295
0
  {
4296
0
    cs.getRecoBuf().bufs[1].reconstruct(cs.getPredBuf().bufs[1], cs.getResiBuf().bufs[1], cs.slice->clpRngs[COMP_Cb]);
4297
0
    cs.getRecoBuf().bufs[2].reconstruct(cs.getPredBuf().bufs[2], cs.getResiBuf().bufs[2], cs.slice->clpRngs[COMP_Cr]);
4298
0
  }
4299
  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
4300
24.3k
  Distortion finalDistortion = 0;
4301
4302
97.3k
  for (int comp = 0; comp < numValidComponents; comp++)
4303
73.0k
  {
4304
73.0k
    const ComponentID compID = ComponentID(comp);
4305
73.0k
    if (compID == COMP_Y && !luma)
4306
0
      continue;
4307
73.0k
    if (compID != COMP_Y && !chroma)
4308
48.6k
      continue;
4309
24.3k
    CPelBuf reco = cs.getRecoBuf (compID);
4310
24.3k
    CPelBuf org  = cs.getOrgBuf  (compID);
4311
4312
24.3k
    if( (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4313
0
    {
4314
0
      const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
4315
0
      if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
4316
0
      {
4317
0
        PelBuf tmpRecLuma = cs.getRspRecoBuf();
4318
0
        tmpRecLuma.rspSignal( reco, reshapeData.getInvLUT());
4319
0
        finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
4320
0
      }
4321
0
      else
4322
0
      {
4323
0
        finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
4324
0
      }
4325
0
    }
4326
24.3k
    else
4327
24.3k
    {
4328
24.3k
      finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE );
4329
24.3k
    }
4330
24.3k
  }
4331
4332
24.3k
  cs.dist     = finalDistortion;
4333
24.3k
  cs.fracBits = finalFracBits;
4334
24.3k
  cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);
4335
4336
24.3k
  CHECK(cs.tus.size() == 0, "No TUs present");
4337
24.3k
}
4338
4339
uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &partitioner)
4340
24.3k
{
4341
24.3k
  uint64_t fracBits   = 0;
4342
24.3k
  CodingUnit &cu    = *cs.getCU( partitioner.chType, partitioner.treeType );
4343
4344
24.3k
  m_CABACEstimator->resetBits();
4345
4346
24.3k
  if( cu.mergeFlag && !cu.rootCbf )
4347
0
  {
4348
0
    cu.skip = true;
4349
4350
0
    m_CABACEstimator->cu_skip_flag  ( cu );
4351
0
    if (!cu.ciip)
4352
0
    {
4353
0
      m_CABACEstimator->merge_data(cu);
4354
0
    }
4355
0
    fracBits   += m_CABACEstimator->getEstFracBits();
4356
0
  }
4357
24.3k
  else
4358
24.3k
  {
4359
24.3k
    CHECK( cu.skip, "Skip flag has to be off at this point!" );
4360
4361
24.3k
    if (cu.Y().valid())
4362
24.3k
    m_CABACEstimator->cu_skip_flag( cu );
4363
24.3k
    m_CABACEstimator->pred_mode   ( cu );
4364
24.3k
    m_CABACEstimator->cu_pred_data( cu );
4365
24.3k
    CUCtx cuCtx;
4366
24.3k
    cuCtx.isDQPCoded = true;
4367
24.3k
    cuCtx.isChromaQpAdjCoded = true;
4368
24.3k
    m_CABACEstimator->cu_residual ( cu, partitioner, cuCtx );
4369
24.3k
    fracBits       += m_CABACEstimator->getEstFracBits();
4370
24.3k
  }
4371
4372
24.3k
  return fracBits;
4373
24.3k
}
4374
4375
double InterSearch::xGetMEDistortionWeight(uint8_t BcwIdx, RefPicList refPicList)
4376
0
{
4377
0
  if( BcwIdx != BCW_DEFAULT )
4378
0
  {
4379
0
    return fabs( (double)getBcwWeight( BcwIdx, refPicList ) / (double)g_BcwWeightBase );
4380
0
  }
4381
0
  else
4382
0
  {
4383
0
    return 0.5;
4384
0
  }
4385
0
}
4386
4387
bool InterSearch::xReadBufferedUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost )
4388
0
{
4389
0
  if( m_uniMotions.isReadMode( (uint32_t)eRefPicList, (uint32_t)iRefIdx ) )
4390
0
  {
4391
0
    m_uniMotions.copyTo(rcMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx);
4392
4393
0
    Mv pred = pcMvPred;
4394
0
    pred.changeTransPrecInternal2Amvr( cu.imv );
4395
0
    m_pcRdCost->setPredictor(pred);
4396
0
    m_pcRdCost->setCostScale(0);
4397
4398
0
    Mv mv = rcMv;
4399
0
    mv.changeTransPrecInternal2Amvr( cu.imv );
4400
0
    uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
4401
4402
0
    ruiBits += mvBits;
4403
0
    ruiCost += m_pcRdCost->getCost(ruiBits);
4404
0
    return true;
4405
0
  }
4406
0
  return false;
4407
0
}
4408
4409
bool InterSearch::xReadBufferedAffineUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost, int& mvpIdx, const AffineAMVPInfo& aamvpi )
4410
0
{
4411
0
  if( m_uniMotions.isReadModeAffine( (uint32_t)eRefPicList, (uint32_t)iRefIdx, cu.affineType ) )
4412
0
  {
4413
0
    m_uniMotions.copyAffineMvTo( acMv, ruiCost, (uint32_t)eRefPicList, (uint32_t)iRefIdx, cu.affineType, mvpIdx );
4414
0
    m_pcRdCost->setCostScale(0);
4415
0
    acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
4416
0
    acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
4417
0
    acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
4418
4419
0
    uint32_t mvBits = 0;
4420
0
    for( int verIdx = 0; verIdx < ( cu.affineType ? 3 : 2 ); verIdx++ )
4421
0
    {
4422
0
      Mv pred = verIdx ? acMvPred[verIdx] + acMv[0] - acMvPred[0] : acMvPred[verIdx];
4423
0
      pred.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
4424
0
      m_pcRdCost->setPredictor(pred);
4425
0
      Mv mv = acMv[verIdx];
4426
0
      mv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
4427
0
      mvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
4428
0
    }
4429
0
    ruiBits += mvBits;
4430
0
    ruiCost += m_pcRdCost->getCost(ruiBits);
4431
0
    return true;
4432
0
  }
4433
0
  return false;
4434
0
}
4435
4436
void InterSearch::xSymMvdCheckBestMvp(
4437
  CodingUnit& cu,
4438
  CPelUnitBuf& origBuf,
4439
  Mv curMv,
4440
  RefPicList curRefList,
4441
  AMVPInfo amvpInfo[2][MAX_REF_PICS],
4442
  int32_t BcwIdx,
4443
  Mv cMvPredSym[2],
4444
  int32_t mvpIdxSym[2],
4445
  Distortion& bestCost,
4446
  bool skip
4447
)
4448
0
{
4449
0
  RefPicList tarRefList = (RefPicList)(1 - curRefList);
4450
0
  int32_t refIdxCur = cu.slice->symRefIdx[curRefList];
4451
0
  int32_t refIdxTar = cu.slice->symRefIdx[tarRefList];
4452
4453
0
  MvField cCurMvField, cTarMvField;
4454
0
  cCurMvField.setMvField(curMv, refIdxCur);
4455
0
  AMVPInfo& amvpCur = amvpInfo[curRefList][refIdxCur];
4456
0
  AMVPInfo& amvpTar = amvpInfo[tarRefList][refIdxTar];
4457
0
  m_pcRdCost->setCostScale(0);
4458
4459
0
  double fWeight = 0.0;
4460
0
  PelUnitBuf bufTmp;
4461
4462
  // get prediction of eCurRefPicList
4463
0
  PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getCompactBuf( cu );
4464
0
  const Picture* picRefA = cu.slice->getRefPic(curRefList, cCurMvField.refIdx);
4465
0
  Mv mvA = cCurMvField.mv;
4466
0
  xClipMvSearch( mvA, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4467
0
  xPredInterBlk( COMP_Y, cu, picRefA, mvA, predBufA, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4468
4469
0
  bufTmp = m_tmpStorageLCU.getCompactBuf( cu );
4470
0
  bufTmp.copyFrom( origBuf );
4471
0
  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, tarRefList )*/ );
4472
0
  fWeight = xGetMEDistortionWeight( cu.BcwIdx, tarRefList );
4473
4474
0
  int32_t skipMvpIdx[2];
4475
0
  skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1;
4476
0
  skipMvpIdx[1] = skip ? mvpIdxSym[1] : -1;
4477
4478
0
  for (int i = 0; i < amvpCur.numCand; i++)
4479
0
  {
4480
0
    for (int j = 0; j < amvpTar.numCand; j++)
4481
0
    {
4482
0
      if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j)
4483
0
        continue;
4484
4485
0
      Distortion cost = MAX_DISTORTION;
4486
0
      cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar);
4487
4488
      // get prediction of eTarRefPicList
4489
0
      PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getCompactBuf( cu );
4490
0
      const Picture* picRefB = cu.slice->getRefPic(tarRefList, cTarMvField.refIdx);
4491
0
      Mv mvB = cTarMvField.mv;
4492
0
      xClipMvSearch( mvB, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4493
0
      xPredInterBlk( COMP_Y, cu, picRefB, mvB, predBufB, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4494
4495
      // calc distortion
4496
0
      cost = ( Distortion ) floor( fWeight * ( double ) m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD ) );
4497
4498
0
      Mv pred = amvpCur.mvCand[i];
4499
0
      pred.changeTransPrecInternal2Amvr(cu.imv);
4500
0
      m_pcRdCost->setPredictor(pred);
4501
0
      Mv mv = curMv;
4502
0
      mv.changeTransPrecInternal2Amvr(cu.imv);
4503
0
      uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
4504
0
      bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS];
4505
0
      bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS];
4506
0
      cost += m_pcRdCost->getCost(bits);
4507
0
      if (cost < bestCost)
4508
0
      {
4509
0
        bestCost = cost;
4510
0
        cMvPredSym[curRefList] = amvpCur.mvCand[i];
4511
0
        cMvPredSym[tarRefList] = amvpTar.mvCand[j];
4512
0
        mvpIdxSym[curRefList] = i;
4513
0
        mvpIdxSym[tarRefList] = j;
4514
0
      }
4515
0
    }
4516
0
  }
4517
0
}
4518
4519
void InterSearch::resetSavedAffineMotion()
4520
139k
{
4521
417k
  for (int i = 0; i < 2; i++)
4522
278k
  {
4523
835k
    for (int j = 0; j < 2; j++)
4524
557k
    {
4525
557k
      m_affineMotion.acMvAffine4Para[i][j] = Mv(0, 0);
4526
557k
      m_affineMotion.acMvAffine6Para[i][j] = Mv(0, 0);
4527
557k
    }
4528
278k
    m_affineMotion.acMvAffine6Para[i][2] = Mv(0, 0);
4529
4530
278k
    m_affineMotion.affine4ParaRefIdx[i] = -1;
4531
278k
    m_affineMotion.affine6ParaRefIdx[i] = -1;
4532
278k
  }
4533
139k
  m_affineMotion.affine4ParaAvail = false;
4534
139k
  m_affineMotion.affine6ParaAvail = false;
4535
139k
}
4536
4537
void InterSearch::storeAffineMotion(Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int BcwIdx)
4538
0
{
4539
0
  if ((BcwIdx == BCW_DEFAULT || !m_affineMotion.affine6ParaAvail) && affineType == AFFINEMODEL_6PARAM)
4540
0
  {
4541
0
    for (int i = 0; i < 2; i++)
4542
0
    {
4543
0
      for (int j = 0; j < 3; j++)
4544
0
      {
4545
0
        m_affineMotion.acMvAffine6Para[i][j] = acAffineMv[i][j];
4546
0
      }
4547
0
      m_affineMotion.affine6ParaRefIdx[i] = affineRefIdx[i];
4548
0
    }
4549
0
    m_affineMotion.affine6ParaAvail = true;
4550
0
  }
4551
4552
0
  if ((BcwIdx == BCW_DEFAULT || !m_affineMotion.affine4ParaAvail) && affineType == AFFINEMODEL_4PARAM)
4553
0
  {
4554
0
    for (int i = 0; i < 2; i++)
4555
0
    {
4556
0
      for (int j = 0; j < 2; j++)
4557
0
      {
4558
0
        m_affineMotion.acMvAffine4Para[i][j] = acAffineMv[i][j];
4559
0
      }
4560
0
      m_affineMotion.affine4ParaRefIdx[i] = affineRefIdx[i];
4561
0
    }
4562
0
    m_affineMotion.affine4ParaAvail = true;
4563
0
  }
4564
0
}
4565
4566
void InterSearch::xPredAffineInterSearch( CodingUnit& cu,
4567
                                          CPelUnitBuf&    origBuf,
4568
                                          int             puIdx,
4569
                                          uint32_t&       lastMode,
4570
                                          Distortion&     affineCost,
4571
                                          Mv              hevcMv[2][MAX_REF_PICS],
4572
                                          Mv              mvAffine4Para[2][MAX_REF_PICS][3],
4573
                                          int             refIdx4Para[2],
4574
                                          uint8_t         BcwIdx,
4575
                                          bool            enforceBcwPred,
4576
                                          uint32_t        BcwIdxBits )
4577
0
{
4578
0
  const Slice &slice = *cu.slice;
4579
4580
0
  affineCost = MAX_DISTORTION;
4581
4582
0
  Mv        cMvZero;
4583
0
  Mv        aacMv[2][3];
4584
0
  Mv        cMvBi[2][3];
4585
0
  AffineMVInfo tmp;
4586
4587
0
  int       iNumPredDir = slice.isInterP() ? 1 : 2;
4588
4589
0
  int mvNum = 2;
4590
0
  mvNum = cu.affineType ? 3 : 2;
4591
4592
  // Mvp
4593
0
  Mv        cMvPred[2][MAX_REF_PICS][3];
4594
0
  Mv        cMvPredBi[2][MAX_REF_PICS][3];
4595
0
  int       aaiMvpIdxBi[2][MAX_REF_PICS];
4596
0
  int       aaiMvpIdx[2][MAX_REF_PICS];
4597
0
  int       aaiMvpNum[2][MAX_REF_PICS];
4598
4599
0
  AffineAMVPInfo aacAffineAMVPInfo[2][MAX_REF_PICS];
4600
0
  AffineAMVPInfo affiAMVPInfoTemp[2];
4601
4602
0
  uint32_t      uiMbBits[3] = { 1, 1, 0 };
4603
0
  int           iRefIdx[2] = { 0,0 }; // If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
4604
0
  int           iRefIdxBi[2];
4605
0
  int           iRefStart, iRefEnd;
4606
0
  int           bestBiPRefIdxL1 = 0;
4607
0
  int           bestBiPMvpL1 = 0;
4608
0
  Distortion    biPDistTemp = MAX_DISTORTION;
4609
4610
0
  Distortion    uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
4611
0
  Distortion    uiCostBi = MAX_DISTORTION;
4612
0
  Distortion    uiCostTemp;
4613
4614
0
  uint32_t      uiBits[3] = { 0 };
4615
0
  uint32_t      uiBitsTemp;
4616
0
  Distortion    bestBiPDist = MAX_DISTORTION;
4617
4618
0
  Distortion    uiCostTempL0[MAX_NUM_REF];
4619
0
  for (int iNumRef = 0; iNumRef < MAX_NUM_REF; iNumRef++)
4620
0
  {
4621
0
    uiCostTempL0[iNumRef] = MAX_DISTORTION;
4622
0
  }
4623
0
  uint32_t      uiBitsTempL0[MAX_NUM_REF];
4624
4625
0
  Mv            mvValidList1[4];
4626
0
  int           refIdxValidList1 = 0;
4627
0
  uint32_t      bitsValidList1 = MAX_UINT;
4628
0
  Distortion    costValidList1 = MAX_DISTORTION;
4629
0
  Mv            mvHevc[3];
4630
0
  const bool    affineAmvrEnabled = false;
4631
4632
0
  xGetBlkBits(slice.isInterP(), puIdx, lastMode, uiMbBits);
4633
4634
0
  cu.affine = true;
4635
0
  cu.mergeFlag = false;
4636
0
  if (BcwIdx != BCW_DEFAULT)
4637
0
  {
4638
0
    cu.BcwIdx = BcwIdx;
4639
0
  }
4640
4641
  // Uni-directional prediction
4642
0
  for (int iRefList = 0; iRefList < iNumPredDir; iRefList++)
4643
0
  {
4644
0
    RefPicList  refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
4645
0
    cu.interDir = (iRefList ? 2 : 1);
4646
0
    for (int iRefIdxTemp = 0; iRefIdxTemp < slice.numRefIdx[refPicList]; iRefIdxTemp++)
4647
0
    {
4648
      // Get RefIdx bits
4649
0
      uiBitsTemp = uiMbBits[iRefList];
4650
0
      if (slice.numRefIdx[refPicList] > 1)
4651
0
      {
4652
0
        uiBitsTemp += iRefIdxTemp + 1;
4653
0
        if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
4654
0
        {
4655
0
          uiBitsTemp--;
4656
0
        }
4657
0
      }
4658
4659
      // Do Affine AMVP
4660
0
      bool foundPred = xEstimateAffineAMVP(cu, affiAMVPInfoTemp[refPicList], origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], biPDistTemp);
4661
0
      if( !foundPred )
4662
0
        return;
4663
4664
0
      if (affineAmvrEnabled)
4665
0
      {
4666
0
        biPDistTemp += m_pcRdCost->getCost(xCalcAffineMVBits(cu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]));
4667
0
      }
4668
0
      aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
4669
0
      aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];;
4670
0
      if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
4671
0
      {
4672
0
        xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4673
0
        continue;
4674
0
      }
4675
4676
      // set hevc ME result as start search position when it is best than mvp
4677
0
      for (int i = 0; i<3; i++)
4678
0
      {
4679
0
        mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
4680
0
        mvHevc[i].roundAffinePrecInternal2Amvr(cu.imv);
4681
0
      }
4682
0
      PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf(cu);
4683
4684
0
      Distortion uiCandCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
4685
0
        AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4686
4687
0
      if (affineAmvrEnabled)
4688
0
      {
4689
0
        uiCandCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvHevc, cMvPred[iRefList][iRefIdxTemp]));
4690
0
      }
4691
4692
      //check stored affine motion
4693
0
      bool affine4Para = cu.affineType == AFFINEMODEL_4PARAM;
4694
0
      bool savedParaAvail = cu.imv && ((m_affineMotion.affine4ParaRefIdx[iRefList] == iRefIdxTemp && affine4Para && m_affineMotion.affine4ParaAvail) ||
4695
0
        (m_affineMotion.affine6ParaRefIdx[iRefList] == iRefIdxTemp && !affine4Para && m_affineMotion.affine6ParaAvail));
4696
4697
0
      if (savedParaAvail)
4698
0
      {
4699
0
        Mv mvFour[3];
4700
0
        for (int i = 0; i < mvNum; i++)
4701
0
        {
4702
0
          mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
4703
0
          mvFour[i].roundAffinePrecInternal2Amvr(cu.imv);
4704
0
        }
4705
4706
0
        Distortion candCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4707
0
        candCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4708
4709
0
        if (candCostInherit < uiCandCost)
4710
0
        {
4711
0
          uiCandCost = candCostInherit;
4712
0
          memcpy(mvHevc, mvFour, 3 * sizeof(Mv));
4713
0
        }
4714
0
      }
4715
4716
0
      if( cu.affineType == AFFINEMODEL_4PARAM && m_AffineProfList->m_affMVListSize && (!cu.cs->sps->BCW || BcwIdx == BCW_DEFAULT ) )
4717
0
      {
4718
0
        int shift = MAX_CU_DEPTH;
4719
0
        for (int i = 0; i < m_AffineProfList->m_affMVListSize; i++)
4720
0
        {
4721
0
          AffineMVInfo *mvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - i - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4722
          //check;
4723
0
          int j = 0;
4724
0
          for (; j < i; j++)
4725
0
          {
4726
0
            AffineMVInfo *prevMvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - j - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4727
0
            if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) &&
4728
0
              (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1])
4729
0
              && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y)
4730
0
              && (mvInfo->w == prevMvInfo->w)
4731
0
              )
4732
0
            {
4733
0
              break;
4734
0
            }
4735
0
          }
4736
0
          if (j < i)
4737
0
            continue;
4738
4739
0
          Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp];
4740
0
          int vx, vy;
4741
0
          int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
4742
0
          int mvScaleHor = nbMv[0].hor * (1<< shift);
4743
0
          int mvScaleVer = nbMv[0].ver * (1<< shift);
4744
0
          Mv dMv = nbMv[1] - nbMv[0];
4745
0
          dMvHorX = dMv.hor *(1<<(shift - Log2(mvInfo->w)));
4746
0
          dMvHorY = dMv.ver *(1<< (shift - Log2(mvInfo->w)));
4747
0
          dMvVerX = -dMvHorY;
4748
0
          dMvVerY = dMvHorX;
4749
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4750
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4751
0
          roundAffineMv(vx, vy, shift);
4752
0
          mvTmp[0] = Mv(vx, vy);
4753
0
          mvTmp[0].clipToStorageBitDepth();
4754
0
          clipMv(mvTmp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4755
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4756
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4757
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4758
0
          roundAffineMv(vx, vy, shift);
4759
0
          mvTmp[1] = Mv(vx, vy);
4760
0
          mvTmp[1].clipToStorageBitDepth();
4761
0
          clipMv(mvTmp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4762
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4763
0
          mvTmp[1].roundAffinePrecInternal2Amvr(cu.imv);
4764
0
          Distortion tmpCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4765
0
          if (affineAmvrEnabled)
4766
0
          {
4767
0
            tmpCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvTmp, cMvPred[iRefList][iRefIdxTemp]));
4768
0
          }
4769
0
          if (tmpCost < uiCandCost)
4770
0
          {
4771
0
            uiCandCost = tmpCost;
4772
0
            std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv));
4773
0
          }
4774
0
        }
4775
0
      }
4776
0
      if (cu.affineType == AFFINEMODEL_6PARAM)
4777
0
      {
4778
0
        Mv mvFour[3];
4779
0
        mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
4780
0
        mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
4781
0
        mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(cu.imv);
4782
0
        mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(cu.imv);
4783
4784
0
        int shift = MAX_CU_DEPTH;
4785
0
        int vx2 = (mvFour[0].hor * (1<< shift)) - ((mvFour[1].ver - mvFour[0].ver) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4786
0
        int vy2 = (mvFour[0].ver * (1<< shift)) + ((mvFour[1].hor - mvFour[0].hor) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4787
0
        int offset = (1 << (shift - 1));
4788
0
        vx2 = (vx2 + offset - (vx2 >= 0)) >> shift;
4789
0
        vy2 = (vy2 + offset - (vy2 >= 0)) >> shift;
4790
0
        mvFour[2].hor = vx2;
4791
0
        mvFour[2].ver = vy2;
4792
0
        mvFour[2].clipToStorageBitDepth();
4793
0
        mvFour[0].roundAffinePrecInternal2Amvr(cu.imv);
4794
0
        mvFour[1].roundAffinePrecInternal2Amvr(cu.imv);
4795
0
        mvFour[2].roundAffinePrecInternal2Amvr(cu.imv);
4796
0
        Distortion uiCandCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4797
0
        if (affineAmvrEnabled)
4798
0
        {
4799
0
          uiCandCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4800
0
        }
4801
0
        if (uiCandCostInherit < uiCandCost)
4802
0
        {
4803
0
          uiCandCost = uiCandCostInherit;
4804
0
          for (int i = 0; i < 3; i++)
4805
0
          {
4806
0
            mvHevc[i] = mvFour[i];
4807
0
          }
4808
0
        }
4809
0
      }
4810
4811
0
      if (uiCandCost < biPDistTemp)
4812
0
      {
4813
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv) * 3);
4814
0
      }
4815
0
      else
4816
0
      {
4817
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4818
0
      }
4819
4820
      // GPB list 1, save the best MvpIdx, RefIdx and Cost
4821
0
      if (slice.picHeader->mvdL1Zero && iRefList == 1 && biPDistTemp < bestBiPDist)
4822
0
      {
4823
0
        bestBiPDist = biPDistTemp;
4824
0
        bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
4825
0
        bestBiPRefIdxL1 = iRefIdxTemp;
4826
0
      }
4827
4828
      // Update bits
4829
0
      uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
4830
4831
0
      if (m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1)   // list 1
4832
0
      {
4833
0
        if (slice.list1IdxToList0Idx[iRefIdxTemp] >= 0 && (cu.affineType != AFFINEMODEL_6PARAM || slice.list1IdxToList0Idx[iRefIdxTemp] == refIdx4Para[0]))
4834
0
        {
4835
0
          int iList1ToList0Idx = slice.list1IdxToList0Idx[iRefIdxTemp];
4836
0
          ::memcpy(tmp.affMVs[1][iRefIdxTemp], tmp.affMVs[0][iList1ToList0Idx], sizeof(Mv) * 3);
4837
0
          uiCostTemp = uiCostTempL0[iList1ToList0Idx];
4838
4839
0
          uiCostTemp -= m_pcRdCost->getCost(uiBitsTempL0[iList1ToList0Idx]);
4840
0
          uiBitsTemp += xCalcAffineMVBits(cu, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]);
4841
          /*calculate the correct cost*/
4842
0
          uiCostTemp += m_pcRdCost->getCost(uiBitsTemp);
4843
0
          DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
4844
0
        }
4845
0
        else
4846
0
        {
4847
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4848
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4849
0
        }
4850
0
      }
4851
0
      else
4852
0
      {
4853
0
        xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4854
0
                                uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4855
0
      }
4856
      
4857
0
      if( slice.sps->BCW && cu.BcwIdx == BCW_DEFAULT && slice.isInterB() )
4858
0
      {
4859
0
        m_uniMotions.setReadModeAffine( true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType );
4860
0
        m_uniMotions.copyAffineMvFrom( tmp.affMVs[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType,
4861
0
                                       aaiMvpIdx[iRefList][iRefIdxTemp] );
4862
0
      }
4863
4864
      // Set best AMVP Index
4865
0
      xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4866
0
      if (cu.imv != 2)//|| !m_pcEncCfg->getUseAffineAmvrEncOpt())
4867
0
        xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
4868
4869
0
      if (iRefList == 0)
4870
0
      {
4871
0
        uiCostTempL0[iRefIdxTemp] = uiCostTemp;
4872
0
        uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
4873
0
      }
4874
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp, uiCost[iRefList]);
4875
0
      if (uiCostTemp < uiCost[iRefList])
4876
0
      {
4877
0
        uiCost[iRefList] = uiCostTemp;
4878
0
        uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
4879
4880
                                       // set best motion
4881
0
        ::memcpy(aacMv[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4882
0
        iRefIdx[iRefList] = iRefIdxTemp;
4883
0
      }
4884
4885
0
      if (iRefList == 1 && uiCostTemp < costValidList1 && slice.list1IdxToList0Idx[iRefIdxTemp] < 0)
4886
0
      {
4887
0
        costValidList1 = uiCostTemp;
4888
0
        bitsValidList1 = uiBitsTemp;
4889
4890
        // set motion
4891
0
        memcpy(mvValidList1, tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4892
0
        refIdxValidList1 = iRefIdxTemp;
4893
0
      }
4894
0
    } // End refIdx loop
4895
0
  } // end Uni-prediction
4896
4897
0
  if (cu.affineType == AFFINEMODEL_4PARAM)
4898
0
  {
4899
0
    ::memcpy(mvAffine4Para, tmp.affMVs, sizeof(tmp.affMVs));
4900
0
    if (cu.imv == IMV_OFF)
4901
0
    {
4902
0
      m_AffineProfList->insert( tmp, cu.Y());
4903
0
    }
4904
0
  }
4905
4906
  // Bi-directional prediction
4907
0
  if (slice.isInterB() && !CU::isBipredRestriction(cu))
4908
0
  {
4909
0
    cu.interDir = 3;
4910
0
    m_isBi = true;
4911
4912
    // Set as best list0 and list1
4913
0
    iRefIdxBi[0] = iRefIdx[0];
4914
0
    iRefIdxBi[1] = iRefIdx[1];
4915
4916
0
    ::memcpy(cMvBi, aacMv, sizeof(aacMv));
4917
0
    ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred));
4918
0
    ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx));
4919
4920
0
    uint32_t uiMotBits[2];
4921
0
    bool doBiPred = true;
4922
4923
0
    if (slice.picHeader->mvdL1Zero) // GPB, list 1 only use Mvp
4924
0
    {
4925
0
      xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1]);
4926
0
      cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
4927
0
      aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
4928
4929
      // Set Mv for list1
4930
0
      Mv pcMvTemp[3] = { affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLT[bestBiPMvpL1],
4931
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandRT[bestBiPMvpL1],
4932
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLB[bestBiPMvpL1] };
4933
0
      ::memcpy(cMvPredBi[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4934
0
      ::memcpy(cMvBi[1], pcMvTemp, sizeof(Mv) * 3);
4935
0
      ::memcpy(tmp.affMVs[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4936
0
      iRefIdxBi[1] = bestBiPRefIdxL1;
4937
4938
0
      if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, pcMvTemp, m_pcEncCfg->m_ifpLines ) )
4939
0
      {
4940
        // this mvp cannot be used for mv, skip Bi-pred
4941
0
        uiCostBi = MAX_DISTORTION;
4942
0
        doBiPred = false;
4943
0
      }
4944
0
      else
4945
0
      {
4946
4947
        // Get list1 prediction block
4948
0
        CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
4949
0
        cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
4950
4951
0
        PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
4952
0
        motionCompensation(cu, predBufTmp, REF_PIC_LIST_1);
4953
4954
        // Update bits
4955
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
4956
0
        uiMotBits[1] = uiMbBits[1];
4957
4958
0
        if (slice.numRefIdx[REF_PIC_LIST_1] > 1)
4959
0
        {
4960
0
          uiMotBits[1] += bestBiPRefIdxL1 + 1;
4961
0
          if (bestBiPRefIdxL1 == slice.numRefIdx[REF_PIC_LIST_1] - 1)
4962
0
          {
4963
0
            uiMotBits[1]--;
4964
0
          }
4965
0
        }
4966
0
        uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
4967
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4968
0
      }
4969
0
    }
4970
0
    else
4971
0
    {
4972
0
      uiMotBits[0] = uiBits[0] - uiMbBits[0];
4973
0
      uiMotBits[1] = uiBits[1] - uiMbBits[1];
4974
0
      uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4975
0
    }
4976
4977
0
    if (doBiPred)
4978
0
    {
4979
      // 4-times iteration (default)
4980
0
      int iNumIter = 4;
4981
      // fast encoder setting or GPB: only one iteration
4982
0
      if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2 || slice.picHeader->mvdL1Zero)
4983
0
      {
4984
0
        iNumIter = 1;
4985
0
      }
4986
4987
0
      for (int iIter = 0; iIter < iNumIter; iIter++)
4988
0
      {
4989
        // Set RefList
4990
0
        int iRefList = iIter % 2;
4991
0
        if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2)
4992
0
        {
4993
0
          if (uiCost[0] <= uiCost[1])
4994
0
          {
4995
0
            iRefList = 1;
4996
0
          }
4997
0
          else
4998
0
          {
4999
0
            iRefList = 0;
5000
0
          }
5001
0
        }
5002
0
        else if (iIter == 0)
5003
0
        {
5004
0
          iRefList = 0;
5005
0
        }
5006
5007
        // First iterate, get prediction block of opposite direction
5008
0
        if (iIter == 0 && !slice.picHeader->mvdL1Zero)
5009
0
        {
5010
0
          if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, aacMv[1 - iRefList], m_pcEncCfg->m_ifpLines ) )
5011
0
          {
5012
0
            continue;
5013
0
          }
5014
5015
0
          CU::setAllAffineMv(cu, aacMv[1 - iRefList][0], aacMv[1 - iRefList][1], aacMv[1 - iRefList][2], RefPicList(1 - iRefList));
5016
0
          cu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
5017
5018
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
5019
0
          motionCompensation(cu, predBufTmp, RefPicList(1 - iRefList));
5020
0
        }
5021
5022
0
        RefPicList refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
5023
5024
0
        if (slice.picHeader->mvdL1Zero) // GPB, fix List 1, search List 0
5025
0
        {
5026
0
          iRefList = 0;
5027
0
          refPicList = REF_PIC_LIST_0;
5028
0
        }
5029
5030
0
        bool bChanged = false;
5031
5032
0
        iRefStart = 0;
5033
0
        iRefEnd = slice.numRefIdx[refPicList] - 1;
5034
0
        for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
5035
0
        {
5036
0
          if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
5037
0
          {
5038
0
            continue;
5039
0
          }
5040
          // update bits
5041
0
          uiBitsTemp = uiMbBits[2] + uiMotBits[1 - iRefList];
5042
0
          uiBitsTemp += ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5043
0
          if (slice.numRefIdx[refPicList] > 1)
5044
0
          {
5045
0
            uiBitsTemp += iRefIdxTemp + 1;
5046
0
            if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
5047
0
            {
5048
0
              uiBitsTemp--;
5049
0
            }
5050
0
          }
5051
0
          uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
5052
5053
          // call Affine ME
5054
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
5055
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp], true);
5056
0
          xCopyAffineAMVPInfo(aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
5057
0
          if (cu.imv != 2)
5058
0
          {
5059
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
5060
0
          }
5061
5062
0
          if (uiCostTemp < uiCostBi)
5063
0
          {
5064
0
            bChanged = true;
5065
0
            ::memcpy(cMvBi[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
5066
0
            iRefIdxBi[iRefList] = iRefIdxTemp;
5067
5068
0
            uiCostBi = uiCostTemp;
5069
0
            uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1 - iRefList];
5070
0
            uiMotBits[iRefList] -= ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5071
0
            uiBits[2] = uiBitsTemp;
5072
5073
0
            if (iNumIter != 1) // MC for next iter
5074
0
            {
5075
              //  Set motion
5076
0
              CU::setAllAffineMv(cu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], refPicList);
5077
0
              cu.refIdx[refPicList] = iRefIdxBi[refPicList];
5078
0
              PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
5079
0
              motionCompensation(cu, predBufTmp, refPicList);
5080
0
            }
5081
0
          }
5082
0
        } // for loop-iRefIdxTemp
5083
5084
0
        if (!bChanged)
5085
0
        {
5086
0
          if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
5087
0
          {
5088
0
            xCopyAffineAMVPInfo(aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0]);
5089
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi);
5090
5091
0
            if (!slice.picHeader->mvdL1Zero)
5092
0
            {
5093
0
              xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1]);
5094
0
              xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi);
5095
0
            }
5096
0
          }
5097
0
          break;
5098
0
        }
5099
0
      } // for loop-iter
5100
0
    }
5101
0
    m_isBi = false;
5102
0
  } // if (B_SLICE)
5103
5104
0
  cu.mv [REF_PIC_LIST_0][0] = Mv();
5105
0
  cu.mv [REF_PIC_LIST_1][0] = Mv();
5106
0
  cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
5107
0
  cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
5108
0
  cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
5109
0
  cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
5110
0
  cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
5111
0
  cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
5112
0
  cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
5113
0
  cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
5114
5115
0
  for (int verIdx = 0; verIdx < 3; verIdx++)
5116
0
  {
5117
0
    cu.mvd[REF_PIC_LIST_0][verIdx] = cMvZero;
5118
0
    cu.mvd[REF_PIC_LIST_1][verIdx] = cMvZero;
5119
0
  }
5120
5121
  // Set Motion Field
5122
0
  memcpy(aacMv[1], mvValidList1, sizeof(Mv) * 3);
5123
0
  iRefIdx[1] = refIdxValidList1;
5124
0
  uiBits[1] = bitsValidList1;
5125
0
  uiCost[1] = costValidList1;
5126
5127
0
  if (enforceBcwPred)
5128
0
  {
5129
0
    uiCost[0] = uiCost[1] = MAX_UINT;
5130
0
  }
5131
5132
  // Affine ME result set
5133
0
  if (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) // Bi
5134
0
  {
5135
0
    lastMode = 2;
5136
0
    affineCost = uiCostBi;
5137
0
    cu.interDir = 3;
5138
0
    CU::setAllAffineMv(cu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0);
5139
0
    CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
5140
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
5141
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
5142
5143
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5144
0
    {
5145
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx];
5146
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = cMvBi[1][verIdx] - cMvPredBi[1][iRefIdxBi[1]][verIdx];
5147
0
      if (verIdx != 0)
5148
0
      {
5149
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5150
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5151
0
      }
5152
0
    }
5153
5154
5155
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
5156
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
5157
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
5158
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
5159
0
  }
5160
0
  else if (uiCost[0] <= uiCost[1]) // List 0
5161
0
  {
5162
0
    lastMode = 0;
5163
0
    affineCost = uiCost[0];
5164
0
    cu.interDir = 1;
5165
0
    CU::setAllAffineMv(cu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0);
5166
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
5167
5168
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5169
0
    {
5170
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx];
5171
0
      if (verIdx != 0)
5172
0
      {
5173
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5174
0
      }
5175
0
    }
5176
5177
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
5178
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
5179
0
  }
5180
0
  else
5181
0
  {
5182
0
    lastMode = 1;
5183
0
    affineCost = uiCost[1];
5184
0
    cu.interDir = 2;
5185
0
    CU::setAllAffineMv(cu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1);
5186
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
5187
5188
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5189
0
    {
5190
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx];
5191
0
      if (verIdx != 0)
5192
0
      {
5193
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5194
0
      }
5195
0
    }
5196
5197
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
5198
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
5199
0
  }
5200
0
  if (BcwIdx != BCW_DEFAULT)
5201
0
  {
5202
0
    cu.BcwIdx = BCW_DEFAULT;
5203
0
  }
5204
0
}
5205
5206
Distortion InterSearch::xGetAffineTemplateCost(CodingUnit& cu, CPelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList refPicList, int iRefIdx)
5207
0
{
5208
0
  Distortion uiCost = MAX_DISTORTION;
5209
5210
0
  const Picture* picRef = cu.slice->getRefPic(refPicList, iRefIdx);
5211
5212
  // prediction pattern
5213
0
  Mv mv[3];
5214
0
  memcpy(mv, acMvCand, sizeof(mv));
5215
5216
0
  if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, mv, m_pcEncCfg->m_ifpLines ) )
5217
0
  {
5218
0
    return MAX_DISTORTION>>1;  
5219
0
  }
5220
5221
0
  xPredAffineBlk(COMP_Y, cu, picRef, mv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5222
5223
  // calc distortion
5224
0
  uiCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD );
5225
0
  uiCost += m_pcRdCost->getCost(m_auiMVPIdxCost[iMVPIdx][iMVPNum]);
5226
5227
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCost);
5228
0
  return uiCost;
5229
0
}
5230
5231
void solveEqual(double** dEqualCoeff, int iOrder, double* dAffinePara)
5232
0
{
5233
0
  for (int k = 0; k < iOrder; k++)
5234
0
  {
5235
0
    dAffinePara[k] = 0.;
5236
0
  }
5237
5238
  // row echelon
5239
0
  for (int i = 1; i < iOrder; i++)
5240
0
  {
5241
    // find column max
5242
0
    double temp = fabs(dEqualCoeff[i][i - 1]);
5243
0
    int tempIdx = i;
5244
0
    for (int j = i + 1; j < iOrder + 1; j++)
5245
0
    {
5246
0
      if (fabs(dEqualCoeff[j][i - 1]) > temp)
5247
0
      {
5248
0
        temp = fabs(dEqualCoeff[j][i - 1]);
5249
0
        tempIdx = j;
5250
0
      }
5251
0
    }
5252
5253
    // swap line
5254
0
    if (tempIdx != i)
5255
0
    {
5256
0
      for (int j = 0; j < iOrder + 1; j++)
5257
0
      {
5258
0
        dEqualCoeff[0][j] = dEqualCoeff[i][j];
5259
0
        dEqualCoeff[i][j] = dEqualCoeff[tempIdx][j];
5260
0
        dEqualCoeff[tempIdx][j] = dEqualCoeff[0][j];
5261
0
      }
5262
0
    }
5263
5264
    // elimination first column
5265
0
    if (dEqualCoeff[i][i - 1] == 0.)
5266
0
    {
5267
0
      return;
5268
0
    }
5269
0
    for (int j = i + 1; j < iOrder + 1; j++)
5270
0
    {
5271
0
      for (int k = i; k < iOrder + 1; k++)
5272
0
      {
5273
0
        dEqualCoeff[j][k] = dEqualCoeff[j][k] - dEqualCoeff[i][k] * dEqualCoeff[j][i - 1] / dEqualCoeff[i][i - 1];
5274
0
      }
5275
0
    }
5276
0
  }
5277
5278
0
  if (dEqualCoeff[iOrder][iOrder - 1] == 0.)
5279
0
  {
5280
0
    return;
5281
0
  }
5282
0
  dAffinePara[iOrder - 1] = dEqualCoeff[iOrder][iOrder] / dEqualCoeff[iOrder][iOrder - 1];
5283
0
  for (int i = iOrder - 2; i >= 0; i--)
5284
0
  {
5285
0
    if (dEqualCoeff[i + 1][i] == 0.)
5286
0
    {
5287
0
      for (int k = 0; k < iOrder; k++)
5288
0
      {
5289
0
        dAffinePara[k] = 0.;
5290
0
      }
5291
0
      return;
5292
0
    }
5293
0
    double temp = 0;
5294
0
    for (int j = i + 1; j < iOrder; j++)
5295
0
    {
5296
0
      temp += dEqualCoeff[i + 1][j] * dAffinePara[j];
5297
0
    }
5298
0
    dAffinePara[i] = (dEqualCoeff[i + 1][iOrder] - temp) / dEqualCoeff[i + 1][i];
5299
0
  }
5300
0
}
5301
5302
void InterSearch::xCheckBestAffineMVP(CodingUnit& cu, AffineAMVPInfo &affineAMVPInfo, RefPicList refPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost)
5303
0
{
5304
0
  if (affineAMVPInfo.numCand < 2)
5305
0
  {
5306
0
    return;
5307
0
  }
5308
5309
0
  int mvNum = cu.affineType ? 3 : 2;
5310
5311
0
  m_pcRdCost->selectMotionLambda();
5312
0
  m_pcRdCost->setCostScale(0);
5313
5314
0
  int iBestMVPIdx = riMVPIdx;
5315
5316
  // Get origin MV bits
5317
0
  Mv tmpPredMv[3];
5318
0
  int iOrgMvBits = xCalcAffineMVBits(cu, acMv, acMvPred);
5319
0
  iOrgMvBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];
5320
5321
0
  int iBestMvBits = iOrgMvBits;
5322
0
  for (int iMVPIdx = 0; iMVPIdx < affineAMVPInfo.numCand; iMVPIdx++)
5323
0
  {
5324
0
    if (iMVPIdx == riMVPIdx)
5325
0
    {
5326
0
      continue;
5327
0
    }
5328
0
    tmpPredMv[0] = affineAMVPInfo.mvCandLT[iMVPIdx];
5329
0
    tmpPredMv[1] = affineAMVPInfo.mvCandRT[iMVPIdx];
5330
0
    if (mvNum == 3)
5331
0
    {
5332
0
      tmpPredMv[2] = affineAMVPInfo.mvCandLB[iMVPIdx];
5333
0
    }
5334
0
    int iMvBits = xCalcAffineMVBits(cu, acMv, tmpPredMv);
5335
0
    iMvBits += m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];
5336
5337
0
    if (iMvBits < iBestMvBits)
5338
0
    {
5339
0
      iBestMvBits = iMvBits;
5340
0
      iBestMVPIdx = iMVPIdx;
5341
0
    }
5342
0
  }
5343
5344
0
  if (iBestMVPIdx != riMVPIdx)  // if changed
5345
0
  {
5346
0
    acMvPred[0] = affineAMVPInfo.mvCandLT[iBestMVPIdx];
5347
0
    acMvPred[1] = affineAMVPInfo.mvCandRT[iBestMVPIdx];
5348
0
    acMvPred[2] = affineAMVPInfo.mvCandLB[iBestMVPIdx];
5349
0
    riMVPIdx = iBestMVPIdx;
5350
0
    uint32_t uiOrgBits = ruiBits;
5351
0
    ruiBits = uiOrgBits - iOrgMvBits + iBestMvBits;
5352
0
    ruiCost = (ruiCost - m_pcRdCost->getCost(uiOrgBits)) + m_pcRdCost->getCost(ruiBits);
5353
0
  }
5354
0
}
5355
5356
void InterSearch::xAffineMotionEstimation(CodingUnit& cu,
5357
  CPelUnitBuf&    origBuf,
5358
  RefPicList      refPicList,
5359
  Mv              acMvPred[3],
5360
  int             iRefIdxPred,
5361
  Mv              acMv[3],
5362
  uint32_t&       ruiBits,
5363
  Distortion&     ruiCost,
5364
  int&            mvpIdx,
5365
  const AffineAMVPInfo& aamvpi,
5366
  bool            bBi)
5367
0
{
5368
0
  if( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv( cu, refPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost, mvpIdx, aamvpi ) )
5369
0
  {
5370
0
    return;
5371
0
  }
5372
5373
0
  int bestMvpIdx = mvpIdx;
5374
0
  const int width = cu.Y().width;
5375
0
  const int height = cu.Y().height;
5376
5377
0
  const Picture* refPic = cu.slice->getRefPic(refPicList, iRefIdxPred);
5378
5379
  // Set Origin YUV: pcYuv
5380
0
  CPelUnitBuf*   pBuf = &origBuf;
5381
0
  double        fWeight = 1.0;
5382
5383
0
  CPelUnitBuf  origBufTmpCnst;
5384
5385
  // if Bi, set to ( 2 * Org - ListX )
5386
0
  if (bBi)
5387
0
  {
5388
0
    PelUnitBuf  origBufTmp = m_tmpStorageLCU.getCompactBuf(cu);
5389
    // NOTE: Other buf contains predicted signal from another direction
5390
0
    PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
5391
0
    origBufTmp.copyFrom(origBuf);
5392
0
    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs);
5393
5394
0
    origBufTmpCnst = origBufTmp;
5395
0
    pBuf           = &origBufTmpCnst;
5396
0
    fWeight        = xGetMEDistortionWeight(cu.BcwIdx, refPicList);
5397
0
  }
5398
5399
  // pred YUV
5400
0
  PelUnitBuf  predBuf = m_tmpAffiStorage.getCompactBuf(cu);
5401
5402
  // Set start Mv position, use input mv as started search mv
5403
0
  Mv acMvTemp[3];
5404
0
  ::memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5405
  // Set delta mv
5406
  // malloc buffer
5407
0
  int iParaNum = cu.affineType ? 7 : 5;
5408
0
  int affineParaNum = iParaNum - 1;
5409
0
  int mvNum = cu.affineType ? 3 : 2;
5410
0
  double **pdEqualCoeff;
5411
0
  pdEqualCoeff = new double *[iParaNum];
5412
0
  for (int i = 0; i < iParaNum; i++)
5413
0
  {
5414
0
    pdEqualCoeff[i] = new double[iParaNum];
5415
0
  }
5416
5417
0
  int64_t  i64EqualCoeff[7][7];
5418
0
  Pel    *piError = m_tmpAffiError;
5419
0
  Pel    *pdDerivate[2];
5420
0
  pdDerivate[0] = m_tmpAffiDeri[0];
5421
0
  pdDerivate[1] = m_tmpAffiDeri[1];
5422
5423
0
  Distortion uiCostBest = MAX_DISTORTION;
5424
0
  uint32_t uiBitsBest = 0;
5425
5426
  // do motion compensation with origin mv
5427
5428
0
  clipMv(acMvTemp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5429
0
  clipMv(acMvTemp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5430
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5431
0
  {
5432
0
    clipMv(acMvTemp[2], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5433
0
  }
5434
5435
0
  acMvTemp[0].roundAffinePrecInternal2Amvr(cu.imv);
5436
0
  acMvTemp[1].roundAffinePrecInternal2Amvr(cu.imv);
5437
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5438
0
  {
5439
0
    acMvTemp[2].roundAffinePrecInternal2Amvr(cu.imv);
5440
0
  }
5441
0
  if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5442
0
  {
5443
0
    xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.cs->slice->clpRngs[COMP_Y], refPicList);
5444
5445
    // get error
5446
0
    uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5447
5448
    // get cost with mv
5449
0
    m_pcRdCost->setCostScale(0);
5450
0
    uiBitsBest = ruiBits;
5451
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5452
0
    uiBitsBest += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5453
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5454
0
    uiCostBest = (Distortion)(floor(fWeight * (double)uiCostBest) + (double)m_pcRdCost->getCost(uiBitsBest));
5455
5456
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5457
5458
0
    ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5459
0
  }
5460
0
  const int predBufStride = predBuf.Y().stride;
5461
0
  Mv prevIterMv[7][3];
5462
0
  int iIterTime;
5463
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5464
0
  {
5465
0
    iIterTime = bBi ? 3 : 4;
5466
0
  }
5467
0
  else
5468
0
  {
5469
0
    iIterTime = bBi ? 3 : 5;
5470
0
  }
5471
5472
0
  if (!cu.cs->sps->AffineType)// getUseAffineType())
5473
0
  {
5474
0
    iIterTime = bBi ? 5 : 7;
5475
0
  }
5476
5477
0
  for (int iter = 0; iter<iIterTime; iter++)    // iterate loop
5478
0
  {
5479
0
    memcpy(prevIterMv[iter], acMvTemp, sizeof(Mv) * 3);
5480
    /*********************************************************************************
5481
    *                         use gradient to update mv
5482
    *********************************************************************************/
5483
    // get Error Matrix
5484
0
    PelBuf( piError, width, height ).subtract( pBuf->Y(), predBuf.Y() );
5485
5486
    // sobel x direction
5487
    // -1 0 1
5488
    // -2 0 2
5489
    // -1 0 1
5490
0
    Pel* pPred = predBuf.Y().buf;
5491
0
    m_HorizontalSobelFilter(pPred, predBufStride, pdDerivate[0], width, width, height);
5492
5493
    // sobel y direction
5494
    // -1 -2 -1
5495
    //  0  0  0
5496
    //  1  2  1
5497
0
    m_VerticalSobelFilter(pPred, predBufStride, pdDerivate[1], width, width, height);
5498
5499
    // solve delta x and y
5500
0
    for (int row = 0; row < iParaNum; row++)
5501
0
    {
5502
0
      memset(&i64EqualCoeff[row][0], 0, iParaNum * sizeof(int64_t));
5503
0
    }
5504
5505
0
    m_EqualCoeffComputer[cu.affineType]( piError, width, pdDerivate, width, width, height, i64EqualCoeff );
5506
5507
0
    for (int row = 0; row < iParaNum; row++)
5508
0
    {
5509
0
      for (int i = 0; i < iParaNum; i++)
5510
0
      {
5511
0
        pdEqualCoeff[row][i] = (double)i64EqualCoeff[row][i];
5512
0
      }
5513
0
    }
5514
5515
0
    double dAffinePara[6];
5516
0
    double dDeltaMv[6];
5517
0
    Mv acDeltaMv[3];
5518
5519
0
    solveEqual(pdEqualCoeff, affineParaNum, dAffinePara);
5520
5521
    // convert to delta mv
5522
0
    dDeltaMv[0] = dAffinePara[0];
5523
0
    dDeltaMv[2] = dAffinePara[2];
5524
0
    const bool extParams = cu.affineType == AFFINEMODEL_6PARAM;
5525
0
    if (extParams)
5526
0
    {
5527
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5528
0
      dDeltaMv[3] = dAffinePara[3] * width + dAffinePara[2];
5529
0
      dDeltaMv[4] = dAffinePara[4] * height + dAffinePara[0];
5530
0
      dDeltaMv[5] = dAffinePara[5] * height + dAffinePara[2];
5531
0
    }
5532
0
    else
5533
0
    {
5534
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5535
0
      dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2];
5536
0
    }
5537
5538
0
    const int normShiftTab[3] = { MV_PRECISION_QUARTER - MV_PRECISION_INT, MV_PRECISION_SIXTEENTH - MV_PRECISION_INT, MV_PRECISION_QUARTER - MV_PRECISION_INT };
5539
0
    const int stepShiftTab[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER };
5540
0
    const int multiShift = 1 << normShiftTab[cu.imv];
5541
0
    const int mvShift = stepShiftTab[cu.imv];
5542
5543
0
    acDeltaMv[0] = Mv((int)(dDeltaMv[0] * multiShift + SIGN(dDeltaMv[0]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[2] * multiShift + SIGN(dDeltaMv[2]) * 0.5) * (1<< mvShift));
5544
0
    acDeltaMv[1] = Mv((int)(dDeltaMv[1] * multiShift + SIGN(dDeltaMv[1]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[3] * multiShift + SIGN(dDeltaMv[3]) * 0.5) * (1<< mvShift));
5545
0
    if (extParams)
5546
0
    {
5547
0
      acDeltaMv[2] = Mv((int)(dDeltaMv[4] * multiShift + SIGN(dDeltaMv[4]) * 0.5) *  (1<< mvShift), (int)(dDeltaMv[5] * multiShift + SIGN(dDeltaMv[5]) * 0.5) *  (1<< mvShift));
5548
0
    }
5549
0
    bool bAllZero = false;
5550
0
    for (int i = 0; i < mvNum; i++)
5551
0
    {
5552
0
      Mv deltaMv = acDeltaMv[i];
5553
0
      if (cu.imv == IMV_4PEL)
5554
0
      {
5555
0
        deltaMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_HALF);
5556
0
      }
5557
0
      if (deltaMv.hor != 0 || deltaMv.ver != 0)
5558
0
      {
5559
0
        bAllZero = false;
5560
0
        break;
5561
0
      }
5562
0
      bAllZero = true;
5563
0
    }
5564
5565
0
    if (bAllZero)
5566
0
      break;
5567
5568
    // do motion compensation with updated mv
5569
0
    for (int i = 0; i < mvNum; i++)
5570
0
    {
5571
0
      acMvTemp[i] += acDeltaMv[i];
5572
0
      acMvTemp[i].hor = Clip3(MV_MIN, MV_MAX, acMvTemp[i].hor);
5573
0
      acMvTemp[i].ver = Clip3(MV_MIN, MV_MAX, acMvTemp[i].ver);
5574
0
      acMvTemp[i].roundAffinePrecInternal2Amvr(cu.imv);
5575
5576
0
      clipMv(acMvTemp[i], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5577
0
    }
5578
5579
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5580
0
    {
5581
0
      xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5582
5583
      // get error
5584
0
      Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5585
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
5586
5587
      // get cost with mv
5588
0
      m_pcRdCost->setCostScale(0);
5589
0
      uint32_t uiBitsTemp = ruiBits;
5590
0
      uiBitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5591
0
      uiCostTemp = (Distortion)(floor(fWeight * (double)uiCostTemp) + (double)m_pcRdCost->getCost(uiBitsTemp));
5592
5593
      // store best cost and mv
5594
0
      if (uiCostTemp < uiCostBest)
5595
0
      {
5596
0
        uiCostBest = uiCostTemp;
5597
0
        uiBitsBest = uiBitsTemp;
5598
0
        memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5599
0
        mvpIdx = bestMvpIdx;
5600
0
      }
5601
0
      else if(m_pcEncCfg->m_Affine > 1)
5602
0
      {
5603
0
        break;
5604
0
      }
5605
0
    }
5606
0
  }
5607
5608
0
  auto checkCPMVRdCost = [&](Mv ctrlPtMv[3])
5609
0
  {
5610
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, ctrlPtMv, m_pcEncCfg->m_ifpLines ) )
5611
0
    {
5612
0
      xPredAffineBlk(COMP_Y, cu, refPic, ctrlPtMv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5613
      // get error
5614
0
      Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5615
      // get cost with mv
5616
0
      m_pcRdCost->setCostScale(0);
5617
0
      uint32_t bitsTemp = ruiBits;
5618
0
      bitsTemp += xCalcAffineMVBits(cu, ctrlPtMv, acMvPred);
5619
0
      costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5620
      // store best cost and mv
5621
0
      if (costTemp < uiCostBest)
5622
0
      {
5623
0
        uiCostBest = costTemp;
5624
0
        uiBitsBest = bitsTemp;
5625
0
        ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3);
5626
0
      }
5627
0
    }
5628
0
  };
5629
5630
0
  const uint32_t mvShiftTable[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_INTERNAL, MV_PRECISION_INTERNAL - MV_PRECISION_INT };
5631
0
  const uint32_t mvShift = mvShiftTable[cu.imv];
5632
0
  if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost)
5633
0
  {
5634
0
    Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] };
5635
0
    Mv mvME[3];
5636
0
    ::memcpy(mvME, acMv, sizeof(Mv) * 3);
5637
0
    Mv dMv = mvME[0] - mvPredTmp[0];
5638
5639
0
    for (int j = 0; j < mvNum; j++)
5640
0
    {
5641
0
      if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv)))
5642
0
      {
5643
0
        ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5644
0
        acMvTemp[j] = mvPredTmp[j];
5645
5646
0
        if (j)
5647
0
          acMvTemp[j] += dMv;
5648
5649
0
        checkCPMVRdCost(acMvTemp);
5650
0
      }
5651
0
    }
5652
5653
    //keep the rotation/zoom;
5654
0
    if (mvME[0] != mvPredTmp[0])
5655
0
    {
5656
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5657
0
      for (int i = 1; i < mvNum; i++)
5658
0
      {
5659
0
        acMvTemp[i] -= dMv;
5660
0
      }
5661
0
      acMvTemp[0] = mvPredTmp[0];
5662
5663
0
      checkCPMVRdCost(acMvTemp);
5664
0
    }
5665
5666
    //keep the translation;
5667
0
    if (cu.affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) && mvME[2] != (mvPredTmp[2] + dMv))
5668
0
    {
5669
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5670
5671
0
      acMvTemp[1] = mvPredTmp[1] + dMv;
5672
0
      acMvTemp[2] = mvPredTmp[2] + dMv;
5673
5674
0
      checkCPMVRdCost(acMvTemp);
5675
0
    }
5676
5677
    // 8 nearest neighbor search
5678
0
    int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } };
5679
0
    const int maxSearchRound = 3;
5680
5681
0
    for (int rnd = 0; rnd < maxSearchRound; rnd++)
5682
0
    {
5683
0
      bool modelChange = false;
5684
      //search the model parameters with finear granularity;
5685
0
      for (int j = 0; j < mvNum; j++)
5686
0
      {
5687
0
        bool loopChange = false;
5688
0
        for (int iter = 0; iter < 2; iter++)
5689
0
        {
5690
0
          if (iter == 1 && !loopChange)
5691
0
          {
5692
0
            break;
5693
0
          }
5694
0
          Mv centerMv[3];
5695
0
          memcpy(centerMv, acMv, sizeof(Mv) * 3);
5696
0
          memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5697
5698
0
          for (int i = ((iter == 0) ? 0 : 4); i < ((iter == 0) ? 4 : 8); i++)
5699
0
          {
5700
0
            acMvTemp[j].set(centerMv[j].hor + (testPos[i][0] * (1 << mvShift)), centerMv[j].ver + (testPos[i][1] * (1 << mvShift)));
5701
0
            clipMv(acMvTemp[j], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5702
5703
0
            if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5704
0
            {
5705
0
              xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5706
5707
0
              Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5708
0
              uint32_t bitsTemp = ruiBits;
5709
0
              bitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5710
0
              costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5711
5712
0
              if (costTemp < uiCostBest)
5713
0
              {
5714
0
                uiCostBest = costTemp;
5715
0
                uiBitsBest = bitsTemp;
5716
0
                ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5717
0
                modelChange = true;
5718
0
                loopChange = true;
5719
0
              }
5720
0
            }
5721
0
          }
5722
0
        }
5723
0
      }
5724
5725
0
      if (!modelChange)
5726
0
      {
5727
0
        break;
5728
0
      }
5729
0
    }
5730
0
  }
5731
0
  acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
5732
0
  acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
5733
0
  acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
5734
5735
  // free buffer
5736
0
  for (int i = 0; i<iParaNum; i++)
5737
0
    delete[]pdEqualCoeff[i];
5738
0
  delete[]pdEqualCoeff;
5739
5740
0
  ruiBits = uiBitsBest;
5741
0
  ruiCost = uiCostBest;
5742
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5743
0
}
5744
5745
bool InterSearch::xEstimateAffineAMVP(CodingUnit& cu, AffineAMVPInfo& affineAMVPInfo, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv acMvPred[3], Distortion& distBiP)
5746
0
{
5747
0
  Mv         bestMvLT, bestMvRT, bestMvLB;
5748
0
  int        iBestIdx = 0;
5749
0
  Distortion uiBestCost = MAX_DISTORTION;
5750
5751
  // Fill the MV Candidates
5752
0
  CU::fillAffineMvpCand(cu, refPicList, iRefIdx, affineAMVPInfo);
5753
0
  CHECK(affineAMVPInfo.numCand == 0, "Assertion failed.");
5754
5755
0
  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );
5756
5757
0
  bool stop_check = false;
5758
0
  if (affineAMVPInfo.mvCandLT[0] == affineAMVPInfo.mvCandLT[1])
5759
0
  {
5760
0
    if ((affineAMVPInfo.mvCandRT[0] == affineAMVPInfo.mvCandRT[1]) && (affineAMVPInfo.mvCandLB[0] == affineAMVPInfo.mvCandLB[1]))
5761
0
    {
5762
0
      stop_check = true;
5763
0
    }
5764
0
  }
5765
5766
  // initialize Mvp index & Mvp
5767
0
  iBestIdx = -1;
5768
0
  for (int i = 0; i < affineAMVPInfo.numCand; i++)
5769
0
  {
5770
0
    if (i && stop_check)
5771
0
    {
5772
0
      continue;
5773
0
    }
5774
0
    Mv mv[3] = { affineAMVPInfo.mvCandLT[i], affineAMVPInfo.mvCandRT[i], affineAMVPInfo.mvCandLB[i] };
5775
0
    Distortion uiTmpCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mv, i, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx);
5776
5777
0
    if (uiBestCost > uiTmpCost)
5778
0
    {
5779
0
      uiBestCost = uiTmpCost;
5780
0
      bestMvLT = affineAMVPInfo.mvCandLT[i];
5781
0
      bestMvRT = affineAMVPInfo.mvCandRT[i];
5782
0
      bestMvLB = affineAMVPInfo.mvCandLB[i];
5783
0
      iBestIdx = i;
5784
0
      distBiP  = uiTmpCost;
5785
0
    }
5786
0
  }
5787
5788
0
  if( iBestIdx < 0 )
5789
0
    return false;
5790
5791
  // Setting Best MVP
5792
0
  acMvPred[0] = bestMvLT;
5793
0
  acMvPred[1] = bestMvRT;
5794
0
  acMvPred[2] = bestMvLB;
5795
5796
0
  cu.mvpIdx[refPicList] = iBestIdx;
5797
0
  cu.mvpNum[refPicList] = affineAMVPInfo.numCand;
5798
0
  DTRACE(g_trace_ctx, D_COMMON, "#estAffi=%d \n", affineAMVPInfo.numCand);
5799
0
  return true;
5800
0
}
5801
5802
void InterSearch::xCopyAffineAMVPInfo(AffineAMVPInfo& src, AffineAMVPInfo& dst)
5803
0
{
5804
0
  dst.numCand = src.numCand;
5805
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) #copyAffi=%d \n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), src.numCand);
5806
0
  ::memcpy(dst.mvCandLT, src.mvCandLT, sizeof(Mv)*src.numCand);
5807
0
  ::memcpy(dst.mvCandRT, src.mvCandRT, sizeof(Mv)*src.numCand);
5808
0
  ::memcpy(dst.mvCandLB, src.mvCandLB, sizeof(Mv)*src.numCand);
5809
0
}
5810
5811
uint32_t InterSearch::xCalcAffineMVBits(CodingUnit& cu, Mv acMvTemp[3], Mv acMvPred[3])
5812
0
{
5813
0
  int mvNum = cu.affineType ? 3 : 2;
5814
0
  m_pcRdCost->setCostScale(0);
5815
0
  uint32_t bitsTemp = 0;
5816
5817
0
  for (int verIdx = 0; verIdx < mvNum; verIdx++)
5818
0
  {
5819
0
    Mv pred = verIdx == 0 ? acMvPred[verIdx] : acMvPred[verIdx] + acMvTemp[0] - acMvPred[0];
5820
0
    pred.changeAffinePrecInternal2Amvr(cu.imv);
5821
0
    m_pcRdCost->setPredictor(pred);
5822
0
    Mv mv = acMvTemp[verIdx];
5823
0
    mv.changeAffinePrecInternal2Amvr(cu.imv);
5824
5825
0
    bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
5826
0
  }
5827
5828
0
  return bitsTemp;
5829
0
}
5830
5831
5832
//! set adaptive search range based on poc difference
5833
void InterSearch::setSearchRange( const Slice* slice, const VVEncCfg& encCfg )
5834
5.19k
{
5835
5.19k
  if( !encCfg.m_bUseASR || slice->isIRAP() )
5836
5.19k
  {
5837
5.19k
    return;
5838
5.19k
  }
5839
5840
0
  int iCurrPOC = slice->poc;
5841
0
  int iRefPOC;
5842
0
  int iGOPSize = encCfg.m_GOPSize;
5843
0
  int iOffset = (iGOPSize >> 1);
5844
0
  int iMaxSR = encCfg.m_SearchRange;
5845
0
  int iNumPredDir = slice->isInterP() ? 1 : 2;
5846
5847
0
  for (int iDir = 0; iDir < iNumPredDir; iDir++)
5848
0
  {
5849
0
    RefPicList  e = ( iDir ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
5850
0
    for (int iRefIdx = 0; iRefIdx < slice->numRefIdx[e]; iRefIdx++)
5851
0
    {
5852
0
      iRefPOC = slice->getRefPic(e, iRefIdx)->getPOC();
5853
0
      int newSearchRange = Clip3(encCfg.m_minSearchWindow, iMaxSR, (iMaxSR*ADAPT_SR_SCALE*abs(iCurrPOC - iRefPOC)+iOffset)/iGOPSize);
5854
0
      m_aaiAdaptSR[iDir][iRefIdx] = newSearchRange;
5855
0
    }
5856
0
  }
5857
0
}
5858
5859
void InterSearch::xIBCSearchMVCandUpdate(Distortion  sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand)
5860
1.31M
{
5861
1.31M
  int j = CHROMA_REFINEMENT_CANDIDATES - 1;
5862
5863
1.31M
  if (sad < sadBestCand[CHROMA_REFINEMENT_CANDIDATES - 1])
5864
211k
  {
5865
1.90M
    for (int t = CHROMA_REFINEMENT_CANDIDATES - 1; t >= 0; t--)
5866
1.68M
    {
5867
1.68M
      if (sad < sadBestCand[t])
5868
1.01M
        j = t;
5869
1.68M
    }
5870
5871
1.01M
    for (int k = CHROMA_REFINEMENT_CANDIDATES - 1; k > j; k--)
5872
805k
    {
5873
805k
      sadBestCand[k] = sadBestCand[k - 1];
5874
5875
805k
      cMVCand[k].set(cMVCand[k - 1].hor, cMVCand[k - 1].ver);
5876
805k
    }
5877
211k
    sadBestCand[j] = sad;
5878
211k
    cMVCand[j].set(x, y);
5879
211k
  }
5880
1.31M
}
5881
5882
int InterSearch::xIBCSearchMVChromaRefine(CodingUnit& cu,
5883
  int         roiWidth,
5884
  int         roiHeight,
5885
  int         cuPelX,
5886
  int         cuPelY,
5887
  Distortion* sadBestCand,
5888
  Mv* cMVCand
5889
5890
)
5891
22.5k
{
5892
22.5k
  if ((!isChromaEnabled(cu.chromaFormat)) || (!cu.Cb().valid()))
5893
22.5k
  {
5894
22.5k
    return 0;
5895
22.5k
  }
5896
5897
0
  int bestCandIdx = 0;
5898
0
  Distortion  sadBest = std::numeric_limits<Distortion>::max();
5899
0
  Distortion  tempSad;
5900
5901
0
  Pel* pRef;
5902
0
  Pel* pOrg;
5903
0
  int refStride, orgStride;
5904
0
  int width, height;
5905
5906
0
  int picWidth = cu.cs->slice->pps->picWidthInLumaSamples;
5907
0
  int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
5908
5909
0
  UnitArea allCompBlocks(cu.chromaFormat, (Area)cu.block(COMP_Y));
5910
0
  for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
5911
0
  {
5912
0
    if (sadBestCand[cand] == std::numeric_limits<Distortion>::max())
5913
0
    {
5914
0
      continue;
5915
0
    }
5916
5917
0
    if ((!cMVCand[cand].hor) && (!cMVCand[cand].ver))
5918
0
      continue;
5919
5920
0
    if (((int)(cuPelY + cMVCand[cand].ver + roiHeight) >= picHeight) || ((cuPelY + cMVCand[cand].ver) < 0))
5921
0
      continue;
5922
5923
0
    if (((int)(cuPelX + cMVCand[cand].hor + roiWidth) >= picWidth) || ((cuPelX + cMVCand[cand].hor) < 0))
5924
0
      continue;
5925
5926
0
    tempSad = sadBestCand[cand];
5927
5928
0
    cu.mv[0][0] = cMVCand[cand];
5929
0
    cu.mv[0][0].changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
5930
0
    cu.interDir = 1;
5931
0
    cu.refIdx[0] = cu.cs->slice->numRefIdx[REF_PIC_LIST_0]; // last idx in the list
5932
5933
0
    PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_0].getCompactBuf(cu);
5934
0
    motionCompensation(cu, predBufTmp, REF_PIC_LIST_0);
5935
5936
0
    for (unsigned int ch = COMP_Cb; ch < getNumberValidComponents(cu.cs->sps->chromaFormatIdc); ch++)
5937
0
    {
5938
0
      width = roiWidth >> getComponentScaleX(ComponentID(ch), cu.chromaFormat);
5939
0
      height = roiHeight >> getComponentScaleY(ComponentID(ch), cu.chromaFormat);
5940
5941
0
      PelUnitBuf origBuf = cu.cs->getOrgBuf(allCompBlocks);
5942
0
      PelUnitBuf* pBuf = &origBuf;
5943
0
      CPelBuf  tmpPattern = pBuf->get(ComponentID(ch));
5944
0
      pOrg = (Pel*)tmpPattern.buf;
5945
5946
0
      Picture* refPic = cu.slice->pic;
5947
0
      const CPelBuf refBuf = refPic->getRecoBuf(allCompBlocks.blocks[ComponentID(ch)]);
5948
0
      pRef = (Pel*)refBuf.buf;
5949
5950
0
      refStride = refBuf.stride;
5951
0
      orgStride = tmpPattern.stride;
5952
5953
      //ComponentID compID = (ComponentID)ch;
5954
0
      PelUnitBuf* pBufRef = &predBufTmp;
5955
0
      CPelBuf  tmpPatternRef = pBufRef->get(ComponentID(ch));
5956
0
      pRef = (Pel*)tmpPatternRef.buf;
5957
0
      refStride = tmpPatternRef.stride;
5958
5959
5960
0
      for (int row = 0; row < height; row++)
5961
0
      {
5962
0
        for (int col = 0; col < width; col++)
5963
0
        {
5964
0
          tempSad += ((abs(pRef[col] - pOrg[col])) >> (cu.cs->sps->bitDepths[CH_C] - 8));
5965
0
        }
5966
0
        pRef += refStride;
5967
0
        pOrg += orgStride;
5968
0
      }
5969
0
    }
5970
5971
0
    if (tempSad < sadBest)
5972
0
    {
5973
0
      sadBest = tempSad;
5974
0
      bestCandIdx = cand;
5975
0
    }
5976
0
  }
5977
5978
0
  return bestCandIdx;
5979
22.5k
}
5980
static unsigned int xMergeCandLists(Mv* dst, unsigned int dn, unsigned int dstTotalLength, Mv* src, unsigned int sn)
5981
135k
{
5982
1.06M
  for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++)
5983
929k
  {
5984
929k
    if (src[cand] == Mv())
5985
70.5k
    {
5986
70.5k
      continue;
5987
70.5k
    }
5988
859k
    bool found = false;
5989
6.54M
    for (int j = 0; j < dn; j++)
5990
5.80M
    {
5991
5.80M
      if (src[cand] == dst[j])
5992
118k
      {
5993
118k
        found = true;
5994
118k
        break;
5995
118k
      }
5996
5.80M
    }
5997
5998
859k
    if (!found)
5999
740k
    {
6000
740k
      dst[dn] = src[cand];
6001
740k
      dn++;
6002
740k
    }
6003
859k
  }
6004
6005
135k
  return dn;
6006
135k
}
6007
void InterSearch::xIntraPatternSearchIBC(CodingUnit& cu, TZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* pcMvSrchRngLT, Mv* pcMvSrchRngRB, Mv* pcMvPred)
6008
27.1k
{
6009
27.1k
  const int   srchRngHorLeft = pcMvSrchRngLT->hor;
6010
27.1k
  const int   srchRngHorRight = pcMvSrchRngRB->hor;
6011
27.1k
  const int   srchRngVerTop = pcMvSrchRngLT->ver;
6012
27.1k
  const int   srchRngVerBottom = pcMvSrchRngRB->ver;
6013
6014
27.1k
  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
6015
27.1k
  const int   puPelOffsetX = 0;
6016
27.1k
  const int   puPelOffsetY = 0;
6017
27.1k
  const int   cuPelX = cu.Y().x;
6018
27.1k
  const int   cuPelY = cu.Y().y;
6019
6020
27.1k
  int          roiWidth = cu.lwidth();
6021
27.1k
  int          roiHeight = cu.lheight();
6022
6023
27.1k
  Distortion  sad;
6024
27.1k
  Distortion  sadBest = std::numeric_limits<Distortion>::max();
6025
27.1k
  int         bestX = 0;
6026
27.1k
  int         bestY = 0;
6027
6028
27.1k
  const Pel* piRefSrch = cStruct.piRefY; 
6029
6030
27.1k
  int         bestCandIdx = 0;
6031
6032
27.1k
  Distortion  sadBestCand[CHROMA_REFINEMENT_CANDIDATES];
6033
27.1k
  Mv          cMVCand[CHROMA_REFINEMENT_CANDIDATES];
6034
6035
27.1k
  const bool  useAmvr = cu.cs->sps->AMVR;
6036
6037
6038
244k
  for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
6039
217k
  {
6040
217k
    sadBestCand[cand] = std::numeric_limits<Distortion>::max();
6041
217k
    cMVCand[cand].set(0, 0);
6042
217k
  }
6043
6044
27.1k
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);
6045
6046
27.1k
  const int picWidth = cu.cs->slice->pps->picWidthInLumaSamples;
6047
27.1k
  const int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
6048
6049
6050
27.1k
  {
6051
27.1k
    m_cDistParam.subShift = 0;
6052
27.1k
    Distortion tempSadBest = 0;
6053
6054
27.1k
    int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom;
6055
27.1k
    m_numBVs = 0;
6056
27.1k
    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);
6057
6058
27.1k
    Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
6059
27.1k
    int nbPreds = 0;
6060
27.1k
    CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
6061
27.1k
    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds);
6062
6063
264k
    for (unsigned int cand = 0; cand < m_numBVs; cand++)
6064
237k
    {
6065
237k
      int xPred = m_acBVs[cand].hor;
6066
237k
      int yPred = m_acBVs[cand].ver;
6067
6068
237k
      if (!(xPred == 0 && yPred == 0)
6069
237k
        && !((yPred < srTop) || (yPred > srBottom))
6070
212k
        && !((xPred < srLeft) || (xPred > srRight)))
6071
212k
      {
6072
212k
        bool validCand = searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth);
6073
6074
212k
        if (validCand)
6075
174k
        {
6076
174k
          sad = m_pcRdCost->getBvCostMultiplePredsIBC(xPred, yPred, useAmvr);
6077
174k
          m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * yPred + xPred;
6078
174k
          sad += m_cDistParam.distFunc(m_cDistParam);
6079
6080
174k
          xIBCSearchMVCandUpdate(sad, xPred, yPred, sadBestCand, cMVCand);
6081
174k
        }
6082
212k
      }
6083
237k
    }
6084
6085
27.1k
    bestX = cMVCand[0].hor;
6086
27.1k
    bestY = cMVCand[0].ver;
6087
27.1k
    rcMv.set(bestX, bestY);
6088
27.1k
    sadBest = sadBestCand[0];
6089
6090
27.1k
    const int boundY = (0 - roiHeight - puPelOffsetY);
6091
482k
    for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y)
6092
458k
    {
6093
458k
      if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, y, lcuWidth))
6094
0
      {
6095
0
        continue;
6096
0
      }
6097
6098
458k
      sad = m_pcRdCost->getBvCostMultiplePredsIBC(0, y, useAmvr);
6099
458k
      m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y;
6100
458k
      sad += m_cDistParam.distFunc(m_cDistParam);
6101
6102
458k
      xIBCSearchMVCandUpdate(sad, 0, y, sadBestCand, cMVCand);
6103
458k
      tempSadBest = sadBestCand[0];
6104
458k
      if (sadBestCand[0] <= 3)
6105
3.14k
      {
6106
3.14k
        bestX = cMVCand[0].hor;
6107
3.14k
        bestY = cMVCand[0].ver;
6108
3.14k
        sadBest = sadBestCand[0];
6109
3.14k
        rcMv.set(bestX, bestY);
6110
3.14k
        ruiCost = sadBest;
6111
3.14k
        goto end;
6112
3.14k
      }
6113
458k
    }
6114
6115
24.0k
    const int boundX = std::max(srchRngHorLeft, -cuPelX);
6116
1.54M
    for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x)
6117
1.51M
    {
6118
1.51M
      if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, 0, lcuWidth))
6119
847k
      {
6120
847k
        continue;
6121
847k
      }
6122
6123
672k
      sad = m_pcRdCost->getBvCostMultiplePredsIBC(x, 0, useAmvr);
6124
672k
      m_cDistParam.cur.buf = piRefSrch + x;
6125
672k
      sad += m_cDistParam.distFunc(m_cDistParam);
6126
6127
6128
672k
      xIBCSearchMVCandUpdate(sad, x, 0, sadBestCand, cMVCand);
6129
672k
      tempSadBest = sadBestCand[0];
6130
672k
      if (sadBestCand[0] <= 3)
6131
1.43k
      {
6132
1.43k
        bestX = cMVCand[0].hor;
6133
1.43k
        bestY = cMVCand[0].ver;
6134
1.43k
        sadBest = sadBestCand[0];
6135
1.43k
        rcMv.set(bestX, bestY);
6136
1.43k
        ruiCost = sadBest;
6137
1.43k
        goto end;
6138
1.43k
      }
6139
672k
    }
6140
6141
22.5k
    bestX = cMVCand[0].hor;
6142
22.5k
    bestY = cMVCand[0].ver;
6143
22.5k
    sadBest = sadBestCand[0];
6144
22.5k
    if ((!bestX && !bestY) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= 32))
6145
21.3k
    {
6146
      //chroma refine
6147
21.3k
      bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6148
21.3k
      bestX = cMVCand[bestCandIdx].hor;
6149
21.3k
      bestY = cMVCand[bestCandIdx].ver;
6150
21.3k
      sadBest = sadBestCand[bestCandIdx];
6151
21.3k
      rcMv.set(bestX, bestY);
6152
21.3k
      ruiCost = sadBest;
6153
21.3k
      goto end;
6154
21.3k
    }
6155
6156
1.18k
    if (cu.lwidth() < 16 && cu.lheight() < 16)
6157
22
    {
6158
22
      int stepS = 2;
6159
22
      if (m_pcEncCfg->m_IBCFastMethod > 2)
6160
22
      {
6161
22
        if (m_pcEncCfg->m_IBCFastMethod == 5)
6162
0
        {
6163
0
          stepS = 8;
6164
0
        }
6165
22
        else if ((cu.lwidth() > 4) || (cu.lheight() > 4))
6166
22
        {
6167
22
          stepS = 4;
6168
22
        }
6169
22
      }
6170
6171
22
      const int minCuLog2 = m_pcEncCfg->m_log2MinCodingBlockSize;
6172
22
      const int minCuMask = (1 << minCuLog2) - 1;
6173
22
      bool lastDec = false;
6174
6175
22
      for (int searchStep = 0; searchStep < 3; searchStep++)
6176
22
      {
6177
22
        int delaySy = searchStep ? 1 : 0;
6178
22
        int delaySx = searchStep > 1 ? 1 : 0;
6179
22
        int startY = (std::max(srchRngVerTop, -cuPelY) + delaySy);
6180
22
        int startX = (std::max(srchRngHorLeft, -cuPelX) + delaySx);
6181
22
        int endY = srchRngVerBottom;
6182
22
        int endX = srchRngHorRight;
6183
6184
22
        if (m_pcEncCfg->m_IBCFastMethod > 5)
6185
0
        {
6186
0
          startY = bestY - 4;
6187
0
          endY = bestY + 4;
6188
0
          startX = bestX - 4;
6189
0
          endX = bestX + 4;
6190
0
          stepS = 1;
6191
0
          if (searchStep)
6192
0
          {
6193
0
            break;
6194
0
          }
6195
0
        }
6196
6197
571
        for (int y = startY; y <= endY; y += stepS)
6198
549
        {
6199
549
          if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
6200
273
            continue;
6201
276
          bool firstX = true;
6202
276
          int stepSx = searchStep ? stepS : 1;
6203
30.5k
          for (int x = startX; x <= endX; firstX = false, x += stepSx)
6204
30.2k
          {
6205
30.2k
            if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
6206
14.1k
              continue;
6207
6208
16.1k
            bool isSameAsLast = !firstX && ((cuPelX + x) & minCuMask) > 1;
6209
16.1k
            if (searchStep || (m_pcEncCfg->m_IBCFastMethod > 5))
6210
0
            {
6211
0
              if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
6212
0
              {
6213
0
                continue;
6214
0
              }
6215
0
            }
6216
16.1k
            else if ((isSameAsLast && !lastDec) || (!isSameAsLast && !searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth)))
6217
4.22k
            {
6218
4.22k
              lastDec = false;
6219
4.22k
              continue;
6220
4.22k
            }
6221
11.8k
            lastDec = true;
6222
6223
11.8k
            sad = m_pcRdCost->getBvCostMultiplePredsIBC(x, y, useAmvr);
6224
11.8k
            m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
6225
11.8k
            sad += m_cDistParam.distFunc(m_cDistParam);
6226
6227
11.8k
            xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand);
6228
6229
6230
11.8k
            if (searchStep && sadBestCand[0] <= 5)
6231
0
            {
6232
              //chroma refine & return
6233
0
              bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6234
0
              bestX = cMVCand[bestCandIdx].hor;
6235
0
              bestY = cMVCand[bestCandIdx].ver;
6236
0
              sadBest = sadBestCand[bestCandIdx];
6237
0
              rcMv.set(bestX, bestY);
6238
0
              ruiCost = sadBest;
6239
0
              goto end;
6240
0
            }
6241
11.8k
          }
6242
276
        }
6243
6244
22
        if ((searchStep < 2) && (m_pcEncCfg->m_IBCFastMethod < 6))
6245
22
        {
6246
22
          if ((m_pcEncCfg->m_IBCFastMethod > 2) && (m_pcEncCfg->m_IBCFastMethod < 5))
6247
22
          {
6248
22
            if ((bestX == cMVCand[0].hor) && (bestY == cMVCand[0].ver))
6249
22
            {
6250
22
              sadBest = sadBestCand[bestCandIdx];
6251
22
              rcMv.set(bestX, bestY);
6252
22
              ruiCost = sadBest;
6253
22
              goto end;
6254
22
            }
6255
22
          }
6256
0
          bestX = cMVCand[0].hor;
6257
0
          bestY = cMVCand[0].ver;
6258
0
          sadBest = sadBestCand[0];
6259
6260
0
          int StopSearch = searchStep ? 32 : 16;
6261
0
          if ((searchStep && (sadBest >= tempSadBest)) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= StopSearch))
6262
0
          {
6263
            //chroma refine
6264
0
            bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6265
6266
0
            bestX = cMVCand[bestCandIdx].hor;
6267
0
            bestY = cMVCand[bestCandIdx].ver;
6268
0
            sadBest = sadBestCand[bestCandIdx];
6269
0
            rcMv.set(bestX, bestY);
6270
0
            ruiCost = sadBest;
6271
0
            goto end;
6272
0
          }
6273
0
        }
6274
22
      }
6275
22
    }
6276
1.18k
  }
6277
6278
1.16k
  bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6279
6280
1.16k
  bestX = cMVCand[bestCandIdx].hor;
6281
1.16k
  bestY = cMVCand[bestCandIdx].ver;
6282
1.16k
  sadBest = sadBestCand[bestCandIdx];
6283
1.16k
  rcMv.set(bestX, bestY);
6284
1.16k
  ruiCost = sadBest;
6285
6286
27.1k
end:
6287
27.1k
  m_numBVs = 0;
6288
27.1k
  m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);
6289
6290
27.1k
  m_defaultCachedBvs->currCnt = 0;
6291
27.1k
  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
6292
27.1k
  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs);
6293
6294
244k
  for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
6295
217k
  {
6296
217k
    if (cMVCand[cand].hor == 0 && cMVCand[cand].ver == 0)
6297
70.5k
    {
6298
70.5k
      continue;
6299
70.5k
    }
6300
146k
    m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand];
6301
146k
  }
6302
6303
27.1k
  return;
6304
1.16k
}
6305
6306
6307
6308
// based on xMotionEstimation
6309
void InterSearch::xIBCEstimation(CodingUnit& cu, PelUnitBuf& origBuf, Mv* pcMvPred, Mv& rcMv, Distortion& ruiCost )
6310
27.1k
{
6311
27.1k
  const int iPicWidth = cu.cs->slice->pps->picWidthInLumaSamples;
6312
27.1k
  const int iPicHeight = cu.cs->slice->pps->picHeightInLumaSamples;
6313
27.1k
  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
6314
27.1k
  const int           cuPelX = cu.Y().x;
6315
27.1k
  const int           cuPelY = cu.Y().y;
6316
27.1k
  int                 iRoiWidth = cu.lwidth();
6317
27.1k
  int                 iRoiHeight = cu.lheight();
6318
6319
27.1k
  PelUnitBuf* pBuf = &origBuf;
6320
6321
  //  Search key pattern initialization
6322
27.1k
  CPelBuf  tmpPattern = pBuf->Y();
6323
27.1k
  CPelBuf* pcPatternKey = &tmpPattern;
6324
27.1k
  PelBuf tmpOrgLuma;
6325
27.1k
  ReshapeData& reshapeData = cu.cs->picture->reshapeData;
6326
27.1k
  if ((cu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag()))
6327
0
  {
6328
0
    tmpOrgLuma = m_tmpStorageLCU.getCompactBuf(cu.Y());
6329
0
    tmpOrgLuma.rspSignal(tmpPattern, reshapeData.getInvLUT());
6330
0
    pcPatternKey = (CPelBuf*)&tmpOrgLuma;
6331
0
  }
6332
27.1k
  m_lumaClpRng = cu.cs->slice->clpRngs[COMP_Y];
6333
27.1k
  Picture* refPic = cu.slice->pic;
6334
27.1k
  const CPelBuf refBuf = refPic->getRecoBuf(cu.blocks[COMP_Y]);
6335
6336
27.1k
  TZSearchStruct cStruct; 
6337
27.1k
  cStruct.pcPatternKey  = pcPatternKey;
6338
27.1k
  cStruct.iRefStride    = refBuf.stride;
6339
27.1k
  cStruct.piRefY        = refBuf.buf;
6340
27.1k
  CHECK( cu.imv == IMV_HPEL, "IF_IBC" );
6341
27.1k
  cStruct.imvShift      = cu.imv << 1;
6342
27.1k
  cStruct.subShiftMode  = 0;
6343
27.1k
  cStruct.uiBestSad     = MAX_DISTORTION;
6344
6345
27.1k
  m_pcRdCost->getMotionCostIBC(0);
6346
27.1k
  m_pcRdCost->setPredictorsIBC(pcMvPred);
6347
27.1k
  m_pcRdCost->setCostScale(0);
6348
6349
27.1k
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);
6350
27.1k
  bool buffered = false;
6351
27.1k
  if (m_pcEncCfg->m_IBCFastMethod)// IBC_FAST_METHOD_BUFFERBV
6352
27.1k
  {
6353
27.1k
    ruiCost = MAX_UINT;
6354
27.1k
    std::unordered_map<Mv, Distortion>& history = m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord;
6355
27.1k
    for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++)
6356
14
    {
6357
14
      const Mv& bv = p->first;
6358
6359
14
      int xBv = bv.hor;
6360
14
      int yBv = bv.ver;
6361
14
      if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
6362
14
      {
6363
14
        buffered = true;
6364
14
        Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xBv, yBv, cu.cs->sps->AMVR);
6365
14
        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv;
6366
14
        sad += m_cDistParam.distFunc(m_cDistParam);
6367
14
        if (sad < ruiCost)
6368
14
        {
6369
14
          rcMv = bv;
6370
14
          ruiCost = sad;
6371
14
        }
6372
0
        else if (sad == ruiCost)
6373
0
        {
6374
          // stabilise the search through the unordered list
6375
0
          if (bv.hor < rcMv.hor
6376
0
            || (bv.hor == rcMv.hor && bv.ver < rcMv.ver))
6377
0
          {
6378
            // update the vector.
6379
0
            rcMv = bv;
6380
0
          }
6381
0
        }
6382
14
      }
6383
14
    }
6384
6385
27.1k
    if (buffered)
6386
14
    {
6387
14
      Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
6388
14
      int nbPreds = 0;
6389
14
      CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
6390
6391
14
      for (unsigned int cand = 0; cand < nbPreds; cand++)
6392
0
      {
6393
0
        int xPred = cMvPredEncOnly[cand].hor;
6394
0
        int yPred = cMvPredEncOnly[cand].ver;
6395
6396
0
        if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
6397
0
        {
6398
0
          Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xPred, yPred, cu.cs->sps->AMVR);
6399
0
          m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
6400
0
          sad += m_cDistParam.distFunc(m_cDistParam);
6401
0
          if (sad < ruiCost)
6402
0
          {
6403
0
            rcMv.set(xPred, yPred);
6404
0
            ruiCost = sad;
6405
0
          }
6406
0
          else if (sad == ruiCost)
6407
0
          {
6408
            // stabilise the search through the unordered list
6409
0
            if (xPred < rcMv.hor
6410
0
              || (xPred == rcMv.hor && yPred < rcMv.ver))
6411
0
            {
6412
              // update the vector.
6413
0
              rcMv.set(xPred, yPred);
6414
0
            }
6415
0
          }
6416
0
          m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad;
6417
0
        }
6418
0
      }
6419
14
    }
6420
27.1k
  }
6421
6422
27.1k
  if (!buffered)
6423
27.1k
  {
6424
27.1k
    Mv        cMvSrchRngLT;
6425
27.1k
    Mv        cMvSrchRngRB;
6426
6427
    // assume that intra BV is integer-pel precision
6428
27.1k
    xSetIntraSearchRangeIBC(cu, cu.lwidth(), cu.lheight(), cMvSrchRngLT, cMvSrchRngRB);
6429
6430
    //  Do integer search
6431
27.1k
    xIntraPatternSearchIBC(cu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred);
6432
27.1k
  }
6433
27.1k
}
6434
// based on xSetSearchRange
6435
void InterSearch::xSetIntraSearchRangeIBC(CodingUnit& cu, int iRoiWidth, int iRoiHeight, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB)
6436
27.1k
{
6437
 // const SPS& sps = *cu.cs->sps;
6438
6439
27.1k
  int srLeft, srRight, srTop, srBottom;
6440
6441
27.1k
  const int cuPelX = cu.Y().x;
6442
27.1k
  const int cuPelY = cu.Y().y;
6443
6444
27.1k
  const int lcuWidth = cu.cs->slice->sps->CTUSize;
6445
27.1k
  const int ctuSizeLog2 = floorLog2(lcuWidth);
6446
27.1k
  int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
6447
6448
27.1k
  srLeft = -(numLeftCTUs * lcuWidth + (cuPelX % lcuWidth));
6449
27.1k
  srTop = -(cuPelY % lcuWidth);
6450
6451
27.1k
  srRight = lcuWidth - (cuPelX % lcuWidth) - iRoiWidth;
6452
27.1k
  srBottom = lcuWidth - (cuPelY % lcuWidth) - iRoiHeight;
6453
6454
27.1k
  rcMvSrchRngLT.hor=srLeft;
6455
27.1k
  rcMvSrchRngLT.ver=srTop;
6456
27.1k
  rcMvSrchRngRB.hor=srRight;
6457
27.1k
  rcMvSrchRngRB.ver=srBottom;
6458
6459
27.1k
  rcMvSrchRngLT <<= 2;
6460
27.1k
  rcMvSrchRngRB <<= 2;
6461
27.1k
  bool temp = m_clipMvInSubPic;
6462
27.1k
  m_clipMvInSubPic = true;
6463
27.1k
  clipMv(rcMvSrchRngLT,cu.lumaPos(),cu.lumaSize(), *cu.cs->pcv, *cu.cs->pps, m_clipMvInSubPic);
6464
27.1k
  clipMv(rcMvSrchRngRB, cu.lumaPos(),cu.lumaSize(), *cu.cs->pcv, * cu.cs->pps, m_clipMvInSubPic);
6465
27.1k
  m_clipMvInSubPic = temp;
6466
27.1k
  rcMvSrchRngLT >>= 2;
6467
27.1k
  rcMvSrchRngRB >>= 2;
6468
27.1k
}
6469
6470
bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner)
6471
27.1k
{
6472
27.1k
  Mv           cMvSrchRngLT;
6473
27.1k
  Mv           cMvSrchRngRB;
6474
27.1k
  cu.imv = IMV_4PEL;
6475
27.1k
  AMVPInfo amvpInfo4Pel;
6476
27.1k
  CU::fillIBCMvpCand(cu, amvpInfo4Pel);
6477
6478
27.1k
  cu.imv = IMV_OFF;// (Int)cu.cs->sps->getUseIMV(); // set as IMV=0 initially
6479
27.1k
  Mv    cMv, cMvPred[2];
6480
27.1k
  AMVPInfo amvpInfo;
6481
27.1k
  CU::fillIBCMvpCand(cu, amvpInfo);
6482
  // store in full pel accuracy, shift before use in search
6483
27.1k
  cMvPred[0] = amvpInfo.mvCand[0];
6484
27.1k
  cMvPred[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
6485
27.1k
  cMvPred[1] = amvpInfo.mvCand[1];
6486
27.1k
  cMvPred[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
6487
6488
27.1k
  int iBvpNum = 2;
6489
27.1k
  int bvpIdxBest = 0;
6490
27.1k
  cMv.setZero();
6491
27.1k
  Distortion cost = 0;
6492
27.1k
  if (cu.cs->sps->maxNumIBCMergeCand == 1)
6493
0
  {
6494
0
    iBvpNum = 1;
6495
0
    cMvPred[1] = cMvPred[0];
6496
0
  }
6497
6498
27.1k
  if (cMv.hor == 0 && cMv.ver == 0)
6499
27.1k
  {
6500
    // if hash search does not work or is not enabled
6501
27.1k
    PelUnitBuf origBuf = cu.cs->getOrgBuf(cu);
6502
27.1k
    xIBCEstimation(cu, origBuf, cMvPred, cMv, cost );
6503
27.1k
  }
6504
6505
27.1k
  if (cMv.hor == 0 && cMv.ver == 0)
6506
2.82k
  {
6507
2.82k
    return false;
6508
2.82k
  }
6509
  /// ibc search
6510
  /////////////////////////////////////////////////////////
6511
24.3k
  unsigned int bitsBVPBest, bitsBVPTemp;
6512
24.3k
  bitsBVPBest = MAX_INT;
6513
24.3k
  m_pcRdCost->setCostScale(0);
6514
6515
73.0k
  for (int bvpIdxTemp = 0; bvpIdxTemp < iBvpNum; bvpIdxTemp++)
6516
48.6k
  {
6517
48.6k
    m_pcRdCost->setPredictor(cMvPred[bvpIdxTemp]);
6518
6519
48.6k
    bitsBVPTemp = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor, cMv.ver, 0);
6520
6521
48.6k
    if (bitsBVPTemp < bitsBVPBest)
6522
24.3k
    {
6523
24.3k
      bitsBVPBest = bitsBVPTemp;
6524
24.3k
      bvpIdxBest = bvpIdxTemp;
6525
6526
24.3k
      if (cu.cs->sps->AMVR && cMv != cMvPred[bvpIdxTemp])
6527
24.3k
        cu.imv = IMV_FPEL; // set as full-pel
6528
0
      else
6529
0
        cu.imv = IMV_OFF; // set as fractional-pel
6530
6531
24.3k
    }
6532
6533
48.6k
    unsigned int bitsBVPQP = MAX_UINT;
6534
6535
6536
48.6k
    Mv mvPredQuadPel;
6537
48.6k
    if ((cMv.hor % 4 == 0) && (cMv.ver % 4 == 0) && (cu.cs->sps->AMVR))
6538
48.6k
    {
6539
48.6k
      mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];// cMvPred[bvpIdxTemp];
6540
6541
48.6k
      mvPredQuadPel.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_4PEL);
6542
6543
48.6k
      m_pcRdCost->setPredictor(mvPredQuadPel);
6544
6545
48.6k
      bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor >> 2, cMv.ver >> 2, 0);
6546
6547
48.6k
    }
6548
48.6k
    mvPredQuadPel.changePrecision(MV_PRECISION_4PEL, MV_PRECISION_INT);
6549
48.6k
    if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel)
6550
24.3k
    {
6551
24.3k
      bitsBVPBest = bitsBVPQP;
6552
24.3k
      bvpIdxBest = bvpIdxTemp;
6553
6554
24.3k
      if (cu.cs->sps->AMVR)
6555
24.3k
        cu.imv = IMV_4PEL;
6556
24.3k
    }
6557
6558
48.6k
  }
6559
6560
24.3k
  cMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL );
6561
24.3k
  cu.mv[REF_PIC_LIST_0][0] = cMv; // store in fractional pel accuracy
6562
6563
24.3k
  cu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest;
6564
6565
24.3k
  if (cu.imv == IMV_4PEL && cMv != amvpInfo4Pel.mvCand[bvpIdxBest])
6566
24.3k
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo4Pel.mvCand[bvpIdxBest];
6567
0
  else
6568
0
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo.mvCand[bvpIdxBest];
6569
6570
24.3k
  if (cu.mvd[REF_PIC_LIST_0][0] == Mv(0, 0))
6571
0
    cu.imv = IMV_OFF;
6572
24.3k
  if (cu.imv == IMV_4PEL)
6573
24.3k
    assert((cMv.hor % 16 == 0) && (cMv.ver % 16 == 0));
6574
24.3k
  if (cu.cs->sps->AMVR)
6575
24.3k
    assert(cu.imv > 0 || cu.mvd[REF_PIC_LIST_0][0] == Mv());
6576
6577
24.3k
  cu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;
6578
6579
24.3k
  return true;
6580
24.3k
}
6581
6582
6583
static inline bool isYPartBefore( SplitSeries series, const int ctuSizeLog2, const Position& refPos, const Position& pos )
6584
1.60M
{
6585
1.60M
#ifndef NDEBUG
6586
1.60M
  const int refCtuX = refPos.x >> ctuSizeLog2;
6587
1.60M
  const int refCtuY = refPos.y >> ctuSizeLog2;
6588
1.60M
  const int posCtuX = pos.x >> ctuSizeLog2;
6589
1.60M
  const int posCtuY = pos.y >> ctuSizeLog2;
6590
6591
1.60M
  CHECK( refCtuX != posCtuX || refCtuY != posCtuY, "This method can only be applied for positions within the same CTU" );
6592
6593
1.60M
#endif
6594
1.60M
  const int ctuMask = ( 1 << ctuSizeLog2 ) - 1;
6595
6596
1.60M
  const int refX = refPos.x & ctuMask;
6597
1.60M
  const int refY = refPos.y & ctuMask;
6598
1.60M
  const int posX = pos.x & ctuMask;
6599
1.60M
  const int posY = pos.y & ctuMask;
6600
6601
1.60M
  int x = 0, y = 0, w = 1 << ctuSizeLog2, h = 1 << ctuSizeLog2;
6602
  
6603
2.16M
  while( true )
6604
2.16M
  {
6605
2.16M
    PartSplit split = PartSplit( series & SPLIT_MASK );
6606
6607
2.16M
    switch( split )
6608
2.16M
    {
6609
2.10M
    case CU_QUAD_SPLIT:
6610
2.10M
      w >>= 1;
6611
2.10M
      if( posX >= x + w ) x += w;
6612
2.13M
    case CU_HORZ_SPLIT:
6613
2.13M
      h >>= 1;
6614
2.13M
      if( posY >= y + h ) y += h;
6615
2.13M
      break;
6616
6617
32.8k
    case CU_VERT_SPLIT:
6618
32.8k
      w >>= 1;
6619
32.8k
      if( posX >= x + w ) x += w;
6620
32.8k
      goto checkXonly;
6621
6622
0
    case CU_TRIH_SPLIT:
6623
0
      h >>= 2;
6624
0
      if( posY >= y + h ) { y += h; h <<= 1; }
6625
0
      if( posY >= y + h ) { y += h; h >>= 1; }
6626
0
      break;
6627
6628
0
    case CU_TRIV_SPLIT:
6629
0
      w >>= 2;
6630
0
      if( posX >= x + w ) { x += w; w <<= 1; }
6631
0
      if( posX >= x + w ) { x += w; w >>= 1; }
6632
0
      goto checkXonly;
6633
6634
0
    default:
6635
0
      return false;
6636
2.16M
    }
6637
6638
2.13M
    if( refY >= y + h ) return true;
6639
1.60M
    else if( refY < y ) return false;
6640
6641
1.63M
checkXonly:
6642
1.63M
    if( refX >= x + w ) return true;
6643
802k
    else if( refX < x ) return false;
6644
6645
562k
    series >>= SPLIT_DMULT; continue;
6646
1.63M
  }
6647
6648
50
  return false;
6649
1.60M
}
6650
6651
bool InterSearch::searchBvIBC(const CodingUnit& cu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize) const
6652
2.36M
{
6653
2.36M
  const int ctuSizeLog2 = Log2(ctuSize);
6654
6655
2.36M
  int refRightX  = xPos + xBv + width  - 1;
6656
2.36M
  int refBottomY = yPos + yBv + height - 1;
6657
6658
2.36M
  int refLeftX = xPos + xBv;
6659
2.36M
  int refTopY  = yPos + yBv;
6660
6661
2.36M
  if ((xPos + xBv) < 0)
6662
13.7k
  {
6663
13.7k
    return false;
6664
13.7k
  }
6665
2.34M
  if (refRightX >= picWidth)
6666
0
  {
6667
0
    return false;
6668
0
  }
6669
6670
2.34M
  if ((yPos + yBv) < 0)
6671
0
  {
6672
0
    return false;
6673
0
  }
6674
2.34M
  if (refBottomY >= picHeight)
6675
0
  {
6676
0
    return false;
6677
0
  }
6678
2.34M
  if ((xBv + width) > 0 && (yBv + height) > 0)
6679
187k
  {
6680
187k
    return false;
6681
187k
  }
6682
6683
  // Don't search the above CTU row
6684
2.15M
  if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
6685
0
    return false;
6686
6687
  // Don't search the below CTU row
6688
2.15M
  if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
6689
0
  {
6690
0
    return false;
6691
0
  }
6692
6693
2.15M
  unsigned curTileIdx = cu.cs->pps->getTileIdx(cu.lumaPos());
6694
2.15M
  unsigned refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refTopY));
6695
2.15M
  if (curTileIdx != refTileIdx)
6696
0
  {
6697
0
    return false;
6698
0
  }
6699
2.15M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refBottomY));
6700
2.15M
  if (curTileIdx != refTileIdx)
6701
0
  {
6702
0
    return false;
6703
0
  }
6704
2.15M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refTopY));
6705
2.15M
  if (curTileIdx != refTileIdx)
6706
0
  {
6707
0
    return false;
6708
0
  }
6709
2.15M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refBottomY));
6710
2.15M
  if (curTileIdx != refTileIdx)
6711
0
  {
6712
0
    return false;
6713
0
  }
6714
6715
2.15M
  const Position cuPos{ xPos, yPos };
6716
6717
  //int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
6718
2.15M
  static const int numLeftCTUsLUT[3] = { 15, 3, 1 };
6719
6720
  // in the same CTU line
6721
2.15M
  const int numLeftCTUs = numLeftCTUsLUT[ctuSizeLog2 - 5];
6722
6723
2.15M
  if( ( refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2 ) && ( refLeftX >> ctuSizeLog2 >= ( xPos >> ctuSizeLog2 ) - numLeftCTUs ) )
6724
2.15M
  {
6725
    // in the same CTU, or left CTU
6726
    // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer
6727
2.15M
    if( ( ctuSizeLog2 == 7 ) && ( ( refLeftX >> ctuSizeLog2 ) == ( ( xPos >> ctuSizeLog2 ) - 1 ) ) )
6728
1.08M
    {
6729
      // ref block's collocated block in current CTU
6730
1.08M
      const Position refPosCol64x64{ ( refLeftX + ctuSize ) & ~63, refTopY & ~63 };
6731
1.08M
      if( refPosCol64x64 == Position{ xPos & ~63, yPos & ~63 } )
6732
589k
        return false;
6733
6734
      //CodingUnit* curef = cu.cs->getCU(refPosCol64x64, CH_L, cu.treeType);
6735
      //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6736
500k
      bool isDecomp = isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosCol64x64 );
6737
500k
      if( isDecomp )
6738
260k
      {
6739
260k
        return false;
6740
260k
      }
6741
500k
    }
6742
2.15M
  }
6743
33
  else
6744
33
    return false;
6745
6746
  // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
6747
1.31M
  const Position refPosBR{ refRightX, refBottomY };
6748
  //CodingUnit* curef = cu.cs->getCU(refPosBR, CH_L, cu.treeType);
6749
  //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6750
1.31M
  bool isDecomp = ( ( refPosBR.x >> ctuSizeLog2 ) < ( cuPos.x >> ctuSizeLog2 ) ) || ( refRightX < xPos && refBottomY < yPos ) || isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosBR );
6751
6752
1.31M
  return isDecomp;
6753
2.15M
}
6754
6755
} // namespace vvenc
6756
6757
//! \}
6758