Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/InterSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     InterSearch.cpp
45
 *  \brief    encoder inter search class
46
 */
47
48
#include "InterSearch.h"
49
#include "EncModeCtrl.h"
50
#include "EncLib.h"
51
#include "CommonLib/CommonDef.h"
52
#include "CommonLib/Rom.h"
53
#include "CommonLib/MotionInfo.h"
54
#include "CommonLib/Picture.h"
55
#include "CommonLib/UnitTools.h"
56
#include "CommonLib/Reshape.h"
57
#include "CommonLib/dtrace_next.h"
58
#include "CommonLib/dtrace_buffer.h"
59
#include "CommonLib/TimeProfiler.h"
60
61
#include <math.h>
62
63
 //! \ingroup EncoderLib
64
 //! \{
65
66
namespace vvenc {
67
68
// Nine refinement offsets around a centre position: entry 0 is the zero offset, entries 1-4
// change exactly one component by +/-1 and entries 5-8 change both components by +/-1.
// NOTE(review): the "H" suffix suggests half-sample refinement; the use site is outside
// this excerpt - confirm there.
static const Mv s_acMvRefineH[9] =
69
{
70
  Mv(  0,  0 ), // 0
71
  Mv(  0, -1 ), // 1
72
  Mv(  0,  1 ), // 2
73
  Mv( -1,  0 ), // 3
74
  Mv(  1,  0 ), // 4
75
  Mv( -1, -1 ), // 5
76
  Mv(  1, -1 ), // 6
77
  Mv( -1,  1 ), // 7
78
  Mv(  1,  1 )  // 8
79
};
80
81
// The same nine offsets as s_acMvRefineH, stored in a different order. The trailing number
// on each entry is the index of that offset in s_acMvRefineH, NOT its index in this array
// (array positions 3..6 hold the offsets numbered 5, 6, 3, 4).
// NOTE(review): the "Q" suffix suggests quarter-sample refinement; the use site is outside
// this excerpt - confirm there.
static const Mv s_acMvRefineQ[9] =
82
{
83
  Mv(  0,  0 ), // 0
84
  Mv(  0, -1 ), // 1
85
  Mv(  0,  1 ), // 2
86
  Mv( -1, -1 ), // 5
87
  Mv(  1, -1 ), // 6
88
  Mv( -1,  0 ), // 3
89
  Mv(  1,  0 ), // 4
90
  Mv( -1,  1 ), // 7
91
  Mv(  1,  1 )  // 8
92
};
93
94
// 42 x 9 table of skip flags. The nine columns match the nine entries of the refinement
// offset tables; a 'true' entry marks a position to skip.
// Row 0 keeps only column 0; the last row (41) skips nothing.
// NOTE(review): how the row index is derived is not visible in this excerpt - presumably it
// encodes the outcome of the preceding refinement stage; confirm at the use site.
// s_doInterpQ has the same number of rows and is presumably indexed the same way.
static const bool s_skipQpelPosition[ 42 ][ 9 ] =
95
{
96
  { false, true,  true,  true,  true,  true,  true,  true,  true  },
97
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
98
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
99
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
100
  { true,  false, true,  false, true,  false, true,  true,  true  },
101
  { true,  false, true,  true,  false, true,  false, true,  true  },
102
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
103
  { true,  true,  false, true,  true,  false, true,  false, true  },
104
  { true,  true,  false, true,  true,  true,  false, true,  false },
105
  { true,  true,  false, true,  true,  true,  true,  false, false },
106
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
107
  { true,  true,  false, true,  true,  false, true,  false, true  },
108
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
109
  { true,  true,  false, true,  true,  true,  false, true,  false },
110
  { true,  false, true,  false, false, true,  true,  true,  true  },
111
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
112
  { true,  false, true,  false, true,  false, true,  true,  true  },
113
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
114
  { true,  false, true,  true,  false, true,  false, true,  true  },
115
  { true,  true,  true,  true,  false, true,  false, true,  false },
116
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
117
  { true,  false, true,  true,  false, true,  false, true,  true  },
118
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
119
  { true,  true,  false, true,  true,  true,  false, true,  false },
120
  { true,  true,  true,  false, true,  false, true,  false, true  },
121
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
122
  { true,  false, true,  false, true,  false, true,  true,  true  },
123
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
124
  { true,  true,  false, true,  true,  false, true,  false, true  },
125
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
126
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
127
  { true,  true,  false, true,  true,  true,  false, true,  false },
128
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
129
  { true,  true,  false, true,  true,  true,  true,  true,  true  },
130
  { true,  true,  false, true,  true,  false, true,  false, true  },
131
  { true,  true,  true,  true,  true,  true,  false, true,  true  },
132
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
133
  { true,  false, true,  true,  false, true,  false, true,  true  },
134
  { true,  true,  true,  true,  true,  false, true,  true,  true  },
135
  { true,  false, true,  true,  true,  true,  true,  true,  true  },
136
  { true,  false, true,  false, true,  false, true,  true,  true  },
137
  { false, false, false, false, false, false, false, false, false },
138
};
139
140
// 42 x 14 table of interpolation flags, with the same number of rows as s_skipQpelPosition.
// Row 0 requests nothing; the last row (41) requests all fourteen entries.
// The line below labels the fourteen columns.
// NOTE(review): the labels look like (x,y) quarter-sample phases, with "H1,0"/"H3,0" being two
// additional intermediate results - the consumer is outside this excerpt, confirm there.
//   1,0    3,0    0,1    1,1    2,1    3,1    1,2    3,2    0,3    1,3    2,3    3,3    H1,0   H3,0
141
static const bool s_doInterpQ[ 42 ][ 14 ] =
142
{
143
  { false, false, false, false, false, false, false, false, false, false, false, false, false, false },
144
  { false, false, false, false, false, false, false, false, true,  false, false, false, false, true  },
145
  { false, false, true,  false, false, false, false, false, false, false, false, false, true,  false },
146
  { false, true,  false, false, false, false, false, false, false, false, false, false, false, false },
147
  { false, true,  false, false, false, false, false, false, true,  false, false, true,  false, true  },
148
  { false, true,  true,  false, false, true,  false, false, false, false, false, false, true,  false },
149
  { true,  false, false, false, false, false, false, false, false, false, false, false, false, false },
150
  { true,  false, false, false, false, false, false, false, true,  true,  false, false, false, true  },
151
  { true,  false, true,  true,  false, false, false, false, false, false, false, false, true,  false },
152
  { false, true,  false, false, false, true,  false, false, false, false, false, true,  true,  true  },
153
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
154
  { false, true,  false, false, false, false, false, false, false, false, true,  true,  false, true  },
155
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
156
  { false, true,  false, false, true,  true,  false, false, false, false, false, false, true,  false },
157
  { true,  false, false, true,  false, false, false, false, false, true,  false, false, true,  true  },
158
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
159
  { true,  false, false, false, false, false, false, false, false, true,  true,  false, false, true  },
160
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
161
  { true,  false, false, true,  true,  false, false, false, false, false, false, false, true,  false },
162
  { false, false, false, false, false, false, false, false, true,  true,  false, true,  false, true  },
163
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
164
  { false, false, false, false, false, false, false, true,  true,  false, false, true,  false, true  },
165
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
166
  { false, false, false, false, false, false, true,  false, true,  true,  false, false, false, true  },
167
  { false, false, true,  true,  false, true,  false, false, false, false, false, false, true,  false },
168
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
169
  { false, false, true,  false, false, true,  false, true,  false, false, false, false, true,  false },
170
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
171
  { false, false, true,  true,  false, false, true,  false, false, false, false, false, true,  false },
172
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
173
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
174
  { false, false, false, false, false, false, false, true,  false, false, true,  true,  false, true  },
175
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
176
  { false, false, false, false, false, false, false, true,  false, false, false, false, false, false },
177
  { false, false, false, false, true,  true,  false, true,  false, false, false, false, true,  false },
178
  { false, false, false, false, false, false, false, false, false, false, true,  false, false, true  },
179
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
180
  { false, false, false, false, false, false, true,  false, false, true,  true,  false, false, true  },
181
  { false, false, false, false, true,  false, false, false, false, false, false, false, true,  false },
182
  { false, false, false, false, false, false, true,  false, false, false, false, false, false, false },
183
  { false, false, false, true,  true,  false, true,  false, false, false, false, false, true,  false },
184
  { true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true  },
185
};
186
187
// Namespace-scope definition of the static constant declared in BlkUniMvInfoBuffer. It has no
// initializer here, so the value must come from the in-class declaration (not part of this file).
const int BlkUniMvInfoBuffer::m_uniMvListMaxSize;
188
189
/**
 * \brief Constructs an InterSearch with no collaborators attached and no buffers allocated.
 *
 * Pointer members are cleared, both search ranges are set to 0 and the adaptive
 * search-range table and the MVP-index cost table are zero-filled. The working
 * buffers are allocated later by init().
 */
InterSearch::InterSearch()
  : m_modeCtrl                    (nullptr)
  , m_defaultCachedBvs            (nullptr)
  , m_pcEncCfg                    (nullptr)
  , m_pcTrQuant                   (nullptr)
  , m_iSearchRange                (0)
  , m_bipredSearchRange           (0)
  , m_motionEstimationSearchMethod(VVENC_MESEARCH_FULL)
  , m_motionEstimationSearchMethodSCC( 0 )
  , m_CABACEstimator              (nullptr)
  , m_CtxCache                    (nullptr)
  , m_pTempPel                    (nullptr)
{
  // destroy() is run unconditionally by the destructor and tests these pointers before
  // releasing them, so they need a defined value even when init() is never called.
  // They are assigned in the body (not the initializer list) so that the result does
  // not depend on the member declaration order in the header.
  m_tmpAffiError   = nullptr;
  m_tmpAffiDeri[0] = nullptr;
  m_tmpAffiDeri[1] = nullptr;

  for (int i=0; i<MAX_NUM_REF_LIST_ADAPT_SR; i++)
  {
    memset (m_aaiAdaptSR[i], 0, MAX_IDX_ADAPT_SR * sizeof (int));
  }
  for (int i=0; i<AMVP_MAX_NUM_CANDS+1; i++)
  {
    memset (m_auiMVPIdxCost[i], 0, (AMVP_MAX_NUM_CANDS+1) * sizeof (uint32_t) );
  }
}
211
212
213
/** \brief Releases everything this object owns by delegating to destroy(). */
InterSearch::~InterSearch()
{
  this->destroy();
}
217
218
void InterSearch::init( const VVEncCfg& encCfg, TrQuant* pTrQuant, RdCost* pRdCost, EncModeCtrl* pModeCtrl, CodingStructure **pSaveCS )
219
17.3k
{
220
17.3k
  InterPrediction::init( pRdCost, encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_ifpLines );
221
17.3k
  m_numBVs                       = 0;
222
17.3k
  m_pcEncCfg                     = &encCfg;
223
17.3k
  m_pcTrQuant                    = pTrQuant;
224
17.3k
  m_pcRdCost                     = pRdCost;
225
17.3k
  m_modeCtrl                     = pModeCtrl;
226
17.3k
  m_pSaveCS                      = pSaveCS;
227
228
17.3k
  m_iSearchRange                    = encCfg.m_SearchRange;
229
17.3k
  m_bipredSearchRange               = encCfg.m_bipredSearchRange;
230
17.3k
  m_motionEstimationSearchMethod    = vvencMESearchMethod( encCfg.m_motionEstimationSearchMethod );
231
17.3k
  m_motionEstimationSearchMethodSCC = encCfg.m_motionEstimationSearchMethodSCC;
232
233
52.1k
  for( uint32_t iDir = 0; iDir < MAX_NUM_REF_LIST_ADAPT_SR; iDir++ )
234
34.7k
  {
235
243k
    for( uint32_t iRefIdx = 0; iRefIdx < MAX_IDX_ADAPT_SR; iRefIdx++ )
236
208k
    {
237
208k
      m_aaiAdaptSR[iDir][iRefIdx] = m_iSearchRange;
238
208k
    }
239
34.7k
  }
240
241
  // initialize motion cost
242
69.5k
  for( int iNum = 0; iNum < AMVP_MAX_NUM_CANDS + 1; iNum++ )
243
52.1k
  {
244
156k
    for( int iIdx = 0; iIdx < AMVP_MAX_NUM_CANDS; iIdx++ )
245
104k
    {
246
104k
      if( iIdx < iNum )
247
52.1k
      {
248
52.1k
        m_auiMVPIdxCost[iIdx][iNum] = xGetMvpIdxBits( iIdx, iNum );
249
52.1k
      }
250
52.1k
      else
251
52.1k
      {
252
52.1k
        m_auiMVPIdxCost[iIdx][iNum] = MAX_UINT;
253
52.1k
      }
254
104k
    }
255
52.1k
  }
256
257
17.3k
  const ChromaFormat cform   = encCfg.m_internChromaFormat;
258
17.3k
  const int          ctuSize = encCfg.m_CTUSize;
259
52.1k
  for (uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++)
260
34.7k
  {
261
34.7k
    m_tmpPredStorage[i].create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
262
34.7k
  }
263
17.3k
  m_tmpStorageLCU.create( UnitArea( cform, Area( 0, 0, ctuSize, ctuSize ) ) );
264
17.3k
  m_pTempPel = new Pel[ctuSize * ctuSize];
265
17.3k
  m_tmpAffiStorage.create(UnitArea(cform, Area(0, 0, ctuSize, ctuSize + 2)));  // allow overread by 2 samples
266
17.3k
  m_tmpAffiError = new Pel[ctuSize * ctuSize];
267
17.3k
  m_tmpAffiDeri[0] = new Pel[ctuSize * ctuSize];
268
17.3k
  m_tmpAffiDeri[1] = new Pel[ctuSize * ctuSize];
269
270
17.3k
  CompArea chromaArea( COMP_Cb, cform, Area( 0, 0, encCfg.m_CTUSize, encCfg.m_CTUSize ), true );
271
86.8k
  for( int i = 0; i < 4; i++ )
272
69.5k
  {
273
69.5k
    m_orgResiCb[i].create( chromaArea );
274
69.5k
    m_orgResiCr[i].create( chromaArea );
275
69.5k
  }
276
17.3k
}
277
278
void InterSearch::destroy()
279
17.3k
{
280
17.3k
  if ( m_pTempPel )
281
17.3k
  {
282
17.3k
    delete [] m_pTempPel;
283
17.3k
    m_pTempPel = nullptr;
284
17.3k
  }
285
286
52.1k
  for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
287
34.7k
  {
288
34.7k
    m_tmpPredStorage[i].destroy();
289
34.7k
  }
290
17.3k
  m_tmpStorageLCU.destroy();
291
17.3k
  m_tmpAffiStorage.destroy();
292
17.3k
  if (m_tmpAffiError != NULL)
293
17.3k
  {
294
17.3k
    delete[] m_tmpAffiError;
295
17.3k
    m_tmpAffiError = nullptr;
296
17.3k
  }
297
17.3k
  if (m_tmpAffiDeri[0] != NULL)
298
17.3k
  {
299
17.3k
    delete[] m_tmpAffiDeri[0];
300
17.3k
    m_tmpAffiDeri[0] = nullptr;
301
17.3k
  }
302
17.3k
  if (m_tmpAffiDeri[1] != NULL)
303
17.3k
  {
304
17.3k
    delete[] m_tmpAffiDeri[1];
305
17.3k
    m_tmpAffiDeri[1] = nullptr;
306
17.3k
  }
307
308
17.3k
  m_pSaveCS  = nullptr;
309
17.3k
}
310
311
/**
 * \brief Stores the externally owned resources handed in by the caller.
 *
 * All six pointers are copied as-is into the corresponding members; nothing is
 * allocated, validated or released here.
 */
void InterSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache, ReuseUniMv* pReuseUniMv, BlkUniMvInfoBuffer* pBlkUniMvInfoBuffer, AffineProfList* pAffineProfList, IbcBvCand* pCachedBvs )
{
  // entropy-coding estimation
  m_CABACEstimator = cabacEstimator;
  m_CtxCache       = ctxCache;

  // motion-search caches
  m_ReuseUniMv         = pReuseUniMv;
  m_BlkUniMvInfoBuffer = pBlkUniMvInfoBuffer;
  m_AffineProfList     = pAffineProfList;
  m_defaultCachedBvs   = pCachedBvs;
}
320
321
/**
 * \brief Marks every slot of m_reusedUniMVs as empty (nullptr).
 *
 * The loops are nested in subscript order so that the innermost loop walks the last
 * array dimension, i.e. the table is cleared in memory order rather than with a stride.
 */
ReuseUniMv::ReuseUniMv()
{
  const int numPos     = MAX_CU_SIZE >> MIN_CU_LOG2;
  const int maxSizeIdx = MAX_CU_SIZE_IDX-2;
  for( int wIdx = 0; wIdx < maxSizeIdx; wIdx++ )
  {
    for( int hIdx = 0; hIdx < maxSizeIdx; hIdx++ )
    {
      for( int x = 0; x < numPos; x++ )
      {
        for( int y = 0; y < numPos; y++ )
        {
          m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] = nullptr;
        }
      }
    }
  }
}
339
340
/** \brief Frees every cached entry by delegating to resetReusedUniMvs(). */
ReuseUniMv::~ReuseUniMv()
{
  this->resetReusedUniMvs();
}
344
345
void ReuseUniMv::resetReusedUniMvs()
346
11.4k
{
347
11.4k
  const int numPos     = MAX_CU_SIZE >> MIN_CU_LOG2;
348
11.4k
  const int maxSizeIdx = MAX_CU_SIZE_IDX-2;
349
79.8k
  for ( int wIdx = 0; wIdx < maxSizeIdx; wIdx++ )
350
68.4k
  {
351
479k
    for ( int hIdx = 0; hIdx < maxSizeIdx; hIdx++ )
352
410k
    {
353
13.5M
      for ( int y = 0; y < numPos; y++ )
354
13.1M
      {
355
433M
        for ( int x = 0; x < numPos; x++ )
356
420M
        {
357
420M
          if ( m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] )
358
0
          {
359
0
            delete [] m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ];
360
0
            m_reusedUniMVs[ wIdx ][ hIdx ][ x ][ y ] = nullptr;
361
0
          }
362
420M
        }
363
13.1M
      }
364
410k
    }
365
68.4k
  }
366
11.4k
}
367
368
void InterSearch::loadGlobalUniMvs( const Area& lumaArea, const PreCalcValues& pcv)
369
0
{
370
0
  unsigned idx1, idx2, idx3, idx4;
371
0
  getAreaIdxNew(lumaArea, pcv, idx1, idx2, idx3, idx4);
372
0
  if( m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4])
373
0
  {
374
//    DTRACE( g_trace_ctx, D_TMP, "%d unimv load %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
375
0
    m_BlkUniMvInfoBuffer->insertUniMvCands(lumaArea, m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4]);
376
0
  }
377
0
}
378
379
/**
 * \brief Estimates the prediction-unit distortion and, optionally, picks the SBT mode to try first.
 *
 * \param tempCS            coding structure forwarded to xCalcMinDistSbt()
 * \param cu                coding unit under test
 * \param histBestSbt       [out] written only when \p doPreAnalyzeResi is true; 0 means "try DCT2"
 * \param curPuSse          [out] always set to getEstDistSbt( NUMBER_SBT_MODE )
 * \param sbtAllowed        forwarded to xCalcMinDistSbt()
 * \param doPreAnalyzeResi  run the residual pre-analysis and the history lookup
 * \param mtsAllowed        when false and all SBT modes are skipped, DCT2 is chosen directly
 */
void InterSearch::getBestSbt( CodingStructure* tempCS, CodingUnit* cu, uint8_t& histBestSbt, Distortion& curPuSse, uint8_t sbtAllowed, bool doPreAnalyzeResi, bool mtsAllowed )
{
  m_estMinDistSbt[NUMBER_SBT_MODE] = MAX_DISTORTION;
  m_skipSbtAll = false;

  if( !doPreAnalyzeResi )
  {
    // no pre-analysis: only report the estimate, histBestSbt stays untouched
    curPuSse = getEstDistSbt( NUMBER_SBT_MODE );
    return;
  }

  xCalcMinDistSbt( *tempCS, *cu, sbtAllowed );
  curPuSse = getEstDistSbt( NUMBER_SBT_MODE );

  if( m_skipSbtAll && !mtsAllowed )
  {
    histBestSbt = 0; //try DCT2
    return;
  }

  int  slShift = 4 + std::min( Log2( cu->lwidth() * cu->lheight() ), 9 );
  assert( curPuSse != MAX_DISTORTION );
  histBestSbt = m_modeCtrl->findBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ) );

  //special case, skip SBT when loading SBT
  if( m_skipSbtAll && CU::isSbtMode( histBestSbt ) )
  {
    histBestSbt = 0; //try DCT2
  }
}
409
410
411
/**
 * \brief Evaluates one search position and records it in \p rcStruct if it beats the current best.
 *
 * \param rcStruct    search state; best cost/position/distance/point number are updated on improvement
 * \param iSearchX    horizontal offset of the candidate relative to rcStruct.piRefY
 * \param iSearchY    vertical offset (in rows of rcStruct.iRefStride)
 * \param ucPointNr   point number stored with a new best
 * \param uiDistance  distance stored with a new best
 *
 * On improvement, uiBestRound is reset to 0 and the early-exit threshold of m_cDistParam
 * is lowered to the new best cost.
 */
inline void InterSearch::xTZSearchHelp( TZSearchStruct& rcStruct, const int iSearchX, const int iSearchY, const uint8_t ucPointNr, const uint32_t uiDistance )
{
  // point the distortion parameters at the candidate block inside the reference
  m_cDistParam.cur.buf = rcStruct.piRefY + iSearchY * rcStruct.iRefStride + iSearchX;

  Distortion cost = m_cDistParam.distFunc( m_cDistParam );

  // the motion cost can only increase the total, so skip it when the distortion alone already loses
  if( cost >= rcStruct.uiBestSad )
  {
    return;
  }

  cost += m_pcRdCost->getCostOfVectorWithPredictor( iSearchX, iSearchY, rcStruct.imvShift );
  if( cost >= rcStruct.uiBestSad )
  {
    return;
  }

  rcStruct.uiBestSad      = cost;
  rcStruct.iBestX         = iSearchX;
  rcStruct.iBestY         = iSearchY;
  rcStruct.uiBestDistance = uiDistance;
  rcStruct.uiBestRound    = 0;
  rcStruct.ucPointNr      = ucPointNr;
  m_cDistParam.maximumDistortionForEarlyExit = cost;
}
440
441
442
443
/**
 * \brief Tests the two positions selected by the current best point number.
 *
 * The pair of offsets is looked up by rcStruct.ucPointNr, using this numbering
 * around the start point:
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * Point number 0 maps to the zero offset for both candidates. Candidates outside
 * rcStruct.searchRange are not evaluated.
 */
inline void InterSearch::xTZ2PointSearch( TZSearchStruct& rcStruct )
{
  const SearchRange& sr = rcStruct.searchRange;

  static const int xOffset[2][9] = { {  0, -1, -1,  0, -1, +1, -1, -1, +1 }, {  0,  0, +1, +1, -1, +1,  0, +1,  0 } };
  static const int yOffset[2][9] = { {  0,  0, -1, -1, +1, -1,  0, +1,  0 }, {  0, -1, -1,  0, -1, +1, +1, +1, +1 } };

  // Capture the base position and point number up front: evaluating the first candidate
  // may update rcStruct, and the second candidate must still be derived from the old best.
  const int baseX   = rcStruct.iBestX;
  const int baseY   = rcStruct.iBestY;
  const int pointNr = rcStruct.ucPointNr;

  for( int cand = 0; cand < 2; cand++ )
  {
    const int candX = baseX + xOffset[cand][pointNr];
    const int candY = baseY + yOffset[cand][pointNr];

    const bool outside = candX < sr.left || candX > sr.right || candY < sr.top || candY > sr.bottom;
    if( !outside )
    {
      xTZSearchHelp( rcStruct, candX, candY, 0, 2 );
    }
  }
}
469
470
/**
 * \brief Tests the four corners of a square of half-width \p iDist around the start point.
 *
 * Corner numbering (only 1, 3, 6 and 8 are visited):
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * \param rcStruct  search state; uiBestRound is incremented before the corners are tested
 * \param iStartX   horizontal centre of the square
 * \param iStartY   vertical centre of the square
 * \param iDist     distance from the centre; must be 1 or 2 (checked)
 *
 * Corners outside rcStruct.searchRange are not evaluated.
 */
inline void InterSearch::xTZ4PointSquareSearch( TZSearchStruct & rcStruct, const int iStartX, const int iStartY, const int iDist )
{
  const SearchRange& sr = rcStruct.searchRange;
  CHECK( iDist == 0 || iDist > 2, "Invalid distance" );

  const int yTop    = iStartY - iDist;
  const int yBottom = iStartY + iDist;
  const int xLeft   = iStartX - iDist;
  const int xRight  = iStartX + iDist;

  const bool leftInside  = xLeft  >= sr.left;
  const bool rightInside = xRight <= sr.right;

  rcStruct.uiBestRound += 1;

  if( yTop >= sr.top )
  {
    if( leftInside )  xTZSearchHelp( rcStruct, xLeft,  yTop, 1, iDist ); // top left
    if( rightInside ) xTZSearchHelp( rcStruct, xRight, yTop, 3, iDist ); // top right
  }
  if( yBottom <= sr.bottom )
  {
    if( leftInside )  xTZSearchHelp( rcStruct, xLeft,  yBottom, 6, iDist ); // bottom left
    if( rightInside ) xTZSearchHelp( rcStruct, xRight, yBottom, 8, iDist ); // bottom right
  }
}
506
507
/**
 * \brief Tests the eight positions on a square of half-width \p iDist around the start point.
 *
 * Point numbering:
 *     1 2 3
 *     4 0 5
 *     6 7 8
 * \param rcStruct  search state; uiBestRound is incremented before the points are tested
 * \param iStartX   horizontal centre of the square
 * \param iStartY   vertical centre of the square
 * \param iDist     distance from the centre; must not be 0 (checked)
 *
 * Each point is evaluated only if the shifted coordinate(s) lie inside rcStruct.searchRange;
 * the start coordinates themselves are not range-checked here.
 */
inline void InterSearch::xTZ8PointSquareSearch( TZSearchStruct& rcStruct, const int iStartX, const int iStartY, const int iDist )
{
  const SearchRange& sr = rcStruct.searchRange;
  CHECK( iDist == 0 , "Invalid distance");

  const int yTop    = iStartY - iDist;
  const int yBottom = iStartY + iDist;
  const int xLeft   = iStartX - iDist;
  const int xRight  = iStartX + iDist;

  const bool leftInside  = xLeft  >= sr.left;
  const bool rightInside = xRight <= sr.right;

  rcStruct.uiBestRound += 1;

  // top row: 1 2 3
  if( yTop >= sr.top )
  {
    if( leftInside )  xTZSearchHelp( rcStruct, xLeft,   yTop, 1, iDist );
    xTZSearchHelp( rcStruct, iStartX, yTop, 2, iDist );
    if( rightInside ) xTZSearchHelp( rcStruct, xRight,  yTop, 3, iDist );
  }

  // middle row: 4 . 5
  if( leftInside )  xTZSearchHelp( rcStruct, xLeft,  iStartY, 4, iDist );
  if( rightInside ) xTZSearchHelp( rcStruct, xRight, iStartY, 5, iDist );

  // bottom row: 6 7 8
  if( yBottom <= sr.bottom )
  {
    if( leftInside )  xTZSearchHelp( rcStruct, xLeft,   yBottom, 6, iDist );
    xTZSearchHelp( rcStruct, iStartX, yBottom, 7, iDist );
    if( rightInside ) xTZSearchHelp( rcStruct, xRight,  yBottom, 8, iDist );
  }
}
557
558
inline void InterSearch::xTZ8PointDiamondSearch( TZSearchStruct& rcStruct,
559
                                                 const int iStartX,
560
                                                 const int iStartY,
561
                                                 const int iDist,
562
                                                 const bool bCheckCornersAtDist1 )
563
0
{
564
0
  const SearchRange& sr = rcStruct.searchRange;
565
  // 8 point search,                   //   1 2 3
566
  // search around the start point     //   4 0 5
567
  // with the required  distance       //   6 7 8
568
0
  CHECK( iDist == 0, "Invalid distance" );
569
0
  const int iTop        = iStartY - iDist;
570
0
  const int iBottom     = iStartY + iDist;
571
0
  const int iLeft       = iStartX - iDist;
572
0
  const int iRight      = iStartX + iDist;
573
0
  rcStruct.uiBestRound += 1;
574
575
0
  if ( iDist == 1 )
576
0
  {
577
0
    if ( iTop >= sr.top ) // check top
578
0
    {
579
0
      if (bCheckCornersAtDist1)
580
0
      {
581
0
        if ( iLeft >= sr.left) // check top-left
582
0
        {
583
0
          xTZSearchHelp( rcStruct, iLeft, iTop, 1, iDist );
584
0
        }
585
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
586
0
        if ( iRight <= sr.right ) // check middle right
587
0
        {
588
0
          xTZSearchHelp( rcStruct, iRight, iTop, 3, iDist );
589
0
        }
590
0
      }
591
0
      else
592
0
      {
593
0
        xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
594
0
      }
595
0
    }
596
0
    if ( iLeft >= sr.left ) // check middle left
597
0
    {
598
0
      xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
599
0
    }
600
0
    if ( iRight <= sr.right ) // check middle right
601
0
    {
602
0
      xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
603
0
    }
604
0
    if ( iBottom <= sr.bottom ) // check bottom
605
0
    {
606
0
      if (bCheckCornersAtDist1)
607
0
      {
608
0
        if ( iLeft >= sr.left) // check top-left
609
0
        {
610
0
          xTZSearchHelp( rcStruct, iLeft, iBottom, 6, iDist );
611
0
        }
612
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
613
0
        if ( iRight <= sr.right ) // check middle right
614
0
        {
615
0
          xTZSearchHelp( rcStruct, iRight, iBottom, 8, iDist );
616
0
        }
617
0
      }
618
0
      else
619
0
      {
620
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
621
0
      }
622
0
    }
623
0
  }
624
0
  else
625
0
  {
626
0
    if ( iDist <= 8 )
627
0
    {
628
0
      const int iTop_2      = iStartY - (iDist>>1);
629
0
      const int iBottom_2   = iStartY + (iDist>>1);
630
0
      const int iLeft_2     = iStartX - (iDist>>1);
631
0
      const int iRight_2    = iStartX + (iDist>>1);
632
633
0
      if (  iTop >= sr.top && iLeft >= sr.left &&
634
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
635
0
      {
636
0
        xTZSearchHelp( rcStruct, iStartX,  iTop,      2, iDist    );
637
0
        xTZSearchHelp( rcStruct, iLeft_2,  iTop_2,    1, iDist>>1 );
638
0
        xTZSearchHelp( rcStruct, iRight_2, iTop_2,    3, iDist>>1 );
639
0
        xTZSearchHelp( rcStruct, iLeft,    iStartY,   4, iDist    );
640
0
        xTZSearchHelp( rcStruct, iRight,   iStartY,   5, iDist    );
641
0
        xTZSearchHelp( rcStruct, iLeft_2,  iBottom_2, 6, iDist>>1 );
642
0
        xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, iDist>>1 );
643
0
        xTZSearchHelp( rcStruct, iStartX,  iBottom,   7, iDist    );
644
0
      }
645
0
      else // check border
646
0
      {
647
0
        if ( iTop >= sr.top ) // check top
648
0
        {
649
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 2, iDist );
650
0
        }
651
0
        if ( iTop_2 >= sr.top ) // check half top
652
0
        {
653
0
          if ( iLeft_2 >= sr.left ) // check half left
654
0
          {
655
0
            xTZSearchHelp( rcStruct, iLeft_2, iTop_2, 1, (iDist>>1) );
656
0
          }
657
0
          if ( iRight_2 <= sr.right ) // check half right
658
0
          {
659
0
            xTZSearchHelp( rcStruct, iRight_2, iTop_2, 3, (iDist>>1) );
660
0
          }
661
0
        } // check half top
662
0
        if ( iLeft >= sr.left ) // check left
663
0
        {
664
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 4, iDist );
665
0
        }
666
0
        if ( iRight <= sr.right ) // check right
667
0
        {
668
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 5, iDist );
669
0
        }
670
0
        if ( iBottom_2 <= sr.bottom ) // check half bottom
671
0
        {
672
0
          if ( iLeft_2 >= sr.left ) // check half left
673
0
          {
674
0
            xTZSearchHelp( rcStruct, iLeft_2, iBottom_2, 6, (iDist>>1) );
675
0
          }
676
0
          if ( iRight_2 <= sr.right ) // check half right
677
0
          {
678
0
            xTZSearchHelp( rcStruct, iRight_2, iBottom_2, 8, (iDist>>1) );
679
0
          }
680
0
        } // check half bottom
681
0
        if ( iBottom <= sr.bottom ) // check bottom
682
0
        {
683
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 7, iDist );
684
0
        }
685
0
      } // check border
686
0
    }
687
0
    else // iDist > 8
688
0
    {
689
0
      if ( iTop >= sr.top && iLeft >= sr.left &&
690
0
           iRight <= sr.right && iBottom <= sr.bottom ) // check border
691
0
      {
692
0
        xTZSearchHelp( rcStruct, iStartX, iTop,    0, iDist );
693
0
        xTZSearchHelp( rcStruct, iLeft,   iStartY, 0, iDist );
694
0
        xTZSearchHelp( rcStruct, iRight,  iStartY, 0, iDist );
695
0
        xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
696
0
        for ( int index = 1; index < 4; index++ )
697
0
        {
698
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
699
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
700
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
701
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
702
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
703
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
704
0
          xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
705
0
          xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
706
0
        }
707
0
      }
708
0
      else // check border
709
0
      {
710
0
        if ( iTop >= sr.top ) // check top
711
0
        {
712
0
          xTZSearchHelp( rcStruct, iStartX, iTop, 0, iDist );
713
0
        }
714
0
        if ( iLeft >= sr.left ) // check left
715
0
        {
716
0
          xTZSearchHelp( rcStruct, iLeft, iStartY, 0, iDist );
717
0
        }
718
0
        if ( iRight <= sr.right ) // check right
719
0
        {
720
0
          xTZSearchHelp( rcStruct, iRight, iStartY, 0, iDist );
721
0
        }
722
0
        if ( iBottom <= sr.bottom ) // check bottom
723
0
        {
724
0
          xTZSearchHelp( rcStruct, iStartX, iBottom, 0, iDist );
725
0
        }
726
0
        for ( int index = 1; index < 4; index++ )
727
0
        {
728
0
          const int iPosYT = iTop    + ((iDist>>2) * index);
729
0
          const int iPosYB = iBottom - ((iDist>>2) * index);
730
0
          const int iPosXL = iStartX - ((iDist>>2) * index);
731
0
          const int iPosXR = iStartX + ((iDist>>2) * index);
732
733
0
          if ( iPosYT >= sr.top ) // check top
734
0
          {
735
0
            if ( iPosXL >= sr.left ) // check left
736
0
            {
737
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYT, 0, iDist );
738
0
            }
739
0
            if ( iPosXR <= sr.right ) // check right
740
0
            {
741
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYT, 0, iDist );
742
0
            }
743
0
          } // check top
744
0
          if ( iPosYB <= sr.bottom ) // check bottom
745
0
          {
746
0
            if ( iPosXL >= sr.left ) // check left
747
0
            {
748
0
              xTZSearchHelp( rcStruct, iPosXL, iPosYB, 0, iDist );
749
0
            }
750
0
            if ( iPosXR <= sr.right ) // check right
751
0
            {
752
0
              xTZSearchHelp( rcStruct, iPosXR, iPosYB, 0, iDist );
753
0
            }
754
0
          } // check bottom
755
0
        } // for ...
756
0
      } // check border
757
0
    } // iDist <= 8
758
0
  } // iDist == 1
759
0
}
760
761
// Evaluate the nine refinement offsets of one fractional search stage and keep the cheapest one.
//
// The offset table is s_acMvRefineH when iFrac == 2 and s_acMvRefineQ otherwise. For every offset
// the distortion of the matching block in m_filteredBlock is measured with m_cDistParam.distFunc
// and the motion-vector cost reported by m_pcRdCost is added.
//
// When m_pcEncCfg->m_fastSubPel == 1 the search is pruned: offsets flagged in
// s_skipQpelPosition[ patternId ] are skipped, and in the iFrac == 2 stage the blocks are
// interpolated on demand, the loop may stop early, and patternId is updated from the collected
// costs once the loop has finished.
//
// Parameters:
//   pcPatternKey  block handed to setDistParam as the first buffer; its width + 1 is the reference stride
//   baseRefMv     added to each offset to pick the m_filteredBlock entry that is read
//   iFrac         scale applied to the offsets; 2 selects s_acMvRefineH and the on-demand filtering path
//   rcMvFrac      in:  added to each offset when computing the vector cost
//                 out: the winning entry of the offset table
//   uiDistBest    in:  starting best cost (only honoured when m_fastSubPel == 1)
//                 out: best cost found
//   patternId     in:  row of s_skipQpelPosition that is applied (fast sub-pel mode only)
//                 out: modified only when m_fastSubPel == 1 and iFrac == 2
//   pattern       samples interpolated by the on-demand path (buf, stride, width and height are read)
//   useAltHpelIf  forwarded to the interpolation filter calls; also enters the filterSize choice
//
// Returns the best cost found (the value left in uiDistBest).
Distortion InterSearch::xPatternRefinement( const CPelBuf* pcPatternKey,
762
                                            Mv baseRefMv,
763
                                            int iFrac, Mv& rcMvFrac,
764
                                            Distortion& uiDistBest,
765
                                            int& patternId,
766
                                            CPelBuf* pattern,
767
                                            bool useAltHpelIf )
768
0
{
769
0
  Distortion  uiDist;
770
0
  // The caller's incoming best is kept only in fast sub-pel mode; otherwise start from the maximum.
  uiDistBest = m_pcEncCfg->m_fastSubPel == 1 ? uiDistBest : MAX_DISTORTION;
771
0
  // Index of the best offset so far; stays 0 if no candidate beats the starting cost.
  uint32_t        uiDirecBest = 0;
772
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
773
774
0
  Pel*  piRefPos;
775
0
  // NOTE(review): derived from pcPatternKey->width while intStride/dstStride below use
  // pattern->width; presumably both widths are equal - confirm against the callers.
  int iRefStride = pcPatternKey->width + 1;
776
0
  // Last argument: 0 when m_bUseHADME is off, otherwise 2 if m_fastHad is set and 1 if not.
  m_pcRdCost->setDistParam( m_cDistParam, *pcPatternKey, m_filteredBlock[0][0][0], iRefStride, m_lumaClpRng.bd, COMP_Y, 0, m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0 );
777
778
0
  const ClpRng& clpRng = m_lumaClpRng;
779
0
  int width = pattern->width;
780
0
  int height = pattern->height;
781
0
  int srcStride = pattern->stride;
782
783
0
  // Intermediate and destination buffers are one sample wider than the pattern.
  int intStride = width + 1;
784
0
  int dstStride = width + 1;
785
0
  Pel* intPtr;
786
0
  Pel* dstPtr;
787
0
  // Tap count used for the buffer offsets below, chosen from useAltHpelIf and reduceTap.
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
788
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
789
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
790
0
  int halfFilterSize = ( filterSize >> 1 );
791
0
  // Start halfFilterSize rows above and one column left of the pattern origin.
  const Pel* srcPtr  = pattern->buf - halfFilterSize*srcStride - 1;
792
793
0
  const ChromaFormat chFmt = m_currChromaFormat;
794
795
0
  // Cost per offset; entries of offsets that are skipped or never reached keep the starting best cost.
  Distortion distH[ 9 ] = { uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest, uiDistBest };
796
0
  // Used by the patternId switch at the end: one cost is scaled by 1 << shift (x16) and compared
  // against TH (17) and TL (15) times another cost, i.e. ratio thresholds of 17/16 and 15/16.
  const int TH = 17, TL = 15, shift = 4;
797
798
0
  const Mv* pcMvRefine = (iFrac == 2 ? s_acMvRefineH : s_acMvRefineQ);
799
0
  for (uint32_t i = 0; i < 9; i++)
800
0
  {
801
0
    if( m_pcEncCfg->m_fastSubPel == 1 )
802
0
    {
803
0
      // Fast mode: drop offsets that the current pattern marks as skippable.
      if( s_skipQpelPosition[ patternId ][ i ] )
804
0
      {
805
0
        continue;
806
0
      }
807
808
0
      if( 2 == iFrac )
809
0
      {
810
0
        // Leave the offset loop early: at i == 5 if offset 0 is still best, at i == 7 if offset 1
        // is best, and at i == 8 if offset 1, 3 or 5 is best.
        if ( ( 5 == i && 0 == uiDirecBest ) || ( 7 == i && 1 == uiDirecBest ) || ( 8 == i && ( 1 == uiDirecBest || 3 == uiDirecBest || 5 == uiDirecBest ) ) )
811
0
        {
812
0
          break;
813
0
        }
814
815
0
        // On-demand interpolation: only offsets 0, 1, 3 and 5 generate a block; all other
        // offsets fall through and read blocks produced by these branches.
        if( 0 == i )
816
0
        {
817
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
818
0
          // Horizontal pass with fraction argument 0 into m_filteredBlockTmp[ 0 ][ 0 ], height + filterSize rows.
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 0 ][ 0 ], intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
819
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 0 ][ 0 ] + width, intStride, 1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
820
821
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
822
0
          // Same horizontal pass with fraction argument 2 << MV_FRACTIONAL_BITS_DIFF into m_filteredBlockTmp[ 2 ][ 0 ].
          m_if.filterHor( COMP_Y, srcPtr, srcStride, m_filteredBlockTmp[ 2 ][ 0 ], intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
823
0
          m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[ 2 ][ 0 ] + width, intStride, 1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
824
825
0
          // Vertical pass with fraction argument 0 over the first intermediate buffer -> m_filteredBlock[ 0 ][ 0 ][ 0 ].
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + halfFilterSize * intStride + 1;
826
0
          dstPtr = m_filteredBlock[ 0 ][ 0 ][ 0 ];
827
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
828
0
        }
829
0
        else if( 1 == i )
830
0
        {
831
0
          // Vertical pass with fraction argument 2 << MV_FRACTIONAL_BITS_DIFF, starting one row
          // earlier and covering height + 1 rows -> m_filteredBlock[ 2 ][ 0 ][ 0 ].
          intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
832
0
          dstPtr = m_filteredBlock[ 2 ][ 0 ][ 0 ];
833
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
834
0
        }
835
0
        else if( 3 == i )
836
0
        {
837
0
          // Vertical pass with fraction argument 0 over the second intermediate buffer,
          // width + 1 columns -> m_filteredBlock[ 0 ][ 2 ][ 0 ].
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + halfFilterSize * intStride;
838
0
          dstPtr = m_filteredBlock[ 0 ][ 2 ][ 0 ];
839
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
840
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
841
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
842
0
        }
843
0
        else if( 5 == i )
844
0
        {
845
0
          // Vertical pass with fraction argument 2 << MV_FRACTIONAL_BITS_DIFF over the second
          // intermediate buffer, width + 1 columns and height + 1 rows -> m_filteredBlock[ 2 ][ 2 ][ 0 ].
          intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
846
0
          dstPtr = m_filteredBlock[ 2 ][ 2 ][ 0 ];
847
          // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
848
0
          m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
849
0
          m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride, 1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
850
0
        }
851
0
      }
852
0
    }
853
0
    // Position used to locate the reference samples: table offset plus baseRefMv, scaled by iFrac.
    Mv cMvTest = pcMvRefine[ i ];
854
0
    cMvTest += baseRefMv;
855
856
0
    int horVal = cMvTest.hor * iFrac;
857
0
    int verVal = cMvTest.ver * iFrac;
858
0
    // The low two bits of each scaled component select the filtered block.
    piRefPos = m_filteredBlock[verVal & 3][horVal & 3][0];
859
860
0
    // horVal == 2 with an even verVal: read one sample further right in the selected block.
    // (presumably because the blocks were filtered starting one column/row before the pattern origin - confirm)
    if ( horVal == 2 && ( verVal & 1 ) == 0 )
861
0
    {
862
0
      piRefPos += 1;
863
0
    }
864
0
    // verVal == 2 with an even horVal: read one row further down in the selected block.
    if ( ( horVal & 1 ) == 0 && verVal == 2 )
865
0
    {
866
0
      piRefPos += iRefStride;
867
0
    }
868
0
    // cMvTest is reused: for the rate term the offset is combined with rcMvFrac instead of baseRefMv.
    cMvTest = pcMvRefine[i];
869
0
    cMvTest += rcMvFrac;
870
871
872
0
    m_cDistParam.cur.buf   = piRefPos;
873
0
    uiDist = m_cDistParam.distFunc( m_cDistParam );
874
0
    uiDist += m_pcRdCost->getCostOfVectorWithPredictor( cMvTest.hor, cMvTest.ver, 0 );
875
876
0
    distH[ i ] = uiDist;
877
0
    if ( uiDist < uiDistBest )
878
0
    {
879
0
      uiDistBest  = uiDist;
880
0
      uiDirecBest = i;
881
0
      // Hand the new best to the distortion parameters as their early-exit bound.
      m_cDistParam.maximumDistortionForEarlyExit = uiDist;
882
0
    }
883
0
  }
884
885
0
  // Report the winning table entry (entry 0 if nothing improved on the starting cost).
  rcMvFrac = pcMvRefine[uiDirecBest];
886
887
0
  // Fast sub-pel mode, after the iFrac == 2 stage: update patternId from the winning offset and
  // the cost relations between offsets. Every case adds a "hor" and a "ver" term; cases 1-8 then
  // add a per-case base unless patternId equals 41 at that point.
  // NOTE(review): the meaning of 41 and of the bases (8, 13, 18, ...) comes from the layout of
  // s_skipQpelPosition, which is not visible here - confirm against that table.
  // NOTE(review): if Distortion is an unsigned type, the differences distH[ a ] - distH[ b ] below
  // wrap around instead of going negative when distH[ a ] < distH[ b ] - confirm this is intended.
  if( m_pcEncCfg->m_fastSubPel == 1 && iFrac == 2 )
888
0
  {
889
0
    switch ( uiDirecBest )
890
0
    {
891
0
    case 0:
892
      // hor
893
0
      distH[ 3 ] <<= shift;
894
0
      patternId += ( distH[ 3 ] > TH * distH[ 4 ] ? 2 : ( distH[ 3 ] < TL * distH[ 4 ] ? 1 : 0 ) );
895
      // ver
896
0
      distH[ 1 ] <<= shift;
897
0
      patternId += ( distH[ 1 ] > TH * distH[ 2 ] ? 6 : ( distH[ 1 ] < TL * distH[ 2 ] ? 3 : 0 ) );
898
0
      break;
899
0
    case 1:
900
      // hor
901
0
      distH[ 5 ] <<= shift;
902
0
      patternId += ( distH[ 5 ] > TH * distH[ 6 ] ? 4 : ( distH[ 5 ] < TL * distH[ 6 ] ? 2 : 0 ) );
903
      // ver
904
0
      patternId += ( distH[ 2 ] - distH[ 0 ] > distH[ 0 ] - distH[ 1 ] ? 1 : 0 );
905
906
0
      patternId += ( 41 == patternId ? 0 : 8 );
907
0
      break;
908
0
    case 2:
909
      // hor
910
0
      distH[ 7 ] <<= shift;
911
0
      patternId += ( distH[ 7 ] > TH * distH[ 8 ] ? 4 : ( distH[ 7 ] < TL * distH[ 8 ] ? 2 : 0 ) );
912
      // ver
913
0
      patternId += ( distH[ 1 ] - distH[ 0 ] > distH[ 0 ] - distH[ 2 ] ? 1 : 0 );
914
915
0
      patternId += ( 41 == patternId ? 0 : 13 );
916
0
      break;
917
0
    case 3:
918
      // hor
919
0
      patternId += ( distH[ 4 ] - distH[ 0 ] > distH[ 0 ] - distH[ 3 ] ? 1 : 0 );
920
      // ver
921
0
      distH[ 5 ] <<= shift;
922
0
      patternId += ( distH[ 5 ] > TH * distH[ 7 ] ? 4 : ( distH[ 5 ] < TL * distH[ 7 ] ? 2 : 0 ) );
923
924
0
      patternId += ( 41 == patternId ? 0 : 18 );
925
0
      break;
926
0
    case 4:
927
      // hor
928
0
      patternId += ( distH[ 3 ] - distH[ 0 ] > distH[ 0 ] - distH[ 4 ] ? 1 : 0 );
929
      // ver
930
0
      distH[ 6 ] <<= shift;
931
0
      patternId += ( distH[ 6 ] > TH * distH[ 8 ] ? 4 : ( distH[ 6 ] < TL * distH[ 8 ] ? 2 : 0 ) );
932
933
0
      patternId += ( 41 == patternId ? 0 : 23 );
934
0
      break;
935
0
    case 5:
936
      // hor
937
0
      patternId += ( distH[ 6 ] - distH[ 1 ] > distH[ 1 ] - distH[ 5 ] ? 1 : 0 );
938
      // ver
939
0
      patternId += ( distH[ 7 ] - distH[ 3 ] > distH[ 3 ] - distH[ 5 ] ? 2 : 0 );
940
941
0
      patternId += ( 41 == patternId ? 0 : 28 );
942
0
      break;
943
0
    case 6:
944
      // hor
945
0
      patternId += ( distH[ 5 ] - distH[ 1 ] > distH[ 1 ] - distH[ 6 ] ? 1 : 0 );
946
      // ver
947
0
      patternId += ( distH[ 8 ] - distH[ 4 ] > distH[ 4 ] - distH[ 6 ] ? 2 : 0 );
948
949
0
      patternId += ( 41 == patternId ? 0 : 31 );
950
0
      break;
951
0
    case 7:
952
      // hor
953
0
      patternId += ( distH[ 8 ] - distH[ 2 ] > distH[ 2 ] - distH[ 7 ] ? 1 : 0 );
954
      // ver
955
0
      patternId += ( distH[ 5 ] - distH[ 3 ] > distH[ 3 ] - distH[ 7 ] ? 2 : 0 );
956
957
0
      patternId += ( 41 == patternId ? 0 : 34 );
958
0
      break;
959
0
    case 8:
960
      // hor
961
0
      patternId += ( distH[ 7 ] - distH[ 2 ] > distH[ 2 ] - distH[ 8 ] ? 1 : 0 );
962
      // ver
963
0
      patternId += ( distH[ 6 ] - distH[ 4 ] > distH[ 4 ] - distH[ 8 ] ? 2 : 0 );
964
965
0
      patternId += ( 41 == patternId ? 0 : 37 );
966
0
      break;
967
0
    default:
968
0
      break;
969
0
    }
970
0
  }
971
972
0
  return uiDistBest;
973
0
}
974
975
//! search of the best candidate for inter prediction
976
bool InterSearch::predInterSearch(CodingUnit& cu, Partitioner& partitioner, double& bestCostInter)
977
0
{
978
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH, cu.cs, partitioner.chType );
979
0
  CodingStructure& cs = *cu.cs;
980
981
0
  AMVPInfo     amvp[2];
982
0
  Mv           cMvSrchRngLT;
983
0
  Mv           cMvSrchRngRB;
984
0
  Mv           cMvZero;
985
0
  Mv           cMv[2];
986
0
  Mv           cMvBi[2];
987
0
  Mv           cMvTemp[2][MAX_REF_PICS];
988
0
  Mv           cMvHevcTemp[2][MAX_REF_PICS];
989
0
  int          iNumPredDir = cs.slice->isInterP() ? 1 : 2;
990
991
0
  Mv           cMvPred[2][MAX_REF_PICS];
992
993
0
  Mv           cMvPredBi[2][MAX_REF_PICS];
994
0
  int          aaiMvpIdxBi[2][MAX_REF_PICS];
995
996
0
  int          aaiMvpIdx[2][MAX_REF_PICS];
997
0
  int          aaiMvpNum[2][MAX_REF_PICS];
998
999
0
  AMVPInfo     aacAMVPInfo[2][MAX_REF_PICS];
1000
1001
0
  int          iRefIdx[2]={0,0}; //If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
1002
0
  int          iRefIdxBi[2] = { -1, -1 };
1003
1004
0
  uint32_t     uiMbBits[3] = {1, 1, 0};
1005
1006
0
  uint32_t     uiLastMode = 0;
1007
0
  int          iRefStart, iRefEnd;
1008
1009
0
  int          symMode = 0;
1010
1011
0
  int          bestBiPRefIdxL1 = 0;
1012
0
  int          bestBiPMvpL1    = 0;
1013
0
  Distortion   biPDistTemp     = MAX_DISTORTION;
1014
1015
0
  uint8_t      BcwIdx          = (cu.cs->slice->isInterB() ? cu.BcwIdx : BCW_DEFAULT);
1016
0
  bool         enforceBcwPred = false;
1017
1018
  // Loop over Prediction Units
1019
0
  uint32_t     puIdx = 0;
1020
0
  uint32_t     uiLastModeTemp = 0;
1021
0
  Distortion   uiAffineCost = MAX_DISTORTION;
1022
0
  Distortion   uiHevcCost = MAX_DISTORTION;
1023
0
  bool checkAffine = (cu.imv == IMV_OFF);
1024
0
  if (cu.cs->bestParent != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D) != nullptr && cu.cs->bestParent->getCU(CH_L,TREE_D)->affine == false)
1025
0
  {
1026
0
    m_skipPROF = true;
1027
0
  }
1028
1029
0
  m_encOnly = true;
1030
0
  {
1031
0
    CU::spanMotionInfo( cu );
1032
0
    Distortion   uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
1033
0
    Distortion   uiCostBi  =   MAX_DISTORTION;
1034
0
    Distortion   uiCostTemp;
1035
1036
0
    uint32_t         uiBits[3];
1037
0
    uint32_t         uiBitsTemp;
1038
0
    Distortion   bestBiPDist = MAX_DISTORTION;
1039
1040
0
    Distortion   uiCostTempL0[MAX_NUM_REF];
1041
0
    for (int iNumRef=0; iNumRef < MAX_NUM_REF; iNumRef++)
1042
0
    {
1043
0
      uiCostTempL0[iNumRef] = MAX_DISTORTION;
1044
0
    }
1045
0
    uint32_t         uiBitsTempL0[MAX_NUM_REF];
1046
1047
0
    Mv           mvValidList1;
1048
0
    int          refIdxValidList1 = 0;
1049
0
    uint32_t         bitsValidList1   = MAX_UINT;
1050
0
    Distortion   costValidList1   = MAX_DISTORTION;
1051
1052
0
    CPelUnitBuf origBuf = cu.cs->getOrgBuf( cu );
1053
1054
0
    xGetBlkBits( cs.slice->isInterP(), puIdx, uiLastMode, uiMbBits );
1055
1056
0
    m_pcRdCost->selectMotionLambda();
1057
1058
0
    unsigned imvShift = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
1059
1060
    //  Uni-directional prediction
1061
0
    for ( int iRefList = 0; iRefList < iNumPredDir; iRefList++ )
1062
0
    {
1063
0
      RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1064
0
      for (int iRefIdxTemp = 0; iRefIdxTemp < cs.slice->numRefIdx[ refPicList ]; iRefIdxTemp++)
1065
0
      {
1066
0
        uiBitsTemp = uiMbBits[iRefList];
1067
0
        if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1068
0
        {
1069
0
          uiBitsTemp += iRefIdxTemp+1;
1070
0
          if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1071
0
          {
1072
0
            uiBitsTemp--;
1073
0
          }
1074
0
        }
1075
0
        xEstimateMvPredAMVP( cu, origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], amvp[refPicList], biPDistTemp);
1076
1077
0
        aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
1078
0
        aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];
1079
1080
0
        if(cs.picHeader->mvdL1Zero && iRefList==1 && biPDistTemp < bestBiPDist)
1081
0
        {
1082
0
          bestBiPDist = biPDistTemp;
1083
0
          bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
1084
0
          bestBiPRefIdxL1 = iRefIdxTemp;
1085
0
        }
1086
1087
0
        uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1088
1089
0
        if ( m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1 )    // list 1
1090
0
        {
1091
0
          if ( cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] >= 0 )
1092
0
          {
1093
0
            cMvTemp[1][iRefIdxTemp] = cMvTemp[0][cs.slice->list1IdxToList0Idx[iRefIdxTemp ]];
1094
0
            uiCostTemp = uiCostTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]];
1095
            /*first subtract the bit-rate part of the cost of the other list*/
1096
0
            uiCostTemp -= m_pcRdCost->getCost( uiBitsTempL0[cs.slice->list1IdxToList0Idx[ iRefIdxTemp ]] );
1097
            /*correct the bit-rate part of the current ref*/
1098
0
            m_pcRdCost->setPredictor  ( cMvPred[iRefList][iRefIdxTemp] );
1099
0
            uiBitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor( cMvTemp[1][iRefIdxTemp].hor, cMvTemp[1][iRefIdxTemp].ver, imvShift + MV_FRACTIONAL_BITS_DIFF );
1100
            /*calculate the correct cost*/
1101
0
            uiCostTemp += m_pcRdCost->getCost( uiBitsTemp );
1102
0
          }
1103
0
          else
1104
0
          {
1105
0
            xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1106
0
          }
1107
0
        }
1108
0
        else
1109
0
        {
1110
0
          xMotionEstimation( cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList] );
1111
0
        }
1112
          
1113
0
        if( cs.slice->sps->BCW && cu.BcwIdx == BCW_DEFAULT && cs.slice->isInterB() )
1114
0
        {
1115
0
          m_uniMotions.setReadMode( true, (uint32_t)iRefList, (uint32_t)iRefIdxTemp) ;
1116
0
          m_uniMotions.copyFrom( cMvTemp[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint32_t)iRefList, (uint32_t)iRefIdxTemp );
1117
0
        }
1118
1119
0
        xCopyAMVPInfo( &amvp[refPicList], &aacAMVPInfo[iRefList][iRefIdxTemp]); // must always be done ( also when AMVP_MODE = AM_NONE )
1120
0
        xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv );
1121
1122
0
        if ( iRefList == 0 )
1123
0
        {
1124
0
          uiCostTempL0[iRefIdxTemp] = uiCostTemp;
1125
0
          uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
1126
0
        }
1127
0
        if ( uiCostTemp < uiCost[iRefList] )
1128
0
        {
1129
0
          uiCost[iRefList] = uiCostTemp;
1130
0
          uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
1131
1132
          // set motion
1133
0
          cMv    [iRefList] = cMvTemp[iRefList][iRefIdxTemp];
1134
0
          iRefIdx[iRefList] = iRefIdxTemp;
1135
0
        }
1136
1137
0
        if ( iRefList == 1 && uiCostTemp < costValidList1 && cs.slice->list1IdxToList0Idx[ iRefIdxTemp ] < 0 )
1138
0
        {
1139
0
          costValidList1 = uiCostTemp;
1140
0
          bitsValidList1 = uiBitsTemp;
1141
1142
          // set motion
1143
0
          mvValidList1     = cMvTemp[iRefList][iRefIdxTemp];
1144
0
          refIdxValidList1 = iRefIdxTemp;
1145
0
        }
1146
0
      }
1147
0
    }
1148
1149
0
    ::memcpy(cMvHevcTemp, cMvTemp, sizeof(cMvTemp));
1150
0
    if (cu.imv == IMV_OFF && (!cu.slice->sps->BCW || BcwIdx == BCW_DEFAULT))
1151
0
    {
1152
0
      m_BlkUniMvInfoBuffer->insertUniMvCands(cu.Y(), &cMvTemp[0][0]);
1153
1154
0
      unsigned idx1, idx2, idx3, idx4;
1155
0
      getAreaIdxNew(cu.Y(), *cs.pcv, idx1, idx2, idx3, idx4);
1156
0
      if( ! m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] )
1157
0
      {
1158
0
        m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4] = new Mv[ 2 * MAX_REF_PICS ];
1159
//          DTRACE( g_trace_ctx, D_TMP, "%d unimv first reuse %d %d %d %d \n", g_trace_ctx->getChannelCounter(D_TMP), idx3,idx4,idx1,idx2 );
1160
0
      }
1161
0
      ::memcpy(m_ReuseUniMv->m_reusedUniMVs[idx1][idx2][idx3][idx4], cMvTemp, 2 * MAX_REF_PICS * sizeof(Mv));
1162
0
    }
1163
0
    if (bestCostInter != MAX_DOUBLE)
1164
0
    {
1165
0
      int L = (cu.slice->TLayer <= 2) ? 0 : (cu.slice->TLayer - 2);
1166
0
      double besCostMerge = bestCostInter;
1167
0
      bestCostInter = (uiCost[0] < uiCost[1]) ? uiCost[0] : uiCost[1];
1168
0
      if ((cu.slice->TLayer > (m_pcEncCfg->m_maxTLayer - (m_pcEncCfg->m_FastInferMerge & 7))) && bestCostInter > MRG_FAST_RATIOMYV[L] * besCostMerge)
1169
0
      {
1170
0
        m_skipPROF = false;
1171
0
        m_encOnly = false;
1172
0
        return true;
1173
0
      }
1174
0
    }
1175
    //  Bi-predictive Motion estimation
1176
0
    if( cs.slice->isInterB() && !CU::isBipredRestriction( cu ) && (cu.slice->checkLDC || BcwIdx == BCW_DEFAULT  || !m_affineModeSelected || m_pcEncCfg->m_BCW != 2 ) )
1177
0
    {
1178
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_B, &cs, partitioner.chType );
1179
0
      bool doBiPred = true;
1180
0
      cMvBi[0] = cMv[0];
1181
0
      cMvBi[1] = cMv[1];
1182
0
      iRefIdxBi[0] = iRefIdx[0];
1183
0
      iRefIdxBi[1] = iRefIdx[1];
1184
1185
0
      ::memcpy( cMvPredBi,   cMvPred,   sizeof( cMvPred   ) );
1186
0
      ::memcpy( aaiMvpIdxBi, aaiMvpIdx, sizeof( aaiMvpIdx ) );
1187
1188
0
      uint32_t uiMotBits[2];
1189
1190
0
      if(cs.picHeader->mvdL1Zero)
1191
0
      {
1192
        // case: no mvd for L1
1193
        // note: mv = mvp + mvd
1194
        // mv for L1 is equal to mvp(L1) and the mvd search is only performed for L0
1195
0
        xCopyAMVPInfo(&aacAMVPInfo[1][bestBiPRefIdxL1], &amvp[REF_PIC_LIST_1]);
1196
0
        aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
1197
0
        cMvPredBi  [1][bestBiPRefIdxL1] = amvp[REF_PIC_LIST_1].mvCand[bestBiPMvpL1];
1198
0
        if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cMvPredBi[1][bestBiPRefIdxL1].ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
1199
0
        {
1200
          // this mvp cannot be used for mv, skip Bi-pred
1201
0
          uiCostBi = std::numeric_limits<Distortion>::max();
1202
0
          doBiPred = false;
1203
0
        }
1204
1205
0
        if( doBiPred )
1206
0
        {
1207
0
          cMvBi[1] = cMvPredBi[1][bestBiPRefIdxL1];
1208
0
          iRefIdxBi[1] = bestBiPRefIdxL1;
1209
0
          cu.mv[REF_PIC_LIST_1][0] = cMvBi[1];
1210
0
          cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1211
0
          cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
1212
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
1213
0
          motionCompensation( cu, predBufTmp, REF_PIC_LIST_1 );
1214
1215
0
          uiMotBits[0] = uiBits[0] - uiMbBits[0];
1216
0
          uiMotBits[1] = uiMbBits[1];
1217
1218
0
          if(cs.slice->numRefIdx[REF_PIC_LIST_1] > 1)
1219
0
          {
1220
0
            uiMotBits[1] += bestBiPRefIdxL1 + 1;
1221
0
            if(bestBiPRefIdxL1 == cs.slice->numRefIdx[REF_PIC_LIST_1] - 1)
1222
0
            {
1223
0
              uiMotBits[1]--;
1224
0
            }
1225
0
          }
1226
1227
0
          uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
1228
1229
0
          uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1230
1231
0
          cMvTemp[1][bestBiPRefIdxL1] = cMvBi[1];
1232
0
        }
1233
0
      }
1234
0
      else
1235
0
      {
1236
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
1237
0
        uiMotBits[1] = uiBits[1] - uiMbBits[1];
1238
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
1239
0
      }
1240
1241
0
      if( doBiPred )
1242
0
      {
1243
        // 4-times iteration (default)
1244
0
        int iNumIter = 4;
1245
1246
        // fast encoder setting: only one iteration
1247
0
        if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 || cs.picHeader->mvdL1Zero )
1248
0
        {
1249
0
          iNumIter = 1;
1250
0
        }
1251
1252
0
        enforceBcwPred = (BcwIdx != BCW_DEFAULT);
1253
1254
0
        for ( int iIter = 0; iIter < iNumIter; iIter++ )
1255
0
        {
1256
0
          int         iRefList    = iIter % 2;
1257
1258
0
          if ( m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode==VVENC_FASTINTERSEARCH_MODE2 )
1259
0
          {
1260
0
            if( uiCost[0] <= uiCost[1] )
1261
0
            {
1262
0
              iRefList = 1;
1263
0
            }
1264
0
            else
1265
0
            {
1266
0
              iRefList = 0;
1267
0
            }
1268
0
          }
1269
0
          else if ( iIter == 0 )
1270
0
          {
1271
0
            iRefList = 0;
1272
0
          }
1273
0
          if ( iIter == 0 && !cs.picHeader->mvdL1Zero)
1274
0
          {
1275
0
            cu.mv    [1 - iRefList][0] = cMv    [1 - iRefList];
1276
0
            cu.refIdx[1 - iRefList]    = iRefIdx[1 - iRefList];
1277
1278
0
            PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
1279
0
            motionCompensation( cu, predBufTmp, RefPicList(1 - iRefList) );
1280
0
          }
1281
1282
0
          RefPicList  refPicList = ( iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
1283
1284
0
          if(cs.picHeader->mvdL1Zero)
1285
0
          {
1286
0
            iRefList = 0;
1287
0
            refPicList = REF_PIC_LIST_0;
1288
0
          }
1289
1290
0
          bool bChanged = false;
1291
1292
0
          iRefStart = 0;
1293
0
          iRefEnd   = cs.slice->numRefIdx[ refPicList ]-1;
1294
0
          for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
1295
0
          {
1296
0
            uiBitsTemp = uiMbBits[2] + uiMotBits[1-iRefList];
1297
0
            uiBitsTemp += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1298
0
            if ( cs.slice->numRefIdx[ refPicList ] > 1 )
1299
0
            {
1300
0
              uiBitsTemp += iRefIdxTemp+1;
1301
0
              if ( iRefIdxTemp == cs.slice->numRefIdx[ refPicList ]-1 )
1302
0
              {
1303
0
                uiBitsTemp--;
1304
0
              }
1305
0
            }
1306
0
            uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
1307
0
            if ( cs.slice->biDirPred )
1308
0
            {
1309
0
              uiBitsTemp += 1; // add one bit for symmetrical MVD mode
1310
0
            }
1311
            // call ME
1312
0
            xCopyAMVPInfo(&aacAMVPInfo[iRefList][iRefIdxTemp], &amvp[refPicList] );
1313
0
            xMotionEstimation ( cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, cMvTemp[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp, amvp[refPicList], true );
1314
0
            xCheckBestMVP( refPicList, cMvTemp[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], amvp[refPicList], uiBitsTemp, uiCostTemp, cu.imv);
1315
0
            if ( uiCostTemp < uiCostBi )
1316
0
            {
1317
0
              bChanged = true;
1318
1319
0
              cMvBi[iRefList]     = cMvTemp[iRefList][iRefIdxTemp];
1320
0
              iRefIdxBi[iRefList] = iRefIdxTemp;
1321
1322
0
              uiCostBi            = uiCostTemp;
1323
0
              uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1-iRefList];
1324
0
              uiMotBits[iRefList] -= ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1325
0
              uiBits[2]           = uiBitsTemp;
1326
1327
0
              if(iNumIter!=1)
1328
0
              {
1329
                //  Set motion
1330
0
                cu.mv    [refPicList][0] = cMvBi    [iRefList];
1331
0
                cu.refIdx[refPicList]    = iRefIdxBi[iRefList];
1332
1333
0
                PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
1334
0
                motionCompensation( cu, predBufTmp, refPicList );
1335
0
              }
1336
0
            }
1337
0
          } // for loop-iRefIdxTemp
1338
1339
0
          if( !bChanged )
1340
0
          {
1341
0
            if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
1342
0
            {
1343
0
              xCopyAMVPInfo(&aacAMVPInfo[0][iRefIdxBi[0]], &amvp[REF_PIC_LIST_0]);
1344
0
              xCheckBestMVP( REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], amvp[REF_PIC_LIST_0], uiBits[2], uiCostBi, cu.imv);
1345
0
              if(!cs.picHeader->mvdL1Zero)
1346
0
              {
1347
0
                xCopyAMVPInfo(&aacAMVPInfo[1][iRefIdxBi[1]], &amvp[REF_PIC_LIST_1]);
1348
0
                xCheckBestMVP( REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], amvp[REF_PIC_LIST_1], uiBits[2], uiCostBi, cu.imv);
1349
0
              }
1350
0
            }
1351
0
            break;
1352
0
          }
1353
0
        } // for loop-iter
1354
0
      }
1355
1356
      // SMVD
1357
0
      if( cs.slice->biDirPred )
1358
0
      {
1359
0
        double th1 = 1.02;
1360
0
        bool testSME = true;
1361
0
        int numStartCand = m_pcEncCfg->m_SMVD > 1 ? 1 : 5;
1362
0
        Distortion symCost;
1363
0
        Mv cMvPredSym[2];
1364
0
        int mvpIdxSym[2];
1365
1366
0
        int curRefList = REF_PIC_LIST_0;
1367
0
        int tarRefList = 1 - curRefList;
1368
0
        RefPicList eCurRefList = (curRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
1369
0
        int refIdxCur = cs.slice->symRefIdx[ curRefList ];
1370
0
        int refIdxTar = cs.slice->symRefIdx[ tarRefList ];
1371
0
        if( aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 0 ] == aacAMVPInfo[ curRefList ][ refIdxCur ].mvCand[ 1 ] )
1372
0
        {
1373
0
          aacAMVPInfo[ curRefList ][ refIdxCur ].numCand = 1;
1374
0
        }
1375
0
        if( aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 0 ] == aacAMVPInfo[ tarRefList ][ refIdxTar ].mvCand[ 1 ] )
1376
0
        {
1377
0
          aacAMVPInfo[ tarRefList ][ refIdxTar ].numCand = 1;
1378
0
        }
1379
1380
0
        MvField cCurMvField, cTarMvField;
1381
0
        Distortion costStart = MAX_DISTORTION;
1382
0
        for ( int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand; i++ )
1383
0
        {
1384
0
          for ( int j = 0; j < aacAMVPInfo[tarRefList][refIdxTar].numCand; j++ )
1385
0
          {
1386
            GCC_WARNING_DISABLE_array_bounds // probably a bug in gcc-10 static analyzer: It thinks the indices are -1 and therefore triggers -Werror=array-bounds
1387
0
            cCurMvField.setMvField( aacAMVPInfo[curRefList][refIdxCur].mvCand[i], refIdxCur );
1388
0
            cTarMvField.setMvField( aacAMVPInfo[tarRefList][refIdxTar].mvCand[j], refIdxTar );
1389
0
            GCC_WARNING_RESET
1390
0
            if( m_pcEncCfg->m_ifpLines )
1391
0
            {
1392
0
              xCheckAndClipMvToFppLine( cCurMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1393
0
              xCheckAndClipMvToFppLine( cTarMvField.mv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
1394
0
            }
1395
0
            Distortion cost = xGetSymCost( cu, origBuf, eCurRefList, cCurMvField, cTarMvField, BcwIdx );
1396
0
            if ( cost < costStart )
1397
0
            {
1398
0
              costStart = cost;
1399
0
              cMvPredSym[curRefList] = aacAMVPInfo[curRefList][refIdxCur].mvCand[i];
1400
0
              cMvPredSym[tarRefList] = aacAMVPInfo[tarRefList][refIdxTar].mvCand[j];
1401
0
              mvpIdxSym[curRefList] = i;
1402
0
              mvpIdxSym[tarRefList] = j;
1403
0
            }
1404
0
          }
1405
0
        }
1406
0
        cCurMvField.mv = cMvPredSym[curRefList];
1407
0
        cTarMvField.mv = cMvPredSym[tarRefList];
1408
1409
0
        m_pcRdCost->setCostScale(0);
1410
0
        Mv pred = cMvPredSym[curRefList];
1411
0
        pred.changeTransPrecInternal2Amvr(cu.imv);
1412
0
        m_pcRdCost->setPredictor(pred);
1413
0
        Mv mv = cCurMvField.mv;
1414
0
        mv.changeTransPrecInternal2Amvr(cu.imv);
1415
0
        uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
1416
0
        bits += m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS];
1417
0
        bits += m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS];
1418
0
        costStart += m_pcRdCost->getCost(bits);
1419
1420
0
        std::vector<Mv> symmvdCands;
1421
0
        auto smmvdCandsGen = [&](Mv mvCand, bool mvPrecAdj)
1422
0
        {
1423
0
          if (mvPrecAdj && cu.imv)
1424
0
          {
1425
0
            mvCand.roundTransPrecInternal2Amvr(cu.imv);
1426
0
          }
1427
1428
0
          bool toAddMvCand = true;
1429
0
          for (std::vector<Mv>::iterator pos = symmvdCands.begin(); pos != symmvdCands.end(); pos++)
1430
0
          {
1431
0
            if (*pos == mvCand)
1432
0
            {
1433
0
              toAddMvCand = false;
1434
0
              break;
1435
0
            }
1436
0
          }
1437
1438
0
          if (toAddMvCand)
1439
0
          {
1440
0
            symmvdCands.push_back(mvCand);
1441
0
          }
1442
0
        };
1443
1444
0
        smmvdCandsGen(cMvHevcTemp[curRefList][refIdxCur], false);
1445
0
        smmvdCandsGen(cMvTemp[curRefList][refIdxCur], false);
1446
0
        if (iRefIdxBi[curRefList] == refIdxCur)
1447
0
        {
1448
0
          smmvdCandsGen(cMvBi[curRefList], false);
1449
0
        }
1450
0
        for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
1451
0
        {
1452
0
          if( symmvdCands.size() >= numStartCand )
1453
0
          {
1454
0
            break;
1455
0
          }
1456
0
          BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
1457
0
          smmvdCandsGen(curMvInfo->uniMvs[curRefList][refIdxCur], true);
1458
0
        }
1459
1460
0
        for (auto mvStart : symmvdCands)
1461
0
        {
1462
0
          bool checked = false; //if it has been checkin in the mvPred.
1463
0
          for (int i = 0; i < aacAMVPInfo[curRefList][refIdxCur].numCand && !checked; i++)
1464
0
          {
1465
0
            checked |= (mvStart == aacAMVPInfo[curRefList][refIdxCur].mvCand[i]);
1466
0
          }
1467
0
          if (checked)
1468
0
          {
1469
0
            continue;
1470
0
          }
1471
1472
0
          Distortion bestCost = costStart;
1473
0
          xSymMvdCheckBestMvp(cu, origBuf, mvStart, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, costStart, false);
1474
0
          if (costStart < bestCost)
1475
0
          {
1476
0
            cCurMvField.setMvField(mvStart, refIdxCur);
1477
0
            cTarMvField.setMvField(mvStart.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1478
0
          }
1479
0
        }
1480
0
        Mv startPtMv = cCurMvField.mv;
1481
1482
0
        Distortion mvpCost = m_pcRdCost->getCost(m_auiMVPIdxCost[mvpIdxSym[curRefList]][AMVP_MAX_NUM_CANDS] + m_auiMVPIdxCost[mvpIdxSym[tarRefList]][AMVP_MAX_NUM_CANDS]);
1483
0
        symCost = costStart - mvpCost;
1484
1485
        // ME
1486
0
        testSME = m_pcEncCfg->m_SMVD <= 2 || ( symCost < uiCostBi * th1 && uiCostBi < uiCost[ 0 ] && uiCostBi < uiCost[ 1 ] );
1487
0
        if( testSME )
1488
0
        {
1489
0
          xSymMotionEstimation( cu, origBuf, cMvPredSym[ curRefList ], cMvPredSym[ tarRefList ], eCurRefList, cCurMvField, cTarMvField, symCost, BcwIdx );
1490
0
        }
1491
1492
0
        symCost += mvpCost;
1493
1494
0
        if (startPtMv != cCurMvField.mv)
1495
0
        { // if ME change MV, run a final check for best MVP.
1496
0
          xSymMvdCheckBestMvp(cu, origBuf, cCurMvField.mv, (RefPicList)curRefList, aacAMVPInfo, BcwIdx, cMvPredSym, mvpIdxSym, symCost, true);
1497
0
        }
1498
1499
0
        bits = uiMbBits[2];
1500
0
        bits += 1; // add one bit for #symmetrical MVD mode
1501
0
        bits += ( (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1502
0
        symCost += m_pcRdCost->getCost(bits);
1503
0
        cTarMvField.setMvField(cCurMvField.mv.getSymmvdMv(cMvPredSym[curRefList], cMvPredSym[tarRefList]), refIdxTar);
1504
1505
        // save results
1506
0
        if ( symCost < uiCostBi  
1507
0
          && ( !m_pcEncCfg->m_ifpLines || 
1508
0
          ( CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cCurMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) &&
1509
0
            CU::isMvInRangeFPP( cu.ly(), cu.lheight(), cTarMvField.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) ) )          
1510
0
          )
1511
0
        {
1512
0
          uiCostBi = symCost;
1513
0
          symMode = 1 + curRefList;
1514
1515
0
          cMvBi[curRefList] = cCurMvField.mv;
1516
0
          iRefIdxBi[curRefList] = cCurMvField.refIdx;
1517
0
          aaiMvpIdxBi[curRefList][cCurMvField.refIdx] = mvpIdxSym[curRefList];
1518
0
          cMvPredBi[curRefList][iRefIdxBi[curRefList]] = cMvPredSym[curRefList];
1519
1520
0
          cMvBi[tarRefList] = cTarMvField.mv;
1521
0
          iRefIdxBi[tarRefList] = cTarMvField.refIdx;
1522
0
          aaiMvpIdxBi[tarRefList][cTarMvField.refIdx] = mvpIdxSym[tarRefList];
1523
0
          cMvPredBi[tarRefList][iRefIdxBi[tarRefList]] = cMvPredSym[tarRefList];
1524
0
        }
1525
0
      }
1526
0
    } // if (B_SLICE)
1527
1528
      //  Clear Motion Field
1529
0
    cu.mv [REF_PIC_LIST_0][0] = Mv();
1530
0
    cu.mv [REF_PIC_LIST_1][0] = Mv();
1531
0
    cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
1532
0
    cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
1533
0
    cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
1534
0
    cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
1535
0
    cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
1536
0
    cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
1537
0
    cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
1538
0
    cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
1539
1540
    // Set Motion Field
1541
0
    cMv    [1] = mvValidList1;
1542
0
    iRefIdx[1] = refIdxValidList1;
1543
0
    uiBits [1] = bitsValidList1;
1544
0
    uiCost [1] = costValidList1;
1545
0
    if( enforceBcwPred )
1546
0
    {
1547
0
      uiCost[0] = uiCost[1] = MAX_UINT;
1548
0
    }
1549
1550
0
    uiLastModeTemp = uiLastMode;
1551
0
    if ( uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1])
1552
0
    {
1553
0
      bestCostInter = uiCostBi;
1554
0
      uiLastMode = 2;
1555
0
      cu.mv [REF_PIC_LIST_0][0] = cMvBi[0];
1556
0
      cu.mv [REF_PIC_LIST_1][0] = cMvBi[1];
1557
0
      cu.mvd[REF_PIC_LIST_0][0] = cMvBi[0] - cMvPredBi[0][iRefIdxBi[0]];
1558
0
      cu.mvd[REF_PIC_LIST_1][0] = cMvBi[1] - cMvPredBi[1][iRefIdxBi[1]];
1559
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
1560
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
1561
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
1562
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
1563
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
1564
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
1565
0
      cu.interDir = 3;
1566
1567
0
      cu.smvdMode = symMode;
1568
0
    }
1569
0
    else if ( uiCost[0] <= uiCost[1] )
1570
0
    {
1571
0
      bestCostInter = uiCost[0];
1572
0
      uiLastMode = 0;
1573
0
      cu.mv [REF_PIC_LIST_0][0] = cMv[0];
1574
0
      cu.mvd[REF_PIC_LIST_0][0] = cMv[0] - cMvPred[0][iRefIdx[0]];
1575
0
      cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
1576
0
      cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
1577
0
      cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
1578
0
      cu.interDir = 1;
1579
0
    }
1580
0
    else
1581
0
    {
1582
0
      bestCostInter = uiCost[1];
1583
0
      uiLastMode = 1;
1584
0
      cu.mv [REF_PIC_LIST_1][0] = cMv[1];
1585
0
      cu.mvd[REF_PIC_LIST_1][0] = cMv[1] - cMvPred[1][iRefIdx[1]];
1586
0
      cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
1587
0
      cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
1588
0
      cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
1589
0
      cu.interDir = 2;
1590
0
    }
1591
1592
0
    if( BcwIdx != BCW_DEFAULT )
1593
0
    {
1594
0
      cu.BcwIdx = BCW_DEFAULT; // Reset to default for the Non-NormalMC modes.
1595
0
    }
1596
0
    uiHevcCost = (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) ? uiCostBi : ((uiCost[0] <= uiCost[1]) ? uiCost[0] : uiCost[1]);
1597
0
    if (m_pcEncCfg->m_Affine > 2)
1598
0
    {
1599
0
      if (cu.slice->TLayer > 3)
1600
0
      {
1601
0
        checkAffine = false;
1602
0
      }
1603
0
      else
1604
0
      {
1605
0
        if( m_pcEncCfg->m_Affine >= 4 && cu.slice->TLayer >= 2 )
1606
0
        {
1607
0
          checkAffine = m_modeCtrl->comprCUCtx->bestCU ? (checkAffine && m_modeCtrl->comprCUCtx->bestCU->affine) : checkAffine;
1608
0
        }
1609
0
      }
1610
0
    }
1611
0
    if( checkAffine && cu.Y().width > 8 && cu.Y().height > 8 && m_pcEncCfg->m_Affine > 0 )
1612
0
    {
1613
      // Based on:
1614
      // H. Pejman*, S. Coulombe*, C. Vazquez*, M. Jamali° and A. Vakili°
1615
      // *École de technologie supérieure, °Summit Tech Multimedia
1616
      // "An Adjustable Fast Decision Method for Affine Motion Estimation in VVC,"
1617
      // ICIP, Kuala Lumpur, Malaysia, 2023, pp. 2695-2699, doi: 10.1109/ICIP49359.2023.10222750.
1618
      // https://ieeexplore.ieee.org/document/10222750
1619
1620
0
      static const double affine_thr_coffs[3] = { 2.534229853866437, 0.05173246 ,0.87650414 };
1621
0
      static const double affine_thr_param[5] = { 1, 1, 1, 1.3, 2.3 }; // TODO: Adapt if extending m_Affine range!
1622
0
      const int qp         = cu.qp;
1623
0
      const int blk_area   = cu.Y().area();
1624
0
      const double threshold  = affine_thr_param[m_pcEncCfg->m_Affine - 1];
1625
1626
      //Multiple linear regression (MLR):
1627
      //Y = b0 + b1*(QP) + b2*(LOG2(BLK_AREA))
1628
0
      double log_affine_thr =
1629
0
        affine_thr_coffs[0] +
1630
0
        qp * affine_thr_coffs[1] +
1631
0
        log2(blk_area) * affine_thr_coffs[2];
1632
1633
      //log_affine_thr is LOG 2 of estimated thr
1634
0
      double affine_thr = pow(2, log_affine_thr) * threshold;
1635
1636
0
      double scaled_uiHevcCost = (double)uiHevcCost;
1637
1638
      //The trained coefficients are based on the cost of internal 10 BitDepth. So, the cost should be scaled if the internal BitDepth is not 10.
1639
0
      if (m_pcEncCfg->m_internalBitDepth[0] !=10)
1640
0
      {
1641
        //Based on the CTC document, to convert 8-bit video to 10-bit or vice versa, the VTM only multiplies (8 to 10 bits) or divides (10 to 8 bits) the pixel values by 4.
1642
        //In this case, the cost values are approximately scaled by 4.
1643
        //The trained data acquired from internal 10 bit data. So, if internal bit depth is 8, the conversion into 10-bit cost can be done as follows:
1644
0
        scaled_uiHevcCost = uiHevcCost * (pow(2.0, 10-m_pcEncCfg->m_internalBitDepth[0]));
1645
0
      }
1646
0
      if( scaled_uiHevcCost < affine_thr )
1647
0
      {
1648
0
        checkAffine = false;
1649
0
      }
1650
0
    }
1651
0
    if (cu.Y().width > 8 && cu.Y().height > 8 && cu.slice->sps->Affine && checkAffine)
1652
0
    {
1653
0
      PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_SEARCH_AFFINE, &cs, partitioner.chType );
1654
0
      m_hevcCost = uiHevcCost;
1655
      // save normal hevc result
1656
0
      uint32_t uiMRGIndex = cu.mergeIdx;
1657
0
      bool bMergeFlag = cu.mergeFlag;
1658
0
      uint32_t uiInterDir = cu.interDir;
1659
0
      int  iSymMode = cu.smvdMode;
1660
1661
0
      Mv cMvd[2];
1662
0
      uint32_t uiMvpIdx[2], uiMvpNum[2];
1663
0
      uiMvpIdx[0] = cu.mvpIdx[REF_PIC_LIST_0];
1664
0
      uiMvpIdx[1] = cu.mvpIdx[REF_PIC_LIST_1];
1665
0
      uiMvpNum[0] = cu.mvpNum[REF_PIC_LIST_0];
1666
0
      uiMvpNum[1] = cu.mvpNum[REF_PIC_LIST_1];
1667
0
      cMvd[0] = cu.mvd[REF_PIC_LIST_0][0];
1668
0
      cMvd[1] = cu.mvd[REF_PIC_LIST_1][0];
1669
1670
0
      MvField cHevcMvField[2];
1671
0
      cHevcMvField[0].setMvField(cu.mv[REF_PIC_LIST_0][0], cu.refIdx[REF_PIC_LIST_0]);
1672
0
      cHevcMvField[1].setMvField(cu.mv[REF_PIC_LIST_1][0], cu.refIdx[REF_PIC_LIST_1]);
1673
1674
      // do affine ME & Merge
1675
0
      cu.affineType = AFFINEMODEL_4PARAM;
1676
0
      Mv acMvAffine4Para[2][MAX_REF_PICS][3];
1677
0
      int refIdx4Para[2] = { -1, -1 };
1678
1679
0
      xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffineCost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1680
1681
0
      if (cu.imv == IMV_OFF)
1682
0
      {
1683
0
        storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_4PARAM, BcwIdx);
1684
0
      }
1685
0
      if (cu.slice->sps->AffineType && uiAffineCost != MAX_DISTORTION)
1686
0
      {
1687
0
        if (uiAffineCost < uiHevcCost * 1.05) ///< condition for 6 parameter affine ME
1688
0
        {
1689
          // save 4 parameter results
1690
0
          Mv bestMv[2][3], bestMvd[2][3];
1691
0
          int bestMvpIdx[2], bestMvpNum[2], bestRefIdx[2];
1692
0
          uint8_t bestInterDir;
1693
1694
0
          bestInterDir = cu.interDir;
1695
0
          bestRefIdx[0] = cu.refIdx[0];
1696
0
          bestRefIdx[1] = cu.refIdx[1];
1697
0
          bestMvpIdx[0] = cu.mvpIdx[0];
1698
0
          bestMvpIdx[1] = cu.mvpIdx[1];
1699
0
          bestMvpNum[0] = cu.mvpNum[0];
1700
0
          bestMvpNum[1] = cu.mvpNum[1];
1701
1702
0
          for (int refList = 0; refList < 2; refList++)
1703
0
          {
1704
0
            bestMv[refList][0] = cu.mv[refList][0];
1705
0
            bestMv[refList][1] = cu.mv[refList][1];
1706
0
            bestMv[refList][2] = cu.mv[refList][2];
1707
0
            bestMvd[refList][0] = cu.mvd[refList][0];
1708
0
            bestMvd[refList][1] = cu.mvd[refList][1];
1709
0
            bestMvd[refList][2] = cu.mvd[refList][2];
1710
0
          }
1711
1712
0
          refIdx4Para[0] = bestRefIdx[0];
1713
0
          refIdx4Para[1] = bestRefIdx[1];
1714
1715
0
          Distortion uiAffine6Cost = MAX_DISTORTION;
1716
0
          cu.affineType = AFFINEMODEL_6PARAM;
1717
0
          xPredAffineInterSearch(cu, origBuf, puIdx, uiLastModeTemp, uiAffine6Cost, cMvHevcTemp, acMvAffine4Para, refIdx4Para, BcwIdx, enforceBcwPred, (cs.slice->sps->BCW == true) ? getWeightIdxBits(BcwIdx) : 0 );
1718
1719
0
          if (cu.imv == IMV_OFF)
1720
0
          {
1721
0
            storeAffineMotion(cu.mv, cu.refIdx, AFFINEMODEL_6PARAM, BcwIdx);
1722
0
          }
1723
1724
          // reset to 4 parameter affine inter mode
1725
0
          if (uiAffineCost <= uiAffine6Cost)
1726
0
          {
1727
0
            cu.affineType = AFFINEMODEL_4PARAM;
1728
0
            cu.interDir = bestInterDir;
1729
0
            cu.refIdx[0] = bestRefIdx[0];
1730
0
            cu.refIdx[1] = bestRefIdx[1];
1731
0
            cu.mvpIdx[0] = bestMvpIdx[0];
1732
0
            cu.mvpIdx[1] = bestMvpIdx[1];
1733
0
            cu.mvpNum[0] = bestMvpNum[0];
1734
0
            cu.mvpNum[1] = bestMvpNum[1];
1735
1736
0
            for (int verIdx = 0; verIdx < 3; verIdx++)
1737
0
            {
1738
0
              cu.mvd[REF_PIC_LIST_0][verIdx] = bestMvd[0][verIdx];
1739
0
              cu.mvd[REF_PIC_LIST_1][verIdx] = bestMvd[1][verIdx];
1740
0
            }
1741
1742
0
            CU::setAllAffineMv(cu, bestMv[0][0], bestMv[0][1], bestMv[0][2], REF_PIC_LIST_0);
1743
0
            CU::setAllAffineMv(cu, bestMv[1][0], bestMv[1][1], bestMv[1][2], REF_PIC_LIST_1);
1744
0
          }
1745
0
          else
1746
0
          {
1747
0
            uiAffineCost = uiAffine6Cost;
1748
0
          }
1749
0
        }
1750
1751
0
        uiAffineCost += m_pcRdCost->getCost(1); // add one bit for affine_type
1752
0
      }
1753
1754
0
      if (uiHevcCost <= uiAffineCost)
1755
0
      {
1756
        // set hevc me result
1757
0
        cu.affine = false;
1758
0
        cu.mergeFlag = bMergeFlag;
1759
0
        cu.mergeIdx = uiMRGIndex;
1760
0
        cu.interDir = uiInterDir;
1761
0
        cu.smvdMode = iSymMode;
1762
0
        cu.mv[REF_PIC_LIST_0][0]  = cHevcMvField[0].mv;
1763
0
        cu.refIdx[REF_PIC_LIST_0] = cHevcMvField[0].refIdx;
1764
0
        cu.mv[REF_PIC_LIST_1][0]  = cHevcMvField[1].mv;
1765
0
        cu.refIdx[REF_PIC_LIST_1] = cHevcMvField[1].refIdx;
1766
0
        cu.mvpIdx[REF_PIC_LIST_0] = uiMvpIdx[0];
1767
0
        cu.mvpIdx[REF_PIC_LIST_1] = uiMvpIdx[1];
1768
0
        cu.mvpNum[REF_PIC_LIST_0] = uiMvpNum[0];
1769
0
        cu.mvpNum[REF_PIC_LIST_1] = uiMvpNum[1];
1770
0
        cu.mvd[REF_PIC_LIST_0][0] = cMvd[0];
1771
0
        cu.mvd[REF_PIC_LIST_1][0] = cMvd[1];
1772
0
      }
1773
0
      else
1774
0
      {
1775
0
        cu.smvdMode = 0;
1776
0
        CHECK(!cu.affine, "Wrong.");
1777
0
        uiLastMode = uiLastModeTemp;
1778
0
      }
1779
0
    }
1780
1781
0
    if( cu.interDir == 3 && !cu.mergeFlag )
1782
0
    {
1783
0
      if (BcwIdx != BCW_DEFAULT)
1784
0
      {
1785
0
        cu.BcwIdx = BcwIdx;
1786
0
      }
1787
0
    }
1788
1789
0
    CU::spanMotionInfo( cu );
1790
1791
0
    m_skipPROF = false;
1792
0
    m_encOnly  = false;
1793
    //  MC
1794
0
    PelUnitBuf predBuf = cu.cs->getPredBuf(cu);
1795
0
    motionCompensation( cu, predBuf, REF_PIC_LIST_X );
1796
0
    puIdx++;
1797
0
  }
1798
1799
0
  return false;
1800
0
}
1801
1802
// AMVP
1803
// Fills the AMVP candidate list for (refPicList, iRefIdx) and picks the candidate with the
// lowest template cost.
//   rcMvPred  : receives the selected candidate (the unclipped list entry)
//   rAMVPInfo : receives the candidate list produced by CU::fillMvpCand
//   distBiP   : receives the cost of the selected candidate; it is only written when a
//               candidate beats MAX_DISTORTION
// Also stores the selected index and the candidate count in cu.mvpIdx / cu.mvpNum.
void InterSearch::xEstimateMvPredAMVP( CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv& rcMvPred, AMVPInfo& rAMVPInfo, Distortion& distBiP )
{
  // Build the candidate list first; everything below reads from it.
  CU::fillMvpCand( cu, refPicList, iRefIdx, rAMVPInfo );

  // Start from candidate 0 so that a valid result exists even if no cost comparison succeeds.
  int        bestCandIdx = 0;
  Mv         bestCandMv  = rAMVPInfo.mvCand[0];
  Distortion lowestCost  = MAX_DISTORTION;

  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );

  for( int candIdx = 0; candIdx < rAMVPInfo.numCand; ++candIdx )
  {
    // Only the evaluated copy is clipped; the stored candidate stays untouched.
    Mv evalMv = rAMVPInfo.mvCand[candIdx];
    if( m_pcEncCfg->m_ifpLines )
    {
      // NOTE(review): xClipMvSearch declares its last parameter as `const int ifpLines`;
      // `true` converts to 1 here - confirm this is intended rather than m_pcEncCfg->m_ifpLines.
      xClipMvSearch( evalMv, cu.lumaPos(), cu.lumaSize(),*cu.cs->pcv, true );
    }

    const Distortion candCost = xGetTemplateCost( cu, origBuf, predBuf, evalMv, candIdx, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx );
    if( lowestCost > candCost )
    {
      lowestCost  = candCost;
      bestCandMv  = rAMVPInfo.mvCand[candIdx];
      bestCandIdx = candIdx;
      distBiP     = candCost;
    }
  }

  // Publish the winner.
  rcMvPred              = bestCandMv;
  cu.mvpIdx[refPicList] = bestCandIdx;
  cu.mvpNum[refPicList] = rAMVPInfo.numCand;
}
1845
1846
uint32_t InterSearch::xGetMvpIdxBits(int iIdx, int iNum)
1847
52.1k
{
1848
52.1k
  CHECK(iIdx < 0 || iNum < 0 || iIdx >= iNum, "Invalid parameters");
1849
1850
52.1k
  if (iNum == 1)
1851
17.3k
  {
1852
17.3k
    return 0;
1853
17.3k
  }
1854
1855
34.7k
  uint32_t uiLength = 1;
1856
34.7k
  int iTemp = iIdx;
1857
34.7k
  if ( iTemp == 0 )
1858
17.3k
  {
1859
17.3k
    return uiLength;
1860
17.3k
  }
1861
1862
17.3k
  bool bCodeLast = ( iNum-1 > iTemp );
1863
1864
17.3k
  uiLength += (iTemp-1);
1865
1866
17.3k
  if( bCodeLast )
1867
0
  {
1868
0
    uiLength++;
1869
0
  }
1870
1871
17.3k
  return uiLength;
1872
34.7k
}
1873
1874
// Writes three fixed bit counts into uiBlkBit. Entry 0 is 1 when bPSlice is set and 3 otherwise;
// entries 1 and 2 are always 3 and 5. iPartIdx and uiLastMode are not used.
void InterSearch::xGetBlkBits( bool bPSlice, int iPartIdx, uint32_t uiLastMode, uint32_t uiBlkBit[3])
{
  uiBlkBit[2] = 5;
  uiBlkBit[1] = 3;
  uiBlkBit[0] = bPSlice ? 1 : 3;
}
1880
1881
// Copies the candidate count and the first numCand motion vector candidates from *pSrc to *pDst.
// Entries of pDst->mvCand at or beyond numCand are left untouched.
void InterSearch::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
{
  const AMVPInfo& src = *pSrc;
  AMVPInfo&       dst = *pDst;

  dst.numCand = src.numCand;

  int candIdx = 0;
  while( candIdx < src.numCand )
  {
    dst.mvCand[candIdx] = src.mvCand[candIdx];
    ++candIdx;
  }
}
1889
1890
// Re-selects the AMVP candidate for an already determined motion vector cMv.
// For every candidate in amvpInfo the bits for coding cMv against that candidate plus the
// candidate-index bits are computed. If a candidate other than riMVPIdx is strictly cheaper,
// rcMvPred, riMVPIdx, ruiBits and ruiCost are updated to reflect the switch.
// Returns without any change when imv is 1 or 2, or when fewer than two candidates exist.
// Side effect: the cost scale of m_pcRdCost is set to 0 and its predictor is left at the last
// candidate that was evaluated. refPicList is not used.
void InterSearch::xCheckBestMVP ( RefPicList refPicList, const Mv& cMv, Mv& rcMvPred, int& riMVPIdx, AMVPInfo& amvpInfo, uint32_t& ruiBits, Distortion& ruiCost, const uint8_t imv )
{
  if( imv == 1 || imv == 2 )
  {
    return;
  }

  AMVPInfo* pcAMVPInfo = &amvpInfo;

  CHECK(pcAMVPInfo->mvCand[riMVPIdx] != rcMvPred, "Invalid MV prediction candidate");

  if( pcAMVPInfo->numCand <= 1 )
  {
    return;
  }

  m_pcRdCost->setCostScale( 0 );

  // The motion vector is converted once; only the predictor changes between evaluations.
  Mv mv = cMv;
  mv.changeTransPrecInternal2Amvr(imv);

  // Bits for coding mv against the given candidate, including the candidate-index bits.
  // The candidate is installed as the active predictor of m_pcRdCost as a side effect.
  auto bitsForCandidate = [&]( Mv pred, const int mvpIdx ) -> int
  {
    pred.changeTransPrecInternal2Amvr(imv);
    m_pcRdCost->setPredictor( pred );
    int mvBits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
    mvBits += m_auiMVPIdxCost[mvpIdx][AMVP_MAX_NUM_CANDS];
    return mvBits;
  };

  const int currentBits = bitsForCandidate( rcMvPred, riMVPIdx );

  int cheapestBits = currentBits;
  int cheapestIdx  = riMVPIdx;

  for( int candIdx = 0; candIdx < pcAMVPInfo->numCand; ++candIdx )
  {
    if( candIdx != riMVPIdx )
    {
      const int candBits = bitsForCandidate( pcAMVPInfo->mvCand[candIdx], candIdx );
      if( candBits < cheapestBits )
      {
        cheapestBits = candBits;
        cheapestIdx  = candIdx;
      }
    }
  }

  if( cheapestIdx == riMVPIdx )
  {
    return; // the current candidate is still the cheapest
  }

  rcMvPred = pcAMVPInfo->mvCand[cheapestIdx];
  riMVPIdx = cheapestIdx;

  // Swap the old MV/index bits for the new ones and re-derive the cost accordingly.
  const uint32_t previousBits = ruiBits;
  ruiBits = previousBits - currentBits + cheapestBits;
  ruiCost = (ruiCost - m_pcRdCost->getCost( previousBits ))  + m_pcRdCost->getCost( ruiBits );
}
1949
1950
1951
// Cost of using cMvCand as motion vector predictor candidate.
// The candidate (a by-value copy) is clipped with clipMv, a luma prediction for it is written
// into predBuf, and the SAD (DF_SAD) between origBuf.Y() and predBuf.Y() is returned together
// with the cost of m_auiMVPIdxCost[iMVPIdx][iMVPNum] bits.
Distortion InterSearch::xGetTemplateCost( const CodingUnit& cu,
                                          CPelUnitBuf& origBuf,
                                          PelUnitBuf&  predBuf,
                                          Mv           cMvCand,
                                          int          iMVPIdx,
                                          int          iMVPNum,
                                          RefPicList   refPicList,
                                          int          iRefIdx
)
{
  const Picture* refPicture = cu.slice->getRefPic( refPicList, iRefIdx );

  clipMv( cMvCand, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );

  // Luma-only prediction for the clipped candidate.
  xPredInterBlk( COMP_Y, cu, refPicture, cMvCand, predBuf, false, cu.slice->clpRngs[ COMP_Y ], false, false);

  // Distortion first, then the bits for signalling the candidate index.
  Distortion templateCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_SAD);
  templateCost += m_pcRdCost->getCost( m_auiMVPIdxCost[iMVPIdx][iMVPNum] );

  return templateCost;
}
1976
1977
// Motion estimation for one reference picture (refPicList, iRefIdxPred).
//   rcMvPred : motion vector predictor the MV bits are measured against
//   rcMv     : in bi-pred mode (bBi) the start MV on entry; receives the resulting MV
//   ruiBits  : on entry the bits spent so far; MV bits are added
//   ruiCost  : receives the resulting cost
// Flow: optional shortcut through the buffered uni-directional result, an integer-pel search
// (pattern search, TZ search from a stored MV, or fast pattern search), and finally either a
// fractional refinement (imv == IMV_OFF / IMV_HPEL) or an integer refinement (other imv values).
void InterSearch::xMotionEstimation(CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList refPicList, Mv& rcMvPred, int iRefIdxPred, Mv& rcMv, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, bool bBi)
{
  // Uni-prediction with a non-default BCW index may be served from the buffered uni MV.
  const bool uniWithNonDefaultBcw = cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi;
  if( uniWithNonDefaultBcw && xReadBufferedUniMv( cu, refPicList, iRefIdxPred, rcMvPred, rcMv, ruiBits, ruiCost ) )
  {
    return;
  }

  Mv halfPelOffset, qterPelOffset;

  CHECK(refPicList >= MAX_NUM_REF_LIST_ADAPT_SR || iRefIdxPred>=int(MAX_IDX_ADAPT_SR), "Invalid reference picture list");
  m_iSearchRange = m_aaiAdaptSR[refPicList][iRefIdxPred];

  const int intSearchRange = bBi ? m_bipredSearchRange : m_iSearchRange;
  double    distWeight     = 1.0;

  // Signal to search for: the original block, or a modified copy of it for bi-prediction.
  CPelUnitBuf  biSearchBuf;
  CPelUnitBuf* searchBuf = &origBuf;

  if( bBi )
  {
    PelUnitBuf biOrigBuf = m_tmpStorageLCU.getCompactBuf( cu );
    // NOTE: the other buffer holds the predicted signal of the opposite direction
    PelUnitBuf oppositePredBuf = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
    biOrigBuf.copyFrom(origBuf);
    biOrigBuf.removeHighFreq( oppositePredBuf, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs );

    biSearchBuf = biOrigBuf;
    searchBuf   = &biSearchBuf;
    distWeight  = xGetMEDistortionWeight( cu.BcwIdx, refPicList );
  }

  // Search pattern and reference picture set-up
  CPelBuf lumaPattern = searchBuf->Y();

  m_lumaClpRng = cu.cs->slice->clpRngs[ COMP_Y ];

  const Picture* refPic = cu.slice->getRefPic(refPicList, iRefIdxPred);
  CPelBuf refLuma = refPic->getRecoBuf(cu.blocks[COMP_Y]);

  TZSearchStruct searchCtx;
  searchCtx.pcPatternKey  = &lumaPattern;
  searchCtx.iRefStride    = refLuma.stride;
  searchCtx.piRefY        = refLuma.buf;
  searchCtx.imvShift      = cu.imv == IMV_HPEL ? 1 : (cu.imv << 1);
  searchCtx.useAltHpelIf  = cu.imv == IMV_HPEL;
  searchCtx.zeroMV        = false;
  searchCtx.uiBestSad     = MAX_DISTORTION;

  CodedCUInfo& relatedCU = m_modeCtrl->getBlkInfo( cu );

  // For uni-prediction, an MV stored for this block and reference can seed the search.
  Mv         storedMv;
  const bool hasStoredMv = !bBi && relatedCU.getMv( refPicList, iRefIdxPred, storedMv );
  if( hasStoredMv )
  {
    storedMv.changePrecision( MV_PRECISION_INT, MV_PRECISION_INTERNAL);
  }

  Mv predQuarter = rcMvPred;
  predQuarter.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
  m_pcRdCost->setPredictor( predQuarter );
  m_pcRdCost->setCostScale(2);

  // The sub-shift mode is the same for every integer search variant below.
  const bool subShiftEnabled = m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE1 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3;
  searchCtx.subShiftMode = subShiftEnabled ? 1 : 0;

  //  Integer search
  if( m_motionEstimationSearchMethod == VVENC_MESEARCH_FULL || bBi )
  {
    m_pcRdCost->setDistParam( m_cDistParam, *searchCtx.pcPatternKey, searchCtx.piRefY, searchCtx.iRefStride, m_lumaClpRng.bd, COMP_Y, searchCtx.subShiftMode );

    // Distortion plus MV cost of a start candidate, evaluated at its clipped integer position.
    auto startPointCost = [&]( Mv startMv ) -> Distortion
    {
      xClipMvSearch(startMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines );
      startMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
      m_cDistParam.cur.buf = searchCtx.piRefY + (startMv.ver * searchCtx.iRefStride) + startMv.hor;
      Distortion sad = m_cDistParam.distFunc(m_cDistParam);
      sad += m_pcRdCost->getCostOfVectorWithPredictor(startMv.hor, startMv.ver, searchCtx.imvShift);
      return sad;
    };

    Mv         bestStartMv   = (bBi ? rcMv : rcMvPred);
    Distortion bestStartCost = startPointCost( bestStartMv );

    // Try the buffered uni MVs as alternative start points, skipping repeated vectors.
    Mv seenMv[BlkUniMvInfoBuffer::m_uniMvListMaxSize];

    for( int listIdx = 0; listIdx < m_BlkUniMvInfoBuffer->m_uniMvListSize; listIdx++ )
    {
      const BlkUniMvInfo* bufferedInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo( listIdx );
      const Mv candMv = bufferedInfo->uniMvs[refPicList][iRefIdxPred];
      seenMv[listIdx] = candMv;

      bool alreadyTested = false;
      for( int k = 0; k < listIdx && !alreadyTested; k++ )
      {
        alreadyTested = ( candMv == seenMv[k] );
      }
      if( alreadyTested )
      {
        continue;
      }

      const Distortion candCost = startPointCost( candMv );
      if( candCost < bestStartCost )
      {
        bestStartCost = candCost;
        bestStartMv   = bufferedInfo->uniMvs[refPicList][iRefIdxPred];
        m_cDistParam.maximumDistortionForEarlyExit = candCost;
      }
    }

    xSetSearchRange( cu, bestStartMv, intSearchRange, searchCtx.searchRange );
    xPatternSearch ( searchCtx, rcMv, ruiCost);
  }
  else if( hasStoredMv )
  {
    rcMv = storedMv;
    xTZSearch( cu, refPicList, iRefIdxPred, searchCtx, rcMv, ruiCost, false, true );
  }
  else
  {
    rcMv = rcMvPred;
    xPatternSearchFast(cu, refPicList, iRefIdxPred, searchCtx, rcMv, ruiCost );
    relatedCU.setMv( refPicList, iRefIdxPred, rcMv );
  }

  DTRACE( g_trace_ctx, D_ME, "%d %d %d :MECostFPel<L%d,%d>: %d,%d,%dx%d, %d", DTRACE_GET_COUNTER( g_trace_ctx, D_ME ), cu.slice->poc, 0, ( int ) refPicList, ( int ) bBi, cu.Y().x, cu.Y().y, cu.Y().width, cu.Y().height, ruiCost );

  if ( cu.imv == IMV_OFF || cu.imv == IMV_HPEL )
  {
    // sub-pel refinement for sub-pel resolution
    if ( m_pcEncCfg->m_fastSubPel != 2 )
    {
      xPatternSearchFracDIF( cu, refPicList, iRefIdxPred, searchCtx, rcMv, halfPelOffset, qterPelOffset, ruiCost );
    }
    m_pcRdCost->setCostScale( 0 );

    // Combine the integer MV with the half- and quarter-pel offsets.
    rcMv <<= 2;
    rcMv  += ( halfPelOffset <<= 1 );
    rcMv  += qterPelOffset;

    const uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor( rcMv.hor, rcMv.ver, searchCtx.imvShift );
    ruiBits += mvBits;

    // Remove the MV cost from the search cost, weight what is left, and add the cost of all bits.
    const double mvRateCost    = ( double ) m_pcRdCost->getCost( mvBits );
    const double totalRateCost = ( double ) m_pcRdCost->getCost( ruiBits );
    ruiCost = ( Distortion ) ( floor( distWeight * ( ( double ) ruiCost - mvRateCost ) ) + totalRateCost );

    rcMv.changePrecision(MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL);
  }
  else
  {
    // integer refinement for the remaining imv values
    rcMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
    xPatternSearchIntRefine( cu, searchCtx, rcMv, rcMvPred, riMVPIdx, ruiBits, ruiCost, amvpInfo, distWeight);
  }
  DTRACE(g_trace_ctx, D_ME, "   MECost<L%d,%d>: %6d (%d)  MV:%d,%d\n", (int)refPicList, (int)bBi, ruiCost, ruiBits, rcMv.hor << 2, rcMv.ver << 2);
}
2134
2135
// Clamps rcMv (in units of 1 << MV_FRACTIONAL_BITS_INTERNAL) to a window around the picture.
// Horizontally and at the top the window reaches maxCUSize + 8 samples (minus one) before the
// picture origin and 8 samples past the right picture edge. At the bottom it reaches 8 samples
// past the picture height, unless ifpLines is non-zero and the CTU row bound
// (pos.y >> maxCUSizeLog2) + ifpLines + 1 lies inside the picture; then the lower limit is
// derived from that CTU row, reduced by the block height and 4 further samples.
void InterSearch::xClipMvSearch( Mv& rcMv, const Position& pos, const struct Size& size, const PreCalcValues& pcv, const int ifpLines )
{
  const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int iOffset  = 8;
  const int mvScale  = 1 << iMvShift;

  // Lower limits are multiplied rather than shifted because the sample offset is negative.
  const int minBase = -( int ) pcv.maxCUSize - iOffset;
  const int iHorMin = ( minBase - ( int ) pos.x + 1 ) * mvScale;
  const int iVerMin = ( minBase - ( int ) pos.y + 1 ) * mvScale;

  const int iHorMax = ( pcv.lumaWidth + iOffset - ( int ) pos.x - 1 ) << iMvShift;

  // CTU row bound used for the restricted lower limit.
  const auto ctuRowBound = ( pos.y >> pcv.maxCUSizeLog2 ) + ifpLines + 1;

  int maxLumaHeight;
  if( ifpLines && ctuRowBound < pcv.heightInCtus )
  {
    maxLumaHeight = ( ctuRowBound << pcv.maxCUSizeLog2 ) - size.height - 4;  // 4 samples from DCTIF vertical bottom part
  }
  else
  {
    maxLumaHeight = pcv.lumaHeight + iOffset;
  }

  const int iVerMax = ( maxLumaHeight - ( int ) pos.y - 1 ) << iMvShift;

  // Clamp: raise to the minimum first, then cap at the maximum.
  int clippedHor = rcMv.hor;
  if( clippedHor < iHorMin ) { clippedHor = iHorMin; }
  if( clippedHor > iHorMax ) { clippedHor = iHorMax; }

  int clippedVer = rcMv.ver;
  if( clippedVer < iVerMin ) { clippedVer = iVerMin; }
  if( clippedVer > iVerMax ) { clippedVer = iVerMax; }

  rcMv.hor = clippedHor;
  rcMv.ver = clippedVer;
}
2154
2155
/**
 * \brief Pull the vertical MV component up so the referenced rows end at the allowed CTU row
 *
 * The caller is expected to have established that the vector exceeds the limit;
 * the debug check fires if it does not.
 *
 * \param mv       Motion vector in internal precision; only mv.ver is modified
 * \param yB       Vertical luma position of the block
 * \param nH       Luma height of the block
 * \param ifpLines Number of additional CTU rows below the current one that may be referenced
 * \param pcv      Pre-calculated values (maxCUSizeLog2 is read)
 */
void InterSearch::xClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
{
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int yCompScale  = 0;
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;

  // last sample row that may still be referenced
  const int limitCtuRow = ( yB >> ctuLogScale ) + ifpLines + 1;
  const int yRefMax     = ( limitCtuRow << ctuLogScale ) - 1;

  // last sample row touched by the block at the integer part of the MV, including 4 filter rows
  const int mvVerInt = mv.ver >> mvPrecShift;
  const int yRefMv   = yB + nH + ( 4 >> yCompScale ) + mvVerInt - 1;

  CHECKD( yRefMv <= yRefMax, "Not expected" );

  const int excessRows = yRefMv - yRefMax;
  mv.ver -= excessRows << mvPrecShift;
}
2165
2166
/**
 * \brief Clip the vertical MV component to the allowed CTU row, if it exceeds it
 *
 * Nothing happens when the block lies at or below the position from which the
 * limiting CTU row would fall outside the picture, or when the vector already
 * stays within the limit.
 *
 * \param mv       Motion vector in internal precision; only mv.ver may be modified
 * \param yB       Vertical luma position of the block
 * \param nH       Luma height of the block
 * \param ifpLines Number of additional CTU rows below the current one that may be referenced
 * \param pcv      Pre-calculated values (maxCUSizeLog2 and heightInCtus are read)
 */
void InterSearch::xCheckAndClipMvToFppLine( Mv& mv, const int yB, const int nH, const int ifpLines, const PreCalcValues& pcv )
{
  const int mvPrecShift = MV_FRACTIONAL_BITS_INTERNAL;
  const int yCompScale  = 0;
  const int ctuLogScale = pcv.maxCUSizeLog2 - yCompScale;

  // blocks this far down are not restricted
  const int yBMax = ( pcv.heightInCtus - 1 - ifpLines ) << ctuLogScale;
  if( !( yB < yBMax ) )
  {
    return;
  }

  const int yRefMax = ( ( ( yB >> ctuLogScale ) + ifpLines + 1 ) << ctuLogScale ) - 1;
  const int yRefMv  = yB + nH + ( 4 >> yCompScale ) + ( mv.ver >> mvPrecShift ) - 1;
  if( !( yRefMv > yRefMax ) )
  {
    return;
  }

  // clip MV
  const int excessRows = yRefMv - yRefMax;
  mv.ver -= excessRows << mvPrecShift;
}
2183
2184
void InterSearch::xSetSearchRange ( const CodingUnit& cu,
2185
                                    const Mv& cMvPred,
2186
                                    const int iSrchRng,
2187
                                    SearchRange& sr )
2188
0
{
2189
0
  const PreCalcValues& pcv = *cu.cs->pcv;
2190
0
  const int iMvShift = MV_FRACTIONAL_BITS_INTERNAL;
2191
0
  Mv cFPMvPred = cMvPred;
2192
0
  clipMv( cFPMvPred, cu.lumaPos(), cu.lumaSize(), pcv );
2193
2194
0
  Mv mvTL(cFPMvPred.hor - (iSrchRng << iMvShift), cFPMvPred.ver - (iSrchRng << iMvShift));
2195
0
  Mv mvBR(cFPMvPred.hor + (iSrchRng << iMvShift), cFPMvPred.ver + (iSrchRng << iMvShift));
2196
2197
0
  clipMv( mvTL, cu.lumaPos(), cu.lumaSize(), pcv);
2198
0
  xClipMvSearch( mvBR, cu.lumaPos(), cu.lumaSize(), pcv, m_pcEncCfg->m_ifpLines );
2199
2200
0
  mvTL.divideByPowerOf2( iMvShift );
2201
0
  mvBR.divideByPowerOf2( iMvShift );
2202
2203
0
  sr.left   = mvTL.hor;
2204
0
  sr.top    = mvTL.ver;
2205
0
  sr.right  = mvBR.hor;
2206
0
  sr.bottom = mvBR.ver;
2207
0
}
2208
2209
2210
void InterSearch::xPatternSearch( TZSearchStruct&  cStruct,
2211
                                  Mv&                 rcMv,
2212
                                  Distortion&         ruiSAD )
2213
0
{
2214
0
  Distortion  uiSad;
2215
0
  Distortion  uiSadBest = MAX_DISTORTION;
2216
0
  int         iBestX = 0;
2217
0
  int         iBestY = 0;
2218
2219
  //-- jclee for using the SAD function pointer
2220
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2221
2222
0
  const SearchRange& sr = cStruct.searchRange;
2223
2224
0
  const Pel* piRef = cStruct.piRefY + (sr.top * cStruct.iRefStride);
2225
0
  for ( int y = sr.top; y <= sr.bottom; y++ )
2226
0
  {
2227
0
    for ( int x = sr.left; x <= sr.right; x++ )
2228
0
    {
2229
      //  find min. distortion position
2230
0
      m_cDistParam.cur.buf = piRef + x;
2231
2232
0
      uiSad = m_cDistParam.distFunc( m_cDistParam );
2233
2234
      // motion cost
2235
0
      uiSad += m_pcRdCost->getCostOfVectorWithPredictor( x, y, cStruct.imvShift );
2236
2237
0
      if ( uiSad < uiSadBest )
2238
0
      {
2239
0
        uiSadBest = uiSad;
2240
0
        iBestX    = x;
2241
0
        iBestY    = y;
2242
0
        m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2243
0
      }
2244
0
    }
2245
0
    piRef += cStruct.iRefStride;
2246
0
  }
2247
0
  rcMv.set( iBestX, iBestY );
2248
2249
0
  cStruct.uiBestSad = uiSadBest; // th for testing
2250
0
  ruiSAD = uiSadBest - m_pcRdCost->getCostOfVectorWithPredictor( iBestX, iBestY, cStruct.imvShift );
2251
0
  return;
2252
0
}
2253
2254
2255
void InterSearch::xPatternSearchFast( const CodingUnit& cu,
2256
                                      RefPicList            refPicList,
2257
                                      int                   iRefIdxPred,
2258
                                      TZSearchStruct&       cStruct,
2259
                                      Mv&                   rcMv,
2260
                                      Distortion&           ruiSAD )
2261
0
{
2262
0
  if( cu.cs->picture->useME )
2263
0
  {
2264
0
    switch ( m_motionEstimationSearchMethodSCC )
2265
0
    {
2266
0
      case 3: //VVENC_MESEARCH_DIAMOND_FAST:
2267
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true, true );
2268
0
        break;
2269
0
      case 2: //VVENC_MESEARCH_DIAMOND:
2270
0
        xTZSearch( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2271
0
        break;
2272
0
      default:
2273
0
        THROW("shouldn't get here");
2274
0
        break;
2275
0
    }
2276
0
    return;
2277
0
  }
2278
2279
0
  switch ( m_motionEstimationSearchMethod )
2280
0
  {
2281
0
    case VVENC_MESEARCH_DIAMOND_FAST:
2282
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false, true );
2283
0
      break;
2284
0
    case VVENC_MESEARCH_DIAMOND:
2285
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, false );
2286
0
      break;
2287
0
    case VVENC_MESEARCH_DIAMOND_ENHANCED:
2288
0
      xTZSearch         ( cu, refPicList, iRefIdxPred, cStruct, rcMv, ruiSAD, true );
2289
0
      break;
2290
0
    case VVENC_MESEARCH_FULL:
2291
0
    default:
2292
0
      THROW("shouldn't get here");
2293
0
      break;
2294
0
  }
2295
0
}
2296
2297
2298
void InterSearch::xTZSearch( const CodingUnit& cu,
2299
                             RefPicList            refPicList,
2300
                             int                   iRefIdxPred,
2301
                             TZSearchStruct&       cStruct,
2302
                             Mv&                   rcMv,
2303
                             Distortion&           ruiSAD,
2304
                             const bool            bExtendedSettings,
2305
                             const bool            bFastSettings)
2306
0
{
2307
0
  const bool bUseRasterInFastMode                    = true; //toggle this to further reduce runtime
2308
0
  const bool bUseAdaptiveRaster                      = bExtendedSettings;
2309
0
  const int  iRaster                                 = (bFastSettings && bUseRasterInFastMode) ? 8 : 5;
2310
0
  const bool bTestZeroVector                         = true && !bFastSettings;
2311
0
  const bool bTestZeroVectorStart                    = bExtendedSettings;
2312
0
  const bool bTestZeroVectorStop                     = false;
2313
0
  const bool bFirstSearchDiamond                     = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2314
0
  const bool bFirstCornersForDiamondDist1            = bExtendedSettings;
2315
0
  const bool bFirstSearchStop                        = m_pcEncCfg->m_bFastMEAssumingSmootherMVEnabled;
2316
0
  const uint32_t uiFirstSearchRounds                 = bFastSettings ? (bUseRasterInFastMode?3:2) : 3;     // first search stop X rounds after best match (must be >=1)
2317
0
  const bool bEnableRasterSearch                     = bFastSettings ? bUseRasterInFastMode : true;
2318
0
  const bool bAlwaysRasterSearch                     = bExtendedSettings;  // true: BETTER but factor 2 slower
2319
0
  const bool bRasterRefinementEnable                 = false; // enable either raster refinement or star refinement
2320
0
  const bool bRasterRefinementDiamond                = false; // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2321
0
  const bool bRasterRefinementCornersForDiamondDist1 = bExtendedSettings;
2322
0
  const bool bStarRefinementEnable                   = true;  // enable either star refinement or raster refinement
2323
0
  const bool bStarRefinementDiamond                  = true;  // 1 = xTZ8PointDiamondSearch   0 = xTZ8PointSquareSearch
2324
0
  const bool bStarRefinementCornersForDiamondDist1   = bExtendedSettings;
2325
0
  const bool bStarRefinementStop                     = bFastSettings;
2326
0
  const uint32_t uiStarRefinementRounds              = 2;  // star refinement stop X rounds after best match (must be >=1)
2327
0
  const bool bNewZeroNeighbourhoodTest               = bExtendedSettings;
2328
2329
0
  int iSearchRange = m_iSearchRange;
2330
0
  xClipMvSearch( rcMv, cu.lumaPos(), cu.lumaSize(),*cu.cs->pcv, m_pcEncCfg->m_ifpLines );
2331
0
  rcMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER);
2332
0
  rcMv.divideByPowerOf2(2);
2333
2334
  //
2335
0
  m_cDistParam.maximumDistortionForEarlyExit = cStruct.uiBestSad;
2336
0
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode );
2337
2338
  // set rcMv (Median predictor) as start point and as best point
2339
0
  xTZSearchHelp( cStruct, rcMv.hor, rcMv.ver, 0, 0 );
2340
2341
  // test whether zero Mv is better start point than Median predictor
2342
0
  if ( bTestZeroVector )
2343
0
  {
2344
0
    if( ( rcMv.hor != 0 || rcMv.ver != 0 ) && ( 0 != cStruct.iBestX || 0 != cStruct.iBestY ) )
2345
0
    {
2346
      // only test 0-vector if not obviously previously tested.
2347
0
      xTZSearchHelp( cStruct, 0, 0, 0, 0 );
2348
0
    }
2349
0
  }
2350
2351
0
  SearchRange& sr = cStruct.searchRange;
2352
2353
0
  for (int i = 0; i < m_BlkUniMvInfoBuffer->m_uniMvListSize; i++)
2354
0
  {
2355
0
    const BlkUniMvInfo* curMvInfo = m_BlkUniMvInfoBuffer->getBlkUniMvInfo(i);
2356
0
    Mv cTmpMv = curMvInfo->uniMvs[refPicList][iRefIdxPred];
2357
2358
0
    xClipMvSearch(cTmpMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_pcEncCfg->m_ifpLines);
2359
0
    cTmpMv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
2360
0
    m_cDistParam.cur.buf = cStruct.piRefY + (cTmpMv.ver * cStruct.iRefStride) + cTmpMv.hor;
2361
2362
0
    Distortion uiSad = m_cDistParam.distFunc(m_cDistParam);
2363
0
    uiSad += m_pcRdCost->getCostOfVectorWithPredictor(cTmpMv.hor, cTmpMv.ver, cStruct.imvShift);
2364
0
    if (uiSad < cStruct.uiBestSad)
2365
0
    {
2366
0
      cStruct.uiBestSad = uiSad;
2367
0
      cStruct.iBestX = cTmpMv.hor;
2368
0
      cStruct.iBestY = cTmpMv.ver;
2369
0
      m_cDistParam.maximumDistortionForEarlyExit = uiSad;
2370
0
    }
2371
0
  }
2372
2373
0
  {
2374
    // set search range
2375
0
    Mv currBestMv(cStruct.iBestX, cStruct.iBestY );
2376
0
    currBestMv <<= MV_FRACTIONAL_BITS_INTERNAL;
2377
0
    xSetSearchRange(cu, currBestMv, m_iSearchRange >> (bFastSettings ? 1 : 0), sr );
2378
0
  }
2379
2380
  // starting point after initial examination
2381
0
  int  iDist = 0;
2382
0
  int  iStartX = cStruct.iBestX;
2383
0
  int  iStartY = cStruct.iBestY;
2384
2385
  // Early termination of motion search after selection of starting candidate
2386
0
  if( m_pcEncCfg->m_bIntegerET )
2387
0
  {
2388
0
    bool isLargeBlock = cu.lumaSize().area() > 64;
2389
0
    xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, 1, false ); // 4-point small diamond search
2390
0
    if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2391
0
    {
2392
0
      if ( isLargeBlock )
2393
0
      {
2394
0
        xTZ4PointSquareSearch( cStruct, iStartX, iStartY, 1 );
2395
0
        if ( cStruct.iBestX == iStartX && cStruct.iBestY == iStartY )
2396
0
        {
2397
          // write out best match
2398
0
          rcMv.set( cStruct.iBestX, cStruct.iBestY );
2399
0
          ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2400
0
          return;
2401
0
        }
2402
0
      }
2403
0
      else
2404
0
      {
2405
        // write out best match
2406
0
        rcMv.set( cStruct.iBestX, cStruct.iBestY );
2407
0
        ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2408
0
        return;
2409
0
      }
2410
0
    }
2411
0
  }
2412
2413
  // start search
2414
0
  iDist = 0;
2415
0
  iStartX = cStruct.iBestX;
2416
0
  iStartY = cStruct.iBestY;
2417
2418
0
  const bool bBestCandidateZero = ( cStruct.iBestX == 0 ) && ( cStruct.iBestY == 0 );
2419
2420
  // first search around best position up to now.
2421
  // The following works as a "subsampled/log" window search around the best candidate
2422
0
  for( iDist = 1; iDist <= iSearchRange; iDist *= 2 )
2423
0
  {
2424
0
    if( bFirstSearchDiamond == 1 )
2425
0
    {
2426
0
      xTZ8PointDiamondSearch( cStruct, iStartX, iStartY, iDist, bFirstCornersForDiamondDist1 );
2427
0
    }
2428
0
    else
2429
0
    {
2430
0
      xTZ8PointSquareSearch( cStruct, iStartX, iStartY, iDist );
2431
0
    }
2432
2433
0
    if( bFirstSearchStop && ( cStruct.uiBestRound >= uiFirstSearchRounds ) ) // stop criterion
2434
0
    {
2435
0
      break;
2436
0
    }
2437
0
  }
2438
2439
0
  if( bNewZeroNeighbourhoodTest )
2440
0
  {
2441
0
    if( bTestZeroVectorStart && !bBestCandidateZero )
2442
0
    {
2443
0
      for( iDist = 1; iDist <= ( iSearchRange >> 1 ); iDist *= 2 )
2444
0
      {
2445
0
        xTZ8PointDiamondSearch( cStruct, 0, 0, iDist, false );
2446
0
        if( bTestZeroVectorStop && ( cStruct.uiBestRound > 2 ) ) // stop criterion
2447
0
        {
2448
0
          break;
2449
0
        }
2450
0
      }
2451
0
    }
2452
0
  }
2453
2454
  // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2455
0
  if ( cStruct.uiBestDistance == 1 )
2456
0
  {
2457
0
    cStruct.uiBestDistance = 0;
2458
0
    xTZ2PointSearch( cStruct );
2459
0
  }
2460
2461
  // raster search if distance is too big
2462
0
  if( bUseAdaptiveRaster )
2463
0
  {
2464
0
    int iWindowSize     = iRaster;
2465
0
    SearchRange localsr = sr;
2466
2467
0
    if( !( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) ) ) )
2468
0
    {
2469
0
      iWindowSize++;
2470
0
      localsr.left    /= 2;
2471
0
      localsr.right   /= 2;
2472
0
      localsr.top     /= 2;
2473
0
      localsr.bottom  /= 2;
2474
0
    }
2475
2476
0
    cStruct.uiBestDistance = iWindowSize;
2477
2478
0
    for( iStartY = localsr.top; iStartY <= localsr.bottom; iStartY += iWindowSize )
2479
0
    {
2480
0
      for( iStartX = localsr.left; iStartX <= localsr.right; iStartX += iWindowSize )
2481
0
      {
2482
0
        xTZSearchHelp( cStruct, iStartX, iStartY, 0, iWindowSize );
2483
0
      }
2484
0
    }
2485
0
  }
2486
0
  else
2487
0
  {
2488
0
    if( bEnableRasterSearch && ( ( ( int ) ( cStruct.uiBestDistance ) >= iRaster ) || bAlwaysRasterSearch ) )
2489
0
    {
2490
0
      cStruct.uiBestDistance = iRaster;
2491
2492
0
      for( iStartY = sr.top; iStartY <= sr.bottom; iStartY += iRaster )
2493
0
      {
2494
0
        for( iStartX = sr.left; iStartX <= sr.right; iStartX += iRaster )
2495
0
        {
2496
0
          xTZSearchHelp( cStruct, iStartX, iStartY, 0, iRaster );
2497
0
        }
2498
0
      }
2499
0
    }
2500
0
  }
2501
2502
  // raster refinement
2503
2504
0
  if ( bRasterRefinementEnable && cStruct.uiBestDistance > 0 )
2505
0
  {
2506
0
    while ( cStruct.uiBestDistance > 0 )
2507
0
    {
2508
0
      iStartX = cStruct.iBestX;
2509
0
      iStartY = cStruct.iBestY;
2510
0
      if ( cStruct.uiBestDistance > 1 )
2511
0
      {
2512
0
        iDist = cStruct.uiBestDistance >>= 1;
2513
0
        if ( bRasterRefinementDiamond == 1 )
2514
0
        {
2515
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bRasterRefinementCornersForDiamondDist1 );
2516
0
        }
2517
0
        else
2518
0
        {
2519
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2520
0
        }
2521
0
      }
2522
2523
      // calculate only 2 missing points instead 8 points if cStruct.uiBestDistance == 1
2524
0
      if ( cStruct.uiBestDistance == 1 )
2525
0
      {
2526
0
        cStruct.uiBestDistance = 0;
2527
0
        if ( cStruct.ucPointNr != 0 )
2528
0
        {
2529
0
          xTZ2PointSearch( cStruct );
2530
0
        }
2531
0
      }
2532
0
    }
2533
0
  }
2534
2535
  // star refinement
2536
0
  if ( bStarRefinementEnable && cStruct.uiBestDistance > 0 )
2537
0
  {
2538
0
    while ( cStruct.uiBestDistance > 0 )
2539
0
    {
2540
0
      iStartX = cStruct.iBestX;
2541
0
      iStartY = cStruct.iBestY;
2542
0
      cStruct.uiBestDistance = 0;
2543
0
      cStruct.ucPointNr = 0;
2544
0
      for ( iDist = 1; iDist < iSearchRange + 1; iDist*=2 )
2545
0
      {
2546
0
        if ( bStarRefinementDiamond == 1 )
2547
0
        {
2548
0
          xTZ8PointDiamondSearch ( cStruct, iStartX, iStartY, iDist, bStarRefinementCornersForDiamondDist1 );
2549
0
        }
2550
0
        else
2551
0
        {
2552
0
          xTZ8PointSquareSearch  ( cStruct, iStartX, iStartY, iDist );
2553
0
        }
2554
0
        if ( bStarRefinementStop && (cStruct.uiBestRound >= uiStarRefinementRounds) ) // stop criterion
2555
0
        {
2556
0
          break;
2557
0
        }
2558
0
      }
2559
2560
      // calculate only 2 missing points instead 8 points if cStrukt.uiBestDistance == 1
2561
0
      if ( cStruct.uiBestDistance == 1 )
2562
0
      {
2563
0
        cStruct.uiBestDistance = 0;
2564
0
        if ( cStruct.ucPointNr != 0 )
2565
0
        {
2566
0
          xTZ2PointSearch( cStruct );
2567
0
        }
2568
0
      }
2569
0
    }
2570
0
  }
2571
2572
  // write out best match
2573
0
  rcMv.set( cStruct.iBestX, cStruct.iBestY );
2574
0
  ruiSAD = cStruct.uiBestSad - m_pcRdCost->getCostOfVectorWithPredictor( cStruct.iBestX, cStruct.iBestY, cStruct.imvShift );
2575
0
}
2576
2577
/**
 * \brief Refine an integer-pel / 4-pel motion vector jointly with the AMVP candidate choice
 *
 * The given position and its 8 neighbours (one step in the current AMVR precision)
 * are tested against every AMVP candidate. The combination with the lowest
 * weighted distortion plus rate cost is written back.
 *
 * \param cu       Coding unit; cu.imv must select integer or 4-pel precision
 * \param cStruct  Search state (pattern, reference, stride)
 * \param rcMv     Motion vector in internal precision, refined in place
 * \param rcMvPred Predictor belonging to riMVPIdx on input; receives the chosen predictor
 * \param riMVPIdx AMVP index on input; receives the chosen index
 * \param ruiBits  Bit count; the old MVP index bits are replaced by the bits of the chosen combination
 * \param ruiCost  Receives the resulting cost (MAX_DISTORTION if no position could be rated)
 * \param amvpInfo AMVP candidate list
 * \param fWeight  Weight applied to the distortion
 */
void InterSearch::xPatternSearchIntRefine(CodingUnit& cu, TZSearchStruct&  cStruct, Mv& rcMv, Mv& rcMvPred, int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost, const AMVPInfo& amvpInfo, double fWeight)
{
  CHECK( cu.imv == IMV_OFF || cu.imv == IMV_HPEL , "xPatternSearchIntRefine(): Sub-pel MV used.");
  CHECK( amvpInfo.mvCand[riMVPIdx] != rcMvPred, "xPatternSearchIntRefine(): MvPred issue.");

  const int hadMode = m_pcEncCfg->m_bUseHADME ? ( m_pcEncCfg->m_fastHad ? 2 : 1 ) : 0;
  m_pcRdCost->setDistParam( m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, 0, hadMode );

  // -> set MV scale for cost calculation to QPEL (0)
  m_pcRdCost->setCostScale( 0 );

  // subtract old MVP costs because costs for all newly tested MVPs are added in here
  ruiBits -= m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

  // best combination found so far
  Distortion uiBestDist  = MAX_DISTORTION;
  Mv         cBestMv     = rcMv;
  int        iBestMVPIdx = riMVPIdx;
  int        iBestBits   = 0;

  // centre first, then the 8 neighbours
  const Mv testPos[9] = { { 0, 0}, { -1, -1},{ -1, 0},{ -1, 1},{ 0, -1},{ 0, 1},{ 1, -1},{ 1, 0},{ 1, 1} };

  Mv cBaseMvd[2];
  cBaseMvd[0] = (rcMv - amvpInfo.mvCand[0]);
  cBaseMvd[1] = (rcMv - amvpInfo.mvCand[1]);
  CHECK( (cBaseMvd[0].hor & 0x03) != 0 || (cBaseMvd[0].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 0 Mvd issue.");
  CHECK( (cBaseMvd[1].hor & 0x03) != 0 || (cBaseMvd[1].ver & 0x03) != 0 , "xPatternSearchIntRefine(): AMVP cand 1 Mvd issue.");

  cBaseMvd[0].roundTransPrecInternal2Amvr(cu.imv);
  cBaseMvd[1].roundTransPrecInternal2Amvr(cu.imv);

  Distortion uiSATD = 0;  // distortion of the most recently measured position

  // test best integer position and all 8 neighboring positions
  for( int pos = 0; pos < 9; pos++ )
  {
    Mv cTestMv[2];

    // test both AMVP candidates for each position
    for( int iMVPIdx = 0; iMVPIdx < amvpInfo.numCand; iMVPIdx++ )
    {
      Mv& testMv = cTestMv[iMVPIdx];

      testMv = testPos[pos];
      testMv.changeTransPrecAmvr2Internal(cu.imv);
      testMv += cBaseMvd[iMVPIdx];
      testMv += amvpInfo.mvCand[iMVPIdx];

      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), testMv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        xClipMvToFppLine( testMv, cu.ly(), cu.lheight(), m_pcEncCfg->m_ifpLines, *cu.cs->pcv );
        testMv.roundTransPrecInternal2AmvrVertical(cu.imv);
      }

      // the distortion is only measured again when this vector differs from the one of candidate 0
      const bool measure = ( iMVPIdx == 0 || cTestMv[0] != cTestMv[1] );
      if( measure )
      {
        Mv cTempMV = testMv;
        clipMv( cTempMV, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * ( cTempMV.ver >> MV_FRACTIONAL_BITS_INTERNAL ) + ( cTempMV.hor >> MV_FRACTIONAL_BITS_INTERNAL );
        uiSATD = (Distortion) ( m_cDistParam.distFunc( m_cDistParam ) * fWeight );
      }
      Distortion uiDist = uiSATD;

      // rate: MVP index bits plus MVD bits against this candidate
      int iMvBits = m_auiMVPIdxCost[iMVPIdx][AMVP_MAX_NUM_CANDS];

      Mv pred = amvpInfo.mvCand[iMVPIdx];
      pred.changeTransPrecInternal2Amvr(cu.imv);
      m_pcRdCost->setPredictor( pred );

      Mv mv = testMv;
      mv.changeTransPrecInternal2Amvr(cu.imv);
      iMvBits += m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
      uiDist  += m_pcRdCost->getCost( iMvBits );

      if( uiDist < uiBestDist )
      {
        uiBestDist  = uiDist;
        cBestMv     = testMv;
        iBestMVPIdx = iMVPIdx;
        iBestBits   = iMvBits;
      }
    }
  }

  if( uiBestDist == MAX_DISTORTION )
  {
    ruiCost = MAX_DISTORTION;
    return;
  }

  rcMv     = cBestMv;
  rcMvPred = amvpInfo.mvCand[iBestMVPIdx];
  riMVPIdx = iBestMVPIdx;
  m_pcRdCost->setPredictor( rcMvPred );

  ruiBits += iBestBits;
  // taken from JEM 5.0
  // verify since it makes no sense to subtract Lambda*(Rmvd+Rmvpidx) from D+Lambda(Rmvd)
  // this would take the rate for the MVP idx out of the cost calculation
  // however this rate is always 1 so impact is small
  ruiCost = uiBestDist - m_pcRdCost->getCost(iBestBits) + m_pcRdCost->getCost(ruiBits);
  // taken from JEM 5.0
  // verify since it makes no sense to add rate for MVDs twice
}
2678
2679
void InterSearch::xPatternSearchFracDIF(
2680
  const CodingUnit& cu,
2681
  RefPicList            refPicList,
2682
  int                   iRefIdx,
2683
  TZSearchStruct&    cStruct,
2684
  const Mv&             rcMvInt,
2685
  Mv&                   rcMvHalf,
2686
  Mv&                   rcMvQter,
2687
  Distortion&           ruiCost
2688
)
2689
0
{
2690
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_FRAC_PEL );
2691
2692
  //  Reference pattern initialization (integer scale)
2693
0
  int         iOffset    = rcMvInt.hor + rcMvInt.ver * cStruct.iRefStride;
2694
0
  CPelBuf cPatternRoi(cStruct.piRefY + iOffset, cStruct.iRefStride, *cStruct.pcPatternKey);
2695
2696
  //  Half-pel refinement
2697
0
  m_pcRdCost->setCostScale(1);
2698
0
  if( 0 == m_pcEncCfg->m_fastSubPel )
2699
0
  {
2700
0
    xExtDIFUpSamplingH( &cPatternRoi, cStruct.useAltHpelIf );
2701
0
  }
2702
2703
0
  rcMvHalf = rcMvInt;   rcMvHalf <<= 1;    // for mv-cost
2704
0
  Mv baseRefMv(0, 0);
2705
0
  Distortion  uiDistBest = MAX_DISTORTION;
2706
0
  int patternId = 41;
2707
0
  ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 2, rcMvHalf, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2708
0
  patternId -= ( m_pcEncCfg->m_fastSubPel == 1 ? 41 : 0 );
2709
2710
2711
  //  quarter-pel refinement
2712
0
  if( cStruct.imvShift == IMV_OFF && 0 != patternId )
2713
0
  {
2714
0
    PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL );
2715
0
    m_pcRdCost->setCostScale( 0 );
2716
0
    xExtDIFUpSamplingQ( &cPatternRoi, rcMvHalf, patternId );
2717
0
    baseRefMv = rcMvHalf;
2718
0
    baseRefMv <<= 1;
2719
2720
0
    rcMvQter = rcMvInt;    rcMvQter <<= 1;    // for mv-cost
2721
0
    rcMvQter += rcMvHalf;  rcMvQter <<= 1;
2722
0
    ruiCost = xPatternRefinement( cStruct.pcPatternKey, baseRefMv, 1, rcMvQter, uiDistBest, patternId, &cPatternRoi, cStruct.useAltHpelIf );
2723
0
  }
2724
2725
0
}
2726
2727
/**
 * \brief Weighted Hadamard distortion of a symmetric MV pair
 *
 * Predicts luma from both lists, removes the current-list prediction from a copy of
 * the original and measures what is left against the target-list prediction.
 *
 * \param cu             Coding unit
 * \param origBuf        Original samples of the block
 * \param eCurRefPicList List of cCurMvField; the target list is the other one
 * \param cCurMvField    Motion vector / reference index of the current list
 * \param cTarMvField    Motion vector / reference index of the target list (only read here)
 * \param BcwIdx         Not read in this function; the weight is derived from cu.BcwIdx
 * \return Weighted distortion, rounded down
 */
Distortion InterSearch::xGetSymCost( const CodingUnit& cu, CPelUnitBuf& origBuf, RefPicList eCurRefPicList, const MvField& cCurMvField, MvField& cTarMvField, int BcwIdx )
{
  const RefPicList eTarRefPicList = (RefPicList)( 1 - (int)eCurRefPicList );

  // prediction from the current list
  PelUnitBuf     curPred = m_tmpPredStorage[eCurRefPicList].getCompactBuf( cu );
  const Picture* curRef  = cu.slice->getRefPic( eCurRefPicList, cCurMvField.refIdx );
  Mv             curMv   = cCurMvField.mv;
  clipMv( curMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
  xPredInterBlk( COMP_Y, cu, curRef, curMv, curPred, false, cu.slice->clpRngs[ COMP_Y ], false, false );

  // prediction from the target list
  PelUnitBuf     tarPred = m_tmpPredStorage[eTarRefPicList].getCompactBuf( cu );
  const Picture* tarRef  = cu.slice->getRefPic( eTarRefPicList, cTarMvField.refIdx );
  Mv             tarMv   = cTarMvField.mv;
  clipMv( tarMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );
  xPredInterBlk( COMP_Y, cu, tarRef, tarMv, tarPred, false, cu.slice->clpRngs[ COMP_Y ], false, false );

  // original with the current-list prediction removed
  PelUnitBuf residualTarget = m_tmpStorageLCU.getCompactBuf( cu );
  residualTarget.copyFrom( origBuf );
  residualTarget.removeHighFreq( curPred, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, eTarRefPicList )*/ );
  const double fWeight = xGetMEDistortionWeight( cu.BcwIdx, eTarRefPicList );

  // calc distortion
  const Distortion hadDist = m_pcRdCost->getDistPart( residualTarget.Y(), tarPred.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD );
  return ( Distortion ) floor( fWeight * ( double ) hadDist );
}
2756
2757
/**
 * \brief Iterative pattern search for a symmetric MVD pair
 *
 * In every round the positions of the selected pattern around the current centre are
 * tested; the target-list vector is derived by mirroring the MVD. A round that brings
 * no improvement ends the search; otherwise the next round only tests the directions
 * next to the best one.
 *
 * \param cu                Coding unit
 * \param origBuf           Original samples of the block
 * \param rcMvCurPred       MV predictor of the searched list
 * \param rcMvTarPred       MV predictor of the target list
 * \param refPicList        Searched reference picture list
 * \param rCurMvField       Start vector of the searched list; updated on improvement
 * \param rTarMvField       Vector of the target list; updated on improvement
 * \param uiMinCost         Cost to beat
 * \param SearchPattern     0 = cross, 1 = square, 2 = diamond, 3 = hexagon
 * \param nSearchStepShift  Left shift applied to the pattern offsets
 * \param uiMaxSearchRounds Maximum number of rounds
 * \param BcwIdx            Passed on to xGetSymCost
 * \return Best cost found (uiMinCost if nothing improved)
 */
Distortion InterSearch::xSymRefineMvSearch( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, 
                                            MvField& rTarMvField, Distortion uiMinCost, int SearchPattern, int nSearchStepShift, uint32_t uiMaxSearchRounds, int BcwIdx )
{
  const Mv offsetsCross[4]   = { Mv( 0 , 1 ) , Mv( 1 , 0 ) , Mv( 0 , -1 ) , Mv( -1 ,  0 ) };
  const Mv offsetsSquare[8]  = { Mv( -1 , 1 ) , Mv( 0 , 1 ) , Mv( 1 ,  1 ) , Mv( 1 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -1 ) , Mv( -1 , -1 ) , Mv( -1 , 0 ) };
  const Mv offsetsDiamond[8] = { Mv( 0 , 2 ) , Mv( 1 , 1 ) , Mv( 2 ,  0 ) , Mv( 1 , -1 ) , Mv( 0 , -2 ) , Mv( -1 , -1 ) , Mv( -2 ,  0 ) , Mv( -1 , 1 ) };
  const Mv offsetsHexagon[6] = { Mv( 2 , 0 ) , Mv( 1 , 2 ) , Mv( -1 ,  2 ) , Mv( -2 ,  0 ) , Mv( -1 , -2 ) , Mv( 1 , -2 ) };

  // direction range of the first round and wrap-around parameters of the pattern
  int       nDirectStart    = 0;
  int       nDirectEnd      = 0;
  int       nDirectRounding = 0;
  int       nDirectMask     = 0;
  const Mv* pSearchOffset   = nullptr;

  switch( SearchPattern )
  {
  case 0:
    nDirectEnd      = 3;
    nDirectRounding = 4;
    nDirectMask     = 0x03;
    pSearchOffset   = offsetsCross;
    break;
  case 1:
    nDirectEnd      = 7;
    nDirectRounding = 8;
    nDirectMask     = 0x07;
    pSearchOffset   = offsetsSquare;
    break;
  case 2:
    nDirectEnd      = 7;
    nDirectRounding = 8;
    nDirectMask     = 0x07;
    pSearchOffset   = offsetsDiamond;
    break;
  case 3:
    nDirectEnd    = 5;
    pSearchOffset = offsetsHexagon;
    break;
  default:
    THROW( "Invalid search pattern" );
    break;
  }

  // test order of the first diamond round when the early stop is active
  const int positionLut[ 8 ] = { 0, 2, 4, 6, 1, 3, 5, 7 };

  for( uint32_t uiRound = 0; uiRound < uiMaxSearchRounds; uiRound++ )
  {
    // first diamond round with reordered positions and early stop
    const bool reorderedFirstRound = m_pcEncCfg->m_SMVD > 1 && 2 == SearchPattern && 0 == uiRound;

    Distortion    roundZeroBestCost = MAX_DISTORTION;
    int           nBestDirect       = -1;
    const MvField mvCurCenter       = rCurMvField;

    for( int nIdx = nDirectStart; nIdx <= nDirectEnd; nIdx++ )
    {
      // terminate the search if none of the first four tested points has provided an improvement
      if( reorderedFirstRound && 4 == nIdx && roundZeroBestCost > uiMinCost )
      {
        break;
      }

      // map the running index to a direction of the pattern
      int nDirect;
      if( SearchPattern == 3 )
      {
        nDirect = nIdx < 0 ? nIdx + 6 : nIdx >= 6 ? nIdx - 6 : nIdx;
      }
      else
      {
        nDirect = ( nIdx + nDirectRounding ) & nDirectMask;
        if( reorderedFirstRound )
        {
          nDirect = positionLut[ nDirect ];
        }
      }

      Mv mvOffset = pSearchOffset[nDirect];
      mvOffset <<= nSearchStepShift;

      MvField mvCand = mvCurCenter;
      MvField mvPair;
      mvCand.mv += mvOffset;
      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvCand.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        continue; // Skip this pos
      }

      // get MVD cost
      Mv pred = rcMvCurPred;
      pred.changeTransPrecInternal2Amvr(cu.imv);
      m_pcRdCost->setPredictor( pred );
      m_pcRdCost->setCostScale( 0 );

      Mv mv = mvCand.mv;
      mv.changeTransPrecInternal2Amvr(cu.imv);
      const uint32_t uiMvBits = m_pcRdCost->getBitsOfVectorWithPredictor( mv.hor, mv.ver, 0 );
      Distortion     uiCost   = m_pcRdCost->getCost( uiMvBits );

      // get MVD pair and set target MV
      mvPair.refIdx = rTarMvField.refIdx;
      mvPair.mv.set( rcMvTarPred.hor - (mvCand.mv.hor - rcMvCurPred.hor), rcMvTarPred.ver - (mvCand.mv.ver - rcMvCurPred.ver) );

      if( m_pcEncCfg->m_ifpLines && !CU::isMvInRangeFPP( cu.ly(), cu.lheight(), mvPair.mv.ver, m_pcEncCfg->m_ifpLines, *cu.cs->pcv ) )
      {
        continue; // Skip this pos
      }

      uiCost += xGetSymCost( cu, origBuf, refPicList, mvCand, mvPair, BcwIdx );
      if( uiCost < uiMinCost )
      {
        uiMinCost   = uiCost;
        rCurMvField = mvCand;
        rTarMvField = mvPair;
        nBestDirect = nDirect;
      }
      if( reorderedFirstRound && 4 > nIdx && uiCost < roundZeroBestCost )
      {
        roundZeroBestCost = uiCost;
      }
    }

    // no improvement in this round: done
    if( nBestDirect == -1 )
    {
      break;
    }

    int nStep = 1;
    if( ( SearchPattern == 1 || SearchPattern == 2 ) && m_pcEncCfg->m_SMVD <= 1 )
    {
      // test at most 3 points in fast presets
      nStep = 2 - ( nBestDirect & 0x01 );
    }
    nDirectStart = nBestDirect - nStep;
    nDirectEnd   = nBestDirect + nStep;
  }

  return uiMinCost;
}
2887
2888
2889
/**
* \brief Refine a symmetric MV pair with up to two passes of xSymRefineMvSearch()
*
* The first pass always runs with search pattern 2; the second pass (search
* pattern 0, a single round) only runs while the configured m_SMVD level is
* below 3. Both passes start from, and update, rCurMvField / rTarMvField.
*
* \param cu          Coding unit being searched (its imv value scales the step size and round count)
* \param origBuf     Original samples, forwarded to the refinement search
* \param rcMvCurPred MV predictor of the current list, forwarded unchanged
* \param rcMvTarPred MV predictor of the target list, forwarded unchanged
* \param refPicList  Reference picture list of the current MV
* \param rCurMvField In/out: MV field of the current list
* \param rTarMvField In/out: MV field of the target list
* \param ruiCost     In/out: cost to beat on entry, cost returned by the last executed pass on exit
* \param BcwIdx      Forwarded to the refinement search
*/
void InterSearch::xSymMotionEstimation( CodingUnit& cu, CPelUnitBuf& origBuf, Mv& rcMvCurPred, Mv& rcMvTarPred, RefPicList refPicList, MvField& rCurMvField, MvField& rTarMvField, Distortion& ruiCost, int BcwIdx )
{
  // Step-size shift: base precision difference plus an imv-dependent term.
  const int imvShift      = ( cu.imv == IMV_HPEL ) ? 1 : ( cu.imv << 1 );
  const int stepShift     = MV_FRACTIONAL_BITS_DIFF + imvShift;

  // Round budgets of the two passes; the first one shrinks with coarser imv.
  const int firstPassRnd  = 8 >> cu.imv;
  const int secondPassRnd = 1;

  ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 2, stepShift, firstPassRnd, BcwIdx );

  if( m_pcEncCfg->m_SMVD >= 3 )
  {
    return; // highest SMVD speed-up levels skip the second pass
  }

  ruiCost = xSymRefineMvSearch( cu, origBuf, rcMvCurPred, rcMvTarPred, refPicList, rCurMvField, rTarMvField, ruiCost, 0, stepShift, secondPassRnd, BcwIdx );
}
2905
2906
2907
/**
2908
* \brief Generate half-sample interpolated block
2909
*
2910
* \param pattern Reference picture ROI
2911
* \param biPred    Flag indicating whether block is for biprediction
2912
*/
2913
void InterSearch::xExtDIFUpSamplingH(CPelBuf* pattern, bool useAltHpelIf)
2914
0
{
2915
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_HPEL_INTERP );
2916
0
  const ClpRng& clpRng = m_lumaClpRng;
2917
0
  int width            = pattern->width;
2918
0
  int height           = pattern->height;
2919
0
  int srcStride        = pattern->stride;
2920
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2921
2922
0
  int intStride = width + 1;
2923
0
  int dstStride = width + 1;
2924
0
  Pel* intPtr;
2925
0
  Pel* dstPtr;
2926
0
  int filterSize     = useAltHpelIf ? ( reduceTap >= 1 ? NTAPS_AFFINE : NTAPS_LUMA )
2927
0
                                    : ( reduceTap == 1 ? NTAPS_AFFINE
2928
0
                                                       : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA ) );
2929
0
  int halfFilterSize = ( filterSize >> 1 );
2930
0
  const Pel *srcPtr  = pattern->buf - halfFilterSize * srcStride - 1;
2931
2932
0
  const ChromaFormat chFmt = m_currChromaFormat;
2933
2934
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2935
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[0][0]        , intStride, width, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2936
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[0][0] + width, intStride,     1, height + filterSize, 0 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2937
2938
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2939
0
  m_if.filterHor( COMP_Y, srcPtr,         srcStride, m_filteredBlockTmp[2][0],         intStride, width, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2940
0
  m_if.filterHor( COMP_Y, srcPtr + width, srcStride, m_filteredBlockTmp[2][0] + width, intStride,     1, height + filterSize, 2 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2941
2942
0
  intPtr = m_filteredBlockTmp[0][0] + halfFilterSize * intStride + 1;
2943
0
  dstPtr = m_filteredBlock[0][0][0];
2944
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2945
2946
0
  intPtr = m_filteredBlockTmp[0][0] + (halfFilterSize - 1) * intStride + 1;
2947
0
  dstPtr = m_filteredBlock[2][0][0];
2948
0
  m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width + 0, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2949
2950
0
  intPtr = m_filteredBlockTmp[2][0] + halfFilterSize * intStride;
2951
0
  dstPtr = m_filteredBlock[0][2][0];
2952
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2953
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2954
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 0, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2955
2956
0
  intPtr = m_filteredBlockTmp[2][0] + (halfFilterSize - 1) * intStride;
2957
0
  dstPtr = m_filteredBlock[2][2][0];
2958
  // split the prediction with funny widths into power-of-2 and +1 parts for the sake of SIMD speed-up
2959
0
  m_if.filterVer( COMP_Y, intPtr,         intStride, dstPtr,         dstStride, width, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2960
0
  m_if.filterVer( COMP_Y, intPtr + width, intStride, dstPtr + width, dstStride,     1, height + 1, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, useAltHpelIf, 0, reduceTap );
2961
0
}
2962
2963
2964
2965
2966
2967
/**
2968
* \brief Generate quarter-sample interpolated blocks
2969
*
2970
* \param pattern    Reference picture ROI
2971
* \param halfPelRef Half-pel mv
2972
* \param biPred     Flag indicating whether block is for biprediction
2973
*/
2974
void InterSearch::xExtDIFUpSamplingQ( CPelBuf* pattern, Mv halfPelRef, int& patternId )
2975
0
{
2976
0
  PROFILER_SCOPE_AND_STAGE( 0, _TPROF, P_QPEL_INTERP );
2977
0
  const ClpRng& clpRng = m_lumaClpRng;
2978
0
  int width      = pattern->width;
2979
0
  int height     = pattern->height;
2980
0
  int srcStride  = pattern->stride;
2981
0
  const int reduceTap = m_pcEncCfg->m_meReduceTap;
2982
2983
0
  Pel const* srcPtr;
2984
0
  int intStride = width + 1;
2985
0
  int dstStride = width + 1;
2986
0
  Pel* intPtr;
2987
0
  Pel* dstPtr;
2988
2989
0
  int filterSize     = reduceTap == 1 ? NTAPS_AFFINE
2990
0
                   : ( reduceTap == 0 ? NTAPS_LUMA : NTAPS_CHROMA );
2991
2992
0
  int halfFilterSize = (filterSize>>1);
2993
2994
0
  int extHeight = (halfPelRef.ver == 0) ? height + filterSize : height + filterSize-1;
2995
2996
0
  const ChromaFormat chFmt = m_currChromaFormat;
2997
2998
0
  if( s_doInterpQ[ patternId ][ 12 ] )
2999
0
  {
3000
    // Horizontal filter 1/4
3001
0
    srcPtr = pattern->buf - halfFilterSize * srcStride - 1;
3002
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ];
3003
0
    if( halfPelRef.ver > 0 )
3004
0
    {
3005
0
      srcPtr += srcStride;
3006
0
    }
3007
0
    if( halfPelRef.hor >= 0 )
3008
0
    {
3009
0
      srcPtr += 1;
3010
0
    }
3011
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 1 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3012
0
  }
3013
3014
0
  if( s_doInterpQ[ patternId ][ 13 ] )
3015
0
  {
3016
    // Horizontal filter 3/4
3017
0
    srcPtr = pattern->buf - halfFilterSize*srcStride - 1;
3018
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ];
3019
0
    if( halfPelRef.ver > 0 )
3020
0
    {
3021
0
      srcPtr += srcStride;
3022
0
    }
3023
0
    if( halfPelRef.hor > 0 )
3024
0
    {
3025
0
      srcPtr += 1;
3026
0
    }
3027
0
    m_if.filterHor( COMP_Y, srcPtr, srcStride, intPtr, intStride, width, extHeight, 3 << MV_FRACTIONAL_BITS_DIFF, false, chFmt, clpRng, false, 0, reduceTap );
3028
0
  }
3029
3030
0
  if( s_doInterpQ[ patternId ][ 3 ] )
3031
0
  {
3032
    // Generate @ 1,1
3033
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3034
0
    dstPtr = m_filteredBlock[ 1 ][ 1 ][ 0 ];
3035
0
    if( halfPelRef.ver == 0 )
3036
0
    {
3037
0
      intPtr += intStride;
3038
0
    }
3039
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3040
0
  }
3041
3042
0
  if( s_doInterpQ[ patternId ][ 11 ] )
3043
0
  {
3044
    // Generate @ 3,3
3045
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3046
0
    dstPtr = m_filteredBlock[ 3 ][ 3 ][ 0 ];
3047
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3048
0
  }
3049
3050
0
  if( s_doInterpQ[ patternId ][ 5 ] )
3051
0
  {
3052
    // Generate @ 3,1
3053
0
    intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3054
0
    dstPtr = m_filteredBlock[ 3 ][ 1 ][ 0 ];
3055
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3056
0
  }
3057
3058
0
  if( s_doInterpQ[ patternId ][ 9 ] )
3059
0
  {
3060
    // Generate @ 1,3
3061
0
    intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3062
0
    dstPtr = m_filteredBlock[ 1 ][ 3 ][ 0 ];
3063
0
    if( halfPelRef.ver == 0 )
3064
0
    {
3065
0
      intPtr += intStride;
3066
0
    }
3067
0
    m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3068
0
  }
3069
3070
0
  if (halfPelRef.ver != 0)
3071
0
  {
3072
0
    if( s_doInterpQ[ patternId ][ 4 ] )
3073
0
    {
3074
      // Generate @ 2,1
3075
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3076
0
      dstPtr = m_filteredBlock[ 2 ][ 1 ][ 0 ];
3077
0
      if( halfPelRef.ver == 0 )
3078
0
      {
3079
0
        intPtr += intStride;
3080
0
      }
3081
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3082
0
    }
3083
3084
0
    if( s_doInterpQ[ patternId ][ 10 ] )
3085
0
    {
3086
      // Generate @ 2,3
3087
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3088
0
      dstPtr = m_filteredBlock[ 2 ][ 3 ][ 0 ];
3089
0
      if( halfPelRef.ver == 0 )
3090
0
      {
3091
0
        intPtr += intStride;
3092
0
      }
3093
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 2 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3094
0
    }
3095
0
  }
3096
0
  else
3097
0
  {
3098
0
    if( s_doInterpQ[ patternId ][ 2 ] )
3099
0
    {
3100
      // Generate @ 0,1
3101
0
      intPtr = m_filteredBlockTmp[ 1 ][ 0 ] + halfFilterSize * intStride;
3102
0
      dstPtr = m_filteredBlock[ 0 ][ 1 ][ 0 ];
3103
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3104
0
    }
3105
3106
0
    if( s_doInterpQ[ patternId ][ 8 ] )
3107
0
    {
3108
      // Generate @ 0,3
3109
0
      intPtr = m_filteredBlockTmp[ 3 ][ 0 ] + halfFilterSize * intStride;
3110
0
      dstPtr = m_filteredBlock[ 0 ][ 3 ][ 0 ];
3111
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 0 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3112
0
    }
3113
0
  }
3114
3115
0
  if (halfPelRef.hor != 0)
3116
0
  {
3117
0
    if( s_doInterpQ[ patternId ][ 6 ] )
3118
0
    {
3119
      // Generate @ 1,2
3120
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3121
0
      dstPtr = m_filteredBlock[ 1 ][ 2 ][ 0 ];
3122
0
      if( halfPelRef.hor > 0 )
3123
0
      {
3124
0
        intPtr += 1;
3125
0
      }
3126
0
      if( halfPelRef.ver >= 0 )
3127
0
      {
3128
0
        intPtr += intStride;
3129
0
      }
3130
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3131
0
    }
3132
3133
0
    if( s_doInterpQ[ patternId ][ 7 ] )
3134
0
    {
3135
      // Generate @ 3,2
3136
0
      intPtr = m_filteredBlockTmp[ 2 ][ 0 ] + ( halfFilterSize - 1 ) * intStride;
3137
0
      dstPtr = m_filteredBlock[ 3 ][ 2 ][ 0 ];
3138
0
      if( halfPelRef.hor > 0 )
3139
0
      {
3140
0
        intPtr += 1;
3141
0
      }
3142
0
      if( halfPelRef.ver > 0 )
3143
0
      {
3144
0
        intPtr += intStride;
3145
0
      }
3146
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3147
0
    }
3148
0
  }
3149
0
  else
3150
0
  {
3151
0
    if( s_doInterpQ[ patternId ][ 0 ] )
3152
0
    {
3153
      // Generate @ 1,0
3154
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3155
0
      dstPtr = m_filteredBlock[ 1 ][ 0 ][ 0 ];
3156
0
      if( halfPelRef.ver >= 0 )
3157
0
      {
3158
0
        intPtr += intStride;
3159
0
      }
3160
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 1 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3161
0
    }
3162
3163
0
    if( s_doInterpQ[ patternId ][ 1 ] )
3164
0
    {
3165
      // Generate @ 3,0
3166
0
      intPtr = m_filteredBlockTmp[ 0 ][ 0 ] + ( halfFilterSize - 1 ) * intStride + 1;
3167
0
      dstPtr = m_filteredBlock[ 3 ][ 0 ][ 0 ];
3168
0
      if( halfPelRef.ver > 0 )
3169
0
      {
3170
0
        intPtr += intStride;
3171
0
      }
3172
0
      m_if.filterVer( COMP_Y, intPtr, intStride, dstPtr, dstStride, width, height, 3 << MV_FRACTIONAL_BITS_DIFF, false, true, chFmt, clpRng, false, 0, reduceTap );
3173
0
    }
3174
0
  }
3175
0
}
3176
3177
3178
/**
* \brief Feed the residual syntax of the current partitioner area to m_CABACEstimator
*
* With compID == MAX_NUM_TBLOCKS only coded block flags are written; with a
* real component the joint Cb-Cr syntax (for Cr) and the coefficients of that
* component are written. When the TU at the current position lies deeper than
* the partitioner depth, the area is split and the function recurses.
*
* \param cs          Coding structure holding the TU to encode
* \param partitioner Partitioner positioned on the area to encode; split and restored during recursion
* \param compID      Component to code, or MAX_NUM_TBLOCKS for the CBF-only pass
*/
void InterSearch::xEncodeInterResidualQT(CodingStructure &cs, Partitioner &partitioner, const ComponentID compID)
{
  const UnitArea&      area    = partitioner.currArea();
  const TransformUnit& tu      = *cs.getTU( isLuma( partitioner.chType ) ? area.lumaPos() : area.chromaPos(), partitioner.chType );
  const CodingUnit&    cu      = *tu.cu;
  const unsigned       depth   = partitioner.currTrDepth;
  const bool           isSplit = ( depth != tu.depth );
  const bool           cbfPass = ( compID == MAX_NUM_TBLOCKS ); // not a channel: always recurse and code the CBFs

  if( cbfPass )
  {
    // The TU depth must agree with what the partitioner would split implicitly.
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      CHECK( !isSplit, "Not performing the implicit TU split" );
    }
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
    {
      CHECK( !isSplit, "Not performing the implicit TU split - sbt" );
    }
    else
    {
      CHECK( isSplit, "transformsplit not supported" );
    }

    CHECK(CU::isIntra(cu), "Inter search provided with intra CU");

    const bool chromaPresent = cu.chromaFormat != CHROMA_400
                            && ( !CU::isSepTree( cu ) || isChroma( partitioner.chType ) );
    if( chromaPresent )
    {
      // With SBT the chroma CBFs are not written at depth 0, nor at depth 1 for the no-residual TU.
      auto chromaCbfWritten = [&]() -> bool
      {
        return !( cu.sbtInfo && ( depth == 0 || ( depth == 1 && tu.noResidual ) ) );
      };

      const bool cbfCb = TU::getCbfAtDepth( tu, COMP_Cb, depth );
      if( chromaCbfWritten() )
      {
        m_CABACEstimator->cbf_comp( cu, cbfCb, area.blocks[COMP_Cb], depth );
      }

      const bool cbfCr = TU::getCbfAtDepth( tu, COMP_Cr, depth );
      if( chromaCbfWritten() )
      {
        m_CABACEstimator->cbf_comp( cu, cbfCr, area.blocks[COMP_Cr], depth, TU::getCbfAtDepth( tu, COMP_Cb, depth ) );
      }
    }

    const bool lumaCbfWritten = !isSplit
                             && !( cu.sbtInfo && tu.noResidual )
                             && !isChroma( partitioner.chType );
    if( lumaCbfWritten )
    {
      m_CABACEstimator->cbf_comp( cu, TU::getCbfAtDepth( tu, COMP_Y, depth ), area.Y(), depth );
    }
  }

  if( !isSplit )
  {
    // Leaf: the CBFs were coded in the CBF pass, so a component pass codes coefficients.
    if( !cbfPass && area.blocks[compID].valid() )
    {
      if( compID == COMP_Cr )
      {
        const int cbfMask = ( TU::getCbf( tu, COMP_Cb ) ? 2 : 0) + ( TU::getCbf( tu, COMP_Cr ) ? 1 : 0 );
        m_CABACEstimator->joint_cb_cr( tu, cbfMask );
      }
      if( TU::getCbf( tu, compID ) )
      {
        m_CABACEstimator->residual_coding( tu, compID );
      }
    }
    return;
  }

  // Split node: a component pass only descends when that component has a CBF at this depth.
  if( !cbfPass && !TU::getCbfAtDepth( tu, compID, depth ) )
  {
    return;
  }

  if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
  {
    partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
  }
  else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
  {
    partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
  }
  else
  {
    THROW( "Implicit TU split not available!" );
  }

  do
  {
    xEncodeInterResidualQT( cs, partitioner, compID );
  } while( partitioner.nextPart( cs ) );

  partitioner.exitCurrSplit();
}
3272
3273
/**
* \brief Estimate a lower bound of the distortion of every SBT mode and derive the RDO test order
*
* Results are stored in members:
*  - m_estMinDistSbt[NUMBER_SBT_MODE] : SSE between original and prediction over all valid components
*  - m_estMinDistSbt[mode]            : estimated minimum distortion per SBT mode (MAX_DISTORTION if not derived)
*  - m_skipSbtAll                     : set when the residual is too small to be worth coding with SBT
*  - m_sbtRdoOrder                    : modes sorted by increasing estimate, half modes first, then quad modes
*
* \param cs         Coding structure providing original and prediction buffers
* \param cu         Coding unit under test
* \param sbtAllowed Bit mask of allowed SBT types; 0 means only the total SSE is computed
*/
void InterSearch::xCalcMinDistSbt( CodingStructure &cs, const CodingUnit& cu, const uint8_t sbtAllowed )
{
  if( !sbtAllowed )
  {
    m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
    for( uint32_t comp = 0; comp < getNumberValidTBlocks( *cs.pcv ); comp++ )
    {
      const ComponentID compID = ComponentID( comp );
      CPelBuf pred = cs.getPredBuf( compID );
      CPelBuf org  = cs.getOrgBuf( compID );
      m_estMinDistSbt[NUMBER_SBT_MODE] += m_pcRdCost->getDistPart( org, pred, cs.sps->bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
    }
    return;
  }

  //SBT fast algorithm 2.1 : estimate a minimum RD cost of a SBT mode based on the distortion of uncoded part and coded part
  //                         (assuming the distortion of the coded part can be reduced to 1/32, see 'shift' below);
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
  int cuWidth  = cu.lwidth();
  int cuHeight = cu.lheight();
  // the CU is divided into up to 4x4 sub parts; fewer for luma sizes below 16
  int numPartX = cuWidth  >= 16 ? 4 : ( cuWidth  == 4 ? 1 : 2 );
  int numPartY = cuHeight >= 16 ? 4 : ( cuHeight == 4 ? 1 : 2 );
  Distortion dist[4][4];
  memset( dist, 0, sizeof( Distortion ) * 16 );

  for( uint32_t c = 0; c < getNumberValidTBlocks( *cs.pcv ); c++ )
  {
    const ComponentID compID   = ComponentID( c );
    const CompArea&   compArea = cu.blocks[compID];
    const CPelBuf orgPel  = cs.getOrgBuf( compArea );
    const CPelBuf predPel = cs.getPredBuf( compArea );
    int lengthX = compArea.width / numPartX;
    int lengthY = compArea.height / numPartY;
    int strideOrg  = orgPel.stride;
    int stridePred = predPel.stride;
    // cs.sps is a pointer (see the getDistPart call above), so the bit depth must be read through '->'
    uint32_t   uiShift = DISTORTION_PRECISION_ADJUSTMENT( ( cs.sps->bitDepths[ toChannelType( compID ) ] - 8 ) << 1 );
    Intermediate_Int iTemp;

    //calc dist of the (up to 16) sub parts, accumulated over all components
    for( int j = 0; j < numPartY; j++ )
    {
      for( int i = 0; i < numPartX; i++ )
      {
        int posX = i * lengthX;
        int posY = j * lengthY;
        const Pel* ptrOrg  = orgPel.bufAt( posX, posY );
        const Pel* ptrPred = predPel.bufAt( posX, posY );
        Distortion uiSum = 0;
        for( int n = 0; n < lengthY; n++ )
        {
          for( int m = 0; m < lengthX; m++ )
          {
            iTemp = ptrOrg[m] - ptrPred[m];
            uiSum += Distortion( ( iTemp * iTemp ) >> uiShift );
          }
          ptrOrg += strideOrg;
          ptrPred += stridePred;
        }
        if( isChroma( compID ) )
        {
          uiSum = (Distortion)( uiSum * m_pcRdCost->getChromaWeight() );
        }
        dist[j][i] += uiSum;
      }
    }
  }

  //SSE of a CU
  m_estMinDistSbt[NUMBER_SBT_MODE] = 0;
  for( int j = 0; j < numPartY; j++ )
  {
    for( int i = 0; i < numPartX; i++ )
    {
      m_estMinDistSbt[NUMBER_SBT_MODE] += dist[j][i];
    }
  }
  //init per-mode dist
  for( int i = SBT_VER_H0; i < NUMBER_SBT_MODE; i++ )
  {
    m_estMinDistSbt[i] = MAX_DISTORTION;
  }

  //SBT fast algorithm 1: not try SBT if the residual is too small to compensate bits for encoding residual info
  uint64_t minNonZeroResiFracBits = 12 << SCALE_BITS;
  if( m_pcRdCost->calcRdCost( 0, m_estMinDistSbt[NUMBER_SBT_MODE] ) < m_pcRdCost->calcRdCost( minNonZeroResiFracBits, 0 ) )
  {
    m_skipSbtAll = true;
    return;
  }

  //derive estimated minDist of SBT = zero-residual part distortion + non-zero residual part distortion / 32 (>> shift)
  int shift = 5;
  Distortion distResiPart = 0, distNoResiPart = 0;

  if( CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) )
  {
    int offsetResiPart = 0;
    int offsetNoResiPart = numPartX / 2;
    distResiPart = distNoResiPart = 0;
    assert( numPartX >= 2 );
    for( int j = 0; j < numPartY; j++ )
    {
      for( int i = 0; i < numPartX / 2; i++ )
      {
        distResiPart   += dist[j][i + offsetResiPart];
        distNoResiPart += dist[j][i + offsetNoResiPart];
      }
    }
    m_estMinDistSbt[SBT_VER_H0] = ( distResiPart >> shift ) + distNoResiPart;
    m_estMinDistSbt[SBT_VER_H1] = ( distNoResiPart >> shift ) + distResiPart;
  }

  if( CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed ) )
  {
    int offsetResiPart = 0;
    int offsetNoResiPart = numPartY / 2;
    assert( numPartY >= 2 );
    distResiPart = distNoResiPart = 0;
    for( int j = 0; j < numPartY / 2; j++ )
    {
      for( int i = 0; i < numPartX; i++ )
      {
        distResiPart   += dist[j + offsetResiPart][i];
        distNoResiPart += dist[j + offsetNoResiPart][i];
      }
    }
    m_estMinDistSbt[SBT_HOR_H0] = ( distResiPart >> shift ) + distNoResiPart;
    m_estMinDistSbt[SBT_HOR_H1] = ( distNoResiPart >> shift ) + distResiPart;
  }

  if( CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) )
  {
    assert( numPartX == 4 );
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q1] = 0;
    // accumulate (coded quarter + 32 * uncoded three quarters), then scale back once after the loop
    for( int j = 0; j < numPartY; j++ )
    {
      m_estMinDistSbt[SBT_VER_Q0] += dist[j][0] + ( ( dist[j][1] + dist[j][2] + dist[j][3] ) << shift );
      m_estMinDistSbt[SBT_VER_Q1] += dist[j][3] + ( ( dist[j][0] + dist[j][1] + dist[j][2] ) << shift );
    }
    m_estMinDistSbt[SBT_VER_Q0] = m_estMinDistSbt[SBT_VER_Q0] >> shift;
    m_estMinDistSbt[SBT_VER_Q1] = m_estMinDistSbt[SBT_VER_Q1] >> shift;
  }

  if( CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed ) )
  {
    assert( numPartY == 4 );
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q1] = 0;
    for( int i = 0; i < numPartX; i++ )
    {
      m_estMinDistSbt[SBT_HOR_Q0] += dist[0][i] + ( ( dist[1][i] + dist[2][i] + dist[3][i] ) << shift );
      m_estMinDistSbt[SBT_HOR_Q1] += dist[3][i] + ( ( dist[0][i] + dist[1][i] + dist[2][i] ) << shift );
    }
    m_estMinDistSbt[SBT_HOR_Q0] = m_estMinDistSbt[SBT_HOR_Q0] >> shift;
    m_estMinDistSbt[SBT_HOR_Q1] = m_estMinDistSbt[SBT_HOR_Q1] >> shift;
  }

  //SBT fast algorithm 5: try N SBT modes with the lowest distortion
  Distortion temp[NUMBER_SBT_MODE];
  memcpy( temp, m_estMinDistSbt, sizeof( Distortion ) * NUMBER_SBT_MODE );
  memset( m_sbtRdoOrder, 255, NUMBER_SBT_MODE ); // 255 marks an unused slot
  int startIdx = 0, numRDO;

  // selection sort of the half modes: pick the smallest remaining estimate, then invalidate it in temp
  numRDO = CU::targetSbtAllowed( SBT_VER_HALF, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_HALF, sbtAllowed );
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
  for( int i = startIdx; i < startIdx + numRDO; i++ )
  {
    Distortion minDist = MAX_DISTORTION;
    for( int n = SBT_VER_H0; n <= SBT_HOR_H1; n++ )
    {
      if( temp[n] < minDist )
      {
        minDist = temp[n];
        m_sbtRdoOrder[i] = n;
      }
    }
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
  }

  // same for the quad modes, appended after the half modes
  startIdx += numRDO;
  numRDO = CU::targetSbtAllowed( SBT_VER_QUAD, sbtAllowed ) + CU::targetSbtAllowed( SBT_HOR_QUAD, sbtAllowed );
  numRDO = std::min( ( numRDO << 1 ), SBT_NUM_RDO );
  for( int i = startIdx; i < startIdx + numRDO; i++ )
  {
    Distortion minDist = MAX_DISTORTION;
    for( int n = SBT_VER_Q0; n <= SBT_HOR_Q1; n++ )
    {
      if( temp[n] < minDist )
      {
        minDist = temp[n];
        m_sbtRdoOrder[i] = n;
      }
    }
    temp[m_sbtRdoOrder[i]] = MAX_DISTORTION;
  }
}
3466
3467
uint8_t InterSearch::skipSbtByRDCost( int width, int height, int mtDepth, uint8_t sbtIdx, uint8_t sbtPos, double bestCost, Distortion distSbtOff, double costSbtOff, bool rootCbfSbtOff )
3468
0
{
3469
0
  int sbtMode = CU::getSbtMode( sbtIdx, sbtPos );
3470
3471
  //SBT fast algorithm 2.2 : estimate a minimum RD cost of a SBT mode based on the luma distortion of uncoded part and coded part (assuming distorted can be reduced to 1/16);
3472
  //                         if this cost is larger than the best cost, no need to try a specific SBT mode
3473
0
  if( m_pcRdCost->calcRdCost( 11 << SCALE_BITS, m_estMinDistSbt[sbtMode] ) > bestCost )
3474
0
  {
3475
0
    return 0; //early skip type 0
3476
0
  }
3477
3478
0
  if( costSbtOff != MAX_DOUBLE )
3479
0
  {
3480
0
    if( !rootCbfSbtOff )
3481
0
    {
3482
      //SBT fast algorithm 3: skip SBT when the residual is too small (estCost is more accurate than fast algorithm 1, counting PU mode bits)
3483
0
      uint64_t minNonZeroResiFracBits = 10 << SCALE_BITS;
3484
0
      Distortion distResiPart;
3485
0
      if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_HOR_HALF )
3486
0
      {
3487
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 9 ) >> 4 );
3488
0
      }
3489
0
      else
3490
0
      {
3491
0
        distResiPart = (Distortion)( ( ( m_estMinDistSbt[NUMBER_SBT_MODE] - m_estMinDistSbt[sbtMode] ) * 3 ) >> 3 );
3492
0
      }
3493
3494
0
      double estCost = ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) + m_pcRdCost->calcRdCost( minNonZeroResiFracBits, m_estMinDistSbt[sbtMode] + distResiPart );
3495
0
      if( estCost > costSbtOff )
3496
0
      {
3497
0
        return 1;
3498
0
      }
3499
0
      if( estCost > bestCost )
3500
0
      {
3501
0
        return 2;
3502
0
      }
3503
0
    }
3504
0
    else
3505
0
    {
3506
      //SBT fast algorithm 4: skip SBT when an estimated RD cost is larger than the bestCost
3507
0
      double weight = sbtMode > SBT_HOR_H1 ? 0.4 : 0.6;
3508
0
      double estCost = ( ( costSbtOff - m_pcRdCost->calcRdCost( 0 << SCALE_BITS, distSbtOff ) ) * weight ) + m_pcRdCost->calcRdCost( 0 << SCALE_BITS, m_estMinDistSbt[sbtMode] );
3509
0
      if( estCost > bestCost )
3510
0
      {
3511
0
        return 3;
3512
0
      }
3513
0
    }
3514
0
  }
3515
0
  return MAX_UCHAR;
3516
0
}
3517
3518
// Estimates the rate-distortion cost of coding the residual of the partitioner's current area.
//
// If the area cannot be split any further (neither a TU_MAX_TR_SPLIT nor an SBT split applies), one TU is
// added to cs and, for every valid component, the available transform candidates (DCT2 and, when allowed,
// transform skip) are compared against the zero-residual alternative. If chroma blocks are present, joint
// Cb-Cr candidates are tested afterwards. The bits and distortion of the final choice are added to
// cs.fracBits / cs.dist and cs.cost is recomputed.
// Otherwise the area is split, the function recurses into every part, the CBFs at the current depth are
// derived from the sub-TUs and the bits of the whole sub-tree are re-estimated.
//
// cs           coding structure that provides the residual and receives the TU(s), bits, distortion and cost
// partitioner  provides the current area, the transform depth and the split handling
// puiZeroDist  if not NULL, the zero-residual distortion of every tested component is added to *puiZeroDist
void InterSearch::xEstimateInterResidualQT(CodingStructure &cs, Partitioner &partitioner, Distortion *puiZeroDist /*= NULL*/)
3519
20.2k
{
3520
20.2k
  const UnitArea& currArea = partitioner.currArea();
3521
20.2k
  const SPS &sps           = *cs.sps;
3522
3523
20.2k
  const uint32_t numValidComp  = getNumberValidComponents( sps.chromaFormatIdc );
3524
20.2k
  const uint32_t numTBlocks    = getNumberValidTBlocks   ( *cs.pcv );
3525
20.2k
  CodingUnit& cu               = *cs.getCU(partitioner.chType, partitioner.treeType);
3526
20.2k
  const unsigned currDepth = partitioner.currTrDepth;
3527
20.2k
  const bool useTS = cs.picture->useTS;
3528
3529
  // the unsplit TU is only evaluated when neither the maximum-transform-size split nor an SBT split applies
20.2k
  bool bCheckFull  = !partitioner.canSplit( TU_MAX_TR_SPLIT, cs );
3530
20.2k
  if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
3531
0
  {
3532
0
    bCheckFull = false;
3533
0
  }
3534
20.2k
  bool bCheckSplit = !bCheckFull;
3535
3536
  // get temporary data
  // bCheckSplit and bCheckFull are mutually exclusive, so exactly one of the two pointers is set to &cs
3537
20.2k
  CodingStructure *csSplit = nullptr;
3538
20.2k
  CodingStructure *csFull  = nullptr;
3539
20.2k
  if (bCheckSplit)
3540
0
  {
3541
0
    csSplit = &cs;
3542
0
  }
3543
20.2k
  else if (bCheckFull)
3544
20.2k
  {
3545
20.2k
    csFull = &cs;
3546
20.2k
  }
3547
3548
20.2k
  Distortion uiSingleDist         = 0;
3549
20.2k
  Distortion uiSingleDistComp [3] = { 0, 0, 0 };
3550
3551
  // ctxStart keeps the entropy coder context state at entry; it is restored before every bit estimation
20.2k
  const TempCtx ctxStart  ( m_CtxCache, m_CABACEstimator->getCtx() );
3552
20.2k
  TempCtx       ctxBest   ( m_CtxCache );
3553
3554
  // keep a copy of the original residual, since the residual buffer of cs is overwritten by the candidates
20.2k
  PelUnitBuf    orgResiBuf;
3555
20.2k
  orgResiBuf = m_tmpStorageLCU.getCompactBuf( currArea );
3556
20.2k
  orgResiBuf.copyFrom(cs.getResiBuf(currArea));
3557
3558
20.2k
  if (bCheckFull)
3559
20.2k
  {
3560
20.2k
    ReshapeData& reshapeData = cs.picture->reshapeData;
3561
3562
20.2k
    TransformUnit& tu = csFull->addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, &cu);
3563
20.2k
    tu.depth          = currDepth;
3564
20.2k
    tu.mtsIdx[COMP_Y] = MTS_DCT2_DCT2;
3565
20.2k
    tu.checkTuNoResidual( partitioner.currPartIdx() );
3566
20.2k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && cs.picHeader->lmcsChromaResidualScale && !(CS::isDualITree(cs) && cs.slice->isIntra() && tu.cu->predMode == MODE_IBC))
3567
0
    {
3568
0
      tu.chromaAdj = reshapeData.calculateChromaAdjVpduNei(tu, tu.blocks[COMP_Y], tu.cu->treeType);
3569
0
    }
3570
3571
20.2k
    double minCost [MAX_NUM_TBLOCKS];
3572
3573
20.2k
    m_CABACEstimator->resetBits();
3574
3575
    // m_pTempPel serves as the all-zero reference buffer for the zero-residual distortion below
20.2k
    memset(m_pTempPel, 0, sizeof(Pel) * tu.Y().area()); // not necessarily needed inside of the recursion (only at the beginning)
3576
3577
81.0k
    for (uint32_t i = 0; i < numTBlocks; i++)
3578
60.8k
    {
3579
60.8k
      minCost[i] = MAX_DOUBLE;
3580
60.8k
    }
3581
3582
    // saveCS / bestTU store the best candidate found so far while further candidates overwrite tu and csFull
20.2k
    CodingStructure &saveCS = *m_pSaveCS[1];
3583
20.2k
    saveCS.pcv     = cs.pcv;
3584
20.2k
    saveCS.picture = cs.picture;
3585
20.2k
    saveCS.area.repositionTo( currArea );
3586
3587
20.2k
    TransformUnit& bestTU = saveCS.tus.empty() ? saveCS.addTU( currArea, partitioner.chType, nullptr ) : *saveCS.tus.front();
3588
20.2k
    bestTU.initData();
3589
20.2k
    bestTU.UnitArea::operator=( currArea );
3590
3591
81.0k
    for( uint32_t c = 0; c < numTBlocks; c++ )
3592
60.8k
    {
3593
60.8k
      const ComponentID compID    = ComponentID(c);
3594
60.8k
      const CompArea&   compArea  = tu.blocks[compID];
3595
60.8k
      const int channelBitDepth   = sps.bitDepths[toChannelType(compID)];
3596
3597
60.8k
      if( !tu.blocks[compID].valid() )
3598
40.5k
      {
3599
40.5k
        continue;
3600
40.5k
      }
3601
      // chroma transform skip is only tested when luma has selected transform skip
20.2k
      bool tsAllowed = useTS && TU::isTSAllowed(tu, compID) && (isLuma(compID) || (isChroma(compID) && m_pcEncCfg->m_useChromaTS));
3602
20.2k
      if (isChroma(compID) && tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
3603
0
      {
3604
0
        tsAllowed = false;
3605
0
      }
3606
20.2k
      uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests
3607
20.2k
      std::vector<TrMode> trModes;
3608
3609
20.2k
      if (nNumTransformCands > 1)
3610
2.95k
      {
3611
2.95k
        trModes.push_back(TrMode(0, true)); //DCT2
3612
        //for a SBT-no-residual TU, the RDO process should be called once, in order to get the RD cost
3613
2.95k
        if ( !tu.noResidual )
3614
2.95k
        {
3615
2.95k
          trModes.push_back(TrMode(1, true));
3616
2.95k
        }
3617
0
        else
3618
0
        {
3619
0
          nNumTransformCands--;
3620
0
        }
3621
2.95k
      }
3622
      // isLast is cleared when the best candidate is the last one tested, i.e. tu and csFull already hold it;
      // otherwise the best candidate is restored from bestTU / saveCS after the loop
20.2k
      bool isLast = true;
3623
43.3k
      for (int transformMode = 0; transformMode < nNumTransformCands; transformMode++)
3624
23.0k
      {
3625
23.0k
        const bool isFirstMode = transformMode == 0;
3626
3627
        // copy the original residual into the residual buffer
3628
23.0k
        csFull->getResiBuf(compArea).copyFrom(orgResiBuf.get(compID));
3629
3630
3631
23.0k
        m_CABACEstimator->getCtx() = ctxStart;
3632
23.0k
        m_CABACEstimator->resetBits();
3633
3634
23.0k
        if (bestTU.mtsIdx[compID] == MTS_SKIP && m_pcEncCfg->m_TS)
3635
0
        {
3636
0
          continue;
3637
0
        }
3638
23.0k
        tu.mtsIdx[compID] = transformMode ? trModes[transformMode].first : 0;
3639
3640
23.0k
        const QpParam cQP(tu, compID);  // note: uses tu.transformSkip[compID]
3641
23.0k
        m_pcTrQuant->selectLambda(compID);
3642
3643
23.0k
        const Slice& slice = *tu.cu->slice;
3644
        // adapt lambda to the chroma residual scaling that is applied to the residual below
23.0k
        if (slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale )
3645
0
        {
3646
0
          double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
3647
0
          m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
3648
0
        }
3649
3650
23.0k
        if ( sps.jointCbCr && isChroma( compID ) && ( tu.cu->cs->slice->sliceQp > 18 ) )
3651
0
        {
3652
0
          m_pcTrQuant->scaleLambda( 1.05 );
3653
0
        }
3654
        // curr* describe the coded candidate, nonCoeff* the zero-residual (cbf = 0) alternative
23.0k
        TCoeff     currAbsSum = 0;
3655
23.0k
        uint64_t   currCompFracBits = 0;
3656
23.0k
        Distortion currCompDist = 0;
3657
23.0k
        double     currCompCost = 0;
3658
23.0k
        uint64_t   nonCoeffFracBits = 0;
3659
23.0k
        Distortion nonCoeffDist = 0;
3660
23.0k
        double     nonCoeffCost = 0;
3661
3662
        // forward chroma residual scaling before transform and quantization
23.0k
        if (slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale && tu.blocks[compID].width*tu.blocks[compID].height > 4 )
3663
0
        {
3664
0
          PelBuf resiBuf = csFull->getResiBuf(compArea);
3665
0
          resiBuf.scaleSignal(tu.chromaAdj, 1, slice.clpRngs[compID]);
3666
0
        }
3667
3668
23.0k
        if (nNumTransformCands > 1)
3669
5.72k
        {
3670
5.72k
          if (transformMode == 0)
3671
2.95k
          {
3672
            // checktransformsNxN may mark the second candidate as not worth testing (trModes[1].second == false)
2.95k
            m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, compID);
3673
2.95k
            tu.mtsIdx[compID] = trModes[0].first;
3674
2.95k
            if (!trModes[transformMode + 1].second)
3675
195
            {
3676
195
              nNumTransformCands = 1;
3677
195
            }
3678
2.95k
          }
3679
5.72k
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx(), true);
3680
5.72k
        }
3681
17.3k
        else
3682
17.3k
        {
3683
17.3k
          m_pcTrQuant->transformNxN(tu, compID, cQP, currAbsSum, m_CABACEstimator->getCtx());
3684
17.3k
        }
3685
        // cost of signalling no residual: SSE against the all-zero buffer plus the bits of a zero cbf flag
23.0k
        if (isFirstMode || (currAbsSum == 0))
3686
23.0k
        {
3687
23.0k
          const CPelBuf zeroBuf(m_pTempPel, compArea);
3688
23.0k
          const CPelBuf& orgResi = orgResiBuf.get(compID);
3689
3690
23.0k
          nonCoeffDist = m_pcRdCost->getDistPart(zeroBuf, orgResi, channelBitDepth, compID, DF_SSE); // initialized with zero residual distortion
3691
3692
23.0k
          if (!tu.noResidual)
3693
23.0k
          {
3694
23.0k
            const bool prevCbf = (compID == COMP_Cr ? tu.cbf[COMP_Cb] : false);
3695
23.0k
            m_CABACEstimator->cbf_comp(*tu.cu, false, compArea, currDepth, prevCbf);
3696
23.0k
          }
3697
3698
23.0k
          nonCoeffFracBits = m_CABACEstimator->getEstFracBits();
3699
23.0k
          nonCoeffCost = m_pcRdCost->calcRdCost(nonCoeffFracBits, nonCoeffDist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
3700
23.0k
        }
3701
3702
23.0k
        if ((puiZeroDist != NULL) && isFirstMode)
3703
20.2k
        {
3704
20.2k
          *puiZeroDist += nonCoeffDist; // initialized with zero residual distortion
3705
20.2k
        }
3706
3707
23.0k
        if (currAbsSum > 0) //if non-zero coefficients are present, a residual needs to be derived for further prediction
3708
193
        {
3709
193
          if (isFirstMode)
3710
193
          {
3711
            // discard the bits of the zero cbf flag estimated above
193
            m_CABACEstimator->getCtx() = ctxStart;
3712
193
            m_CABACEstimator->resetBits();
3713
193
          }
3714
3715
193
          const bool prevCbf = ( compID == COMP_Cr ? tu.cbf[COMP_Cb] : false );
3716
193
          m_CABACEstimator->cbf_comp( *tu.cu, true, compArea, currDepth, prevCbf );
3717
193
          if( compID == COMP_Cr )
3718
0
          {
3719
0
            const int cbfMask = ( tu.cbf[COMP_Cb] ? 2 : 0 ) + 1;
3720
0
            m_CABACEstimator->joint_cb_cr( tu, cbfMask );
3721
0
          }
3722
193
          CUCtx cuCtx;
3723
193
          cuCtx.isDQPCoded = true;
3724
193
          cuCtx.isChromaQpAdjCoded = true;
3725
193
          m_CABACEstimator->residual_coding(tu, compID, &cuCtx);
3726
193
          m_CABACEstimator->mts_idx(cu, &cuCtx);
3727
3728
193
          currCompFracBits = m_CABACEstimator->getEstFracBits();
3729
3730
193
          PelBuf resiBuf  = csFull->getResiBuf(compArea);
3731
193
          CPelBuf orgResi = orgResiBuf.get(compID);
3732
3733
193
          m_pcTrQuant->invTransformNxN(tu, compID, resiBuf, cQP);
3734
          // NOTE(review): unlike the forward scaling above, this condition does not test reshapeData.getCTUFlag() - confirm this is intended
193
          if (slice.picHeader->lmcsEnabled && isChroma(compID) && slice.picHeader->lmcsChromaResidualScale && tu.blocks[compID].width*tu.blocks[compID].height > 4)
3735
0
          {
3736
0
            resiBuf.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]);
3737
0
          }
3738
3739
193
          currCompDist = m_pcRdCost->getDistPart(orgResi, resiBuf, channelBitDepth, compID, DF_SSE);
3740
193
          currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDist, false);
3741
193
        }
3742
22.8k
        else if (transformMode > 0)
3743
2.76k
        {
3744
          // a later candidate without coefficients can never replace the current best
2.76k
          currCompCost = MAX_DOUBLE;
3745
2.76k
        }
3746
20.0k
        else
3747
20.0k
        {
3748
20.0k
          currCompFracBits = nonCoeffFracBits;
3749
20.0k
          currCompDist     = nonCoeffDist;
3750
20.0k
          currCompCost     = nonCoeffCost;
3751
3752
20.0k
          tu.cbf[compID] = 0;
3753
20.0k
        }
3754
3755
        // evaluate
        // on equal cost the second candidate (transformMode == 1) replaces the first one
3756
23.0k
        if ((currCompCost < minCost[compID]) || (transformMode == 1 && currCompCost == minCost[compID]))
3757
20.2k
        {
3758
          // copy component
3759
20.2k
          if (isFirstMode && ((nonCoeffCost < currCompCost) || (currAbsSum == 0))) // check for forced null
3760
20.1k
          {
3761
20.1k
            tu.getCoeffs( compID ).fill( 0 );
3762
20.1k
            csFull->getResiBuf( compArea ).fill( 0 );
3763
20.1k
            tu.cbf[compID]   = 0;
3764
3765
20.1k
            currAbsSum       = 0;
3766
20.1k
            currCompFracBits = nonCoeffFracBits;
3767
20.1k
            currCompDist     = nonCoeffDist;
3768
20.1k
            currCompCost     = nonCoeffCost;
3769
20.1k
          }
3770
3771
20.2k
          uiSingleDistComp[compID] = currCompDist;
3772
20.2k
          minCost[compID]          = currCompCost;
3773
          // only back up the new best if further candidates follow that will overwrite tu and csFull
20.2k
          if (transformMode != (nNumTransformCands - 1))
3774
2.76k
          {
3775
2.76k
            bestTU.copyComponentFrom(tu, compID);
3776
2.76k
            saveCS.getResiBuf(compArea).copyFrom(csFull->getResiBuf(compArea));
3777
2.76k
          }
3778
17.5k
          else
3779
17.5k
          {
3780
17.5k
            isLast = false;
3781
17.5k
          }
3782
20.2k
        }
3783
23.0k
        if( tu.noResidual )
3784
0
        {
3785
0
          CHECK( currCompFracBits > 0 || currAbsSum, "currCompFracBits > 0 when tu noResidual" );
3786
0
        }
3787
23.0k
      }
3788
      // the best candidate was not the last one tested: restore it from the backup
20.2k
      if (isLast)
3789
2.76k
      {
3790
2.76k
        tu.copyComponentFrom(bestTU, compID);
3791
2.76k
        csFull->getResiBuf(compArea).copyFrom(saveCS.getResiBuf(compArea));
3792
2.76k
      }
3793
20.2k
    } // component loop
3794
3795
    // joint Cb-Cr coding: tested against the sum of the separately coded Cb and Cr costs
20.2k
    if ( tu.blocks.size()>2 && tu.blocks[COMP_Cb].valid() )
3796
0
    {
3797
0
      const CompArea& cbArea = tu.blocks[COMP_Cb];
3798
0
      const CompArea& crArea = tu.blocks[COMP_Cr];
3799
0
      bool checkJointCbCr = (sps.jointCbCr) && (!tu.noResidual) && (TU::getCbf(tu, COMP_Cb) || TU::getCbf(tu, COMP_Cr));
3800
0
      const int channelBitDepth = sps.bitDepths[toChannelType(COMP_Cb)];
3801
0
      const Slice& slice = *tu.cu->slice;
3802
0
      bool      reshape         = slice.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && slice.picHeader->lmcsChromaResidualScale
3803
0
                               && tu.blocks[COMP_Cb].width * tu.blocks[COMP_Cb].height > 4;
3804
0
      double minCostCbCr = minCost[COMP_Cb] + minCost[COMP_Cr];
3805
0
      bool   isLastBest  = false;
3806
3807
      // checkDCTOnly / checkTSOnly: every chroma component with a non-zero cbf selected DCT2 / transform skip
      // in the separate search above, so only that transform is tested for the joint mode
0
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cr)) ||
3808
0
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tu, COMP_Cb)) ||
3809
0
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
3810
0
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tu, COMP_Cr)) ||
3811
0
        (TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tu, COMP_Cb)) ||
3812
0
        (TU::getCbf(tu, COMP_Cb) && tu.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tu, COMP_Cr) && tu.mtsIdx[COMP_Cr] == MTS_SKIP));
3813
3814
      // stays empty when checkJointCbCr is false, so the candidate loop below is skipped in that case
0
      std::vector<int> jointCbfMasksToTest;
3815
0
      if ( checkJointCbCr )
3816
0
      {
3817
0
        for( int i = 0; i < 4; i++ )
3818
0
        {
3819
0
          m_orgResiCb[i].compactResize(cbArea);
3820
0
          m_orgResiCr[i].compactResize(crArea);
3821
0
        }
3822
0
        m_orgResiCb[0].copyFrom(orgResiBuf.Cb());
3823
0
        m_orgResiCr[0].copyFrom(orgResiBuf.Cr());
3824
0
        if (reshape)
3825
0
        {
3826
0
          m_orgResiCb[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cb]);
3827
0
          m_orgResiCr[0].scaleSignal(tu.chromaAdj, 1, slice.clpRngs[COMP_Cr]);
3828
0
        }
3829
3830
0
        jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(tu, m_orgResiCb, m_orgResiCr);
3831
3832
        // back up the separately coded chroma result as the initial best
0
        bestTU.copyComponentFrom(tu, COMP_Cb);
3833
0
        bestTU.copyComponentFrom(tu, COMP_Cr);
3834
0
        saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
3835
0
        saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
3836
0
      }
3837
3838
0
      for (int cbfMask: jointCbfMasksToTest)
3839
0
      {
3840
        // bit 1 of the mask selects Cb as the coded component, otherwise Cr is coded
0
        ComponentID codeCompId = (cbfMask >> 1 ? COMP_Cb : COMP_Cr);
3841
0
        ComponentID otherCompId = (codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr);
3842
0
        bool tsAllowed = useTS && TU::isTSAllowed(tu, codeCompId) && (m_pcEncCfg->m_useChromaTS);
3843
0
        if (tsAllowed && (tu.mtsIdx[COMP_Y] != MTS_SKIP))
3844
0
        {
3845
0
          tsAllowed = false;
3846
0
        }
3847
0
        if (!tsAllowed)
3848
0
        {
3849
0
          checkTSOnly = false;
3850
0
        }
3851
0
        uint8_t     numTransformCands = 1 + (tsAllowed && (!(checkDCTOnly || checkTSOnly)) ? 1 : 0); // DCT + TS = 2 tests
3852
0
        std::vector<TrMode> trModes;
3853
0
        if (numTransformCands > 1)
3854
0
        {
3855
0
          trModes.push_back(TrMode(0, true)); // DCT2
3856
0
          trModes.push_back(TrMode(1, true));//TS
3857
0
        }
3858
0
        else
3859
0
        {
3860
0
          tu.mtsIdx[codeCompId] = checkTSOnly ? 1 : 0;
3861
0
        }
3862
0
        for (int modeId = 0; modeId < numTransformCands; modeId++)
3863
0
        {
3864
0
          TCoeff     currAbsSum = 0;
3865
0
          uint64_t   currCompFracBits = 0;
3866
0
          Distortion currCompDistCb = 0;
3867
0
          Distortion currCompDistCr = 0;
3868
0
          double     currCompCost = 0;
3869
3870
0
          tu.jointCbCr = (uint8_t)cbfMask;
3871
0
          if (numTransformCands > 1)
3872
0
          {
3873
0
            tu.mtsIdx[codeCompId] = trModes[modeId].first;
3874
0
          }
3875
0
          tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
3876
          // NOTE(review): cQP does not appear to be used in this loop; the transform calls below use qpCbCr
0
          const QpParam cQP(tu, COMP_Cb);  // note: uses tu.transformSkip[compID]
3877
0
          m_pcTrQuant->selectLambda(COMP_Cb);
3878
3879
          // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
3880
0
          const int    absIct = abs(TU::getICTMode(tu));
3881
0
          const double lfact = (absIct == 1 || absIct == 3 ? 0.8 : 0.5);
3882
0
          m_pcTrQuant->scaleLambda(lfact);
3883
0
          if (checkJointCbCr && (tu.cu->cs->slice->sliceQp > 18))
3884
0
          {
3885
0
            m_pcTrQuant->scaleLambda(1.05);
3886
0
          }
3887
3888
0
          m_CABACEstimator->getCtx() = ctxStart;
3889
0
          m_CABACEstimator->resetBits();
3890
3891
          // load the residuals stored for this cbfMask into the working buffers
0
          PelBuf cbResi = csFull->getResiBuf(cbArea);
3892
0
          PelBuf crResi = csFull->getResiBuf(crArea);
3893
0
          cbResi.copyFrom(m_orgResiCb[cbfMask]);
3894
0
          crResi.copyFrom(m_orgResiCr[cbfMask]);
3895
3896
0
          if (reshape)
3897
0
          {
3898
0
            double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)(tu.chromaAdj);
3899
0
            m_pcTrQuant->scaleLambda(1.0 / (cRescale * cRescale));
3900
0
          }
3901
3902
0
          int         codedCbfMask = 0;
3903
          // NOTE(review): the next two declarations shadow codeCompId / otherCompId of the enclosing loop;
          // the values are the same because tu.jointCbCr was set to cbfMask above
0
          ComponentID codeCompId = (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr);
3904
0
          ComponentID otherCompId = (codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr);
3905
0
          const QpParam qpCbCr(tu, codeCompId);
3906
3907
0
          tu.getCoeffs(otherCompId).fill(0);   // do we need that?
3908
0
          TU::setCbfAtDepth(tu, otherCompId, tu.depth, false);
3909
3910
0
          PelBuf& codeResi = (codeCompId == COMP_Cr ? crResi : cbResi);
3911
0
          TCoeff  compAbsSum = 0;
3912
0
          if (numTransformCands > 1)
3913
0
          {
3914
0
            if (modeId == 0)
3915
0
            {
3916
0
              m_pcTrQuant->checktransformsNxN(tu, &trModes, 2, codeCompId);
3917
0
              tu.mtsIdx[codeCompId] = trModes[modeId].first;
3918
0
              tu.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
3919
0
              if (!trModes[modeId + 1].second)
3920
0
              {
3921
0
                numTransformCands = 1;
3922
0
              }
3923
0
            }
3924
0
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx(), true);
3925
0
          }
3926
0
          else
3927
0
          {
3928
0
            m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, compAbsSum, m_CABACEstimator->getCtx());
3929
0
          }
3930
0
          if (compAbsSum > 0)
3931
0
          {
3932
0
            m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
3933
0
            codedCbfMask += (codeCompId == COMP_Cb ? 2 : 1);
3934
0
          }
3935
0
          else
3936
0
          {
3937
0
            codeResi.fill(0);
3938
0
          }
3939
3940
0
          if (tu.jointCbCr == 3 && codedCbfMask == 2)
3941
0
          {
3942
0
            codedCbfMask = 3;
3943
0
            TU::setCbfAtDepth(tu, COMP_Cr, tu.depth, true);
3944
0
          }
          // a candidate whose resulting cbf pattern differs from the tested joint mode is treated as not coded
3945
0
          if (codedCbfMask && tu.jointCbCr != codedCbfMask)
3946
0
          {
3947
0
            codedCbfMask = 0;
3948
0
          }
3949
0
          currAbsSum = codedCbfMask;
3950
0
          if (!tu.mtsIdx[codeCompId])
3951
0
          {
3952
0
            numTransformCands = (currAbsSum <= 0) ? 1 : numTransformCands;
3953
0
          }
3954
0
          if (currAbsSum > 0)
3955
0
          {
3956
0
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask >> 1, cbArea, currDepth, false);
3957
0
            m_CABACEstimator->cbf_comp(*tu.cu, codedCbfMask & 1, crArea, currDepth, codedCbfMask >> 1);
3958
0
            m_CABACEstimator->joint_cb_cr(tu, codedCbfMask);
3959
0
            if (codedCbfMask >> 1)
3960
0
              m_CABACEstimator->residual_coding(tu, COMP_Cb);
3961
0
            if (codedCbfMask & 1)
3962
0
              m_CABACEstimator->residual_coding(tu, COMP_Cr);
3963
0
            currCompFracBits = m_CABACEstimator->getEstFracBits();
3964
3965
0
            m_pcTrQuant->invTransformICT(tu, cbResi, crResi);
3966
0
            if (reshape)
3967
0
            {
3968
0
              cbResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cb]);
3969
0
              crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]);
3970
0
            }
3971
3972
0
            currCompDistCb = m_pcRdCost->getDistPart(orgResiBuf.Cb(), cbResi, channelBitDepth, COMP_Cb, DF_SSE);
3973
0
            currCompDistCr = m_pcRdCost->getDistPart(orgResiBuf.Cr(), crResi, channelBitDepth, COMP_Cr, DF_SSE);
3974
0
            currCompCost = m_pcRdCost->calcRdCost(currCompFracBits, currCompDistCr + currCompDistCb, false);
3975
0
          }
3976
0
          else
3977
0
            currCompCost = MAX_DOUBLE;
3978
3979
          // evaluate
3980
0
          if (currCompCost < minCostCbCr)
3981
0
          {
3982
0
            uiSingleDistComp[COMP_Cb] = currCompDistCb;
3983
0
            uiSingleDistComp[COMP_Cr] = currCompDistCr;
3984
0
            minCostCbCr = currCompCost;
3985
            // no backup needed if this is the very last candidate, since tu and csFull already hold it
0
            isLastBest = (cbfMask == jointCbfMasksToTest.back()) && (modeId == (numTransformCands - 1));
3986
0
            if (!isLastBest)
3987
0
            {
3988
0
              bestTU.copyComponentFrom(tu, COMP_Cb);
3989
0
              bestTU.copyComponentFrom(tu, COMP_Cr);
3990
0
              saveCS.getResiBuf(cbArea).copyFrom(csFull->getResiBuf(cbArea));
3991
0
              saveCS.getResiBuf(crArea).copyFrom(csFull->getResiBuf(crArea));
3992
0
            }
3993
0
          }
3994
0
        }
3995
3996
0
        if( !isLastBest )
3997
0
        {
3998
          // copy component
3999
0
          tu.copyComponentFrom( bestTU, COMP_Cb );
4000
0
          tu.copyComponentFrom( bestTU, COMP_Cr );
4001
0
          csFull->getResiBuf( cbArea ).copyFrom( saveCS.getResiBuf( cbArea ) );
4002
0
          csFull->getResiBuf( crArea ).copyFrom( saveCS.getResiBuf( crArea ) );
4003
0
        }
4004
0
      }
4005
0
    }
4006
4007
    // re-estimate the bits of the final TU, starting again from the initial context state
20.2k
    m_CABACEstimator->getCtx() = ctxStart;
4008
20.2k
    m_CABACEstimator->resetBits();
4009
20.2k
    if( !tu.noResidual )
4010
20.2k
    {
      // the cbf flags are estimated in the order Cb, Cr, Y (only Y if there is a single transform block)
4011
20.2k
      static const ComponentID cbf_getComp[3] = { COMP_Cb, COMP_Cr, COMP_Y };
4012
81.0k
      for( unsigned c = 0; c < numTBlocks; c++)
4013
60.8k
      {
4014
60.8k
        const ComponentID compID = numTBlocks>1 ? cbf_getComp[c] : COMP_Y;
4015
60.8k
        if( tu.blocks[compID].valid() )
4016
20.2k
        {
4017
20.2k
          const bool prevCbf = ( compID == COMP_Cr ? TU::getCbfAtDepth( tu, COMP_Cb, currDepth ) : false );
4018
20.2k
          m_CABACEstimator->cbf_comp( *tu.cu, TU::getCbfAtDepth( tu, compID, currDepth ), tu.blocks[compID], currDepth, prevCbf );
4019
20.2k
        }
4020
60.8k
      }
4021
20.2k
    }
4022
4023
81.0k
    for (uint32_t ch = 0; ch < numValidComp; ch++)
4024
60.8k
    {
4025
60.8k
      const ComponentID compID = ComponentID(ch);
4026
60.8k
      if (tu.blocks[compID].valid())
4027
20.2k
      {
4028
20.2k
        if( compID == COMP_Cr )
4029
0
        {
4030
0
          const int cbfMask = ( TU::getCbf( tu, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( tu, COMP_Cr ) ? 1 : 0 );
4031
0
          m_CABACEstimator->joint_cb_cr(tu, cbfMask);
4032
0
        }
4033
20.2k
        if( TU::getCbf( tu, compID ) )
4034
126
        {
4035
126
          m_CABACEstimator->residual_coding( tu, compID );
4036
126
        }
4037
20.2k
        uiSingleDist += uiSingleDistComp[compID];
4038
20.2k
      }
4039
60.8k
    }
4040
20.2k
    if( tu.noResidual )
4041
0
    {
4042
0
      CHECK( m_CABACEstimator->getEstFracBits() > 0, "no residual TU's bits shall be 0" );
4043
0
    }
4044
4045
    // accumulate the result of this TU into the coding structure
20.2k
    csFull->fracBits += m_CABACEstimator->getEstFracBits();
4046
20.2k
    csFull->dist     += uiSingleDist;
4047
20.2k
    csFull->cost      = m_pcRdCost->calcRdCost(csFull->fracBits, csFull->dist, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled);
4048
20.2k
  } // check full
4049
4050
  // code sub-blocks
4051
20.2k
  if( bCheckSplit )
4052
0
  {
    // NOTE(review): bCheckFull is always false in this branch because bCheckSplit == !bCheckFull
4053
0
    if( bCheckFull )
4054
0
    {
4055
0
      m_CABACEstimator->getCtx() = ctxStart;
4056
0
    }
4057
4058
0
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
4059
0
    {
4060
0
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
4061
0
    }
4062
0
    else if( cu.sbtInfo && partitioner.canSplit( CU::getSbtTuSplit( cu.sbtInfo ), cs ) )
4063
0
    {
4064
0
      partitioner.splitCurrArea( CU::getSbtTuSplit( cu.sbtInfo ), cs );
4065
0
    }
4066
0
    else
4067
0
      THROW( "Implicit TU split not available!" );
4068
4069
    // recurse into every part of the split
0
    do
4070
0
    {
4071
0
      xEstimateInterResidualQT(*csSplit, partitioner, bCheckFull ? nullptr : puiZeroDist );
4072
4073
0
      csSplit->cost = m_pcRdCost->calcRdCost( csSplit->fracBits, csSplit->dist );
4074
0
    } while( partitioner.nextPart( *csSplit ) );
4075
4076
0
    partitioner.exitCurrSplit();
4077
4078
0
    unsigned        compCbf[3]  = { 0, 0, 0 };
4079
4080
0
    if( !bCheckFull )
4081
0
    {
      // the cbf at the current depth is set if any sub-TU has a cbf at the next depth
4082
0
      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
4083
0
      {
4084
0
        for( unsigned ch = 0; ch < numTBlocks; ch++ )
4085
0
        {
4086
0
          compCbf[ ch ] |= ( TU::getCbfAtDepth( currTU, ComponentID(ch), currDepth + 1 ) ? 1 : 0 );
4087
0
        }
4088
0
      }
4089
4090
0
      for( auto &currTU : csSplit->traverseTUs( currArea, partitioner.chType ) )
4091
0
      {
4092
0
        TU::setCbfAtDepth   ( currTU, COMP_Y,  currDepth, compCbf[ COMP_Y  ] );
4093
0
        if( currArea.chromaFormat != CHROMA_400 )
4094
0
        {
4095
0
          TU::setCbfAtDepth ( currTU, COMP_Cb, currDepth, compCbf[ COMP_Cb ] );
4096
0
          TU::setCbfAtDepth ( currTU, COMP_Cr, currDepth, compCbf[ COMP_Cr ] );
4097
0
        }
4098
0
      }
4099
4100
      // re-estimate the bits of the complete sub-tree from the initial context state
0
      m_CABACEstimator->getCtx() = ctxStart;
4101
0
      m_CABACEstimator->resetBits();
4102
4103
      // when compID isn't a channel, code Cbfs:
4104
0
      xEncodeInterResidualQT( *csSplit, partitioner, MAX_NUM_TBLOCKS );
4105
4106
0
      for (uint32_t ch = 0; ch < numValidComp; ch++)
4107
0
      {
4108
0
        const ComponentID compID = ComponentID(ch);
4109
0
        xEncodeInterResidualQT( *csSplit, partitioner, compID );
4110
0
      }
4111
4112
0
      csSplit->fracBits = m_CABACEstimator->getEstFracBits();
4113
0
      csSplit->cost     = m_pcRdCost->calcRdCost(csSplit->fracBits, csSplit->dist);
4114
0
    }
4115
0
  }
4116
20.2k
}
4117
4118
/**
 * Derives residual, reconstruction, distortion, bit count and RD cost for an inter-coded CU.
 *
 * With skipResidual set, the CU is flagged as skip with rootCbf off, the reconstruction is a
 * copy of the prediction (luma additionally passed through the forward LUT when LMCS applies),
 * empty TUs are added and only cu_skip_flag / merge_data bits are counted.
 *
 * Otherwise the residual is formed from original minus prediction, xEstimateInterResidualQT()
 * is run, and its cost is compared against coding only a zero rqt_root_cbf. When the zero
 * variant is cheaper (or no CBF is set) the TUs are replaced by empty ones. The CU is then
 * coded once more through xGetSymbolFracBitsInter() to get the final bit count, the
 * reconstruction is rebuilt with clipping and the distortion is measured on it.
 *
 * On return cs.dist, cs.fracBits and cs.cost are updated.
 *
 * \param cs            coding structure providing the CU and its org/pred/resi/reco buffers
 * \param partitioner   supplies chType/treeType and is forwarded to the TU-level routines
 * \param skipResidual  true to evaluate the CU without any residual (SKIP mode)
 */
void InterSearch::encodeResAndCalcRdInterCU(CodingStructure &cs, Partitioner &partitioner, const bool skipResidual )
{
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );
  bool luma      = true;
  bool chroma    = cs.pcv->chrFormat != VVENC_CHROMA_400;
  if( cu.predMode == MODE_IBC )
  {
    luma    = !cu.mccNoLuma  ();
    chroma &= !cu.mccNoChroma();
  }
  if( cu.predMode == MODE_INTER )
  {
    CHECK( CU::isSepTree(cu), "CU with Inter mode must be in single tree" );
  }

  const ChromaFormat format      = cs.area.chromaFormat;
  const int  numValidComponents  = getNumberValidComponents(format);
  const SPS &sps                 = *cs.sps;
  const ReshapeData& reshapeData = cs.picture->reshapeData;

  if( skipResidual ) //  No residual coding : SKIP mode
  {
    cu.skip    = true;
    cu.rootCbf = false;
    CHECK( cu.sbtInfo != 0, "sbtInfo shall be 0 if CU has no residual" );
    cs.getResiBuf().fill(0);
    cs.getRecoBuf().copyFrom(cs.getPredBuf() );
    if( cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
    {
      // the luma reconstruction is passed through the forward LUT
      cs.getRecoBuf().Y().rspSignal( reshapeData.getFwdLUT());
    }

    // add new "empty" TU(s) spanning the whole CU
    cs.addEmptyTUs( partitioner, &cu );
    Distortion distortion = 0;

    for (int comp = 0; comp < numValidComponents; comp++)
    {
      const ComponentID compID = ComponentID(comp);
      if (compID == COMP_Y && !luma)
        continue;
      if (compID != COMP_Y && !chroma)
        continue;
      CPelBuf reco = cs.getRecoBuf (compID);
      CPelBuf org  = cs.getOrgBuf  (compID);
      if ((cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
      {
        const CompArea& areaY = cu.Y();
        const CPelBuf orgLuma = cs.getOrgBuf( areaY );
        if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
        {
          // luma is compared after mapping the reconstruction back with the inverse LUT
          PelBuf tmpRecLuma = cs.getRspRecoBuf();
          tmpRecLuma.rspSignal(reco, reshapeData.getInvLUT());
          distortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[ CH_L ], compID, DF_SSE_WTD, &orgLuma);
        }
        else
        {
          // This branch is also reached for luma when m_lumaLevelToDeltaQPEnabled is set, so the
          // bit depth is selected per component (as in the residual path below) instead of CH_C.
          distortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ toChannelType( compID ) ], compID, DF_SSE_WTD, &orgLuma );
        }
      }
      else
      {
        distortion  += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[ toChannelType( compID ) ], compID, DF_SSE );
      }
    }

    // The CU used for bit estimation is looked up with TREE_D; it gets its own name so that it
    // no longer shadows the function-level 'cu'.
    CodingUnit& skipCu = *cs.getCU(partitioner.chType, TREE_D);
    m_CABACEstimator->resetBits();
    m_CABACEstimator->cu_skip_flag  ( skipCu );
    m_CABACEstimator->merge_data(skipCu);
    cs.fracBits = m_CABACEstimator->getEstFracBits();
    cs.dist     = distortion;
    cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

    return;
  }

  //  Residual coding.
  if (luma)
  {
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      if (!cu.ciip && !CU::isIBC(cu))
      {
        // residual = reshaped original - forward-mapped prediction
        const CompArea& areaY = cu.Y();
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(areaY);
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), tmpPred);
      }
      else
      {
        cs.getResiBuf(COMP_Y).subtract(cs.getRspOrgBuf(), cs.getPredBuf(COMP_Y));
      }
    }
    else
    {
      cs.getResiBuf(COMP_Y).subtract(cs.getOrgBuf(COMP_Y), cs.getPredBuf(COMP_Y));
    }
  }
  if (chroma)
  {
    cs.getResiBuf(COMP_Cb).subtract(cs.getOrgBuf(COMP_Cb), cs.getPredBuf(COMP_Cb));
    cs.getResiBuf(COMP_Cr).subtract(cs.getOrgBuf(COMP_Cr), cs.getPredBuf(COMP_Cr));
  }

  Distortion zeroDistortion = 0;

  // snapshot of the CABAC contexts, restored before the final coding pass
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );

  xEstimateInterResidualQT(cs, partitioner, &zeroDistortion );
  TransformUnit& firstTU = *cs.getTU( partitioner.chType );

  // bits for signalling "no residual": only rqt_root_cbf with rootCbf off
  cu.rootCbf = false;
  m_CABACEstimator->resetBits();
  m_CABACEstimator->rqt_root_cbf( cu );
  const uint64_t zeroFracBits = m_CABACEstimator->getEstFracBits();
  const double   zeroCost     = m_pcRdCost->calcRdCost( zeroFracBits, zeroDistortion, !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled );

  // rootCbf is set as soon as any transform block of the first TU has a CBF at depth 0
  const int  numValidTBlocks   = getNumberValidTBlocks( *cs.pcv );
  for (int i = 0; i < numValidTBlocks; i++)
  {
    cu.rootCbf |= TU::getCbfAtDepth(firstTU, ComponentID(i), 0);
  }

  // -------------------------------------------------------
  // If a block full of 0's is efficient, then just use 0's.
  // The costs at this point do not include header bits.

  if (zeroCost < cs.cost || !cu.rootCbf)
  {
    cu.sbtInfo = 0;
    cu.rootCbf = false;

    cs.clearTUs();

    // add a new "empty" TU spanning the whole CU
    cs.addEmptyTUs( partitioner, &cu );
  }

  // all decisions now made. Fully encode the CU, including the headers:
  m_CABACEstimator->getCtx() = ctxStart;

  uint64_t finalFracBits = xGetSymbolFracBitsInter( cs, partitioner );
  // we've now encoded the CU, and so have a valid bit cost
  if (!cu.rootCbf)
  {
    if (luma)
    {
      cs.getResiBuf().bufs[0].fill(0); // Clear the residual image, if we didn't code it.
    }
    if (chroma && isChromaEnabled(cs.pcv->chrFormat))
    {
      cs.getResiBuf().bufs[1].fill(0); // Clear the residual image, if we didn't code it.
      cs.getResiBuf().bufs[2].fill(0); // Clear the residual image, if we didn't code it.
    }
  }
  if (luma)
  {
    if (cu.rootCbf && cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      if (!cu.ciip && !CU::isIBC(cu))
      {
        // reconstruct from the forward-mapped prediction
        PelBuf tmpPred = m_tmpStorageLCU.getCompactBuf(cu.Y());
        tmpPred.rspSignal(cs.getPredBuf(COMP_Y), reshapeData.getFwdLUT());
        cs.getRecoBuf(COMP_Y).reconstruct(tmpPred, cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
      }
      else
      {
        cs.getRecoBuf(COMP_Y).reconstruct(cs.getPredBuf(COMP_Y), cs.getResiBuf(COMP_Y), cs.slice->clpRngs[COMP_Y]);
      }
    }
    else
    {
      cs.getRecoBuf().bufs[0].reconstruct(cs.getPredBuf().bufs[0], cs.getResiBuf().bufs[0], cs.slice->clpRngs[COMP_Y]);
      if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() && !cu.ciip && !CU::isIBC(cu))
      {
        cs.getRecoBuf().bufs[0].rspSignal(reshapeData.getFwdLUT());
      }
    }
  }
  if (chroma)
  {
    cs.getRecoBuf().bufs[1].reconstruct(cs.getPredBuf().bufs[1], cs.getResiBuf().bufs[1], cs.slice->clpRngs[COMP_Cb]);
    cs.getRecoBuf().bufs[2].reconstruct(cs.getPredBuf().bufs[2], cs.getResiBuf().bufs[2], cs.slice->clpRngs[COMP_Cr]);
  }
  // update with clipped distortion and cost (previously unclipped reconstruction values were used)
  Distortion finalDistortion = 0;

  for (int comp = 0; comp < numValidComponents; comp++)
  {
    const ComponentID compID = ComponentID(comp);
    if (compID == COMP_Y && !luma)
      continue;
    if (compID != COMP_Y && !chroma)
      continue;
    CPelBuf reco = cs.getRecoBuf (compID);
    CPelBuf org  = cs.getOrgBuf  (compID);

    if( (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
    {
      const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
      if (compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
      {
        PelBuf tmpRecLuma = cs.getRspRecoBuf();
        tmpRecLuma.rspSignal( reco, reshapeData.getInvLUT());
        finalDistortion += m_pcRdCost->getDistPart(org, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
      }
      else
      {
        finalDistortion += m_pcRdCost->getDistPart(org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
      }
    }
    else
    {
      finalDistortion += m_pcRdCost->getDistPart( org, reco, sps.bitDepths[toChannelType(compID)], compID, DF_SSE );
    }
  }

  cs.dist     = finalDistortion;
  cs.fracBits = finalFracBits;
  cs.cost     = m_pcRdCost->calcRdCost(cs.fracBits, cs.dist);

  CHECK(cs.tus.size() == 0, "No TUs present");
}
4338
4339
/**
 * Estimates the fractional bits needed to signal the inter CU addressed by the partitioner.
 *
 * The bit counter of the CABAC estimator is reset first. A merge CU without root CBF is
 * turned into a skip CU and only cu_skip_flag (plus merge_data unless ciip is set) is
 * counted. Any other CU is coded with skip flag (only when its luma area is valid),
 * prediction mode, prediction data and residual.
 *
 * \param cs           coding structure the CU is fetched from
 * \param partitioner  provides chType/treeType and is passed on to cu_residual()
 * \return estimated fractional bits reported by the CABAC estimator
 */
uint64_t InterSearch::xGetSymbolFracBitsInter(CodingStructure &cs, Partitioner &partitioner)
{
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );

  m_CABACEstimator->resetBits();

  // merge without residual: signalled as skip
  if( cu.mergeFlag && !cu.rootCbf )
  {
    cu.skip = true;

    m_CABACEstimator->cu_skip_flag( cu );
    if( !cu.ciip )
    {
      m_CABACEstimator->merge_data( cu );
    }
    return m_CABACEstimator->getEstFracBits();
  }

  CHECK( cu.skip, "Skip flag has to be off at this point!" );

  // only the skip flag depends on a valid luma area; the remaining syntax is always coded
  if( cu.Y().valid() )
  {
    m_CABACEstimator->cu_skip_flag( cu );
  }
  m_CABACEstimator->pred_mode   ( cu );
  m_CABACEstimator->cu_pred_data( cu );

  CUCtx cuCtx;
  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;
  m_CABACEstimator->cu_residual( cu, partitioner, cuCtx );

  return m_CABACEstimator->getEstFracBits();
}
4374
4375
double InterSearch::xGetMEDistortionWeight(uint8_t BcwIdx, RefPicList refPicList)
4376
0
{
4377
0
  if( BcwIdx != BCW_DEFAULT )
4378
0
  {
4379
0
    return fabs( (double)getBcwWeight( BcwIdx, refPicList ) / (double)g_BcwWeightBase );
4380
0
  }
4381
0
  else
4382
0
  {
4383
0
    return 0.5;
4384
0
  }
4385
0
}
4386
4387
/**
 * Fetches a buffered uni-prediction result from m_uniMotions if one is flagged readable.
 *
 * When the entry for (eRefPicList, iRefIdx) is in read mode, rcMv and ruiCost are loaded
 * from the buffer, the MV bits relative to pcMvPred (both converted with
 * changeTransPrecInternal2Amvr for cu.imv) are added to ruiBits, and the cost of the
 * resulting ruiBits is added to ruiCost.
 *
 * \param cu           CU whose imv setting selects the MV precision
 * \param eRefPicList  reference list of the buffered motion
 * \param iRefIdx      reference index of the buffered motion
 * \param pcMvPred     MV predictor (copied, not modified)
 * \param rcMv         receives the buffered MV
 * \param ruiBits      in/out: bit count, increased by the MV bits
 * \param ruiCost      out: buffered cost plus the cost of ruiBits
 * \return true if a buffered entry was used, false otherwise (outputs untouched)
 */
bool InterSearch::xReadBufferedUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv& pcMvPred, Mv& rcMv, uint32_t& ruiBits, Distortion& ruiCost )
{
  const uint32_t listIdx = (uint32_t)eRefPicList;
  const uint32_t refIdx  = (uint32_t)iRefIdx;

  if( !m_uniMotions.isReadMode( listIdx, refIdx ) )
  {
    return false;
  }

  m_uniMotions.copyTo( rcMv, ruiCost, listIdx, refIdx );

  // predictor in AMVR precision drives the bit estimate below
  Mv amvrPred = pcMvPred;
  amvrPred.changeTransPrecInternal2Amvr( cu.imv );
  m_pcRdCost->setPredictor( amvrPred );
  m_pcRdCost->setCostScale( 0 );

  Mv amvrMv = rcMv;
  amvrMv.changeTransPrecInternal2Amvr( cu.imv );
  const uint32_t mvBits = m_pcRdCost->getBitsOfVectorWithPredictor( amvrMv.hor, amvrMv.ver, 0 );

  ruiBits += mvBits;
  ruiCost += m_pcRdCost->getCost( ruiBits );
  return true;
}
4408
4409
/**
 * Affine counterpart of xReadBufferedUniMv(): reuses a buffered affine uni-prediction result.
 *
 * When m_uniMotions flags the entry for (eRefPicList, iRefIdx, cu.affineType) as readable,
 * the control-point MVs, cost and mvpIdx are loaded, the predictors are taken from aamvpi
 * at that mvpIdx, and the MV bits of two (affineType == 0) or three control points are
 * added to ruiBits; the cost of the resulting ruiBits is added to ruiCost.
 *
 * \param cu           CU providing affineType
 * \param eRefPicList  reference list of the buffered motion
 * \param iRefIdx      reference index of the buffered motion
 * \param acMvPred     receives the LT/RT/LB predictors for the loaded mvpIdx
 * \param acMv         receives the buffered control-point MVs
 * \param ruiBits      in/out: bit count, increased by the control-point MV bits
 * \param ruiCost      out: buffered cost plus the cost of ruiBits
 * \param mvpIdx       receives the buffered MVP index
 * \param aamvpi       affine AMVP candidates the predictors are read from
 * \return true if a buffered entry was used, false otherwise (outputs untouched)
 */
bool InterSearch::xReadBufferedAffineUniMv( CodingUnit& cu, RefPicList eRefPicList, int32_t iRefIdx, Mv acMvPred[3], Mv acMv[3], uint32_t& ruiBits, Distortion& ruiCost, int& mvpIdx, const AffineAMVPInfo& aamvpi )
{
  const uint32_t listIdx = (uint32_t)eRefPicList;
  const uint32_t refIdx  = (uint32_t)iRefIdx;

  if( !m_uniMotions.isReadModeAffine( listIdx, refIdx, cu.affineType ) )
  {
    return false;
  }

  m_uniMotions.copyAffineMvTo( acMv, ruiCost, listIdx, refIdx, cu.affineType, mvpIdx );
  m_pcRdCost->setCostScale( 0 );

  acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
  acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
  acMvPred[2] = aamvpi.mvCandLB[mvpIdx];

  const int numCtrlPoints = cu.affineType ? 3 : 2;
  uint32_t  ctrlPointBits = 0;

  for( int cp = 0; cp < numCtrlPoints; cp++ )
  {
    // all control points but the first are predicted with the first one's difference added
    Mv cpPred;
    if( cp )
    {
      cpPred = acMvPred[cp] + acMv[0] - acMvPred[0];
    }
    else
    {
      cpPred = acMvPred[cp];
    }
    cpPred.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
    m_pcRdCost->setPredictor( cpPred );

    Mv cpMv = acMv[cp];
    cpMv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_QUARTER );
    ctrlPointBits += m_pcRdCost->getBitsOfVectorWithPredictor( cpMv.hor, cpMv.ver, 0 );
  }

  ruiBits += ctrlPointBits;
  ruiCost += m_pcRdCost->getCost( ruiBits );
  return true;
}
4435
4436
void InterSearch::xSymMvdCheckBestMvp(
4437
  CodingUnit& cu,
4438
  CPelUnitBuf& origBuf,
4439
  Mv curMv,
4440
  RefPicList curRefList,
4441
  AMVPInfo amvpInfo[2][MAX_REF_PICS],
4442
  int32_t BcwIdx,
4443
  Mv cMvPredSym[2],
4444
  int32_t mvpIdxSym[2],
4445
  Distortion& bestCost,
4446
  bool skip
4447
)
4448
0
{
4449
0
  RefPicList tarRefList = (RefPicList)(1 - curRefList);
4450
0
  int32_t refIdxCur = cu.slice->symRefIdx[curRefList];
4451
0
  int32_t refIdxTar = cu.slice->symRefIdx[tarRefList];
4452
4453
0
  MvField cCurMvField, cTarMvField;
4454
0
  cCurMvField.setMvField(curMv, refIdxCur);
4455
0
  AMVPInfo& amvpCur = amvpInfo[curRefList][refIdxCur];
4456
0
  AMVPInfo& amvpTar = amvpInfo[tarRefList][refIdxTar];
4457
0
  m_pcRdCost->setCostScale(0);
4458
4459
0
  double fWeight = 0.0;
4460
0
  PelUnitBuf bufTmp;
4461
4462
  // get prediction of eCurRefPicList
4463
0
  PelUnitBuf predBufA = m_tmpPredStorage[curRefList].getCompactBuf( cu );
4464
0
  const Picture* picRefA = cu.slice->getRefPic(curRefList, cCurMvField.refIdx);
4465
0
  Mv mvA = cCurMvField.mv;
4466
0
  xClipMvSearch( mvA, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4467
0
  xPredInterBlk( COMP_Y, cu, picRefA, mvA, predBufA, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4468
4469
0
  bufTmp = m_tmpStorageLCU.getCompactBuf( cu );
4470
0
  bufTmp.copyFrom( origBuf );
4471
0
  bufTmp.removeHighFreq( predBufA, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs/*, getBcwWeight( cu.BcwIdx, tarRefList )*/ );
4472
0
  fWeight = xGetMEDistortionWeight( cu.BcwIdx, tarRefList );
4473
4474
0
  int32_t skipMvpIdx[2];
4475
0
  skipMvpIdx[0] = skip ? mvpIdxSym[0] : -1;
4476
0
  skipMvpIdx[1] = skip ? mvpIdxSym[1] : -1;
4477
4478
0
  for (int i = 0; i < amvpCur.numCand; i++)
4479
0
  {
4480
0
    for (int j = 0; j < amvpTar.numCand; j++)
4481
0
    {
4482
0
      if (skipMvpIdx[curRefList] == i && skipMvpIdx[tarRefList] == j)
4483
0
        continue;
4484
4485
0
      Distortion cost = MAX_DISTORTION;
4486
0
      cTarMvField.setMvField(curMv.getSymmvdMv(amvpCur.mvCand[i], amvpTar.mvCand[j]), refIdxTar);
4487
4488
      // get prediction of eTarRefPicList
4489
0
      PelUnitBuf predBufB = m_tmpPredStorage[tarRefList].getCompactBuf( cu );
4490
0
      const Picture* picRefB = cu.slice->getRefPic(tarRefList, cTarMvField.refIdx);
4491
0
      Mv mvB = cTarMvField.mv;
4492
0
      xClipMvSearch( mvB, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, m_ifpLines );
4493
0
      xPredInterBlk( COMP_Y, cu, picRefB, mvB, predBufB, false, cu.slice->clpRngs[ COMP_Y ], false, false );
4494
4495
      // calc distortion
4496
0
      cost = ( Distortion ) floor( fWeight * ( double ) m_pcRdCost->getDistPart( bufTmp.Y(), predBufB.Y(), cu.cs->sps->bitDepths[ CH_L ], COMP_Y, DF_HAD ) );
4497
4498
0
      Mv pred = amvpCur.mvCand[i];
4499
0
      pred.changeTransPrecInternal2Amvr(cu.imv);
4500
0
      m_pcRdCost->setPredictor(pred);
4501
0
      Mv mv = curMv;
4502
0
      mv.changeTransPrecInternal2Amvr(cu.imv);
4503
0
      uint32_t bits = m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
4504
0
      bits += m_auiMVPIdxCost[i][AMVP_MAX_NUM_CANDS];
4505
0
      bits += m_auiMVPIdxCost[j][AMVP_MAX_NUM_CANDS];
4506
0
      cost += m_pcRdCost->getCost(bits);
4507
0
      if (cost < bestCost)
4508
0
      {
4509
0
        bestCost = cost;
4510
0
        cMvPredSym[curRefList] = amvpCur.mvCand[i];
4511
0
        cMvPredSym[tarRefList] = amvpTar.mvCand[j];
4512
0
        mvpIdxSym[curRefList] = i;
4513
0
        mvpIdxSym[tarRefList] = j;
4514
0
      }
4515
0
    }
4516
0
  }
4517
0
}
4518
4519
void InterSearch::resetSavedAffineMotion()
4520
116k
{
4521
350k
  for (int i = 0; i < 2; i++)
4522
233k
  {
4523
700k
    for (int j = 0; j < 2; j++)
4524
467k
    {
4525
467k
      m_affineMotion.acMvAffine4Para[i][j] = Mv(0, 0);
4526
467k
      m_affineMotion.acMvAffine6Para[i][j] = Mv(0, 0);
4527
467k
    }
4528
233k
    m_affineMotion.acMvAffine6Para[i][2] = Mv(0, 0);
4529
4530
233k
    m_affineMotion.affine4ParaRefIdx[i] = -1;
4531
233k
    m_affineMotion.affine6ParaRefIdx[i] = -1;
4532
233k
  }
4533
116k
  m_affineMotion.affine4ParaAvail = false;
4534
116k
  m_affineMotion.affine6ParaAvail = false;
4535
116k
}
4536
4537
/**
 * Saves affine control-point MVs and reference indices into m_affineMotion.
 *
 * The slot matching affineType (4- or 6-parameter) is written when BcwIdx is BCW_DEFAULT
 * or when that slot holds no data yet; the slot is then marked available.
 *
 * \param acAffineMv    control-point MVs per list (two are stored for 4-param, three for 6-param)
 * \param affineRefIdx  reference index per list
 * \param affineType    affine model the motion belongs to
 * \param BcwIdx        BCW index of the motion being stored
 */
void InterSearch::storeAffineMotion(Mv acAffineMv[2][3], int16_t affineRefIdx[2], EAffineModel affineType, int BcwIdx)
{
  const bool defaultBcw = ( BcwIdx == BCW_DEFAULT );

  if( affineType == AFFINEMODEL_6PARAM && ( defaultBcw || !m_affineMotion.affine6ParaAvail ) )
  {
    for( int list = 0; list < 2; list++ )
    {
      for( int cp = 0; cp < 3; cp++ )
      {
        m_affineMotion.acMvAffine6Para[list][cp] = acAffineMv[list][cp];
      }
      m_affineMotion.affine6ParaRefIdx[list] = affineRefIdx[list];
    }
    m_affineMotion.affine6ParaAvail = true;
  }

  if( affineType == AFFINEMODEL_4PARAM && ( defaultBcw || !m_affineMotion.affine4ParaAvail ) )
  {
    for( int list = 0; list < 2; list++ )
    {
      for( int cp = 0; cp < 2; cp++ )
      {
        m_affineMotion.acMvAffine4Para[list][cp] = acAffineMv[list][cp];
      }
      m_affineMotion.affine4ParaRefIdx[list] = affineRefIdx[list];
    }
    m_affineMotion.affine4ParaAvail = true;
  }
}
4565
4566
void InterSearch::xPredAffineInterSearch( CodingUnit& cu,
4567
                                          CPelUnitBuf&    origBuf,
4568
                                          int             puIdx,
4569
                                          uint32_t&       lastMode,
4570
                                          Distortion&     affineCost,
4571
                                          Mv              hevcMv[2][MAX_REF_PICS],
4572
                                          Mv              mvAffine4Para[2][MAX_REF_PICS][3],
4573
                                          int             refIdx4Para[2],
4574
                                          uint8_t         BcwIdx,
4575
                                          bool            enforceBcwPred,
4576
                                          uint32_t        BcwIdxBits )
4577
0
{
4578
0
  const Slice &slice = *cu.slice;
4579
4580
0
  affineCost = MAX_DISTORTION;
4581
4582
0
  Mv        cMvZero;
4583
0
  Mv        aacMv[2][3];
4584
0
  Mv        cMvBi[2][3];
4585
0
  AffineMVInfo tmp;
4586
4587
0
  int       iNumPredDir = slice.isInterP() ? 1 : 2;
4588
4589
0
  int mvNum = 2;
4590
0
  mvNum = cu.affineType ? 3 : 2;
4591
4592
  // Mvp
4593
0
  Mv        cMvPred[2][MAX_REF_PICS][3];
4594
0
  Mv        cMvPredBi[2][MAX_REF_PICS][3];
4595
0
  int       aaiMvpIdxBi[2][MAX_REF_PICS];
4596
0
  int       aaiMvpIdx[2][MAX_REF_PICS];
4597
0
  int       aaiMvpNum[2][MAX_REF_PICS];
4598
4599
0
  AffineAMVPInfo aacAffineAMVPInfo[2][MAX_REF_PICS];
4600
0
  AffineAMVPInfo affiAMVPInfoTemp[2];
4601
4602
0
  uint32_t      uiMbBits[3] = { 1, 1, 0 };
4603
0
  int           iRefIdx[2] = { 0,0 }; // If un-initialized, may cause SEGV in bi-directional prediction iterative stage.
4604
0
  int           iRefIdxBi[2];
4605
0
  int           iRefStart, iRefEnd;
4606
0
  int           bestBiPRefIdxL1 = 0;
4607
0
  int           bestBiPMvpL1 = 0;
4608
0
  Distortion    biPDistTemp = MAX_DISTORTION;
4609
4610
0
  Distortion    uiCost[2] = { MAX_DISTORTION, MAX_DISTORTION };
4611
0
  Distortion    uiCostBi = MAX_DISTORTION;
4612
0
  Distortion    uiCostTemp;
4613
4614
0
  uint32_t      uiBits[3] = { 0 };
4615
0
  uint32_t      uiBitsTemp;
4616
0
  Distortion    bestBiPDist = MAX_DISTORTION;
4617
4618
0
  Distortion    uiCostTempL0[MAX_NUM_REF];
4619
0
  for (int iNumRef = 0; iNumRef < MAX_NUM_REF; iNumRef++)
4620
0
  {
4621
0
    uiCostTempL0[iNumRef] = MAX_DISTORTION;
4622
0
  }
4623
0
  uint32_t      uiBitsTempL0[MAX_NUM_REF];
4624
4625
0
  Mv            mvValidList1[4];
4626
0
  int           refIdxValidList1 = 0;
4627
0
  uint32_t      bitsValidList1 = MAX_UINT;
4628
0
  Distortion    costValidList1 = MAX_DISTORTION;
4629
0
  Mv            mvHevc[3];
4630
0
  const bool    affineAmvrEnabled = false;
4631
4632
0
  xGetBlkBits(slice.isInterP(), puIdx, lastMode, uiMbBits);
4633
4634
0
  cu.affine = true;
4635
0
  cu.mergeFlag = false;
4636
0
  if (BcwIdx != BCW_DEFAULT)
4637
0
  {
4638
0
    cu.BcwIdx = BcwIdx;
4639
0
  }
4640
4641
  // Uni-directional prediction
4642
0
  for (int iRefList = 0; iRefList < iNumPredDir; iRefList++)
4643
0
  {
4644
0
    RefPicList  refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
4645
0
    cu.interDir = (iRefList ? 2 : 1);
4646
0
    for (int iRefIdxTemp = 0; iRefIdxTemp < slice.numRefIdx[refPicList]; iRefIdxTemp++)
4647
0
    {
4648
      // Get RefIdx bits
4649
0
      uiBitsTemp = uiMbBits[iRefList];
4650
0
      if (slice.numRefIdx[refPicList] > 1)
4651
0
      {
4652
0
        uiBitsTemp += iRefIdxTemp + 1;
4653
0
        if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
4654
0
        {
4655
0
          uiBitsTemp--;
4656
0
        }
4657
0
      }
4658
4659
      // Do Affine AMVP
4660
0
      bool foundPred = xEstimateAffineAMVP(cu, affiAMVPInfoTemp[refPicList], origBuf, refPicList, iRefIdxTemp, cMvPred[iRefList][iRefIdxTemp], biPDistTemp);
4661
0
      if( !foundPred )
4662
0
        return;
4663
4664
0
      if (affineAmvrEnabled)
4665
0
      {
4666
0
        biPDistTemp += m_pcRdCost->getCost(xCalcAffineMVBits(cu, cMvPred[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]));
4667
0
      }
4668
0
      aaiMvpIdx[iRefList][iRefIdxTemp] = cu.mvpIdx[refPicList];
4669
0
      aaiMvpNum[iRefList][iRefIdxTemp] = cu.mvpNum[refPicList];;
4670
0
      if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
4671
0
      {
4672
0
        xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4673
0
        continue;
4674
0
      }
4675
4676
      // set hevc ME result as start search position when it is best than mvp
4677
0
      for (int i = 0; i<3; i++)
4678
0
      {
4679
0
        mvHevc[i] = hevcMv[iRefList][iRefIdxTemp];
4680
0
        mvHevc[i].roundAffinePrecInternal2Amvr(cu.imv);
4681
0
      }
4682
0
      PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf(cu);
4683
4684
0
      Distortion uiCandCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvHevc, aaiMvpIdx[iRefList][iRefIdxTemp],
4685
0
        AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4686
4687
0
      if (affineAmvrEnabled)
4688
0
      {
4689
0
        uiCandCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvHevc, cMvPred[iRefList][iRefIdxTemp]));
4690
0
      }
4691
4692
      //check stored affine motion
4693
0
      bool affine4Para = cu.affineType == AFFINEMODEL_4PARAM;
4694
0
      bool savedParaAvail = cu.imv && ((m_affineMotion.affine4ParaRefIdx[iRefList] == iRefIdxTemp && affine4Para && m_affineMotion.affine4ParaAvail) ||
4695
0
        (m_affineMotion.affine6ParaRefIdx[iRefList] == iRefIdxTemp && !affine4Para && m_affineMotion.affine6ParaAvail));
4696
4697
0
      if (savedParaAvail)
4698
0
      {
4699
0
        Mv mvFour[3];
4700
0
        for (int i = 0; i < mvNum; i++)
4701
0
        {
4702
0
          mvFour[i] = affine4Para ? m_affineMotion.acMvAffine4Para[iRefList][i] : m_affineMotion.acMvAffine6Para[iRefList][i];
4703
0
          mvFour[i].roundAffinePrecInternal2Amvr(cu.imv);
4704
0
        }
4705
4706
0
        Distortion candCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4707
0
        candCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4708
4709
0
        if (candCostInherit < uiCandCost)
4710
0
        {
4711
0
          uiCandCost = candCostInherit;
4712
0
          memcpy(mvHevc, mvFour, 3 * sizeof(Mv));
4713
0
        }
4714
0
      }
4715
4716
0
      if( cu.affineType == AFFINEMODEL_4PARAM && m_AffineProfList->m_affMVListSize && (!cu.cs->sps->BCW || BcwIdx == BCW_DEFAULT ) )
4717
0
      {
4718
0
        int shift = MAX_CU_DEPTH;
4719
0
        for (int i = 0; i < m_AffineProfList->m_affMVListSize; i++)
4720
0
        {
4721
0
          AffineMVInfo *mvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - i - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4722
          //check;
4723
0
          int j = 0;
4724
0
          for (; j < i; j++)
4725
0
          {
4726
0
            AffineMVInfo *prevMvInfo = m_AffineProfList->m_affMVList + ((m_AffineProfList->m_affMVListIdx - j - 1 + m_AffineProfList->m_affMVListMaxSize) % (m_AffineProfList->m_affMVListMaxSize));
4727
0
            if ((mvInfo->affMVs[iRefList][iRefIdxTemp][0] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][0]) &&
4728
0
              (mvInfo->affMVs[iRefList][iRefIdxTemp][1] == prevMvInfo->affMVs[iRefList][iRefIdxTemp][1])
4729
0
              && (mvInfo->x == prevMvInfo->x) && (mvInfo->y == prevMvInfo->y)
4730
0
              && (mvInfo->w == prevMvInfo->w)
4731
0
              )
4732
0
            {
4733
0
              break;
4734
0
            }
4735
0
          }
4736
0
          if (j < i)
4737
0
            continue;
4738
4739
0
          Mv mvTmp[3], *nbMv = mvInfo->affMVs[iRefList][iRefIdxTemp];
4740
0
          int vx, vy;
4741
0
          int dMvHorX, dMvHorY, dMvVerX, dMvVerY;
4742
0
          int mvScaleHor = nbMv[0].hor * (1<< shift);
4743
0
          int mvScaleVer = nbMv[0].ver * (1<< shift);
4744
0
          Mv dMv = nbMv[1] - nbMv[0];
4745
0
          dMvHorX = dMv.hor *(1<<(shift - Log2(mvInfo->w)));
4746
0
          dMvHorY = dMv.ver *(1<< (shift - Log2(mvInfo->w)));
4747
0
          dMvVerX = -dMvHorY;
4748
0
          dMvVerY = dMvHorX;
4749
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4750
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4751
0
          roundAffineMv(vx, vy, shift);
4752
0
          mvTmp[0] = Mv(vx, vy);
4753
0
          mvTmp[0].clipToStorageBitDepth();
4754
0
          clipMv(mvTmp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4755
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4756
0
          vx = mvScaleHor + dMvHorX * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerX * (cu.Y().y - mvInfo->y);
4757
0
          vy = mvScaleVer + dMvHorY * (cu.Y().x + cu.Y().width - mvInfo->x) + dMvVerY * (cu.Y().y - mvInfo->y);
4758
0
          roundAffineMv(vx, vy, shift);
4759
0
          mvTmp[1] = Mv(vx, vy);
4760
0
          mvTmp[1].clipToStorageBitDepth();
4761
0
          clipMv(mvTmp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
4762
0
          mvTmp[0].roundAffinePrecInternal2Amvr(cu.imv);
4763
0
          mvTmp[1].roundAffinePrecInternal2Amvr(cu.imv);
4764
0
          Distortion tmpCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mvTmp, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4765
0
          if (affineAmvrEnabled)
4766
0
          {
4767
0
            tmpCost += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvTmp, cMvPred[iRefList][iRefIdxTemp]));
4768
0
          }
4769
0
          if (tmpCost < uiCandCost)
4770
0
          {
4771
0
            uiCandCost = tmpCost;
4772
0
            std::memcpy(mvHevc, mvTmp, 3 * sizeof(Mv));
4773
0
          }
4774
0
        }
4775
0
      }
4776
0
      if (cu.affineType == AFFINEMODEL_6PARAM)
4777
0
      {
4778
0
        Mv mvFour[3];
4779
0
        mvFour[0] = mvAffine4Para[iRefList][iRefIdxTemp][0];
4780
0
        mvFour[1] = mvAffine4Para[iRefList][iRefIdxTemp][1];
4781
0
        mvAffine4Para[iRefList][iRefIdxTemp][0].roundAffinePrecInternal2Amvr(cu.imv);
4782
0
        mvAffine4Para[iRefList][iRefIdxTemp][1].roundAffinePrecInternal2Amvr(cu.imv);
4783
4784
0
        int shift = MAX_CU_DEPTH;
4785
0
        int vx2 = (mvFour[0].hor * (1<< shift)) - ((mvFour[1].ver - mvFour[0].ver) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4786
0
        int vy2 = (mvFour[0].ver * (1<< shift)) + ((mvFour[1].hor - mvFour[0].hor) * (1<< (shift + Log2(cu.lheight()) - Log2(cu.lwidth()))));
4787
0
        int offset = (1 << (shift - 1));
4788
0
        vx2 = (vx2 + offset - (vx2 >= 0)) >> shift;
4789
0
        vy2 = (vy2 + offset - (vy2 >= 0)) >> shift;
4790
0
        mvFour[2].hor = vx2;
4791
0
        mvFour[2].ver = vy2;
4792
0
        mvFour[2].clipToStorageBitDepth();
4793
0
        mvFour[0].roundAffinePrecInternal2Amvr(cu.imv);
4794
0
        mvFour[1].roundAffinePrecInternal2Amvr(cu.imv);
4795
0
        mvFour[2].roundAffinePrecInternal2Amvr(cu.imv);
4796
0
        Distortion uiCandCostInherit = xGetAffineTemplateCost(cu, origBuf, predBuf, mvFour, aaiMvpIdx[iRefList][iRefIdxTemp], AMVP_MAX_NUM_CANDS, refPicList, iRefIdxTemp);
4797
0
        if (affineAmvrEnabled)
4798
0
        {
4799
0
          uiCandCostInherit += m_pcRdCost->getCost(xCalcAffineMVBits(cu, mvFour, cMvPred[iRefList][iRefIdxTemp]));
4800
0
        }
4801
0
        if (uiCandCostInherit < uiCandCost)
4802
0
        {
4803
0
          uiCandCost = uiCandCostInherit;
4804
0
          for (int i = 0; i < 3; i++)
4805
0
          {
4806
0
            mvHevc[i] = mvFour[i];
4807
0
          }
4808
0
        }
4809
0
      }
4810
4811
0
      if (uiCandCost < biPDistTemp)
4812
0
      {
4813
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], mvHevc, sizeof(Mv) * 3);
4814
0
      }
4815
0
      else
4816
0
      {
4817
0
        ::memcpy(tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4818
0
      }
4819
4820
      // GPB list 1, save the best MvpIdx, RefIdx and Cost
4821
0
      if (slice.picHeader->mvdL1Zero && iRefList == 1 && biPDistTemp < bestBiPDist)
4822
0
      {
4823
0
        bestBiPDist = biPDistTemp;
4824
0
        bestBiPMvpL1 = aaiMvpIdx[iRefList][iRefIdxTemp];
4825
0
        bestBiPRefIdxL1 = iRefIdxTemp;
4826
0
      }
4827
4828
      // Update bits
4829
0
      uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdx[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
4830
4831
0
      if (m_pcEncCfg->m_bFastMEForGenBLowDelayEnabled && iRefList == 1)   // list 1
4832
0
      {
4833
0
        if (slice.list1IdxToList0Idx[iRefIdxTemp] >= 0 && (cu.affineType != AFFINEMODEL_6PARAM || slice.list1IdxToList0Idx[iRefIdxTemp] == refIdx4Para[0]))
4834
0
        {
4835
0
          int iList1ToList0Idx = slice.list1IdxToList0Idx[iRefIdxTemp];
4836
0
          ::memcpy(tmp.affMVs[1][iRefIdxTemp], tmp.affMVs[0][iList1ToList0Idx], sizeof(Mv) * 3);
4837
0
          uiCostTemp = uiCostTempL0[iList1ToList0Idx];
4838
4839
0
          uiCostTemp -= m_pcRdCost->getCost(uiBitsTempL0[iList1ToList0Idx]);
4840
0
          uiBitsTemp += xCalcAffineMVBits(cu, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp]);
4841
          /*calculate the correct cost*/
4842
0
          uiCostTemp += m_pcRdCost->getCost(uiBitsTemp);
4843
0
          DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
4844
0
        }
4845
0
        else
4846
0
        {
4847
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4848
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4849
0
        }
4850
0
      }
4851
0
      else
4852
0
      {
4853
0
        xAffineMotionEstimation(cu, origBuf, refPicList, cMvPred[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
4854
0
                                uiBitsTemp, uiCostTemp, aaiMvpIdx[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
4855
0
      }
4856
      
4857
0
      if( slice.sps->BCW && cu.BcwIdx == BCW_DEFAULT && slice.isInterB() )
4858
0
      {
4859
0
        m_uniMotions.setReadModeAffine( true, (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType );
4860
0
        m_uniMotions.copyAffineMvFrom( tmp.affMVs[iRefList][iRefIdxTemp], uiCostTemp - m_pcRdCost->getCost(uiBitsTemp), (uint8_t)iRefList, (uint8_t)iRefIdxTemp, cu.affineType,
4861
0
                                       aaiMvpIdx[iRefList][iRefIdxTemp] );
4862
0
      }
4863
4864
      // Set best AMVP Index
4865
0
      xCopyAffineAMVPInfo(affiAMVPInfoTemp[refPicList], aacAffineAMVPInfo[iRefList][iRefIdxTemp]);
4866
0
      if (cu.imv != 2)//|| !m_pcEncCfg->getUseAffineAmvrEncOpt())
4867
0
        xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPred[iRefList][iRefIdxTemp], aaiMvpIdx[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
4868
4869
0
      if (iRefList == 0)
4870
0
      {
4871
0
        uiCostTempL0[iRefIdxTemp] = uiCostTemp;
4872
0
        uiBitsTempL0[iRefIdxTemp] = uiBitsTemp;
4873
0
      }
4874
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d, uiCost[iRefList]=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp, uiCost[iRefList]);
4875
0
      if (uiCostTemp < uiCost[iRefList])
4876
0
      {
4877
0
        uiCost[iRefList] = uiCostTemp;
4878
0
        uiBits[iRefList] = uiBitsTemp; // storing for bi-prediction
4879
4880
                                       // set best motion
4881
0
        ::memcpy(aacMv[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4882
0
        iRefIdx[iRefList] = iRefIdxTemp;
4883
0
      }
4884
4885
0
      if (iRefList == 1 && uiCostTemp < costValidList1 && slice.list1IdxToList0Idx[iRefIdxTemp] < 0)
4886
0
      {
4887
0
        costValidList1 = uiCostTemp;
4888
0
        bitsValidList1 = uiBitsTemp;
4889
4890
        // set motion
4891
0
        memcpy(mvValidList1, tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
4892
0
        refIdxValidList1 = iRefIdxTemp;
4893
0
      }
4894
0
    } // End refIdx loop
4895
0
  } // end Uni-prediction
4896
4897
0
  if (cu.affineType == AFFINEMODEL_4PARAM)
4898
0
  {
4899
0
    ::memcpy(mvAffine4Para, tmp.affMVs, sizeof(tmp.affMVs));
4900
0
    if (cu.imv == IMV_OFF)
4901
0
    {
4902
0
      m_AffineProfList->insert( tmp, cu.Y());
4903
0
    }
4904
0
  }
4905
4906
  // Bi-directional prediction
4907
0
  if (slice.isInterB() && !CU::isBipredRestriction(cu))
4908
0
  {
4909
0
    cu.interDir = 3;
4910
0
    m_isBi = true;
4911
4912
    // Set as best list0 and list1
4913
0
    iRefIdxBi[0] = iRefIdx[0];
4914
0
    iRefIdxBi[1] = iRefIdx[1];
4915
4916
0
    ::memcpy(cMvBi, aacMv, sizeof(aacMv));
4917
0
    ::memcpy(cMvPredBi, cMvPred, sizeof(cMvPred));
4918
0
    ::memcpy(aaiMvpIdxBi, aaiMvpIdx, sizeof(aaiMvpIdx));
4919
4920
0
    uint32_t uiMotBits[2];
4921
0
    bool doBiPred = true;
4922
4923
0
    if (slice.picHeader->mvdL1Zero) // GPB, list 1 only use Mvp
4924
0
    {
4925
0
      xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][bestBiPRefIdxL1], affiAMVPInfoTemp[REF_PIC_LIST_1]);
4926
0
      cu.mvpIdx[REF_PIC_LIST_1] = bestBiPMvpL1;
4927
0
      aaiMvpIdxBi[1][bestBiPRefIdxL1] = bestBiPMvpL1;
4928
4929
      // Set Mv for list1
4930
0
      Mv pcMvTemp[3] = { affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLT[bestBiPMvpL1],
4931
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandRT[bestBiPMvpL1],
4932
0
                         affiAMVPInfoTemp[REF_PIC_LIST_1].mvCandLB[bestBiPMvpL1] };
4933
0
      ::memcpy(cMvPredBi[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4934
0
      ::memcpy(cMvBi[1], pcMvTemp, sizeof(Mv) * 3);
4935
0
      ::memcpy(tmp.affMVs[1][bestBiPRefIdxL1], pcMvTemp, sizeof(Mv) * 3);
4936
0
      iRefIdxBi[1] = bestBiPRefIdxL1;
4937
4938
0
      if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, pcMvTemp, m_pcEncCfg->m_ifpLines ) )
4939
0
      {
4940
        // this mvp cannot be used for mv, skip Bi-pred
4941
0
        uiCostBi = MAX_DISTORTION;
4942
0
        doBiPred = false;
4943
0
      }
4944
0
      else
4945
0
      {
4946
4947
        // Get list1 prediction block
4948
0
        CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
4949
0
        cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
4950
4951
0
        PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_1].getCompactBuf( cu );
4952
0
        motionCompensation(cu, predBufTmp, REF_PIC_LIST_1);
4953
4954
        // Update bits
4955
0
        uiMotBits[0] = uiBits[0] - uiMbBits[0];
4956
0
        uiMotBits[1] = uiMbBits[1];
4957
4958
0
        if (slice.numRefIdx[REF_PIC_LIST_1] > 1)
4959
0
        {
4960
0
          uiMotBits[1] += bestBiPRefIdxL1 + 1;
4961
0
          if (bestBiPRefIdxL1 == slice.numRefIdx[REF_PIC_LIST_1] - 1)
4962
0
          {
4963
0
            uiMotBits[1]--;
4964
0
          }
4965
0
        }
4966
0
        uiMotBits[1] += m_auiMVPIdxCost[aaiMvpIdxBi[1][bestBiPRefIdxL1]][AMVP_MAX_NUM_CANDS];
4967
0
        uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4968
0
      }
4969
0
    }
4970
0
    else
4971
0
    {
4972
0
      uiMotBits[0] = uiBits[0] - uiMbBits[0];
4973
0
      uiMotBits[1] = uiBits[1] - uiMbBits[1];
4974
0
      uiBits[2] = uiMbBits[2] + uiMotBits[0] + uiMotBits[1];
4975
0
    }
4976
4977
0
    if (doBiPred)
4978
0
    {
4979
      // 4-times iteration (default)
4980
0
      int iNumIter = 4;
4981
      // fast encoder setting or GPB: only one iteration
4982
0
      if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2 || slice.picHeader->mvdL1Zero)
4983
0
      {
4984
0
        iNumIter = 1;
4985
0
      }
4986
4987
0
      for (int iIter = 0; iIter < iNumIter; iIter++)
4988
0
      {
4989
        // Set RefList
4990
0
        int iRefList = iIter % 2;
4991
0
        if (m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE3 || m_pcEncCfg->m_fastInterSearchMode == VVENC_FASTINTERSEARCH_MODE2)
4992
0
        {
4993
0
          if (uiCost[0] <= uiCost[1])
4994
0
          {
4995
0
            iRefList = 1;
4996
0
          }
4997
0
          else
4998
0
          {
4999
0
            iRefList = 0;
5000
0
          }
5001
0
        }
5002
0
        else if (iIter == 0)
5003
0
        {
5004
0
          iRefList = 0;
5005
0
        }
5006
5007
        // First iterate, get prediction block of opposite direction
5008
0
        if (iIter == 0 && !slice.picHeader->mvdL1Zero)
5009
0
        {
5010
0
          if( m_pcEncCfg->m_ifpLines && !xIsAffineMvInRangeFPP( cu, aacMv[1 - iRefList], m_pcEncCfg->m_ifpLines ) )
5011
0
          {
5012
0
            continue;
5013
0
          }
5014
5015
0
          CU::setAllAffineMv(cu, aacMv[1 - iRefList][0], aacMv[1 - iRefList][1], aacMv[1 - iRefList][2], RefPicList(1 - iRefList));
5016
0
          cu.refIdx[1 - iRefList] = iRefIdx[1 - iRefList];
5017
5018
0
          PelUnitBuf predBufTmp = m_tmpPredStorage[1 - iRefList].getCompactBuf( cu );
5019
0
          motionCompensation(cu, predBufTmp, RefPicList(1 - iRefList));
5020
0
        }
5021
5022
0
        RefPicList refPicList = (iRefList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
5023
5024
0
        if (slice.picHeader->mvdL1Zero) // GPB, fix List 1, search List 0
5025
0
        {
5026
0
          iRefList = 0;
5027
0
          refPicList = REF_PIC_LIST_0;
5028
0
        }
5029
5030
0
        bool bChanged = false;
5031
5032
0
        iRefStart = 0;
5033
0
        iRefEnd = slice.numRefIdx[refPicList] - 1;
5034
0
        for (int iRefIdxTemp = iRefStart; iRefIdxTemp <= iRefEnd; iRefIdxTemp++)
5035
0
        {
5036
0
          if (cu.affineType == AFFINEMODEL_6PARAM && refIdx4Para[iRefList] != iRefIdxTemp)
5037
0
          {
5038
0
            continue;
5039
0
          }
5040
          // update bits
5041
0
          uiBitsTemp = uiMbBits[2] + uiMotBits[1 - iRefList];
5042
0
          uiBitsTemp += ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5043
0
          if (slice.numRefIdx[refPicList] > 1)
5044
0
          {
5045
0
            uiBitsTemp += iRefIdxTemp + 1;
5046
0
            if (iRefIdxTemp == slice.numRefIdx[refPicList] - 1)
5047
0
            {
5048
0
              uiBitsTemp--;
5049
0
            }
5050
0
          }
5051
0
          uiBitsTemp += m_auiMVPIdxCost[aaiMvpIdxBi[iRefList][iRefIdxTemp]][AMVP_MAX_NUM_CANDS];
5052
5053
          // call Affine ME
5054
0
          xAffineMotionEstimation(cu, origBuf, refPicList, cMvPredBi[iRefList][iRefIdxTemp], iRefIdxTemp, tmp.affMVs[iRefList][iRefIdxTemp], 
5055
0
                                  uiBitsTemp, uiCostTemp, aaiMvpIdxBi[iRefList][iRefIdxTemp], aacAffineAMVPInfo[iRefList][iRefIdxTemp], true);
5056
0
          xCopyAffineAMVPInfo(aacAffineAMVPInfo[iRefList][iRefIdxTemp], affiAMVPInfoTemp[refPicList]);
5057
0
          if (cu.imv != 2)
5058
0
          {
5059
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[refPicList], refPicList, tmp.affMVs[iRefList][iRefIdxTemp], cMvPredBi[iRefList][iRefIdxTemp], aaiMvpIdxBi[iRefList][iRefIdxTemp], uiBitsTemp, uiCostTemp);
5060
0
          }
5061
5062
0
          if (uiCostTemp < uiCostBi)
5063
0
          {
5064
0
            bChanged = true;
5065
0
            ::memcpy(cMvBi[iRefList], tmp.affMVs[iRefList][iRefIdxTemp], sizeof(Mv) * 3);
5066
0
            iRefIdxBi[iRefList] = iRefIdxTemp;
5067
5068
0
            uiCostBi = uiCostTemp;
5069
0
            uiMotBits[iRefList] = uiBitsTemp - uiMbBits[2] - uiMotBits[1 - iRefList];
5070
0
            uiMotBits[iRefList] -= ( (cu.slice->sps->BCW == true) ? BcwIdxBits : 0 );
5071
0
            uiBits[2] = uiBitsTemp;
5072
5073
0
            if (iNumIter != 1) // MC for next iter
5074
0
            {
5075
              //  Set motion
5076
0
              CU::setAllAffineMv(cu, cMvBi[iRefList][0], cMvBi[iRefList][1], cMvBi[iRefList][2], refPicList);
5077
0
              cu.refIdx[refPicList] = iRefIdxBi[refPicList];
5078
0
              PelUnitBuf predBufTmp = m_tmpPredStorage[iRefList].getCompactBuf( cu );
5079
0
              motionCompensation(cu, predBufTmp, refPicList);
5080
0
            }
5081
0
          }
5082
0
        } // for loop-iRefIdxTemp
5083
5084
0
        if (!bChanged)
5085
0
        {
5086
0
          if ((uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) || enforceBcwPred)
5087
0
          {
5088
0
            xCopyAffineAMVPInfo(aacAffineAMVPInfo[0][iRefIdxBi[0]], affiAMVPInfoTemp[REF_PIC_LIST_0]);
5089
0
            xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_0], REF_PIC_LIST_0, cMvBi[0], cMvPredBi[0][iRefIdxBi[0]], aaiMvpIdxBi[0][iRefIdxBi[0]], uiBits[2], uiCostBi);
5090
5091
0
            if (!slice.picHeader->mvdL1Zero)
5092
0
            {
5093
0
              xCopyAffineAMVPInfo(aacAffineAMVPInfo[1][iRefIdxBi[1]], affiAMVPInfoTemp[REF_PIC_LIST_1]);
5094
0
              xCheckBestAffineMVP(cu, affiAMVPInfoTemp[REF_PIC_LIST_1], REF_PIC_LIST_1, cMvBi[1], cMvPredBi[1][iRefIdxBi[1]], aaiMvpIdxBi[1][iRefIdxBi[1]], uiBits[2], uiCostBi);
5095
0
            }
5096
0
          }
5097
0
          break;
5098
0
        }
5099
0
      } // for loop-iter
5100
0
    }
5101
0
    m_isBi = false;
5102
0
  } // if (B_SLICE)
5103
5104
0
  cu.mv [REF_PIC_LIST_0][0] = Mv();
5105
0
  cu.mv [REF_PIC_LIST_1][0] = Mv();
5106
0
  cu.mvd[REF_PIC_LIST_0][0] = cMvZero;
5107
0
  cu.mvd[REF_PIC_LIST_1][0] = cMvZero;
5108
0
  cu.refIdx[REF_PIC_LIST_0] = NOT_VALID;
5109
0
  cu.refIdx[REF_PIC_LIST_1] = NOT_VALID;
5110
0
  cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
5111
0
  cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;
5112
0
  cu.mvpNum[REF_PIC_LIST_0] = NOT_VALID;
5113
0
  cu.mvpNum[REF_PIC_LIST_1] = NOT_VALID;
5114
5115
0
  for (int verIdx = 0; verIdx < 3; verIdx++)
5116
0
  {
5117
0
    cu.mvd[REF_PIC_LIST_0][verIdx] = cMvZero;
5118
0
    cu.mvd[REF_PIC_LIST_1][verIdx] = cMvZero;
5119
0
  }
5120
5121
  // Set Motion Field
5122
0
  memcpy(aacMv[1], mvValidList1, sizeof(Mv) * 3);
5123
0
  iRefIdx[1] = refIdxValidList1;
5124
0
  uiBits[1] = bitsValidList1;
5125
0
  uiCost[1] = costValidList1;
5126
5127
0
  if (enforceBcwPred)
5128
0
  {
5129
0
    uiCost[0] = uiCost[1] = MAX_UINT;
5130
0
  }
5131
5132
  // Affine ME result set
5133
0
  if (uiCostBi <= uiCost[0] && uiCostBi <= uiCost[1]) // Bi
5134
0
  {
5135
0
    lastMode = 2;
5136
0
    affineCost = uiCostBi;
5137
0
    cu.interDir = 3;
5138
0
    CU::setAllAffineMv(cu, cMvBi[0][0], cMvBi[0][1], cMvBi[0][2], REF_PIC_LIST_0);
5139
0
    CU::setAllAffineMv(cu, cMvBi[1][0], cMvBi[1][1], cMvBi[1][2], REF_PIC_LIST_1);
5140
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdxBi[0];
5141
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdxBi[1];
5142
5143
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5144
0
    {
5145
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = cMvBi[0][verIdx] - cMvPredBi[0][iRefIdxBi[0]][verIdx];
5146
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = cMvBi[1][verIdx] - cMvPredBi[1][iRefIdxBi[1]][verIdx];
5147
0
      if (verIdx != 0)
5148
0
      {
5149
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5150
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5151
0
      }
5152
0
    }
5153
5154
5155
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdxBi[0][iRefIdxBi[0]];
5156
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdxBi[0]];
5157
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdxBi[1][iRefIdxBi[1]];
5158
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdxBi[1]];
5159
0
  }
5160
0
  else if (uiCost[0] <= uiCost[1]) // List 0
5161
0
  {
5162
0
    lastMode = 0;
5163
0
    affineCost = uiCost[0];
5164
0
    cu.interDir = 1;
5165
0
    CU::setAllAffineMv(cu, aacMv[0][0], aacMv[0][1], aacMv[0][2], REF_PIC_LIST_0);
5166
0
    cu.refIdx[REF_PIC_LIST_0] = iRefIdx[0];
5167
5168
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5169
0
    {
5170
0
      cu.mvd[REF_PIC_LIST_0][verIdx] = aacMv[0][verIdx] - cMvPred[0][iRefIdx[0]][verIdx];
5171
0
      if (verIdx != 0)
5172
0
      {
5173
0
        cu.mvd[0][verIdx] = cu.mvd[0][verIdx] - cu.mvd[0][0];
5174
0
      }
5175
0
    }
5176
5177
0
    cu.mvpIdx[REF_PIC_LIST_0] = aaiMvpIdx[0][iRefIdx[0]];
5178
0
    cu.mvpNum[REF_PIC_LIST_0] = aaiMvpNum[0][iRefIdx[0]];
5179
0
  }
5180
0
  else
5181
0
  {
5182
0
    lastMode = 1;
5183
0
    affineCost = uiCost[1];
5184
0
    cu.interDir = 2;
5185
0
    CU::setAllAffineMv(cu, aacMv[1][0], aacMv[1][1], aacMv[1][2], REF_PIC_LIST_1);
5186
0
    cu.refIdx[REF_PIC_LIST_1] = iRefIdx[1];
5187
5188
0
    for (int verIdx = 0; verIdx < mvNum; verIdx++)
5189
0
    {
5190
0
      cu.mvd[REF_PIC_LIST_1][verIdx] = aacMv[1][verIdx] - cMvPred[1][iRefIdx[1]][verIdx];
5191
0
      if (verIdx != 0)
5192
0
      {
5193
0
        cu.mvd[1][verIdx] = cu.mvd[1][verIdx] - cu.mvd[1][0];
5194
0
      }
5195
0
    }
5196
5197
0
    cu.mvpIdx[REF_PIC_LIST_1] = aaiMvpIdx[1][iRefIdx[1]];
5198
0
    cu.mvpNum[REF_PIC_LIST_1] = aaiMvpNum[1][iRefIdx[1]];
5199
0
  }
5200
0
  if (BcwIdx != BCW_DEFAULT)
5201
0
  {
5202
0
    cu.BcwIdx = BCW_DEFAULT;
5203
0
  }
5204
0
}
5205
5206
// Evaluates one affine control-point MV candidate set against the original luma block.
//
// cu          coding unit being searched
// origBuf     original samples; only the luma plane is compared
// predBuf     scratch buffer that receives the luma prediction
// acMvCand    the three control-point MVs to test (copied locally, not modified)
// iMVPIdx     MVP index whose signalling cost is added to the result
// iMVPNum     second index into m_auiMVPIdxCost
// refPicList  reference list used for the prediction
// iRefIdx     reference index within refPicList
//
// Returns the DF_HAD luma distortion plus the cost of the MVP index bits, or
// MAX_DISTORTION >> 1 when m_ifpLines is set and xIsAffineMvInRangeFPP rejects
// the candidate.
Distortion InterSearch::xGetAffineTemplateCost(CodingUnit& cu, CPelUnitBuf& origBuf, PelUnitBuf& predBuf, Mv acMvCand[3], int iMVPIdx, int iMVPNum, RefPicList refPicList, int iRefIdx)
{
  const Picture* picRef = cu.slice->getRefPic(refPicList, iRefIdx);

  // Work on a private copy of the candidate control-point MVs.
  Mv mv[3];
  memcpy(mv, acMvCand, sizeof(mv));

  // When the ifpLines restriction is active, candidates outside the allowed
  // range are not predicted at all and get a large fixed cost instead.
  if (m_pcEncCfg->m_ifpLines)
  {
    if (!xIsAffineMvInRangeFPP(cu, mv, m_pcEncCfg->m_ifpLines))
    {
      return MAX_DISTORTION>>1;
    }
  }

  // Luma-only affine prediction into predBuf.
  xPredAffineBlk(COMP_Y, cu, picRef, mv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);

  // Distortion of the prediction, then the cost of signalling the MVP index.
  Distortion templateCost = m_pcRdCost->getDistPart(origBuf.Y(), predBuf.Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
  templateCost += m_pcRdCost->getCost(m_auiMVPIdxCost[iMVPIdx][iMVPNum]);

  DTRACE(g_trace_ctx, D_COMMON, " (%d) affineTemplateCost=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), templateCost);
  return templateCost;
}
5230
5231
// Solves an iOrder x iOrder linear system by Gaussian elimination with partial
// pivoting followed by back-substitution.
//
// dEqualCoeff  array of at least iOrder + 1 row pointers, each row holding at
//              least iOrder + 1 doubles. Rows 1..iOrder are the equations:
//              columns 0..iOrder-1 carry the coefficients of the unknowns and
//              column iOrder carries the right-hand side. Row 0 is used as
//              scratch space while swapping rows. The matrix is modified in
//              place.
// iOrder       number of unknowns; values below 1 are treated as an empty
//              system and nothing is read or written.
// dAffinePara  receives the iOrder solution values. It is left all zero when a
//              zero pivot is met (singular system).
void solveEqual(double** dEqualCoeff, int iOrder, double* dAffinePara)
{
  // An empty system has no solution entries; returning here also keeps the
  // [iOrder - 1] column accesses further down inside the arrays.
  if (iOrder < 1)
  {
    return;
  }

  // Default result: every early exit below reports the all-zero solution.
  for (int k = 0; k < iOrder; k++)
  {
    dAffinePara[k] = 0.;
  }

  // Forward elimination: step i works on column i - 1 with rows i..iOrder.
  for (int i = 1; i < iOrder; i++)
  {
    const int pivotCol = i - 1;

    // Pick the row with the largest magnitude in the pivot column.
    double largest = fabs(dEqualCoeff[i][pivotCol]);
    int    pivotRow = i;
    for (int j = i + 1; j < iOrder + 1; j++)
    {
      const double magnitude = fabs(dEqualCoeff[j][pivotCol]);
      if (magnitude > largest)
      {
        largest  = magnitude;
        pivotRow = j;
      }
    }

    // Bring that row into position i, using row 0 as the temporary.
    if (pivotRow != i)
    {
      for (int j = 0; j < iOrder + 1; j++)
      {
        dEqualCoeff[0][j]        = dEqualCoeff[i][j];
        dEqualCoeff[i][j]        = dEqualCoeff[pivotRow][j];
        dEqualCoeff[pivotRow][j] = dEqualCoeff[0][j];
      }
    }

    // A zero pivot after the swap means the whole column is zero: give up and
    // leave the zero solution in place.
    if (dEqualCoeff[i][pivotCol] == 0.)
    {
      return;
    }

    // Eliminate the pivot column from the rows below. Only columns >= i are
    // updated, so the multiplier read from column i - 1 stays unchanged while
    // a row is processed. The expression order is kept as is so results stay
    // bit-identical.
    for (int j = i + 1; j < iOrder + 1; j++)
    {
      for (int k = i; k < iOrder + 1; k++)
      {
        dEqualCoeff[j][k] = dEqualCoeff[j][k] - dEqualCoeff[i][k] * dEqualCoeff[j][pivotCol] / dEqualCoeff[i][pivotCol];
      }
    }
  }

  // Back-substitution, starting with the last unknown.
  if (dEqualCoeff[iOrder][iOrder - 1] == 0.)
  {
    return;
  }
  dAffinePara[iOrder - 1] = dEqualCoeff[iOrder][iOrder] / dEqualCoeff[iOrder][iOrder - 1];

  for (int i = iOrder - 2; i >= 0; i--)
  {
    // Row i + 1 determines unknown i; a zero diagonal entry makes the system
    // unsolvable, so any partial result is discarded.
    if (dEqualCoeff[i + 1][i] == 0.)
    {
      for (int k = 0; k < iOrder; k++)
      {
        dAffinePara[k] = 0.;
      }
      return;
    }

    double known = 0;
    for (int j = i + 1; j < iOrder; j++)
    {
      known += dEqualCoeff[i + 1][j] * dAffinePara[j];
    }
    dAffinePara[i] = (dEqualCoeff[i + 1][iOrder] - known) / dEqualCoeff[i + 1][i];
  }
}
5301
5302
// Re-selects the affine MVP candidate that makes the already chosen
// control-point MVs cheapest to signal.
//
// cu              coding unit; affineType decides whether a third control point is used
// affineAMVPInfo  candidate list (mvCandLT / mvCandRT / mvCandLB, numCand)
// refPicList      not used by this function
// acMv            control-point MVs whose bit cost is evaluated (read only)
// acMvPred        in/out: current predictor, overwritten when a cheaper candidate is found
// riMVPIdx        in/out: index of the selected candidate
// ruiBits         in/out: total bits, adjusted by the bit difference
// ruiCost         in/out: total cost, adjusted to reflect the new bit count
//
// The distortion part of ruiCost is left untouched; only the bit-dependent
// part is exchanged.
void InterSearch::xCheckBestAffineMVP(CodingUnit& cu, AffineAMVPInfo &affineAMVPInfo, RefPicList refPicList, Mv acMv[3], Mv acMvPred[3], int& riMVPIdx, uint32_t& ruiBits, Distortion& ruiCost)
{
  // With a single candidate (or none) there is no alternative to compare against.
  if (affineAMVPInfo.numCand <= 1)
  {
    return;
  }

  const bool useThirdCp = cu.affineType ? true : false;

  m_pcRdCost->selectMotionLambda();
  m_pcRdCost->setCostScale(0);

  // Bits spent with the predictor currently in use: MVD bits plus MVP index bits.
  int currentBits = xCalcAffineMVBits(cu, acMv, acMvPred);
  currentBits += m_auiMVPIdxCost[riMVPIdx][AMVP_MAX_NUM_CANDS];

  int winnerIdx  = riMVPIdx;
  int winnerBits = currentBits;

  // Try every other candidate as predictor and keep the cheapest one.
  Mv candPred[3];
  for (int cand = 0; cand < affineAMVPInfo.numCand; cand++)
  {
    if (cand != riMVPIdx)
    {
      candPred[0] = affineAMVPInfo.mvCandLT[cand];
      candPred[1] = affineAMVPInfo.mvCandRT[cand];
      if (useThirdCp)
      {
        candPred[2] = affineAMVPInfo.mvCandLB[cand];
      }

      int candBits = xCalcAffineMVBits(cu, acMv, candPred);
      candBits += m_auiMVPIdxCost[cand][AMVP_MAX_NUM_CANDS];

      if (candBits < winnerBits)
      {
        winnerBits = candBits;
        winnerIdx  = cand;
      }
    }
  }

  // Nothing cheaper found: leave all outputs as they are.
  if (winnerIdx == riMVPIdx)
  {
    return;
  }

  // Switch to the cheaper predictor and patch bits and cost accordingly.
  acMvPred[0] = affineAMVPInfo.mvCandLT[winnerIdx];
  acMvPred[1] = affineAMVPInfo.mvCandRT[winnerIdx];
  acMvPred[2] = affineAMVPInfo.mvCandLB[winnerIdx];
  riMVPIdx = winnerIdx;

  const uint32_t previousBits = ruiBits;
  ruiBits = previousBits - currentBits + winnerBits;
  ruiCost = (ruiCost - m_pcRdCost->getCost(previousBits)) + m_pcRdCost->getCost(ruiBits);
}
5355
5356
void InterSearch::xAffineMotionEstimation(CodingUnit& cu,
5357
  CPelUnitBuf&    origBuf,
5358
  RefPicList      refPicList,
5359
  Mv              acMvPred[3],
5360
  int             iRefIdxPred,
5361
  Mv              acMv[3],
5362
  uint32_t&       ruiBits,
5363
  Distortion&     ruiCost,
5364
  int&            mvpIdx,
5365
  const AffineAMVPInfo& aamvpi,
5366
  bool            bBi)
5367
0
{
5368
0
  if( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT && !bBi && xReadBufferedAffineUniMv( cu, refPicList, iRefIdxPred, acMvPred, acMv, ruiBits, ruiCost, mvpIdx, aamvpi ) )
5369
0
  {
5370
0
    return;
5371
0
  }
5372
5373
0
  int bestMvpIdx = mvpIdx;
5374
0
  const int width = cu.Y().width;
5375
0
  const int height = cu.Y().height;
5376
5377
0
  const Picture* refPic = cu.slice->getRefPic(refPicList, iRefIdxPred);
5378
5379
  // Set Origin YUV: pcYuv
5380
0
  CPelUnitBuf*   pBuf = &origBuf;
5381
0
  double        fWeight = 1.0;
5382
5383
0
  CPelUnitBuf  origBufTmpCnst;
5384
5385
  // if Bi, set to ( 2 * Org - ListX )
5386
0
  if (bBi)
5387
0
  {
5388
0
    PelUnitBuf  origBufTmp = m_tmpStorageLCU.getCompactBuf(cu);
5389
    // NOTE: Other buf contains predicted signal from another direction
5390
0
    PelUnitBuf otherBuf = m_tmpPredStorage[1 - (int)refPicList].getCompactBuf( cu );
5391
0
    origBufTmp.copyFrom(origBuf);
5392
0
    origBufTmp.removeHighFreq(otherBuf, m_pcEncCfg->m_bClipForBiPredMeEnabled, cu.slice->clpRngs);
5393
5394
0
    origBufTmpCnst = origBufTmp;
5395
0
    pBuf           = &origBufTmpCnst;
5396
0
    fWeight        = xGetMEDistortionWeight(cu.BcwIdx, refPicList);
5397
0
  }
5398
5399
  // pred YUV
5400
0
  PelUnitBuf  predBuf = m_tmpAffiStorage.getCompactBuf(cu);
5401
5402
  // Set start Mv position, use input mv as started search mv
5403
0
  Mv acMvTemp[3];
5404
0
  ::memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5405
  // Set delta mv
5406
  // malloc buffer
5407
0
  int iParaNum = cu.affineType ? 7 : 5;
5408
0
  int affineParaNum = iParaNum - 1;
5409
0
  int mvNum = cu.affineType ? 3 : 2;
5410
0
  double **pdEqualCoeff;
5411
0
  pdEqualCoeff = new double *[iParaNum];
5412
0
  for (int i = 0; i < iParaNum; i++)
5413
0
  {
5414
0
    pdEqualCoeff[i] = new double[iParaNum];
5415
0
  }
5416
5417
0
  int64_t  i64EqualCoeff[7][7];
5418
0
  Pel    *piError = m_tmpAffiError;
5419
0
  Pel    *pdDerivate[2];
5420
0
  pdDerivate[0] = m_tmpAffiDeri[0];
5421
0
  pdDerivate[1] = m_tmpAffiDeri[1];
5422
5423
0
  Distortion uiCostBest = MAX_DISTORTION;
5424
0
  uint32_t uiBitsBest = 0;
5425
5426
  // do motion compensation with origin mv
5427
5428
0
  clipMv(acMvTemp[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5429
0
  clipMv(acMvTemp[1], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5430
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5431
0
  {
5432
0
    clipMv(acMvTemp[2], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5433
0
  }
5434
5435
0
  acMvTemp[0].roundAffinePrecInternal2Amvr(cu.imv);
5436
0
  acMvTemp[1].roundAffinePrecInternal2Amvr(cu.imv);
5437
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5438
0
  {
5439
0
    acMvTemp[2].roundAffinePrecInternal2Amvr(cu.imv);
5440
0
  }
5441
0
  if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5442
0
  {
5443
0
    xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.cs->slice->clpRngs[COMP_Y], refPicList);
5444
5445
    // get error
5446
0
    uiCostBest = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5447
5448
    // get cost with mv
5449
0
    m_pcRdCost->setCostScale(0);
5450
0
    uiBitsBest = ruiBits;
5451
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) xx uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5452
0
    uiBitsBest += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5453
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) yy uiBitsBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest);
5454
0
    uiCostBest = (Distortion)(floor(fWeight * (double)uiCostBest) + (double)m_pcRdCost->getCost(uiBitsBest));
5455
5456
0
    DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5457
5458
0
    ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5459
0
  }
5460
0
  const int predBufStride = predBuf.Y().stride;
5461
0
  Mv prevIterMv[7][3];
5462
0
  int iIterTime;
5463
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
5464
0
  {
5465
0
    iIterTime = bBi ? 3 : 4;
5466
0
  }
5467
0
  else
5468
0
  {
5469
0
    iIterTime = bBi ? 3 : 5;
5470
0
  }
5471
5472
0
  if (!cu.cs->sps->AffineType)// getUseAffineType())
5473
0
  {
5474
0
    iIterTime = bBi ? 5 : 7;
5475
0
  }
5476
5477
0
  for (int iter = 0; iter<iIterTime; iter++)    // iterate loop
5478
0
  {
5479
0
    memcpy(prevIterMv[iter], acMvTemp, sizeof(Mv) * 3);
5480
    /*********************************************************************************
5481
    *                         use gradient to update mv
5482
    *********************************************************************************/
5483
    // get Error Matrix
5484
0
    PelBuf( piError, width, height ).subtract( pBuf->Y(), predBuf.Y() );
5485
5486
    // sobel x direction
5487
    // -1 0 1
5488
    // -2 0 2
5489
    // -1 0 1
5490
0
    Pel* pPred = predBuf.Y().buf;
5491
0
    m_HorizontalSobelFilter(pPred, predBufStride, pdDerivate[0], width, width, height);
5492
5493
    // sobel y direction
5494
    // -1 -2 -1
5495
    //  0  0  0
5496
    //  1  2  1
5497
0
    m_VerticalSobelFilter(pPred, predBufStride, pdDerivate[1], width, width, height);
5498
5499
    // solve delta x and y
5500
0
    for (int row = 0; row < iParaNum; row++)
5501
0
    {
5502
0
      memset(&i64EqualCoeff[row][0], 0, iParaNum * sizeof(int64_t));
5503
0
    }
5504
5505
0
    m_EqualCoeffComputer[cu.affineType]( piError, width, pdDerivate, width, width, height, i64EqualCoeff );
5506
5507
0
    for (int row = 0; row < iParaNum; row++)
5508
0
    {
5509
0
      for (int i = 0; i < iParaNum; i++)
5510
0
      {
5511
0
        pdEqualCoeff[row][i] = (double)i64EqualCoeff[row][i];
5512
0
      }
5513
0
    }
5514
5515
0
    double dAffinePara[6];
5516
0
    double dDeltaMv[6];
5517
0
    Mv acDeltaMv[3];
5518
5519
0
    solveEqual(pdEqualCoeff, affineParaNum, dAffinePara);
5520
5521
    // convert to delta mv
5522
0
    dDeltaMv[0] = dAffinePara[0];
5523
0
    dDeltaMv[2] = dAffinePara[2];
5524
0
    const bool extParams = cu.affineType == AFFINEMODEL_6PARAM;
5525
0
    if (extParams)
5526
0
    {
5527
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5528
0
      dDeltaMv[3] = dAffinePara[3] * width + dAffinePara[2];
5529
0
      dDeltaMv[4] = dAffinePara[4] * height + dAffinePara[0];
5530
0
      dDeltaMv[5] = dAffinePara[5] * height + dAffinePara[2];
5531
0
    }
5532
0
    else
5533
0
    {
5534
0
      dDeltaMv[1] = dAffinePara[1] * width + dAffinePara[0];
5535
0
      dDeltaMv[3] = -dAffinePara[3] * width + dAffinePara[2];
5536
0
    }
5537
5538
0
    const int normShiftTab[3] = { MV_PRECISION_QUARTER - MV_PRECISION_INT, MV_PRECISION_SIXTEENTH - MV_PRECISION_INT, MV_PRECISION_QUARTER - MV_PRECISION_INT };
5539
0
    const int stepShiftTab[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH, MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER };
5540
0
    const int multiShift = 1 << normShiftTab[cu.imv];
5541
0
    const int mvShift = stepShiftTab[cu.imv];
5542
5543
0
    acDeltaMv[0] = Mv((int)(dDeltaMv[0] * multiShift + SIGN(dDeltaMv[0]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[2] * multiShift + SIGN(dDeltaMv[2]) * 0.5) * (1<< mvShift));
5544
0
    acDeltaMv[1] = Mv((int)(dDeltaMv[1] * multiShift + SIGN(dDeltaMv[1]) * 0.5) * (1<< mvShift), (int)(dDeltaMv[3] * multiShift + SIGN(dDeltaMv[3]) * 0.5) * (1<< mvShift));
5545
0
    if (extParams)
5546
0
    {
5547
0
      acDeltaMv[2] = Mv((int)(dDeltaMv[4] * multiShift + SIGN(dDeltaMv[4]) * 0.5) *  (1<< mvShift), (int)(dDeltaMv[5] * multiShift + SIGN(dDeltaMv[5]) * 0.5) *  (1<< mvShift));
5548
0
    }
5549
0
    bool bAllZero = false;
5550
0
    for (int i = 0; i < mvNum; i++)
5551
0
    {
5552
0
      Mv deltaMv = acDeltaMv[i];
5553
0
      if (cu.imv == IMV_4PEL)
5554
0
      {
5555
0
        deltaMv.roundToPrecision(MV_PRECISION_INTERNAL, MV_PRECISION_HALF);
5556
0
      }
5557
0
      if (deltaMv.hor != 0 || deltaMv.ver != 0)
5558
0
      {
5559
0
        bAllZero = false;
5560
0
        break;
5561
0
      }
5562
0
      bAllZero = true;
5563
0
    }
5564
5565
0
    if (bAllZero)
5566
0
      break;
5567
5568
    // do motion compensation with updated mv
5569
0
    for (int i = 0; i < mvNum; i++)
5570
0
    {
5571
0
      acMvTemp[i] += acDeltaMv[i];
5572
0
      acMvTemp[i].hor = Clip3(MV_MIN, MV_MAX, acMvTemp[i].hor);
5573
0
      acMvTemp[i].ver = Clip3(MV_MIN, MV_MAX, acMvTemp[i].ver);
5574
0
      acMvTemp[i].roundAffinePrecInternal2Amvr(cu.imv);
5575
5576
0
      clipMv(acMvTemp[i], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5577
0
    }
5578
5579
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5580
0
    {
5581
0
      xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5582
5583
      // get error
5584
0
      Distortion uiCostTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5585
0
      DTRACE(g_trace_ctx, D_COMMON, " (%d) uiCostTemp=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiCostTemp);
5586
5587
      // get cost with mv
5588
0
      m_pcRdCost->setCostScale(0);
5589
0
      uint32_t uiBitsTemp = ruiBits;
5590
0
      uiBitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5591
0
      uiCostTemp = (Distortion)(floor(fWeight * (double)uiCostTemp) + (double)m_pcRdCost->getCost(uiBitsTemp));
5592
5593
      // store best cost and mv
5594
0
      if (uiCostTemp < uiCostBest)
5595
0
      {
5596
0
        uiCostBest = uiCostTemp;
5597
0
        uiBitsBest = uiBitsTemp;
5598
0
        memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5599
0
        mvpIdx = bestMvpIdx;
5600
0
      }
5601
0
      else if(m_pcEncCfg->m_Affine > 1)
5602
0
      {
5603
0
        break;
5604
0
      }
5605
0
    }
5606
0
  }
5607
5608
0
  auto checkCPMVRdCost = [&](Mv ctrlPtMv[3])
5609
0
  {
5610
0
    if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, ctrlPtMv, m_pcEncCfg->m_ifpLines ) )
5611
0
    {
5612
0
      xPredAffineBlk(COMP_Y, cu, refPic, ctrlPtMv, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5613
      // get error
5614
0
      Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5615
      // get cost with mv
5616
0
      m_pcRdCost->setCostScale(0);
5617
0
      uint32_t bitsTemp = ruiBits;
5618
0
      bitsTemp += xCalcAffineMVBits(cu, ctrlPtMv, acMvPred);
5619
0
      costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5620
      // store best cost and mv
5621
0
      if (costTemp < uiCostBest)
5622
0
      {
5623
0
        uiCostBest = costTemp;
5624
0
        uiBitsBest = bitsTemp;
5625
0
        ::memcpy(acMv, ctrlPtMv, sizeof(Mv) * 3);
5626
0
      }
5627
0
    }
5628
0
  };
5629
5630
0
  const uint32_t mvShiftTable[3] = { MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER, MV_PRECISION_INTERNAL - MV_PRECISION_INTERNAL, MV_PRECISION_INTERNAL - MV_PRECISION_INT };
5631
0
  const uint32_t mvShift = mvShiftTable[cu.imv];
5632
0
  if (uiCostBest <= AFFINE_ME_LIST_MVP_TH*m_hevcCost)
5633
0
  {
5634
0
    Mv mvPredTmp[3] = { acMvPred[0], acMvPred[1], acMvPred[2] };
5635
0
    Mv mvME[3];
5636
0
    ::memcpy(mvME, acMv, sizeof(Mv) * 3);
5637
0
    Mv dMv = mvME[0] - mvPredTmp[0];
5638
5639
0
    for (int j = 0; j < mvNum; j++)
5640
0
    {
5641
0
      if ((!j && mvME[j] != mvPredTmp[j]) || (j && mvME[j] != (mvPredTmp[j] + dMv)))
5642
0
      {
5643
0
        ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5644
0
        acMvTemp[j] = mvPredTmp[j];
5645
5646
0
        if (j)
5647
0
          acMvTemp[j] += dMv;
5648
5649
0
        checkCPMVRdCost(acMvTemp);
5650
0
      }
5651
0
    }
5652
5653
    //keep the rotation/zoom;
5654
0
    if (mvME[0] != mvPredTmp[0])
5655
0
    {
5656
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5657
0
      for (int i = 1; i < mvNum; i++)
5658
0
      {
5659
0
        acMvTemp[i] -= dMv;
5660
0
      }
5661
0
      acMvTemp[0] = mvPredTmp[0];
5662
5663
0
      checkCPMVRdCost(acMvTemp);
5664
0
    }
5665
5666
    //keep the translation;
5667
0
    if (cu.affineType == AFFINEMODEL_6PARAM && mvME[1] != (mvPredTmp[1] + dMv) && mvME[2] != (mvPredTmp[2] + dMv))
5668
0
    {
5669
0
      ::memcpy(acMvTemp, mvME, sizeof(Mv) * 3);
5670
5671
0
      acMvTemp[1] = mvPredTmp[1] + dMv;
5672
0
      acMvTemp[2] = mvPredTmp[2] + dMv;
5673
5674
0
      checkCPMVRdCost(acMvTemp);
5675
0
    }
5676
5677
    // 8 nearest neighbor search
5678
0
    int testPos[8][2] = { { -1, 0 },{ 0, -1 },{ 0, 1 },{ 1, 0 },{ -1, -1 },{ -1, 1 },{ 1, 1 },{ 1, -1 } };
5679
0
    const int maxSearchRound = 3;
5680
5681
0
    for (int rnd = 0; rnd < maxSearchRound; rnd++)
5682
0
    {
5683
0
      bool modelChange = false;
5684
      //search the model parameters with finer granularity;
5685
0
      for (int j = 0; j < mvNum; j++)
5686
0
      {
5687
0
        bool loopChange = false;
5688
0
        for (int iter = 0; iter < 2; iter++)
5689
0
        {
5690
0
          if (iter == 1 && !loopChange)
5691
0
          {
5692
0
            break;
5693
0
          }
5694
0
          Mv centerMv[3];
5695
0
          memcpy(centerMv, acMv, sizeof(Mv) * 3);
5696
0
          memcpy(acMvTemp, acMv, sizeof(Mv) * 3);
5697
5698
0
          for (int i = ((iter == 0) ? 0 : 4); i < ((iter == 0) ? 4 : 8); i++)
5699
0
          {
5700
0
            acMvTemp[j].set(centerMv[j].hor + (testPos[i][0] * (1 << mvShift)), centerMv[j].ver + (testPos[i][1] * (1 << mvShift)));
5701
0
            clipMv(acMvTemp[j], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
5702
5703
0
            if( !m_pcEncCfg->m_ifpLines || xIsAffineMvInRangeFPP( cu, acMvTemp, m_pcEncCfg->m_ifpLines ) )
5704
0
            {
5705
0
              xPredAffineBlk(COMP_Y, cu, refPic, acMvTemp, predBuf, false, cu.slice->clpRngs[COMP_Y], refPicList);
5706
5707
0
              Distortion costTemp = m_pcRdCost->getDistPart(predBuf.Y(), pBuf->Y(), cu.cs->sps->bitDepths[CH_L], COMP_Y, DF_HAD);
5708
0
              uint32_t bitsTemp = ruiBits;
5709
0
              bitsTemp += xCalcAffineMVBits(cu, acMvTemp, acMvPred);
5710
0
              costTemp = (Distortion)(floor(fWeight * (double)costTemp) + (double)m_pcRdCost->getCost(bitsTemp));
5711
5712
0
              if (costTemp < uiCostBest)
5713
0
              {
5714
0
                uiCostBest = costTemp;
5715
0
                uiBitsBest = bitsTemp;
5716
0
                ::memcpy(acMv, acMvTemp, sizeof(Mv) * 3);
5717
0
                modelChange = true;
5718
0
                loopChange = true;
5719
0
              }
5720
0
            }
5721
0
          }
5722
0
        }
5723
0
      }
5724
5725
0
      if (!modelChange)
5726
0
      {
5727
0
        break;
5728
0
      }
5729
0
    }
5730
0
  }
5731
0
  acMvPred[0] = aamvpi.mvCandLT[mvpIdx];
5732
0
  acMvPred[1] = aamvpi.mvCandRT[mvpIdx];
5733
0
  acMvPred[2] = aamvpi.mvCandLB[mvpIdx];
5734
5735
  // free buffer
5736
0
  for (int i = 0; i<iParaNum; i++)
5737
0
    delete[]pdEqualCoeff[i];
5738
0
  delete[]pdEqualCoeff;
5739
5740
0
  ruiBits = uiBitsBest;
5741
0
  ruiCost = uiCostBest;
5742
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) uiBitsBest=%d, uiCostBest=%d\n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), uiBitsBest, uiCostBest);
5743
0
}
5744
5745
bool InterSearch::xEstimateAffineAMVP(CodingUnit& cu, AffineAMVPInfo& affineAMVPInfo, CPelUnitBuf& origBuf, RefPicList refPicList, int iRefIdx, Mv acMvPred[3], Distortion& distBiP)
5746
0
{
5747
0
  Mv         bestMvLT, bestMvRT, bestMvLB;
5748
0
  int        iBestIdx = 0;
5749
0
  Distortion uiBestCost = MAX_DISTORTION;
5750
5751
  // Fill the MV Candidates
5752
0
  CU::fillAffineMvpCand(cu, refPicList, iRefIdx, affineAMVPInfo);
5753
0
  CHECK(affineAMVPInfo.numCand == 0, "Assertion failed.");
5754
5755
0
  PelUnitBuf predBuf = m_tmpStorageLCU.getCompactBuf( cu );
5756
5757
0
  bool stop_check = false;
5758
0
  if (affineAMVPInfo.mvCandLT[0] == affineAMVPInfo.mvCandLT[1])
5759
0
  {
5760
0
    if ((affineAMVPInfo.mvCandRT[0] == affineAMVPInfo.mvCandRT[1]) && (affineAMVPInfo.mvCandLB[0] == affineAMVPInfo.mvCandLB[1]))
5761
0
    {
5762
0
      stop_check = true;
5763
0
    }
5764
0
  }
5765
5766
  // initialize Mvp index & Mvp
5767
0
  iBestIdx = -1;
5768
0
  for (int i = 0; i < affineAMVPInfo.numCand; i++)
5769
0
  {
5770
0
    if (i && stop_check)
5771
0
    {
5772
0
      continue;
5773
0
    }
5774
0
    Mv mv[3] = { affineAMVPInfo.mvCandLT[i], affineAMVPInfo.mvCandRT[i], affineAMVPInfo.mvCandLB[i] };
5775
0
    Distortion uiTmpCost = xGetAffineTemplateCost(cu, origBuf, predBuf, mv, i, AMVP_MAX_NUM_CANDS, refPicList, iRefIdx);
5776
5777
0
    if (uiBestCost > uiTmpCost)
5778
0
    {
5779
0
      uiBestCost = uiTmpCost;
5780
0
      bestMvLT = affineAMVPInfo.mvCandLT[i];
5781
0
      bestMvRT = affineAMVPInfo.mvCandRT[i];
5782
0
      bestMvLB = affineAMVPInfo.mvCandLB[i];
5783
0
      iBestIdx = i;
5784
0
      distBiP  = uiTmpCost;
5785
0
    }
5786
0
  }
5787
5788
0
  if( iBestIdx < 0 )
5789
0
    return false;
5790
5791
  // Setting Best MVP
5792
0
  acMvPred[0] = bestMvLT;
5793
0
  acMvPred[1] = bestMvRT;
5794
0
  acMvPred[2] = bestMvLB;
5795
5796
0
  cu.mvpIdx[refPicList] = iBestIdx;
5797
0
  cu.mvpNum[refPicList] = affineAMVPInfo.numCand;
5798
0
  DTRACE(g_trace_ctx, D_COMMON, "#estAffi=%d \n", affineAMVPInfo.numCand);
5799
0
  return true;
5800
0
}
5801
5802
void InterSearch::xCopyAffineAMVPInfo(AffineAMVPInfo& src, AffineAMVPInfo& dst)
5803
0
{
5804
0
  dst.numCand = src.numCand;
5805
0
  DTRACE(g_trace_ctx, D_COMMON, " (%d) #copyAffi=%d \n", DTRACE_GET_COUNTER(g_trace_ctx, D_COMMON), src.numCand);
5806
0
  ::memcpy(dst.mvCandLT, src.mvCandLT, sizeof(Mv)*src.numCand);
5807
0
  ::memcpy(dst.mvCandRT, src.mvCandRT, sizeof(Mv)*src.numCand);
5808
0
  ::memcpy(dst.mvCandLB, src.mvCandLB, sizeof(Mv)*src.numCand);
5809
0
}
5810
5811
uint32_t InterSearch::xCalcAffineMVBits(CodingUnit& cu, Mv acMvTemp[3], Mv acMvPred[3])
5812
0
{
5813
0
  int mvNum = cu.affineType ? 3 : 2;
5814
0
  m_pcRdCost->setCostScale(0);
5815
0
  uint32_t bitsTemp = 0;
5816
5817
0
  for (int verIdx = 0; verIdx < mvNum; verIdx++)
5818
0
  {
5819
0
    Mv pred = verIdx == 0 ? acMvPred[verIdx] : acMvPred[verIdx] + acMvTemp[0] - acMvPred[0];
5820
0
    pred.changeAffinePrecInternal2Amvr(cu.imv);
5821
0
    m_pcRdCost->setPredictor(pred);
5822
0
    Mv mv = acMvTemp[verIdx];
5823
0
    mv.changeAffinePrecInternal2Amvr(cu.imv);
5824
5825
0
    bitsTemp += m_pcRdCost->getBitsOfVectorWithPredictor(mv.hor, mv.ver, 0);
5826
0
  }
5827
5828
0
  return bitsTemp;
5829
0
}
5830
5831
5832
//! set adaptive search range based on poc difference
5833
void InterSearch::setSearchRange( const Slice* slice, const VVEncCfg& encCfg )
5834
4.34k
{
5835
4.34k
  if( !encCfg.m_bUseASR || slice->isIRAP() )
5836
4.34k
  {
5837
4.34k
    return;
5838
4.34k
  }
5839
5840
0
  int iCurrPOC = slice->poc;
5841
0
  int iRefPOC;
5842
0
  int iGOPSize = encCfg.m_GOPSize;
5843
0
  int iOffset = (iGOPSize >> 1);
5844
0
  int iMaxSR = encCfg.m_SearchRange;
5845
0
  int iNumPredDir = slice->isInterP() ? 1 : 2;
5846
5847
0
  for (int iDir = 0; iDir < iNumPredDir; iDir++)
5848
0
  {
5849
0
    RefPicList  e = ( iDir ? REF_PIC_LIST_1 : REF_PIC_LIST_0 );
5850
0
    for (int iRefIdx = 0; iRefIdx < slice->numRefIdx[e]; iRefIdx++)
5851
0
    {
5852
0
      iRefPOC = slice->getRefPic(e, iRefIdx)->getPOC();
5853
0
      int newSearchRange = Clip3(encCfg.m_minSearchWindow, iMaxSR, (iMaxSR*ADAPT_SR_SCALE*abs(iCurrPOC - iRefPOC)+iOffset)/iGOPSize);
5854
0
      m_aaiAdaptSR[iDir][iRefIdx] = newSearchRange;
5855
0
    }
5856
0
  }
5857
0
}
5858
5859
void InterSearch::xIBCSearchMVCandUpdate(Distortion  sad, int x, int y, Distortion* sadBestCand, Mv* cMVCand)
5860
1.08M
{
5861
1.08M
  int j = CHROMA_REFINEMENT_CANDIDATES - 1;
5862
5863
1.08M
  if (sad < sadBestCand[CHROMA_REFINEMENT_CANDIDATES - 1])
5864
176k
  {
5865
1.58M
    for (int t = CHROMA_REFINEMENT_CANDIDATES - 1; t >= 0; t--)
5866
1.40M
    {
5867
1.40M
      if (sad < sadBestCand[t])
5868
847k
        j = t;
5869
1.40M
    }
5870
5871
847k
    for (int k = CHROMA_REFINEMENT_CANDIDATES - 1; k > j; k--)
5872
671k
    {
5873
671k
      sadBestCand[k] = sadBestCand[k - 1];
5874
5875
671k
      cMVCand[k].set(cMVCand[k - 1].hor, cMVCand[k - 1].ver);
5876
671k
    }
5877
176k
    sadBestCand[j] = sad;
5878
176k
    cMVCand[j].set(x, y);
5879
176k
  }
5880
1.08M
}
5881
5882
int InterSearch::xIBCSearchMVChromaRefine(CodingUnit& cu,
5883
  int         roiWidth,
5884
  int         roiHeight,
5885
  int         cuPelX,
5886
  int         cuPelY,
5887
  Distortion* sadBestCand,
5888
  Mv* cMVCand
5889
5890
)
5891
18.9k
{
5892
18.9k
  if ((!isChromaEnabled(cu.chromaFormat)) || (!cu.Cb().valid()))
5893
18.9k
  {
5894
18.9k
    return 0;
5895
18.9k
  }
5896
5897
0
  int bestCandIdx = 0;
5898
0
  Distortion  sadBest = std::numeric_limits<Distortion>::max();
5899
0
  Distortion  tempSad;
5900
5901
0
  Pel* pRef;
5902
0
  Pel* pOrg;
5903
0
  int refStride, orgStride;
5904
0
  int width, height;
5905
5906
0
  int picWidth = cu.cs->slice->pps->picWidthInLumaSamples;
5907
0
  int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
5908
5909
0
  UnitArea allCompBlocks(cu.chromaFormat, (Area)cu.block(COMP_Y));
5910
0
  for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
5911
0
  {
5912
0
    if (sadBestCand[cand] == std::numeric_limits<Distortion>::max())
5913
0
    {
5914
0
      continue;
5915
0
    }
5916
5917
0
    if ((!cMVCand[cand].hor) && (!cMVCand[cand].ver))
5918
0
      continue;
5919
5920
0
    if (((int)(cuPelY + cMVCand[cand].ver + roiHeight) >= picHeight) || ((cuPelY + cMVCand[cand].ver) < 0))
5921
0
      continue;
5922
5923
0
    if (((int)(cuPelX + cMVCand[cand].hor + roiWidth) >= picWidth) || ((cuPelX + cMVCand[cand].hor) < 0))
5924
0
      continue;
5925
5926
0
    tempSad = sadBestCand[cand];
5927
5928
0
    cu.mv[0][0] = cMVCand[cand];
5929
0
    cu.mv[0][0].changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
5930
0
    cu.interDir = 1;
5931
0
    cu.refIdx[0] = cu.cs->slice->numRefIdx[REF_PIC_LIST_0]; // last idx in the list
5932
5933
0
    PelUnitBuf predBufTmp = m_tmpPredStorage[REF_PIC_LIST_0].getCompactBuf(cu);
5934
0
    motionCompensation(cu, predBufTmp, REF_PIC_LIST_0);
5935
5936
0
    for (unsigned int ch = COMP_Cb; ch < getNumberValidComponents(cu.cs->sps->chromaFormatIdc); ch++)
5937
0
    {
5938
0
      width = roiWidth >> getComponentScaleX(ComponentID(ch), cu.chromaFormat);
5939
0
      height = roiHeight >> getComponentScaleY(ComponentID(ch), cu.chromaFormat);
5940
5941
0
      PelUnitBuf origBuf = cu.cs->getOrgBuf(allCompBlocks);
5942
0
      PelUnitBuf* pBuf = &origBuf;
5943
0
      CPelBuf  tmpPattern = pBuf->get(ComponentID(ch));
5944
0
      pOrg = (Pel*)tmpPattern.buf;
5945
5946
0
      Picture* refPic = cu.slice->pic;
5947
0
      const CPelBuf refBuf = refPic->getRecoBuf(allCompBlocks.blocks[ComponentID(ch)]);
5948
0
      pRef = (Pel*)refBuf.buf;
5949
5950
0
      refStride = refBuf.stride;
5951
0
      orgStride = tmpPattern.stride;
5952
5953
      //ComponentID compID = (ComponentID)ch;
5954
0
      PelUnitBuf* pBufRef = &predBufTmp;
5955
0
      CPelBuf  tmpPatternRef = pBufRef->get(ComponentID(ch));
5956
0
      pRef = (Pel*)tmpPatternRef.buf;
5957
0
      refStride = tmpPatternRef.stride;
5958
5959
5960
0
      for (int row = 0; row < height; row++)
5961
0
      {
5962
0
        for (int col = 0; col < width; col++)
5963
0
        {
5964
0
          tempSad += ((abs(pRef[col] - pOrg[col])) >> (cu.cs->sps->bitDepths[CH_C] - 8));
5965
0
        }
5966
0
        pRef += refStride;
5967
0
        pOrg += orgStride;
5968
0
      }
5969
0
    }
5970
5971
0
    if (tempSad < sadBest)
5972
0
    {
5973
0
      sadBest = tempSad;
5974
0
      bestCandIdx = cand;
5975
0
    }
5976
0
  }
5977
5978
0
  return bestCandIdx;
5979
18.9k
}
5980
static unsigned int xMergeCandLists(Mv* dst, unsigned int dn, unsigned int dstTotalLength, Mv* src, unsigned int sn)
5981
113k
{
5982
889k
  for (unsigned int cand = 0; cand < sn && dn < dstTotalLength; cand++)
5983
775k
  {
5984
775k
    if (src[cand] == Mv())
5985
59.1k
    {
5986
59.1k
      continue;
5987
59.1k
    }
5988
716k
    bool found = false;
5989
5.45M
    for (int j = 0; j < dn; j++)
5990
4.83M
    {
5991
4.83M
      if (src[cand] == dst[j])
5992
98.7k
      {
5993
98.7k
        found = true;
5994
98.7k
        break;
5995
98.7k
      }
5996
4.83M
    }
5997
5998
716k
    if (!found)
5999
618k
    {
6000
618k
      dst[dn] = src[cand];
6001
618k
      dn++;
6002
618k
    }
6003
716k
  }
6004
6005
113k
  return dn;
6006
113k
}
6007
void InterSearch::xIntraPatternSearchIBC(CodingUnit& cu, TZSearchStruct& cStruct, Mv& rcMv, Distortion& ruiCost, Mv* pcMvSrchRngLT, Mv* pcMvSrchRngRB, Mv* pcMvPred)
6008
22.6k
{
6009
22.6k
  const int   srchRngHorLeft = pcMvSrchRngLT->hor;
6010
22.6k
  const int   srchRngHorRight = pcMvSrchRngRB->hor;
6011
22.6k
  const int   srchRngVerTop = pcMvSrchRngLT->ver;
6012
22.6k
  const int   srchRngVerBottom = pcMvSrchRngRB->ver;
6013
6014
22.6k
  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
6015
22.6k
  const int   puPelOffsetX = 0;
6016
22.6k
  const int   puPelOffsetY = 0;
6017
22.6k
  const int   cuPelX = cu.Y().x;
6018
22.6k
  const int   cuPelY = cu.Y().y;
6019
6020
22.6k
  int          roiWidth = cu.lwidth();
6021
22.6k
  int          roiHeight = cu.lheight();
6022
6023
22.6k
  Distortion  sad;
6024
22.6k
  Distortion  sadBest = std::numeric_limits<Distortion>::max();
6025
22.6k
  int         bestX = 0;
6026
22.6k
  int         bestY = 0;
6027
6028
22.6k
  const Pel* piRefSrch = cStruct.piRefY; 
6029
6030
22.6k
  int         bestCandIdx = 0;
6031
6032
22.6k
  Distortion  sadBestCand[CHROMA_REFINEMENT_CANDIDATES];
6033
22.6k
  Mv          cMVCand[CHROMA_REFINEMENT_CANDIDATES];
6034
6035
22.6k
  const bool  useAmvr = cu.cs->sps->AMVR;
6036
6037
6038
204k
  for (int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
6039
181k
  {
6040
181k
    sadBestCand[cand] = std::numeric_limits<Distortion>::max();
6041
181k
    cMVCand[cand].set(0, 0);
6042
181k
  }
6043
6044
22.6k
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);
6045
6046
22.6k
  const int picWidth = cu.cs->slice->pps->picWidthInLumaSamples;
6047
22.6k
  const int picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
6048
6049
6050
22.6k
  {
6051
22.6k
    m_cDistParam.subShift = 0;
6052
22.6k
    Distortion tempSadBest = 0;
6053
6054
22.6k
    int srLeft = srchRngHorLeft, srRight = srchRngHorRight, srTop = srchRngVerTop, srBottom = srchRngVerBottom;
6055
22.6k
    m_numBVs = 0;
6056
22.6k
    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);
6057
6058
22.6k
    Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
6059
22.6k
    int nbPreds = 0;
6060
22.6k
    CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
6061
22.6k
    m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), cMvPredEncOnly, nbPreds);
6062
6063
220k
    for (unsigned int cand = 0; cand < m_numBVs; cand++)
6064
198k
    {
6065
198k
      int xPred = m_acBVs[cand].hor;
6066
198k
      int yPred = m_acBVs[cand].ver;
6067
6068
198k
      if (!(xPred == 0 && yPred == 0)
6069
198k
        && !((yPred < srTop) || (yPred > srBottom))
6070
177k
        && !((xPred < srLeft) || (xPred > srRight)))
6071
177k
      {
6072
177k
        bool validCand = searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth);
6073
6074
177k
        if (validCand)
6075
145k
        {
6076
145k
          sad = m_pcRdCost->getBvCostMultiplePredsIBC(xPred, yPred, useAmvr);
6077
145k
          m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * yPred + xPred;
6078
145k
          sad += m_cDistParam.distFunc(m_cDistParam);
6079
6080
145k
          xIBCSearchMVCandUpdate(sad, xPred, yPred, sadBestCand, cMVCand);
6081
145k
        }
6082
177k
      }
6083
198k
    }
6084
6085
22.6k
    bestX = cMVCand[0].hor;
6086
22.6k
    bestY = cMVCand[0].ver;
6087
22.6k
    rcMv.set(bestX, bestY);
6088
22.6k
    sadBest = sadBestCand[0];
6089
6090
22.6k
    const int boundY = (0 - roiHeight - puPelOffsetY);
6091
388k
    for (int y = std::max(srchRngVerTop, 0 - cuPelY); y <= boundY; ++y)
6092
368k
    {
6093
368k
      if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, 0, y, lcuWidth))
6094
0
      {
6095
0
        continue;
6096
0
      }
6097
6098
368k
      sad = m_pcRdCost->getBvCostMultiplePredsIBC(0, y, useAmvr);
6099
368k
      m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y;
6100
368k
      sad += m_cDistParam.distFunc(m_cDistParam);
6101
6102
368k
      xIBCSearchMVCandUpdate(sad, 0, y, sadBestCand, cMVCand);
6103
368k
      tempSadBest = sadBestCand[0];
6104
368k
      if (sadBestCand[0] <= 3)
6105
2.64k
      {
6106
2.64k
        bestX = cMVCand[0].hor;
6107
2.64k
        bestY = cMVCand[0].ver;
6108
2.64k
        sadBest = sadBestCand[0];
6109
2.64k
        rcMv.set(bestX, bestY);
6110
2.64k
        ruiCost = sadBest;
6111
2.64k
        goto end;
6112
2.64k
      }
6113
368k
    }
6114
6115
20.0k
    const int boundX = std::max(srchRngHorLeft, -cuPelX);
6116
1.28M
    for (int x = 0 - roiWidth - puPelOffsetX; x >= boundX; --x)
6117
1.26M
    {
6118
1.26M
      if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, 0, lcuWidth))
6119
705k
      {
6120
705k
        continue;
6121
705k
      }
6122
6123
564k
      sad = m_pcRdCost->getBvCostMultiplePredsIBC(x, 0, useAmvr);
6124
564k
      m_cDistParam.cur.buf = piRefSrch + x;
6125
564k
      sad += m_cDistParam.distFunc(m_cDistParam);
6126
6127
6128
564k
      xIBCSearchMVCandUpdate(sad, x, 0, sadBestCand, cMVCand);
6129
564k
      tempSadBest = sadBestCand[0];
6130
564k
      if (sadBestCand[0] <= 3)
6131
1.12k
      {
6132
1.12k
        bestX = cMVCand[0].hor;
6133
1.12k
        bestY = cMVCand[0].ver;
6134
1.12k
        sadBest = sadBestCand[0];
6135
1.12k
        rcMv.set(bestX, bestY);
6136
1.12k
        ruiCost = sadBest;
6137
1.12k
        goto end;
6138
1.12k
      }
6139
564k
    }
6140
6141
18.9k
    bestX = cMVCand[0].hor;
6142
18.9k
    bestY = cMVCand[0].ver;
6143
18.9k
    sadBest = sadBestCand[0];
6144
18.9k
    if ((!bestX && !bestY) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= 32))
6145
17.8k
    {
6146
      //chroma refine
6147
17.8k
      bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6148
17.8k
      bestX = cMVCand[bestCandIdx].hor;
6149
17.8k
      bestY = cMVCand[bestCandIdx].ver;
6150
17.8k
      sadBest = sadBestCand[bestCandIdx];
6151
17.8k
      rcMv.set(bestX, bestY);
6152
17.8k
      ruiCost = sadBest;
6153
17.8k
      goto end;
6154
17.8k
    }
6155
6156
1.03k
    if (cu.lwidth() < 16 && cu.lheight() < 16)
6157
23
    {
6158
23
      int stepS = 2;
6159
23
      if (m_pcEncCfg->m_IBCFastMethod > 2)
6160
23
      {
6161
23
        if (m_pcEncCfg->m_IBCFastMethod == 5)
6162
0
        {
6163
0
          stepS = 8;
6164
0
        }
6165
23
        else if ((cu.lwidth() > 4) || (cu.lheight() > 4))
6166
23
        {
6167
23
          stepS = 4;
6168
23
        }
6169
23
      }
6170
6171
23
      const int minCuLog2 = m_pcEncCfg->m_log2MinCodingBlockSize;
6172
23
      const int minCuMask = (1 << minCuLog2) - 1;
6173
23
      bool lastDec = false;
6174
6175
23
      for (int searchStep = 0; searchStep < 3; searchStep++)
6176
23
      {
6177
23
        int delaySy = searchStep ? 1 : 0;
6178
23
        int delaySx = searchStep > 1 ? 1 : 0;
6179
23
        int startY = (std::max(srchRngVerTop, -cuPelY) + delaySy);
6180
23
        int startX = (std::max(srchRngHorLeft, -cuPelX) + delaySx);
6181
23
        int endY = srchRngVerBottom;
6182
23
        int endX = srchRngHorRight;
6183
6184
23
        if (m_pcEncCfg->m_IBCFastMethod > 5)
6185
0
        {
6186
0
          startY = bestY - 4;
6187
0
          endY = bestY + 4;
6188
0
          startX = bestX - 4;
6189
0
          endX = bestX + 4;
6190
0
          stepS = 1;
6191
0
          if (searchStep)
6192
0
          {
6193
0
            break;
6194
0
          }
6195
0
        }
6196
6197
582
        for (int y = startY; y <= endY; y += stepS)
6198
559
        {
6199
559
          if ((y == 0) || ((int)(cuPelY + y + roiHeight) >= picHeight))
6200
315
            continue;
6201
244
          bool firstX = true;
6202
244
          int stepSx = searchStep ? stepS : 1;
6203
26.5k
          for (int x = startX; x <= endX; firstX = false, x += stepSx)
6204
26.3k
          {
6205
26.3k
            if ((x == 0) || ((int)(cuPelX + x + roiWidth) >= picWidth))
6206
12.5k
              continue;
6207
6208
13.7k
            bool isSameAsLast = !firstX && ((cuPelX + x) & minCuMask) > 1;
6209
13.7k
            if (searchStep || (m_pcEncCfg->m_IBCFastMethod > 5))
6210
0
            {
6211
0
              if (!searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth))
6212
0
              {
6213
0
                continue;
6214
0
              }
6215
0
            }
6216
13.7k
            else if ((isSameAsLast && !lastDec) || (!isSameAsLast && !searchBvIBC(cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, x, y, lcuWidth)))
6217
3.47k
            {
6218
3.47k
              lastDec = false;
6219
3.47k
              continue;
6220
3.47k
            }
6221
10.2k
            lastDec = true;
6222
6223
10.2k
            sad = m_pcRdCost->getBvCostMultiplePredsIBC(x, y, useAmvr);
6224
10.2k
            m_cDistParam.cur.buf = piRefSrch + cStruct.iRefStride * y + x;
6225
10.2k
            sad += m_cDistParam.distFunc(m_cDistParam);
6226
6227
10.2k
            xIBCSearchMVCandUpdate(sad, x, y, sadBestCand, cMVCand);
6228
6229
6230
10.2k
            if (searchStep && sadBestCand[0] <= 5)
6231
0
            {
6232
              //chroma refine & return
6233
0
              bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6234
0
              bestX = cMVCand[bestCandIdx].hor;
6235
0
              bestY = cMVCand[bestCandIdx].ver;
6236
0
              sadBest = sadBestCand[bestCandIdx];
6237
0
              rcMv.set(bestX, bestY);
6238
0
              ruiCost = sadBest;
6239
0
              goto end;
6240
0
            }
6241
10.2k
          }
6242
244
        }
6243
6244
23
        if ((searchStep < 2) && (m_pcEncCfg->m_IBCFastMethod < 6))
6245
23
        {
6246
23
          if ((m_pcEncCfg->m_IBCFastMethod > 2) && (m_pcEncCfg->m_IBCFastMethod < 5))
6247
23
          {
6248
23
            if ((bestX == cMVCand[0].hor) && (bestY == cMVCand[0].ver))
6249
23
            {
6250
23
              sadBest = sadBestCand[bestCandIdx];
6251
23
              rcMv.set(bestX, bestY);
6252
23
              ruiCost = sadBest;
6253
23
              goto end;
6254
23
            }
6255
23
          }
6256
0
          bestX = cMVCand[0].hor;
6257
0
          bestY = cMVCand[0].ver;
6258
0
          sadBest = sadBestCand[0];
6259
6260
0
          int StopSearch = searchStep ? 32 : 16;
6261
0
          if ((searchStep && (sadBest >= tempSadBest)) || (sadBest - m_pcRdCost->getBvCostMultiplePredsIBC(bestX, bestY, useAmvr) <= StopSearch))
6262
0
          {
6263
            //chroma refine
6264
0
            bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6265
6266
0
            bestX = cMVCand[bestCandIdx].hor;
6267
0
            bestY = cMVCand[bestCandIdx].ver;
6268
0
            sadBest = sadBestCand[bestCandIdx];
6269
0
            rcMv.set(bestX, bestY);
6270
0
            ruiCost = sadBest;
6271
0
            goto end;
6272
0
          }
6273
0
        }
6274
23
      }
6275
23
    }
6276
1.03k
  }
6277
6278
1.01k
  bestCandIdx = xIBCSearchMVChromaRefine(cu, roiWidth, roiHeight, cuPelX, cuPelY, sadBestCand, cMVCand);
6279
6280
1.01k
  bestX = cMVCand[bestCandIdx].hor;
6281
1.01k
  bestY = cMVCand[bestCandIdx].ver;
6282
1.01k
  sadBest = sadBestCand[bestCandIdx];
6283
1.01k
  rcMv.set(bestX, bestY);
6284
1.01k
  ruiCost = sadBest;
6285
6286
22.6k
end:
6287
22.6k
  m_numBVs = 0;
6288
22.6k
  m_numBVs = xMergeCandLists(m_acBVs, m_numBVs, (2 * IBC_NUM_CANDIDATES), m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt);
6289
6290
22.6k
  m_defaultCachedBvs->currCnt = 0;
6291
22.6k
  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt, IBC_NUM_CANDIDATES, cMVCand, CHROMA_REFINEMENT_CANDIDATES);
6292
22.6k
  m_defaultCachedBvs->currCnt = xMergeCandLists(m_defaultCachedBvs->m_bvCands, m_defaultCachedBvs->currCnt, IBC_NUM_CANDIDATES, m_acBVs, m_numBVs);
6293
6294
204k
  for (unsigned int cand = 0; cand < CHROMA_REFINEMENT_CANDIDATES; cand++)
6295
181k
  {
6296
181k
    if (cMVCand[cand].hor == 0 && cMVCand[cand].ver == 0)
6297
59.1k
    {
6298
59.1k
      continue;
6299
59.1k
    }
6300
122k
    m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[cMVCand[cand]] = sadBestCand[cand];
6301
122k
  }
6302
6303
22.6k
  return;
6304
1.01k
}
6305
6306
6307
6308
// based on xMotionEstimation
6309
void InterSearch::xIBCEstimation(CodingUnit& cu, PelUnitBuf& origBuf, Mv* pcMvPred, Mv& rcMv, Distortion& ruiCost )
6310
22.7k
{
6311
22.7k
  const int iPicWidth = cu.cs->slice->pps->picWidthInLumaSamples;
6312
22.7k
  const int iPicHeight = cu.cs->slice->pps->picHeightInLumaSamples;
6313
22.7k
  const unsigned int  lcuWidth = cu.cs->slice->sps->CTUSize;
6314
22.7k
  const int           cuPelX = cu.Y().x;
6315
22.7k
  const int           cuPelY = cu.Y().y;
6316
22.7k
  int                 iRoiWidth = cu.lwidth();
6317
22.7k
  int                 iRoiHeight = cu.lheight();
6318
6319
22.7k
  PelUnitBuf* pBuf = &origBuf;
6320
6321
  //  Search key pattern initialization
6322
22.7k
  CPelBuf  tmpPattern = pBuf->Y();
6323
22.7k
  CPelBuf* pcPatternKey = &tmpPattern;
6324
22.7k
  PelBuf tmpOrgLuma;
6325
22.7k
  ReshapeData& reshapeData = cu.cs->picture->reshapeData;
6326
22.7k
  if ((cu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag()))
6327
0
  {
6328
0
    tmpOrgLuma = m_tmpStorageLCU.getCompactBuf(cu.Y());
6329
0
    tmpOrgLuma.rspSignal(tmpPattern, reshapeData.getInvLUT());
6330
0
    pcPatternKey = (CPelBuf*)&tmpOrgLuma;
6331
0
  }
6332
22.7k
  m_lumaClpRng = cu.cs->slice->clpRngs[COMP_Y];
6333
22.7k
  Picture* refPic = cu.slice->pic;
6334
22.7k
  const CPelBuf refBuf = refPic->getRecoBuf(cu.blocks[COMP_Y]);
6335
6336
22.7k
  TZSearchStruct cStruct; 
6337
22.7k
  cStruct.pcPatternKey  = pcPatternKey;
6338
22.7k
  cStruct.iRefStride    = refBuf.stride;
6339
22.7k
  cStruct.piRefY        = refBuf.buf;
6340
22.7k
  CHECK( cu.imv == IMV_HPEL, "IF_IBC" );
6341
22.7k
  cStruct.imvShift      = cu.imv << 1;
6342
22.7k
  cStruct.subShiftMode  = 0;
6343
22.7k
  cStruct.uiBestSad     = MAX_DISTORTION;
6344
6345
22.7k
  m_pcRdCost->getMotionCostIBC(0);
6346
22.7k
  m_pcRdCost->setPredictorsIBC(pcMvPred);
6347
22.7k
  m_pcRdCost->setCostScale(0);
6348
6349
22.7k
  m_pcRdCost->setDistParam(m_cDistParam, *cStruct.pcPatternKey, cStruct.piRefY, cStruct.iRefStride, m_lumaClpRng.bd, COMP_Y, cStruct.subShiftMode);
6350
22.7k
  bool buffered = false;
6351
22.7k
  if (m_pcEncCfg->m_IBCFastMethod)// IBC_FAST_METHOD_BUFFERBV
6352
22.7k
  {
6353
22.7k
    ruiCost = MAX_UINT;
6354
22.7k
    std::unordered_map<Mv, Distortion>& history = m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord;
6355
22.7k
    for (std::unordered_map<Mv, Distortion>::iterator p = history.begin(); p != history.end(); p++)
6356
14
    {
6357
14
      const Mv& bv = p->first;
6358
6359
14
      int xBv = bv.hor;
6360
14
      int yBv = bv.ver;
6361
14
      if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xBv, yBv, lcuWidth))
6362
14
      {
6363
14
        buffered = true;
6364
14
        Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xBv, yBv, cu.cs->sps->AMVR);
6365
14
        m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yBv + xBv;
6366
14
        sad += m_cDistParam.distFunc(m_cDistParam);
6367
14
        if (sad < ruiCost)
6368
14
        {
6369
14
          rcMv = bv;
6370
14
          ruiCost = sad;
6371
14
        }
6372
0
        else if (sad == ruiCost)
6373
0
        {
6374
          // stabilise the search through the unordered list
6375
0
          if (bv.hor < rcMv.hor
6376
0
            || (bv.hor == rcMv.hor && bv.ver < rcMv.ver))
6377
0
          {
6378
            // update the vector.
6379
0
            rcMv = bv;
6380
0
          }
6381
0
        }
6382
14
      }
6383
14
    }
6384
6385
22.7k
    if (buffered)
6386
14
    {
6387
14
      Mv cMvPredEncOnly[IBC_NUM_CANDIDATES];
6388
14
      int nbPreds = 0;
6389
14
      CU::getIbcMVPsEncOnly(cu, cMvPredEncOnly, nbPreds);
6390
6391
14
      for (unsigned int cand = 0; cand < nbPreds; cand++)
6392
0
      {
6393
0
        int xPred = cMvPredEncOnly[cand].hor;
6394
0
        int yPred = cMvPredEncOnly[cand].ver;
6395
6396
0
        if (searchBvIBC(cu, cuPelX, cuPelY, iRoiWidth, iRoiHeight, iPicWidth, iPicHeight, xPred, yPred, lcuWidth))
6397
0
        {
6398
0
          Distortion sad = m_pcRdCost->getBvCostMultiplePredsIBC(xPred, yPred, cu.cs->sps->AMVR);
6399
0
          m_cDistParam.cur.buf = cStruct.piRefY + cStruct.iRefStride * yPred + xPred;
6400
0
          sad += m_cDistParam.distFunc(m_cDistParam);
6401
0
          if (sad < ruiCost)
6402
0
          {
6403
0
            rcMv.set(xPred, yPred);
6404
0
            ruiCost = sad;
6405
0
          }
6406
0
          else if (sad == ruiCost)
6407
0
          {
6408
            // stabilise the search through the unordered list
6409
0
            if (xPred < rcMv.hor
6410
0
              || (xPred == rcMv.hor && yPred < rcMv.ver))
6411
0
            {
6412
              // update the vector.
6413
0
              rcMv.set(xPred, yPred);
6414
0
            }
6415
0
          }
6416
0
          m_ctuRecord[cu.lumaPos()][cu.lumaSize()].bvRecord[Mv(xPred, yPred)] = sad;
6417
0
        }
6418
0
      }
6419
14
    }
6420
22.7k
  }
6421
6422
22.7k
  if (!buffered)
6423
22.6k
  {
6424
22.6k
    Mv        cMvSrchRngLT;
6425
22.6k
    Mv        cMvSrchRngRB;
6426
6427
    // assume that intra BV is integer-pel precision
6428
22.6k
    xSetIntraSearchRangeIBC(cu, cu.lwidth(), cu.lheight(), cMvSrchRngLT, cMvSrchRngRB);
6429
6430
    //  Do integer search
6431
22.6k
    xIntraPatternSearchIBC(cu, cStruct, rcMv, ruiCost, &cMvSrchRngLT, &cMvSrchRngRB, pcMvPred);
6432
22.6k
  }
6433
22.7k
}
6434
// based on xSetSearchRange
6435
// Derives the IBC search window for the luma block of `cu`.
//
// The window is expressed as displacements relative to the CU origin:
//   rcMvSrchRngLT receives the left/top limit,
//   rcMvSrchRngRB receives the right/bottom limit.
// It spans from `numLeftCTUs` CTUs left of the current CTU up to the right and
// bottom edges of the current CTU (reduced by the block size), and is then
// passed through clipMv().
void InterSearch::xSetIntraSearchRangeIBC(CodingUnit& cu, int iRoiWidth, int iRoiHeight, Mv& rcMvSrchRngLT, Mv& rcMvSrchRngRB)
{
  const int ctuSize     = cu.cs->slice->sps->CTUSize;
  const int ctuSizeLog2 = floorLog2(ctuSize);

  // position of the CU's luma origin inside its CTU
  const int offsetInCtuX = cu.Y().x % ctuSize;
  const int offsetInCtuY = cu.Y().y % ctuSize;

  // evaluates to 1, 3 or 15 for a log2 CTU size of 7, 6 or 5 respectively
  const int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);

  rcMvSrchRngLT.hor = -(numLeftCTUs * ctuSize + offsetInCtuX);
  rcMvSrchRngLT.ver = -offsetInCtuY;

  rcMvSrchRngRB.hor = ctuSize - offsetInCtuX - iRoiWidth;
  rcMvSrchRngRB.ver = ctuSize - offsetInCtuY - iRoiHeight;

  // clipMv() is applied to the limits scaled up by two bits; the scaling is
  // undone again once clipping is finished
  rcMvSrchRngLT <<= 2;
  rcMvSrchRngRB <<= 2;

  // force the sub-picture clipping flag on for the two clipMv() calls only
  const bool savedClipMvInSubPic = m_clipMvInSubPic;
  m_clipMvInSubPic = true;
  clipMv(rcMvSrchRngLT, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, *cu.cs->pps, m_clipMvInSubPic);
  clipMv(rcMvSrchRngRB, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv, *cu.cs->pps, m_clipMvInSubPic);
  m_clipMvInSubPic = savedClipMvInSubPic;

  rcMvSrchRngLT >>= 2;
  rcMvSrchRngRB >>= 2;
}
6469
6470
// Encoder-side IBC block-vector search for `cu`.
//
// Steps:
//   1. build the BV predictor lists for cu.imv == IMV_4PEL and cu.imv == IMV_OFF,
//   2. run xIBCEstimation() to obtain an integer-pel block vector,
//   3. pick, per predictor, the cheaper of the regular and the 4-pel coding of
//      that vector and set cu.imv accordingly,
//   4. store vector, predictor index and vector difference in `cu`.
//
// \param cu           coding unit to search for; imv, mv, mvpIdx, mvd and refIdx
//                     of REF_PIC_LIST_0 are written
// \param partitioner  not used by this function
// \return false when the estimation ends with a (0,0) vector, true otherwise
bool InterSearch::predIBCSearch(CodingUnit& cu, Partitioner& partitioner)
{
  // predictor candidates as generated with the CU flagged IMV_4PEL
  cu.imv = IMV_4PEL;
  AMVPInfo amvpInfo4Pel;
  CU::fillIBCMvpCand(cu, amvpInfo4Pel);

  // predictor candidates with IMV off; this is also the initial imv state
  cu.imv = IMV_OFF;
  AMVPInfo amvpInfo;
  CU::fillIBCMvpCand(cu, amvpInfo);

  // store in full pel accuracy, shift before use in search
  Mv cMv, cMvPred[2];
  cMvPred[0] = amvpInfo.mvCand[0];
  cMvPred[0].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);
  cMvPred[1] = amvpInfo.mvCand[1];
  cMvPred[1].changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);

  int iBvpNum    = 2;
  int bvpIdxBest = 0;
  cMv.setZero();
  Distortion cost = 0;
  if (cu.cs->sps->maxNumIBCMergeCand == 1)
  {
    // only one predictor available: mirror it into the second slot
    iBvpNum    = 1;
    cMvPred[1] = cMvPred[0];
  }

  // NOTE(review): cMv has just been reset, so this guard presumably always
  // holds; it reads like the hook for an earlier (hash) search stage.
  if (cMv.hor == 0 && cMv.ver == 0)
  {
    // if hash search does not work or is not enabled
    PelUnitBuf origBuf = cu.cs->getOrgBuf(cu);
    xIBCEstimation(cu, origBuf, cMvPred, cMv, cost);
  }

  if (cMv.hor == 0 && cMv.ver == 0)
  {
    // no usable block vector found
    return false;
  }

  /// ibc search
  /////////////////////////////////////////////////////////
  unsigned int bitsBVPBest = MAX_INT;
  m_pcRdCost->setCostScale(0);

  for (int bvpIdxTemp = 0; bvpIdxTemp < iBvpNum; bvpIdxTemp++)
  {
    // bits for coding the vector against the integer-pel predictor
    m_pcRdCost->setPredictor(cMvPred[bvpIdxTemp]);

    const unsigned int bitsBVPTemp = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor, cMv.ver, 0);

    if (bitsBVPTemp < bitsBVPBest)
    {
      bitsBVPBest = bitsBVPTemp;
      bvpIdxBest  = bvpIdxTemp;

      if (cu.cs->sps->AMVR && cMv != cMvPred[bvpIdxTemp])
      {
        cu.imv = IMV_FPEL; // set as full-pel
      }
      else
      {
        cu.imv = IMV_OFF;  // AMVR disabled or vector equals its predictor
      }
    }

    // bits for coding the vector at 4-pel resolution; only evaluated when
    // both components are multiples of four and AMVR is enabled
    unsigned int bitsBVPQP = MAX_UINT;

    Mv mvPredQuadPel;
    if ((cMv.hor % 4 == 0) && (cMv.ver % 4 == 0) && (cu.cs->sps->AMVR))
    {
      mvPredQuadPel = amvpInfo4Pel.mvCand[bvpIdxTemp];

      mvPredQuadPel.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_4PEL);

      m_pcRdCost->setPredictor(mvPredQuadPel);

      bitsBVPQP = m_pcRdCost->getBitsOfVectorWithPredictor(cMv.hor >> 2, cMv.ver >> 2, 0);
    }

    // bring the 4-pel predictor back to integer precision for the comparison with cMv
    mvPredQuadPel.changePrecision(MV_PRECISION_4PEL, MV_PRECISION_INT);
    if (bitsBVPQP < bitsBVPBest && cMv != mvPredQuadPel)
    {
      bitsBVPBest = bitsBVPQP;
      bvpIdxBest  = bvpIdxTemp;

      if (cu.cs->sps->AMVR)
      {
        cu.imv = IMV_4PEL;
      }
    }
  }

  cMv.changePrecision(MV_PRECISION_INT, MV_PRECISION_INTERNAL);
  cu.mv[REF_PIC_LIST_0][0] = cMv; // store in fractional pel accuracy

  cu.mvpIdx[REF_PIC_LIST_0] = bvpIdxBest;

  // the vector difference is taken against the predictor list matching the chosen imv mode
  if (cu.imv == IMV_4PEL && cMv != amvpInfo4Pel.mvCand[bvpIdxBest])
  {
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo4Pel.mvCand[bvpIdxBest];
  }
  else
  {
    cu.mvd[REF_PIC_LIST_0][0] = cMv - amvpInfo.mvCand[bvpIdxBest];
  }

  if (cu.mvd[REF_PIC_LIST_0][0] == Mv(0, 0))
  {
    cu.imv = IMV_OFF;
  }

  // sanity checks, written as single assertions so that no empty if-body
  // remains when NDEBUG is defined
  assert(cu.imv != IMV_4PEL || ((cMv.hor % 16 == 0) && (cMv.ver % 16 == 0)));
  assert(!cu.cs->sps->AMVR || cu.imv > 0 || cu.mvd[REF_PIC_LIST_0][0] == Mv());

  cu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF;

  return true;
}
6581
6582
6583
// Walks the split series from the CTU root down towards the partition that
// contains `pos` and, at every level, compares `refPos` against the bounds of
// that partition.
//
// Returns true as soon as `refPos` lies beyond the bottom edge (for splits
// that divide vertically stacked parts) or beyond the right edge of the
// partition holding `pos`; returns false as soon as it lies before the top or
// left edge, or when the series holds no further recognised split.
// Both positions have to be inside the same CTU (checked in debug builds).
static inline bool isYPartBefore( SplitSeries series, const int ctuSizeLog2, const Position& refPos, const Position& pos )
{
#ifndef NDEBUG
  const int refCtuX = refPos.x >> ctuSizeLog2;
  const int refCtuY = refPos.y >> ctuSizeLog2;
  const int posCtuX = pos.x >> ctuSizeLog2;
  const int posCtuY = pos.y >> ctuSizeLog2;

  CHECK( refCtuX != posCtuX || refCtuY != posCtuY, "This method can only be applied for positions within the same CTU" );

#endif
  // coordinates relative to the CTU origin
  const int ctuMask = ( 1 << ctuSizeLog2 ) - 1;

  const int refX = refPos.x & ctuMask;
  const int refY = refPos.y & ctuMask;
  const int posX = pos.x & ctuMask;
  const int posY = pos.y & ctuMask;

  // current partition (origin and size), starting with the whole CTU
  int x = 0;
  int y = 0;
  int w = 1 << ctuSizeLog2;
  int h = 1 << ctuSizeLog2;

  for( ;; series >>= SPLIT_DMULT )
  {
    // purely vertical splits leave y/h untouched, so only x is compared for them
    bool compareY = true;

    switch( PartSplit( series & SPLIT_MASK ) )
    {
    case CU_QUAD_SPLIT:
      w >>= 1;
      if( posX >= x + w ) { x += w; }
      h >>= 1;
      if( posY >= y + h ) { y += h; }
      break;

    case CU_HORZ_SPLIT:
      h >>= 1;
      if( posY >= y + h ) { y += h; }
      break;

    case CU_VERT_SPLIT:
      w >>= 1;
      if( posX >= x + w ) { x += w; }
      compareY = false;
      break;

    case CU_TRIH_SPLIT:
      // parts of height h/4, h/2, h/4
      h >>= 2;
      if( posY >= y + h ) { y += h; h <<= 1; }
      if( posY >= y + h ) { y += h; h >>= 1; }
      break;

    case CU_TRIV_SPLIT:
      // parts of width w/4, w/2, w/4
      w >>= 2;
      if( posX >= x + w ) { x += w; w <<= 1; }
      if( posX >= x + w ) { x += w; w >>= 1; }
      compareY = false;
      break;

    default:
      return false;
    }

    if( compareY )
    {
      if( refY >= y + h ) { return true;  }
      if( refY <  y     ) { return false; }
    }

    if( refX >= x + w ) { return true;  }
    if( refX <  x     ) { return false; }

    // refPos is inside the same partition as pos: descend one split level
  }

  // not reached, the loop above only exits via return
  return false;
}
6650
6651
bool InterSearch::searchBvIBC(const CodingUnit& cu, int xPos, int yPos, int width, int height, int picWidth, int picHeight, int xBv, int yBv, int ctuSize) const
6652
1.95M
{
6653
1.95M
  const int ctuSizeLog2 = Log2(ctuSize);
6654
6655
1.95M
  int refRightX  = xPos + xBv + width  - 1;
6656
1.95M
  int refBottomY = yPos + yBv + height - 1;
6657
6658
1.95M
  int refLeftX = xPos + xBv;
6659
1.95M
  int refTopY  = yPos + yBv;
6660
6661
1.95M
  if ((xPos + xBv) < 0)
6662
11.1k
  {
6663
11.1k
    return false;
6664
11.1k
  }
6665
1.94M
  if (refRightX >= picWidth)
6666
0
  {
6667
0
    return false;
6668
0
  }
6669
6670
1.94M
  if ((yPos + yBv) < 0)
6671
0
  {
6672
0
    return false;
6673
0
  }
6674
1.94M
  if (refBottomY >= picHeight)
6675
0
  {
6676
0
    return false;
6677
0
  }
6678
1.94M
  if ((xBv + width) > 0 && (yBv + height) > 0)
6679
156k
  {
6680
156k
    return false;
6681
156k
  }
6682
6683
  // Don't search the above CTU row
6684
1.79M
  if (refTopY >> ctuSizeLog2 < yPos >> ctuSizeLog2)
6685
0
    return false;
6686
6687
  // Don't search the below CTU row
6688
1.79M
  if (refBottomY >> ctuSizeLog2 > yPos >> ctuSizeLog2)
6689
0
  {
6690
0
    return false;
6691
0
  }
6692
6693
1.79M
  unsigned curTileIdx = cu.cs->pps->getTileIdx(cu.lumaPos());
6694
1.79M
  unsigned refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refTopY));
6695
1.79M
  if (curTileIdx != refTileIdx)
6696
0
  {
6697
0
    return false;
6698
0
  }
6699
1.79M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refLeftX, refBottomY));
6700
1.79M
  if (curTileIdx != refTileIdx)
6701
0
  {
6702
0
    return false;
6703
0
  }
6704
1.79M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refTopY));
6705
1.79M
  if (curTileIdx != refTileIdx)
6706
0
  {
6707
0
    return false;
6708
0
  }
6709
1.79M
  refTileIdx = cu.cs->pps->getTileIdx(Position(refRightX, refBottomY));
6710
1.79M
  if (curTileIdx != refTileIdx)
6711
0
  {
6712
0
    return false;
6713
0
  }
6714
6715
1.79M
  const Position cuPos{ xPos, yPos };
6716
6717
  //int numLeftCTUs = (1 << ((7 - ctuSizeLog2) << 1)) - ((ctuSizeLog2 < 7) ? 1 : 0);
6718
1.79M
  static const int numLeftCTUsLUT[3] = { 15, 3, 1 };
6719
6720
  // in the same CTU line
6721
1.79M
  const int numLeftCTUs = numLeftCTUsLUT[ctuSizeLog2 - 5];
6722
6723
1.79M
  if( ( refRightX >> ctuSizeLog2 <= xPos >> ctuSizeLog2 ) && ( refLeftX >> ctuSizeLog2 >= ( xPos >> ctuSizeLog2 ) - numLeftCTUs ) )
6724
1.79M
  {
6725
    // in the same CTU, or left CTU
6726
    // if part of ref block is in the left CTU, some area can be referred from the not-yet updated local CTU buffer
6727
1.79M
    if( ( ctuSizeLog2 == 7 ) && ( ( refLeftX >> ctuSizeLog2 ) == ( ( xPos >> ctuSizeLog2 ) - 1 ) ) )
6728
913k
    {
6729
      // ref block's collocated block in current CTU
6730
913k
      const Position refPosCol64x64{ ( refLeftX + ctuSize ) & ~63, refTopY & ~63 };
6731
913k
      if( refPosCol64x64 == Position{ xPos & ~63, yPos & ~63 } )
6732
493k
        return false;
6733
6734
      //CodingUnit* curef = cu.cs->getCU(refPosCol64x64, CH_L, cu.treeType);
6735
      //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6736
419k
      bool isDecomp = isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosCol64x64 );
6737
419k
      if( isDecomp )
6738
213k
      {
6739
213k
        return false;
6740
213k
      }
6741
419k
    }
6742
1.79M
  }
6743
18.4E
  else
6744
18.4E
    return false;
6745
6746
  // in the same CTU, or valid area from left CTU. Check if the reference block is already coded
6747
1.08M
  const Position refPosBR{ refRightX, refBottomY };
6748
  //CodingUnit* curef = cu.cs->getCU(refPosBR, CH_L, cu.treeType);
6749
  //bool isDecomp = curef && ((cu.cs != curef->cs) || cu.idx < curef->idx);
6750
1.08M
  bool isDecomp = ( ( refPosBR.x >> ctuSizeLog2 ) < ( cuPos.x >> ctuSizeLog2 ) ) || ( refRightX < xPos && refBottomY < yPos ) || isYPartBefore( cu.splitSeries, ctuSizeLog2, cuPos, refPosBR );
6751
6752
1.08M
  return isDecomp;
6753
1.79M
}
6754
6755
} // namespace vvenc
6756
6757
//! \}
6758