Coverage Report

Created: 2025-07-23 08:18

/src/x265/source/common/cudata.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
#include "common.h"
26
#include "frame.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "mv.h"
30
#include "cudata.h"
31
/* Motion vector magnitude limit (2^14). The replacement list is parenthesized so
 * the shift is evaluated as a unit wherever the macro is expanded, e.g. in
 * MAX_MV + 1 or -MAX_MV. */
#define MAX_MV (1 << 14)
32
33
using namespace X265_NS;
34
35
/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
36
37
0
/* Store val into the single byte at dst */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
38
39
0
/* Copy 4 bytes from src to dst. A fixed-size memcpy is used instead of a
 * uint32_t pointer cast so the access does not depend on the buffers'
 * alignment or break type-aliasing rules; src and dst must not overlap. */
static void copy4(uint8_t* dst, uint8_t* src)  { memcpy(dst, src, 4); }
40
0
/* Fill 4 bytes at dst with val. memset replaces the uint32_t pointer store so
 * the write does not depend on dst's alignment or break type-aliasing rules. */
static void bcast4(uint8_t* dst, uint8_t val)  { memset(dst, val, 4); }
41
42
0
/* Copy 16 bytes from src to dst. A fixed-size memcpy is used instead of
 * uint64_t pointer casts so the access does not depend on the buffers'
 * alignment or break type-aliasing rules; src and dst must not overlap. */
static void copy16(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 16); }
43
0
/* Fill 16 bytes at dst with val. memset replaces the uint64_t pointer stores so
 * the write does not depend on dst's alignment or break type-aliasing rules. */
static void bcast16(uint8_t* dst, uint8_t val) { memset(dst, val, 16); }
44
45
0
/* Copy 64 bytes from src to dst. A fixed-size memcpy is used instead of eight
 * uint64_t pointer-cast stores so the access does not depend on the buffers'
 * alignment or break type-aliasing rules; src and dst must not overlap. */
static void copy64(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 64); }
49
0
/* Fill 64 bytes at dst with val. memset replaces the eight uint64_t pointer
 * stores so the write does not depend on dst's alignment or break
 * type-aliasing rules. */
static void bcast64(uint8_t* dst, uint8_t val) { memset(dst, val, 64); }
52
53
/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
54
 * but hand-written assembly would beat it. */
55
0
/* Copy 256 bytes from src to dst via the library memcpy */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;
    memcpy(dst, src, numBytes);
}
56
0
/* Fill 256 bytes at dst with val via the library memset */
static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;
    memset(dst, val, numBytes);
}
57
58
namespace {
59
// file private namespace
60
61
/* Check whether 2 addresses point to the same column */
62
inline bool isEqualCol(int addrA, int addrB)
63
0
{
64
0
    return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0;
65
0
}
66
67
/* Check whether 2 addresses point to the same row */
68
inline bool isEqualRow(int addrA, int addrB)
69
0
{
70
0
    return ((addrA ^ addrB) < RASTER_SIZE);
71
0
}
72
73
/* Check whether 2 addresses point to the same row or column */
74
inline bool isEqualRowOrCol(int addrA, int addrB)
75
0
{
76
0
    return isEqualCol(addrA, addrB) || isEqualRow(addrA, addrB);
77
0
}
78
79
/* Check whether one address points to the first column */
80
inline bool isZeroCol(int addr)
81
0
{
82
0
    return (addr & (RASTER_SIZE - 1)) == 0;
83
0
}
84
85
/* Check whether one address points to the first row */
86
inline bool isZeroRow(int addr)
87
0
{
88
0
    return (addr < RASTER_SIZE);
89
0
}
90
91
/* Check whether one address points to a column whose index is smaller than a given value */
92
inline bool lessThanCol(int addr, int val)
93
0
{
94
0
    return (addr & (RASTER_SIZE - 1)) < val;
95
0
}
96
97
/* Check whether one address points to a row whose index is smaller than a given value */
98
inline bool lessThanRow(int addr, int val)
99
0
{
100
    // addr / numUnits < val
101
0
    return (addr >> LOG2_RASTER_SIZE) < val;
102
0
}
103
104
inline MV scaleMv(MV mv, int scale)
105
0
{
106
0
    int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
107
0
    int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
108
109
0
    return MV((int32_t)mvx, (int32_t)mvy);
110
0
}
111
112
}
113
114
/* Construct a CUData with every byte of the object cleared to zero; the
 * per-partition pointers are assigned later by initialize() */
CUData::CUData()
{
    memset(this, 0, sizeof(CUData));
}
118
119
void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance)
120
0
{
121
0
    int csp = param.internalCsp;
122
0
    m_chromaFormat  = csp;
123
0
    m_hChromaShift  = CHROMA_H_SHIFT(csp);
124
0
    m_vChromaShift  = CHROMA_V_SHIFT(csp);
125
0
    m_numPartitions = param.num4x4Partitions >> (depth * 2);
126
127
0
    if (!s_partSet[0])
128
0
    {
129
0
        s_numPartInCUSize = 1 << param.unitSizeDepth;
130
0
        switch (param.maxLog2CUSize)
131
0
        {
132
0
        case 6:
133
0
            s_partSet[0] = bcast256;
134
0
            s_partSet[1] = bcast64;
135
0
            s_partSet[2] = bcast16;
136
0
            s_partSet[3] = bcast4;
137
0
            s_partSet[4] = bcast1;
138
0
            break;
139
0
        case 5:
140
0
            s_partSet[0] = bcast64;
141
0
            s_partSet[1] = bcast16;
142
0
            s_partSet[2] = bcast4;
143
0
            s_partSet[3] = bcast1;
144
0
            s_partSet[4] = NULL;
145
0
            break;
146
0
        case 4:
147
0
            s_partSet[0] = bcast16;
148
0
            s_partSet[1] = bcast4;
149
0
            s_partSet[2] = bcast1;
150
0
            s_partSet[3] = NULL;
151
0
            s_partSet[4] = NULL;
152
0
            break;
153
0
        default:
154
0
            X265_CHECK(0, "unexpected CTU size\n");
155
0
            break;
156
0
        }
157
0
    }
158
159
0
    switch (m_numPartitions)
160
0
    {
161
0
    case 256: // 64x64 CU
162
0
        m_partCopy = copy256;
163
0
        m_partSet = bcast256;
164
0
        m_subPartCopy = copy64;
165
0
        m_subPartSet = bcast64;
166
0
        break;
167
0
    case 64:  // 32x32 CU
168
0
        m_partCopy = copy64;
169
0
        m_partSet = bcast64;
170
0
        m_subPartCopy = copy16;
171
0
        m_subPartSet = bcast16;
172
0
        break;
173
0
    case 16:  // 16x16 CU
174
0
        m_partCopy = copy16;
175
0
        m_partSet = bcast16;
176
0
        m_subPartCopy = copy4;
177
0
        m_subPartSet = bcast4;
178
0
        break;
179
0
    case 4:   // 8x8 CU
180
0
        m_partCopy = copy4;
181
0
        m_partSet = bcast4;
182
0
        m_subPartCopy = NULL;
183
0
        m_subPartSet = NULL;
184
0
        break;
185
0
    default:
186
0
        X265_CHECK(0, "unexpected CU partition count\n");
187
0
        break;
188
0
    }
189
190
0
    if (csp == X265_CSP_I400)
191
0
    {
192
        /* Each CU's data is layed out sequentially within the charMemBlock */
193
0
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance;
194
195
0
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
196
0
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
197
0
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
198
0
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
199
0
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
200
0
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
201
0
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
202
0
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
203
0
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
204
0
        m_partSize           = charBuf; charBuf += m_numPartitions;
205
0
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
206
0
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
207
0
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
208
0
        m_interDir           = charBuf; charBuf += m_numPartitions;
209
0
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
210
0
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
211
0
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
212
0
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
213
0
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
214
0
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
215
216
0
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition
217
218
0
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
219
0
        m_mv[1]  = m_mv[0] +  m_numPartitions;
220
0
        m_mvd[0] = m_mv[1] +  m_numPartitions;
221
0
        m_mvd[1] = m_mvd[0] + m_numPartitions;
222
223
0
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
224
225
0
        uint32_t cuSize = param.maxCUSize >> depth;
226
0
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
227
0
        m_trCoeff[1] = m_trCoeff[2] = 0;
228
0
        m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
229
0
        m_fAc_den[0] = m_fDc_den[0] = 0;
230
0
    }
231
0
    else
232
0
    {
233
        /* Each CU's data is layed out sequentially within the charMemBlock */
234
0
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
235
236
0
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
237
0
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
238
0
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
239
0
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
240
0
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
241
0
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
242
0
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
243
0
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
244
0
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
245
0
        m_partSize           = charBuf; charBuf += m_numPartitions;
246
0
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
247
0
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
248
0
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
249
0
        m_interDir           = charBuf; charBuf += m_numPartitions;
250
0
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
251
0
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
252
0
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
253
0
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
254
0
        m_transformSkip[1]   = charBuf; charBuf += m_numPartitions;
255
0
        m_transformSkip[2]   = charBuf; charBuf += m_numPartitions;
256
0
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
257
0
        m_cbf[1]             = charBuf; charBuf += m_numPartitions;
258
0
        m_cbf[2]             = charBuf; charBuf += m_numPartitions;
259
0
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
260
261
0
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
262
263
0
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
264
0
        m_mv[1]  = m_mv[0] +  m_numPartitions;
265
0
        m_mvd[0] = m_mv[1] +  m_numPartitions;
266
0
        m_mvd[1] = m_mvd[0] + m_numPartitions;
267
268
0
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
269
270
0
        uint32_t cuSize = param.maxCUSize >> depth;
271
0
        uint32_t sizeL = cuSize * cuSize;
272
0
        uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
273
0
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
274
0
        m_trCoeff[1] = m_trCoeff[0] + sizeL;
275
0
        m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
276
0
        for (int i = 0; i < 3; i++)
277
0
            m_fAc_den[i] = m_fDc_den[i] = 0;
278
0
    }
279
0
}
280
281
void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
282
0
{
283
0
    m_encData       = frame.m_encData;
284
0
    m_slice         = m_encData->m_slice;
285
0
    m_cuAddr        = cuAddr;
286
0
    m_cuPelX        = (cuAddr % m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
287
0
    m_cuPelY        = (cuAddr / m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
288
0
    m_absIdxInCTU   = 0;
289
0
    m_numPartitions = m_encData->m_param->num4x4Partitions;
290
0
    m_bFirstRowInSlice = (uint8_t)firstRowInSlice;
291
0
    m_bLastRowInSlice  = (uint8_t)lastRowInSlice;
292
0
    m_bLastCuInSlice   = (uint8_t)lastCuInSlice;
293
#if ENABLE_SCC_EXT
294
    m_lastIntraBCMv[0].set(0, 0);
295
    m_lastIntraBCMv[1].set(0, 0);
296
#endif
297
298
    /* sequential memsets */
299
0
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
300
0
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
301
0
    m_partSet(m_log2CUSize,   (uint8_t)m_slice->m_param->maxLog2CUSize);
302
0
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
303
0
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
304
0
    m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
305
0
    if (m_slice->m_sliceType != I_SLICE)
306
0
    {
307
0
        m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
308
0
        m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
309
0
    }
310
311
0
    X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
312
313
    /* initialize the remaining CU data in one memset */
314
0
    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
315
316
0
    for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
317
0
        m_refTuDepth[i] = -1;
318
319
0
    m_vbvAffected = false;
320
321
0
    uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
322
0
    m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
323
0
    m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
324
0
    m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
325
0
    m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
326
0
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
327
0
}
328
329
// initialize Sub partition
330
#if ENABLE_SCC_EXT
331
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp, MV lastIntraBCMv[2])
332
#else
333
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
334
#endif
335
0
{
336
0
    m_absIdxInCTU   = cuGeom.absPartIdx;
337
0
    m_encData       = ctu.m_encData;
338
0
    m_slice         = ctu.m_slice;
339
0
    m_cuAddr        = ctu.m_cuAddr;
340
0
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
341
0
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
342
0
    m_cuLeft        = ctu.m_cuLeft;
343
0
    m_cuAbove       = ctu.m_cuAbove;
344
0
    m_cuAboveLeft   = ctu.m_cuAboveLeft;
345
0
    m_cuAboveRight  = ctu.m_cuAboveRight;
346
0
    m_bFirstRowInSlice = ctu.m_bFirstRowInSlice;
347
0
    m_bLastRowInSlice = ctu.m_bLastRowInSlice;
348
0
    m_bLastCuInSlice = ctu.m_bLastCuInSlice;
349
0
    for (int i = 0; i < 3; i++)
350
0
    {
351
0
        m_fAc_den[i] = ctu.m_fAc_den[i];
352
0
        m_fDc_den[i] = ctu.m_fDc_den[i];
353
0
    }
354
355
0
    X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
356
357
0
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
358
0
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
359
360
0
    m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
361
0
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
362
0
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
363
0
    m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
364
0
    m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
365
0
    m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
366
0
    m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
367
368
    /* initialize the remaining CU data in one memset */
369
0
    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
370
0
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
371
372
#if ENABLE_SCC_EXT
373
    if (lastIntraBCMv)
374
    {
375
        for (int i = 0; i < 2; i++)
376
            m_lastIntraBCMv[i] = lastIntraBCMv[i];
377
    }
378
#endif
379
0
}
380
381
/* Copy the results of a sub-part (split) CU to the parent CU */
382
void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
383
0
{
384
0
    X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
385
386
0
    uint32_t offset = childGeom.numPartitions * subPartIdx;
387
388
0
    m_bFirstRowInSlice = subCU.m_bFirstRowInSlice;
389
0
    m_bLastCuInSlice = subCU.m_bLastCuInSlice;
390
391
0
    m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
392
0
    m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis);
393
0
    m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
394
0
    m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
395
0
    m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
396
0
    m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
397
0
    m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
398
0
    m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
399
0
    m_subPartCopy(m_predMode + offset, subCU.m_predMode);
400
0
    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
401
0
    m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
402
0
    m_subPartCopy(m_interDir + offset, subCU.m_interDir);
403
0
    m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
404
0
    m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
405
0
    m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
406
407
0
    m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
408
0
    m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
409
410
0
    memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
411
0
    memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
412
0
    memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
413
0
    memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
414
415
0
    memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t));
416
417
0
    uint32_t tmp = 1 << ((m_slice->m_param->maxLog2CUSize - childGeom.depth) * 2);
418
0
    uint32_t tmp2 = subPartIdx * tmp;
419
0
    memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
420
421
0
    if (subCU.m_chromaFormat != X265_CSP_I400)
422
0
    {
423
0
        m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
424
0
        m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
425
0
        m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
426
0
        m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
427
0
        m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
428
429
0
        uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
430
0
        uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
431
0
        memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
432
0
        memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
433
0
    }
434
#if ENABLE_SCC_EXT
435
    for (int i = 0; i < 2; i++)
436
        m_lastIntraBCMv[i] = subCU.m_lastIntraBCMv[i];
437
#endif
438
0
}
439
440
/* If a sub-CU part is not present (off the edge of the picture) its depth and
441
 * log2size should still be configured */
442
void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
443
0
{
444
0
    uint32_t offset = childGeom.numPartitions * subPartIdx;
445
0
    m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
446
0
    m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
447
0
}
448
449
/* Copy all CU data from one instance to the next, except set lossless flag
450
 * This will only get used when --cu-lossless is enabled but --lossless is not. */
451
void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
452
0
{
453
    /* Start by making an exact copy */
454
0
    m_encData      = cu.m_encData;
455
0
    m_slice        = cu.m_slice;
456
0
    m_cuAddr       = cu.m_cuAddr;
457
0
    m_cuPelX       = cu.m_cuPelX;
458
0
    m_cuPelY       = cu.m_cuPelY;
459
0
    m_cuLeft       = cu.m_cuLeft;
460
0
    m_cuAbove      = cu.m_cuAbove;
461
0
    m_cuAboveLeft  = cu.m_cuAboveLeft;
462
0
    m_cuAboveRight = cu.m_cuAboveRight;
463
0
    m_absIdxInCTU  = cuGeom.absPartIdx;
464
0
    m_numPartitions = cuGeom.numPartitions;
465
0
    memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
466
0
    memcpy(m_mv[0],  cu.m_mv[0],  m_numPartitions * sizeof(MV));
467
0
    memcpy(m_mv[1],  cu.m_mv[1],  m_numPartitions * sizeof(MV));
468
0
    memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
469
0
    memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
470
0
    memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t));
471
472
    /* force TQBypass to true */
473
0
    m_partSet(m_tqBypass, true);
474
475
    /* clear residual coding flags */
476
0
    m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
477
0
    m_partSet(m_tuDepth, 0);
478
0
    m_partSet(m_cbf[0], 0);
479
0
    m_partSet(m_transformSkip[0], 0);
480
481
0
    if (cu.m_chromaFormat != X265_CSP_I400)
482
0
    {
483
0
        m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
484
0
        m_partSet(m_cbf[1], 0);
485
0
        m_partSet(m_cbf[2], 0);
486
0
        m_partSet(m_transformSkip[1], 0);
487
0
        m_partSet(m_transformSkip[2], 0);
488
0
    }
489
0
}
490
491
/* Copy completed predicted CU to CTU in picture */
492
void CUData::copyToPic(uint32_t depth) const
493
0
{
494
0
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
495
496
0
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
497
0
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
498
0
    m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
499
0
    m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
500
0
    m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
501
0
    m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
502
0
    m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
503
0
    m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
504
0
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
505
0
    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
506
0
    m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
507
0
    m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
508
0
    m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
509
0
    m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
510
0
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
511
0
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
512
0
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
513
514
0
    memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
515
0
    memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
516
0
    memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
517
0
    memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
518
519
0
    memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t));
520
521
0
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
522
0
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
523
0
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
524
525
0
    if (ctu.m_chromaFormat != X265_CSP_I400)
526
0
    {
527
0
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
528
0
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
529
0
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
530
0
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
531
0
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
532
533
0
        uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
534
0
        uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
535
0
        memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
536
0
        memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
537
0
    }
538
0
}
539
540
/* The reverse of copyToPic, called only by encodeResidue */
541
void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp)
542
0
{
543
0
    m_encData       = ctu.m_encData;
544
0
    m_slice         = ctu.m_slice;
545
0
    m_cuAddr        = ctu.m_cuAddr;
546
0
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
547
0
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
548
0
    m_absIdxInCTU   = cuGeom.absPartIdx;
549
0
    m_numPartitions = cuGeom.numPartitions;
550
551
    /* copy out all prediction info for this part */
552
0
    if (copyQp)
553
0
    {
554
0
        m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
555
0
        m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU);
556
0
    }
557
558
0
    m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
559
0
    m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
560
0
    m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
561
0
    m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
562
0
    m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
563
0
    m_partCopy(m_cuDepth,      ctu.m_cuDepth + m_absIdxInCTU);
564
0
    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
565
0
    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
566
0
    m_partCopy(m_mergeFlag,    ctu.m_mergeFlag + m_absIdxInCTU);
567
0
    m_partCopy(m_interDir,     ctu.m_interDir + m_absIdxInCTU);
568
0
    m_partCopy(m_mvpIdx[0],    ctu.m_mvpIdx[0] + m_absIdxInCTU);
569
0
    m_partCopy(m_mvpIdx[1],    ctu.m_mvpIdx[1] + m_absIdxInCTU);
570
0
    m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
571
572
0
    memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
573
0
    memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
574
0
    memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
575
0
    memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
576
577
0
    memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t));
578
579
    /* clear residual coding flags */
580
0
    m_partSet(m_tuDepth, 0);
581
0
    m_partSet(m_transformSkip[0], 0);
582
0
    m_partSet(m_cbf[0], 0);
583
584
0
    if (csp != X265_CSP_I400)
585
0
    {        
586
0
        m_partSet(m_transformSkip[1], 0);
587
0
        m_partSet(m_transformSkip[2], 0);
588
0
        m_partSet(m_cbf[1], 0);
589
0
        m_partSet(m_cbf[2], 0);
590
0
    }
591
0
}
592
593
/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
594
void CUData::updatePic(uint32_t depth, int picCsp) const
595
0
{
596
0
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
597
598
0
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
599
0
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
600
0
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
601
0
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
602
0
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
603
0
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
604
605
0
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
606
0
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
607
0
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
608
609
0
    if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400)
610
0
    {
611
0
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
612
0
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
613
614
0
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
615
0
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
616
0
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
617
618
0
        tmpY  >>= m_hChromaShift + m_vChromaShift;
619
0
        tmpY2 >>= m_hChromaShift + m_vChromaShift;
620
0
        memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
621
0
        memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
622
0
    }
623
0
}
624
625
/* Locate the partition immediately left of curPartUnitIdx.
 *
 * Writes the neighbour's index to lPartUnitIdx and returns the CUData it must
 * be looked up in:
 *   - m_cuLeft (as stored by initCTU/initSubCU, possibly NULL) when the
 *     partition is in the first column of the CTU;
 *   - the picture CTU when the partition shares a column with this CU's origin;
 *   - this CU otherwise, with the index rebased by m_absIdxInCTU. */
const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroCol(rasterIdx))
    {
        /* wrap to the last column of the same row */
        lPartUnitIdx = g_rasterToZscan[rasterIdx + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    lPartUnitIdx = g_rasterToZscan[rasterIdx - 1];

    const uint32_t cuOriginRaster = g_zscanToRaster[m_absIdxInCTU];
    if (isEqualCol(rasterIdx, cuOriginRaster))
        return m_encData->getPicCTU(m_cuAddr);

    lPartUnitIdx -= m_absIdxInCTU;
    return this;
}
645
646
/* Look up the partition unit above curPartUnitIdx.
 *
 * aPartUnitIdx receives the neighbour's index and the CUData to index with it
 * is returned:
 *   - raster row zero: m_cuAbove, indexed at the same column of row
 *     (s_numPartInCUSize - 1)
 *   - same raster row as this CU's origin: the picture CTU for m_cuAddr
 *   - otherwise: this object, with m_absIdxInCTU subtracted from the index */
const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroRow(rasterIdx))
    {
        aPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)];
        return m_cuAbove;
    }

    const uint32_t cuOriginRaster = g_zscanToRaster[m_absIdxInCTU];
    aPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE];

    if (isEqualRow(rasterIdx, cuOriginRaster))
        return m_encData->getPicCTU(m_cuAddr);

    aPartUnitIdx -= m_absIdxInCTU;
    return this;
}
664
665
/* Look up the partition unit diagonally above-left of curPartUnitIdx.
 *
 * alPartUnitIdx receives the neighbour's index; the returned CUData depends on
 * which edges of the raster grid the current position touches:
 *   - row zero and column zero: m_cuAboveLeft, index num4x4Partitions - 1
 *   - column zero only:         m_cuLeft
 *   - row zero only:            m_cuAbove
 *   - neither: the picture CTU when the position shares a row or column with
 *     this CU's origin, otherwise this object with a rebased index */
const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];
    const bool inFirstCol = isZeroCol(rasterIdx);
    const bool inFirstRow = isZeroRow(rasterIdx);

    if (inFirstCol && inFirstRow)
    {
        alPartUnitIdx = m_encData->m_param->num4x4Partitions - 1;
        return m_cuAboveLeft;
    }

    if (inFirstCol)
    {
        alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    if (inFirstRow)
    {
        alPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1];
        return m_cuAbove;
    }

    const uint32_t cuOriginRaster = g_zscanToRaster[m_absIdxInCTU];
    alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE - 1];

    if (isEqualRowOrCol(rasterIdx, cuOriginRaster))
        return m_encData->getPicCTU(m_cuAddr);

    alPartUnitIdx -= m_absIdxInCTU;
    return this;
}
696
697
/* Look up the partition unit diagonally above-right of curPartUnitIdx.
 *
 * Returns NULL (leaving arPartUnitIdx untouched) when:
 *   - the X position one unit to the right reaches picWidthInLumaSamples,
 *   - the position is in the last raster column but not in row zero, or
 *   - the candidate inside the CTU does not have a smaller z-scan index than
 *     curPartUnitIdx.
 * Otherwise arPartUnitIdx is set and m_cuAboveRight, m_cuAbove, the picture CTU
 * or this object (index rebased by m_absIdxInCTU) is returned. */
const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >=
        m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];
    const bool inFirstRow = isZeroRow(rasterIdx);

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - 1))
    {
        // last column: only the neighbouring CTU above-right can supply it
        if (!inFirstRow)
            return NULL;

        arPartUnitIdx = g_rasterToZscan[(s_numPartInCUSize - 1) << LOG2_RASTER_SIZE];
        return m_cuAboveRight;
    }

    if (inFirstRow)
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + 1];
        return m_cuAbove;
    }

    // candidate lies inside this CTU; accept it only if it precedes the current unit in z-scan order
    if (!(curPartUnitIdx > g_rasterToZscan[rasterIdx - RASTER_SIZE + 1]))
        return NULL;

    const uint32_t cuTopRightRaster = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    arPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE + 1];

    if (isEqualRowOrCol(rasterIdx, cuTopRightRaster))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
732
733
/* Look up the partition unit diagonally below-left of curPartUnitIdx.
 *
 * Returns NULL (leaving blPartUnitIdx untouched) when:
 *   - the Y position one unit below reaches picHeightInLumaSamples,
 *   - the position is not above raster row (s_numPartInCUSize - 1), or
 *   - the candidate inside the CTU does not have a smaller z-scan index than
 *     curPartUnitIdx.
 * Otherwise blPartUnitIdx is set and m_cuLeft, the picture CTU or this object
 * (index rebased by m_absIdxInCTU) is returned. */
const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >=
        m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanRow(rasterIdx, s_numPartInCUSize - 1))
        return NULL;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    // candidate lies inside this CTU; accept it only if it precedes the current unit in z-scan order
    if (!(curPartUnitIdx > g_rasterToZscan[rasterIdx + RASTER_SIZE - 1]))
        return NULL;

    const uint32_t cuBottomLeftRaster = g_zscanToRaster[m_absIdxInCTU] +
        (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    blPartUnitIdx = g_rasterToZscan[rasterIdx + RASTER_SIZE - 1];

    if (isEqualRowOrCol(rasterIdx, cuBottomLeftRaster))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
764
765
/* Variant of the below-left lookup that steps partUnitOffset units down
 * instead of one.
 *
 * Returns NULL (leaving blPartUnitIdx untouched) when:
 *   - the Y position partUnitOffset units below reaches picHeightInLumaSamples,
 *   - the position is not above raster row (s_numPartInCUSize - partUnitOffset), or
 *   - the candidate inside the CTU does not have a smaller z-scan index than
 *     curPartUnitIdx.
 * Otherwise blPartUnitIdx is set and m_cuLeft, the picture CTU or this object
 * (index rebased by m_absIdxInCTU) is returned. */
const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx,  uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
        m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanRow(rasterIdx, s_numPartInCUSize - partUnitOffset))
        return NULL;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + (partUnitOffset << LOG2_RASTER_SIZE) + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    // candidate lies inside this CTU; accept it only if it precedes the current unit in z-scan order
    if (!(curPartUnitIdx > g_rasterToZscan[rasterIdx + (partUnitOffset << LOG2_RASTER_SIZE) - 1]))
        return NULL;

    const uint32_t cuBottomLeftRaster = g_zscanToRaster[m_absIdxInCTU] +
        (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    blPartUnitIdx = g_rasterToZscan[rasterIdx + (partUnitOffset << LOG2_RASTER_SIZE) - 1];

    if (isEqualRowOrCol(rasterIdx, cuBottomLeftRaster))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
796
797
/* Variant of the above-right lookup that steps partUnitOffset units to the
 * right instead of one.
 *
 * Returns NULL (leaving arPartUnitIdx untouched) when:
 *   - the X position partUnitOffset units to the right reaches
 *     picWidthInLumaSamples,
 *   - the position is not left of raster column
 *     (s_numPartInCUSize - partUnitOffset) and is not in row zero, or
 *   - the candidate inside the CTU does not have a smaller z-scan index than
 *     curPartUnitIdx.
 * Otherwise arPartUnitIdx is set and m_cuAboveRight, m_cuAbove, the picture CTU
 * or this object (index rebased by m_absIdxInCTU) is returned. */
const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >=
        m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];
    const bool inFirstRow = isZeroRow(rasterIdx);

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - partUnitOffset))
    {
        // the target column falls outside this CTU
        if (!inFirstRow)
            return NULL;

        arPartUnitIdx = g_rasterToZscan[((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset - 1];
        return m_cuAboveRight;
    }

    if (inFirstRow)
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset];
        return m_cuAbove;
    }

    // candidate lies inside this CTU; accept it only if it precedes the current unit in z-scan order
    if (!(curPartUnitIdx > g_rasterToZscan[rasterIdx - RASTER_SIZE + partUnitOffset]))
        return NULL;

    const uint32_t cuTopRightRaster = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    arPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE + partUnitOffset];

    if (isEqualRowOrCol(rasterIdx, cuTopRightRaster))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
832
833
/* Get left QpMinCu */
834
const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
835
0
{
836
0
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
837
0
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
838
839
    // check for left CTU boundary
840
0
    if (isZeroCol(absRorderQpMinCUIdx))
841
0
        return NULL;
842
843
    // get index of left-CU relative to top-left corner of current quantization group
844
0
    lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
845
846
    // return pointer to current CTU
847
0
    return m_encData->getPicCTU(m_cuAddr);
848
0
}
849
850
/* Get above QpMinCu */
851
const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
852
0
{
853
0
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
854
0
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
855
856
    // check for top CTU boundary
857
0
    if (isZeroRow(absRorderQpMinCUIdx))
858
0
        return NULL;
859
860
    // get index of top-CU relative to top-left corner of current quantization group
861
0
    aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - RASTER_SIZE];
862
863
    // return pointer to current CTU
864
0
    return m_encData->getPicCTU(m_cuAddr);
865
0
}
866
867
/* Get reference QP from left QpMinCu or latest coded QP */
868
int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
869
0
{
870
0
    uint32_t lPartIdx = 0, aPartIdx = 0;
871
0
    const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
872
0
    const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
873
874
0
    return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
875
0
}
876
877
int CUData::getLastValidPartIdx(int absPartIdx) const
878
0
{
879
0
    int lastValidPartIdx = absPartIdx - 1;
880
881
0
    while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
882
0
    {
883
0
        uint32_t depth = m_cuDepth[lastValidPartIdx];
884
0
        lastValidPartIdx -= m_numPartitions >> (depth << 1);
885
0
    }
886
887
0
    return lastValidPartIdx;
888
0
}
889
890
int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
891
0
{
892
0
    uint32_t quPartIdxMask = 0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
893
0
    int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
894
895
0
    if (lastValidPartIdx >= 0)
896
0
        return m_qp[lastValidPartIdx];
897
0
    else
898
0
    {
899
0
        if (m_absIdxInCTU)
900
0
            return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
901
0
        else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
902
0
            return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(m_encData->m_param->num4x4Partitions);
903
0
        else
904
0
            return (int8_t)m_slice->m_sliceQp;
905
0
    }
906
0
}
907
908
/* Get allowed chroma intra modes */
909
void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
910
0
{
911
0
    modeList[0] = PLANAR_IDX;
912
0
    modeList[1] = VER_IDX;
913
0
    modeList[2] = HOR_IDX;
914
0
    modeList[3] = DC_IDX;
915
0
    modeList[4] = DM_CHROMA_IDX;
916
917
0
    uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
918
919
0
    for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
920
0
    {
921
0
        if (lumaMode == modeList[i])
922
0
        {
923
0
            modeList[i] = 34; // VER+8 mode
924
0
            break;
925
0
        }
926
0
    }
927
0
}
928
929
/* Get most probable intra modes */
930
int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
931
0
{
932
0
    const CUData* tempCU;
933
0
    uint32_t tempPartIdx;
934
0
    uint32_t leftIntraDir, aboveIntraDir;
935
936
    // Get intra direction of left PU
937
0
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
938
939
0
    leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
940
941
    // Get intra direction of above PU
942
0
    tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
943
944
0
    aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
945
946
0
    if (leftIntraDir == aboveIntraDir)
947
0
    {
948
0
        if (leftIntraDir >= 2) // angular modes
949
0
        {
950
0
            intraDirPred[0] = leftIntraDir;
951
0
            intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
952
0
            intraDirPred[2] = ((leftIntraDir - 2 +  1) & 31) + 2;
953
0
        }
954
0
        else //non-angular
955
0
        {
956
0
            intraDirPred[0] = PLANAR_IDX;
957
0
            intraDirPred[1] = DC_IDX;
958
0
            intraDirPred[2] = VER_IDX;
959
0
        }
960
0
        return 1;
961
0
    }
962
0
    else
963
0
    {
964
0
        intraDirPred[0] = leftIntraDir;
965
0
        intraDirPred[1] = aboveIntraDir;
966
967
0
        if (leftIntraDir && aboveIntraDir) //both modes are non-planar
968
0
            intraDirPred[2] = PLANAR_IDX;
969
0
        else
970
0
            intraDirPred[2] =  (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
971
0
        return 2;
972
0
    }
973
0
}
974
975
uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
976
0
{
977
0
    const CUData* tempCU;
978
0
    uint32_t    tempPartIdx;
979
0
    uint32_t    ctx;
980
981
    // Get left split flag
982
0
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
983
0
    ctx  = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
984
985
    // Get above split flag
986
0
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
987
0
    ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
988
989
0
    return ctx;
990
0
}
991
992
void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
993
0
{
994
0
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
995
0
    uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
996
997
0
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
998
0
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
999
1000
0
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));
1001
0
}
1002
1003
void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
1004
0
{
1005
0
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
1006
0
    uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
1007
0
    uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
1008
1009
0
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
1010
0
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
1011
1012
0
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));
1013
0
}
1014
1015
uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
1016
0
{
1017
0
    const CUData* tempCU;
1018
0
    uint32_t tempPartIdx;
1019
0
    uint32_t ctx;
1020
1021
    // Get BCBP of left PU
1022
0
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
1023
0
    ctx    = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1024
1025
    // Get BCBP of above PU
1026
0
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
1027
0
    ctx   += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1028
1029
0
    return ctx;
1030
0
}
1031
1032
bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
1033
0
{
1034
0
    uint32_t curPartNumb = m_encData->m_param->num4x4Partitions >> (depth << 1);
1035
0
    uint32_t curPartNumQ = curPartNumb >> 2;
1036
1037
0
    if (m_cuDepth[absPartIdx] > depth)
1038
0
    {
1039
0
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
1040
0
            if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
1041
0
                return true;
1042
0
    }
1043
0
    else
1044
0
    {
1045
0
        if (getQtRootCbf(absPartIdx))
1046
0
            return true;
1047
0
        else
1048
0
            setQPSubParts(qp, absPartIdx, depth);
1049
0
    }
1050
1051
0
    return false;
1052
0
}
1053
1054
void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
1055
0
{
1056
0
    uint32_t curPartNumQ = m_numPartitions >> 2;
1057
0
    X265_CHECK(puIdx < 2, "unexpected part unit index\n");
1058
1059
0
    switch (m_partSize[absPartIdx])
1060
0
    {
1061
0
    case SIZE_2Nx2N:
1062
0
        memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
1063
0
        break;
1064
0
    case SIZE_2NxN:
1065
0
        memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
1066
0
        break;
1067
0
    case SIZE_Nx2N:
1068
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1069
0
        memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
1070
0
        break;
1071
0
    case SIZE_NxN:
1072
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1073
0
        break;
1074
0
    case SIZE_2NxnU:
1075
0
        if (!puIdx)
1076
0
        {
1077
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1078
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1079
0
        }
1080
0
        else
1081
0
        {
1082
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1083
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
1084
0
        }
1085
0
        break;
1086
0
    case SIZE_2NxnD:
1087
0
        if (!puIdx)
1088
0
        {
1089
0
            memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
1090
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
1091
0
        }
1092
0
        else
1093
0
        {
1094
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1095
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1096
0
        }
1097
0
        break;
1098
0
    case SIZE_nLx2N:
1099
0
        if (!puIdx)
1100
0
        {
1101
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1102
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1103
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1104
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1105
0
        }
1106
0
        else
1107
0
        {
1108
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1109
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1110
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1111
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1112
0
        }
1113
0
        break;
1114
0
    case SIZE_nRx2N:
1115
0
        if (!puIdx)
1116
0
        {
1117
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
1118
0
            memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1119
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1120
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1121
0
        }
1122
0
        else
1123
0
        {
1124
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1125
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1126
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1127
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1128
0
        }
1129
0
        break;
1130
0
    default:
1131
0
        X265_CHECK(0, "unexpected part type\n");
1132
0
        break;
1133
0
    }
1134
0
}
1135
1136
template<typename T>
1137
void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1138
0
{
1139
0
    int i;
1140
1141
0
    p += absPartIdx;
1142
0
    int numElements = m_numPartitions;
1143
1144
0
    switch (m_partSize[absPartIdx])
1145
0
    {
1146
0
    case SIZE_2Nx2N:
1147
0
        for (i = 0; i < numElements; i++)
1148
0
            p[i] = val;
1149
0
        break;
1150
1151
0
    case SIZE_2NxN:
1152
0
        numElements >>= 1;
1153
0
        for (i = 0; i < numElements; i++)
1154
0
            p[i] = val;
1155
0
        break;
1156
1157
0
    case SIZE_Nx2N:
1158
0
        numElements >>= 2;
1159
0
        for (i = 0; i < numElements; i++)
1160
0
        {
1161
0
            p[i] = val;
1162
0
            p[i + 2 * numElements] = val;
1163
0
        }
1164
0
        break;
1165
1166
0
    case SIZE_2NxnU:
1167
0
    {
1168
0
        int curPartNumQ = numElements >> 2;
1169
0
        if (!puIdx)
1170
0
        {
1171
0
            T *pT  = p;
1172
0
            T *pT2 = p + curPartNumQ;
1173
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1174
0
            {
1175
0
                pT[i] = val;
1176
0
                pT2[i] = val;
1177
0
            }
1178
0
        }
1179
0
        else
1180
0
        {
1181
0
            T *pT  = p;
1182
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1183
0
                pT[i] = val;
1184
1185
0
            pT = p + curPartNumQ;
1186
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1187
0
                pT[i] = val;
1188
0
        }
1189
0
        break;
1190
0
    }
1191
1192
0
    case SIZE_2NxnD:
1193
0
    {
1194
0
        int curPartNumQ = numElements >> 2;
1195
0
        if (!puIdx)
1196
0
        {
1197
0
            T *pT  = p;
1198
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1199
0
                pT[i] = val;
1200
1201
0
            pT = p + (numElements - curPartNumQ);
1202
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1203
0
                pT[i] = val;
1204
0
        }
1205
0
        else
1206
0
        {
1207
0
            T *pT  = p;
1208
0
            T *pT2 = p + curPartNumQ;
1209
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1210
0
            {
1211
0
                pT[i] = val;
1212
0
                pT2[i] = val;
1213
0
            }
1214
0
        }
1215
0
        break;
1216
0
    }
1217
1218
0
    case SIZE_nLx2N:
1219
0
    {
1220
0
        int curPartNumQ = numElements >> 2;
1221
0
        if (!puIdx)
1222
0
        {
1223
0
            T *pT  = p;
1224
0
            T *pT2 = p + (curPartNumQ << 1);
1225
0
            T *pT3 = p + (curPartNumQ >> 1);
1226
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1227
1228
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1229
0
            {
1230
0
                pT[i] = val;
1231
0
                pT2[i] = val;
1232
0
                pT3[i] = val;
1233
0
                pT4[i] = val;
1234
0
            }
1235
0
        }
1236
0
        else
1237
0
        {
1238
0
            T *pT  = p;
1239
0
            T *pT2 = p + (curPartNumQ << 1);
1240
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1241
0
            {
1242
0
                pT[i] = val;
1243
0
                pT2[i] = val;
1244
0
            }
1245
1246
0
            pT  = p + (curPartNumQ >> 1);
1247
0
            pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1248
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1249
0
            {
1250
0
                pT[i] = val;
1251
0
                pT2[i] = val;
1252
0
            }
1253
0
        }
1254
0
        break;
1255
0
    }
1256
1257
0
    case SIZE_nRx2N:
1258
0
    {
1259
0
        int curPartNumQ = numElements >> 2;
1260
0
        if (!puIdx)
1261
0
        {
1262
0
            T *pT  = p;
1263
0
            T *pT2 = p + (curPartNumQ << 1);
1264
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1265
0
            {
1266
0
                pT[i] = val;
1267
0
                pT2[i] = val;
1268
0
            }
1269
1270
0
            pT  = p + curPartNumQ + (curPartNumQ >> 1);
1271
0
            pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1272
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1273
0
            {
1274
0
                pT[i] = val;
1275
0
                pT2[i] = val;
1276
0
            }
1277
0
        }
1278
0
        else
1279
0
        {
1280
0
            T *pT  = p;
1281
0
            T *pT2 = p + (curPartNumQ >> 1);
1282
0
            T *pT3 = p + (curPartNumQ << 1);
1283
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1284
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1285
0
            {
1286
0
                pT[i] = val;
1287
0
                pT2[i] = val;
1288
0
                pT3[i] = val;
1289
0
                pT4[i] = val;
1290
0
            }
1291
0
        }
1292
0
        break;
1293
0
    }
1294
1295
0
    case SIZE_NxN:
1296
0
    default:
1297
0
        X265_CHECK(0, "unknown partition type\n");
1298
0
        break;
1299
0
    }
1300
0
}
Unexecuted instantiation: void x265::CUData::setAllPU<x265::MV>(x265::MV*, x265::MV const&, int, int)
Unexecuted instantiation: void x265::CUData::setAllPU<signed char>(signed char*, signed char const&, int, int)
1301
1302
void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1303
0
{
1304
0
    setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1305
0
}
1306
1307
void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
1308
0
{
1309
0
    setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1310
0
}
1311
1312
void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1313
0
{
1314
0
    int cuSize = 1 << m_log2CUSize[0];
1315
0
    int partType = m_partSize[0];
1316
1317
0
    int tmp = partTable[partType][partIdx][0];
1318
0
    outWidth = ((tmp >> 4) * cuSize) >> 2;
1319
0
    outHeight = ((tmp & 0xF) * cuSize) >> 2;
1320
0
    outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1321
0
}
1322
1323
void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1324
0
{
1325
0
    if (cu)
1326
0
    {
1327
0
        outMvField.mv = cu->m_mv[picList][absPartIdx];
1328
0
        outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1329
0
    }
1330
0
    else
1331
0
    {
1332
        // OUT OF BOUNDARY
1333
0
        outMvField.mv = 0;
1334
0
        outMvField.refIdx = REF_NOT_VALID;
1335
0
    }
1336
0
}
1337
1338
void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1339
0
{
1340
0
    partIdxLT = m_absIdxInCTU;
1341
0
    partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1342
1343
0
    switch (m_partSize[0])
1344
0
    {
1345
0
    case SIZE_2Nx2N: break;
1346
0
    case SIZE_2NxN:
1347
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1348
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1349
0
        break;
1350
0
    case SIZE_Nx2N:
1351
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1352
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1353
0
        break;
1354
0
    case SIZE_NxN:
1355
0
        partIdxLT += (m_numPartitions >> 2) * partIdx;
1356
0
        partIdxRT +=  (m_numPartitions >> 2) * (partIdx - 1);
1357
0
        break;
1358
0
    case SIZE_2NxnU:
1359
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1360
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1361
0
        break;
1362
0
    case SIZE_2NxnD:
1363
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1364
0
        partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1365
0
        break;
1366
0
    case SIZE_nLx2N:
1367
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1368
0
        partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1369
0
        break;
1370
0
    case SIZE_nRx2N:
1371
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1372
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1373
0
        break;
1374
0
    default:
1375
0
        X265_CHECK(0, "unexpected part index\n");
1376
0
        break;
1377
0
    }
1378
0
}
1379
1380
uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1381
0
{
1382
0
    uint32_t outPartIdxLB;
1383
0
    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];
1384
1385
0
    switch (m_partSize[0])
1386
0
    {
1387
0
    case SIZE_2Nx2N:
1388
0
        outPartIdxLB += m_numPartitions >> 1;
1389
0
        break;
1390
0
    case SIZE_2NxN:
1391
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1392
0
        break;
1393
0
    case SIZE_Nx2N:
1394
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1395
0
        break;
1396
0
    case SIZE_NxN:
1397
0
        outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1398
0
        break;
1399
0
    case SIZE_2NxnU:
1400
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1401
0
        break;
1402
0
    case SIZE_2NxnD:
1403
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1404
0
        break;
1405
0
    case SIZE_nLx2N:
1406
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1407
0
        break;
1408
0
    case SIZE_nRx2N:
1409
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1410
0
        break;
1411
0
    default:
1412
0
        X265_CHECK(0, "unexpected part index\n");
1413
0
        break;
1414
0
    }
1415
0
    return outPartIdxLB;
1416
0
}
1417
1418
/* Derives the partition index of neighboring bottom right block */
1419
uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1420
0
{
1421
0
    uint32_t outPartIdxRB;
1422
0
    outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1423
0
                                   (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE) +
1424
0
                                   (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1425
1426
0
    switch (m_partSize[0])
1427
0
    {
1428
0
    case SIZE_2Nx2N:
1429
0
        outPartIdxRB += m_numPartitions >> 1;
1430
0
        break;
1431
0
    case SIZE_2NxN:
1432
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1433
0
        break;
1434
0
    case SIZE_Nx2N:
1435
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1436
0
        break;
1437
0
    case SIZE_NxN:
1438
0
        outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1439
0
        break;
1440
0
    case SIZE_2NxnU:
1441
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1442
0
        break;
1443
0
    case SIZE_2NxnD:
1444
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1445
0
        break;
1446
0
    case SIZE_nLx2N:
1447
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1448
0
        break;
1449
0
    case SIZE_nRx2N:
1450
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1451
0
        break;
1452
0
    default:
1453
0
        X265_CHECK(0, "unexpected part index\n");
1454
0
        break;
1455
0
    }
1456
0
    return outPartIdxRB;
1457
0
}
1458
1459
/* Returns true when partition absPartIdx of this CU and partition
 * candAbsPartIdx of candCU carry the same motion: identical inter direction
 * and, for every reference list enabled by that direction, identical motion
 * vector and reference index. */
bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
{
    const int dir = m_interDir[absPartIdx];

    if (dir != candCU.m_interDir[candAbsPartIdx])
        return false;

    for (uint32_t list = 0; list < 2; list++)
    {
        /* lists not selected by the inter direction are ignored */
        if (!(dir & (1 << list)))
            continue;

        if (m_refIdx[list][absPartIdx] != candCU.m_refIdx[list][candAbsPartIdx])
            return false;

        if (m_mv[list][absPartIdx] != candCU.m_mv[list][candAbsPartIdx])
            return false;
    }

    return true;
}
1476
1477
/* Construct list of merging candidates, returns count */
1478
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
1479
0
{
1480
0
    uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1481
0
    const bool isInterB = m_slice->isInterB();
1482
1483
0
    const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1484
1485
0
    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1486
0
    {
1487
0
        candMvField[i][0].mv = 0;
1488
0
        candMvField[i][1].mv = 0;
1489
0
        candMvField[i][0].refIdx = REF_NOT_VALID;
1490
0
        candMvField[i][1].refIdx = REF_NOT_VALID;
1491
0
    }
1492
1493
    /* calculate the location of upper-left corner pixel and size of the current PU */
1494
0
    int xP, yP, nPSW, nPSH;
1495
1496
0
    int cuSize = 1 << m_log2CUSize[0];
1497
0
    int partMode = m_partSize[0];
1498
1499
0
    int tmp = partTable[partMode][puIdx][0];
1500
0
    nPSW = ((tmp >> 4) * cuSize) >> 2;
1501
0
    nPSH = ((tmp & 0xF) * cuSize) >> 2;
1502
1503
0
    tmp = partTable[partMode][puIdx][1];
1504
0
    xP = ((tmp >> 4) * cuSize) >> 2;
1505
0
    yP = ((tmp & 0xF) * cuSize) >> 2;
1506
1507
0
    uint32_t count = 0;
1508
1509
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1510
0
    PartSize curPS = (PartSize)m_partSize[absPartIdx];
1511
    
1512
    // left
1513
0
    uint32_t leftPartIdx = 0;
1514
0
    const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1515
0
    bool isAvailableA1 = cuLeft &&
1516
0
        cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1517
0
        !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
1518
0
        cuLeft->isInter(leftPartIdx);
1519
0
    if (isAvailableA1)
1520
0
    {
1521
        // get Inter Dir
1522
0
        candDir[count] = cuLeft->m_interDir[leftPartIdx];
1523
        // get Mv from Left
1524
0
        cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);
1525
0
        if (isInterB)
1526
0
            cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);
1527
1528
0
        if (++count == maxNumMergeCand)
1529
0
            return maxNumMergeCand;
1530
0
    }
1531
1532
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1533
1534
    // above
1535
0
    uint32_t abovePartIdx = 0;
1536
0
    const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1537
0
    bool isAvailableB1 = cuAbove &&
1538
0
        cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1539
0
        !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
1540
0
        cuAbove->isInter(abovePartIdx);
1541
0
    if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1542
0
    {
1543
        // get Inter Dir
1544
0
        candDir[count] = cuAbove->m_interDir[abovePartIdx];
1545
        // get Mv from Left
1546
0
        cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
1547
0
        if (isInterB)
1548
0
            cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);
1549
1550
0
        if (++count == maxNumMergeCand)
1551
0
            return maxNumMergeCand;
1552
0
    }
1553
1554
    // above right
1555
0
    uint32_t aboveRightPartIdx = 0;
1556
0
    const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1557
0
    bool isAvailableB0 = cuAboveRight &&
1558
0
        cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
1559
0
        cuAboveRight->isInter(aboveRightPartIdx);
1560
0
    if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1561
0
    {
1562
        // get Inter Dir
1563
0
        candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1564
        // get Mv from Left
1565
0
        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
1566
0
        if (isInterB)
1567
0
            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);
1568
1569
0
        if (++count == maxNumMergeCand)
1570
0
            return maxNumMergeCand;
1571
0
    }
1572
1573
    // left bottom
1574
0
    uint32_t leftBottomPartIdx = 0;
1575
0
    const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1576
0
    bool isAvailableA0 = cuLeftBottom &&
1577
0
        cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
1578
0
        cuLeftBottom->isInter(leftBottomPartIdx);
1579
0
    if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1580
0
    {
1581
        // get Inter Dir
1582
0
        candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1583
        // get Mv from Left
1584
0
        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
1585
0
        if (isInterB)
1586
0
            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);
1587
1588
0
        if (++count == maxNumMergeCand)
1589
0
            return maxNumMergeCand;
1590
0
    }
1591
1592
    // above left
1593
0
    if (count < 4)
1594
0
    {
1595
0
        uint32_t aboveLeftPartIdx = 0;
1596
0
        const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1597
0
        bool isAvailableB2 = cuAboveLeft &&
1598
0
            cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
1599
0
            cuAboveLeft->isInter(aboveLeftPartIdx);
1600
0
        if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1601
0
            && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1602
0
        {
1603
            // get Inter Dir
1604
0
            candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1605
            // get Mv from Left
1606
0
            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
1607
0
            if (isInterB)
1608
0
                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);
1609
1610
0
            if (++count == maxNumMergeCand)
1611
0
                return maxNumMergeCand;
1612
0
        }
1613
0
    }
1614
#if ENABLE_SCC_EXT
1615
    if (m_slice->m_bTemporalMvp)
1616
#else
1617
0
    if (m_slice->m_sps->bTemporalMVPEnabled)
1618
0
#endif
1619
0
    {
1620
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1621
0
        MV colmv;
1622
0
        int ctuIdx = -1;
1623
1624
        // image boundary check
1625
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1626
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1627
0
        {
1628
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1629
0
            uint32_t numUnits = s_numPartInCUSize;
1630
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1631
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1632
1633
0
            if (bNotLastCol && bNotLastRow)
1634
0
            {
1635
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1636
0
                ctuIdx = m_cuAddr;
1637
0
            }
1638
0
            else if (bNotLastCol)
1639
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1640
0
            else if (bNotLastRow)
1641
0
            {
1642
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1643
0
                ctuIdx = m_cuAddr + 1;
1644
0
            }
1645
0
            else // is the right bottom corner of CTU
1646
0
                absPartAddr = 0;
1647
0
        }
1648
1649
0
        int maxList = isInterB ? 2 : 1;
1650
0
        int dir = 0, refIdx = 0;
1651
0
        for (int list = 0; list < maxList; list++)
1652
0
        {
1653
0
            bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);
1654
0
            if (!bExistMV)
1655
0
            {
1656
0
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1657
0
                bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
1658
0
            }
1659
0
            if (bExistMV)
1660
0
            {
1661
0
                dir |= (1 << list);
1662
0
                candMvField[count][list].mv = colmv;
1663
0
                candMvField[count][list].refIdx = refIdx;
1664
0
            }
1665
0
        }
1666
1667
0
        if (dir != 0)
1668
0
        {
1669
0
            candDir[count] = (uint8_t)dir;
1670
1671
0
            if (++count == maxNumMergeCand)
1672
0
                return maxNumMergeCand;
1673
0
        }
1674
0
    }
1675
1676
0
    if (isInterB)
1677
0
    {
1678
0
        const uint32_t cutoff = count * (count - 1);
1679
0
        uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1680
0
        uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1681
1682
0
        for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
1683
0
        {
1684
0
            int i = priorityList0 & 3;
1685
0
            int j = priorityList1 & 3;
1686
1687
0
            if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
1688
0
            {
1689
                // get Mv from cand[i] and cand[j]
1690
0
                int refIdxL0 = candMvField[i][0].refIdx;
1691
0
                int refIdxL1 = candMvField[j][1].refIdx;
1692
0
                int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1693
0
                int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
1694
0
                if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
1695
0
                {
1696
0
                    candMvField[count][0].mv = candMvField[i][0].mv;
1697
0
                    candMvField[count][0].refIdx = refIdxL0;
1698
0
                    candMvField[count][1].mv = candMvField[j][1].mv;
1699
0
                    candMvField[count][1].refIdx = refIdxL1;
1700
0
                    candDir[count] = 3;
1701
1702
0
                    if (++count == maxNumMergeCand)
1703
0
                        return maxNumMergeCand;
1704
0
                }
1705
0
            }
1706
0
        }
1707
0
    }
1708
0
    int numRefIdx0 = m_slice->m_numRefIdx[0];
1709
#if ENABLE_SCC_EXT
1710
    if (m_slice->m_param->bEnableSCC)
1711
        numRefIdx0--;
1712
#endif
1713
0
    int numRefIdx = (isInterB) ? X265_MIN(numRefIdx0, m_slice->m_numRefIdx[1]) : numRefIdx0;
1714
0
    int r = 0;
1715
0
    int refcnt = 0;
1716
0
    while (numRefIdx && (count < maxNumMergeCand))
1717
0
    {
1718
0
        candDir[count] = 1;
1719
0
        candMvField[count][0].mv.word = 0;
1720
0
        candMvField[count][0].refIdx = r;
1721
1722
0
        if (isInterB)
1723
0
        {
1724
0
            candDir[count] = 3;
1725
0
            candMvField[count][1].mv.word = 0;
1726
0
            candMvField[count][1].refIdx = r;
1727
0
        }
1728
1729
0
        count++;
1730
1731
0
        if (refcnt == numRefIdx - 1)
1732
0
            r = 0;
1733
0
        else
1734
0
        {
1735
0
            ++r;
1736
0
            ++refcnt;
1737
0
        }
1738
0
    }
1739
1740
0
    return count;
1741
0
}
1742
1743
// Create the PMV list. Called for each reference index.
1744
#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
1745
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv, uint32_t puIdx, uint32_t absPartIdx) const
1746
#else
1747
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
1748
#endif
1749
0
{
1750
0
    MV directMV[MD_ABOVE_LEFT + 1];
1751
0
    MV indirectMV[MD_ABOVE_LEFT + 1];
1752
0
    bool validDirect[MD_ABOVE_LEFT + 1];
1753
0
    bool validIndirect[MD_ABOVE_LEFT + 1];
1754
1755
#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
1756
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
1757
    {
1758
        // Left candidate.
1759
        if ((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable)
1760
        {
1761
            validIndirect[MD_ABOVE_RIGHT] = validIndirect[MD_ABOVE] = validIndirect[MD_ABOVE_LEFT] = false;
1762
1763
            validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1764
            validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1765
1766
            validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1767
            validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1768
        }
1769
1770
        // Top candidate.
1771
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1772
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1773
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1774
1775
        // Top candidate.
1776
        if (!((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable))
1777
        {
1778
            validDirect[MD_BELOW_LEFT] = validDirect[MD_LEFT] = validIndirect[MD_BELOW_LEFT] = validIndirect[MD_LEFT] = false;
1779
            validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1780
            validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1781
            validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1782
        }
1783
    }
1784
    else
1785
#endif
1786
0
    {
1787
        // Left candidate.
1788
0
        validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1789
0
        validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1790
        // Top candidate.
1791
0
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1792
0
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1793
0
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1794
1795
        // Left candidate.
1796
0
        validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1797
0
        validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1798
        // Top candidate.
1799
0
        validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1800
0
        validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1801
0
        validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1802
0
    }
1803
1804
0
    int num = 0;
1805
    // Left predictor search
1806
0
    if (validDirect[MD_BELOW_LEFT])
1807
0
        amvpCand[num++] = directMV[MD_BELOW_LEFT];
1808
0
    else if (validDirect[MD_LEFT])
1809
0
        amvpCand[num++] = directMV[MD_LEFT];
1810
0
    else if (validIndirect[MD_BELOW_LEFT])
1811
0
        amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
1812
0
    else if (validIndirect[MD_LEFT])
1813
0
        amvpCand[num++] = indirectMV[MD_LEFT];
1814
1815
0
    bool bAddedSmvp = num > 0;
1816
1817
    // Above predictor search
1818
0
    if (validDirect[MD_ABOVE_RIGHT])
1819
0
        amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
1820
0
    else if (validDirect[MD_ABOVE])
1821
0
        amvpCand[num++] = directMV[MD_ABOVE];
1822
0
    else if (validDirect[MD_ABOVE_LEFT])
1823
0
        amvpCand[num++] = directMV[MD_ABOVE_LEFT];
1824
1825
0
    if (!bAddedSmvp)
1826
0
    {
1827
0
        if (validIndirect[MD_ABOVE_RIGHT])
1828
0
            amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
1829
0
        else if (validIndirect[MD_ABOVE])
1830
0
            amvpCand[num++] = indirectMV[MD_ABOVE];
1831
0
        else if (validIndirect[MD_ABOVE_LEFT])
1832
0
            amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
1833
0
    }
1834
1835
0
    int numMvc = 0;
1836
0
    for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1837
0
    {
1838
0
        if (validDirect[dir] && directMV[dir].notZero())
1839
0
            pmv[numMvc++] = directMV[dir];
1840
1841
0
        if (validIndirect[dir] && indirectMV[dir].notZero())
1842
0
            pmv[numMvc++] = indirectMV[dir];
1843
0
    }
1844
1845
0
    if (num == 2)
1846
0
        num -= amvpCand[0] == amvpCand[1];
1847
1848
    // Get the collocated candidate. At this step, either the first candidate
1849
    // was found or its value is 0.
1850
#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
1851
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
1852
    {
1853
        if (m_slice->m_bTemporalMvp && num < 2)
1854
        {
1855
            int refId = refIdx;
1856
            uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1857
            uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1858
1859
            // co-located RightBottom temporal predictor (H)
1860
            int ctuIdx = -1;
1861
1862
            // image boundary check
1863
            if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1864
                m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1865
            {
1866
                uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1867
                uint32_t numUnits = s_numPartInCUSize;
1868
                bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1869
                bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1870
1871
                if (bNotLastCol && bNotLastRow)
1872
                {
1873
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1874
                    ctuIdx = m_cuAddr;
1875
                }
1876
                else if (bNotLastCol)
1877
                    absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1878
                else if (bNotLastRow)
1879
                {
1880
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1881
                    ctuIdx = m_cuAddr + 1;
1882
                }
1883
                else // is the right bottom corner of CTU
1884
                    absPartAddr = 0;
1885
            }
1886
            if (ctuIdx >= 0 && getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, ctuIdx, absPartAddr))
1887
                pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
1888
            else
1889
            {
1890
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1891
                uint32_t curCTUIdx = m_cuAddr;
1892
                if (getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, curCTUIdx, partIdxCenter))
1893
                    pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
1894
            }
1895
        }
1896
    }
1897
    else
1898
#endif
1899
0
    {
1900
0
        if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
1901
0
        {
1902
0
            int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
1903
0
            if (tempRefIdx != -1)
1904
0
            {
1905
0
                uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
1906
0
                const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1907
0
                const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1908
1909
                // Scale the vector
1910
0
                int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
1911
0
                int colPOC = colCU->m_slice->m_poc;
1912
1913
0
                int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1914
0
                int curPOC = m_slice->m_poc;
1915
1916
0
                pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
1917
0
            }
1918
0
        }
1919
0
    }
1920
1921
0
    while (num < AMVP_NUM_CANDS)
1922
0
        amvpCand[num++].set(0, 0);
1923
1924
0
    return numMvc;
1925
0
}
1926
1927
/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1928
void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const
1929
0
{
1930
    // Set the temporal neighbour to unavailable by default.
1931
0
    neighbours[MD_COLLOCATED].unifiedRef = -1;
1932
1933
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1934
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1935
1936
    // Load the spatial MVs.
1937
0
    getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT);
1938
0
    getInterNeighbourMV(neighbours + MD_LEFT,       partIdxLB, MD_LEFT);
1939
0
    getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT);
1940
0
    getInterNeighbourMV(neighbours + MD_ABOVE,      partIdxRT, MD_ABOVE);
1941
0
    getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT);
1942
1943
0
    if (m_slice->m_bTemporalMvp && !(m_slice->m_param->bEnableSCC || m_slice->m_param->numViews > 1))
1944
0
    {
1945
0
        uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1946
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1947
1948
        // co-located RightBottom temporal predictor (H)
1949
0
        int ctuIdx = -1;
1950
1951
        // image boundary check
1952
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1953
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1954
0
        {
1955
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1956
0
            uint32_t numUnits = s_numPartInCUSize;
1957
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1958
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1959
1960
0
            if (bNotLastCol && bNotLastRow)
1961
0
            {
1962
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1963
0
                ctuIdx = m_cuAddr;
1964
0
            }
1965
0
            else if (bNotLastCol)
1966
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1967
0
            else if (bNotLastRow)
1968
0
            {
1969
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1970
0
                ctuIdx = m_cuAddr + 1;
1971
0
            }
1972
0
            else // is the right bottom corner of CTU
1973
0
                absPartAddr = 0;
1974
0
        }
1975
1976
0
        if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED)))
1977
0
        {
1978
0
            uint32_t partIdxCenter =  deriveCenterIdx(puIdx);
1979
0
            uint32_t curCTUIdx = m_cuAddr;
1980
0
            getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED);
1981
0
        }
1982
0
    }
1983
0
}
1984
1985
/* Loads the motion data of one spatial neighbour into *neighbour.
 * dir selects which neighbour of partition partUnitIdx is looked up.  When no
 * neighbouring CU is returned (or dir is not one of the five spatial
 * directions) both reference indices are set to -1 and the entry is marked
 * unavailable; the MVs are left untouched in that case.  Otherwise both lists'
 * MV and reference index are copied and isAvailable reports whether the
 * neighbouring partition is inter coded. */
void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const
{
    const CUData* nbrCU = NULL;
    uint32_t nbrPartIdx = 0;

    switch (dir)
    {
    case MD_LEFT:
        nbrCU = getPULeft(nbrPartIdx, partUnitIdx);
        break;
    case MD_ABOVE:
        nbrCU = getPUAbove(nbrPartIdx, partUnitIdx);
        break;
    case MD_ABOVE_RIGHT:
        nbrCU = getPUAboveRight(nbrPartIdx, partUnitIdx);
        break;
    case MD_BELOW_LEFT:
        nbrCU = getPUBelowLeft(nbrPartIdx, partUnitIdx);
        break;
    case MD_ABOVE_LEFT:
        nbrCU = getPUAboveLeft(nbrPartIdx, partUnitIdx);
        break;
    default:
        break;
    }

    if (nbrCU)
    {
        for (int list = 0; list < 2; list++)
        {
            neighbour->mv[list] = nbrCU->m_mv[list][nbrPartIdx];
            neighbour->refIdx[list] = nbrCU->m_refIdx[list][nbrPartIdx];
        }
        neighbour->isAvailable = nbrCU->isInter(nbrPartIdx);
    }
    else
    {
        // Mark the PMV as unavailable.
        neighbour->refIdx[0] = -1;
        neighbour->refIdx[1] = -1;
        neighbour->isAvailable = false;
    }
}
2030
2031
/* Clip motion vector to within slightly padded boundary of picture (the
2032
 * MV may reference a block that is completely within the padded area).
2033
 * Note this function is unaware of how much of this picture is actually
2034
 * available for use (re: frame parallelism) */
2035
void CUData::clipMv(MV& outMV) const
2036
0
{
2037
0
    const uint32_t mvshift = 2;
2038
0
    uint32_t offset = 8;
2039
2040
0
    int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
2041
0
    int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift);
2042
2043
0
    int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
2044
0
    int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift);
2045
2046
0
    outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
2047
0
    outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
2048
0
}
2049
2050
// Load direct spatial MV if available.
2051
bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
2052
0
{
2053
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
2054
0
    for (int i = 0; i < 2; i++, picList = !picList)
2055
0
    {
2056
0
        int partRefIdx = neighbours->refIdx[picList];
2057
0
        if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx])
2058
0
        {
2059
0
            pmv = neighbours->mv[picList];
2060
0
            return true;
2061
0
        }
2062
0
    }
2063
0
    return false;
2064
0
}
2065
2066
// Load indirect spatial MV if available. An indirect MV has to be scaled.
2067
bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
2068
0
{
2069
0
    int curPOC = m_slice->m_poc;
2070
0
    int neibPOC = curPOC;
2071
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
2072
2073
0
    for (int i = 0; i < 2; i++, picList = !picList)
2074
0
    {
2075
0
        int partRefIdx = neighbours->refIdx[picList];
2076
0
        if (partRefIdx >= 0)
2077
0
        {
2078
0
            int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx];
2079
0
            MV mvp = neighbours->mv[picList];
2080
2081
#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
2082
            if ((curRefPOC == curPOC) == (neibRefPOC == curPOC))
2083
            {
2084
                if (curRefPOC == curPOC)
2085
                    outMV = mvp;
2086
                if (!(curRefPOC == curPOC))
2087
                    outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
2088
                return true;
2089
            }
2090
#else
2091
0
            outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
2092
0
            return true;
2093
0
#endif
2094
0
        }
2095
0
    }
2096
0
    return false;
2097
0
}
2098
2099
// Fetches the temporal (collocated) motion vector predictor for list picList.
//   outMV       - receives the collocated MV, scaled by POC distance
//   outRefIdx   - reference index of the current PU; only read here
//   cuAddr      - CTU address inside the collocated picture
//   partUnitIdx - partition index inside that CTU (masked with TMVP_UNIT_MASK
//                 before the motion data is read)
// Returns false when the collocated partition has no prediction mode, is
// intra coded, or has no valid reference index in either list.
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
{
    const int colPicList = m_slice->isInterB() && !m_slice->m_colFromL0Flag;
    const Frame* colPic = m_slice->m_refFrameList[colPicList][m_slice->m_colRefIdx];
    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);

    const uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
        return false;

    /* preferred list of the collocated block; switch to the other list when
     * the preferred one holds no reference */
    int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
    if (colCU->m_refIdx[colRefPicList][absPartAddr] < 0)
    {
        colRefPicList = !colRefPicList;
        if (colCU->m_refIdx[colRefPicList][absPartAddr] < 0)
            return false;
    }
    const int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];

    // Scale the vector
    const int colPOC = colCU->m_slice->m_poc;
    const int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
    const MV colmv = colCU->m_mv[colRefPicList][absPartAddr];

    const int curPOC = m_slice->m_poc;
    const int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];

#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
    /* both sides must agree on whether the reference POC equals the picture's
     * own POC; when both do, the MV is taken unscaled */
    if ((colPOC == colRefPOC) != (curPOC == curRefPOC))
        return false;

    if (curRefPOC == curPOC)
        outMV = colmv;
    else
        outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC);
#else
    outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC);
#endif
    return true;
}
2141
2142
// Cache the collocated MV.
2143
bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
2144
0
{
2145
0
    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
2146
0
    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
2147
2148
0
    uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
2149
0
    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
2150
0
        return false;
2151
2152
0
    for (int list = 0; list < 2; list++)
2153
0
    {
2154
0
        neighbour->cuAddr[list] = cuAddr;
2155
0
        int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag;
2156
0
        int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
2157
2158
0
        if (colRefIdx < 0)
2159
0
            colRefPicList = !colRefPicList;
2160
2161
0
        neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr];
2162
0
        neighbour->refIdx[list] |= colRefPicList << 4;
2163
2164
0
        neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr];
2165
0
    }
2166
2167
0
    return neighbour->unifiedRef != -1;
2168
0
}
2169
2170
/* Scale inMV by the ratio of the current POC distance (curPOC - curRefPOC)
 * to the collocated POC distance (colPOC - colRefPOC).
 *
 * Equal distances return inMV unchanged. Otherwise both distances are
 * clipped to [-128, 127] and a fixed-point scale factor clipped to
 * [-4096, 4095] is passed to scaleMv().
 *
 * The clipped collocated distance is used as a divisor, so callers must not
 * pass colPOC == colRefPOC unless curPOC == curRefPOC as well. */
MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
{
    const int colDist = colPOC - colRefPOC;
    const int curDist = curPOC - curRefPOC;

    if (colDist == curDist)
        return inMV;

    const int tb = x265_clip3(-128, 127, curDist);
    const int td = x265_clip3(-128, 127, colDist);
    const int tx = (0x4000 + abs(td / 2)) / td;
    const int distScale = x265_clip3(-4096, 4095, (tb * tx + 32) >> 6);

    return scaleMv(inMV, distScale);
}
2186
2187
uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
2188
0
{
2189
0
    uint32_t absPartIdx;
2190
0
    int puWidth, puHeight;
2191
2192
0
    getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight);
2193
2194
0
    return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx]
2195
0
                           + ((puHeight >> (LOG2_UNIT_SIZE + 1)) << LOG2_RASTER_SIZE)
2196
0
                           + (puWidth  >> (LOG2_UNIT_SIZE + 1))];
2197
0
}
2198
2199
/* Fill 'result' with the coefficient scan type, the scan tables and the first
 * significance-map context for a transform block.
 *
 * absPartIdx - partition index of the block inside this CU
 * log2TrSize - log2 of the transform size (tables are indexed by log2TrSize - 2)
 * bIsLuma    - true for the luma plane, false for chroma
 *
 * Inter blocks always use the diagonal scan. Intra blocks choose a horizontal
 * or vertical scan from the intra direction, but only for small transforms. */
void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
{
    // Diagonal unless a mode-dependent scan is selected below
    result.scanType = SCAN_DIAG;

    if (isIntra(absPartIdx))
    {
        uint32_t intraDir = bIsLuma ? m_lumaIntraDir[absPartIdx] : m_chromaIntraDir[absPartIdx];

        if (!bIsLuma && intraDir == DM_CHROMA_IDX)
        {
            // Chroma derived from luma: take the luma direction (from the
            // 4-unit-aligned partition unless 4:4:4), remapped for 4:2:2
            const uint32_t lumaPartIdx = (m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC;
            intraDir = m_lumaIntraDir[lumaPartIdx];
            if (m_chromaFormat == X265_CSP_I422)
                intraDir = g_chroma422IntraAngleMappingTable[intraDir];
        }

        const bool modeDependentScan =
            log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE);

        if (modeDependentScan)
        {
            if (intraDir >= 22 && intraDir <= 30)
                result.scanType = SCAN_HOR;
            else if (intraDir >= 6 && intraDir <= 14)
                result.scanType = SCAN_VER;
        }
    }

    // Scan tables: per-coefficient order and coefficient-group order
    const uint32_t log2TrSizeCG = log2TrSize - 2;
    result.scan   = g_scanOrder[result.scanType][log2TrSize - 2];
    result.scanCG = g_scanOrderCG[result.scanType][log2TrSizeCG];

    switch (log2TrSize)
    {
    case 2:
        result.firstSignificanceMapContext = 0;
        break;
    case 3:
        result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9;
        break;
    default:
        result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
        break;
    }
}
2241
2242
0
/* Set the bits of 'flag' in 'bitfield' when value is 1, clear them when value
 * is 0. (~(value - 1)) is all-ones for 1 and zero for 0, so 'value' must be
 * exactly 0 or 1; any other value produces a partial mask. */
#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag))
2243
2244
void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
2245
0
{
2246
0
    uint32_t num4x4Partition = (1U << ((g_log2Size[maxCUSize] - LOG2_UNIT_SIZE) << 1));
2247
2248
    // Initialize the coding blocks inside the CTB
2249
0
    for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--)
2250
0
    {
2251
0
        uint32_t blockSize = 1 << log2CUSize;
2252
0
        uint32_t sbWidth   = 1 << (g_log2Size[maxCUSize] - log2CUSize);
2253
0
        int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];
2254
2255
0
        for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
2256
0
        {
2257
0
            for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
2258
0
            {
2259
0
                uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
2260
0
                uint32_t cuIdx = rangeCUIdx + depthIdx;
2261
0
                uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
2262
0
                uint32_t px = sbX * blockSize;
2263
0
                uint32_t py = sbY * blockSize;
2264
0
                int32_t presentFlag = px < ctuWidth && py < ctuHeight;
2265
0
                int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);
2266
                
2267
                /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
2268
0
                uint32_t xOffset = (sbX * blockSize) >> 3;
2269
0
                uint32_t yOffset = (sbY * blockSize) >> 3;
2270
0
                X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");
2271
2272
0
                CUGeom *cu = cuDataArray + cuIdx;
2273
0
                cu->log2CUSize = log2CUSize;
2274
0
                cu->childOffset = childIdx - cuIdx;
2275
0
                cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
2276
0
                cu->numPartitions = (num4x4Partition >> ((g_log2Size[maxCUSize] - cu->log2CUSize) * 2));
2277
0
                cu->depth = g_log2Size[maxCUSize] - log2CUSize;
2278
0
                cu->geomRecurId = cuIdx;
2279
2280
0
                cu->flags = 0;
2281
0
                CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
2282
0
                CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);
2283
0
                CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);
2284
0
            }
2285
0
        }
2286
0
        rangeCUIdx += sbWidth * sbWidth;
2287
0
    }
2288
0
}
2289
2290
#if ENABLE_SCC_EXT
2291
bool CUData::getDerivedBV(uint32_t absPartIdx, const MV& currentMv, MV& derivedMv, uint32_t width, uint32_t height)
2292
{
2293
    const int   ctuWidth = m_slice->m_param->maxCUSize;
2294
    const int   ctuHeight = m_slice->m_param->maxCUSize;
2295
    int   cuPelX = m_cuPelX + (absPartIdx ? g_zscanToPelX[absPartIdx] : 0);
2296
    int   cuPelY = m_cuPelY + (absPartIdx ? g_zscanToPelX[absPartIdx] : 0);
2297
    int rngX = cuPelX + (currentMv.x >> 2);
2298
    int rngY = cuPelY + (currentMv.y >> 2);
2299
    uint32_t m_frameWidthInCtus = (m_slice->m_sps->picWidthInLumaSamples % ctuWidth) ? m_slice->m_sps->picWidthInLumaSamples / ctuWidth + 1 : m_slice->m_sps->picWidthInLumaSamples / ctuWidth;
2300
2301
    if (rngX < 0 || rngY < 0 || (rngX + width) > m_slice->m_sps->picWidthInLumaSamples || (rngY + height) > m_slice->m_sps->picHeightInLumaSamples)
2302
    {
2303
        return false;
2304
    }
2305
2306
    int refCtbAddr = (rngY / ctuHeight) * m_frameWidthInCtus + (rngX / ctuWidth);
2307
2308
    int      relCUPelX = rngX & (ctuWidth - 1);
2309
    int      relCUPelY = rngY & (ctuHeight - 1);
2310
    uint32_t absPartIdxDerived = g_rasterToZscan[((relCUPelY >> 2) << 4) + (relCUPelX >> 2)];
2311
    CUData* refCU = m_encData->getPicCTU(refCtbAddr);
2312
2313
    if (refCU->m_slice == NULL)
2314
        return false;
2315
2316
    MVField mv1;
2317
    refCU->getMvField(refCU, absPartIdxDerived, 0, mv1);
2318
2319
    int iCurrCtbAddr = (m_cuPelY / ctuHeight) * m_frameWidthInCtus + (m_cuPelX / ctuWidth);
2320
    uint32_t currAbsPartIdx = g_rasterToZscan[(((m_cuPelY & (ctuHeight - 1)) >> 2) << 4) + ((m_cuPelX & (ctuWidth - 1)) >> 2)];
2321
2322
    if ((refCtbAddr > iCurrCtbAddr) || ((refCtbAddr == iCurrCtbAddr) && (absPartIdxDerived >= currAbsPartIdx)))
2323
        return false;
2324
2325
    int refIdx = mv1.refIdx;
2326
    bool isIBC;
2327
    if (refCU->isIntra(absPartIdxDerived))
2328
    {
2329
        isIBC = false;
2330
    }
2331
    else
2332
    {
2333
        isIBC = (refIdx >= 0) ? (refCU->m_slice->m_refFrameList[0][refIdx]->m_poc == refCU->m_slice->m_poc) : 0;
2334
    }
2335
    derivedMv = mv1.mv;
2336
    derivedMv += currentMv;
2337
2338
    return isIBC;
2339
2340
}
2341
2342
bool CUData::isIntraBC(const CUData* cu, uint32_t absPartIdx) const
2343
{
2344
    if (cu->isIntra(absPartIdx))
2345
    {
2346
        return false;
2347
    }
2348
    MVField mv;
2349
    cu->getMvField(cu, absPartIdx, 0, mv);
2350
    int iRefIdx = mv.refIdx;
2351
    bool isNeighborIntraBC = (iRefIdx >= 0) ? (m_slice->m_refFrameList[0][iRefIdx]->m_poc == m_slice->m_poc) : false;
2352
2353
    return isNeighborIntraBC;
2354
}
2355
2356
bool CUData::getColMVPIBC(int ctuRsAddr, int partUnitIdx, MV& rcMv)
2357
{
2358
    uint32_t absPartAddr = partUnitIdx;
2359
2360
    // use coldir.
2361
    Frame* colPic = m_slice->m_lastEncPic;
2362
    if (!colPic)
2363
        return false;
2364
2365
    CUData* colCU = m_encData->getPicCTU(ctuRsAddr);
2366
    MVField tempMv;
2367
    colCU->getMvField(colCU, absPartAddr, 0, tempMv);
2368
    if (tempMv.refIdx == REF_NOT_VALID)
2369
        return false;
2370
2371
    rcMv = tempMv.mv;
2372
2373
    return true;
2374
}
2375
2376
void CUData::getIntraBCMVPsEncOnly(uint32_t absPartIdx, MV* MvPred, int& nbPred, int puIdx)
2377
{
2378
    uint32_t        tempPartIdx;
2379
    uint32_t        left, above;
2380
    MVField         tempMvField;
2381
2382
    int width, height;
2383
    getPartIndexAndSize(puIdx, absPartIdx, width, height);
2384
    uint32_t            numPartInCUWidth = s_numPartInCUSize;
2385
    uint32_t            m_numPartitionsInCtu = s_numPartInCUSize * s_numPartInCUSize;
2386
    uint32_t            m_frameWidthInCtus = (m_slice->m_sps->picWidthInLumaSamples % m_slice->m_param->maxCUSize) ? m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize + 1 : m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize;
2387
2388
    uint32_t            partIdxLT = m_absIdxInCTU;
2389
    uint32_t            partIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];
2390
    uint32_t            partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
2391
2392
    left = above = 0;
2393
2394
    MvPred[0] = m_lastIntraBCMv[0];
2395
    if (MvPred[0] != MV(0, 0))
2396
    {
2397
        nbPred++;
2398
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2399
            nbPred++;
2400
    }
2401
    MvPred[nbPred] = m_lastIntraBCMv[1];
2402
    if (MvPred[nbPred] != MV(0, 0))
2403
    {
2404
        nbPred++;
2405
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2406
            nbPred++;
2407
    }
2408
2409
    //left
2410
    const CUData* leftCU = getPULeft(tempPartIdx, partIdxLB);
2411
    left = leftCU ? isIntraBC(leftCU, tempPartIdx) : 0;
2412
2413
    if (left)
2414
    {
2415
        leftCU->getMvField(leftCU, tempPartIdx, 0, tempMvField);
2416
        MvPred[nbPred++] = tempMvField.mv;
2417
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2418
        {
2419
            nbPred++;
2420
        }
2421
    }
2422
2423
    //above
2424
    const CUData* aboveCU = getPUAbove(tempPartIdx, partIdxRT);
2425
    above = aboveCU ? isIntraBC(aboveCU, tempPartIdx) : 0;
2426
2427
    if (above)
2428
    {
2429
        aboveCU->getMvField(aboveCU, tempPartIdx, 0, tempMvField);
2430
        MvPred[nbPred++] = tempMvField.mv;
2431
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2432
        {
2433
            nbPred++;
2434
        }
2435
    }
2436
2437
    if (m_slice->isOnlyCurrentPictureAsReference())
2438
    {
2439
        MV mvCol;
2440
        bool isColAvail = false;
2441
        if (m_absIdxInCTU && m_slice->m_lastEncPic && m_slice->m_lastEncPic->m_poc < m_slice->m_poc)
2442
        {
2443
            uint32_t partIdxRB;
2444
            partIdxRB = deriveRightBottomIdx(puIdx);
2445
2446
            uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB];
2447
            uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
2448
            int      iLCUIdx = -1;
2449
2450
            if (((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[g_rasterToZscan[absPartIdxTmp]] + 4) < m_slice->m_sps->picWidthInLumaSamples)  // image boundary check
2451
                && ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[g_rasterToZscan[absPartIdxTmp]] + 4) < m_slice->m_sps->picHeightInLumaSamples))
2452
            {
2453
                if ((absPartIdxTmp % numPartInCUWidth < numPartInCUWidth - 1) &&           // is not at the last column of LCU
2454
                    (absPartIdxTmp / numPartInCUWidth < s_numPartInCUSize - 1)) // is not at the last row    of LCU
2455
                {
2456
                    absPartAddr = g_rasterToZscan[absPartIdxTmp + numPartInCUWidth + 1];
2457
                    iLCUIdx = m_cuAddr;
2458
                }
2459
                else if (absPartIdxTmp % numPartInCUWidth < numPartInCUWidth - 1)           // is not at the last column of CTU But is last row of CTU
2460
                {
2461
                    absPartAddr = g_rasterToZscan[(absPartIdxTmp + numPartInCUWidth + 1) % m_numPartitionsInCtu];
2462
                    iLCUIdx = m_cuAddr + m_frameWidthInCtus;
2463
                }
2464
                else if (absPartIdxTmp / numPartInCUWidth < s_numPartInCUSize - 1)          // is not at the last row of CTU But is last column of CTU
2465
                {
2466
                    absPartAddr = g_rasterToZscan[absPartIdxTmp + 1];
2467
                    iLCUIdx = m_cuAddr + 1;
2468
                }
2469
            }
2470
            if (iLCUIdx >= 0)
2471
            {
2472
                isColAvail = getColMVPIBC(iLCUIdx, absPartAddr, mvCol);
2473
2474
                if (!isColAvail)
2475
                {
2476
                    uint32_t uiPartIdxCenter;
2477
                    uiPartIdxCenter = deriveCenterIdx(puIdx);
2478
                    isColAvail = getColMVPIBC(m_cuAddr, uiPartIdxCenter, mvCol);
2479
                }
2480
            }
2481
        }
2482
        if (isColAvail)
2483
        {
2484
            MvPred[nbPred++] = mvCol;
2485
            if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2486
            {
2487
                nbPred++;
2488
            }
2489
        }
2490
    }
2491
2492
    // Below Left predictor search
2493
    const CUData* tempBelowLeftCU = getPUBelowLeft(tempPartIdx, partIdxLB);
2494
    uint32_t belowLeft = (tempBelowLeftCU) ? tempBelowLeftCU->isIntraBC(tempBelowLeftCU, tempPartIdx) : 0;
2495
    if (belowLeft)
2496
    {
2497
        tempBelowLeftCU->getMvField(tempBelowLeftCU, tempPartIdx, 0, tempMvField);
2498
        MvPred[nbPred++] = tempMvField.mv;
2499
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2500
        {
2501
            nbPred++;
2502
        }
2503
    }
2504
2505
    // Above Right predictor search
2506
    const CUData* tempAboveRightCU = getPUAboveRight(tempPartIdx, partIdxRT);
2507
    uint32_t aboveRight = (tempAboveRightCU) ? tempAboveRightCU->isIntraBC(tempAboveRightCU, tempPartIdx) : 0;
2508
    if (aboveRight)
2509
    {
2510
        tempAboveRightCU->getMvField(tempAboveRightCU, tempPartIdx, 0, tempMvField);
2511
        MvPred[nbPred++] = tempMvField.mv;
2512
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2513
        {
2514
            nbPred++;
2515
        }
2516
    }
2517
2518
    // Above Left predictor search
2519
    const CUData* tempAboveLeftCU = getPUAboveLeft(tempPartIdx, partIdxLT);
2520
    uint32_t aboveLeft = (tempAboveLeftCU) ? tempAboveLeftCU->isIntraBC(tempAboveLeftCU, tempPartIdx) : 0;
2521
    if (aboveLeft)
2522
    {
2523
        tempAboveLeftCU->getMvField(tempAboveLeftCU, tempPartIdx, 0, tempMvField);
2524
        MvPred[nbPred++] = tempMvField.mv;
2525
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height))
2526
        {
2527
            nbPred++;
2528
        }
2529
    }
2530
}
2531
2532
/* Round the list-0 vectors of merge candidates to integer-pel precision by
 * clearing their two fractional bits.
 *
 * pcMvFieldNeighbours - candidate array; only element [i][0] of each
 *                       candidate is examined or modified
 * iCount              - number of candidates
 *
 * When the slice uses integer MVs every candidate is rounded. Otherwise only
 * candidates whose list-0 reference has the same POC as the slice are
 * rounded. Reference indices are left untouched. */
void CUData::roundMergeCandidates(MVField(*pcMvFieldNeighbours)[2], int iCount) const
{
    const bool roundAll = m_slice->m_useIntegerMv;

    for (int i = 0; i < iCount; i++)
    {
        MVField& cand = pcMvFieldNeighbours[i][0];

        bool roundThis = roundAll;
        if (!roundThis)
        {
            const int refIdx = cand.refIdx;
            roundThis = refIdx >= 0 && m_slice->m_refFrameList[0][refIdx]->m_poc == m_slice->m_poc;
        }

        if (roundThis)
            cand.mv = (cand.mv >> 2) << 2;
    }
}
2558
2559
/* Decide whether bi-prediction with the given vectors is restricted.
 *
 * mvL0, mvL1           - list-0 and list-1 motion vectors
 * iRefIdxL0, iRefIdxL1 - their reference indices; values outside
 *                        [0, MAX_NUM_REF) count as "no reference"
 *
 * Returns true only when both lists hold a valid reference, both vectors
 * have a fractional component, the two predictions are not identical (same
 * vector and same reference POC), SCC is enabled, and either SAO is in use
 * or deblocking is not disabled for the picture. */
bool CUData::is8x8BipredRestriction(MV mvL0, MV mvL1, int iRefIdxL0, int iRefIdxL1) const
{
    // Both lists must reference a picture
    if (iRefIdxL0 < 0 || iRefIdxL0 >= MAX_NUM_REF)
        return false;
    if (iRefIdxL1 < 0 || iRefIdxL1 >= MAX_NUM_REF)
        return false;

    // An integer-pel vector on either list lifts the restriction
    const bool l0IsIntegerPel = (mvL0.x & 0x3) == 0 && (mvL0.y & 0x3) == 0;
    const bool l1IsIntegerPel = (mvL1.x & 0x3) == 0 && (mvL1.y & 0x3) == 0;
    if (l0IsIntegerPel || l1IsIntegerPel)
        return false;

    const int refPOCL0 = m_slice->m_refPOCList[0][iRefIdxL0];
    const int refPOCL1 = m_slice->m_refPOCList[1][iRefIdxL1];
    if (mvL0 == mvL1 && refPOCL0 == refPOCL1)
        return false;

    if (!m_slice->m_param->bEnableSCC)
        return false;

    return m_slice->m_bUseSao || !m_slice->m_pps->bPicDisableDeblockingFilter;
}
2586
#endif