Coverage Report

Created: 2026-03-08 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/x265/source/common/cudata.cpp
Line
Count
Source
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
#include "common.h"
26
#include "frame.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "mv.h"
30
#include "cudata.h"
31
/* Upper bound used for motion vector components (2^14). The replacement list is
 * parenthesized so the macro expands as a single value inside larger expressions
 * (e.g. MAX_MV - 1, MAX_MV * 2, -MAX_MV). */
#define MAX_MV (1 << 14)
32
33
using namespace X265_NS;
34
35
/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
36
37
13.1M
/* store val into the single byte at dst */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
38
39
13.1M
/* copy 4 bytes from src to dst as one 32-bit word */
static void copy4(uint8_t* dst, uint8_t* src)
{
    uint32_t* out = (uint32_t*)dst;
    uint32_t* in = (uint32_t*)src;
    *out = *in;
}
40
14.2M
/* fill 4 bytes at dst with val, written as one 32-bit word */
static void bcast4(uint8_t* dst, uint8_t val)
{
    /* multiplying by 0x01010101 replicates val into every byte of the word */
    const uint32_t word = 0x01010101u * val;
    *(uint32_t*)dst = word;
}
41
42
3.24M
/* copy 16 bytes from src to dst as two 64-bit words, low word first */
static void copy16(uint8_t* dst, uint8_t* src)
{
    uint64_t* out = (uint64_t*)dst;
    uint64_t* in = (uint64_t*)src;
    for (int w = 0; w < 2; w++)
        out[w] = in[w];
}
43
2.99M
/* fill 16 bytes at dst with val, written as two 64-bit words */
static void bcast16(uint8_t* dst, uint8_t val)
{
    /* replicate val into all eight bytes of a 64-bit word */
    const uint64_t word = 0x0101010101010101ULL * val;
    uint64_t* out = (uint64_t*)dst;
    for (int w = 0; w < 2; w++)
        out[w] = word;
}
44
45
827k
static void copy64(uint8_t* dst, uint8_t* src) { ((uint64_t*)dst)[0] = ((uint64_t*)src)[0]; ((uint64_t*)dst)[1] = ((uint64_t*)src)[1]; 
46
827k
                                                 ((uint64_t*)dst)[2] = ((uint64_t*)src)[2]; ((uint64_t*)dst)[3] = ((uint64_t*)src)[3];
47
827k
                                                 ((uint64_t*)dst)[4] = ((uint64_t*)src)[4]; ((uint64_t*)dst)[5] = ((uint64_t*)src)[5];
48
827k
                                                 ((uint64_t*)dst)[6] = ((uint64_t*)src)[6]; ((uint64_t*)dst)[7] = ((uint64_t*)src)[7]; }
49
650k
static void bcast64(uint8_t* dst, uint8_t val) { uint64_t bval = 0x0101010101010101ULL * val;
50
650k
                                                 ((uint64_t*)dst)[0] = bval; ((uint64_t*)dst)[1] = bval; ((uint64_t*)dst)[2] = bval; ((uint64_t*)dst)[3] = bval;
51
650k
                                                 ((uint64_t*)dst)[4] = bval; ((uint64_t*)dst)[5] = bval; ((uint64_t*)dst)[6] = bval; ((uint64_t*)dst)[7] = bval; }
52
53
/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
54
 * but hand-written assembly would beat it. */
55
120k
/* copy 256 bytes from src to dst using the library memcpy */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;
    memcpy(dst, src, numBytes);
}
56
87.8k
/* fill 256 bytes at dst with val using the library memset */
static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;
    memset(dst, val, numBytes);
}
57
58
namespace {
59
// file private namespace
60
61
/* Check whether 2 addresses point to the same column */
62
inline bool isEqualCol(int addrA, int addrB)
63
36.7M
{
64
36.7M
    return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0;
65
36.7M
}
66
67
/* Check whether 2 addresses point to the same row */
68
inline bool isEqualRow(int addrA, int addrB)
69
23.7M
{
70
23.7M
    return ((addrA ^ addrB) < RASTER_SIZE);
71
23.7M
}
72
73
/* Check whether 2 addresses point to the same row or column */
74
inline bool isEqualRowOrCol(int addrA, int addrB)
75
17.8M
{
76
17.8M
    return isEqualCol(addrA, addrB) || isEqualRow(addrA, addrB);
77
17.8M
}
78
79
/* Check whether one address points to the first column */
80
inline bool isZeroCol(int addr)
81
46.1M
{
82
46.1M
    return (addr & (RASTER_SIZE - 1)) == 0;
83
46.1M
}
84
85
/* Check whether one address points to the first row */
86
inline bool isZeroRow(int addr)
87
47.8M
{
88
47.8M
    return (addr < RASTER_SIZE);
89
47.8M
}
90
91
/* Check whether one address points to a column whose index is smaller than a given value */
92
inline bool lessThanCol(int addr, int val)
93
15.6M
{
94
15.6M
    return (addr & (RASTER_SIZE - 1)) < val;
95
15.6M
}
96
97
/* Check whether one address points to a row whose index is smaller than a given value */
98
inline bool lessThanRow(int addr, int val)
99
15.6M
{
100
    // addr / numUnits < val
101
15.6M
    return (addr >> LOG2_RASTER_SIZE) < val;
102
15.6M
}
103
104
inline MV scaleMv(MV mv, int scale)
105
0
{
106
0
    int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
107
0
    int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
108
109
0
    return MV((int32_t)mvx, (int32_t)mvy);
110
0
}
111
112
}
113
114
CUData::CUData()
115
1.20M
{
116
1.20M
    memset(this, 0, sizeof(*this));
117
1.20M
}
118
119
void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance)
120
1.03M
{
121
1.03M
    int csp = param.internalCsp;
122
1.03M
    m_chromaFormat  = csp;
123
1.03M
    m_hChromaShift  = CHROMA_H_SHIFT(csp);
124
1.03M
    m_vChromaShift  = CHROMA_V_SHIFT(csp);
125
1.03M
    m_numPartitions = param.num4x4Partitions >> (depth * 2);
126
127
1.03M
    if (!s_partSet[0])
128
1.03M
    {
129
1.03M
        s_numPartInCUSize = 1 << param.unitSizeDepth;
130
1.03M
        switch (param.maxLog2CUSize)
131
1.03M
        {
132
790k
        case 6:
133
790k
            s_partSet[0] = bcast256;
134
790k
            s_partSet[1] = bcast64;
135
790k
            s_partSet[2] = bcast16;
136
790k
            s_partSet[3] = bcast4;
137
790k
            s_partSet[4] = bcast1;
138
790k
            break;
139
89.7k
        case 5:
140
89.7k
            s_partSet[0] = bcast64;
141
89.7k
            s_partSet[1] = bcast16;
142
89.7k
            s_partSet[2] = bcast4;
143
89.7k
            s_partSet[3] = bcast1;
144
89.7k
            s_partSet[4] = NULL;
145
89.7k
            break;
146
151k
        case 4:
147
151k
            s_partSet[0] = bcast16;
148
151k
            s_partSet[1] = bcast4;
149
151k
            s_partSet[2] = bcast1;
150
151k
            s_partSet[3] = NULL;
151
151k
            s_partSet[4] = NULL;
152
151k
            break;
153
0
        default:
154
0
            X265_CHECK(0, "unexpected CTU size\n");
155
0
            break;
156
1.03M
        }
157
1.03M
    }
158
159
1.03M
    switch (m_numPartitions)
160
1.03M
    {
161
201k
    case 256: // 64x64 CU
162
201k
        m_partCopy = copy256;
163
201k
        m_partSet = bcast256;
164
201k
        m_subPartCopy = copy64;
165
201k
        m_subPartSet = bcast64;
166
201k
        break;
167
226k
    case 64:  // 32x32 CU
168
226k
        m_partCopy = copy64;
169
226k
        m_partSet = bcast64;
170
226k
        m_subPartCopy = copy16;
171
226k
        m_subPartSet = bcast16;
172
226k
        break;
173
305k
    case 16:  // 16x16 CU
174
305k
        m_partCopy = copy16;
175
305k
        m_partSet = bcast16;
176
305k
        m_subPartCopy = copy4;
177
305k
        m_subPartSet = bcast4;
178
305k
        break;
179
298k
    case 4:   // 8x8 CU
180
298k
        m_partCopy = copy4;
181
298k
        m_partSet = bcast4;
182
298k
        m_subPartCopy = NULL;
183
298k
        m_subPartSet = NULL;
184
298k
        break;
185
0
    default:
186
0
        X265_CHECK(0, "unexpected CU partition count\n");
187
0
        break;
188
1.03M
    }
189
190
1.03M
    if (csp == X265_CSP_I400)
191
0
    {
192
        /* Each CU's data is laid out sequentially within the charMemBlock */
193
0
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance;
194
195
0
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
196
0
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
197
0
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
198
0
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
199
0
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
200
0
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
201
0
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
202
0
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
203
0
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
204
0
        m_partSize           = charBuf; charBuf += m_numPartitions;
205
0
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
206
0
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
207
0
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
208
0
        m_interDir           = charBuf; charBuf += m_numPartitions;
209
0
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
210
0
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
211
0
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
212
0
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
213
0
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
214
0
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
215
216
0
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition
217
218
0
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
219
0
        m_mv[1]  = m_mv[0] +  m_numPartitions;
220
0
        m_mvd[0] = m_mv[1] +  m_numPartitions;
221
0
        m_mvd[1] = m_mvd[0] + m_numPartitions;
222
223
0
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
224
225
0
        uint32_t cuSize = param.maxCUSize >> depth;
226
0
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
227
0
        m_trCoeff[1] = m_trCoeff[2] = 0;
228
0
        m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
229
0
        m_fAc_den[0] = m_fDc_den[0] = 0;
230
0
    }
231
1.03M
    else
232
1.03M
    {
233
        /* Each CU's data is laid out sequentially within the charMemBlock */
234
1.03M
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
235
236
1.03M
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
237
1.03M
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
238
1.03M
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
239
1.03M
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
240
1.03M
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
241
1.03M
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
242
1.03M
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
243
1.03M
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
244
1.03M
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
245
1.03M
        m_partSize           = charBuf; charBuf += m_numPartitions;
246
1.03M
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
247
1.03M
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
248
1.03M
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
249
1.03M
        m_interDir           = charBuf; charBuf += m_numPartitions;
250
1.03M
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
251
1.03M
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
252
1.03M
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
253
1.03M
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
254
1.03M
        m_transformSkip[1]   = charBuf; charBuf += m_numPartitions;
255
1.03M
        m_transformSkip[2]   = charBuf; charBuf += m_numPartitions;
256
1.03M
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
257
1.03M
        m_cbf[1]             = charBuf; charBuf += m_numPartitions;
258
1.03M
        m_cbf[2]             = charBuf; charBuf += m_numPartitions;
259
1.03M
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
260
261
1.03M
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
262
263
1.03M
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
264
1.03M
        m_mv[1]  = m_mv[0] +  m_numPartitions;
265
1.03M
        m_mvd[0] = m_mv[1] +  m_numPartitions;
266
1.03M
        m_mvd[1] = m_mvd[0] + m_numPartitions;
267
268
1.03M
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
269
270
1.03M
        uint32_t cuSize = param.maxCUSize >> depth;
271
1.03M
        uint32_t sizeL = cuSize * cuSize;
272
1.03M
        uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
273
1.03M
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
274
1.03M
        m_trCoeff[1] = m_trCoeff[0] + sizeL;
275
1.03M
        m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
276
4.12M
        for (int i = 0; i < 3; i++)
277
3.09M
            m_fAc_den[i] = m_fDc_den[i] = 0;
278
1.03M
    }
279
1.03M
}
280
281
void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
282
13.7k
{
283
13.7k
    m_encData       = frame.m_encData;
284
13.7k
    m_slice         = m_encData->m_slice;
285
13.7k
    m_cuAddr        = cuAddr;
286
13.7k
    m_cuPelX        = (cuAddr % m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
287
13.7k
    m_cuPelY        = (cuAddr / m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
288
13.7k
    m_absIdxInCTU   = 0;
289
13.7k
    m_numPartitions = m_encData->m_param->num4x4Partitions;
290
13.7k
    m_bFirstRowInSlice = (uint8_t)firstRowInSlice;
291
13.7k
    m_bLastRowInSlice  = (uint8_t)lastRowInSlice;
292
13.7k
    m_bLastCuInSlice   = (uint8_t)lastCuInSlice;
293
#if ENABLE_SCC_EXT
294
    m_lastIntraBCMv[0].set(0, 0);
295
    m_lastIntraBCMv[1].set(0, 0);
296
#endif
297
298
    /* sequential memsets */
299
13.7k
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
300
13.7k
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
301
13.7k
    m_partSet(m_log2CUSize,   (uint8_t)m_slice->m_param->maxLog2CUSize);
302
13.7k
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
303
13.7k
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
304
13.7k
    m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
305
13.7k
    if (m_slice->m_sliceType != I_SLICE)
306
0
    {
307
0
        m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
308
0
        m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
309
0
    }
310
311
13.7k
    X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
312
313
    /* initialize the remaining CU data in one memset */
314
13.7k
    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
315
316
301k
    for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
317
287k
        m_refTuDepth[i] = -1;
318
319
13.7k
    m_vbvAffected = false;
320
321
13.7k
    uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
322
13.7k
    m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
323
13.7k
    m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
324
13.7k
    m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
325
13.7k
    m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
326
13.7k
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
327
13.7k
}
328
329
// initialize Sub partition
330
#if ENABLE_SCC_EXT
331
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp, MV lastIntraBCMv[2])
332
#else
333
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
334
#endif
335
784k
{
336
784k
    m_absIdxInCTU   = cuGeom.absPartIdx;
337
784k
    m_encData       = ctu.m_encData;
338
784k
    m_slice         = ctu.m_slice;
339
784k
    m_cuAddr        = ctu.m_cuAddr;
340
784k
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
341
784k
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
342
784k
    m_cuLeft        = ctu.m_cuLeft;
343
784k
    m_cuAbove       = ctu.m_cuAbove;
344
784k
    m_cuAboveLeft   = ctu.m_cuAboveLeft;
345
784k
    m_cuAboveRight  = ctu.m_cuAboveRight;
346
784k
    m_bFirstRowInSlice = ctu.m_bFirstRowInSlice;
347
784k
    m_bLastRowInSlice = ctu.m_bLastRowInSlice;
348
784k
    m_bLastCuInSlice = ctu.m_bLastCuInSlice;
349
3.13M
    for (int i = 0; i < 3; i++)
350
2.35M
    {
351
2.35M
        m_fAc_den[i] = ctu.m_fAc_den[i];
352
2.35M
        m_fDc_den[i] = ctu.m_fDc_den[i];
353
2.35M
    }
354
355
784k
    X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
356
357
784k
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
358
784k
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
359
360
784k
    m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
361
784k
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
362
784k
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
363
784k
    m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
364
784k
    m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
365
784k
    m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
366
784k
    m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
367
368
    /* initialize the remaining CU data in one memset */
369
784k
    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
370
784k
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
371
372
#if ENABLE_SCC_EXT
373
    if (lastIntraBCMv)
374
    {
375
        for (int i = 0; i < 2; i++)
376
            m_lastIntraBCMv[i] = lastIntraBCMv[i];
377
    }
378
#endif
379
784k
}
380
381
/* Copy the results of a sub-part (split) CU to the parent CU */
382
void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
383
386k
{
384
386k
    X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
385
386
386k
    uint32_t offset = childGeom.numPartitions * subPartIdx;
387
388
386k
    m_bFirstRowInSlice = subCU.m_bFirstRowInSlice;
389
386k
    m_bLastCuInSlice = subCU.m_bLastCuInSlice;
390
391
386k
    m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
392
386k
    m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis);
393
386k
    m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
394
386k
    m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
395
386k
    m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
396
386k
    m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
397
386k
    m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
398
386k
    m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
399
386k
    m_subPartCopy(m_predMode + offset, subCU.m_predMode);
400
386k
    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
401
386k
    m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
402
386k
    m_subPartCopy(m_interDir + offset, subCU.m_interDir);
403
386k
    m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
404
386k
    m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
405
386k
    m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
406
407
386k
    m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
408
386k
    m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
409
410
386k
    memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
411
386k
    memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
412
386k
    memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
413
386k
    memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
414
415
386k
    memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t));
416
417
386k
    uint32_t tmp = 1 << ((m_slice->m_param->maxLog2CUSize - childGeom.depth) * 2);
418
386k
    uint32_t tmp2 = subPartIdx * tmp;
419
386k
    memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
420
421
386k
    if (subCU.m_chromaFormat != X265_CSP_I400)
422
386k
    {
423
386k
        m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
424
386k
        m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
425
386k
        m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
426
386k
        m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
427
386k
        m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
428
429
386k
        uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
430
386k
        uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
431
386k
        memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
432
386k
        memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
433
386k
    }
434
#if ENABLE_SCC_EXT
435
    for (int i = 0; i < 2; i++)
436
        m_lastIntraBCMv[i] = subCU.m_lastIntraBCMv[i];
437
#endif
438
386k
}
439
440
/* If a sub-CU part is not present (off the edge of the picture) its depth and
441
 * log2size should still be configured */
442
void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
443
21.5k
{
444
21.5k
    uint32_t offset = childGeom.numPartitions * subPartIdx;
445
21.5k
    m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
446
21.5k
    m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
447
21.5k
}
448
449
/* Copy all CU data from one instance to the next, except set lossless flag
450
 * This will only get used when --cu-lossless is enabled but --lossless is not. */
451
void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
452
0
{
453
    /* Start by making an exact copy */
454
0
    m_encData      = cu.m_encData;
455
0
    m_slice        = cu.m_slice;
456
0
    m_cuAddr       = cu.m_cuAddr;
457
0
    m_cuPelX       = cu.m_cuPelX;
458
0
    m_cuPelY       = cu.m_cuPelY;
459
0
    m_cuLeft       = cu.m_cuLeft;
460
0
    m_cuAbove      = cu.m_cuAbove;
461
0
    m_cuAboveLeft  = cu.m_cuAboveLeft;
462
0
    m_cuAboveRight = cu.m_cuAboveRight;
463
0
    m_absIdxInCTU  = cuGeom.absPartIdx;
464
0
    m_numPartitions = cuGeom.numPartitions;
465
0
    /* Copy every per-partition byte array in one shot, starting at m_qp (the first
     * array in the block). For I400, initialize() lays out only
     * (BytesPerPartition - 4) arrays per instance (transformSkip[1..2] and cbf[1..2]
     * are not allocated), so the copy length must shrink accordingly; copying the
     * full BytesPerPartition would read and write 4 * m_numPartitions bytes past
     * the end of each instance's region. */
    memcpy(m_qp, cu.m_qp, (cu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 4 : BytesPerPartition) * m_numPartitions);
466
0
    memcpy(m_mv[0],  cu.m_mv[0],  m_numPartitions * sizeof(MV));
467
0
    memcpy(m_mv[1],  cu.m_mv[1],  m_numPartitions * sizeof(MV));
468
0
    memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
469
0
    memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
470
0
    memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t));
471
472
    /* force TQBypass to true */
473
0
    m_partSet(m_tqBypass, true);
474
475
    /* clear residual coding flags */
476
0
    m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
477
0
    m_partSet(m_tuDepth, 0);
478
0
    m_partSet(m_cbf[0], 0);
479
0
    m_partSet(m_transformSkip[0], 0);
480
481
0
    if (cu.m_chromaFormat != X265_CSP_I400)
482
0
    {
483
0
        m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
484
0
        m_partSet(m_cbf[1], 0);
485
0
        m_partSet(m_cbf[2], 0);
486
0
        m_partSet(m_transformSkip[1], 0);
487
0
        m_partSet(m_transformSkip[2], 0);
488
0
    }
489
0
}
490
491
/* Copy completed predicted CU to CTU in picture */
492
void CUData::copyToPic(uint32_t depth) const
493
400k
{
494
400k
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
495
496
400k
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
497
400k
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
498
400k
    m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
499
400k
    m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
500
400k
    m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
501
400k
    m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
502
400k
    m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
503
400k
    m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
504
400k
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
505
400k
    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
506
400k
    m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
507
400k
    m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
508
400k
    m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
509
400k
    m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
510
400k
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
511
400k
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
512
400k
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
513
514
400k
    memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
515
400k
    memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
516
400k
    memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
517
400k
    memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
518
519
400k
    memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t));
520
521
400k
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
522
400k
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
523
400k
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
524
525
400k
    if (ctu.m_chromaFormat != X265_CSP_I400)
526
400k
    {
527
400k
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
528
400k
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
529
400k
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
530
400k
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
531
400k
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
532
533
400k
        uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
534
400k
        uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
535
400k
        memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
536
400k
        memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
537
400k
    }
538
400k
}
539
540
/* The reverse of copyToPic, called only by encodeResidue */
541
void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp)
542
0
{
543
0
    m_encData       = ctu.m_encData;
544
0
    m_slice         = ctu.m_slice;
545
0
    m_cuAddr        = ctu.m_cuAddr;
546
0
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
547
0
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
548
0
    m_absIdxInCTU   = cuGeom.absPartIdx;
549
0
    m_numPartitions = cuGeom.numPartitions;
550
551
    /* copy out all prediction info for this part */
552
0
    if (copyQp)
553
0
    {
554
0
        m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
555
0
        m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU);
556
0
    }
557
558
0
    m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
559
0
    m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
560
0
    m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
561
0
    m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
562
0
    m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
563
0
    m_partCopy(m_cuDepth,      ctu.m_cuDepth + m_absIdxInCTU);
564
0
    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
565
0
    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
566
0
    m_partCopy(m_mergeFlag,    ctu.m_mergeFlag + m_absIdxInCTU);
567
0
    m_partCopy(m_interDir,     ctu.m_interDir + m_absIdxInCTU);
568
0
    m_partCopy(m_mvpIdx[0],    ctu.m_mvpIdx[0] + m_absIdxInCTU);
569
0
    m_partCopy(m_mvpIdx[1],    ctu.m_mvpIdx[1] + m_absIdxInCTU);
570
0
    m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
571
572
0
    memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
573
0
    memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
574
0
    memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
575
0
    memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
576
577
0
    memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t));
578
579
    /* clear residual coding flags */
580
0
    m_partSet(m_tuDepth, 0);
581
0
    m_partSet(m_transformSkip[0], 0);
582
0
    m_partSet(m_cbf[0], 0);
583
584
0
    if (csp != X265_CSP_I400)
585
0
    {        
586
0
        m_partSet(m_transformSkip[1], 0);
587
0
        m_partSet(m_transformSkip[2], 0);
588
0
        m_partSet(m_cbf[1], 0);
589
0
        m_partSet(m_cbf[2], 0);
590
0
    }
591
0
}
592
593
/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
594
void CUData::updatePic(uint32_t depth, int picCsp) const
595
0
{
596
0
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
597
598
0
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
599
0
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
600
0
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
601
0
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
602
0
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
603
0
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
604
605
0
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
606
0
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
607
0
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
608
609
0
    if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400)
610
0
    {
611
0
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
612
0
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
613
614
0
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
615
0
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
616
0
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
617
618
0
        tmpY  >>= m_hChromaShift + m_vChromaShift;
619
0
        tmpY2 >>= m_hChromaShift + m_vChromaShift;
620
0
        memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
621
0
        memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
622
0
    }
623
0
}
624
625
/* Find the partition immediately left of curPartUnitIdx (a z-scan index).
 *
 * lPartUnitIdx receives the z-scan index of that neighbour, expressed relative
 * to whichever CUData is returned:
 *   - m_cuLeft when the current partition sits in raster column zero,
 *   - the picture CTU when the partition is in the same column as this CU's
 *     first partition (the neighbour lies outside this CU),
 *   - this CU otherwise (index rebased by m_absIdxInCTU). */
const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroCol(rasterIdx))
    {
        /* neighbour is in the last column of the CTU to the left */
        lPartUnitIdx = g_rasterToZscan[rasterIdx + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    const uint32_t cuRasterIdx = g_zscanToRaster[m_absIdxInCTU];
    lPartUnitIdx = g_rasterToZscan[rasterIdx - 1];

    if (isEqualCol(rasterIdx, cuRasterIdx))
        return m_encData->getPicCTU(m_cuAddr);

    lPartUnitIdx -= m_absIdxInCTU;
    return this;
}
645
646
/* Find the partition immediately above curPartUnitIdx (a z-scan index).
 *
 * aPartUnitIdx receives the z-scan index of that neighbour, expressed relative
 * to whichever CUData is returned:
 *   - m_cuAbove when the current partition sits in raster row zero,
 *   - the picture CTU when the partition is in the same row as this CU's
 *     first partition (the neighbour lies outside this CU),
 *   - this CU otherwise (index rebased by m_absIdxInCTU). */
const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroRow(rasterIdx))
    {
        /* neighbour is in the last row of the CTU above */
        aPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)];
        return m_cuAbove;
    }

    const uint32_t cuRasterIdx = g_zscanToRaster[m_absIdxInCTU];
    aPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE];

    if (isEqualRow(rasterIdx, cuRasterIdx))
        return m_encData->getPicCTU(m_cuAddr);

    aPartUnitIdx -= m_absIdxInCTU;
    return this;
}
664
665
/* Find the partition diagonally above-left of curPartUnitIdx (a z-scan index).
 *
 * alPartUnitIdx receives the neighbour's z-scan index relative to the returned
 * CUData, which is selected from the partition's raster position:
 *   - column zero and row zero : m_cuAboveLeft (its last partition),
 *   - column zero only         : m_cuLeft,
 *   - row zero only            : m_cuAbove,
 *   - otherwise the picture CTU when the partition shares a row or column with
 *     this CU's first partition, else this CU (index rebased by m_absIdxInCTU). */
const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroCol(rasterIdx))
    {
        if (isZeroRow(rasterIdx))
        {
            alPartUnitIdx = m_encData->m_param->num4x4Partitions - 1;
            return m_cuAboveLeft;
        }

        alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    if (isZeroRow(rasterIdx))
    {
        alPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1];
        return m_cuAbove;
    }

    /* neighbour is inside the current CTU */
    const uint32_t cuRasterIdx = g_zscanToRaster[m_absIdxInCTU];
    alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE - 1];

    if (isEqualRowOrCol(rasterIdx, cuRasterIdx))
        return m_encData->getPicCTU(m_cuAddr);

    alPartUnitIdx -= m_absIdxInCTU;
    return this;
}
696
697
/* Find the partition diagonally above-right of curPartUnitIdx (a z-scan index).
 *
 * Returns NULL when the neighbour would start at or beyond the picture width,
 * when the partition is in the last CTU column but not in row zero, or when the
 * in-CTU candidate does not precede curPartUnitIdx in z-scan order (presumably
 * because it has not been coded yet). Otherwise arPartUnitIdx receives the
 * neighbour's z-scan index relative to the returned CUData (m_cuAbove,
 * m_cuAboveRight, the picture CTU, or this CU). */
const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rightPelX = m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE;
    if (rightPelX >= m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - 1))
    {
        /* last column of the CTU: only the top row has an above-right CTU neighbour */
        if (!isZeroRow(rasterIdx))
            return NULL;

        arPartUnitIdx = g_rasterToZscan[(s_numPartInCUSize - 1) << LOG2_RASTER_SIZE];
        return m_cuAboveRight;
    }

    if (isZeroRow(rasterIdx))
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + 1];
        return m_cuAbove;
    }

    /* candidate inside the current CTU; usable only if it precedes us in z-scan order */
    const uint32_t candidate = g_rasterToZscan[rasterIdx - RASTER_SIZE + 1];
    if (curPartUnitIdx <= candidate)
        return NULL;

    /* raster index of this CU's top-right partition */
    const uint32_t cuTopRight = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    arPartUnitIdx = candidate;

    if (isEqualRowOrCol(rasterIdx, cuTopRight))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
732
733
/* Find the partition diagonally below-left of curPartUnitIdx (a z-scan index).
 *
 * Returns NULL when the neighbour would start at or beyond the picture height,
 * when the partition is in the last CTU row, or when the in-CTU candidate does
 * not precede curPartUnitIdx in z-scan order (presumably not coded yet).
 * Otherwise blPartUnitIdx receives the neighbour's z-scan index relative to the
 * returned CUData (m_cuLeft, the picture CTU, or this CU). */
const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t belowPelY = m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE;
    if (belowPelY >= m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    /* nothing below the last row of the CTU is available */
    if (!lessThanRow(rasterIdx, s_numPartInCUSize - 1))
        return NULL;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    /* candidate inside the current CTU; usable only if it precedes us in z-scan order */
    const uint32_t candidate = g_rasterToZscan[rasterIdx + RASTER_SIZE - 1];
    if (curPartUnitIdx <= candidate)
        return NULL;

    /* raster index of this CU's bottom-left partition */
    const uint32_t cuBottomLeft = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    blPartUnitIdx = candidate;

    if (isEqualRowOrCol(rasterIdx, cuBottomLeft))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
764
765
/* Variant of the below-left neighbour lookup that looks partUnitOffset
 * partition units down instead of one.
 *
 * Returns NULL when the target row is at or beyond the picture height, when it
 * falls outside the CTU, or when the in-CTU candidate does not precede
 * curPartUnitIdx in z-scan order. Otherwise blPartUnitIdx receives the
 * neighbour's z-scan index relative to the returned CUData (m_cuLeft, the
 * picture CTU, or this CU). */
const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx,  uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    const uint32_t belowPelY = m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE);
    if (belowPelY >= m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    /* the target row must still be inside this CTU */
    if (!lessThanRow(rasterIdx, s_numPartInCUSize - partUnitOffset))
        return NULL;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + (partUnitOffset << LOG2_RASTER_SIZE) + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    /* candidate inside the current CTU; usable only if it precedes us in z-scan order */
    const uint32_t candidate = g_rasterToZscan[rasterIdx + (partUnitOffset << LOG2_RASTER_SIZE) - 1];
    if (curPartUnitIdx <= candidate)
        return NULL;

    /* raster index of this CU's bottom-left partition */
    const uint32_t cuBottomLeft = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    blPartUnitIdx = candidate;

    if (isEqualRowOrCol(rasterIdx, cuBottomLeft))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
796
797
/* Variant of the above-right neighbour lookup that looks partUnitOffset
 * partition units to the right instead of one.
 *
 * Returns NULL when the target column is at or beyond the picture width, when
 * it falls outside the CTU on a row other than row zero, or when the in-CTU
 * candidate does not precede curPartUnitIdx in z-scan order. Otherwise
 * arPartUnitIdx receives the neighbour's z-scan index relative to the returned
 * CUData (m_cuAbove, m_cuAboveRight, the picture CTU, or this CU). */
const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    const uint32_t rightPelX = m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE);
    if (rightPelX >= m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - partUnitOffset))
    {
        /* target column is outside this CTU: only row zero has an above-right CTU neighbour */
        if (!isZeroRow(rasterIdx))
            return NULL;

        arPartUnitIdx = g_rasterToZscan[((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset - 1];
        return m_cuAboveRight;
    }

    if (isZeroRow(rasterIdx))
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset];
        return m_cuAbove;
    }

    /* candidate inside the current CTU; usable only if it precedes us in z-scan order */
    const uint32_t candidate = g_rasterToZscan[rasterIdx - RASTER_SIZE + partUnitOffset];
    if (curPartUnitIdx <= candidate)
        return NULL;

    /* raster index of this CU's top-right partition */
    const uint32_t cuTopRight = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    arPartUnitIdx = candidate;

    if (isEqualRowOrCol(rasterIdx, cuTopRight))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
832
833
/* Get left QpMinCu */
834
const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
835
95.5k
{
836
95.5k
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
837
95.5k
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
838
839
    // check for left CTU boundary
840
95.5k
    if (isZeroCol(absRorderQpMinCUIdx))
841
66.7k
        return NULL;
842
843
    // get index of left-CU relative to top-left corner of current quantization group
844
28.7k
    lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
845
846
    // return pointer to current CTU
847
28.7k
    return m_encData->getPicCTU(m_cuAddr);
848
95.5k
}
849
850
/* Get above QpMinCu */
851
const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
852
95.5k
{
853
95.5k
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
854
95.5k
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
855
856
    // check for top CTU boundary
857
95.5k
    if (isZeroRow(absRorderQpMinCUIdx))
858
66.8k
        return NULL;
859
860
    // get index of top-CU relative to top-left corner of current quantization group
861
28.7k
    aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - RASTER_SIZE];
862
863
    // return pointer to current CTU
864
28.7k
    return m_encData->getPicCTU(m_cuAddr);
865
95.5k
}
866
867
/* Get reference QP from left QpMinCu or latest coded QP */
868
int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
869
95.5k
{
870
95.5k
    uint32_t lPartIdx = 0, aPartIdx = 0;
871
95.5k
    const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
872
95.5k
    const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
873
874
95.5k
    return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
875
95.5k
}
876
877
int CUData::getLastValidPartIdx(int absPartIdx) const
878
221k
{
879
221k
    int lastValidPartIdx = absPartIdx - 1;
880
881
278k
    while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
882
57.0k
    {
883
57.0k
        uint32_t depth = m_cuDepth[lastValidPartIdx];
884
57.0k
        lastValidPartIdx -= m_numPartitions >> (depth << 1);
885
57.0k
    }
886
887
221k
    return lastValidPartIdx;
888
221k
}
889
890
int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
891
221k
{
892
221k
    uint32_t quPartIdxMask = 0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
893
221k
    int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
894
895
221k
    if (lastValidPartIdx >= 0)
896
106k
        return m_qp[lastValidPartIdx];
897
114k
    else
898
114k
    {
899
114k
        if (m_absIdxInCTU)
900
11.8k
            return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
901
102k
        else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
902
76.0k
            return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(m_encData->m_param->num4x4Partitions);
903
26.7k
        else
904
26.7k
            return (int8_t)m_slice->m_sliceQp;
905
114k
    }
906
221k
}
907
908
/* Get allowed chroma intra modes */
909
void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
910
1.44M
{
911
1.44M
    modeList[0] = PLANAR_IDX;
912
1.44M
    modeList[1] = VER_IDX;
913
1.44M
    modeList[2] = HOR_IDX;
914
1.44M
    modeList[3] = DC_IDX;
915
1.44M
    modeList[4] = DM_CHROMA_IDX;
916
917
1.44M
    uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
918
919
2.39M
    for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
920
2.39M
    {
921
2.39M
        if (lumaMode == modeList[i])
922
1.44M
        {
923
1.44M
            modeList[i] = 34; // VER+8 mode
924
1.44M
            break;
925
1.44M
        }
926
2.39M
    }
927
1.44M
}
928
929
/* Get most probable intra modes */
930
int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
931
6.75M
{
932
6.75M
    const CUData* tempCU;
933
6.75M
    uint32_t tempPartIdx;
934
6.75M
    uint32_t leftIntraDir, aboveIntraDir;
935
936
    // Get intra direction of left PU
937
6.75M
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
938
939
6.75M
    leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
940
941
    // Get intra direction of above PU
942
6.75M
    tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
943
944
6.75M
    aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
945
946
6.75M
    if (leftIntraDir == aboveIntraDir)
947
3.85M
    {
948
3.85M
        if (leftIntraDir >= 2) // angular modes
949
62.5k
        {
950
62.5k
            intraDirPred[0] = leftIntraDir;
951
62.5k
            intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
952
62.5k
            intraDirPred[2] = ((leftIntraDir - 2 +  1) & 31) + 2;
953
62.5k
        }
954
3.79M
        else //non-angular
955
3.79M
        {
956
3.79M
            intraDirPred[0] = PLANAR_IDX;
957
3.79M
            intraDirPred[1] = DC_IDX;
958
3.79M
            intraDirPred[2] = VER_IDX;
959
3.79M
        }
960
3.85M
        return 1;
961
3.85M
    }
962
2.89M
    else
963
2.89M
    {
964
2.89M
        intraDirPred[0] = leftIntraDir;
965
2.89M
        intraDirPred[1] = aboveIntraDir;
966
967
2.89M
        if (leftIntraDir && aboveIntraDir) //both modes are non-planar
968
39.5k
            intraDirPred[2] = PLANAR_IDX;
969
2.85M
        else
970
18.4E
            intraDirPred[2] =  (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
971
2.89M
        return 2;
972
2.89M
    }
973
6.75M
}
974
975
uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
976
238k
{
977
238k
    const CUData* tempCU;
978
238k
    uint32_t    tempPartIdx;
979
238k
    uint32_t    ctx;
980
981
    // Get left split flag
982
238k
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
983
238k
    ctx  = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
984
985
    // Get above split flag
986
238k
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
987
238k
    ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
988
989
238k
    return ctx;
990
238k
}
991
992
void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
993
762k
{
994
762k
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
995
762k
    uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
996
997
762k
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
998
762k
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
999
1000
762k
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));
1001
762k
}
1002
1003
void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
1004
0
{
1005
0
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
1006
0
    uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
1007
0
    uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
1008
1009
0
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
1010
0
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
1011
1012
0
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));
1013
0
}
1014
1015
uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
1016
0
{
1017
0
    const CUData* tempCU;
1018
0
    uint32_t tempPartIdx;
1019
0
    uint32_t ctx;
1020
1021
    // Get BCBP of left PU
1022
0
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
1023
0
    ctx    = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1024
1025
    // Get BCBP of above PU
1026
0
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
1027
0
    ctx   += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1028
1029
0
    return ctx;
1030
0
}
1031
1032
bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
1033
978
{
1034
978
    uint32_t curPartNumb = m_encData->m_param->num4x4Partitions >> (depth << 1);
1035
978
    uint32_t curPartNumQ = curPartNumb >> 2;
1036
1037
978
    if (m_cuDepth[absPartIdx] > depth)
1038
489
    {
1039
489
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
1040
489
            if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
1041
489
                return true;
1042
489
    }
1043
489
    else
1044
489
    {
1045
489
        if (getQtRootCbf(absPartIdx))
1046
489
            return true;
1047
0
        else
1048
0
            setQPSubParts(qp, absPartIdx, depth);
1049
489
    }
1050
1051
0
    return false;
1052
978
}
1053
1054
void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
1055
0
{
1056
0
    uint32_t curPartNumQ = m_numPartitions >> 2;
1057
0
    X265_CHECK(puIdx < 2, "unexpected part unit index\n");
1058
1059
0
    switch (m_partSize[absPartIdx])
1060
0
    {
1061
0
    case SIZE_2Nx2N:
1062
0
        memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
1063
0
        break;
1064
0
    case SIZE_2NxN:
1065
0
        memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
1066
0
        break;
1067
0
    case SIZE_Nx2N:
1068
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1069
0
        memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
1070
0
        break;
1071
0
    case SIZE_NxN:
1072
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1073
0
        break;
1074
0
    case SIZE_2NxnU:
1075
0
        if (!puIdx)
1076
0
        {
1077
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1078
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1079
0
        }
1080
0
        else
1081
0
        {
1082
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1083
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
1084
0
        }
1085
0
        break;
1086
0
    case SIZE_2NxnD:
1087
0
        if (!puIdx)
1088
0
        {
1089
0
            memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
1090
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
1091
0
        }
1092
0
        else
1093
0
        {
1094
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1095
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1096
0
        }
1097
0
        break;
1098
0
    case SIZE_nLx2N:
1099
0
        if (!puIdx)
1100
0
        {
1101
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1102
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1103
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1104
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1105
0
        }
1106
0
        else
1107
0
        {
1108
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1109
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1110
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1111
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1112
0
        }
1113
0
        break;
1114
0
    case SIZE_nRx2N:
1115
0
        if (!puIdx)
1116
0
        {
1117
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
1118
0
            memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1119
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1120
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1121
0
        }
1122
0
        else
1123
0
        {
1124
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1125
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1126
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1127
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1128
0
        }
1129
0
        break;
1130
0
    default:
1131
0
        X265_CHECK(0, "unexpected part type\n");
1132
0
        break;
1133
0
    }
1134
0
}
1135
1136
template<typename T>
1137
void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1138
0
{
1139
0
    int i;
1140
1141
0
    p += absPartIdx;
1142
0
    int numElements = m_numPartitions;
1143
1144
0
    switch (m_partSize[absPartIdx])
1145
0
    {
1146
0
    case SIZE_2Nx2N:
1147
0
        for (i = 0; i < numElements; i++)
1148
0
            p[i] = val;
1149
0
        break;
1150
1151
0
    case SIZE_2NxN:
1152
0
        numElements >>= 1;
1153
0
        for (i = 0; i < numElements; i++)
1154
0
            p[i] = val;
1155
0
        break;
1156
1157
0
    case SIZE_Nx2N:
1158
0
        numElements >>= 2;
1159
0
        for (i = 0; i < numElements; i++)
1160
0
        {
1161
0
            p[i] = val;
1162
0
            p[i + 2 * numElements] = val;
1163
0
        }
1164
0
        break;
1165
1166
0
    case SIZE_2NxnU:
1167
0
    {
1168
0
        int curPartNumQ = numElements >> 2;
1169
0
        if (!puIdx)
1170
0
        {
1171
0
            T *pT  = p;
1172
0
            T *pT2 = p + curPartNumQ;
1173
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1174
0
            {
1175
0
                pT[i] = val;
1176
0
                pT2[i] = val;
1177
0
            }
1178
0
        }
1179
0
        else
1180
0
        {
1181
0
            T *pT  = p;
1182
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1183
0
                pT[i] = val;
1184
1185
0
            pT = p + curPartNumQ;
1186
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1187
0
                pT[i] = val;
1188
0
        }
1189
0
        break;
1190
0
    }
1191
1192
0
    case SIZE_2NxnD:
1193
0
    {
1194
0
        int curPartNumQ = numElements >> 2;
1195
0
        if (!puIdx)
1196
0
        {
1197
0
            T *pT  = p;
1198
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1199
0
                pT[i] = val;
1200
1201
0
            pT = p + (numElements - curPartNumQ);
1202
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1203
0
                pT[i] = val;
1204
0
        }
1205
0
        else
1206
0
        {
1207
0
            T *pT  = p;
1208
0
            T *pT2 = p + curPartNumQ;
1209
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1210
0
            {
1211
0
                pT[i] = val;
1212
0
                pT2[i] = val;
1213
0
            }
1214
0
        }
1215
0
        break;
1216
0
    }
1217
1218
0
    case SIZE_nLx2N:
1219
0
    {
1220
0
        int curPartNumQ = numElements >> 2;
1221
0
        if (!puIdx)
1222
0
        {
1223
0
            T *pT  = p;
1224
0
            T *pT2 = p + (curPartNumQ << 1);
1225
0
            T *pT3 = p + (curPartNumQ >> 1);
1226
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1227
1228
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1229
0
            {
1230
0
                pT[i] = val;
1231
0
                pT2[i] = val;
1232
0
                pT3[i] = val;
1233
0
                pT4[i] = val;
1234
0
            }
1235
0
        }
1236
0
        else
1237
0
        {
1238
0
            T *pT  = p;
1239
0
            T *pT2 = p + (curPartNumQ << 1);
1240
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1241
0
            {
1242
0
                pT[i] = val;
1243
0
                pT2[i] = val;
1244
0
            }
1245
1246
0
            pT  = p + (curPartNumQ >> 1);
1247
0
            pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1248
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1249
0
            {
1250
0
                pT[i] = val;
1251
0
                pT2[i] = val;
1252
0
            }
1253
0
        }
1254
0
        break;
1255
0
    }
1256
1257
0
    case SIZE_nRx2N:
1258
0
    {
1259
0
        int curPartNumQ = numElements >> 2;
1260
0
        if (!puIdx)
1261
0
        {
1262
0
            T *pT  = p;
1263
0
            T *pT2 = p + (curPartNumQ << 1);
1264
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1265
0
            {
1266
0
                pT[i] = val;
1267
0
                pT2[i] = val;
1268
0
            }
1269
1270
0
            pT  = p + curPartNumQ + (curPartNumQ >> 1);
1271
0
            pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1272
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1273
0
            {
1274
0
                pT[i] = val;
1275
0
                pT2[i] = val;
1276
0
            }
1277
0
        }
1278
0
        else
1279
0
        {
1280
0
            T *pT  = p;
1281
0
            T *pT2 = p + (curPartNumQ >> 1);
1282
0
            T *pT3 = p + (curPartNumQ << 1);
1283
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1284
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1285
0
            {
1286
0
                pT[i] = val;
1287
0
                pT2[i] = val;
1288
0
                pT3[i] = val;
1289
0
                pT4[i] = val;
1290
0
            }
1291
0
        }
1292
0
        break;
1293
0
    }
1294
1295
0
    case SIZE_NxN:
1296
0
    default:
1297
0
        X265_CHECK(0, "unknown partition type\n");
1298
0
        break;
1299
0
    }
1300
0
}
Unexecuted instantiation: void x265::CUData::setAllPU<x265::MV>(x265::MV*, x265::MV const&, int, int)
Unexecuted instantiation: void x265::CUData::setAllPU<signed char>(signed char*, signed char const&, int, int)
1301
1302
void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1303
0
{
1304
0
    setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1305
0
}
1306
1307
void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
1308
0
{
1309
0
    setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1310
0
}
1311
1312
void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1313
0
{
1314
0
    int cuSize = 1 << m_log2CUSize[0];
1315
0
    int partType = m_partSize[0];
1316
1317
0
    int tmp = partTable[partType][partIdx][0];
1318
0
    outWidth = ((tmp >> 4) * cuSize) >> 2;
1319
0
    outHeight = ((tmp & 0xF) * cuSize) >> 2;
1320
0
    outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1321
0
}
1322
1323
void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1324
0
{
1325
0
    if (cu)
1326
0
    {
1327
0
        outMvField.mv = cu->m_mv[picList][absPartIdx];
1328
0
        outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1329
0
    }
1330
0
    else
1331
0
    {
1332
        // OUT OF BOUNDARY
1333
0
        outMvField.mv = 0;
1334
0
        outMvField.refIdx = REF_NOT_VALID;
1335
0
    }
1336
0
}
1337
1338
void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1339
0
{
1340
0
    partIdxLT = m_absIdxInCTU;
1341
0
    partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1342
1343
0
    switch (m_partSize[0])
1344
0
    {
1345
0
    case SIZE_2Nx2N: break;
1346
0
    case SIZE_2NxN:
1347
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1348
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1349
0
        break;
1350
0
    case SIZE_Nx2N:
1351
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1352
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1353
0
        break;
1354
0
    case SIZE_NxN:
1355
0
        partIdxLT += (m_numPartitions >> 2) * partIdx;
1356
0
        partIdxRT +=  (m_numPartitions >> 2) * (partIdx - 1);
1357
0
        break;
1358
0
    case SIZE_2NxnU:
1359
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1360
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1361
0
        break;
1362
0
    case SIZE_2NxnD:
1363
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1364
0
        partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1365
0
        break;
1366
0
    case SIZE_nLx2N:
1367
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1368
0
        partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1369
0
        break;
1370
0
    case SIZE_nRx2N:
1371
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1372
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1373
0
        break;
1374
0
    default:
1375
0
        X265_CHECK(0, "unexpected part index\n");
1376
0
        break;
1377
0
    }
1378
0
}
1379
1380
uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1381
0
{
1382
0
    uint32_t outPartIdxLB;
1383
0
    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];
1384
1385
0
    switch (m_partSize[0])
1386
0
    {
1387
0
    case SIZE_2Nx2N:
1388
0
        outPartIdxLB += m_numPartitions >> 1;
1389
0
        break;
1390
0
    case SIZE_2NxN:
1391
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1392
0
        break;
1393
0
    case SIZE_Nx2N:
1394
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1395
0
        break;
1396
0
    case SIZE_NxN:
1397
0
        outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1398
0
        break;
1399
0
    case SIZE_2NxnU:
1400
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1401
0
        break;
1402
0
    case SIZE_2NxnD:
1403
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1404
0
        break;
1405
0
    case SIZE_nLx2N:
1406
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1407
0
        break;
1408
0
    case SIZE_nRx2N:
1409
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1410
0
        break;
1411
0
    default:
1412
0
        X265_CHECK(0, "unexpected part index\n");
1413
0
        break;
1414
0
    }
1415
0
    return outPartIdxLB;
1416
0
}
1417
1418
/* Derives the partition index of neighboring bottom right block */
1419
uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1420
0
{
1421
0
    uint32_t outPartIdxRB;
1422
0
    outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1423
0
                                   (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE) +
1424
0
                                   (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1425
1426
0
    switch (m_partSize[0])
1427
0
    {
1428
0
    case SIZE_2Nx2N:
1429
0
        outPartIdxRB += m_numPartitions >> 1;
1430
0
        break;
1431
0
    case SIZE_2NxN:
1432
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1433
0
        break;
1434
0
    case SIZE_Nx2N:
1435
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1436
0
        break;
1437
0
    case SIZE_NxN:
1438
0
        outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1439
0
        break;
1440
0
    case SIZE_2NxnU:
1441
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1442
0
        break;
1443
0
    case SIZE_2NxnD:
1444
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1445
0
        break;
1446
0
    case SIZE_nLx2N:
1447
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1448
0
        break;
1449
0
    case SIZE_nRx2N:
1450
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1451
0
        break;
1452
0
    default:
1453
0
        X265_CHECK(0, "unexpected part index\n");
1454
0
        break;
1455
0
    }
1456
0
    return outPartIdxRB;
1457
0
}
1458
1459
bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
1460
0
{
1461
0
    if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx])
1462
0
        return false;
1463
1464
0
    for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++)
1465
0
    {
1466
0
        if (m_interDir[absPartIdx] & (1 << refListIdx))
1467
0
        {
1468
0
            if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] ||
1469
0
                m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx])
1470
0
                return false;
1471
0
        }
1472
0
    }
1473
1474
0
    return true;
1475
0
}
1476
1477
/* Construct list of merging candidates, returns count */
1478
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
1479
0
{
1480
0
    uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1481
0
    const bool isInterB = m_slice->isInterB();
1482
1483
0
    const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1484
1485
0
    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1486
0
    {
1487
0
        candMvField[i][0].mv = 0;
1488
0
        candMvField[i][1].mv = 0;
1489
0
        candMvField[i][0].refIdx = REF_NOT_VALID;
1490
0
        candMvField[i][1].refIdx = REF_NOT_VALID;
1491
0
    }
1492
1493
    /* calculate the location of upper-left corner pixel and size of the current PU */
1494
0
    int xP, yP, nPSW, nPSH;
1495
1496
0
    int cuSize = 1 << m_log2CUSize[0];
1497
0
    int partMode = m_partSize[0];
1498
1499
0
    int tmp = partTable[partMode][puIdx][0];
1500
0
    nPSW = ((tmp >> 4) * cuSize) >> 2;
1501
0
    nPSH = ((tmp & 0xF) * cuSize) >> 2;
1502
1503
0
    tmp = partTable[partMode][puIdx][1];
1504
0
    xP = ((tmp >> 4) * cuSize) >> 2;
1505
0
    yP = ((tmp & 0xF) * cuSize) >> 2;
1506
1507
0
    uint32_t count = 0;
1508
1509
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1510
0
    PartSize curPS = (PartSize)m_partSize[absPartIdx];
1511
    
1512
    // left
1513
0
    uint32_t leftPartIdx = 0;
1514
0
    const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1515
0
    bool isAvailableA1 = cuLeft &&
1516
0
        cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1517
0
        !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
1518
0
        cuLeft->isInter(leftPartIdx);
1519
0
    if (isAvailableA1)
1520
0
    {
1521
        // get Inter Dir
1522
0
        candDir[count] = cuLeft->m_interDir[leftPartIdx];
1523
        // get Mv from Left
1524
0
        cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);
1525
0
        if (isInterB)
1526
0
            cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);
1527
1528
0
        if (++count == maxNumMergeCand)
1529
0
            return maxNumMergeCand;
1530
0
    }
1531
1532
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1533
1534
    // above
1535
0
    uint32_t abovePartIdx = 0;
1536
0
    const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1537
0
    bool isAvailableB1 = cuAbove &&
1538
0
        cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1539
0
        !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
1540
0
        cuAbove->isInter(abovePartIdx);
1541
0
    if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1542
0
    {
1543
        // get Inter Dir
1544
0
        candDir[count] = cuAbove->m_interDir[abovePartIdx];
1545
        // get Mv from Left
1546
0
        cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
1547
0
        if (isInterB)
1548
0
            cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);
1549
1550
0
        if (++count == maxNumMergeCand)
1551
0
            return maxNumMergeCand;
1552
0
    }
1553
1554
    // above right
1555
0
    uint32_t aboveRightPartIdx = 0;
1556
0
    const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1557
0
    bool isAvailableB0 = cuAboveRight &&
1558
0
        cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
1559
0
        cuAboveRight->isInter(aboveRightPartIdx);
1560
0
    if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1561
0
    {
1562
        // get Inter Dir
1563
0
        candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1564
        // get Mv from Left
1565
0
        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
1566
0
        if (isInterB)
1567
0
            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);
1568
1569
0
        if (++count == maxNumMergeCand)
1570
0
            return maxNumMergeCand;
1571
0
    }
1572
1573
    // left bottom
1574
0
    uint32_t leftBottomPartIdx = 0;
1575
0
    const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1576
0
    bool isAvailableA0 = cuLeftBottom &&
1577
0
        cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
1578
0
        cuLeftBottom->isInter(leftBottomPartIdx);
1579
0
    if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1580
0
    {
1581
        // get Inter Dir
1582
0
        candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1583
        // get Mv from Left
1584
0
        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
1585
0
        if (isInterB)
1586
0
            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);
1587
1588
0
        if (++count == maxNumMergeCand)
1589
0
            return maxNumMergeCand;
1590
0
    }
1591
1592
    // above left
1593
0
    if (count < 4)
1594
0
    {
1595
0
        uint32_t aboveLeftPartIdx = 0;
1596
0
        const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1597
0
        bool isAvailableB2 = cuAboveLeft &&
1598
0
            cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
1599
0
            cuAboveLeft->isInter(aboveLeftPartIdx);
1600
0
        if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1601
0
            && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1602
0
        {
1603
            // get Inter Dir
1604
0
            candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1605
            // get Mv from Left
1606
0
            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
1607
0
            if (isInterB)
1608
0
                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);
1609
1610
0
            if (++count == maxNumMergeCand)
1611
0
                return maxNumMergeCand;
1612
0
        }
1613
0
    }
1614
#if ENABLE_SCC_EXT
1615
    if (m_slice->m_bTemporalMvp)
1616
#else
1617
0
    if (m_slice->m_sps->bTemporalMVPEnabled)
1618
0
#endif
1619
0
    {
1620
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1621
0
        MV colmv;
1622
0
        int ctuIdx = -1;
1623
1624
        // image boundary check
1625
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1626
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1627
0
        {
1628
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1629
0
            uint32_t numUnits = s_numPartInCUSize;
1630
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1631
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1632
1633
0
            if (bNotLastCol && bNotLastRow)
1634
0
            {
1635
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1636
0
                ctuIdx = m_cuAddr;
1637
0
            }
1638
0
            else if (bNotLastCol)
1639
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1640
0
            else if (bNotLastRow)
1641
0
            {
1642
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1643
0
                ctuIdx = m_cuAddr + 1;
1644
0
            }
1645
0
            else // is the right bottom corner of CTU
1646
0
                absPartAddr = 0;
1647
0
        }
1648
1649
0
        int maxList = isInterB ? 2 : 1;
1650
0
        int dir = 0, refIdx = 0;
1651
0
        for (int list = 0; list < maxList; list++)
1652
0
        {
1653
0
            bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);
1654
0
            if (!bExistMV)
1655
0
            {
1656
0
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1657
0
                bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
1658
0
            }
1659
0
            if (bExistMV)
1660
0
            {
1661
0
                dir |= (1 << list);
1662
0
                candMvField[count][list].mv = colmv;
1663
0
                candMvField[count][list].refIdx = refIdx;
1664
0
            }
1665
0
        }
1666
1667
0
        if (dir != 0)
1668
0
        {
1669
0
            candDir[count] = (uint8_t)dir;
1670
1671
0
            if (++count == maxNumMergeCand)
1672
0
                return maxNumMergeCand;
1673
0
        }
1674
0
    }
1675
1676
0
    if (isInterB)
1677
0
    {
1678
0
        const uint32_t cutoff = count * (count - 1);
1679
0
        uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1680
0
        uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1681
1682
0
        for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
1683
0
        {
1684
0
            int i = priorityList0 & 3;
1685
0
            int j = priorityList1 & 3;
1686
1687
0
            if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
1688
0
            {
1689
                // get Mv from cand[i] and cand[j]
1690
0
                int refIdxL0 = candMvField[i][0].refIdx;
1691
0
                int refIdxL1 = candMvField[j][1].refIdx;
1692
0
                int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1693
0
                int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
1694
0
                if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
1695
0
                {
1696
0
                    candMvField[count][0].mv = candMvField[i][0].mv;
1697
0
                    candMvField[count][0].refIdx = refIdxL0;
1698
0
                    candMvField[count][1].mv = candMvField[j][1].mv;
1699
0
                    candMvField[count][1].refIdx = refIdxL1;
1700
0
                    candDir[count] = 3;
1701
1702
0
                    if (++count == maxNumMergeCand)
1703
0
                        return maxNumMergeCand;
1704
0
                }
1705
0
            }
1706
0
        }
1707
0
    }
1708
0
    int numRefIdx0 = m_slice->m_numRefIdx[0];
1709
#if ENABLE_SCC_EXT
1710
    if (m_slice->m_param->bEnableSCC)
1711
        numRefIdx0--;
1712
#endif
1713
0
    int numRefIdx = (isInterB) ? X265_MIN(numRefIdx0, m_slice->m_numRefIdx[1]) : numRefIdx0;
1714
0
    int r = 0;
1715
0
    int refcnt = 0;
1716
0
    while (numRefIdx && (count < maxNumMergeCand))
1717
0
    {
1718
0
        candDir[count] = 1;
1719
0
        candMvField[count][0].mv.word = 0;
1720
0
        candMvField[count][0].refIdx = r;
1721
1722
0
        if (isInterB)
1723
0
        {
1724
0
            candDir[count] = 3;
1725
0
            candMvField[count][1].mv.word = 0;
1726
0
            candMvField[count][1].refIdx = r;
1727
0
        }
1728
1729
0
        count++;
1730
1731
0
        if (refcnt == numRefIdx - 1)
1732
0
            r = 0;
1733
0
        else
1734
0
        {
1735
0
            ++r;
1736
0
            ++refcnt;
1737
0
        }
1738
0
    }
1739
1740
0
    return count;
1741
0
}
1742
1743
bool CUData::getMedianColMV(const CUData* colCU, const Frame* colPic, int list, int ref, MV& outMV) const
1744
0
{
1745
0
    int mvCount = 0;
1746
0
    int mvX[MAX_NUM_PARTITIONS], mvY[MAX_NUM_PARTITIONS];
1747
1748
0
    for (uint32_t partIdx = 0; partIdx < colCU->m_numPartitions; partIdx++)
1749
0
    {
1750
0
        uint32_t absPartAddr = partIdx & TMVP_UNIT_MASK;
1751
0
        if (colCU->m_predMode[partIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
1752
0
            continue;
1753
1754
0
        int8_t refIdx = colCU->m_refIdx[list][partIdx];
1755
0
        if (refIdx < 0)
1756
0
            continue;
1757
1758
0
        MV rawMv = colCU->m_mv[list][partIdx];
1759
1760
0
        int colPOC = colPic->m_encData->m_slice->m_poc;
1761
0
        int colRefPOC = colPic->m_encData->m_slice->m_refPOCList[list][refIdx];
1762
1763
0
        int curPOC = m_slice->m_poc;
1764
0
        int curRefPOC = this->m_slice->m_refPOCList[list][ref];
1765
1766
0
        MV scaledMv = scaleMvByPOCDist(rawMv, curPOC, curRefPOC, colPOC, colRefPOC);
1767
1768
0
        if (mvCount >= MAX_NUM_PARTITIONS)
1769
0
            break;
1770
1771
0
        mvX[mvCount] = scaledMv.x;
1772
0
        mvY[mvCount] = scaledMv.y;
1773
0
        mvCount++;
1774
0
    }
1775
1776
0
    if (mvCount == 0)
1777
0
        return false;
1778
1779
0
    size_t mid = mvCount >> 1;
1780
1781
0
    std::nth_element(mvX, mvX + mid, mvX + mvCount);
1782
0
    std::nth_element(mvY, mvY + mid, mvY + mvCount);
1783
1784
0
    if (mvCount & 1)
1785
0
    {
1786
0
        outMV.x = mvX[mid];
1787
0
        outMV.y = mvY[mid];
1788
0
    }
1789
0
    else
1790
0
    {
1791
0
        int lowerMaxX = *std::max_element(mvX, mvX + mid);
1792
0
        int lowerMaxY = *std::max_element(mvY, mvY + mid);
1793
1794
0
        outMV.x = (lowerMaxX + mvX[mid]) >> 1;
1795
0
        outMV.y = (lowerMaxY + mvY[mid]) >> 1;
1796
0
    }
1797
1798
0
    return true;
1799
0
}
1800
1801
// Create the PMV list. Called for each reference index.
1802
#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
1803
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv, uint32_t puIdx, uint32_t absPartIdx) const
1804
#else
1805
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
1806
#endif
1807
0
{
1808
0
    MV directMV[MD_ABOVE_LEFT + 1];
1809
0
    MV indirectMV[MD_ABOVE_LEFT + 1];
1810
0
    bool validDirect[MD_ABOVE_LEFT + 1];
1811
0
    bool validIndirect[MD_ABOVE_LEFT + 1];
1812
1813
#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
1814
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
1815
    {
1816
        // Left candidate.
1817
        if ((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable)
1818
        {
1819
            validIndirect[MD_ABOVE_RIGHT] = validIndirect[MD_ABOVE] = validIndirect[MD_ABOVE_LEFT] = false;
1820
1821
            validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1822
            validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1823
1824
            validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1825
            validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1826
        }
1827
1828
        // Top candidate.
1829
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1830
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1831
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1832
1833
        // Top candidate.
1834
        if (!((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable))
1835
        {
1836
            validDirect[MD_BELOW_LEFT] = validDirect[MD_LEFT] = validIndirect[MD_BELOW_LEFT] = validIndirect[MD_LEFT] = false;
1837
            validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1838
            validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1839
            validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1840
        }
1841
    }
1842
    else
1843
#endif
1844
0
    {
1845
        // Left candidate.
1846
0
        validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1847
0
        validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1848
        // Top candidate.
1849
0
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1850
0
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1851
0
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1852
1853
        // Left candidate.
1854
0
        validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1855
0
        validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1856
        // Top candidate.
1857
0
        validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1858
0
        validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1859
0
        validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1860
0
    }
1861
1862
0
    int num = 0;
1863
    // Left predictor search
1864
0
    if (validDirect[MD_BELOW_LEFT])
1865
0
        amvpCand[num++] = directMV[MD_BELOW_LEFT];
1866
0
    else if (validDirect[MD_LEFT])
1867
0
        amvpCand[num++] = directMV[MD_LEFT];
1868
0
    else if (validIndirect[MD_BELOW_LEFT])
1869
0
        amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
1870
0
    else if (validIndirect[MD_LEFT])
1871
0
        amvpCand[num++] = indirectMV[MD_LEFT];
1872
1873
0
    bool bAddedSmvp = num > 0;
1874
1875
    // Above predictor search
1876
0
    if (validDirect[MD_ABOVE_RIGHT])
1877
0
        amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
1878
0
    else if (validDirect[MD_ABOVE])
1879
0
        amvpCand[num++] = directMV[MD_ABOVE];
1880
0
    else if (validDirect[MD_ABOVE_LEFT])
1881
0
        amvpCand[num++] = directMV[MD_ABOVE_LEFT];
1882
1883
0
    if (!bAddedSmvp)
1884
0
    {
1885
0
        if (validIndirect[MD_ABOVE_RIGHT])
1886
0
            amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
1887
0
        else if (validIndirect[MD_ABOVE])
1888
0
            amvpCand[num++] = indirectMV[MD_ABOVE];
1889
0
        else if (validIndirect[MD_ABOVE_LEFT])
1890
0
            amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
1891
0
    }
1892
1893
0
    int numMvc = 0;
1894
0
    for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1895
0
    {
1896
0
        if (validDirect[dir] && directMV[dir].notZero())
1897
0
            pmv[numMvc++] = directMV[dir];
1898
1899
0
        if (validIndirect[dir] && indirectMV[dir].notZero())
1900
0
            pmv[numMvc++] = indirectMV[dir];
1901
0
    }
1902
1903
0
    if (num == 2)
1904
0
        num -= amvpCand[0] == amvpCand[1];
1905
1906
    // Get the collocated candidate. At this step, either the first candidate
1907
    // was found or its value is 0.
1908
#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
1909
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
1910
    {
1911
        if (m_slice->m_bTemporalMvp && num < 2)
1912
        {
1913
            int refId = refIdx;
1914
            uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1915
            uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1916
1917
            // co-located RightBottom temporal predictor (H)
1918
            int ctuIdx = -1;
1919
1920
            // image boundary check
1921
            if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1922
                m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1923
            {
1924
                uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1925
                uint32_t numUnits = s_numPartInCUSize;
1926
                bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1927
                bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1928
1929
                if (bNotLastCol && bNotLastRow)
1930
                {
1931
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1932
                    ctuIdx = m_cuAddr;
1933
                }
1934
                else if (bNotLastCol)
1935
                    absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1936
                else if (bNotLastRow)
1937
                {
1938
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1939
                    ctuIdx = m_cuAddr + 1;
1940
                }
1941
                else // is the right bottom corner of CTU
1942
                    absPartAddr = 0;
1943
            }
1944
            if (ctuIdx >= 0 && getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, ctuIdx, absPartAddr))
1945
                pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
1946
            else
1947
            {
1948
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1949
                uint32_t curCTUIdx = m_cuAddr;
1950
                if (getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, curCTUIdx, partIdxCenter))
1951
                    pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
1952
            }
1953
        }
1954
    }
1955
    else
1956
#endif
1957
0
    {
1958
0
        if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
1959
0
        {
1960
0
            int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
1961
0
            if (tempRefIdx != -1)
1962
0
            {
1963
0
                uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
1964
0
                const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1965
0
                const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1966
1967
                // Scale the vector
1968
0
                int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
1969
0
                int colPOC = colCU->m_slice->m_poc;
1970
1971
0
                int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1972
0
                int curPOC = m_slice->m_poc;
1973
1974
0
                pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
1975
0
            }
1976
0
        }
1977
0
    }
1978
1979
0
    while (num < AMVP_NUM_CANDS)
1980
0
        amvpCand[num++].set(0, 0);
1981
1982
0
    return numMvc;
1983
0
}
1984
1985
/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1986
void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const
1987
0
{
1988
    // Set the temporal neighbour to unavailable by default.
1989
0
    neighbours[MD_COLLOCATED].unifiedRef = -1;
1990
1991
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1992
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1993
1994
    // Load the spatial MVs.
1995
0
    getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT);
1996
0
    getInterNeighbourMV(neighbours + MD_LEFT,       partIdxLB, MD_LEFT);
1997
0
    getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT);
1998
0
    getInterNeighbourMV(neighbours + MD_ABOVE,      partIdxRT, MD_ABOVE);
1999
0
    getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT);
2000
2001
0
    if (m_slice->m_bTemporalMvp && !(m_slice->m_param->bEnableSCC || m_slice->m_param->numViews > 1))
2002
0
    {
2003
0
        uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
2004
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
2005
2006
        // co-located RightBottom temporal predictor (H)
2007
0
        int ctuIdx = -1;
2008
2009
        // image boundary check
2010
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
2011
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
2012
0
        {
2013
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
2014
0
            uint32_t numUnits = s_numPartInCUSize;
2015
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
2016
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
2017
2018
0
            if (bNotLastCol && bNotLastRow)
2019
0
            {
2020
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
2021
0
                ctuIdx = m_cuAddr;
2022
0
            }
2023
0
            else if (bNotLastCol)
2024
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
2025
0
            else if (bNotLastRow)
2026
0
            {
2027
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
2028
0
                ctuIdx = m_cuAddr + 1;
2029
0
            }
2030
0
            else // is the right bottom corner of CTU
2031
0
                absPartAddr = 0;
2032
0
        }
2033
2034
0
        if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED)))
2035
0
        {
2036
0
            uint32_t partIdxCenter =  deriveCenterIdx(puIdx);
2037
0
            uint32_t curCTUIdx = m_cuAddr;
2038
0
            getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED);
2039
0
        }
2040
0
    }
2041
0
}
2042
2043
/* Loads the motion data of the spatial neighbour of partition `partUnitIdx`
 * in direction `dir` into `neighbour`.
 *
 * When no neighbouring CU exists (or `dir` is not a spatial direction), both
 * reference indices are set to -1 and isAvailable to false; the MVs are left
 * untouched.  Otherwise both lists' MV and reference index are copied and
 * isAvailable reports whether the neighbouring partition is inter coded. */
void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const
{
    const CUData* tmpCU = NULL;
    uint32_t idx = 0;

    switch (dir)
    {
    case MD_LEFT:
        tmpCU = getPULeft(idx, partUnitIdx);
        break;
    case MD_ABOVE:
        tmpCU = getPUAbove(idx, partUnitIdx);
        break;
    case MD_ABOVE_RIGHT:
        tmpCU = getPUAboveRight(idx, partUnitIdx);
        break;
    case MD_BELOW_LEFT:
        tmpCU = getPUBelowLeft(idx, partUnitIdx);
        break;
    case MD_ABOVE_LEFT:
        tmpCU = getPUAboveLeft(idx, partUnitIdx);
        break;
    default:
        // non-spatial direction: treated as "no neighbour"
        break;
    }

    if (!tmpCU)
    {
        // Mark the PMV as unavailable.
        for (int i = 0; i < 2; i++)
            neighbour->refIdx[i] = -1;
        neighbour->isAvailable = false;
        return;
    }

    for (int i = 0; i < 2; i++)
    {
        // Get the MV.
        neighbour->mv[i] = tmpCU->m_mv[i][idx];

        // Get the reference idx.
        neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx];
    }

    // tmpCU is known to be non-null here; availability does not depend on the list.
    neighbour->isAvailable = tmpCU->isInter(idx);
}
2088
2089
/* Clip motion vector to within slightly padded boundary of picture (the
2090
 * MV may reference a block that is completely within the padded area).
2091
 * Note this function is unaware of how much of this picture is actually
2092
 * available for use (re: frame parallelism) */
2093
void CUData::clipMv(MV& outMV) const
2094
0
{
2095
0
    const uint32_t mvshift = 2;
2096
0
    uint32_t offset = 8;
2097
2098
0
    int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
2099
0
    int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift);
2100
2101
0
    int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
2102
0
    int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift);
2103
2104
0
    outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
2105
0
    outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
2106
0
}
2107
2108
// Load direct spatial MV if available.
2109
bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
2110
0
{
2111
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
2112
0
    for (int i = 0; i < 2; i++, picList = !picList)
2113
0
    {
2114
0
        int partRefIdx = neighbours->refIdx[picList];
2115
0
        if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx])
2116
0
        {
2117
0
            pmv = neighbours->mv[picList];
2118
0
            return true;
2119
0
        }
2120
0
    }
2121
0
    return false;
2122
0
}
2123
2124
// Load indirect spatial MV if available. An indirect MV has to be scaled.
2125
bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
2126
0
{
2127
0
    int curPOC = m_slice->m_poc;
2128
0
    int neibPOC = curPOC;
2129
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
2130
2131
0
    for (int i = 0; i < 2; i++, picList = !picList)
2132
0
    {
2133
0
        int partRefIdx = neighbours->refIdx[picList];
2134
0
        if (partRefIdx >= 0)
2135
0
        {
2136
0
            int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx];
2137
0
            MV mvp = neighbours->mv[picList];
2138
2139
#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
2140
            if ((curRefPOC == curPOC) == (neibRefPOC == curPOC))
2141
            {
2142
                if (curRefPOC == curPOC)
2143
                    outMV = mvp;
2144
                if (!(curRefPOC == curPOC))
2145
                    outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
2146
                return true;
2147
            }
2148
#else
2149
0
            outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
2150
0
            return true;
2151
0
#endif
2152
0
        }
2153
0
    }
2154
0
    return false;
2155
0
}
2156
2157
/* Fetch the temporal (collocated) MV predictor for reference outRefIdx of
 * list picList.
 *
 * cuAddr selects the CTU inside the collocated picture and partUnitIdx the
 * partition inside that CTU; motion data is read at partUnitIdx masked with
 * TMVP_UNIT_MASK. Returns false when the collocated block has prediction mode
 * MODE_NONE, is intra, or has no valid reference index in either list.
 * Otherwise outMV receives the collocated MV, scaled by POC distance. */
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
{
    const Frame* colFrame = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
    const CUData* colCTU = colFrame->m_encData->getPicCTU(cuAddr);

    const uint32_t tmvpPartAddr = partUnitIdx & TMVP_UNIT_MASK;

    if (colCTU->m_predMode[partUnitIdx] == MODE_NONE)
        return false;
    if (colCTU->isIntra(tmvpPartAddr))
        return false;

    // Preferred list first; fall back to the other list if it holds no reference
    int colList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
    int refIdxInCol = colCTU->m_refIdx[colList][tmvpPartAddr];

    if (refIdxInCol < 0)
    {
        colList = !colList;
        refIdxInCol = colCTU->m_refIdx[colList][tmvpPartAddr];

        if (refIdxInCol < 0)
            return false;
    }

    // POC distances on the collocated side and on the current side
    const int pocColRef = colCTU->m_slice->m_refPOCList[colList][refIdxInCol];
    const int pocCol = colCTU->m_slice->m_poc;
    MV mvCol = colCTU->m_mv[colList][tmvpPartAddr];

    const int pocCurRef = m_slice->m_refPOCList[picList][outRefIdx];
    const int pocCur = m_slice->m_poc;

#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
    const bool curRefHasCurPOC = (pocCurRef == pocCur);

    if ((pocCol == pocColRef) != curRefHasCurPOC)
        return false;

    if (curRefHasCurPOC)
        outMV = mvCol;
    else
        outMV = scaleMvByPOCDist(mvCol, pocCur, pocCurRef, pocCol, pocColRef);
#else
    outMV = scaleMvByPOCDist(mvCol, pocCur, pocCurRef, pocCol, pocColRef);
#endif
    return true;
}
2199
2200
// Cache the collocated MV.
2201
bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
2202
0
{
2203
0
    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
2204
0
    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
2205
2206
0
    uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
2207
0
    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
2208
0
        return false;
2209
2210
0
    for (int list = 0; list < 2; list++)
2211
0
    {
2212
0
        neighbour->cuAddr[list] = cuAddr;
2213
0
        int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag;
2214
0
        int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
2215
2216
0
        if (colRefIdx < 0)
2217
0
            colRefPicList = !colRefPicList;
2218
2219
0
        neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr];
2220
0
        neighbour->refIdx[list] |= colRefPicList << 4;
2221
2222
0
        neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr];
2223
0
    }
2224
2225
0
    return neighbour->unifiedRef != -1;
2226
0
}
2227
2228
/* Scale inMV from the POC distance (colPOC - colRefPOC) to the POC distance
 * (curPOC - curRefPOC).
 *
 * Both distances are clipped to [-128, 127] and the resulting scale factor to
 * [-4096, 4095] before it is handed to scaleMv(). The MV is returned
 * unchanged when the two distances are equal, and also when the source
 * distance is zero: that case cannot be scaled and would otherwise divide by
 * zero below. */
MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
{
    const int diffPocD = colPOC - colRefPOC;
    const int diffPocB = curPOC - curRefPOC;

    if (diffPocD == diffPocB)
        return inMV;

    // tdd below is the clipped diffPocD and is zero exactly when diffPocD is zero
    if (diffPocD == 0)
        return inMV;

    const int tdb   = x265_clip3(-128, 127, diffPocB);
    const int tdd   = x265_clip3(-128, 127, diffPocD);
    const int x     = (0x4000 + abs(tdd / 2)) / tdd;
    const int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6);
    return scaleMv(inMV, scale);
}
2244
2245
/* Return the z-scan index of the partition at the centre of PU puIdx.
 *
 * The PU's start partition is converted to raster order, advanced by half the
 * PU height (in rows of the raster grid) and half the PU width (in columns),
 * and converted back to z-scan order. */
uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
{
    uint32_t puPartIdx;
    int width, height;

    getPartIndexAndSize(puIdx, puPartIdx, width, height);

    // half the PU extent, expressed in partition units
    const int halfRows = height >> (LOG2_UNIT_SIZE + 1);
    const int halfCols = width >> (LOG2_UNIT_SIZE + 1);

    return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + puPartIdx]
                           + (halfRows << LOG2_RASTER_SIZE)
                           + halfCols];
}
2256
2257
/* Fill result with the coefficient scan type, the scan-order tables and the
 * first significance-map context for a transform block.
 *
 * absPartIdx selects the partition, log2TrSize is the log2 of the transform
 * size and bIsLuma tells luma from chroma. Only intra blocks whose transform
 * size passes the MDCS_LOG2_MAX_SIZE test get a direction-dependent scan
 * (horizontal for modes 22..30, vertical for modes 6..14); everything else
 * uses the diagonal scan. */
void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
{
    // diagonal unless an intra direction selects otherwise below
    result.scanType = SCAN_DIAG;

    if (isIntra(absPartIdx))
    {
        uint32_t intraDir;

        if (bIsLuma)
            intraDir = m_lumaIntraDir[absPartIdx];
        else
        {
            intraDir = m_chromaIntraDir[absPartIdx];
            if (intraDir == DM_CHROMA_IDX)
            {
                // chroma derived from luma: read the luma mode, then remap it for 4:2:2
                intraDir = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC];
                if (m_chromaFormat == X265_CSP_I422)
                    intraDir = g_chroma422IntraAngleMappingTable[intraDir];
            }
        }

        if (log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE))
        {
            if (intraDir >= 22 && intraDir <= 30)
                result.scanType = SCAN_HOR;
            else if (intraDir >= 6 && intraDir <= 14)
                result.scanType = SCAN_VER;
        }
    }

    // both tables are indexed by (log2TrSize - 2)
    const uint32_t sizeIdx = log2TrSize - 2;
    result.scan   = g_scanOrder[result.scanType][sizeIdx];
    result.scanCG = g_scanOrderCG[result.scanType][sizeIdx];

    switch (log2TrSize)
    {
    case 2:
        result.firstSignificanceMapContext = 0;
        break;
    case 3:
        result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9;
        break;
    default:
        result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
        break;
    }
}
2299
2300
406k
/* Branch-free set/clear of the bits selected by `flag` inside `bitfield`.
 * The old bits are masked out first; ~((value) - 1) is all ones when value is 1
 * and zero when value is 0, so the flag bits end up set or cleared accordingly.
 * Only 0 and 1 give a clean result for `value`. Note that `bitfield` is
 * evaluated twice. */
#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag))
2301
2302
/* Populate cuDataArray with one CUGeom entry per CU position of a CTU, for
 * every CU size from maxCUSize down to minCUSize.
 *
 * ctuWidth and ctuHeight are compared against each CU's pixel offset inside
 * the CTU: a CU whose origin lies outside them is not flagged PRESENT, and a
 * present CU that crosses them (and is not at the smallest size) is flagged
 * SPLIT_MANDATORY | SPLIT. Entries of one size are stored contiguously,
 * largest size first, ordered by g_depthScanIdx. */
void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
{
    const uint32_t log2MaxCU = g_log2Size[maxCUSize];
    const uint32_t log2MinCU = g_log2Size[minCUSize];
    const uint32_t num4x4Partition = (1U << ((log2MaxCU - LOG2_UNIT_SIZE) << 1));

    // index of the first entry belonging to the size currently being processed
    uint32_t levelBase = 0;

    for (uint32_t log2CUSize = log2MaxCU; log2CUSize >= log2MinCU; log2CUSize--)
    {
        const uint32_t depth       = log2MaxCU - log2CUSize;
        const uint32_t blockSize   = 1 << log2CUSize;
        const uint32_t cusPerSide  = 1 << depth;
        const uint32_t cusInLevel  = cusPerSide * cusPerSide;
        const int32_t  isLeafLevel = log2CUSize == log2MinCU;

        for (uint32_t sbY = 0; sbY < cusPerSide; sbY++)
        {
            for (uint32_t sbX = 0; sbX < cusPerSide; sbX++)
            {
                const uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
                const uint32_t cuIdx    = levelBase + depthIdx;
                // the four children sit in the next level, four entries per parent
                const uint32_t childIdx = levelBase + cusInLevel + (depthIdx << 2);

                // luma pixel offset of this CU from the CTU origin
                const uint32_t px = sbX * blockSize;
                const uint32_t py = sbY * blockSize;

                const int32_t isPresent = px < ctuWidth && py < ctuHeight;
                const int32_t mustSplit = isPresent && !isLeafLevel && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);

                X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");

                CUGeom& geom = cuDataArray[cuIdx];
                geom.log2CUSize    = log2CUSize;
                geom.childOffset   = childIdx - cuIdx;
                // g_depthScanIdx is addressed in units of 8 luma samples here
                geom.absPartIdx    = g_depthScanIdx[py >> 3][px >> 3] * 4;
                geom.numPartitions = (num4x4Partition >> (depth * 2));
                geom.depth         = depth;
                geom.geomRecurId   = cuIdx;

                geom.flags = 0;
                CU_SET_FLAG(geom.flags, CUGeom::PRESENT, isPresent);
                CU_SET_FLAG(geom.flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, mustSplit);
                CU_SET_FLAG(geom.flags, CUGeom::LEAF, isLeafLevel);
            }
        }

        levelBase += cusInLevel;
    }
}
2347
2348
#if ENABLE_SCC_EXT
2349
/* Derive a second block vector by following currentMv to the block it points
 * at and adding that block's list-0 MV to currentMv.
 *
 * absPartIdx is the partition inside this CU, width/height the block size in
 * luma samples. Returns false, leaving derivedMv untouched, when the
 * referenced block falls outside the picture, when its CTU has no slice
 * attached, or when it is not located before the current CU (later CTU
 * address, or same CTU with an equal or later partition index). Otherwise
 * derivedMv is written and the return value tells whether the referenced
 * block is non-intra with a list-0 reference whose POC equals its own slice's
 * POC. */
bool CUData::getDerivedBV(uint32_t absPartIdx, const MV& currentMv, MV& derivedMv, uint32_t width, uint32_t height)
{
    const int   ctuWidth = m_slice->m_param->maxCUSize;
    const int   ctuHeight = m_slice->m_param->maxCUSize;

    // Pixel position of the partition. The vertical offset must come from the
    // Y table; using the X table mislocates every partition whose x and y
    // offsets inside the CTU differ.
    int   cuPelX = m_cuPelX + (absPartIdx ? g_zscanToPelX[absPartIdx] : 0);
    int   cuPelY = m_cuPelY + (absPartIdx ? g_zscanToPelY[absPartIdx] : 0);

    // Top-left corner of the referenced block (the vector is shifted right by 2 to get whole samples)
    int rngX = cuPelX + (currentMv.x >> 2);
    int rngY = cuPelY + (currentMv.y >> 2);
    uint32_t m_frameWidthInCtus = (m_slice->m_sps->picWidthInLumaSamples % ctuWidth) ? m_slice->m_sps->picWidthInLumaSamples / ctuWidth + 1 : m_slice->m_sps->picWidthInLumaSamples / ctuWidth;

    if (rngX < 0 || rngY < 0 || (rngX + width) > m_slice->m_sps->picWidthInLumaSamples || (rngY + height) > m_slice->m_sps->picHeightInLumaSamples)
    {
        return false;
    }

    // CTU and partition that contain the referenced block's top-left sample
    int refCtbAddr = (rngY / ctuHeight) * m_frameWidthInCtus + (rngX / ctuWidth);

    int      relCUPelX = rngX & (ctuWidth - 1);
    int      relCUPelY = rngY & (ctuHeight - 1);
    uint32_t absPartIdxDerived = g_rasterToZscan[((relCUPelY >> 2) << 4) + (relCUPelX >> 2)];
    CUData* refCU = m_encData->getPicCTU(refCtbAddr);

    if (refCU->m_slice == NULL)
        return false;

    MVField mv1;
    refCU->getMvField(refCU, absPartIdxDerived, 0, mv1);

    int iCurrCtbAddr = (m_cuPelY / ctuHeight) * m_frameWidthInCtus + (m_cuPelX / ctuWidth);
    uint32_t currAbsPartIdx = g_rasterToZscan[(((m_cuPelY & (ctuHeight - 1)) >> 2) << 4) + ((m_cuPelX & (ctuWidth - 1)) >> 2)];

    // The referenced block has to come strictly before the current CU
    if ((refCtbAddr > iCurrCtbAddr) || ((refCtbAddr == iCurrCtbAddr) && (absPartIdxDerived >= currAbsPartIdx)))
        return false;

    int refIdx = mv1.refIdx;
    bool isIBC;
    if (refCU->isIntra(absPartIdxDerived))
    {
        isIBC = false;
    }
    else
    {
        isIBC = (refIdx >= 0) ? (refCU->m_slice->m_refFrameList[0][refIdx]->m_poc == refCU->m_slice->m_poc) : 0;
    }

    derivedMv = mv1.mv;
    derivedMv += currentMv;

    return isIBC;
}
2399
2400
bool CUData::isIntraBC(const CUData* cu, uint32_t absPartIdx) const
2401
{
2402
    if (cu->isIntra(absPartIdx))
2403
    {
2404
        return false;
2405
    }
2406
    MVField mv;
2407
    cu->getMvField(cu, absPartIdx, 0, mv);
2408
    int iRefIdx = mv.refIdx;
2409
    bool isNeighborIntraBC = (iRefIdx >= 0) ? (m_slice->m_refFrameList[0][iRefIdx]->m_poc == m_slice->m_poc) : false;
2410
2411
    return isNeighborIntraBC;
2412
}
2413
2414
bool CUData::getColMVPIBC(int ctuRsAddr, int partUnitIdx, MV& rcMv)
2415
{
2416
    uint32_t absPartAddr = partUnitIdx;
2417
2418
    // use coldir.
2419
    Frame* colPic = m_slice->m_lastEncPic;
2420
    if (!colPic)
2421
        return false;
2422
2423
    CUData* colCU = m_encData->getPicCTU(ctuRsAddr);
2424
    MVField tempMv;
2425
    colCU->getMvField(colCU, absPartAddr, 0, tempMv);
2426
    if (tempMv.refIdx == REF_NOT_VALID)
2427
        return false;
2428
2429
    rcMv = tempMv.mv;
2430
2431
    return true;
2432
}
2433
2434
/* Gather intra block copy vector candidates for PU puIdx into MvPred,
 * advancing nbPred for every candidate stored.
 *
 * Candidates are visited in this order: the two entries of m_lastIntraBCMv
 * (skipped when zero), the left and above neighbours, a collocated vector
 * (only when the slice references nothing but the current picture), then the
 * below-left, above-right and above-left neighbours. After each stored
 * candidate, getDerivedBV() is asked for a chained vector, which is written
 * to the next slot and counted only when it reports success.
 * absPartIdx is overwritten by getPartIndexAndSize().
 * NOTE(review): no bound on nbPred is enforced here - the caller presumably
 * sizes MvPred for the worst case; confirm. */
void CUData::getIntraBCMVPsEncOnly(uint32_t absPartIdx, MV* MvPred, int& nbPred, int puIdx)
{
    uint32_t nbPartIdx;   // written by the getPU*() neighbour lookups
    MVField  nbField;

    int puWidth, puHeight;
    getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight);

    const uint32_t partsPerCtuSide = s_numPartInCUSize;
    const uint32_t partsPerCtu = s_numPartInCUSize * s_numPartInCUSize;
    const uint32_t ctusPerRow = (m_slice->m_sps->picWidthInLumaSamples % m_slice->m_param->maxCUSize) ? m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize + 1 : m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize;

    // Corner partitions used to locate the spatial neighbours
    const uint32_t cornerLT = m_absIdxInCTU;
    const uint32_t cornerLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];
    const uint32_t cornerRT = g_rasterToZscan[g_zscanToRaster[cornerLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];

    // Most recent intra block copy vectors
    MvPred[0] = m_lastIntraBCMv[0];
    if (MvPred[0] != MV(0, 0))
    {
        nbPred++;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }
    MvPred[nbPred] = m_lastIntraBCMv[1];
    if (MvPred[nbPred] != MV(0, 0))
    {
        nbPred++;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }

    // Left neighbour
    const CUData* leftCU = getPULeft(nbPartIdx, cornerLB);
    if (leftCU && isIntraBC(leftCU, nbPartIdx))
    {
        leftCU->getMvField(leftCU, nbPartIdx, 0, nbField);
        MvPred[nbPred++] = nbField.mv;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }

    // Above neighbour
    const CUData* aboveCU = getPUAbove(nbPartIdx, cornerRT);
    if (aboveCU && isIntraBC(aboveCU, nbPartIdx))
    {
        aboveCU->getMvField(aboveCU, nbPartIdx, 0, nbField);
        MvPred[nbPred++] = nbField.mv;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }

    // Collocated candidate
    if (m_slice->isOnlyCurrentPictureAsReference())
    {
        MV colMv;
        bool haveCol = false;

        if (m_absIdxInCTU && m_slice->m_lastEncPic && m_slice->m_lastEncPic->m_poc < m_slice->m_poc)
        {
            const uint32_t cornerRB = deriveRightBottomIdx(puIdx);
            const uint32_t rbRaster = g_zscanToRaster[cornerRB];

            uint32_t colPartAddr = m_absIdxInCTU + absPartIdx;
            int      colCtuAddr = -1;

            // image boundary check
            if (((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[g_rasterToZscan[rbRaster]] + 4) < m_slice->m_sps->picWidthInLumaSamples)
                && ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[g_rasterToZscan[rbRaster]] + 4) < m_slice->m_sps->picHeightInLumaSamples))
            {
                const bool notLastColumn = rbRaster % partsPerCtuSide < partsPerCtuSide - 1;
                const bool notLastRow    = rbRaster / partsPerCtuSide < s_numPartInCUSize - 1;

                if (notLastColumn && notLastRow)
                {
                    // below-right partition is inside this CTU
                    colPartAddr = g_rasterToZscan[rbRaster + partsPerCtuSide + 1];
                    colCtuAddr = m_cuAddr;
                }
                else if (notLastColumn)
                {
                    // last row of the CTU: continue in the CTU one row down
                    colPartAddr = g_rasterToZscan[(rbRaster + partsPerCtuSide + 1) % partsPerCtu];
                    colCtuAddr = m_cuAddr + ctusPerRow;
                }
                else if (notLastRow)
                {
                    // last column of the CTU: continue in the next CTU
                    colPartAddr = g_rasterToZscan[rbRaster + 1];
                    colCtuAddr = m_cuAddr + 1;
                }
            }

            if (colCtuAddr >= 0)
            {
                haveCol = getColMVPIBC(colCtuAddr, colPartAddr, colMv);

                // fall back to the centre of the PU
                if (!haveCol)
                {
                    const uint32_t centerPartIdx = deriveCenterIdx(puIdx);
                    haveCol = getColMVPIBC(m_cuAddr, centerPartIdx, colMv);
                }
            }
        }

        if (haveCol)
        {
            MvPred[nbPred++] = colMv;
            if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
                nbPred++;
        }
    }

    // Below-left neighbour (isIntraBC is evaluated on the neighbour object)
    const CUData* belowLeftCU = getPUBelowLeft(nbPartIdx, cornerLB);
    if (belowLeftCU && belowLeftCU->isIntraBC(belowLeftCU, nbPartIdx))
    {
        belowLeftCU->getMvField(belowLeftCU, nbPartIdx, 0, nbField);
        MvPred[nbPred++] = nbField.mv;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }

    // Above-right neighbour
    const CUData* aboveRightCU = getPUAboveRight(nbPartIdx, cornerRT);
    if (aboveRightCU && aboveRightCU->isIntraBC(aboveRightCU, nbPartIdx))
    {
        aboveRightCU->getMvField(aboveRightCU, nbPartIdx, 0, nbField);
        MvPred[nbPred++] = nbField.mv;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }

    // Above-left neighbour
    const CUData* aboveLeftCU = getPUAboveLeft(nbPartIdx, cornerLT);
    if (aboveLeftCU && aboveLeftCU->isIntraBC(aboveLeftCU, nbPartIdx))
    {
        aboveLeftCU->getMvField(aboveLeftCU, nbPartIdx, 0, nbField);
        MvPred[nbPred++] = nbField.mv;
        if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], puWidth, puHeight))
            nbPred++;
    }
}
2589
2590
/* Round the list-0 MV of merge candidates to a multiple of 4, i.e. clear the
 * two low bits of each component via (mv >> 2) << 2.
 *
 * pcMvFieldNeighbours holds iCount candidates, each with one MVField per
 * list; only entry [i][0] is touched. When the slice uses integer MVs every
 * candidate is rounded. Otherwise a candidate is rounded only if its list-0
 * reference index is valid and that reference has the same POC as the current
 * slice. Reference indices are never modified. */
void CUData::roundMergeCandidates(MVField(*pcMvFieldNeighbours)[2], int iCount) const
{
    for (int i = 0; i < iCount; i++)
    {
        MVField& list0 = pcMvFieldNeighbours[i][0];

        if (!m_slice->m_useIntegerMv)
        {
            // fractional MVs allowed: only vectors into the current picture are rounded
            const int list0RefIdx = list0.refIdx;
            if (list0RefIdx < 0)
                continue;
            if (m_slice->m_refFrameList[0][list0RefIdx]->m_poc != m_slice->m_poc)
                continue;
        }

        list0.mv = (list0.mv >> 2) << 2;
    }
}
2616
2617
/* Decide whether the bi-prediction restriction applies to the pair
 * (mvL0, iRefIdxL0) / (mvL1, iRefIdxL1).
 *
 * Reference indices outside [0, MAX_NUM_REF) count as "no reference"; the
 * restriction only applies when both lists have a reference. It then holds
 * when both MVs have a fractional part (any of the two low bits set in x or
 * y), the two predictions are not identical (same MV and same reference POC),
 * SCC is enabled in the encoder parameters, and either SAO is in use or the
 * deblocking filter is not disabled for the picture. */
bool CUData::is8x8BipredRestriction(MV mvL0, MV mvL1, int iRefIdxL0, int iRefIdxL1) const
{
    const bool hasRefL0 = iRefIdxL0 >= 0 && iRefIdxL0 < MAX_NUM_REF;
    const bool hasRefL1 = iRefIdxL1 >= 0 && iRefIdxL1 < MAX_NUM_REF;

    if (!hasRefL0 || !hasRefL1)
        return false;

    const int pocL0 = m_slice->m_refPOCList[0][iRefIdxL0];
    const int pocL1 = m_slice->m_refPOCList[1][iRefIdxL1];

    const bool fractionalL0 = ((mvL0.x & 0x3) != 0) || ((mvL0.y & 0x3) != 0);
    const bool fractionalL1 = ((mvL1.x & 0x3) != 0) || ((mvL1.y & 0x3) != 0);
    const bool sameMotion   = (mvL0 == mvL1) && (pocL0 == pocL1);

    return fractionalL0 && fractionalL1 && !sameMotion &&
           (m_slice->m_param->bEnableSCC) &&
           (m_slice->m_bUseSao || !m_slice->m_pps->bPicDisableDeblockingFilter);
}
2644
#endif