Coverage Report

Created: 2022-08-24 06:15

/src/x265/source/common/cudata.cpp
Line
Count
Source (jump to first uncovered line)
1
/*****************************************************************************
2
 * Copyright (C) 2013-2020 MulticoreWare, Inc
3
 *
4
 * Authors: Steve Borho <steve@borho.org>
5
 *          Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; either version 2 of the License, or
10
 * (at your option) any later version.
11
 *
12
 * This program is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
 *
21
 * This program is also available under a commercial proprietary license.
22
 * For more information, contact us at license @ x265.com.
23
 *****************************************************************************/
24
25
#include "common.h"
26
#include "frame.h"
27
#include "framedata.h"
28
#include "picyuv.h"
29
#include "mv.h"
30
#include "cudata.h"
31
/* Parenthesized so the shift binds as a single value wherever the macro is expanded
 * (e.g. MAX_MV * 2 or -MAX_MV). */
#define MAX_MV (1 << 14)
32
33
using namespace X265_NS;
34
35
/* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */
36
37
14.7M
/* Broadcast val over a single-byte partition field */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
38
39
14.8M
/* Copy 4 bytes from src to dst. A constant-size memcpy replaces the uint32_t pointer cast:
 * it produces the same bytes without reading uint8_t storage through an incompatible
 * lvalue type or depending on 4-byte alignment. */
static void copy4(uint8_t* dst, uint8_t* src)  { memcpy(dst, src, 4); }
40
16.1M
/* Fill 4 bytes at dst with val. A constant-size memset replaces the uint32_t pointer cast
 * and writes the same bytes without the aliasing/alignment assumptions of the cast. */
static void bcast4(uint8_t* dst, uint8_t val)  { memset(dst, val, 4); }
41
42
3.70M
/* Copy 16 bytes from src to dst. A constant-size memcpy replaces the pair of uint64_t
 * stores: same bytes, no access to uint8_t storage through a uint64_t lvalue and no
 * 8-byte alignment requirement. */
static void copy16(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 16); }
43
3.39M
/* Fill 16 bytes at dst with val. A constant-size memset replaces the uint64_t stores and
 * writes the same bytes without the aliasing/alignment assumptions of the cast. */
static void bcast16(uint8_t* dst, uint8_t val) { memset(dst, val, 16); }
44
45
945k
/* Copy 64 bytes from src to dst. A constant-size memcpy replaces the eight uint64_t
 * stores: same bytes, no access to uint8_t storage through a uint64_t lvalue and no
 * 8-byte alignment requirement. */
static void copy64(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 64); }
49
754k
/* Fill 64 bytes at dst with val. A constant-size memset replaces the eight uint64_t
 * stores and writes the same bytes without the aliasing/alignment assumptions of the cast. */
static void bcast64(uint8_t* dst, uint8_t val) { memset(dst, val, 64); }
52
53
/* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack,
54
 * but hand-written assembly would beat it. */
55
136k
/* Copy a 256-byte partition field from src to dst */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;
    memcpy(dst, src, numBytes);
}
56
99.1k
/* Fill a 256-byte partition field at dst with val */
static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;
    memset(dst, val, numBytes);
}
57
58
namespace {
59
// file private namespace
60
61
/* Check whether 2 addresses point to the same column */
62
inline bool isEqualCol(int addrA, int addrB)
63
42.1M
{
64
42.1M
    return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0;
65
42.1M
}
66
67
/* Check whether 2 addresses point to the same row */
68
inline bool isEqualRow(int addrA, int addrB)
69
42.0M
{
70
42.0M
    return ((addrA ^ addrB) < RASTER_SIZE);
71
42.0M
}
72
73
/* Check whether 2 addresses point to the same row or column */
74
inline bool isEqualRowOrCol(int addrA, int addrB)
75
20.5M
{
76
20.5M
    return isEqualCol(addrA, addrB) | isEqualRow(addrA, addrB);
77
20.5M
}
78
79
/* Check whether one address points to the first column */
80
inline bool isZeroCol(int addr)
81
52.4M
{
82
52.4M
    return (addr & (RASTER_SIZE - 1)) == 0;
83
52.4M
}
84
85
/* Check whether one address points to the first row */
86
inline bool isZeroRow(int addr)
87
54.2M
{
88
54.2M
    return (addr < RASTER_SIZE);
89
54.2M
}
90
91
/* Check whether one address points to a column whose index is smaller than a given value */
92
inline bool lessThanCol(int addr, int val)
93
17.8M
{
94
17.8M
    return (addr & (RASTER_SIZE - 1)) < val;
95
17.8M
}
96
97
/* Check whether one address points to a row whose index is smaller than a given value */
98
inline bool lessThanRow(int addr, int val)
99
17.7M
{
100
    // addr / numUnits < val
101
17.7M
    return (addr >> LOG2_RASTER_SIZE) < val;
102
17.7M
}
103
104
inline MV scaleMv(MV mv, int scale)
105
0
{
106
0
    int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8);
107
0
    int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8);
108
109
0
    return MV((int32_t)mvx, (int32_t)mvy);
110
0
}
111
112
}
113
114
/* Construct a CUData with every byte of the object cleared to zero; the member pointers
 * are assigned later by initialize(). */
CUData::CUData()
{
    memset(static_cast<void*>(this), 0, sizeof(CUData));
}
118
119
void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance)
120
1.11M
{
121
1.11M
    int csp = param.internalCsp;
122
1.11M
    m_chromaFormat  = csp;
123
1.11M
    m_hChromaShift  = CHROMA_H_SHIFT(csp);
124
1.11M
    m_vChromaShift  = CHROMA_V_SHIFT(csp);
125
1.11M
    m_numPartitions = param.num4x4Partitions >> (depth * 2);
126
127
1.11M
    if (!s_partSet[0])
128
1.11M
    {
129
1.11M
        s_numPartInCUSize = 1 << param.unitSizeDepth;
130
1.11M
        switch (param.maxLog2CUSize)
131
1.11M
        {
132
884k
        case 6:
133
884k
            s_partSet[0] = bcast256;
134
884k
            s_partSet[1] = bcast64;
135
884k
            s_partSet[2] = bcast16;
136
884k
            s_partSet[3] = bcast4;
137
884k
            s_partSet[4] = bcast1;
138
884k
            break;
139
92.9k
        case 5:
140
92.9k
            s_partSet[0] = bcast64;
141
92.9k
            s_partSet[1] = bcast16;
142
92.9k
            s_partSet[2] = bcast4;
143
92.9k
            s_partSet[3] = bcast1;
144
92.9k
            s_partSet[4] = NULL;
145
92.9k
            break;
146
140k
        case 4:
147
140k
            s_partSet[0] = bcast16;
148
140k
            s_partSet[1] = bcast4;
149
140k
            s_partSet[2] = bcast1;
150
140k
            s_partSet[3] = NULL;
151
140k
            s_partSet[4] = NULL;
152
140k
            break;
153
0
        default:
154
0
            X265_CHECK(0, "unexpected CTU size\n");
155
0
            break;
156
1.11M
        }
157
1.11M
    }
158
159
1.11M
    switch (m_numPartitions)
160
1.11M
    {
161
225k
    case 256: // 64x64 CU
162
225k
        m_partCopy = copy256;
163
225k
        m_partSet = bcast256;
164
225k
        m_subPartCopy = copy64;
165
225k
        m_subPartSet = bcast64;
166
225k
        break;
167
251k
    case 64:  // 32x32 CU
168
251k
        m_partCopy = copy64;
169
251k
        m_partSet = bcast64;
170
251k
        m_subPartCopy = copy16;
171
251k
        m_subPartSet = bcast16;
172
251k
        break;
173
324k
    case 16:  // 16x16 CU
174
324k
        m_partCopy = copy16;
175
324k
        m_partSet = bcast16;
176
324k
        m_subPartCopy = copy4;
177
324k
        m_subPartSet = bcast4;
178
324k
        break;
179
317k
    case 4:   // 8x8 CU
180
317k
        m_partCopy = copy4;
181
317k
        m_partSet = bcast4;
182
317k
        m_subPartCopy = NULL;
183
317k
        m_subPartSet = NULL;
184
317k
        break;
185
0
    default:
186
0
        X265_CHECK(0, "unexpected CU partition count\n");
187
0
        break;
188
1.11M
    }
189
190
1.11M
    if (csp == X265_CSP_I400)
191
0
    {
192
        /* Each CU's data is layed out sequentially within the charMemBlock */
193
0
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance;
194
195
0
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
196
0
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
197
0
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
198
0
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
199
0
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
200
0
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
201
0
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
202
0
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
203
0
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
204
0
        m_partSize           = charBuf; charBuf += m_numPartitions;
205
0
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
206
0
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
207
0
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
208
0
        m_interDir           = charBuf; charBuf += m_numPartitions;
209
0
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
210
0
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
211
0
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
212
0
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
213
0
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
214
0
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
215
216
0
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition
217
218
0
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
219
0
        m_mv[1]  = m_mv[0] +  m_numPartitions;
220
0
        m_mvd[0] = m_mv[1] +  m_numPartitions;
221
0
        m_mvd[1] = m_mvd[0] + m_numPartitions;
222
223
0
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
224
225
0
        uint32_t cuSize = param.maxCUSize >> depth;
226
0
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize);
227
0
        m_trCoeff[1] = m_trCoeff[2] = 0;
228
0
        m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0;
229
0
        m_fAc_den[0] = m_fDc_den[0] = 0;
230
0
    }
231
1.11M
    else
232
1.11M
    {
233
        /* Each CU's data is layed out sequentially within the charMemBlock */
234
1.11M
        uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance;
235
236
1.11M
        m_qp        = (int8_t*)charBuf; charBuf += m_numPartitions;
237
1.11M
        m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions;
238
1.11M
        m_log2CUSize         = charBuf; charBuf += m_numPartitions;
239
1.11M
        m_lumaIntraDir       = charBuf; charBuf += m_numPartitions;
240
1.11M
        m_tqBypass           = charBuf; charBuf += m_numPartitions;
241
1.11M
        m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions;
242
1.11M
        m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions;
243
1.11M
        m_cuDepth            = charBuf; charBuf += m_numPartitions;
244
1.11M
        m_predMode           = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */
245
1.11M
        m_partSize           = charBuf; charBuf += m_numPartitions;
246
1.11M
        m_skipFlag[0]        = charBuf; charBuf += m_numPartitions;
247
1.11M
        m_skipFlag[1]        = charBuf; charBuf += m_numPartitions;
248
1.11M
        m_mergeFlag          = charBuf; charBuf += m_numPartitions;
249
1.11M
        m_interDir           = charBuf; charBuf += m_numPartitions;
250
1.11M
        m_mvpIdx[0]          = charBuf; charBuf += m_numPartitions;
251
1.11M
        m_mvpIdx[1]          = charBuf; charBuf += m_numPartitions;
252
1.11M
        m_tuDepth            = charBuf; charBuf += m_numPartitions;
253
1.11M
        m_transformSkip[0]   = charBuf; charBuf += m_numPartitions;
254
1.11M
        m_transformSkip[1]   = charBuf; charBuf += m_numPartitions;
255
1.11M
        m_transformSkip[2]   = charBuf; charBuf += m_numPartitions;
256
1.11M
        m_cbf[0]             = charBuf; charBuf += m_numPartitions;
257
1.11M
        m_cbf[1]             = charBuf; charBuf += m_numPartitions;
258
1.11M
        m_cbf[2]             = charBuf; charBuf += m_numPartitions;
259
1.11M
        m_chromaIntraDir     = charBuf; charBuf += m_numPartitions;
260
261
1.11M
        X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n");
262
263
1.11M
        m_mv[0]  = dataPool.mvMemBlock + (instance * 4) * m_numPartitions;
264
1.11M
        m_mv[1]  = m_mv[0] +  m_numPartitions;
265
1.11M
        m_mvd[0] = m_mv[1] +  m_numPartitions;
266
1.11M
        m_mvd[1] = m_mvd[0] + m_numPartitions;
267
268
1.11M
        m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions;
269
270
1.11M
        uint32_t cuSize = param.maxCUSize >> depth;
271
1.11M
        uint32_t sizeL = cuSize * cuSize;
272
1.11M
        uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part
273
1.11M
        m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2);
274
1.11M
        m_trCoeff[1] = m_trCoeff[0] + sizeL;
275
1.11M
        m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC;
276
4.47M
        for (int i = 0; i < 3; i++)
277
3.35M
            m_fAc_den[i] = m_fDc_den[i] = 0;
278
1.11M
    }
279
1.11M
}
280
281
void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
282
13.9k
{
283
13.9k
    m_encData       = frame.m_encData;
284
13.9k
    m_slice         = m_encData->m_slice;
285
13.9k
    m_cuAddr        = cuAddr;
286
13.9k
    m_cuPelX        = (cuAddr % m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
287
13.9k
    m_cuPelY        = (cuAddr / m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize;
288
13.9k
    m_absIdxInCTU   = 0;
289
13.9k
    m_numPartitions = m_encData->m_param->num4x4Partitions;
290
13.9k
    m_bFirstRowInSlice = (uint8_t)firstRowInSlice;
291
13.9k
    m_bLastRowInSlice  = (uint8_t)lastRowInSlice;
292
13.9k
    m_bLastCuInSlice   = (uint8_t)lastCuInSlice;
293
294
    /* sequential memsets */
295
13.9k
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
296
13.9k
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
297
13.9k
    m_partSet(m_log2CUSize,   (uint8_t)m_slice->m_param->maxLog2CUSize);
298
13.9k
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
299
13.9k
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
300
13.9k
    m_partSet(m_tqBypass,     (uint8_t)frame.m_encData->m_param->bLossless);
301
13.9k
    if (m_slice->m_sliceType != I_SLICE)
302
0
    {
303
0
        m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
304
0
        m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
305
0
    }
306
307
13.9k
    X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n");
308
309
    /* initialize the remaining CU data in one memset */
310
13.9k
    memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions);
311
312
307k
    for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
313
293k
        m_refTuDepth[i] = -1;
314
315
13.9k
    m_vbvAffected = false;
316
317
13.9k
    uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
318
13.9k
    m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
319
13.9k
    m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
320
13.9k
    m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
321
13.9k
    m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
322
13.9k
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
323
13.9k
}
324
325
// initialize Sub partition
326
void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp)
327
890k
{
328
890k
    m_absIdxInCTU   = cuGeom.absPartIdx;
329
890k
    m_encData       = ctu.m_encData;
330
890k
    m_slice         = ctu.m_slice;
331
890k
    m_cuAddr        = ctu.m_cuAddr;
332
890k
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
333
890k
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
334
890k
    m_cuLeft        = ctu.m_cuLeft;
335
890k
    m_cuAbove       = ctu.m_cuAbove;
336
890k
    m_cuAboveLeft   = ctu.m_cuAboveLeft;
337
890k
    m_cuAboveRight  = ctu.m_cuAboveRight;
338
890k
    m_bFirstRowInSlice = ctu.m_bFirstRowInSlice;
339
890k
    m_bLastRowInSlice = ctu.m_bLastRowInSlice;
340
890k
    m_bLastCuInSlice = ctu.m_bLastCuInSlice;
341
3.56M
    for (int i = 0; i < 3; i++)
342
2.67M
    {
343
2.67M
        m_fAc_den[i] = ctu.m_fAc_den[i];
344
2.67M
        m_fDc_den[i] = ctu.m_fDc_den[i];
345
2.67M
    }
346
347
890k
    X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
348
349
890k
    m_partSet((uint8_t*)m_qp, (uint8_t)qp);
350
890k
    m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp);
351
352
890k
    m_partSet(m_log2CUSize,   (uint8_t)cuGeom.log2CUSize);
353
890k
    m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX);
354
890k
    m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
355
890k
    m_partSet(m_tqBypass,     (uint8_t)m_encData->m_param->bLossless);
356
890k
    m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID);
357
890k
    m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID);
358
890k
    m_partSet(m_cuDepth,      (uint8_t)cuGeom.depth);
359
360
    /* initialize the remaining CU data in one memset */
361
890k
    memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions);
362
890k
    memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
363
890k
}
364
365
/* Copy the results of a sub-part (split) CU to the parent CU */
366
void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx)
367
440k
{
368
440k
    X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n");
369
370
440k
    uint32_t offset = childGeom.numPartitions * subPartIdx;
371
372
440k
    m_bFirstRowInSlice = subCU.m_bFirstRowInSlice;
373
440k
    m_bLastCuInSlice = subCU.m_bLastCuInSlice;
374
375
440k
    m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
376
440k
    m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis);
377
440k
    m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
378
440k
    m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
379
440k
    m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass);
380
440k
    m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]);
381
440k
    m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]);
382
440k
    m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth);
383
440k
    m_subPartCopy(m_predMode + offset, subCU.m_predMode);
384
440k
    m_subPartCopy(m_partSize + offset, subCU.m_partSize);
385
440k
    m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag);
386
440k
    m_subPartCopy(m_interDir + offset, subCU.m_interDir);
387
440k
    m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]);
388
440k
    m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]);
389
440k
    m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth);
390
391
440k
    m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]);
392
440k
    m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]);
393
394
440k
    memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV));
395
440k
    memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV));
396
440k
    memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV));
397
440k
    memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV));
398
399
440k
    memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t));
400
401
440k
    uint32_t tmp = 1 << ((m_slice->m_param->maxLog2CUSize - childGeom.depth) * 2);
402
440k
    uint32_t tmp2 = subPartIdx * tmp;
403
440k
    memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp);
404
405
440k
    if (subCU.m_chromaFormat != X265_CSP_I400)
406
440k
    {
407
440k
        m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]);
408
440k
        m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]);
409
440k
        m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]);
410
440k
        m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]);
411
440k
        m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir);
412
413
440k
        uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift);
414
440k
        uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift);
415
440k
        memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC);
416
440k
        memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC);
417
440k
    }
418
440k
}
419
420
/* If a sub-CU part is not present (off the edge of the picture) its depth and
421
 * log2size should still be configured */
422
void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx)
423
22.7k
{
424
22.7k
    uint32_t offset = childGeom.numPartitions * subPartIdx;
425
22.7k
    m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth);
426
22.7k
    m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize);
427
22.7k
}
428
429
/* Copy all CU data from one instance to the next, except set lossless flag
430
 * This will only get used when --cu-lossless is enabled but --lossless is not. */
431
void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom)
432
0
{
433
    /* Start by making an exact copy */
434
0
    m_encData      = cu.m_encData;
435
0
    m_slice        = cu.m_slice;
436
0
    m_cuAddr       = cu.m_cuAddr;
437
0
    m_cuPelX       = cu.m_cuPelX;
438
0
    m_cuPelY       = cu.m_cuPelY;
439
0
    m_cuLeft       = cu.m_cuLeft;
440
0
    m_cuAbove      = cu.m_cuAbove;
441
0
    m_cuAboveLeft  = cu.m_cuAboveLeft;
442
0
    m_cuAboveRight = cu.m_cuAboveRight;
443
0
    m_absIdxInCTU  = cuGeom.absPartIdx;
444
0
    m_numPartitions = cuGeom.numPartitions;
445
0
    memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions);
446
0
    memcpy(m_mv[0],  cu.m_mv[0],  m_numPartitions * sizeof(MV));
447
0
    memcpy(m_mv[1],  cu.m_mv[1],  m_numPartitions * sizeof(MV));
448
0
    memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV));
449
0
    memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV));
450
0
    memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t));
451
452
    /* force TQBypass to true */
453
0
    m_partSet(m_tqBypass, true);
454
455
    /* clear residual coding flags */
456
0
    m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER));
457
0
    m_partSet(m_tuDepth, 0);
458
0
    m_partSet(m_cbf[0], 0);
459
0
    m_partSet(m_transformSkip[0], 0);
460
461
0
    if (cu.m_chromaFormat != X265_CSP_I400)
462
0
    {
463
0
        m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX);
464
0
        m_partSet(m_cbf[1], 0);
465
0
        m_partSet(m_cbf[2], 0);
466
0
        m_partSet(m_transformSkip[1], 0);
467
0
        m_partSet(m_transformSkip[2], 0);
468
0
    }
469
0
}
470
471
/* Copy completed predicted CU to CTU in picture */
472
void CUData::copyToPic(uint32_t depth) const
473
454k
{
474
454k
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
475
476
454k
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
477
454k
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
478
454k
    m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize);
479
454k
    m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir);
480
454k
    m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass);
481
454k
    m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]);
482
454k
    m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]);
483
454k
    m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth);
484
454k
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
485
454k
    m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize);
486
454k
    m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag);
487
454k
    m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir);
488
454k
    m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]);
489
454k
    m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]);
490
454k
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
491
454k
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
492
454k
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
493
494
454k
    memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV));
495
454k
    memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV));
496
454k
    memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV));
497
454k
    memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV));
498
499
454k
    memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t));
500
501
454k
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
502
454k
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
503
454k
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
504
505
454k
    if (ctu.m_chromaFormat != X265_CSP_I400)
506
453k
    {
507
453k
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
508
453k
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
509
453k
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
510
453k
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
511
453k
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
512
513
453k
        uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift);
514
453k
        uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift);
515
453k
        memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC);
516
453k
        memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC);
517
453k
    }
518
454k
}
519
520
/* The reverse of copyToPic, called only by encodeResidue */
521
void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp)
522
0
{
523
0
    m_encData       = ctu.m_encData;
524
0
    m_slice         = ctu.m_slice;
525
0
    m_cuAddr        = ctu.m_cuAddr;
526
0
    m_cuPelX        = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
527
0
    m_cuPelY        = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
528
0
    m_absIdxInCTU   = cuGeom.absPartIdx;
529
0
    m_numPartitions = cuGeom.numPartitions;
530
531
    /* copy out all prediction info for this part */
532
0
    if (copyQp)
533
0
    {
534
0
        m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU);
535
0
        m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU);
536
0
    }
537
538
0
    m_partCopy(m_log2CUSize,   ctu.m_log2CUSize + m_absIdxInCTU);
539
0
    m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU);
540
0
    m_partCopy(m_tqBypass,     ctu.m_tqBypass + m_absIdxInCTU);
541
0
    m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU);
542
0
    m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU);
543
0
    m_partCopy(m_cuDepth,      ctu.m_cuDepth + m_absIdxInCTU);
544
0
    m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */
545
0
    m_partCopy(m_partSize,     ctu.m_partSize + m_absIdxInCTU);
546
0
    m_partCopy(m_mergeFlag,    ctu.m_mergeFlag + m_absIdxInCTU);
547
0
    m_partCopy(m_interDir,     ctu.m_interDir + m_absIdxInCTU);
548
0
    m_partCopy(m_mvpIdx[0],    ctu.m_mvpIdx[0] + m_absIdxInCTU);
549
0
    m_partCopy(m_mvpIdx[1],    ctu.m_mvpIdx[1] + m_absIdxInCTU);
550
0
    m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU);
551
552
0
    memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
553
0
    memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
554
0
    memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
555
0
    memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV));
556
557
0
    memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t));
558
559
    /* clear residual coding flags */
560
0
    m_partSet(m_tuDepth, 0);
561
0
    m_partSet(m_transformSkip[0], 0);
562
0
    m_partSet(m_cbf[0], 0);
563
564
0
    if (csp != X265_CSP_I400)
565
0
    {        
566
0
        m_partSet(m_transformSkip[1], 0);
567
0
        m_partSet(m_transformSkip[2], 0);
568
0
        m_partSet(m_cbf[1], 0);
569
0
        m_partSet(m_cbf[2], 0);
570
0
    }
571
0
}
572
573
/* Only called by encodeResidue, these fields can be modified during inter/intra coding */
574
void CUData::updatePic(uint32_t depth, int picCsp) const
575
0
{
576
0
    CUData& ctu = *m_encData->getPicCTU(m_cuAddr);
577
578
0
    m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp);
579
0
    m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis);
580
0
    m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]);
581
0
    m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode);
582
0
    m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth);
583
0
    m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]);
584
585
0
    uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2);
586
0
    uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2);
587
0
    memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY);
588
589
0
    if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400)
590
0
    {
591
0
        m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]);
592
0
        m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]);
593
594
0
        m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]);
595
0
        m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]);
596
0
        m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir);
597
598
0
        tmpY  >>= m_hChromaShift + m_vChromaShift;
599
0
        tmpY2 >>= m_hChromaShift + m_vChromaShift;
600
0
        memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY);
601
0
        memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY);
602
0
    }
603
0
}
604
605
/* Locate the partition immediately left of curPartUnitIdx.
 * On return lPartUnitIdx holds the z-scan index of that partition, relative to the CUData
 * that is returned: the picture CTU, this CU, or m_cuLeft (whatever that pointer holds)
 * when the current partition is in the first column of the CTU. */
const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (isZeroCol(rasterIdx))
    {
        /* first column of the CTU: take the last column of the same row in the left CTU */
        lPartUnitIdx = g_rasterToZscan[rasterIdx + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    const uint32_t cuRasterIdx = g_zscanToRaster[m_absIdxInCTU];
    lPartUnitIdx = g_rasterToZscan[rasterIdx - 1];

    /* on this CU's left edge the neighbour belongs to another CU of the same CTU */
    if (isEqualCol(rasterIdx, cuRasterIdx))
        return m_encData->getPicCTU(m_cuAddr);

    /* otherwise the neighbour is inside this CU; make the index CU-relative */
    lPartUnitIdx -= m_absIdxInCTU;
    return this;
}
625
626
/* Look up the partition unit directly above curPartUnitIdx (a z-scan index).
 * aPartUnitIdx receives the neighbour's z-scan index, expressed for the CUData
 * that is returned: the picture CTU, this CU (index rebased by m_absIdxInCTU),
 * or m_cuAbove when the unit lies in row zero. */
const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const
{
    uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];

    if (!isZeroRow(absPartIdx))
    {
        uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
        aPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE];

        // same row as this CU's first unit: answer comes from the picture-level CTU
        if (isEqualRow(absPartIdx, absZorderCUIdx))
            return m_encData->getPicCTU(m_cuAddr);

        // The original used a brace-less `else` followed by an indented `return this;`,
        // which read as if the return were part of the else. Spelled out explicitly here;
        // the behaviour is the same.
        aPartUnitIdx -= m_absIdxInCTU;
        return this;
    }

    // row zero: index is taken (s_numPartInCUSize - 1) raster rows further down
    aPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)];
    return m_cuAbove;
}
644
645
/* Look up the partition unit diagonally above-left of curPartUnitIdx (z-scan index).
 * alPartUnitIdx receives the neighbour's z-scan index for the CUData returned, which is
 * one of: m_cuAboveLeft, m_cuLeft, m_cuAbove, the picture CTU, or this CU. */
const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const
{
    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];
    const bool inColZero = isZeroCol(rasterIdx);
    const bool inRowZero = isZeroRow(rasterIdx);

    if (inColZero && inRowZero)
    {
        // corner unit: last partition index of m_cuAboveLeft
        alPartUnitIdx = m_encData->m_param->num4x4Partitions - 1;
        return m_cuAboveLeft;
    }

    if (inColZero)
    {
        alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    if (inRowZero)
    {
        alPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1];
        return m_cuAbove;
    }

    // neither row zero nor column zero: neighbour is one raster row up, one column left
    alPartUnitIdx = g_rasterToZscan[rasterIdx - RASTER_SIZE - 1];
    if (isEqualRowOrCol(rasterIdx, g_zscanToRaster[m_absIdxInCTU]))
        return m_encData->getPicCTU(m_cuAddr);

    alPartUnitIdx -= m_absIdxInCTU;
    return this;
}
676
677
/* Look up the partition unit diagonally above-right of curPartUnitIdx (z-scan index).
 * Returns NULL when the position one unit to the right reaches picWidthInLumaSamples,
 * or when the candidate's z-scan index is not lower than curPartUnitIdx.
 * arPartUnitIdx is written only on the paths that return m_cuAbove, m_cuAboveRight,
 * the picture CTU, or this CU. */
const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - 1))
    {
        // column is not below s_numPartInCUSize - 1: only a row-zero unit has a candidate
        if (!isZeroRow(rasterIdx))
            return NULL;

        arPartUnitIdx = g_rasterToZscan[(s_numPartInCUSize - 1) << LOG2_RASTER_SIZE];
        return m_cuAboveRight;
    }

    if (isZeroRow(rasterIdx))
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + 1];
        return m_cuAbove;
    }

    // candidate one raster row up and one column right; rejected unless it precedes us in z-scan
    const uint32_t candidate = g_rasterToZscan[rasterIdx - RASTER_SIZE + 1];
    if (!(curPartUnitIdx > candidate))
        return NULL;

    arPartUnitIdx = candidate;
    const uint32_t cuLastColIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    if (isEqualRowOrCol(rasterIdx, cuLastColIdx))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
712
713
/* Look up the partition unit diagonally below-left of curPartUnitIdx (z-scan index).
 * Returns NULL when the position one unit down reaches picHeightInLumaSamples, when the
 * unit's row is not below s_numPartInCUSize - 1, or when the candidate's z-scan index is
 * not lower than curPartUnitIdx. blPartUnitIdx is written only on non-NULL paths. */
const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanRow(rasterIdx, s_numPartInCUSize - 1))
        return NULL;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + RASTER_SIZE + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    // candidate one raster row down and one column left; rejected unless it precedes us in z-scan
    const uint32_t candidate = g_rasterToZscan[rasterIdx + RASTER_SIZE - 1];
    if (!(curPartUnitIdx > candidate))
        return NULL;

    blPartUnitIdx = candidate;
    const uint32_t cuLastRowIdx = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    if (isEqualRowOrCol(rasterIdx, cuLastRowIdx))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
744
745
/* Variant of the below-left lookup where the vertical distance is partUnitOffset units
 * instead of one. Returns NULL when the offset position reaches picHeightInLumaSamples,
 * when the unit's row is not below s_numPartInCUSize - partUnitOffset, or when the
 * candidate's z-scan index is not lower than curPartUnitIdx. On this path the only
 * non-NULL results produced are the picture CTU, this CU, or m_cuLeft. */
const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx,  uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanRow(rasterIdx, s_numPartInCUSize - partUnitOffset))
        return NULL;

    // raster distance covered by partUnitOffset rows
    const uint32_t rowShift = partUnitOffset << LOG2_RASTER_SIZE;

    if (isZeroCol(rasterIdx))
    {
        blPartUnitIdx = g_rasterToZscan[rasterIdx + rowShift + s_numPartInCUSize - 1];
        return m_cuLeft;
    }

    const uint32_t candidate = g_rasterToZscan[rasterIdx + rowShift - 1];
    if (!(curPartUnitIdx > candidate))
        return NULL;

    blPartUnitIdx = candidate;
    const uint32_t cuLastRowIdx = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE);
    if (isEqualRowOrCol(rasterIdx, cuLastRowIdx))
        return m_encData->getPicCTU(m_cuAddr);

    blPartUnitIdx -= m_absIdxInCTU;
    return this;
}
776
777
/* Variant of the above-right lookup where the horizontal distance is partUnitOffset units
 * instead of one. Returns NULL when the offset position reaches picWidthInLumaSamples, or
 * when no usable candidate exists (see the individual branches). arPartUnitIdx is written
 * only on non-NULL paths. */
const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const
{
    if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples)
        return NULL;

    const uint32_t rasterIdx = g_zscanToRaster[curPartUnitIdx];

    if (!lessThanCol(rasterIdx, s_numPartInCUSize - partUnitOffset))
    {
        // column is not below s_numPartInCUSize - partUnitOffset: only row zero has a candidate
        if (!isZeroRow(rasterIdx))
            return NULL;

        arPartUnitIdx = g_rasterToZscan[((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset - 1];
        return m_cuAboveRight;
    }

    if (isZeroRow(rasterIdx))
    {
        arPartUnitIdx = g_rasterToZscan[rasterIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset];
        return m_cuAbove;
    }

    // candidate one raster row up, partUnitOffset columns right; must precede us in z-scan
    const uint32_t candidate = g_rasterToZscan[rasterIdx - RASTER_SIZE + partUnitOffset];
    if (!(curPartUnitIdx > candidate))
        return NULL;

    arPartUnitIdx = candidate;
    const uint32_t cuLastColIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1;
    if (isEqualRowOrCol(rasterIdx, cuLastColIdx))
        return m_encData->getPicCTU(m_cuAddr);

    arPartUnitIdx -= m_absIdxInCTU;
    return this;
}
812
813
/* Get left QpMinCu */
814
const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const
815
101k
{
816
101k
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
817
101k
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
818
819
    // check for left CTU boundary
820
101k
    if (isZeroCol(absRorderQpMinCUIdx))
821
68.4k
        return NULL;
822
823
    // get index of left-CU relative to top-left corner of current quantization group
824
32.7k
    lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1];
825
826
    // return pointer to current CTU
827
32.7k
    return m_encData->getPicCTU(m_cuAddr);
828
101k
}
829
830
/* Get above QpMinCu */
831
const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const
832
101k
{
833
101k
    uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2);
834
101k
    uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx];
835
836
    // check for top CTU boundary
837
101k
    if (isZeroRow(absRorderQpMinCUIdx))
838
69.5k
        return NULL;
839
840
    // get index of top-CU relative to top-left corner of current quantization group
841
31.5k
    aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - RASTER_SIZE];
842
843
    // return pointer to current CTU
844
31.5k
    return m_encData->getPicCTU(m_cuAddr);
845
101k
}
846
847
/* Get reference QP from left QpMinCu or latest coded QP */
848
int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const
849
101k
{
850
101k
    uint32_t lPartIdx = 0, aPartIdx = 0;
851
101k
    const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
852
101k
    const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU);
853
854
101k
    return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1;
855
101k
}
856
857
int CUData::getLastValidPartIdx(int absPartIdx) const
858
228k
{
859
228k
    int lastValidPartIdx = absPartIdx - 1;
860
861
288k
    while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE)
862
60.1k
    {
863
60.1k
        uint32_t depth = m_cuDepth[lastValidPartIdx];
864
60.1k
        lastValidPartIdx -= m_numPartitions >> (depth << 1);
865
60.1k
    }
866
867
228k
    return lastValidPartIdx;
868
228k
}
869
870
int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const
871
228k
{
872
228k
    uint32_t quPartIdxMask = 0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2;
873
228k
    int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask);
874
875
228k
    if (lastValidPartIdx >= 0)
876
110k
        return m_qp[lastValidPartIdx];
877
117k
    else
878
117k
    {
879
117k
        if (m_absIdxInCTU)
880
13.3k
            return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU);
881
103k
        else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth)))
882
76.7k
            return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(m_encData->m_param->num4x4Partitions);
883
27.0k
        else
884
27.0k
            return (int8_t)m_slice->m_sliceQp;
885
117k
    }
886
228k
}
887
888
/* Get allowed chroma intra modes */
889
void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const
890
1.63M
{
891
1.63M
    modeList[0] = PLANAR_IDX;
892
1.63M
    modeList[1] = VER_IDX;
893
1.63M
    modeList[2] = HOR_IDX;
894
1.63M
    modeList[3] = DC_IDX;
895
1.63M
    modeList[4] = DM_CHROMA_IDX;
896
897
1.63M
    uint32_t lumaMode = m_lumaIntraDir[absPartIdx];
898
899
2.72M
    for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
900
2.72M
    {
901
2.72M
        if (lumaMode == modeList[i])
902
1.63M
        {
903
1.63M
            modeList[i] = 34; // VER+8 mode
904
1.63M
            break;
905
1.63M
        }
906
2.72M
    }
907
1.63M
}
908
909
/* Get most probable intra modes */
910
int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const
911
7.65M
{
912
7.65M
    const CUData* tempCU;
913
7.65M
    uint32_t tempPartIdx;
914
7.65M
    uint32_t leftIntraDir, aboveIntraDir;
915
916
    // Get intra direction of left PU
917
7.65M
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
918
919
7.65M
    leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
920
921
    // Get intra direction of above PU
922
7.65M
    tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL;
923
924
7.65M
    aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX;
925
926
7.65M
    if (leftIntraDir == aboveIntraDir)
927
4.38M
    {
928
4.38M
        if (leftIntraDir >= 2) // angular modes
929
80.1k
        {
930
80.1k
            intraDirPred[0] = leftIntraDir;
931
80.1k
            intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2;
932
80.1k
            intraDirPred[2] = ((leftIntraDir - 2 +  1) & 31) + 2;
933
80.1k
        }
934
4.30M
        else //non-angular
935
4.30M
        {
936
4.30M
            intraDirPred[0] = PLANAR_IDX;
937
4.30M
            intraDirPred[1] = DC_IDX;
938
4.30M
            intraDirPred[2] = VER_IDX;
939
4.30M
        }
940
4.38M
        return 1;
941
4.38M
    }
942
3.26M
    else
943
3.26M
    {
944
3.26M
        intraDirPred[0] = leftIntraDir;
945
3.26M
        intraDirPred[1] = aboveIntraDir;
946
947
3.26M
        if (leftIntraDir && aboveIntraDir) //both modes are non-planar
948
50.4k
            intraDirPred[2] = PLANAR_IDX;
949
3.21M
        else
950
18.4E
            intraDirPred[2] =  (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX;
951
3.26M
        return 2;
952
3.26M
    }
953
7.65M
}
954
955
uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const
956
269k
{
957
269k
    const CUData* tempCU;
958
269k
    uint32_t    tempPartIdx;
959
269k
    uint32_t    ctx;
960
961
    // Get left split flag
962
269k
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
963
269k
    ctx  = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
964
965
    // Get above split flag
966
269k
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
967
269k
    ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0;
968
969
269k
    return ctx;
970
269k
}
971
972
void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
973
861k
{
974
861k
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
975
861k
    uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N;
976
977
861k
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
978
861k
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
979
980
861k
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag));
981
861k
}
982
983
void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const
984
0
{
985
0
    uint32_t log2CUSize = m_log2CUSize[absPartIdx];
986
0
    uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter;
987
0
    uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N;
988
989
0
    tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize;
990
0
    tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize;
991
992
0
    tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag));
993
0
}
994
995
uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const
996
0
{
997
0
    const CUData* tempCU;
998
0
    uint32_t tempPartIdx;
999
0
    uint32_t ctx;
1000
1001
    // Get BCBP of left PU
1002
0
    tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx);
1003
0
    ctx    = tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1004
1005
    // Get BCBP of above PU
1006
0
    tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx);
1007
0
    ctx   += tempCU ? tempCU->isSkipped(tempPartIdx) : 0;
1008
1009
0
    return ctx;
1010
0
}
1011
1012
bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth)
1013
1.00k
{
1014
1.00k
    uint32_t curPartNumb = m_encData->m_param->num4x4Partitions >> (depth << 1);
1015
1.00k
    uint32_t curPartNumQ = curPartNumb >> 2;
1016
1017
1.00k
    if (m_cuDepth[absPartIdx] > depth)
1018
503
    {
1019
503
        for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
1020
503
            if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1))
1021
503
                return true;
1022
503
    }
1023
503
    else
1024
503
    {
1025
503
        if (getQtRootCbf(absPartIdx))
1026
503
            return true;
1027
0
        else
1028
0
            setQPSubParts(qp, absPartIdx, depth);
1029
503
    }
1030
1031
0
    return false;
1032
1.00k
}
1033
1034
void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
1035
0
{
1036
0
    uint32_t curPartNumQ = m_numPartitions >> 2;
1037
0
    X265_CHECK(puIdx < 2, "unexpected part unit index\n");
1038
1039
0
    switch (m_partSize[absPartIdx])
1040
0
    {
1041
0
    case SIZE_2Nx2N:
1042
0
        memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
1043
0
        break;
1044
0
    case SIZE_2NxN:
1045
0
        memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
1046
0
        break;
1047
0
    case SIZE_Nx2N:
1048
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1049
0
        memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
1050
0
        break;
1051
0
    case SIZE_NxN:
1052
0
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
1053
0
        break;
1054
0
    case SIZE_2NxnU:
1055
0
        if (!puIdx)
1056
0
        {
1057
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1058
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1059
0
        }
1060
0
        else
1061
0
        {
1062
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1063
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
1064
0
        }
1065
0
        break;
1066
0
    case SIZE_2NxnD:
1067
0
        if (!puIdx)
1068
0
        {
1069
0
            memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
1070
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
1071
0
        }
1072
0
        else
1073
0
        {
1074
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
1075
0
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
1076
0
        }
1077
0
        break;
1078
0
    case SIZE_nLx2N:
1079
0
        if (!puIdx)
1080
0
        {
1081
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1082
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1083
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1084
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1085
0
        }
1086
0
        else
1087
0
        {
1088
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1089
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1090
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1091
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1092
0
        }
1093
0
        break;
1094
0
    case SIZE_nRx2N:
1095
0
        if (!puIdx)
1096
0
        {
1097
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
1098
0
            memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1099
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
1100
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1101
0
        }
1102
0
        else
1103
0
        {
1104
0
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
1105
0
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1106
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
1107
0
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
1108
0
        }
1109
0
        break;
1110
0
    default:
1111
0
        X265_CHECK(0, "unexpected part type\n");
1112
0
        break;
1113
0
    }
1114
0
}
1115
1116
template<typename T>
1117
void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
1118
0
{
1119
0
    int i;
1120
1121
0
    p += absPartIdx;
1122
0
    int numElements = m_numPartitions;
1123
1124
0
    switch (m_partSize[absPartIdx])
1125
0
    {
1126
0
    case SIZE_2Nx2N:
1127
0
        for (i = 0; i < numElements; i++)
1128
0
            p[i] = val;
1129
0
        break;
1130
1131
0
    case SIZE_2NxN:
1132
0
        numElements >>= 1;
1133
0
        for (i = 0; i < numElements; i++)
1134
0
            p[i] = val;
1135
0
        break;
1136
1137
0
    case SIZE_Nx2N:
1138
0
        numElements >>= 2;
1139
0
        for (i = 0; i < numElements; i++)
1140
0
        {
1141
0
            p[i] = val;
1142
0
            p[i + 2 * numElements] = val;
1143
0
        }
1144
0
        break;
1145
1146
0
    case SIZE_2NxnU:
1147
0
    {
1148
0
        int curPartNumQ = numElements >> 2;
1149
0
        if (!puIdx)
1150
0
        {
1151
0
            T *pT  = p;
1152
0
            T *pT2 = p + curPartNumQ;
1153
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1154
0
            {
1155
0
                pT[i] = val;
1156
0
                pT2[i] = val;
1157
0
            }
1158
0
        }
1159
0
        else
1160
0
        {
1161
0
            T *pT  = p;
1162
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1163
0
                pT[i] = val;
1164
1165
0
            pT = p + curPartNumQ;
1166
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1167
0
                pT[i] = val;
1168
0
        }
1169
0
        break;
1170
0
    }
1171
1172
0
    case SIZE_2NxnD:
1173
0
    {
1174
0
        int curPartNumQ = numElements >> 2;
1175
0
        if (!puIdx)
1176
0
        {
1177
0
            T *pT  = p;
1178
0
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
1179
0
                pT[i] = val;
1180
1181
0
            pT = p + (numElements - curPartNumQ);
1182
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1183
0
                pT[i] = val;
1184
0
        }
1185
0
        else
1186
0
        {
1187
0
            T *pT  = p;
1188
0
            T *pT2 = p + curPartNumQ;
1189
0
            for (i = 0; i < (curPartNumQ >> 1); i++)
1190
0
            {
1191
0
                pT[i] = val;
1192
0
                pT2[i] = val;
1193
0
            }
1194
0
        }
1195
0
        break;
1196
0
    }
1197
1198
0
    case SIZE_nLx2N:
1199
0
    {
1200
0
        int curPartNumQ = numElements >> 2;
1201
0
        if (!puIdx)
1202
0
        {
1203
0
            T *pT  = p;
1204
0
            T *pT2 = p + (curPartNumQ << 1);
1205
0
            T *pT3 = p + (curPartNumQ >> 1);
1206
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1207
1208
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1209
0
            {
1210
0
                pT[i] = val;
1211
0
                pT2[i] = val;
1212
0
                pT3[i] = val;
1213
0
                pT4[i] = val;
1214
0
            }
1215
0
        }
1216
0
        else
1217
0
        {
1218
0
            T *pT  = p;
1219
0
            T *pT2 = p + (curPartNumQ << 1);
1220
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1221
0
            {
1222
0
                pT[i] = val;
1223
0
                pT2[i] = val;
1224
0
            }
1225
1226
0
            pT  = p + (curPartNumQ >> 1);
1227
0
            pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1228
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1229
0
            {
1230
0
                pT[i] = val;
1231
0
                pT2[i] = val;
1232
0
            }
1233
0
        }
1234
0
        break;
1235
0
    }
1236
1237
0
    case SIZE_nRx2N:
1238
0
    {
1239
0
        int curPartNumQ = numElements >> 2;
1240
0
        if (!puIdx)
1241
0
        {
1242
0
            T *pT  = p;
1243
0
            T *pT2 = p + (curPartNumQ << 1);
1244
0
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
1245
0
            {
1246
0
                pT[i] = val;
1247
0
                pT2[i] = val;
1248
0
            }
1249
1250
0
            pT  = p + curPartNumQ + (curPartNumQ >> 1);
1251
0
            pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
1252
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1253
0
            {
1254
0
                pT[i] = val;
1255
0
                pT2[i] = val;
1256
0
            }
1257
0
        }
1258
0
        else
1259
0
        {
1260
0
            T *pT  = p;
1261
0
            T *pT2 = p + (curPartNumQ >> 1);
1262
0
            T *pT3 = p + (curPartNumQ << 1);
1263
0
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
1264
0
            for (i = 0; i < (curPartNumQ >> 2); i++)
1265
0
            {
1266
0
                pT[i] = val;
1267
0
                pT2[i] = val;
1268
0
                pT3[i] = val;
1269
0
                pT4[i] = val;
1270
0
            }
1271
0
        }
1272
0
        break;
1273
0
    }
1274
1275
0
    case SIZE_NxN:
1276
0
    default:
1277
0
        X265_CHECK(0, "unknown partition type\n");
1278
0
        break;
1279
0
    }
1280
0
}
Unexecuted instantiation: void x265::CUData::setAllPU<x265::MV>(x265::MV*, x265::MV const&, int, int)
Unexecuted instantiation: void x265::CUData::setAllPU<signed char>(signed char*, signed char const&, int, int)
1281
1282
void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx)
1283
0
{
1284
0
    setAllPU(m_mv[list], mv, absPartIdx, puIdx);
1285
0
}
1286
1287
void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx)
1288
0
{
1289
0
    setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx);
1290
0
}
1291
1292
void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const
1293
0
{
1294
0
    int cuSize = 1 << m_log2CUSize[0];
1295
0
    int partType = m_partSize[0];
1296
1297
0
    int tmp = partTable[partType][partIdx][0];
1298
0
    outWidth = ((tmp >> 4) * cuSize) >> 2;
1299
0
    outHeight = ((tmp & 0xF) * cuSize) >> 2;
1300
0
    outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4;
1301
0
}
1302
1303
void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const
1304
0
{
1305
0
    if (cu)
1306
0
    {
1307
0
        outMvField.mv = cu->m_mv[picList][absPartIdx];
1308
0
        outMvField.refIdx = cu->m_refIdx[picList][absPartIdx];
1309
0
    }
1310
0
    else
1311
0
    {
1312
        // OUT OF BOUNDARY
1313
0
        outMvField.mv = 0;
1314
0
        outMvField.refIdx = REF_NOT_VALID;
1315
0
    }
1316
0
}
1317
1318
void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const
1319
0
{
1320
0
    partIdxLT = m_absIdxInCTU;
1321
0
    partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1322
1323
0
    switch (m_partSize[0])
1324
0
    {
1325
0
    case SIZE_2Nx2N: break;
1326
0
    case SIZE_2NxN:
1327
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1328
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1;
1329
0
        break;
1330
0
    case SIZE_Nx2N:
1331
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2;
1332
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2;
1333
0
        break;
1334
0
    case SIZE_NxN:
1335
0
        partIdxLT += (m_numPartitions >> 2) * partIdx;
1336
0
        partIdxRT +=  (m_numPartitions >> 2) * (partIdx - 1);
1337
0
        break;
1338
0
    case SIZE_2NxnU:
1339
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1340
0
        partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3;
1341
0
        break;
1342
0
    case SIZE_2NxnD:
1343
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1344
0
        partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3);
1345
0
        break;
1346
0
    case SIZE_nLx2N:
1347
0
        partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4;
1348
0
        partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1349
0
        break;
1350
0
    case SIZE_nRx2N:
1351
0
        partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4);
1352
0
        partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4;
1353
0
        break;
1354
0
    default:
1355
0
        X265_CHECK(0, "unexpected part index\n");
1356
0
        break;
1357
0
    }
1358
0
}
1359
1360
uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
1361
0
{
1362
0
    uint32_t outPartIdxLB;
1363
0
    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];
1364
1365
0
    switch (m_partSize[0])
1366
0
    {
1367
0
    case SIZE_2Nx2N:
1368
0
        outPartIdxLB += m_numPartitions >> 1;
1369
0
        break;
1370
0
    case SIZE_2NxN:
1371
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
1372
0
        break;
1373
0
    case SIZE_Nx2N:
1374
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
1375
0
        break;
1376
0
    case SIZE_NxN:
1377
0
        outPartIdxLB += (m_numPartitions >> 2) * puIdx;
1378
0
        break;
1379
0
    case SIZE_2NxnU:
1380
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1381
0
        break;
1382
0
    case SIZE_2NxnD:
1383
0
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1384
0
        break;
1385
0
    case SIZE_nLx2N:
1386
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1387
0
        break;
1388
0
    case SIZE_nRx2N:
1389
0
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
1390
0
        break;
1391
0
    default:
1392
0
        X265_CHECK(0, "unexpected part index\n");
1393
0
        break;
1394
0
    }
1395
0
    return outPartIdxLB;
1396
0
}
1397
1398
/* Derives the partition index of neighboring bottom right block */
1399
uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const
1400
0
{
1401
0
    uint32_t outPartIdxRB;
1402
0
    outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] +
1403
0
                                   (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE) +
1404
0
                                   (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1];
1405
1406
0
    switch (m_partSize[0])
1407
0
    {
1408
0
    case SIZE_2Nx2N:
1409
0
        outPartIdxRB += m_numPartitions >> 1;
1410
0
        break;
1411
0
    case SIZE_2NxN:
1412
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0;
1413
0
        break;
1414
0
    case SIZE_Nx2N:
1415
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2;
1416
0
        break;
1417
0
    case SIZE_NxN:
1418
0
        outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1);
1419
0
        break;
1420
0
    case SIZE_2NxnU:
1421
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
1422
0
        break;
1423
0
    case SIZE_2NxnD:
1424
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
1425
0
        break;
1426
0
    case SIZE_nLx2N:
1427
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4);
1428
0
        break;
1429
0
    case SIZE_nRx2N:
1430
0
        outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4);
1431
0
        break;
1432
0
    default:
1433
0
        X265_CHECK(0, "unexpected part index\n");
1434
0
        break;
1435
0
    }
1436
0
    return outPartIdxRB;
1437
0
}
1438
1439
/* Compares the motion data stored at absPartIdx in this CU with the motion
 * data stored at candAbsPartIdx in candCU. Returns true when the inter
 * directions are identical and, for every reference list whose bit is set in
 * that direction, both the motion vector and the reference index match. */
bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const
{
    const uint32_t interDir = m_interDir[absPartIdx];

    if (interDir != candCU.m_interDir[candAbsPartIdx])
        return false;

    for (uint32_t list = 0; list < 2; list++)
    {
        // lists whose bit is clear in the direction are not compared
        if (!(interDir & (1 << list)))
            continue;

        if (m_mv[list][absPartIdx] != candCU.m_mv[list][candAbsPartIdx])
            return false;

        if (m_refIdx[list][absPartIdx] != candCU.m_refIdx[list][candAbsPartIdx])
            return false;
    }

    return true;
}
1456
1457
/* Construct list of merging candidates, returns count */
1458
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
1459
0
{
1460
0
    uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1461
0
    const bool isInterB = m_slice->isInterB();
1462
1463
0
    const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;
1464
1465
0
    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
1466
0
    {
1467
0
        candMvField[i][0].mv = 0;
1468
0
        candMvField[i][1].mv = 0;
1469
0
        candMvField[i][0].refIdx = REF_NOT_VALID;
1470
0
        candMvField[i][1].refIdx = REF_NOT_VALID;
1471
0
    }
1472
1473
    /* calculate the location of upper-left corner pixel and size of the current PU */
1474
0
    int xP, yP, nPSW, nPSH;
1475
1476
0
    int cuSize = 1 << m_log2CUSize[0];
1477
0
    int partMode = m_partSize[0];
1478
1479
0
    int tmp = partTable[partMode][puIdx][0];
1480
0
    nPSW = ((tmp >> 4) * cuSize) >> 2;
1481
0
    nPSH = ((tmp & 0xF) * cuSize) >> 2;
1482
1483
0
    tmp = partTable[partMode][puIdx][1];
1484
0
    xP = ((tmp >> 4) * cuSize) >> 2;
1485
0
    yP = ((tmp & 0xF) * cuSize) >> 2;
1486
1487
0
    uint32_t count = 0;
1488
1489
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1490
0
    PartSize curPS = (PartSize)m_partSize[absPartIdx];
1491
    
1492
    // left
1493
0
    uint32_t leftPartIdx = 0;
1494
0
    const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
1495
0
    bool isAvailableA1 = cuLeft &&
1496
0
        cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
1497
0
        !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
1498
0
        cuLeft->isInter(leftPartIdx);
1499
0
    if (isAvailableA1)
1500
0
    {
1501
        // get Inter Dir
1502
0
        candDir[count] = cuLeft->m_interDir[leftPartIdx];
1503
        // get Mv from Left
1504
0
        cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);
1505
0
        if (isInterB)
1506
0
            cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);
1507
1508
0
        if (++count == maxNumMergeCand)
1509
0
            return maxNumMergeCand;
1510
0
    }
1511
1512
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1513
1514
    // above
1515
0
    uint32_t abovePartIdx = 0;
1516
0
    const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
1517
0
    bool isAvailableB1 = cuAbove &&
1518
0
        cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
1519
0
        !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
1520
0
        cuAbove->isInter(abovePartIdx);
1521
0
    if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
1522
0
    {
1523
        // get Inter Dir
1524
0
        candDir[count] = cuAbove->m_interDir[abovePartIdx];
1525
        // get Mv from Left
1526
0
        cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
1527
0
        if (isInterB)
1528
0
            cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);
1529
1530
0
        if (++count == maxNumMergeCand)
1531
0
            return maxNumMergeCand;
1532
0
    }
1533
1534
    // above right
1535
0
    uint32_t aboveRightPartIdx = 0;
1536
0
    const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
1537
0
    bool isAvailableB0 = cuAboveRight &&
1538
0
        cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
1539
0
        cuAboveRight->isInter(aboveRightPartIdx);
1540
0
    if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
1541
0
    {
1542
        // get Inter Dir
1543
0
        candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
1544
        // get Mv from Left
1545
0
        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
1546
0
        if (isInterB)
1547
0
            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);
1548
1549
0
        if (++count == maxNumMergeCand)
1550
0
            return maxNumMergeCand;
1551
0
    }
1552
1553
    // left bottom
1554
0
    uint32_t leftBottomPartIdx = 0;
1555
0
    const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
1556
0
    bool isAvailableA0 = cuLeftBottom &&
1557
0
        cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
1558
0
        cuLeftBottom->isInter(leftBottomPartIdx);
1559
0
    if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
1560
0
    {
1561
        // get Inter Dir
1562
0
        candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
1563
        // get Mv from Left
1564
0
        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
1565
0
        if (isInterB)
1566
0
            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);
1567
1568
0
        if (++count == maxNumMergeCand)
1569
0
            return maxNumMergeCand;
1570
0
    }
1571
1572
    // above left
1573
0
    if (count < 4)
1574
0
    {
1575
0
        uint32_t aboveLeftPartIdx = 0;
1576
0
        const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
1577
0
        bool isAvailableB2 = cuAboveLeft &&
1578
0
            cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
1579
0
            cuAboveLeft->isInter(aboveLeftPartIdx);
1580
0
        if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
1581
0
            && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
1582
0
        {
1583
            // get Inter Dir
1584
0
            candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
1585
            // get Mv from Left
1586
0
            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
1587
0
            if (isInterB)
1588
0
                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);
1589
1590
0
            if (++count == maxNumMergeCand)
1591
0
                return maxNumMergeCand;
1592
0
        }
1593
0
    }
1594
0
    if (m_slice->m_sps->bTemporalMVPEnabled)
1595
0
    {
1596
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1597
0
        MV colmv;
1598
0
        int ctuIdx = -1;
1599
1600
        // image boundary check
1601
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1602
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1603
0
        {
1604
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1605
0
            uint32_t numUnits = s_numPartInCUSize;
1606
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1607
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1608
1609
0
            if (bNotLastCol && bNotLastRow)
1610
0
            {
1611
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1612
0
                ctuIdx = m_cuAddr;
1613
0
            }
1614
0
            else if (bNotLastCol)
1615
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1616
0
            else if (bNotLastRow)
1617
0
            {
1618
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1619
0
                ctuIdx = m_cuAddr + 1;
1620
0
            }
1621
0
            else // is the right bottom corner of CTU
1622
0
                absPartAddr = 0;
1623
0
        }
1624
1625
0
        int maxList = isInterB ? 2 : 1;
1626
0
        int dir = 0, refIdx = 0;
1627
0
        for (int list = 0; list < maxList; list++)
1628
0
        {
1629
0
            bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);
1630
0
            if (!bExistMV)
1631
0
            {
1632
0
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
1633
0
                bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
1634
0
            }
1635
0
            if (bExistMV)
1636
0
            {
1637
0
                dir |= (1 << list);
1638
0
                candMvField[count][list].mv = colmv;
1639
0
                candMvField[count][list].refIdx = refIdx;
1640
0
            }
1641
0
        }
1642
1643
0
        if (dir != 0)
1644
0
        {
1645
0
            candDir[count] = (uint8_t)dir;
1646
1647
0
            if (++count == maxNumMergeCand)
1648
0
                return maxNumMergeCand;
1649
0
        }
1650
0
    }
1651
1652
0
    if (isInterB)
1653
0
    {
1654
0
        const uint32_t cutoff = count * (count - 1);
1655
0
        uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
1656
0
        uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }
1657
1658
0
        for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
1659
0
        {
1660
0
            int i = priorityList0 & 3;
1661
0
            int j = priorityList1 & 3;
1662
1663
0
            if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
1664
0
            {
1665
                // get Mv from cand[i] and cand[j]
1666
0
                int refIdxL0 = candMvField[i][0].refIdx;
1667
0
                int refIdxL1 = candMvField[j][1].refIdx;
1668
0
                int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
1669
0
                int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
1670
0
                if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
1671
0
                {
1672
0
                    candMvField[count][0].mv = candMvField[i][0].mv;
1673
0
                    candMvField[count][0].refIdx = refIdxL0;
1674
0
                    candMvField[count][1].mv = candMvField[j][1].mv;
1675
0
                    candMvField[count][1].refIdx = refIdxL1;
1676
0
                    candDir[count] = 3;
1677
1678
0
                    if (++count == maxNumMergeCand)
1679
0
                        return maxNumMergeCand;
1680
0
                }
1681
0
            }
1682
0
        }
1683
0
    }
1684
0
    int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0];
1685
0
    int r = 0;
1686
0
    int refcnt = 0;
1687
0
    while (count < maxNumMergeCand)
1688
0
    {
1689
0
        candDir[count] = 1;
1690
0
        candMvField[count][0].mv.word = 0;
1691
0
        candMvField[count][0].refIdx = r;
1692
1693
0
        if (isInterB)
1694
0
        {
1695
0
            candDir[count] = 3;
1696
0
            candMvField[count][1].mv.word = 0;
1697
0
            candMvField[count][1].refIdx = r;
1698
0
        }
1699
1700
0
        count++;
1701
1702
0
        if (refcnt == numRefIdx - 1)
1703
0
            r = 0;
1704
0
        else
1705
0
        {
1706
0
            ++r;
1707
0
            ++refcnt;
1708
0
        }
1709
0
    }
1710
1711
0
    return count;
1712
0
}
1713
1714
// Create the PMV list. Called for each reference index.
1715
int CUData::getPMV(InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
1716
0
{
1717
0
    MV directMV[MD_ABOVE_LEFT + 1];
1718
0
    MV indirectMV[MD_ABOVE_LEFT + 1];
1719
0
    bool validDirect[MD_ABOVE_LEFT + 1];
1720
0
    bool validIndirect[MD_ABOVE_LEFT + 1];
1721
1722
    // Left candidate.
1723
0
    validDirect[MD_BELOW_LEFT]  = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1724
0
    validDirect[MD_LEFT]        = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1725
    // Top candidate.
1726
0
    validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1727
0
    validDirect[MD_ABOVE]       = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1728
0
    validDirect[MD_ABOVE_LEFT]  = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1729
1730
    // Left candidate.
1731
0
    validIndirect[MD_BELOW_LEFT]  = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
1732
0
    validIndirect[MD_LEFT]        = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
1733
    // Top candidate.
1734
0
    validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
1735
0
    validIndirect[MD_ABOVE]       = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
1736
0
    validIndirect[MD_ABOVE_LEFT]  = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
1737
1738
0
    int num = 0;
1739
    // Left predictor search
1740
0
    if (validDirect[MD_BELOW_LEFT])
1741
0
        amvpCand[num++] = directMV[MD_BELOW_LEFT];
1742
0
    else if (validDirect[MD_LEFT])
1743
0
        amvpCand[num++] = directMV[MD_LEFT];
1744
0
    else if (validIndirect[MD_BELOW_LEFT])
1745
0
        amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
1746
0
    else if (validIndirect[MD_LEFT])
1747
0
        amvpCand[num++] = indirectMV[MD_LEFT];
1748
1749
0
    bool bAddedSmvp = num > 0;
1750
1751
    // Above predictor search
1752
0
    if (validDirect[MD_ABOVE_RIGHT])
1753
0
        amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
1754
0
    else if (validDirect[MD_ABOVE])
1755
0
        amvpCand[num++] = directMV[MD_ABOVE];
1756
0
    else if (validDirect[MD_ABOVE_LEFT])
1757
0
        amvpCand[num++] = directMV[MD_ABOVE_LEFT];
1758
1759
0
    if (!bAddedSmvp)
1760
0
    {
1761
0
        if (validIndirect[MD_ABOVE_RIGHT])
1762
0
            amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
1763
0
        else if (validIndirect[MD_ABOVE])
1764
0
            amvpCand[num++] = indirectMV[MD_ABOVE];
1765
0
        else if (validIndirect[MD_ABOVE_LEFT])
1766
0
            amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
1767
0
    }
1768
1769
0
    int numMvc = 0;
1770
0
    for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
1771
0
    {
1772
0
        if (validDirect[dir] && directMV[dir].notZero())
1773
0
            pmv[numMvc++] = directMV[dir];
1774
1775
0
        if (validIndirect[dir] && indirectMV[dir].notZero())
1776
0
            pmv[numMvc++] = indirectMV[dir];
1777
0
    }
1778
1779
0
    if (num == 2)
1780
0
        num -= amvpCand[0] == amvpCand[1];
1781
1782
    // Get the collocated candidate. At this step, either the first candidate
1783
    // was found or its value is 0.
1784
0
    if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
1785
0
    {
1786
0
        int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
1787
0
        if (tempRefIdx != -1)
1788
0
        {
1789
0
            uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
1790
0
            const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
1791
0
            const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
1792
1793
            // Scale the vector
1794
0
            int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
1795
0
            int colPOC = colCU->m_slice->m_poc;
1796
1797
0
            int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1798
0
            int curPOC = m_slice->m_poc;
1799
0
            pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
1800
0
        }
1801
0
    }
1802
1803
0
    while (num < AMVP_NUM_CANDS)
1804
0
        amvpCand[num++] = 0;
1805
1806
0
    return numMvc;
1807
0
}
1808
1809
/* Constructs a list of candidates for AMVP, and a larger list of motion candidates */
1810
void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const
1811
0
{
1812
    // Set the temporal neighbour to unavailable by default.
1813
0
    neighbours[MD_COLLOCATED].unifiedRef = -1;
1814
1815
0
    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
1816
0
    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);
1817
1818
    // Load the spatial MVs.
1819
0
    getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT);
1820
0
    getInterNeighbourMV(neighbours + MD_LEFT,       partIdxLB, MD_LEFT);
1821
0
    getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT);
1822
0
    getInterNeighbourMV(neighbours + MD_ABOVE,      partIdxRT, MD_ABOVE);
1823
0
    getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT);
1824
1825
0
    if (m_slice->m_sps->bTemporalMVPEnabled)
1826
0
    {
1827
0
        uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
1828
0
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
1829
1830
        // co-located RightBottom temporal predictor (H)
1831
0
        int ctuIdx = -1;
1832
1833
        // image boundary check
1834
0
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
1835
0
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
1836
0
        {
1837
0
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
1838
0
            uint32_t numUnits = s_numPartInCUSize;
1839
0
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
1840
0
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row    of CTU
1841
1842
0
            if (bNotLastCol && bNotLastRow)
1843
0
            {
1844
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
1845
0
                ctuIdx = m_cuAddr;
1846
0
            }
1847
0
            else if (bNotLastCol)
1848
0
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
1849
0
            else if (bNotLastRow)
1850
0
            {
1851
0
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
1852
0
                ctuIdx = m_cuAddr + 1;
1853
0
            }
1854
0
            else // is the right bottom corner of CTU
1855
0
                absPartAddr = 0;
1856
0
        }
1857
1858
0
        if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED)))
1859
0
        {
1860
0
            uint32_t partIdxCenter =  deriveCenterIdx(puIdx);
1861
0
            uint32_t curCTUIdx = m_cuAddr;
1862
0
            getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED);
1863
0
        }
1864
0
    }
1865
0
}
1866
1867
/* Loads the motion of one spatial neighbour into *neighbour.
 *
 * dir selects which neighbour lookup is used for partUnitIdx. When no
 * neighbour CU is returned (or dir is not one of the five spatial directions)
 * both reference indices are set to -1 and the motion vectors are left
 * untouched; otherwise the MV and reference index of both lists are copied. */
void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const
{
    uint32_t idx = 0;
    const CUData* tmpCU = NULL;

    switch (dir)
    {
    case MD_LEFT:
        tmpCU = getPULeft(idx, partUnitIdx);
        break;

    case MD_ABOVE:
        tmpCU = getPUAbove(idx, partUnitIdx);
        break;

    case MD_ABOVE_RIGHT:
        tmpCU = getPUAboveRight(idx, partUnitIdx);
        break;

    case MD_BELOW_LEFT:
        tmpCU = getPUBelowLeft(idx, partUnitIdx);
        break;

    case MD_ABOVE_LEFT:
        tmpCU = getPUAboveLeft(idx, partUnitIdx);
        break;

    default:
        break;
    }

    if (tmpCU)
    {
        for (int list = 0; list < 2; list++)
        {
            // Get the MV.
            neighbour->mv[list] = tmpCU->m_mv[list][idx];

            // Get the reference idx.
            neighbour->refIdx[list] = tmpCU->m_refIdx[list][idx];
        }
    }
    else
    {
        // Mark the PMV as unavailable.
        for (int list = 0; list < 2; list++)
            neighbour->refIdx[list] = -1;
    }
}
1910
1911
/* Clip motion vector to within slightly padded boundary of picture (the
1912
 * MV may reference a block that is completely within the padded area).
1913
 * Note this function is unaware of how much of this picture is actually
1914
 * available for use (re: frame parallelism) */
1915
void CUData::clipMv(MV& outMV) const
1916
0
{
1917
0
    const uint32_t mvshift = 2;
1918
0
    uint32_t offset = 8;
1919
1920
0
    int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift);
1921
0
    int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift);
1922
1923
0
    int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift);
1924
0
    int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift);
1925
1926
0
    outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x));
1927
0
    outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y));
1928
0
}
1929
1930
// Load direct spatial MV if available.
1931
bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
1932
0
{
1933
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1934
0
    for (int i = 0; i < 2; i++, picList = !picList)
1935
0
    {
1936
0
        int partRefIdx = neighbours->refIdx[picList];
1937
0
        if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx])
1938
0
        {
1939
0
            pmv = neighbours->mv[picList];
1940
0
            return true;
1941
0
        }
1942
0
    }
1943
0
    return false;
1944
0
}
1945
1946
// Load indirect spatial MV if available. An indirect MV has to be scaled.
1947
bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const
1948
0
{
1949
0
    int curPOC = m_slice->m_poc;
1950
0
    int neibPOC = curPOC;
1951
0
    int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
1952
1953
0
    for (int i = 0; i < 2; i++, picList = !picList)
1954
0
    {
1955
0
        int partRefIdx = neighbours->refIdx[picList];
1956
0
        if (partRefIdx >= 0)
1957
0
        {
1958
0
            int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx];
1959
0
            MV mvp = neighbours->mv[picList];
1960
1961
0
            outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC);
1962
0
            return true;
1963
0
        }
1964
0
    }
1965
0
    return false;
1966
0
}
1967
1968
/* Fetches the collocated motion vector for list picList from CTU cuAddr of
 * the collocated picture, at partition partUnitIdx, and scales it by POC
 * distance towards the reference m_refPOCList[picList][outRefIdx].
 *
 * Returns false, leaving outMV untouched, when the collocated partition has
 * prediction mode MODE_NONE, is intra, or has no valid reference index in
 * either list. outRefIdx is only read by this function. */
bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const
{
    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);

    // the intra test and the motion lookups use the masked address; the
    // MODE_NONE test uses the address as passed in
    const uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
        return false;

    // preferred list of the collocated block; the opposite list is the fallback
    int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag;
    int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];

    if (colRefIdx < 0)
    {
        colRefPicList = !colRefPicList;
        colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
    }

    if (colRefIdx < 0)
        return false;

    // Scale the vector
    const int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx];
    const int colPOC = colCU->m_slice->m_poc;
    const MV colmv = colCU->m_mv[colRefPicList][absPartAddr];

    const int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx];
    const int curPOC = m_slice->m_poc;

    outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC);
    return true;
}
2001
2002
// Cache the collocated MV.
2003
bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const
2004
0
{
2005
0
    const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
2006
0
    const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);
2007
2008
0
    uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK;
2009
0
    if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr))
2010
0
        return false;
2011
2012
0
    for (int list = 0; list < 2; list++)
2013
0
    {
2014
0
        neighbour->cuAddr[list] = cuAddr;
2015
0
        int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag;
2016
0
        int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr];
2017
2018
0
        if (colRefIdx < 0)
2019
0
            colRefPicList = !colRefPicList;
2020
2021
0
        neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr];
2022
0
        neighbour->refIdx[list] |= colRefPicList << 4;
2023
2024
0
        neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr];
2025
0
    }
2026
2027
0
    return neighbour->unifiedRef != -1;
2028
0
}
2029
2030
/* Scales inMV by the ratio of two POC distances: (curPOC - curRefPOC) over
 * (colPOC - colRefPOC). When both distances are equal the vector is returned
 * unchanged; otherwise both are clipped to [-128, 127], turned into a
 * fixed-point factor clipped to [-4096, 4095], and applied through scaleMv. */
MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const
{
    const int diffPocB = curPOC - curRefPOC;
    const int diffPocD = colPOC - colRefPOC;

    // equal distances: nothing to scale
    if (diffPocD == diffPocB)
        return inMV;

    const int tdb = x265_clip3(-128, 127, diffPocB);
    const int tdd = x265_clip3(-128, 127, diffPocD);

    // NOTE(review): tdd == 0 (colPOC == colRefPOC) would divide by zero here;
    // presumably callers never pass a reference with the same POC - confirm
    const int x = (0x4000 + abs(tdd / 2)) / tdd;
    const int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6);

    return scaleMv(inMV, scale);
}
2046
2047
/* Returns the z-scan index of the unit half the PU height below and half the
 * PU width to the right of the first unit of prediction unit puIdx (sizes are
 * converted to units by the LOG2_UNIT_SIZE shift). */
uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const
{
    uint32_t puPartIdx;
    int puWidth, puHeight;

    getPartIndexAndSize(puIdx, puPartIdx, puWidth, puHeight);

    // half the PU size in units; the row part is shifted by LOG2_RASTER_SIZE
    const int rowOffset = (puHeight >> (LOG2_UNIT_SIZE + 1)) << LOG2_RASTER_SIZE;
    const int colOffset = puWidth >> (LOG2_UNIT_SIZE + 1);

    return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + puPartIdx] + rowOffset + colOffset];
}
2058
2059
/* Fills result with the coefficient scan type, the scan order tables and the
 * first significance-map context for a transform block of size
 * (1 << log2TrSize) at absPartIdx. bIsLuma selects luma or chroma handling. */
void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const
{
    // diagonal scan unless an intra direction below selects another one
    result.scanType = SCAN_DIAG;

    if (isIntra(absPartIdx))
    {
        uint32_t dirMode;

        if (bIsLuma)
            dirMode = m_lumaIntraDir[absPartIdx];
        else
        {
            dirMode = m_chromaIntraDir[absPartIdx];
            if (dirMode == DM_CHROMA_IDX)
            {
                // chroma follows luma: take the luma direction (for non-4:4:4
                // from the partition index with its two low bits cleared) and
                // remap it for 4:2:2
                dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC];
                dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode;
            }
        }

        // the scan depends on the intra direction only for small enough blocks
        const bool bModeDependent = log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) ||
                                    (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE);
        if (bModeDependent)
        {
            if (dirMode >= 22 && dirMode <= 30)
                result.scanType = SCAN_HOR;
            else if (dirMode >= 6 && dirMode <= 14)
                result.scanType = SCAN_VER;
        }
    }

    // set the group layout and the scan orders
    const uint32_t log2TrSizeCG = log2TrSize - 2;

    result.scan   = g_scanOrder[result.scanType][log2TrSizeCG];
    result.scanCG = g_scanOrderCG[result.scanType][log2TrSizeCG];

    switch (log2TrSize)
    {
    case 2:
        result.firstSignificanceMapContext = 0;
        break;

    case 3:
        result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9;
        break;

    default:
        result.firstSignificanceMapContext = bIsLuma ? 21 : 12;
        break;
    }
}
2101
2102
433k
/* Writes 'value' into the bits of 'bitfield' selected by 'flag': the flag bits
 * are cleared, then set again when value is 1 (~(1 - 1) is all ones) or left
 * clear when value is 0 (~(0 - 1) is zero). 'value' is expected to be 0 or 1.
 * 'bitfield' and 'flag' each appear twice in the expansion, so arguments with
 * side effects should be avoided. */
#define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag))
2103
2104
void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS])
2105
1.98k
{
2106
1.98k
    uint32_t num4x4Partition = (1U << ((g_log2Size[maxCUSize] - LOG2_UNIT_SIZE) << 1));
2107
2108
    // Initialize the coding blocks inside the CTB
2109
9.39k
    for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--)
2110
7.40k
    {
2111
7.40k
        uint32_t blockSize = 1 << log2CUSize;
2112
7.40k
        uint32_t sbWidth   = 1 << (g_log2Size[maxCUSize] - log2CUSize);
2113
7.40k
        int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize];
2114
2115
33.7k
        for (uint32_t sbY = 0; sbY < sbWidth; sbY++)
2116
26.3k
        {
2117
170k
            for (uint32_t sbX = 0; sbX < sbWidth; sbX++)
2118
144k
            {
2119
144k
                uint32_t depthIdx = g_depthScanIdx[sbY][sbX];
2120
144k
                uint32_t cuIdx = rangeCUIdx + depthIdx;
2121
144k
                uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2);
2122
144k
                uint32_t px = sbX * blockSize;
2123
144k
                uint32_t py = sbY * blockSize;
2124
144k
                int32_t presentFlag = px < ctuWidth && py < ctuHeight;
2125
144k
                int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight);
2126
                
2127
                /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */
2128
144k
                uint32_t xOffset = (sbX * blockSize) >> 3;
2129
144k
                uint32_t yOffset = (sbY * blockSize) >> 3;
2130
144k
                X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n");
2131
2132
144k
                CUGeom *cu = cuDataArray + cuIdx;
2133
144k
                cu->log2CUSize = log2CUSize;
2134
144k
                cu->childOffset = childIdx - cuIdx;
2135
144k
                cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4;
2136
144k
                cu->numPartitions = (num4x4Partition >> ((g_log2Size[maxCUSize] - cu->log2CUSize) * 2));
2137
144k
                cu->depth = g_log2Size[maxCUSize] - log2CUSize;
2138
144k
                cu->geomRecurId = cuIdx;
2139
2140
144k
                cu->flags = 0;
2141
144k
                CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag);
2142
144k
                CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag);
2143
144k
                CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag);
2144
144k
            }
2145
26.3k
        }
2146
7.40k
        rangeCUIdx += sbWidth * sbWidth;
2147
7.40k
    }
2148
1.98k
}