/src/x265/source/common/cudata.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "common.h" |
26 | | #include "frame.h" |
27 | | #include "framedata.h" |
28 | | #include "picyuv.h" |
29 | | #include "mv.h" |
30 | | #include "cudata.h" |
/* Largest motion vector magnitude handled in this file (2^14).  The expansion is
 * parenthesized so the macro behaves as a single value inside larger
 * expressions such as MAX_MV - 1 or MAX_MV * 2. */
#define MAX_MV (1 << 14)
32 | | |
33 | | using namespace X265_NS; |
34 | | |
35 | | /* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */ |
36 | | |
/* Broadcast val over a one-byte partition array */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
38 | | |
/* Copy exactly 4 bytes from src to dst.  A fixed-size memcpy replaces the
 * former uint32_t* type pun: it is free of strict-aliasing and alignment
 * assumptions and compiles to a single load/store.  dst and src are expected
 * to be distinct (non-overlapping) arrays. */
static void copy4(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 4);
}
/* Set exactly 4 bytes at dst to val.  memset replaces the former uint32_t*
 * type pun (0x01010101 * val), producing the same bytes without relying on
 * aliasing or alignment of dst. */
static void bcast4(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 4);
}
41 | | |
/* Copy exactly 16 bytes from src to dst.  A fixed-size memcpy replaces the
 * former pair of uint64_t* punned stores, avoiding strict-aliasing and
 * alignment assumptions.  dst and src are expected not to overlap. */
static void copy16(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 16);
}
/* Set exactly 16 bytes at dst to val.  memset replaces the former uint64_t*
 * punned stores and yields the same bytes without aliasing or alignment
 * assumptions on dst. */
static void bcast16(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 16);
}
44 | | |
/* Copy exactly 64 bytes from src to dst.  A fixed-size memcpy replaces the
 * former eight uint64_t* punned stores, avoiding strict-aliasing and alignment
 * assumptions.  dst and src are expected not to overlap. */
static void copy64(uint8_t* dst, uint8_t* src)
{
    memcpy(dst, src, 64);
}
/* Set exactly 64 bytes at dst to val.  memset replaces the former uint64_t*
 * punned stores and yields the same bytes without aliasing or alignment
 * assumptions on dst. */
static void bcast64(uint8_t* dst, uint8_t val)
{
    memset(dst, val, 64);
}
52 | | |
53 | | /* at 256 bytes, memset/memcpy will probably use SIMD more effectively than our uint64_t hack, |
54 | | * but hand-written assembly would beat it. */ |
/* Copy exactly 256 bytes from src to dst */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t byteCount = 256;
    memcpy(dst, src, byteCount);
}
/* Set exactly 256 bytes at dst to val */
static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t byteCount = 256;
    memset(dst, val, byteCount);
}
57 | | |
58 | | namespace { |
59 | | // file private namespace |
60 | | |
61 | | /* Check whether 2 addresses point to the same column */ |
62 | | inline bool isEqualCol(int addrA, int addrB) |
63 | 0 | { |
64 | 0 | return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0; |
65 | 0 | } |
66 | | |
67 | | /* Check whether 2 addresses point to the same row */ |
68 | | inline bool isEqualRow(int addrA, int addrB) |
69 | 0 | { |
70 | 0 | return ((addrA ^ addrB) < RASTER_SIZE); |
71 | 0 | } |
72 | | |
73 | | /* Check whether 2 addresses point to the same row or column */ |
74 | | inline bool isEqualRowOrCol(int addrA, int addrB) |
75 | 0 | { |
76 | 0 | return isEqualCol(addrA, addrB) || isEqualRow(addrA, addrB); |
77 | 0 | } |
78 | | |
79 | | /* Check whether one address points to the first column */ |
80 | | inline bool isZeroCol(int addr) |
81 | 0 | { |
82 | 0 | return (addr & (RASTER_SIZE - 1)) == 0; |
83 | 0 | } |
84 | | |
85 | | /* Check whether one address points to the first row */ |
86 | | inline bool isZeroRow(int addr) |
87 | 0 | { |
88 | 0 | return (addr < RASTER_SIZE); |
89 | 0 | } |
90 | | |
91 | | /* Check whether one address points to a column whose index is smaller than a given value */ |
92 | | inline bool lessThanCol(int addr, int val) |
93 | 0 | { |
94 | 0 | return (addr & (RASTER_SIZE - 1)) < val; |
95 | 0 | } |
96 | | |
97 | | /* Check whether one address points to a row whose index is smaller than a given value */ |
98 | | inline bool lessThanRow(int addr, int val) |
99 | 0 | { |
100 | | // addr / numUnits < val |
101 | 0 | return (addr >> LOG2_RASTER_SIZE) < val; |
102 | 0 | } |
103 | | |
104 | | inline MV scaleMv(MV mv, int scale) |
105 | 0 | { |
106 | 0 | int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8); |
107 | 0 | int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8); |
108 | |
|
109 | 0 | return MV((int32_t)mvx, (int32_t)mvy); |
110 | 0 | } |
111 | | |
112 | | } |
113 | | |
114 | | CUData::CUData() |
115 | 0 | { |
116 | 0 | memset(this, 0, sizeof(*this)); |
117 | 0 | } |
118 | | |
119 | | void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance) |
120 | 0 | { |
121 | 0 | int csp = param.internalCsp; |
122 | 0 | m_chromaFormat = csp; |
123 | 0 | m_hChromaShift = CHROMA_H_SHIFT(csp); |
124 | 0 | m_vChromaShift = CHROMA_V_SHIFT(csp); |
125 | 0 | m_numPartitions = param.num4x4Partitions >> (depth * 2); |
126 | |
|
127 | 0 | if (!s_partSet[0]) |
128 | 0 | { |
129 | 0 | s_numPartInCUSize = 1 << param.unitSizeDepth; |
130 | 0 | switch (param.maxLog2CUSize) |
131 | 0 | { |
132 | 0 | case 6: |
133 | 0 | s_partSet[0] = bcast256; |
134 | 0 | s_partSet[1] = bcast64; |
135 | 0 | s_partSet[2] = bcast16; |
136 | 0 | s_partSet[3] = bcast4; |
137 | 0 | s_partSet[4] = bcast1; |
138 | 0 | break; |
139 | 0 | case 5: |
140 | 0 | s_partSet[0] = bcast64; |
141 | 0 | s_partSet[1] = bcast16; |
142 | 0 | s_partSet[2] = bcast4; |
143 | 0 | s_partSet[3] = bcast1; |
144 | 0 | s_partSet[4] = NULL; |
145 | 0 | break; |
146 | 0 | case 4: |
147 | 0 | s_partSet[0] = bcast16; |
148 | 0 | s_partSet[1] = bcast4; |
149 | 0 | s_partSet[2] = bcast1; |
150 | 0 | s_partSet[3] = NULL; |
151 | 0 | s_partSet[4] = NULL; |
152 | 0 | break; |
153 | 0 | default: |
154 | 0 | X265_CHECK(0, "unexpected CTU size\n"); |
155 | 0 | break; |
156 | 0 | } |
157 | 0 | } |
158 | | |
159 | 0 | switch (m_numPartitions) |
160 | 0 | { |
161 | 0 | case 256: // 64x64 CU |
162 | 0 | m_partCopy = copy256; |
163 | 0 | m_partSet = bcast256; |
164 | 0 | m_subPartCopy = copy64; |
165 | 0 | m_subPartSet = bcast64; |
166 | 0 | break; |
167 | 0 | case 64: // 32x32 CU |
168 | 0 | m_partCopy = copy64; |
169 | 0 | m_partSet = bcast64; |
170 | 0 | m_subPartCopy = copy16; |
171 | 0 | m_subPartSet = bcast16; |
172 | 0 | break; |
173 | 0 | case 16: // 16x16 CU |
174 | 0 | m_partCopy = copy16; |
175 | 0 | m_partSet = bcast16; |
176 | 0 | m_subPartCopy = copy4; |
177 | 0 | m_subPartSet = bcast4; |
178 | 0 | break; |
179 | 0 | case 4: // 8x8 CU |
180 | 0 | m_partCopy = copy4; |
181 | 0 | m_partSet = bcast4; |
182 | 0 | m_subPartCopy = NULL; |
183 | 0 | m_subPartSet = NULL; |
184 | 0 | break; |
185 | 0 | default: |
186 | 0 | X265_CHECK(0, "unexpected CU partition count\n"); |
187 | 0 | break; |
188 | 0 | } |
189 | | |
190 | 0 | if (csp == X265_CSP_I400) |
191 | 0 | { |
192 | | /* Each CU's data is layed out sequentially within the charMemBlock */ |
193 | 0 | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance; |
194 | |
|
195 | 0 | m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; |
196 | 0 | m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; |
197 | 0 | m_log2CUSize = charBuf; charBuf += m_numPartitions; |
198 | 0 | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; |
199 | 0 | m_tqBypass = charBuf; charBuf += m_numPartitions; |
200 | 0 | m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; |
201 | 0 | m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; |
202 | 0 | m_cuDepth = charBuf; charBuf += m_numPartitions; |
203 | 0 | m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ |
204 | 0 | m_partSize = charBuf; charBuf += m_numPartitions; |
205 | 0 | m_skipFlag[0] = charBuf; charBuf += m_numPartitions; |
206 | 0 | m_skipFlag[1] = charBuf; charBuf += m_numPartitions; |
207 | 0 | m_mergeFlag = charBuf; charBuf += m_numPartitions; |
208 | 0 | m_interDir = charBuf; charBuf += m_numPartitions; |
209 | 0 | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; |
210 | 0 | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; |
211 | 0 | m_tuDepth = charBuf; charBuf += m_numPartitions; |
212 | 0 | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; |
213 | 0 | m_cbf[0] = charBuf; charBuf += m_numPartitions; |
214 | 0 | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; |
215 | |
|
216 | 0 | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition |
217 | |
|
218 | 0 | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; |
219 | 0 | m_mv[1] = m_mv[0] + m_numPartitions; |
220 | 0 | m_mvd[0] = m_mv[1] + m_numPartitions; |
221 | 0 | m_mvd[1] = m_mvd[0] + m_numPartitions; |
222 | |
|
223 | 0 | m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions; |
224 | |
|
225 | 0 | uint32_t cuSize = param.maxCUSize >> depth; |
226 | 0 | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize); |
227 | 0 | m_trCoeff[1] = m_trCoeff[2] = 0; |
228 | 0 | m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0; |
229 | 0 | m_fAc_den[0] = m_fDc_den[0] = 0; |
230 | 0 | } |
231 | 0 | else |
232 | 0 | { |
233 | | /* Each CU's data is layed out sequentially within the charMemBlock */ |
234 | 0 | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; |
235 | |
|
236 | 0 | m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; |
237 | 0 | m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; |
238 | 0 | m_log2CUSize = charBuf; charBuf += m_numPartitions; |
239 | 0 | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; |
240 | 0 | m_tqBypass = charBuf; charBuf += m_numPartitions; |
241 | 0 | m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; |
242 | 0 | m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; |
243 | 0 | m_cuDepth = charBuf; charBuf += m_numPartitions; |
244 | 0 | m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ |
245 | 0 | m_partSize = charBuf; charBuf += m_numPartitions; |
246 | 0 | m_skipFlag[0] = charBuf; charBuf += m_numPartitions; |
247 | 0 | m_skipFlag[1] = charBuf; charBuf += m_numPartitions; |
248 | 0 | m_mergeFlag = charBuf; charBuf += m_numPartitions; |
249 | 0 | m_interDir = charBuf; charBuf += m_numPartitions; |
250 | 0 | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; |
251 | 0 | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; |
252 | 0 | m_tuDepth = charBuf; charBuf += m_numPartitions; |
253 | 0 | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; |
254 | 0 | m_transformSkip[1] = charBuf; charBuf += m_numPartitions; |
255 | 0 | m_transformSkip[2] = charBuf; charBuf += m_numPartitions; |
256 | 0 | m_cbf[0] = charBuf; charBuf += m_numPartitions; |
257 | 0 | m_cbf[1] = charBuf; charBuf += m_numPartitions; |
258 | 0 | m_cbf[2] = charBuf; charBuf += m_numPartitions; |
259 | 0 | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; |
260 | |
|
261 | 0 | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n"); |
262 | |
|
263 | 0 | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; |
264 | 0 | m_mv[1] = m_mv[0] + m_numPartitions; |
265 | 0 | m_mvd[0] = m_mv[1] + m_numPartitions; |
266 | 0 | m_mvd[1] = m_mvd[0] + m_numPartitions; |
267 | |
|
268 | 0 | m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions; |
269 | |
|
270 | 0 | uint32_t cuSize = param.maxCUSize >> depth; |
271 | 0 | uint32_t sizeL = cuSize * cuSize; |
272 | 0 | uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part |
273 | 0 | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2); |
274 | 0 | m_trCoeff[1] = m_trCoeff[0] + sizeL; |
275 | 0 | m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC; |
276 | 0 | for (int i = 0; i < 3; i++) |
277 | 0 | m_fAc_den[i] = m_fDc_den[i] = 0; |
278 | 0 | } |
279 | 0 | } |
280 | | |
281 | | void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice) |
282 | 0 | { |
283 | 0 | m_encData = frame.m_encData; |
284 | 0 | m_slice = m_encData->m_slice; |
285 | 0 | m_cuAddr = cuAddr; |
286 | 0 | m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize; |
287 | 0 | m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize; |
288 | 0 | m_absIdxInCTU = 0; |
289 | 0 | m_numPartitions = m_encData->m_param->num4x4Partitions; |
290 | 0 | m_bFirstRowInSlice = (uint8_t)firstRowInSlice; |
291 | 0 | m_bLastRowInSlice = (uint8_t)lastRowInSlice; |
292 | 0 | m_bLastCuInSlice = (uint8_t)lastCuInSlice; |
293 | | #if ENABLE_SCC_EXT |
294 | | m_lastIntraBCMv[0].set(0, 0); |
295 | | m_lastIntraBCMv[1].set(0, 0); |
296 | | #endif |
297 | | |
298 | | /* sequential memsets */ |
299 | 0 | m_partSet((uint8_t*)m_qp, (uint8_t)qp); |
300 | 0 | m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp); |
301 | 0 | m_partSet(m_log2CUSize, (uint8_t)m_slice->m_param->maxLog2CUSize); |
302 | 0 | m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX); |
303 | 0 | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
304 | 0 | m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless); |
305 | 0 | if (m_slice->m_sliceType != I_SLICE) |
306 | 0 | { |
307 | 0 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); |
308 | 0 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); |
309 | 0 | } |
310 | |
|
311 | 0 | X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n"); |
312 | | |
313 | | /* initialize the remaining CU data in one memset */ |
314 | 0 | memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions); |
315 | |
|
316 | 0 | for (int8_t i = 0; i < NUM_TU_DEPTH; i++) |
317 | 0 | m_refTuDepth[i] = -1; |
318 | |
|
319 | 0 | m_vbvAffected = false; |
320 | |
|
321 | 0 | uint32_t widthInCU = m_slice->m_sps->numCuInWidth; |
322 | 0 | m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; |
323 | 0 | m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; |
324 | 0 | m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; |
325 | 0 | m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; |
326 | 0 | memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); |
327 | 0 | } |
328 | | |
329 | | // initialize Sub partition |
330 | | #if ENABLE_SCC_EXT |
331 | | void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp, MV lastIntraBCMv[2]) |
332 | | #else |
333 | | void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp) |
334 | | #endif |
335 | 0 | { |
336 | 0 | m_absIdxInCTU = cuGeom.absPartIdx; |
337 | 0 | m_encData = ctu.m_encData; |
338 | 0 | m_slice = ctu.m_slice; |
339 | 0 | m_cuAddr = ctu.m_cuAddr; |
340 | 0 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; |
341 | 0 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; |
342 | 0 | m_cuLeft = ctu.m_cuLeft; |
343 | 0 | m_cuAbove = ctu.m_cuAbove; |
344 | 0 | m_cuAboveLeft = ctu.m_cuAboveLeft; |
345 | 0 | m_cuAboveRight = ctu.m_cuAboveRight; |
346 | 0 | m_bFirstRowInSlice = ctu.m_bFirstRowInSlice; |
347 | 0 | m_bLastRowInSlice = ctu.m_bLastRowInSlice; |
348 | 0 | m_bLastCuInSlice = ctu.m_bLastCuInSlice; |
349 | 0 | for (int i = 0; i < 3; i++) |
350 | 0 | { |
351 | 0 | m_fAc_den[i] = ctu.m_fAc_den[i]; |
352 | 0 | m_fDc_den[i] = ctu.m_fDc_den[i]; |
353 | 0 | } |
354 | |
|
355 | 0 | X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n"); |
356 | |
|
357 | 0 | m_partSet((uint8_t*)m_qp, (uint8_t)qp); |
358 | 0 | m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp); |
359 | |
|
360 | 0 | m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); |
361 | 0 | m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX); |
362 | 0 | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
363 | 0 | m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless); |
364 | 0 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); |
365 | 0 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); |
366 | 0 | m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); |
367 | | |
368 | | /* initialize the remaining CU data in one memset */ |
369 | 0 | memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); |
370 | 0 | memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); |
371 | |
|
372 | | #if ENABLE_SCC_EXT |
373 | | if (lastIntraBCMv) |
374 | | { |
375 | | for (int i = 0; i < 2; i++) |
376 | | m_lastIntraBCMv[i] = lastIntraBCMv[i]; |
377 | | } |
378 | | #endif |
379 | 0 | } |
380 | | |
381 | | /* Copy the results of a sub-part (split) CU to the parent CU */ |
382 | | void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx) |
383 | 0 | { |
384 | 0 | X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n"); |
385 | |
|
386 | 0 | uint32_t offset = childGeom.numPartitions * subPartIdx; |
387 | |
|
388 | 0 | m_bFirstRowInSlice = subCU.m_bFirstRowInSlice; |
389 | 0 | m_bLastCuInSlice = subCU.m_bLastCuInSlice; |
390 | |
|
391 | 0 | m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); |
392 | 0 | m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis); |
393 | 0 | m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); |
394 | 0 | m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); |
395 | 0 | m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); |
396 | 0 | m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); |
397 | 0 | m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); |
398 | 0 | m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); |
399 | 0 | m_subPartCopy(m_predMode + offset, subCU.m_predMode); |
400 | 0 | m_subPartCopy(m_partSize + offset, subCU.m_partSize); |
401 | 0 | m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); |
402 | 0 | m_subPartCopy(m_interDir + offset, subCU.m_interDir); |
403 | 0 | m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); |
404 | 0 | m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]); |
405 | 0 | m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth); |
406 | |
|
407 | 0 | m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]); |
408 | 0 | m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]); |
409 | |
|
410 | 0 | memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV)); |
411 | 0 | memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV)); |
412 | 0 | memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV)); |
413 | 0 | memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV)); |
414 | |
|
415 | 0 | memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t)); |
416 | |
|
417 | 0 | uint32_t tmp = 1 << ((m_slice->m_param->maxLog2CUSize - childGeom.depth) * 2); |
418 | 0 | uint32_t tmp2 = subPartIdx * tmp; |
419 | 0 | memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp); |
420 | |
|
421 | 0 | if (subCU.m_chromaFormat != X265_CSP_I400) |
422 | 0 | { |
423 | 0 | m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]); |
424 | 0 | m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]); |
425 | 0 | m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]); |
426 | 0 | m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]); |
427 | 0 | m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir); |
428 | |
|
429 | 0 | uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift); |
430 | 0 | uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift); |
431 | 0 | memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC); |
432 | 0 | memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC); |
433 | 0 | } |
434 | | #if ENABLE_SCC_EXT |
435 | | for (int i = 0; i < 2; i++) |
436 | | m_lastIntraBCMv[i] = subCU.m_lastIntraBCMv[i]; |
437 | | #endif |
438 | 0 | } |
439 | | |
440 | | /* If a sub-CU part is not present (off the edge of the picture) its depth and |
441 | | * log2size should still be configured */ |
442 | | void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx) |
443 | 0 | { |
444 | 0 | uint32_t offset = childGeom.numPartitions * subPartIdx; |
445 | 0 | m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth); |
446 | 0 | m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize); |
447 | 0 | } |
448 | | |
449 | | /* Copy all CU data from one instance to the next, except set lossless flag |
450 | | * This will only get used when --cu-lossless is enabled but --lossless is not. */ |
451 | | void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom) |
452 | 0 | { |
453 | | /* Start by making an exact copy */ |
454 | 0 | m_encData = cu.m_encData; |
455 | 0 | m_slice = cu.m_slice; |
456 | 0 | m_cuAddr = cu.m_cuAddr; |
457 | 0 | m_cuPelX = cu.m_cuPelX; |
458 | 0 | m_cuPelY = cu.m_cuPelY; |
459 | 0 | m_cuLeft = cu.m_cuLeft; |
460 | 0 | m_cuAbove = cu.m_cuAbove; |
461 | 0 | m_cuAboveLeft = cu.m_cuAboveLeft; |
462 | 0 | m_cuAboveRight = cu.m_cuAboveRight; |
463 | 0 | m_absIdxInCTU = cuGeom.absPartIdx; |
464 | 0 | m_numPartitions = cuGeom.numPartitions; |
465 | 0 | memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions); |
466 | 0 | memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV)); |
467 | 0 | memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV)); |
468 | 0 | memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV)); |
469 | 0 | memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV)); |
470 | 0 | memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t)); |
471 | | |
472 | | /* force TQBypass to true */ |
473 | 0 | m_partSet(m_tqBypass, true); |
474 | | |
475 | | /* clear residual coding flags */ |
476 | 0 | m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER)); |
477 | 0 | m_partSet(m_tuDepth, 0); |
478 | 0 | m_partSet(m_cbf[0], 0); |
479 | 0 | m_partSet(m_transformSkip[0], 0); |
480 | |
|
481 | 0 | if (cu.m_chromaFormat != X265_CSP_I400) |
482 | 0 | { |
483 | 0 | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
484 | 0 | m_partSet(m_cbf[1], 0); |
485 | 0 | m_partSet(m_cbf[2], 0); |
486 | 0 | m_partSet(m_transformSkip[1], 0); |
487 | 0 | m_partSet(m_transformSkip[2], 0); |
488 | 0 | } |
489 | 0 | } |
490 | | |
491 | | /* Copy completed predicted CU to CTU in picture */ |
492 | | void CUData::copyToPic(uint32_t depth) const |
493 | 0 | { |
494 | 0 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); |
495 | |
|
496 | 0 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); |
497 | 0 | m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis); |
498 | 0 | m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); |
499 | 0 | m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); |
500 | 0 | m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); |
501 | 0 | m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); |
502 | 0 | m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); |
503 | 0 | m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); |
504 | 0 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
505 | 0 | m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); |
506 | 0 | m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); |
507 | 0 | m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); |
508 | 0 | m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); |
509 | 0 | m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]); |
510 | 0 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); |
511 | 0 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); |
512 | 0 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); |
513 | |
|
514 | 0 | memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV)); |
515 | 0 | memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV)); |
516 | 0 | memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV)); |
517 | 0 | memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV)); |
518 | |
|
519 | 0 | memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t)); |
520 | |
|
521 | 0 | uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2); |
522 | 0 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); |
523 | 0 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY); |
524 | |
|
525 | 0 | if (ctu.m_chromaFormat != X265_CSP_I400) |
526 | 0 | { |
527 | 0 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); |
528 | 0 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); |
529 | 0 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); |
530 | 0 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); |
531 | 0 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); |
532 | |
|
533 | 0 | uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift); |
534 | 0 | uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift); |
535 | 0 | memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC); |
536 | 0 | memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC); |
537 | 0 | } |
538 | 0 | } |
539 | | |
540 | | /* The reverse of copyToPic, called only by encodeResidue */ |
541 | | void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp) |
542 | 0 | { |
543 | 0 | m_encData = ctu.m_encData; |
544 | 0 | m_slice = ctu.m_slice; |
545 | 0 | m_cuAddr = ctu.m_cuAddr; |
546 | 0 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; |
547 | 0 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; |
548 | 0 | m_absIdxInCTU = cuGeom.absPartIdx; |
549 | 0 | m_numPartitions = cuGeom.numPartitions; |
550 | | |
551 | | /* copy out all prediction info for this part */ |
552 | 0 | if (copyQp) |
553 | 0 | { |
554 | 0 | m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); |
555 | 0 | m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU); |
556 | 0 | } |
557 | |
|
558 | 0 | m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); |
559 | 0 | m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU); |
560 | 0 | m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU); |
561 | 0 | m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU); |
562 | 0 | m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU); |
563 | 0 | m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU); |
564 | 0 | m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */ |
565 | 0 | m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU); |
566 | 0 | m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU); |
567 | 0 | m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU); |
568 | 0 | m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU); |
569 | 0 | m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU); |
570 | 0 | m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU); |
571 | |
|
572 | 0 | memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
573 | 0 | memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
574 | 0 | memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
575 | 0 | memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
576 | |
|
577 | 0 | memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t)); |
578 | | |
579 | | /* clear residual coding flags */ |
580 | 0 | m_partSet(m_tuDepth, 0); |
581 | 0 | m_partSet(m_transformSkip[0], 0); |
582 | 0 | m_partSet(m_cbf[0], 0); |
583 | |
|
584 | 0 | if (csp != X265_CSP_I400) |
585 | 0 | { |
586 | 0 | m_partSet(m_transformSkip[1], 0); |
587 | 0 | m_partSet(m_transformSkip[2], 0); |
588 | 0 | m_partSet(m_cbf[1], 0); |
589 | 0 | m_partSet(m_cbf[2], 0); |
590 | 0 | } |
591 | 0 | } |
592 | | |
593 | | /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ |
594 | | void CUData::updatePic(uint32_t depth, int picCsp) const |
595 | 0 | { |
596 | 0 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); |
597 | |
|
598 | 0 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); |
599 | 0 | m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis); |
600 | 0 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); |
601 | 0 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
602 | 0 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); |
603 | 0 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); |
604 | |
|
605 | 0 | uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2); |
606 | 0 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); |
607 | 0 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY); |
608 | |
|
609 | 0 | if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400) |
610 | 0 | { |
611 | 0 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); |
612 | 0 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); |
613 | |
|
614 | 0 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); |
615 | 0 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); |
616 | 0 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); |
617 | |
|
618 | 0 | tmpY >>= m_hChromaShift + m_vChromaShift; |
619 | 0 | tmpY2 >>= m_hChromaShift + m_vChromaShift; |
620 | 0 | memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY); |
621 | 0 | memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); |
622 | 0 | } |
623 | 0 | } |
624 | | |
625 | | const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const |
626 | 0 | { |
627 | 0 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
628 | |
|
629 | 0 | if (!isZeroCol(absPartIdx)) |
630 | 0 | { |
631 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
632 | 0 | lPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; |
633 | 0 | if (isEqualCol(absPartIdx, absZorderCUIdx)) |
634 | 0 | return m_encData->getPicCTU(m_cuAddr); |
635 | 0 | else |
636 | 0 | { |
637 | 0 | lPartUnitIdx -= m_absIdxInCTU; |
638 | 0 | return this; |
639 | 0 | } |
640 | 0 | } |
641 | | |
642 | 0 | lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1]; |
643 | 0 | return m_cuLeft; |
644 | 0 | } |
645 | | |
646 | | const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const |
647 | 0 | { |
648 | 0 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
649 | |
|
650 | 0 | if (!isZeroRow(absPartIdx)) |
651 | 0 | { |
652 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
653 | 0 | aPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE]; |
654 | 0 | if (isEqualRow(absPartIdx, absZorderCUIdx)) |
655 | 0 | return m_encData->getPicCTU(m_cuAddr); |
656 | 0 | else |
657 | 0 | aPartUnitIdx -= m_absIdxInCTU; |
658 | 0 | return this; |
659 | 0 | } |
660 | | |
661 | 0 | aPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)]; |
662 | 0 | return m_cuAbove; |
663 | 0 | } |
664 | | |
665 | | const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const |
666 | 0 | { |
667 | 0 | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
668 | |
|
669 | 0 | if (!isZeroCol(absPartIdx)) |
670 | 0 | { |
671 | 0 | if (!isZeroRow(absPartIdx)) |
672 | 0 | { |
673 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
674 | 0 | alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE - 1]; |
675 | 0 | if (isEqualRowOrCol(absPartIdx, absZorderCUIdx)) |
676 | 0 | return m_encData->getPicCTU(m_cuAddr); |
677 | 0 | else |
678 | 0 | { |
679 | 0 | alPartUnitIdx -= m_absIdxInCTU; |
680 | 0 | return this; |
681 | 0 | } |
682 | 0 | } |
683 | 0 | alPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1]; |
684 | 0 | return m_cuAbove; |
685 | 0 | } |
686 | | |
687 | 0 | if (!isZeroRow(absPartIdx)) |
688 | 0 | { |
689 | 0 | alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE + s_numPartInCUSize - 1]; |
690 | 0 | return m_cuLeft; |
691 | 0 | } |
692 | | |
693 | 0 | alPartUnitIdx = m_encData->m_param->num4x4Partitions - 1; |
694 | 0 | return m_cuAboveLeft; |
695 | 0 | } |
696 | | |
697 | | const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const |
698 | 0 | { |
699 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples) |
700 | 0 | return NULL; |
701 | | |
702 | 0 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; |
703 | |
|
704 | 0 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1)) |
705 | 0 | { |
706 | 0 | if (!isZeroRow(absPartIdxRT)) |
707 | 0 | { |
708 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - RASTER_SIZE + 1]) |
709 | 0 | { |
710 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; |
711 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - RASTER_SIZE + 1]; |
712 | 0 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx)) |
713 | 0 | return m_encData->getPicCTU(m_cuAddr); |
714 | 0 | else |
715 | 0 | { |
716 | 0 | arPartUnitIdx -= m_absIdxInCTU; |
717 | 0 | return this; |
718 | 0 | } |
719 | 0 | } |
720 | 0 | return NULL; |
721 | 0 | } |
722 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + 1]; |
723 | 0 | return m_cuAbove; |
724 | 0 | } |
725 | | |
726 | 0 | if (!isZeroRow(absPartIdxRT)) |
727 | 0 | return NULL; |
728 | | |
729 | 0 | arPartUnitIdx = g_rasterToZscan[(s_numPartInCUSize - 1) << LOG2_RASTER_SIZE]; |
730 | 0 | return m_cuAboveRight; |
731 | 0 | } |
732 | | |
733 | | const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const |
734 | 0 | { |
735 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples) |
736 | 0 | return NULL; |
737 | | |
738 | 0 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; |
739 | |
|
740 | 0 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1)) |
741 | 0 | { |
742 | 0 | if (!isZeroCol(absPartIdxLB)) |
743 | 0 | { |
744 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + RASTER_SIZE - 1]) |
745 | 0 | { |
746 | 0 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE); |
747 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + RASTER_SIZE - 1]; |
748 | 0 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB)) |
749 | 0 | return m_encData->getPicCTU(m_cuAddr); |
750 | 0 | else |
751 | 0 | { |
752 | 0 | blPartUnitIdx -= m_absIdxInCTU; |
753 | 0 | return this; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | return NULL; |
757 | 0 | } |
758 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + RASTER_SIZE + s_numPartInCUSize - 1]; |
759 | 0 | return m_cuLeft; |
760 | 0 | } |
761 | | |
762 | 0 | return NULL; |
763 | 0 | } |
764 | | |
765 | | const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const |
766 | 0 | { |
767 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples) |
768 | 0 | return NULL; |
769 | | |
770 | 0 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; |
771 | |
|
772 | 0 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset)) |
773 | 0 | { |
774 | 0 | if (!isZeroCol(absPartIdxLB)) |
775 | 0 | { |
776 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) - 1]) |
777 | 0 | { |
778 | 0 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE); |
779 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) - 1]; |
780 | 0 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB)) |
781 | 0 | return m_encData->getPicCTU(m_cuAddr); |
782 | 0 | else |
783 | 0 | { |
784 | 0 | blPartUnitIdx -= m_absIdxInCTU; |
785 | 0 | return this; |
786 | 0 | } |
787 | 0 | } |
788 | 0 | return NULL; |
789 | 0 | } |
790 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) + s_numPartInCUSize - 1]; |
791 | 0 | return m_cuLeft; |
792 | 0 | } |
793 | | |
794 | 0 | return NULL; |
795 | 0 | } |
796 | | |
797 | | const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const |
798 | 0 | { |
799 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples) |
800 | 0 | return NULL; |
801 | | |
802 | 0 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; |
803 | |
|
804 | 0 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset)) |
805 | 0 | { |
806 | 0 | if (!isZeroRow(absPartIdxRT)) |
807 | 0 | { |
808 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - RASTER_SIZE + partUnitOffset]) |
809 | 0 | { |
810 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; |
811 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - RASTER_SIZE + partUnitOffset]; |
812 | 0 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx)) |
813 | 0 | return m_encData->getPicCTU(m_cuAddr); |
814 | 0 | else |
815 | 0 | { |
816 | 0 | arPartUnitIdx -= m_absIdxInCTU; |
817 | 0 | return this; |
818 | 0 | } |
819 | 0 | } |
820 | 0 | return NULL; |
821 | 0 | } |
822 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset]; |
823 | 0 | return m_cuAbove; |
824 | 0 | } |
825 | | |
826 | 0 | if (!isZeroRow(absPartIdxRT)) |
827 | 0 | return NULL; |
828 | | |
829 | 0 | arPartUnitIdx = g_rasterToZscan[((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset - 1]; |
830 | 0 | return m_cuAboveRight; |
831 | 0 | } |
832 | | |
833 | | /* Get left QpMinCu */ |
834 | | const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const |
835 | 0 | { |
836 | 0 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); |
837 | 0 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; |
838 | | |
839 | | // check for left CTU boundary |
840 | 0 | if (isZeroCol(absRorderQpMinCUIdx)) |
841 | 0 | return NULL; |
842 | | |
843 | | // get index of left-CU relative to top-left corner of current quantization group |
844 | 0 | lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1]; |
845 | | |
846 | | // return pointer to current CTU |
847 | 0 | return m_encData->getPicCTU(m_cuAddr); |
848 | 0 | } |
849 | | |
850 | | /* Get above QpMinCu */ |
851 | | const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const |
852 | 0 | { |
853 | 0 | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); |
854 | 0 | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; |
855 | | |
856 | | // check for top CTU boundary |
857 | 0 | if (isZeroRow(absRorderQpMinCUIdx)) |
858 | 0 | return NULL; |
859 | | |
860 | | // get index of top-CU relative to top-left corner of current quantization group |
861 | 0 | aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - RASTER_SIZE]; |
862 | | |
863 | | // return pointer to current CTU |
864 | 0 | return m_encData->getPicCTU(m_cuAddr); |
865 | 0 | } |
866 | | |
867 | | /* Get reference QP from left QpMinCu or latest coded QP */ |
868 | | int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const |
869 | 0 | { |
870 | 0 | uint32_t lPartIdx = 0, aPartIdx = 0; |
871 | 0 | const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); |
872 | 0 | const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU); |
873 | |
|
874 | 0 | return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1; |
875 | 0 | } |
876 | | |
877 | | int CUData::getLastValidPartIdx(int absPartIdx) const |
878 | 0 | { |
879 | 0 | int lastValidPartIdx = absPartIdx - 1; |
880 | |
|
881 | 0 | while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE) |
882 | 0 | { |
883 | 0 | uint32_t depth = m_cuDepth[lastValidPartIdx]; |
884 | 0 | lastValidPartIdx -= m_numPartitions >> (depth << 1); |
885 | 0 | } |
886 | |
|
887 | 0 | return lastValidPartIdx; |
888 | 0 | } |
889 | | |
890 | | int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const |
891 | 0 | { |
892 | 0 | uint32_t quPartIdxMask = 0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2; |
893 | 0 | int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask); |
894 | |
|
895 | 0 | if (lastValidPartIdx >= 0) |
896 | 0 | return m_qp[lastValidPartIdx]; |
897 | 0 | else |
898 | 0 | { |
899 | 0 | if (m_absIdxInCTU) |
900 | 0 | return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU); |
901 | 0 | else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth))) |
902 | 0 | return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(m_encData->m_param->num4x4Partitions); |
903 | 0 | else |
904 | 0 | return (int8_t)m_slice->m_sliceQp; |
905 | 0 | } |
906 | 0 | } |
907 | | |
908 | | /* Get allowed chroma intra modes */ |
909 | | void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const |
910 | 0 | { |
911 | 0 | modeList[0] = PLANAR_IDX; |
912 | 0 | modeList[1] = VER_IDX; |
913 | 0 | modeList[2] = HOR_IDX; |
914 | 0 | modeList[3] = DC_IDX; |
915 | 0 | modeList[4] = DM_CHROMA_IDX; |
916 | |
|
917 | 0 | uint32_t lumaMode = m_lumaIntraDir[absPartIdx]; |
918 | |
|
919 | 0 | for (int i = 0; i < NUM_CHROMA_MODE - 1; i++) |
920 | 0 | { |
921 | 0 | if (lumaMode == modeList[i]) |
922 | 0 | { |
923 | 0 | modeList[i] = 34; // VER+8 mode |
924 | 0 | break; |
925 | 0 | } |
926 | 0 | } |
927 | 0 | } |
928 | | |
929 | | /* Get most probable intra modes */ |
930 | | int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const |
931 | 0 | { |
932 | 0 | const CUData* tempCU; |
933 | 0 | uint32_t tempPartIdx; |
934 | 0 | uint32_t leftIntraDir, aboveIntraDir; |
935 | | |
936 | | // Get intra direction of left PU |
937 | 0 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
938 | |
|
939 | 0 | leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; |
940 | | |
941 | | // Get intra direction of above PU |
942 | 0 | tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL; |
943 | |
|
944 | 0 | aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; |
945 | |
|
946 | 0 | if (leftIntraDir == aboveIntraDir) |
947 | 0 | { |
948 | 0 | if (leftIntraDir >= 2) // angular modes |
949 | 0 | { |
950 | 0 | intraDirPred[0] = leftIntraDir; |
951 | 0 | intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2; |
952 | 0 | intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2; |
953 | 0 | } |
954 | 0 | else //non-angular |
955 | 0 | { |
956 | 0 | intraDirPred[0] = PLANAR_IDX; |
957 | 0 | intraDirPred[1] = DC_IDX; |
958 | 0 | intraDirPred[2] = VER_IDX; |
959 | 0 | } |
960 | 0 | return 1; |
961 | 0 | } |
962 | 0 | else |
963 | 0 | { |
964 | 0 | intraDirPred[0] = leftIntraDir; |
965 | 0 | intraDirPred[1] = aboveIntraDir; |
966 | |
|
967 | 0 | if (leftIntraDir && aboveIntraDir) //both modes are non-planar |
968 | 0 | intraDirPred[2] = PLANAR_IDX; |
969 | 0 | else |
970 | 0 | intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX; |
971 | 0 | return 2; |
972 | 0 | } |
973 | 0 | } |
974 | | |
975 | | uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const |
976 | 0 | { |
977 | 0 | const CUData* tempCU; |
978 | 0 | uint32_t tempPartIdx; |
979 | 0 | uint32_t ctx; |
980 | | |
981 | | // Get left split flag |
982 | 0 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
983 | 0 | ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; |
984 | | |
985 | | // Get above split flag |
986 | 0 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); |
987 | 0 | ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; |
988 | |
|
989 | 0 | return ctx; |
990 | 0 | } |
991 | | |
992 | | void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const |
993 | 0 | { |
994 | 0 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; |
995 | 0 | uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N; |
996 | |
|
997 | 0 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; |
998 | 0 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; |
999 | |
|
1000 | 0 | tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag)); |
1001 | 0 | } |
1002 | | |
1003 | | void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const |
1004 | 0 | { |
1005 | 0 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; |
1006 | 0 | uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter; |
1007 | 0 | uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N; |
1008 | |
|
1009 | 0 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; |
1010 | 0 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; |
1011 | |
|
1012 | 0 | tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); |
1013 | 0 | } |
1014 | | |
1015 | | uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const |
1016 | 0 | { |
1017 | 0 | const CUData* tempCU; |
1018 | 0 | uint32_t tempPartIdx; |
1019 | 0 | uint32_t ctx; |
1020 | | |
1021 | | // Get BCBP of left PU |
1022 | 0 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
1023 | 0 | ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0; |
1024 | | |
1025 | | // Get BCBP of above PU |
1026 | 0 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); |
1027 | 0 | ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0; |
1028 | |
|
1029 | 0 | return ctx; |
1030 | 0 | } |
1031 | | |
1032 | | bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth) |
1033 | 0 | { |
1034 | 0 | uint32_t curPartNumb = m_encData->m_param->num4x4Partitions >> (depth << 1); |
1035 | 0 | uint32_t curPartNumQ = curPartNumb >> 2; |
1036 | |
|
1037 | 0 | if (m_cuDepth[absPartIdx] > depth) |
1038 | 0 | { |
1039 | 0 | for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) |
1040 | 0 | if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1)) |
1041 | 0 | return true; |
1042 | 0 | } |
1043 | 0 | else |
1044 | 0 | { |
1045 | 0 | if (getQtRootCbf(absPartIdx)) |
1046 | 0 | return true; |
1047 | 0 | else |
1048 | 0 | setQPSubParts(qp, absPartIdx, depth); |
1049 | 0 | } |
1050 | | |
1051 | 0 | return false; |
1052 | 0 | } |
1053 | | |
1054 | | void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx) |
1055 | 0 | { |
1056 | 0 | uint32_t curPartNumQ = m_numPartitions >> 2; |
1057 | 0 | X265_CHECK(puIdx < 2, "unexpected part unit index\n"); |
1058 | |
|
1059 | 0 | switch (m_partSize[absPartIdx]) |
1060 | 0 | { |
1061 | 0 | case SIZE_2Nx2N: |
1062 | 0 | memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ); |
1063 | 0 | break; |
1064 | 0 | case SIZE_2NxN: |
1065 | 0 | memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ); |
1066 | 0 | break; |
1067 | 0 | case SIZE_Nx2N: |
1068 | 0 | memset(m_interDir + absPartIdx, dir, curPartNumQ); |
1069 | 0 | memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ); |
1070 | 0 | break; |
1071 | 0 | case SIZE_NxN: |
1072 | 0 | memset(m_interDir + absPartIdx, dir, curPartNumQ); |
1073 | 0 | break; |
1074 | 0 | case SIZE_2NxnU: |
1075 | 0 | if (!puIdx) |
1076 | 0 | { |
1077 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); |
1078 | 0 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); |
1079 | 0 | } |
1080 | 0 | else |
1081 | 0 | { |
1082 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); |
1083 | 0 | memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1))); |
1084 | 0 | } |
1085 | 0 | break; |
1086 | 0 | case SIZE_2NxnD: |
1087 | 0 | if (!puIdx) |
1088 | 0 | { |
1089 | 0 | memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1))); |
1090 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1)); |
1091 | 0 | } |
1092 | 0 | else |
1093 | 0 | { |
1094 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1)); |
1095 | 0 | memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1)); |
1096 | 0 | } |
1097 | 0 | break; |
1098 | 0 | case SIZE_nLx2N: |
1099 | 0 | if (!puIdx) |
1100 | 0 | { |
1101 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); |
1102 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1103 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); |
1104 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1105 | 0 | } |
1106 | 0 | else |
1107 | 0 | { |
1108 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); |
1109 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); |
1110 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); |
1111 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2))); |
1112 | 0 | } |
1113 | 0 | break; |
1114 | 0 | case SIZE_nRx2N: |
1115 | 0 | if (!puIdx) |
1116 | 0 | { |
1117 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2))); |
1118 | 0 | memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1119 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2))); |
1120 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1121 | 0 | } |
1122 | 0 | else |
1123 | 0 | { |
1124 | 0 | memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2)); |
1125 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1126 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2)); |
1127 | 0 | memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2)); |
1128 | 0 | } |
1129 | 0 | break; |
1130 | 0 | default: |
1131 | 0 | X265_CHECK(0, "unexpected part type\n"); |
1132 | 0 | break; |
1133 | 0 | } |
1134 | 0 | } |
1135 | | |
1136 | | template<typename T> |
1137 | | void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx) |
1138 | 0 | { |
1139 | 0 | int i; |
1140 | |
|
1141 | 0 | p += absPartIdx; |
1142 | 0 | int numElements = m_numPartitions; |
1143 | |
|
1144 | 0 | switch (m_partSize[absPartIdx]) |
1145 | 0 | { |
1146 | 0 | case SIZE_2Nx2N: |
1147 | 0 | for (i = 0; i < numElements; i++) |
1148 | 0 | p[i] = val; |
1149 | 0 | break; |
1150 | | |
1151 | 0 | case SIZE_2NxN: |
1152 | 0 | numElements >>= 1; |
1153 | 0 | for (i = 0; i < numElements; i++) |
1154 | 0 | p[i] = val; |
1155 | 0 | break; |
1156 | | |
1157 | 0 | case SIZE_Nx2N: |
1158 | 0 | numElements >>= 2; |
1159 | 0 | for (i = 0; i < numElements; i++) |
1160 | 0 | { |
1161 | 0 | p[i] = val; |
1162 | 0 | p[i + 2 * numElements] = val; |
1163 | 0 | } |
1164 | 0 | break; |
1165 | | |
1166 | 0 | case SIZE_2NxnU: |
1167 | 0 | { |
1168 | 0 | int curPartNumQ = numElements >> 2; |
1169 | 0 | if (!puIdx) |
1170 | 0 | { |
1171 | 0 | T *pT = p; |
1172 | 0 | T *pT2 = p + curPartNumQ; |
1173 | 0 | for (i = 0; i < (curPartNumQ >> 1); i++) |
1174 | 0 | { |
1175 | 0 | pT[i] = val; |
1176 | 0 | pT2[i] = val; |
1177 | 0 | } |
1178 | 0 | } |
1179 | 0 | else |
1180 | 0 | { |
1181 | 0 | T *pT = p; |
1182 | 0 | for (i = 0; i < (curPartNumQ >> 1); i++) |
1183 | 0 | pT[i] = val; |
1184 | |
|
1185 | 0 | pT = p + curPartNumQ; |
1186 | 0 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) |
1187 | 0 | pT[i] = val; |
1188 | 0 | } |
1189 | 0 | break; |
1190 | 0 | } |
1191 | | |
1192 | 0 | case SIZE_2NxnD: |
1193 | 0 | { |
1194 | 0 | int curPartNumQ = numElements >> 2; |
1195 | 0 | if (!puIdx) |
1196 | 0 | { |
1197 | 0 | T *pT = p; |
1198 | 0 | for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++) |
1199 | 0 | pT[i] = val; |
1200 | |
|
1201 | 0 | pT = p + (numElements - curPartNumQ); |
1202 | 0 | for (i = 0; i < (curPartNumQ >> 1); i++) |
1203 | 0 | pT[i] = val; |
1204 | 0 | } |
1205 | 0 | else |
1206 | 0 | { |
1207 | 0 | T *pT = p; |
1208 | 0 | T *pT2 = p + curPartNumQ; |
1209 | 0 | for (i = 0; i < (curPartNumQ >> 1); i++) |
1210 | 0 | { |
1211 | 0 | pT[i] = val; |
1212 | 0 | pT2[i] = val; |
1213 | 0 | } |
1214 | 0 | } |
1215 | 0 | break; |
1216 | 0 | } |
1217 | | |
1218 | 0 | case SIZE_nLx2N: |
1219 | 0 | { |
1220 | 0 | int curPartNumQ = numElements >> 2; |
1221 | 0 | if (!puIdx) |
1222 | 0 | { |
1223 | 0 | T *pT = p; |
1224 | 0 | T *pT2 = p + (curPartNumQ << 1); |
1225 | 0 | T *pT3 = p + (curPartNumQ >> 1); |
1226 | 0 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); |
1227 | |
|
1228 | 0 | for (i = 0; i < (curPartNumQ >> 2); i++) |
1229 | 0 | { |
1230 | 0 | pT[i] = val; |
1231 | 0 | pT2[i] = val; |
1232 | 0 | pT3[i] = val; |
1233 | 0 | pT4[i] = val; |
1234 | 0 | } |
1235 | 0 | } |
1236 | 0 | else |
1237 | 0 | { |
1238 | 0 | T *pT = p; |
1239 | 0 | T *pT2 = p + (curPartNumQ << 1); |
1240 | 0 | for (i = 0; i < (curPartNumQ >> 2); i++) |
1241 | 0 | { |
1242 | 0 | pT[i] = val; |
1243 | 0 | pT2[i] = val; |
1244 | 0 | } |
1245 | |
|
1246 | 0 | pT = p + (curPartNumQ >> 1); |
1247 | 0 | pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); |
1248 | 0 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) |
1249 | 0 | { |
1250 | 0 | pT[i] = val; |
1251 | 0 | pT2[i] = val; |
1252 | 0 | } |
1253 | 0 | } |
1254 | 0 | break; |
1255 | 0 | } |
1256 | | |
1257 | 0 | case SIZE_nRx2N: |
1258 | 0 | { |
1259 | 0 | int curPartNumQ = numElements >> 2; |
1260 | 0 | if (!puIdx) |
1261 | 0 | { |
1262 | 0 | T *pT = p; |
1263 | 0 | T *pT2 = p + (curPartNumQ << 1); |
1264 | 0 | for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++) |
1265 | 0 | { |
1266 | 0 | pT[i] = val; |
1267 | 0 | pT2[i] = val; |
1268 | 0 | } |
1269 | |
|
1270 | 0 | pT = p + curPartNumQ + (curPartNumQ >> 1); |
1271 | 0 | pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1); |
1272 | 0 | for (i = 0; i < (curPartNumQ >> 2); i++) |
1273 | 0 | { |
1274 | 0 | pT[i] = val; |
1275 | 0 | pT2[i] = val; |
1276 | 0 | } |
1277 | 0 | } |
1278 | 0 | else |
1279 | 0 | { |
1280 | 0 | T *pT = p; |
1281 | 0 | T *pT2 = p + (curPartNumQ >> 1); |
1282 | 0 | T *pT3 = p + (curPartNumQ << 1); |
1283 | 0 | T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1); |
1284 | 0 | for (i = 0; i < (curPartNumQ >> 2); i++) |
1285 | 0 | { |
1286 | 0 | pT[i] = val; |
1287 | 0 | pT2[i] = val; |
1288 | 0 | pT3[i] = val; |
1289 | 0 | pT4[i] = val; |
1290 | 0 | } |
1291 | 0 | } |
1292 | 0 | break; |
1293 | 0 | } |
1294 | | |
1295 | 0 | case SIZE_NxN: |
1296 | 0 | default: |
1297 | 0 | X265_CHECK(0, "unknown partition type\n"); |
1298 | 0 | break; |
1299 | 0 | } |
1300 | 0 | } Unexecuted instantiation: void x265::CUData::setAllPU<x265::MV>(x265::MV*, x265::MV const&, int, int) Unexecuted instantiation: void x265::CUData::setAllPU<signed char>(signed char*, signed char const&, int, int) |
1301 | | |
1302 | | void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx) |
1303 | 0 | { |
1304 | 0 | setAllPU(m_mv[list], mv, absPartIdx, puIdx); |
1305 | 0 | } |
1306 | | |
1307 | | void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx) |
1308 | 0 | { |
1309 | 0 | setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); |
1310 | 0 | } |
1311 | | |
1312 | | void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const |
1313 | 0 | { |
1314 | 0 | int cuSize = 1 << m_log2CUSize[0]; |
1315 | 0 | int partType = m_partSize[0]; |
1316 | |
|
1317 | 0 | int tmp = partTable[partType][partIdx][0]; |
1318 | 0 | outWidth = ((tmp >> 4) * cuSize) >> 2; |
1319 | 0 | outHeight = ((tmp & 0xF) * cuSize) >> 2; |
1320 | 0 | outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4; |
1321 | 0 | } |
1322 | | |
1323 | | void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const |
1324 | 0 | { |
1325 | 0 | if (cu) |
1326 | 0 | { |
1327 | 0 | outMvField.mv = cu->m_mv[picList][absPartIdx]; |
1328 | 0 | outMvField.refIdx = cu->m_refIdx[picList][absPartIdx]; |
1329 | 0 | } |
1330 | 0 | else |
1331 | 0 | { |
1332 | | // OUT OF BOUNDARY |
1333 | 0 | outMvField.mv = 0; |
1334 | 0 | outMvField.refIdx = REF_NOT_VALID; |
1335 | 0 | } |
1336 | 0 | } |
1337 | | |
1338 | | void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const |
1339 | 0 | { |
1340 | 0 | partIdxLT = m_absIdxInCTU; |
1341 | 0 | partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; |
1342 | |
|
1343 | 0 | switch (m_partSize[0]) |
1344 | 0 | { |
1345 | 0 | case SIZE_2Nx2N: break; |
1346 | 0 | case SIZE_2NxN: |
1347 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1; |
1348 | 0 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1; |
1349 | 0 | break; |
1350 | 0 | case SIZE_Nx2N: |
1351 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2; |
1352 | 0 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2; |
1353 | 0 | break; |
1354 | 0 | case SIZE_NxN: |
1355 | 0 | partIdxLT += (m_numPartitions >> 2) * partIdx; |
1356 | 0 | partIdxRT += (m_numPartitions >> 2) * (partIdx - 1); |
1357 | 0 | break; |
1358 | 0 | case SIZE_2NxnU: |
1359 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3; |
1360 | 0 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3; |
1361 | 0 | break; |
1362 | 0 | case SIZE_2NxnD: |
1363 | 0 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); |
1364 | 0 | partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); |
1365 | 0 | break; |
1366 | 0 | case SIZE_nLx2N: |
1367 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4; |
1368 | 0 | partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); |
1369 | 0 | break; |
1370 | 0 | case SIZE_nRx2N: |
1371 | 0 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); |
1372 | 0 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4; |
1373 | 0 | break; |
1374 | 0 | default: |
1375 | 0 | X265_CHECK(0, "unexpected part index\n"); |
1376 | 0 | break; |
1377 | 0 | } |
1378 | 0 | } |
1379 | | |
1380 | | uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const |
1381 | 0 | { |
1382 | 0 | uint32_t outPartIdxLB; |
1383 | 0 | outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)]; |
1384 | |
|
1385 | 0 | switch (m_partSize[0]) |
1386 | 0 | { |
1387 | 0 | case SIZE_2Nx2N: |
1388 | 0 | outPartIdxLB += m_numPartitions >> 1; |
1389 | 0 | break; |
1390 | 0 | case SIZE_2NxN: |
1391 | 0 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0; |
1392 | 0 | break; |
1393 | 0 | case SIZE_Nx2N: |
1394 | 0 | outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1; |
1395 | 0 | break; |
1396 | 0 | case SIZE_NxN: |
1397 | 0 | outPartIdxLB += (m_numPartitions >> 2) * puIdx; |
1398 | 0 | break; |
1399 | 0 | case SIZE_2NxnU: |
1400 | 0 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); |
1401 | 0 | break; |
1402 | 0 | case SIZE_2NxnD: |
1403 | 0 | outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); |
1404 | 0 | break; |
1405 | 0 | case SIZE_nLx2N: |
1406 | 0 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1; |
1407 | 0 | break; |
1408 | 0 | case SIZE_nRx2N: |
1409 | 0 | outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1; |
1410 | 0 | break; |
1411 | 0 | default: |
1412 | 0 | X265_CHECK(0, "unexpected part index\n"); |
1413 | 0 | break; |
1414 | 0 | } |
1415 | 0 | return outPartIdxLB; |
1416 | 0 | } |
1417 | | |
1418 | | /* Derives the partition index of neighboring bottom right block */ |
1419 | | uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const |
1420 | 0 | { |
1421 | 0 | uint32_t outPartIdxRB; |
1422 | 0 | outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + |
1423 | 0 | (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE) + |
1424 | 0 | (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; |
1425 | |
|
1426 | 0 | switch (m_partSize[0]) |
1427 | 0 | { |
1428 | 0 | case SIZE_2Nx2N: |
1429 | 0 | outPartIdxRB += m_numPartitions >> 1; |
1430 | 0 | break; |
1431 | 0 | case SIZE_2NxN: |
1432 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0; |
1433 | 0 | break; |
1434 | 0 | case SIZE_Nx2N: |
1435 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2; |
1436 | 0 | break; |
1437 | 0 | case SIZE_NxN: |
1438 | 0 | outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1); |
1439 | 0 | break; |
1440 | 0 | case SIZE_2NxnU: |
1441 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); |
1442 | 0 | break; |
1443 | 0 | case SIZE_2NxnD: |
1444 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); |
1445 | 0 | break; |
1446 | 0 | case SIZE_nLx2N: |
1447 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4); |
1448 | 0 | break; |
1449 | 0 | case SIZE_nRx2N: |
1450 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4); |
1451 | 0 | break; |
1452 | 0 | default: |
1453 | 0 | X265_CHECK(0, "unexpected part index\n"); |
1454 | 0 | break; |
1455 | 0 | } |
1456 | 0 | return outPartIdxRB; |
1457 | 0 | } |
1458 | | |
1459 | | bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const |
1460 | 0 | { |
1461 | 0 | if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) |
1462 | 0 | return false; |
1463 | | |
1464 | 0 | for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++) |
1465 | 0 | { |
1466 | 0 | if (m_interDir[absPartIdx] & (1 << refListIdx)) |
1467 | 0 | { |
1468 | 0 | if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] || |
1469 | 0 | m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx]) |
1470 | 0 | return false; |
1471 | 0 | } |
1472 | 0 | } |
1473 | | |
1474 | 0 | return true; |
1475 | 0 | } |
1476 | | |
/* Construct list of merging candidates, returns count.
 *
 * absPartIdx   partition index of the PU, relative to this CU
 * puIdx        index of the PU inside the CU
 * candMvField  output; per candidate one MVField for list 0 and one for list 1.
 *              The first m_maxNumMergeCand entries are reset to a zero MV with
 *              REF_NOT_VALID before any candidate is derived
 * candDir      output; inter direction of each candidate (bit 0 = list 0,
 *              bit 1 = list 1)
 *
 * Candidates are appended in this order: left (A1), above (B1), above right
 * (B0), left bottom (A0), above left (B2, only while fewer than four
 * candidates exist), the temporal candidate, combined bi-predictive candidates
 * (B slices only) and finally zero-MV candidates. The function returns
 * m_maxNumMergeCand as soon as the list is full, otherwise the number of
 * candidates that were written. */
uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const
{
    uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
    const bool isInterB = m_slice->isInterB();

    const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand;

    // reset every slot so that unused lists read as "no reference"
    for (uint32_t i = 0; i < maxNumMergeCand; ++i)
    {
        candMvField[i][0].mv = 0;
        candMvField[i][1].mv = 0;
        candMvField[i][0].refIdx = REF_NOT_VALID;
        candMvField[i][1].refIdx = REF_NOT_VALID;
    }

    /* calculate the location of upper-left corner pixel and size of the current PU */
    int xP, yP, nPSW, nPSH;

    int cuSize = 1 << m_log2CUSize[0];
    int partMode = m_partSize[0];

    // each partTable entry packs two 4-bit values (high nibble, low nibble)
    // expressed in quarters of the CU size; entry [0] gives width/height
    int tmp = partTable[partMode][puIdx][0];
    nPSW = ((tmp >> 4) * cuSize) >> 2;
    nPSH = ((tmp & 0xF) * cuSize) >> 2;

    // entry [1] gives the x/y offset of the PU inside the CU
    tmp = partTable[partMode][puIdx][1];
    xP = ((tmp >> 4) * cuSize) >> 2;
    yP = ((tmp & 0xF) * cuSize) >> 2;

    uint32_t count = 0;

    uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx);
    PartSize curPS = (PartSize)m_partSize[absPartIdx];

    // left (A1); never taken for the second PU of a vertically split CU
    uint32_t leftPartIdx = 0;
    const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB);
    bool isAvailableA1 = cuLeft &&
        cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) &&
        !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) &&
        cuLeft->isInter(leftPartIdx);
    if (isAvailableA1)
    {
        // get Inter Dir
        candDir[count] = cuLeft->m_interDir[leftPartIdx];
        // get Mv from Left
        cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]);
        if (isInterB)
            cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]);

        if (++count == maxNumMergeCand)
            return maxNumMergeCand;
    }

    deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT);

    // above (B1); never taken for the second PU of a horizontally split CU,
    // and skipped when its motion equals the left candidate
    uint32_t abovePartIdx = 0;
    const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT);
    bool isAvailableB1 = cuAbove &&
        cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) &&
        !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) &&
        cuAbove->isInter(abovePartIdx);
    if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx)))
    {
        // get Inter Dir
        candDir[count] = cuAbove->m_interDir[abovePartIdx];
        // get Mv from Above
        cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]);
        if (isInterB)
            cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]);

        if (++count == maxNumMergeCand)
            return maxNumMergeCand;
    }

    // above right (B0); skipped when its motion equals the above candidate
    uint32_t aboveRightPartIdx = 0;
    const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT);
    bool isAvailableB0 = cuAboveRight &&
        cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) &&
        cuAboveRight->isInter(aboveRightPartIdx);
    if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx)))
    {
        // get Inter Dir
        candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx];
        // get Mv from Above Right
        cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]);
        if (isInterB)
            cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]);

        if (++count == maxNumMergeCand)
            return maxNumMergeCand;
    }

    // left bottom (A0); skipped when its motion equals the left candidate
    uint32_t leftBottomPartIdx = 0;
    const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB);
    bool isAvailableA0 = cuLeftBottom &&
        cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) &&
        cuLeftBottom->isInter(leftBottomPartIdx);
    if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx)))
    {
        // get Inter Dir
        candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx];
        // get Mv from Left Bottom
        cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]);
        if (isInterB)
            cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]);

        if (++count == maxNumMergeCand)
            return maxNumMergeCand;
    }

    // above left (B2); only considered while fewer than four spatial
    // candidates were found, and skipped when equal to the left or above one
    if (count < 4)
    {
        uint32_t aboveLeftPartIdx = 0;
        const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr);
        bool isAvailableB2 = cuAboveLeft &&
            cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) &&
            cuAboveLeft->isInter(aboveLeftPartIdx);
        if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx))
            && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx)))
        {
            // get Inter Dir
            candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx];
            // get Mv from Above Left
            cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]);
            if (isInterB)
                cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]);

            if (++count == maxNumMergeCand)
                return maxNumMergeCand;
        }
    }
    // temporal candidate
#if ENABLE_SCC_EXT
    if (m_slice->m_bTemporalMvp)
#else
    if (m_slice->m_sps->bTemporalMVPEnabled)
#endif
    {
        uint32_t partIdxRB = deriveRightBottomIdx(puIdx);
        MV colmv;
        // ctuIdx stays negative when no bottom-right collocated position is
        // used; the PU center is then tried instead (see the loop below)
        int ctuIdx = -1;

        // image boundary check
        if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
            m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
        {
            uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
            uint32_t numUnits = s_numPartInCUSize;
            bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
            bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU

            if (bNotLastCol && bNotLastRow)
            {
                // bottom-right position lies inside the current CTU
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
                ctuIdx = m_cuAddr;
            }
            else if (bNotLastCol)
                // last row of the CTU: ctuIdx is left at -1, so this address is not used
                absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
            else if (bNotLastRow)
            {
                // last column of the CTU: position lies in the CTU with the next address
                absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
                ctuIdx = m_cuAddr + 1;
            }
            else // is the right bottom corner of CTU
                absPartAddr = 0;
        }

        int maxList = isInterB ? 2 : 1;
        int dir = 0, refIdx = 0;
        for (int list = 0; list < maxList; list++)
        {
            // try the bottom-right position first, then fall back to the PU center
            bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr);
            if (!bExistMV)
            {
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
                bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter);
            }
            if (bExistMV)
            {
                dir |= (1 << list);
                candMvField[count][list].mv = colmv;
                candMvField[count][list].refIdx = refIdx;
            }
        }

        if (dir != 0)
        {
            candDir[count] = (uint8_t)dir;

            if (++count == maxNumMergeCand)
                return maxNumMergeCand;
        }
    }

    // combined bi-predictive candidates: pair the list 0 motion of candidate i
    // with the list 1 motion of candidate j
    if (isInterB)
    {
        // number of ordered pairs among the candidates found so far; fixed
        // before the loop even though count may grow inside it
        const uint32_t cutoff = count * (count - 1);
        // both lists are packed two bits per entry, lowest bits first
        uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 }
        uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 }

        for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2)
        {
            int i = priorityList0 & 3;
            int j = priorityList1 & 3;

            if ((candDir[i] & 0x1) && (candDir[j] & 0x2))
            {
                // get Mv from cand[i] and cand[j]
                int refIdxL0 = candMvField[i][0].refIdx;
                int refIdxL1 = candMvField[j][1].refIdx;
                int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0];
                int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1];
                // skip the pair when both halves point to the same picture with the same MV
                if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv))
                {
                    candMvField[count][0].mv = candMvField[i][0].mv;
                    candMvField[count][0].refIdx = refIdxL0;
                    candMvField[count][1].mv = candMvField[j][1].mv;
                    candMvField[count][1].refIdx = refIdxL1;
                    candDir[count] = 3;

                    if (++count == maxNumMergeCand)
                        return maxNumMergeCand;
                }
            }
        }
    }
    // zero-MV candidates fill the remaining slots
    int numRefIdx0 = m_slice->m_numRefIdx[0];
#if ENABLE_SCC_EXT
    if (m_slice->m_param->bEnableSCC)
        numRefIdx0--;
#endif
    int numRefIdx = (isInterB) ? X265_MIN(numRefIdx0, m_slice->m_numRefIdx[1]) : numRefIdx0;
    int r = 0;
    int refcnt = 0;
    while (numRefIdx && (count < maxNumMergeCand))
    {
        candDir[count] = 1;
        candMvField[count][0].mv.word = 0;
        candMvField[count][0].refIdx = r;

        if (isInterB)
        {
            candDir[count] = 3;
            candMvField[count][1].mv.word = 0;
            candMvField[count][1].refIdx = r;
        }

        count++;

        // r walks 0 .. numRefIdx - 1 once; after that refcnt no longer
        // changes, so every further candidate uses reference index 0
        if (refcnt == numRefIdx - 1)
            r = 0;
        else
        {
            ++r;
            ++refcnt;
        }
    }

    return count;
}
1742 | | |
// Create the PMV list. Called for each reference index.
//
// neighbours  neighbour MVs indexed by MVP_DIR (spatial entries plus
//             MD_COLLOCATED)
// picList     reference list being predicted
// refIdx      reference index being predicted within picList
// amvpCand    output; receives the AMVP candidates, padded with zero MVs up
//             to AMVP_NUM_CANDS entries
// pmv         output; receives every non-zero direct/indirect spatial MV that
//             was found, plus the temporal MV when one is added
// puIdx, absPartIdx (multiview/SCC builds only) identify the PU so that the
//             temporal candidate can be derived here
//
// Returns the number of entries written to pmv.
#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv, uint32_t puIdx, uint32_t absPartIdx) const
#else
int CUData::getPMV(InterNeighbourMV* neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const
#endif
{
    // "direct" MVs come from getDirectPMV(), "indirect" ones from
    // getIndirectPMV(); the valid* flags hold the return value of each call
    MV directMV[MD_ABOVE_LEFT + 1];
    MV indirectMV[MD_ABOVE_LEFT + 1];
    bool validDirect[MD_ABOVE_LEFT + 1];
    bool validIndirect[MD_ABOVE_LEFT + 1];

#if (ENABLE_MULTIVIEW || ENABLE_SCC_EXT)
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
    {
        // Left candidate. When a left neighbour is available the indirect
        // above candidates are disabled.
        if ((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable)
        {
            validIndirect[MD_ABOVE_RIGHT] = validIndirect[MD_ABOVE] = validIndirect[MD_ABOVE_LEFT] = false;

            validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
            validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);

            validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
            validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
        }

        // Top candidate.
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);

        // No left neighbour available: mark the left candidates invalid and
        // evaluate the indirect top candidates instead.
        if (!((neighbours + MD_BELOW_LEFT)->isAvailable || (neighbours + MD_LEFT)->isAvailable))
        {
            validDirect[MD_BELOW_LEFT] = validDirect[MD_LEFT] = validIndirect[MD_BELOW_LEFT] = validIndirect[MD_LEFT] = false;
            validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
            validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
            validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
        }
    }
    else
#endif
    {
        // Left candidate.
        validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
        validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
        // Top candidate.
        validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
        validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
        validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);

        // Left candidate.
        validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx);
        validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx);
        // Top candidate.
        validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx);
        validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx);
        validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx);
    }

    int num = 0;
    // Left predictor search: at most one candidate, direct MVs take priority
    if (validDirect[MD_BELOW_LEFT])
        amvpCand[num++] = directMV[MD_BELOW_LEFT];
    else if (validDirect[MD_LEFT])
        amvpCand[num++] = directMV[MD_LEFT];
    else if (validIndirect[MD_BELOW_LEFT])
        amvpCand[num++] = indirectMV[MD_BELOW_LEFT];
    else if (validIndirect[MD_LEFT])
        amvpCand[num++] = indirectMV[MD_LEFT];

    bool bAddedSmvp = num > 0;

    // Above predictor search: at most one direct candidate
    if (validDirect[MD_ABOVE_RIGHT])
        amvpCand[num++] = directMV[MD_ABOVE_RIGHT];
    else if (validDirect[MD_ABOVE])
        amvpCand[num++] = directMV[MD_ABOVE];
    else if (validDirect[MD_ABOVE_LEFT])
        amvpCand[num++] = directMV[MD_ABOVE_LEFT];

    // Indirect above candidates are only considered when the left search
    // produced nothing
    if (!bAddedSmvp)
    {
        if (validIndirect[MD_ABOVE_RIGHT])
            amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT];
        else if (validIndirect[MD_ABOVE])
            amvpCand[num++] = indirectMV[MD_ABOVE];
        else if (validIndirect[MD_ABOVE_LEFT])
            amvpCand[num++] = indirectMV[MD_ABOVE_LEFT];
    }

    // Collect every valid, non-zero spatial MV into the larger pmv list
    int numMvc = 0;
    for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++)
    {
        if (validDirect[dir] && directMV[dir].notZero())
            pmv[numMvc++] = directMV[dir];

        if (validIndirect[dir] && indirectMV[dir].notZero())
            pmv[numMvc++] = indirectMV[dir];
    }

    // Drop the second AMVP candidate when it duplicates the first
    if (num == 2)
        num -= amvpCand[0] == amvpCand[1];

    // Get the collocated candidate. At this step, either the first candidate
    // was found or its value is 0.
#if ENABLE_MULTIVIEW || ENABLE_SCC_EXT
    if (m_slice->m_param->numViews > 1 || m_slice->m_param->bEnableSCC)
    {
        if (m_slice->m_bTemporalMvp && num < 2)
        {
            int refId = refIdx;
            uint32_t absPartAddr = m_absIdxInCTU + absPartIdx;
            uint32_t partIdxRB = deriveRightBottomIdx(puIdx);

            // co-located RightBottom temporal predictor (H)
            // ctuIdx stays negative when the bottom-right position is not used
            int ctuIdx = -1;

            // image boundary check
            if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples &&
                m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples)
            {
                uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB];
                uint32_t numUnits = s_numPartInCUSize;
                bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU
                bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU

                if (bNotLastCol && bNotLastRow)
                {
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1];
                    ctuIdx = m_cuAddr;
                }
                else if (bNotLastCol)
                    // last row of the CTU: ctuIdx is left at -1, so this address is not used
                    absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)];
                else if (bNotLastRow)
                {
                    absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1];
                    ctuIdx = m_cuAddr + 1;
                }
                else // is the right bottom corner of CTU
                    absPartAddr = 0;
            }
            // bottom-right position first, PU center as the fallback
            if (ctuIdx >= 0 && getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, ctuIdx, absPartAddr))
                pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
            else
            {
                uint32_t partIdxCenter = deriveCenterIdx(puIdx);
                uint32_t curCTUIdx = m_cuAddr;
                if (getColMVP(neighbours[MD_COLLOCATED].mv[picList], refId, picList, curCTUIdx, partIdxCenter))
                    pmv[numMvc++] = amvpCand[num++] = neighbours[MD_COLLOCATED].mv[picList];
            }
        }
    }
    else
#endif
    {
        if (m_slice->m_sps->bTemporalMVPEnabled && num < 2)
        {
            // this path reads the collocated entry stored in neighbours[MD_COLLOCATED]
            int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList];
            if (tempRefIdx != -1)
            {
                uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList];
                const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx];
                const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr);

                // Scale the vector
                // tempRefIdx is unpacked as: bits above the low four select the
                // reference list, the low four bits are the reference index
                int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf];
                int colPOC = colCU->m_slice->m_poc;

                int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
                int curPOC = m_slice->m_poc;

                pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC);
            }
        }
    }

    // Pad the AMVP list with zero vectors
    while (num < AMVP_NUM_CANDS)
        amvpCand[num++].set(0, 0);

    return numMvc;
}
1926 | | |
1927 | | /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */ |
1928 | | void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const |
1929 | 0 | { |
1930 | | // Set the temporal neighbour to unavailable by default. |
1931 | 0 | neighbours[MD_COLLOCATED].unifiedRef = -1; |
1932 | |
|
1933 | 0 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); |
1934 | 0 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); |
1935 | | |
1936 | | // Load the spatial MVs. |
1937 | 0 | getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT); |
1938 | 0 | getInterNeighbourMV(neighbours + MD_LEFT, partIdxLB, MD_LEFT); |
1939 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT); |
1940 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE, partIdxRT, MD_ABOVE); |
1941 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT); |
1942 | |
|
1943 | 0 | if (m_slice->m_bTemporalMvp && !(m_slice->m_param->bEnableSCC || m_slice->m_param->numViews > 1)) |
1944 | 0 | { |
1945 | 0 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; |
1946 | 0 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); |
1947 | | |
1948 | | // co-located RightBottom temporal predictor (H) |
1949 | 0 | int ctuIdx = -1; |
1950 | | |
1951 | | // image boundary check |
1952 | 0 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && |
1953 | 0 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) |
1954 | 0 | { |
1955 | 0 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; |
1956 | 0 | uint32_t numUnits = s_numPartInCUSize; |
1957 | 0 | bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU |
1958 | 0 | bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU |
1959 | |
|
1960 | 0 | if (bNotLastCol && bNotLastRow) |
1961 | 0 | { |
1962 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1]; |
1963 | 0 | ctuIdx = m_cuAddr; |
1964 | 0 | } |
1965 | 0 | else if (bNotLastCol) |
1966 | 0 | absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)]; |
1967 | 0 | else if (bNotLastRow) |
1968 | 0 | { |
1969 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1]; |
1970 | 0 | ctuIdx = m_cuAddr + 1; |
1971 | 0 | } |
1972 | 0 | else // is the right bottom corner of CTU |
1973 | 0 | absPartAddr = 0; |
1974 | 0 | } |
1975 | |
|
1976 | 0 | if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED))) |
1977 | 0 | { |
1978 | 0 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); |
1979 | 0 | uint32_t curCTUIdx = m_cuAddr; |
1980 | 0 | getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED); |
1981 | 0 | } |
1982 | 0 | } |
1983 | 0 | } |
1984 | | |
1985 | | void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const |
1986 | 0 | { |
1987 | 0 | const CUData* tmpCU = NULL; |
1988 | 0 | uint32_t idx = 0; |
1989 | |
|
1990 | 0 | switch (dir) |
1991 | 0 | { |
1992 | 0 | case MD_LEFT: |
1993 | 0 | tmpCU = getPULeft(idx, partUnitIdx); |
1994 | 0 | break; |
1995 | 0 | case MD_ABOVE: |
1996 | 0 | tmpCU = getPUAbove(idx, partUnitIdx); |
1997 | 0 | break; |
1998 | 0 | case MD_ABOVE_RIGHT: |
1999 | 0 | tmpCU = getPUAboveRight(idx, partUnitIdx); |
2000 | 0 | break; |
2001 | 0 | case MD_BELOW_LEFT: |
2002 | 0 | tmpCU = getPUBelowLeft(idx, partUnitIdx); |
2003 | 0 | break; |
2004 | 0 | case MD_ABOVE_LEFT: |
2005 | 0 | tmpCU = getPUAboveLeft(idx, partUnitIdx); |
2006 | 0 | break; |
2007 | 0 | default: |
2008 | 0 | break; |
2009 | 0 | } |
2010 | | |
2011 | 0 | if (!tmpCU) |
2012 | 0 | { |
2013 | | // Mark the PMV as unavailable. |
2014 | 0 | for (int i = 0; i < 2; i++) |
2015 | 0 | neighbour->refIdx[i] = -1; |
2016 | 0 | neighbour->isAvailable = (tmpCU != NULL) && (tmpCU->isInter(idx)); |
2017 | 0 | return; |
2018 | 0 | } |
2019 | | |
2020 | 0 | for (int i = 0; i < 2; i++) |
2021 | 0 | { |
2022 | | // Get the MV. |
2023 | 0 | neighbour->mv[i] = tmpCU->m_mv[i][idx]; |
2024 | | |
2025 | | // Get the reference idx. |
2026 | 0 | neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx]; |
2027 | 0 | neighbour->isAvailable = (tmpCU != NULL) && (tmpCU->isInter(idx)); |
2028 | 0 | } |
2029 | 0 | } |
2030 | | |
2031 | | /* Clip motion vector to within slightly padded boundary of picture (the |
2032 | | * MV may reference a block that is completely within the padded area). |
2033 | | * Note this function is unaware of how much of this picture is actually |
2034 | | * available for use (re: frame parallelism) */ |
2035 | | void CUData::clipMv(MV& outMV) const |
2036 | 0 | { |
2037 | 0 | const uint32_t mvshift = 2; |
2038 | 0 | uint32_t offset = 8; |
2039 | |
|
2040 | 0 | int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift); |
2041 | 0 | int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift); |
2042 | |
|
2043 | 0 | int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift); |
2044 | 0 | int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift); |
2045 | |
|
2046 | 0 | outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x)); |
2047 | 0 | outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y)); |
2048 | 0 | } |
2049 | | |
2050 | | // Load direct spatial MV if available. |
2051 | | bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const |
2052 | 0 | { |
2053 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; |
2054 | 0 | for (int i = 0; i < 2; i++, picList = !picList) |
2055 | 0 | { |
2056 | 0 | int partRefIdx = neighbours->refIdx[picList]; |
2057 | 0 | if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx]) |
2058 | 0 | { |
2059 | 0 | pmv = neighbours->mv[picList]; |
2060 | 0 | return true; |
2061 | 0 | } |
2062 | 0 | } |
2063 | 0 | return false; |
2064 | 0 | } |
2065 | | |
2066 | | // Load indirect spatial MV if available. An indirect MV has to be scaled. |
2067 | | bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const |
2068 | 0 | { |
2069 | 0 | int curPOC = m_slice->m_poc; |
2070 | 0 | int neibPOC = curPOC; |
2071 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; |
2072 | |
|
2073 | 0 | for (int i = 0; i < 2; i++, picList = !picList) |
2074 | 0 | { |
2075 | 0 | int partRefIdx = neighbours->refIdx[picList]; |
2076 | 0 | if (partRefIdx >= 0) |
2077 | 0 | { |
2078 | 0 | int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx]; |
2079 | 0 | MV mvp = neighbours->mv[picList]; |
2080 | |
|
2081 | | #if ENABLE_MULTIVIEW || ENABLE_SCC_EXT |
2082 | | if ((curRefPOC == curPOC) == (neibRefPOC == curPOC)) |
2083 | | { |
2084 | | if (curRefPOC == curPOC) |
2085 | | outMV = mvp; |
2086 | | if (!(curRefPOC == curPOC)) |
2087 | | outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); |
2088 | | return true; |
2089 | | } |
2090 | | #else |
2091 | 0 | outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); |
2092 | 0 | return true; |
2093 | 0 | #endif |
2094 | 0 | } |
2095 | 0 | } |
2096 | 0 | return false; |
2097 | 0 | } |
2098 | | |
2099 | | bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const |
2100 | 0 | { |
2101 | 0 | const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
2102 | 0 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); |
2103 | |
|
2104 | 0 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; |
2105 | 0 | if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) |
2106 | 0 | return false; |
2107 | | |
2108 | 0 | int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag; |
2109 | |
|
2110 | 0 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2111 | |
|
2112 | 0 | if (colRefIdx < 0) |
2113 | 0 | { |
2114 | 0 | colRefPicList = !colRefPicList; |
2115 | 0 | colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2116 | |
|
2117 | 0 | if (colRefIdx < 0) |
2118 | 0 | return false; |
2119 | 0 | } |
2120 | | |
2121 | | // Scale the vector |
2122 | 0 | int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx]; |
2123 | 0 | int colPOC = colCU->m_slice->m_poc; |
2124 | 0 | MV colmv = colCU->m_mv[colRefPicList][absPartAddr]; |
2125 | |
|
2126 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx]; |
2127 | 0 | int curPOC = m_slice->m_poc; |
2128 | |
|
2129 | | #if ENABLE_MULTIVIEW || ENABLE_SCC_EXT |
2130 | | if ((colPOC == colRefPOC) != (curPOC == curRefPOC)) |
2131 | | return false; |
2132 | | else if (curRefPOC == curPOC) |
2133 | | outMV = colmv; |
2134 | | else if (!(curRefPOC == curPOC)) |
2135 | | outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC); |
2136 | | #else |
2137 | 0 | outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC); |
2138 | 0 | #endif |
2139 | 0 | return true; |
2140 | 0 | } |
2141 | | |
2142 | | // Cache the collocated MV. |
2143 | | bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const |
2144 | 0 | { |
2145 | 0 | const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
2146 | 0 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); |
2147 | |
|
2148 | 0 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; |
2149 | 0 | if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) |
2150 | 0 | return false; |
2151 | | |
2152 | 0 | for (int list = 0; list < 2; list++) |
2153 | 0 | { |
2154 | 0 | neighbour->cuAddr[list] = cuAddr; |
2155 | 0 | int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag; |
2156 | 0 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2157 | |
|
2158 | 0 | if (colRefIdx < 0) |
2159 | 0 | colRefPicList = !colRefPicList; |
2160 | |
|
2161 | 0 | neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2162 | 0 | neighbour->refIdx[list] |= colRefPicList << 4; |
2163 | |
|
2164 | 0 | neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr]; |
2165 | 0 | } |
2166 | |
|
2167 | 0 | return neighbour->unifiedRef != -1; |
2168 | 0 | } |
2169 | | |
2170 | | MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const |
2171 | 0 | { |
2172 | 0 | int diffPocD = colPOC - colRefPOC; |
2173 | 0 | int diffPocB = curPOC - curRefPOC; |
2174 | |
|
2175 | 0 | if (diffPocD == diffPocB) |
2176 | 0 | return inMV; |
2177 | 0 | else |
2178 | 0 | { |
2179 | 0 | int tdb = x265_clip3(-128, 127, diffPocB); |
2180 | 0 | int tdd = x265_clip3(-128, 127, diffPocD); |
2181 | 0 | int x = (0x4000 + abs(tdd / 2)) / tdd; |
2182 | 0 | int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6); |
2183 | 0 | return scaleMv(inMV, scale); |
2184 | 0 | } |
2185 | 0 | } |
2186 | | |
2187 | | uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const |
2188 | 0 | { |
2189 | 0 | uint32_t absPartIdx; |
2190 | 0 | int puWidth, puHeight; |
2191 | |
|
2192 | 0 | getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight); |
2193 | |
|
2194 | 0 | return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx] |
2195 | 0 | + ((puHeight >> (LOG2_UNIT_SIZE + 1)) << LOG2_RASTER_SIZE) |
2196 | 0 | + (puWidth >> (LOG2_UNIT_SIZE + 1))]; |
2197 | 0 | } |
2198 | | |
2199 | | void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const |
2200 | 0 | { |
2201 | 0 | bool bIsIntra = isIntra(absPartIdx); |
2202 | | |
2203 | | // set the group layout |
2204 | 0 | const uint32_t log2TrSizeCG = log2TrSize - 2; |
2205 | | |
2206 | | // set the scan orders |
2207 | 0 | if (bIsIntra) |
2208 | 0 | { |
2209 | 0 | uint32_t dirMode; |
2210 | |
|
2211 | 0 | if (bIsLuma) |
2212 | 0 | dirMode = m_lumaIntraDir[absPartIdx]; |
2213 | 0 | else |
2214 | 0 | { |
2215 | 0 | dirMode = m_chromaIntraDir[absPartIdx]; |
2216 | 0 | if (dirMode == DM_CHROMA_IDX) |
2217 | 0 | { |
2218 | 0 | dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC]; |
2219 | 0 | dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode; |
2220 | 0 | } |
2221 | 0 | } |
2222 | |
|
2223 | 0 | if (log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE)) |
2224 | 0 | result.scanType = dirMode >= 22 && dirMode <= 30 ? SCAN_HOR : dirMode >= 6 && dirMode <= 14 ? SCAN_VER : SCAN_DIAG; |
2225 | 0 | else |
2226 | 0 | result.scanType = SCAN_DIAG; |
2227 | 0 | } |
2228 | 0 | else |
2229 | 0 | result.scanType = SCAN_DIAG; |
2230 | |
|
2231 | 0 | result.scan = g_scanOrder[result.scanType][log2TrSize - 2]; |
2232 | 0 | result.scanCG = g_scanOrderCG[result.scanType][log2TrSizeCG]; |
2233 | |
|
2234 | 0 | if (log2TrSize == 2) |
2235 | 0 | result.firstSignificanceMapContext = 0; |
2236 | 0 | else if (log2TrSize == 3) |
2237 | 0 | result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9; |
2238 | 0 | else |
2239 | 0 | result.firstSignificanceMapContext = bIsLuma ? 21 : 12; |
2240 | 0 | } |
2241 | | |
2242 | 0 | #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag)) |
2243 | | |
2244 | | void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) |
2245 | 0 | { |
2246 | 0 | uint32_t num4x4Partition = (1U << ((g_log2Size[maxCUSize] - LOG2_UNIT_SIZE) << 1)); |
2247 | | |
2248 | | // Initialize the coding blocks inside the CTB |
2249 | 0 | for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--) |
2250 | 0 | { |
2251 | 0 | uint32_t blockSize = 1 << log2CUSize; |
2252 | 0 | uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); |
2253 | 0 | int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize]; |
2254 | |
|
2255 | 0 | for (uint32_t sbY = 0; sbY < sbWidth; sbY++) |
2256 | 0 | { |
2257 | 0 | for (uint32_t sbX = 0; sbX < sbWidth; sbX++) |
2258 | 0 | { |
2259 | 0 | uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; |
2260 | 0 | uint32_t cuIdx = rangeCUIdx + depthIdx; |
2261 | 0 | uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2); |
2262 | 0 | uint32_t px = sbX * blockSize; |
2263 | 0 | uint32_t py = sbY * blockSize; |
2264 | 0 | int32_t presentFlag = px < ctuWidth && py < ctuHeight; |
2265 | 0 | int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight); |
2266 | | |
2267 | | /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ |
2268 | 0 | uint32_t xOffset = (sbX * blockSize) >> 3; |
2269 | 0 | uint32_t yOffset = (sbY * blockSize) >> 3; |
2270 | 0 | X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n"); |
2271 | |
|
2272 | 0 | CUGeom *cu = cuDataArray + cuIdx; |
2273 | 0 | cu->log2CUSize = log2CUSize; |
2274 | 0 | cu->childOffset = childIdx - cuIdx; |
2275 | 0 | cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4; |
2276 | 0 | cu->numPartitions = (num4x4Partition >> ((g_log2Size[maxCUSize] - cu->log2CUSize) * 2)); |
2277 | 0 | cu->depth = g_log2Size[maxCUSize] - log2CUSize; |
2278 | 0 | cu->geomRecurId = cuIdx; |
2279 | |
|
2280 | 0 | cu->flags = 0; |
2281 | 0 | CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag); |
2282 | 0 | CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag); |
2283 | 0 | CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag); |
2284 | 0 | } |
2285 | 0 | } |
2286 | 0 | rangeCUIdx += sbWidth * sbWidth; |
2287 | 0 | } |
2288 | 0 | } |
2289 | | |
2290 | | #if ENABLE_SCC_EXT |
2291 | | bool CUData::getDerivedBV(uint32_t absPartIdx, const MV& currentMv, MV& derivedMv, uint32_t width, uint32_t height) |
2292 | | { |
2293 | | const int ctuWidth = m_slice->m_param->maxCUSize; |
2294 | | const int ctuHeight = m_slice->m_param->maxCUSize; |
2295 | | int cuPelX = m_cuPelX + (absPartIdx ? g_zscanToPelX[absPartIdx] : 0); |
2296 | | int cuPelY = m_cuPelY + (absPartIdx ? g_zscanToPelX[absPartIdx] : 0); |
2297 | | int rngX = cuPelX + (currentMv.x >> 2); |
2298 | | int rngY = cuPelY + (currentMv.y >> 2); |
2299 | | uint32_t m_frameWidthInCtus = (m_slice->m_sps->picWidthInLumaSamples % ctuWidth) ? m_slice->m_sps->picWidthInLumaSamples / ctuWidth + 1 : m_slice->m_sps->picWidthInLumaSamples / ctuWidth; |
2300 | | |
2301 | | if (rngX < 0 || rngY < 0 || (rngX + width) > m_slice->m_sps->picWidthInLumaSamples || (rngY + height) > m_slice->m_sps->picHeightInLumaSamples) |
2302 | | { |
2303 | | return false; |
2304 | | } |
2305 | | |
2306 | | int refCtbAddr = (rngY / ctuHeight) * m_frameWidthInCtus + (rngX / ctuWidth); |
2307 | | |
2308 | | int relCUPelX = rngX & (ctuWidth - 1); |
2309 | | int relCUPelY = rngY & (ctuHeight - 1); |
2310 | | uint32_t absPartIdxDerived = g_rasterToZscan[((relCUPelY >> 2) << 4) + (relCUPelX >> 2)]; |
2311 | | CUData* refCU = m_encData->getPicCTU(refCtbAddr); |
2312 | | |
2313 | | if (refCU->m_slice == NULL) |
2314 | | return false; |
2315 | | |
2316 | | MVField mv1; |
2317 | | refCU->getMvField(refCU, absPartIdxDerived, 0, mv1); |
2318 | | |
2319 | | int iCurrCtbAddr = (m_cuPelY / ctuHeight) * m_frameWidthInCtus + (m_cuPelX / ctuWidth); |
2320 | | uint32_t currAbsPartIdx = g_rasterToZscan[(((m_cuPelY & (ctuHeight - 1)) >> 2) << 4) + ((m_cuPelX & (ctuWidth - 1)) >> 2)]; |
2321 | | |
2322 | | if ((refCtbAddr > iCurrCtbAddr) || ((refCtbAddr == iCurrCtbAddr) && (absPartIdxDerived >= currAbsPartIdx))) |
2323 | | return false; |
2324 | | |
2325 | | int refIdx = mv1.refIdx; |
2326 | | bool isIBC; |
2327 | | if (refCU->isIntra(absPartIdxDerived)) |
2328 | | { |
2329 | | isIBC = false; |
2330 | | } |
2331 | | else |
2332 | | { |
2333 | | isIBC = (refIdx >= 0) ? (refCU->m_slice->m_refFrameList[0][refIdx]->m_poc == refCU->m_slice->m_poc) : 0; |
2334 | | } |
2335 | | derivedMv = mv1.mv; |
2336 | | derivedMv += currentMv; |
2337 | | |
2338 | | return isIBC; |
2339 | | |
2340 | | } |
2341 | | |
2342 | | bool CUData::isIntraBC(const CUData* cu, uint32_t absPartIdx) const |
2343 | | { |
2344 | | if (cu->isIntra(absPartIdx)) |
2345 | | { |
2346 | | return false; |
2347 | | } |
2348 | | MVField mv; |
2349 | | cu->getMvField(cu, absPartIdx, 0, mv); |
2350 | | int iRefIdx = mv.refIdx; |
2351 | | bool isNeighborIntraBC = (iRefIdx >= 0) ? (m_slice->m_refFrameList[0][iRefIdx]->m_poc == m_slice->m_poc) : false; |
2352 | | |
2353 | | return isNeighborIntraBC; |
2354 | | } |
2355 | | |
2356 | | bool CUData::getColMVPIBC(int ctuRsAddr, int partUnitIdx, MV& rcMv) |
2357 | | { |
2358 | | uint32_t absPartAddr = partUnitIdx; |
2359 | | |
2360 | | // use coldir. |
2361 | | Frame* colPic = m_slice->m_lastEncPic; |
2362 | | if (!colPic) |
2363 | | return false; |
2364 | | |
2365 | | CUData* colCU = m_encData->getPicCTU(ctuRsAddr); |
2366 | | MVField tempMv; |
2367 | | colCU->getMvField(colCU, absPartAddr, 0, tempMv); |
2368 | | if (tempMv.refIdx == REF_NOT_VALID) |
2369 | | return false; |
2370 | | |
2371 | | rcMv = tempMv.mv; |
2372 | | |
2373 | | return true; |
2374 | | } |
2375 | | |
2376 | | void CUData::getIntraBCMVPsEncOnly(uint32_t absPartIdx, MV* MvPred, int& nbPred, int puIdx) |
2377 | | { |
2378 | | uint32_t tempPartIdx; |
2379 | | uint32_t left, above; |
2380 | | MVField tempMvField; |
2381 | | |
2382 | | int width, height; |
2383 | | getPartIndexAndSize(puIdx, absPartIdx, width, height); |
2384 | | uint32_t numPartInCUWidth = s_numPartInCUSize; |
2385 | | uint32_t m_numPartitionsInCtu = s_numPartInCUSize * s_numPartInCUSize; |
2386 | | uint32_t m_frameWidthInCtus = (m_slice->m_sps->picWidthInLumaSamples % m_slice->m_param->maxCUSize) ? m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize + 1 : m_slice->m_sps->picWidthInLumaSamples / m_slice->m_param->maxCUSize; |
2387 | | |
2388 | | uint32_t partIdxLT = m_absIdxInCTU; |
2389 | | uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)]; |
2390 | | uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; |
2391 | | |
2392 | | left = above = 0; |
2393 | | |
2394 | | MvPred[0] = m_lastIntraBCMv[0]; |
2395 | | if (MvPred[0] != MV(0, 0)) |
2396 | | { |
2397 | | nbPred++; |
2398 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2399 | | nbPred++; |
2400 | | } |
2401 | | MvPred[nbPred] = m_lastIntraBCMv[1]; |
2402 | | if (MvPred[nbPred] != MV(0, 0)) |
2403 | | { |
2404 | | nbPred++; |
2405 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2406 | | nbPred++; |
2407 | | } |
2408 | | |
2409 | | //left |
2410 | | const CUData* leftCU = getPULeft(tempPartIdx, partIdxLB); |
2411 | | left = leftCU ? isIntraBC(leftCU, tempPartIdx) : 0; |
2412 | | |
2413 | | if (left) |
2414 | | { |
2415 | | leftCU->getMvField(leftCU, tempPartIdx, 0, tempMvField); |
2416 | | MvPred[nbPred++] = tempMvField.mv; |
2417 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2418 | | { |
2419 | | nbPred++; |
2420 | | } |
2421 | | } |
2422 | | |
2423 | | //above |
2424 | | const CUData* aboveCU = getPUAbove(tempPartIdx, partIdxRT); |
2425 | | above = aboveCU ? isIntraBC(aboveCU, tempPartIdx) : 0; |
2426 | | |
2427 | | if (above) |
2428 | | { |
2429 | | aboveCU->getMvField(aboveCU, tempPartIdx, 0, tempMvField); |
2430 | | MvPred[nbPred++] = tempMvField.mv; |
2431 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2432 | | { |
2433 | | nbPred++; |
2434 | | } |
2435 | | } |
2436 | | |
2437 | | if (m_slice->isOnlyCurrentPictureAsReference()) |
2438 | | { |
2439 | | MV mvCol; |
2440 | | bool isColAvail = false; |
2441 | | if (m_absIdxInCTU && m_slice->m_lastEncPic && m_slice->m_lastEncPic->m_poc < m_slice->m_poc) |
2442 | | { |
2443 | | uint32_t partIdxRB; |
2444 | | partIdxRB = deriveRightBottomIdx(puIdx); |
2445 | | |
2446 | | uint32_t absPartIdxTmp = g_zscanToRaster[partIdxRB]; |
2447 | | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; |
2448 | | int iLCUIdx = -1; |
2449 | | |
2450 | | if (((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[g_rasterToZscan[absPartIdxTmp]] + 4) < m_slice->m_sps->picWidthInLumaSamples) // image boundary check |
2451 | | && ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[g_rasterToZscan[absPartIdxTmp]] + 4) < m_slice->m_sps->picHeightInLumaSamples)) |
2452 | | { |
2453 | | if ((absPartIdxTmp % numPartInCUWidth < numPartInCUWidth - 1) && // is not at the last column of LCU |
2454 | | (absPartIdxTmp / numPartInCUWidth < s_numPartInCUSize - 1)) // is not at the last row of LCU |
2455 | | { |
2456 | | absPartAddr = g_rasterToZscan[absPartIdxTmp + numPartInCUWidth + 1]; |
2457 | | iLCUIdx = m_cuAddr; |
2458 | | } |
2459 | | else if (absPartIdxTmp % numPartInCUWidth < numPartInCUWidth - 1) // is not at the last column of CTU But is last row of CTU |
2460 | | { |
2461 | | absPartAddr = g_rasterToZscan[(absPartIdxTmp + numPartInCUWidth + 1) % m_numPartitionsInCtu]; |
2462 | | iLCUIdx = m_cuAddr + m_frameWidthInCtus; |
2463 | | } |
2464 | | else if (absPartIdxTmp / numPartInCUWidth < s_numPartInCUSize - 1) // is not at the last row of CTU But is last column of CTU |
2465 | | { |
2466 | | absPartAddr = g_rasterToZscan[absPartIdxTmp + 1]; |
2467 | | iLCUIdx = m_cuAddr + 1; |
2468 | | } |
2469 | | } |
2470 | | if (iLCUIdx >= 0) |
2471 | | { |
2472 | | isColAvail = getColMVPIBC(iLCUIdx, absPartAddr, mvCol); |
2473 | | |
2474 | | if (!isColAvail) |
2475 | | { |
2476 | | uint32_t uiPartIdxCenter; |
2477 | | uiPartIdxCenter = deriveCenterIdx(puIdx); |
2478 | | isColAvail = getColMVPIBC(m_cuAddr, uiPartIdxCenter, mvCol); |
2479 | | } |
2480 | | } |
2481 | | } |
2482 | | if (isColAvail) |
2483 | | { |
2484 | | MvPred[nbPred++] = mvCol; |
2485 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2486 | | { |
2487 | | nbPred++; |
2488 | | } |
2489 | | } |
2490 | | } |
2491 | | |
2492 | | // Below Left predictor search |
2493 | | const CUData* tempBelowLeftCU = getPUBelowLeft(tempPartIdx, partIdxLB); |
2494 | | uint32_t belowLeft = (tempBelowLeftCU) ? tempBelowLeftCU->isIntraBC(tempBelowLeftCU, tempPartIdx) : 0; |
2495 | | if (belowLeft) |
2496 | | { |
2497 | | tempBelowLeftCU->getMvField(tempBelowLeftCU, tempPartIdx, 0, tempMvField); |
2498 | | MvPred[nbPred++] = tempMvField.mv; |
2499 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2500 | | { |
2501 | | nbPred++; |
2502 | | } |
2503 | | } |
2504 | | |
2505 | | // Above Right predictor search |
2506 | | const CUData* tempAboveRightCU = getPUAboveRight(tempPartIdx, partIdxRT); |
2507 | | uint32_t aboveRight = (tempAboveRightCU) ? tempAboveRightCU->isIntraBC(tempAboveRightCU, tempPartIdx) : 0; |
2508 | | if (aboveRight) |
2509 | | { |
2510 | | tempAboveRightCU->getMvField(tempAboveRightCU, tempPartIdx, 0, tempMvField); |
2511 | | MvPred[nbPred++] = tempMvField.mv; |
2512 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2513 | | { |
2514 | | nbPred++; |
2515 | | } |
2516 | | } |
2517 | | |
2518 | | // Above Left predictor search |
2519 | | const CUData* tempAboveLeftCU = getPUAboveLeft(tempPartIdx, partIdxLT); |
2520 | | uint32_t aboveLeft = (tempAboveLeftCU) ? tempAboveLeftCU->isIntraBC(tempAboveLeftCU, tempPartIdx) : 0; |
2521 | | if (aboveLeft) |
2522 | | { |
2523 | | tempAboveLeftCU->getMvField(tempAboveLeftCU, tempPartIdx, 0, tempMvField); |
2524 | | MvPred[nbPred++] = tempMvField.mv; |
2525 | | if (getDerivedBV(absPartIdx, MvPred[nbPred - 1], MvPred[nbPred], width, height)) |
2526 | | { |
2527 | | nbPred++; |
2528 | | } |
2529 | | } |
2530 | | } |
2531 | | |
2532 | | void CUData::roundMergeCandidates(MVField(*pcMvFieldNeighbours)[2], int iCount) const |
2533 | | { |
2534 | | if (m_slice->m_useIntegerMv) |
2535 | | { |
2536 | | for (int i = 0; i < iCount; i++) |
2537 | | { |
2538 | | pcMvFieldNeighbours[i][0].mv = (pcMvFieldNeighbours[i][0].mv >> 2) << 2; |
2539 | | pcMvFieldNeighbours[i][0].refIdx = pcMvFieldNeighbours[i][0].refIdx; |
2540 | | } |
2541 | | } |
2542 | | else |
2543 | | { |
2544 | | for (int i = 0; i < iCount; i++) |
2545 | | { |
2546 | | int iCurrRefIdx = pcMvFieldNeighbours[i][0].refIdx; |
2547 | | if (iCurrRefIdx >= 0) |
2548 | | { |
2549 | | if (m_slice->m_refFrameList[0][iCurrRefIdx]->m_poc == m_slice->m_poc) |
2550 | | { |
2551 | | pcMvFieldNeighbours[i][0].mv = (pcMvFieldNeighbours[i][0].mv >> 2) << 2; |
2552 | | pcMvFieldNeighbours[i][0].refIdx = pcMvFieldNeighbours[i][0].refIdx; |
2553 | | } |
2554 | | } |
2555 | | } |
2556 | | } |
2557 | | } |
2558 | | |
2559 | | bool CUData::is8x8BipredRestriction(MV mvL0, MV mvL1, int iRefIdxL0, int iRefIdxL1) const |
2560 | | { |
2561 | | if (iRefIdxL0 < -1 || iRefIdxL0 >= MAX_NUM_REF) |
2562 | | { |
2563 | | iRefIdxL0 = -1; |
2564 | | } |
2565 | | if (iRefIdxL1 < -1 || iRefIdxL1 >= MAX_NUM_REF) |
2566 | | { |
2567 | | iRefIdxL1 = -1; |
2568 | | } |
2569 | | bool b8x8BiPredRestricted = false; |
2570 | | int RefPOCL0 = -1; |
2571 | | int RefPOCL1 = -1; |
2572 | | if (iRefIdxL0 >= 0 && iRefIdxL1 >= 0) |
2573 | | { |
2574 | | RefPOCL0 = m_slice->m_refPOCList[0][iRefIdxL0]; |
2575 | | RefPOCL1 = m_slice->m_refPOCList[1][iRefIdxL1]; |
2576 | | bool mvL0Int = (((mvL0.x & 0x3) == 0) && ((mvL0.y & 0x3) == 0)); |
2577 | | bool mvL1Int = (((mvL1.x & 0x3) == 0) && ((mvL1.y & 0x3) == 0)); |
2578 | | bool IdenticalMV = ((mvL0 == mvL1) && (RefPOCL0 == RefPOCL1)); |
2579 | | b8x8BiPredRestricted = ( |
2580 | | !mvL0Int && !mvL1Int && !IdenticalMV && |
2581 | | (m_slice->m_param->bEnableSCC) |
2582 | | && (m_slice->m_bUseSao || !m_slice->m_pps->bPicDisableDeblockingFilter || 0)); |
2583 | | } |
2584 | | return b8x8BiPredRestricted; |
2585 | | } |
2586 | | #endif |