/src/x265/source/common/cudata.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /***************************************************************************** |
2 | | * Copyright (C) 2013-2020 MulticoreWare, Inc |
3 | | * |
4 | | * Authors: Steve Borho <steve@borho.org> |
5 | | * Min Chen <chenm003@163.com> |
6 | | * |
7 | | * This program is free software; you can redistribute it and/or modify |
8 | | * it under the terms of the GNU General Public License as published by |
9 | | * the Free Software Foundation; either version 2 of the License, or |
10 | | * (at your option) any later version. |
11 | | * |
12 | | * This program is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU General Public License |
18 | | * along with this program; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. |
20 | | * |
21 | | * This program is also available under a commercial proprietary license. |
22 | | * For more information, contact us at license @ x265.com. |
23 | | *****************************************************************************/ |
24 | | |
25 | | #include "common.h" |
26 | | #include "frame.h" |
27 | | #include "framedata.h" |
28 | | #include "picyuv.h" |
29 | | #include "mv.h" |
30 | | #include "cudata.h" |
/* Parenthesized so the shift binds as a unit wherever the macro expands
 * (e.g. MAX_MV + 1 or MAX_MV - 1 would otherwise shift by the wrong amount). */
#define MAX_MV (1 << 14)
32 | | |
33 | | using namespace X265_NS; |
34 | | |
35 | | /* for all bcast* and copy* functions, dst and src are aligned to MIN(size, 32) */ |
36 | | |
/* Store val into the single byte at dst */
static void bcast1(uint8_t* dst, uint8_t val)
{
    *dst = val;
}
38 | | |
/* Copy exactly 4 bytes from src to dst. A fixed-size memcpy replaces the
 * uint32_t pointer cast so the access is well defined irrespective of the
 * buffers' alignment or effective type. The regions must not overlap. */
static void copy4(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 4); }
/* Fill exactly 4 bytes at dst with val */
static void bcast4(uint8_t* dst, uint8_t val) { memset(dst, val, 4); }
41 | | |
/* Copy exactly 16 bytes from src to dst. A fixed-size memcpy replaces the
 * uint64_t pointer casts so the access is well defined irrespective of the
 * buffers' alignment or effective type. The regions must not overlap. */
static void copy16(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 16); }
/* Fill exactly 16 bytes at dst with val */
static void bcast16(uint8_t* dst, uint8_t val) { memset(dst, val, 16); }
44 | | |
/* Copy exactly 64 bytes from src to dst. A fixed-size memcpy replaces the
 * unrolled uint64_t pointer casts so the access is well defined irrespective
 * of the buffers' alignment or effective type. The regions must not overlap. */
static void copy64(uint8_t* dst, uint8_t* src) { memcpy(dst, src, 64); }
/* Fill exactly 64 bytes at dst with val */
static void bcast64(uint8_t* dst, uint8_t val) { memset(dst, val, 64); }
52 | | |
/* 256-byte variants: at this size the work is handed straight to the C
 * library's memcpy/memset rather than being unrolled by hand. */
static void copy256(uint8_t* dst, uint8_t* src)
{
    const size_t numBytes = 256;
    memcpy(dst, src, numBytes);
}

static void bcast256(uint8_t* dst, uint8_t val)
{
    const size_t numBytes = 256;
    memset(dst, val, numBytes);
}
57 | | |
58 | | namespace { |
59 | | // file private namespace |
60 | | |
61 | | /* Check whether 2 addresses point to the same column */ |
62 | | inline bool isEqualCol(int addrA, int addrB) |
63 | 42.1M | { |
64 | 42.1M | return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0; |
65 | 42.1M | } |
66 | | |
67 | | /* Check whether 2 addresses point to the same row */ |
68 | | inline bool isEqualRow(int addrA, int addrB) |
69 | 42.0M | { |
70 | 42.0M | return ((addrA ^ addrB) < RASTER_SIZE); |
71 | 42.0M | } |
72 | | |
73 | | /* Check whether 2 addresses point to the same row or column */ |
74 | | inline bool isEqualRowOrCol(int addrA, int addrB) |
75 | 20.5M | { |
76 | 20.5M | return isEqualCol(addrA, addrB) | isEqualRow(addrA, addrB); |
77 | 20.5M | } |
78 | | |
79 | | /* Check whether one address points to the first column */ |
80 | | inline bool isZeroCol(int addr) |
81 | 52.4M | { |
82 | 52.4M | return (addr & (RASTER_SIZE - 1)) == 0; |
83 | 52.4M | } |
84 | | |
85 | | /* Check whether one address points to the first row */ |
86 | | inline bool isZeroRow(int addr) |
87 | 54.2M | { |
88 | 54.2M | return (addr < RASTER_SIZE); |
89 | 54.2M | } |
90 | | |
91 | | /* Check whether one address points to a column whose index is smaller than a given value */ |
92 | | inline bool lessThanCol(int addr, int val) |
93 | 17.8M | { |
94 | 17.8M | return (addr & (RASTER_SIZE - 1)) < val; |
95 | 17.8M | } |
96 | | |
97 | | /* Check whether one address points to a row whose index is smaller than a given value */ |
98 | | inline bool lessThanRow(int addr, int val) |
99 | 17.7M | { |
100 | | // addr / numUnits < val |
101 | 17.7M | return (addr >> LOG2_RASTER_SIZE) < val; |
102 | 17.7M | } |
103 | | |
104 | | inline MV scaleMv(MV mv, int scale) |
105 | 0 | { |
106 | 0 | int mvx = x265_clip3(-32768, 32767, (scale * mv.x + 127 + (scale * mv.x < 0)) >> 8); |
107 | 0 | int mvy = x265_clip3(-32768, 32767, (scale * mv.y + 127 + (scale * mv.y < 0)) >> 8); |
108 | |
|
109 | 0 | return MV((int32_t)mvx, (int32_t)mvy); |
110 | 0 | } |
111 | | |
112 | | } |
113 | | |
114 | | CUData::CUData() |
115 | 1.28M | { |
116 | 1.28M | memset(this, 0, sizeof(*this)); |
117 | 1.28M | } |
118 | | |
119 | | void CUData::initialize(const CUDataMemPool& dataPool, uint32_t depth, const x265_param& param, int instance) |
120 | 1.11M | { |
121 | 1.11M | int csp = param.internalCsp; |
122 | 1.11M | m_chromaFormat = csp; |
123 | 1.11M | m_hChromaShift = CHROMA_H_SHIFT(csp); |
124 | 1.11M | m_vChromaShift = CHROMA_V_SHIFT(csp); |
125 | 1.11M | m_numPartitions = param.num4x4Partitions >> (depth * 2); |
126 | | |
127 | 1.11M | if (!s_partSet[0]) |
128 | 1.11M | { |
129 | 1.11M | s_numPartInCUSize = 1 << param.unitSizeDepth; |
130 | 1.11M | switch (param.maxLog2CUSize) |
131 | 1.11M | { |
132 | 884k | case 6: |
133 | 884k | s_partSet[0] = bcast256; |
134 | 884k | s_partSet[1] = bcast64; |
135 | 884k | s_partSet[2] = bcast16; |
136 | 884k | s_partSet[3] = bcast4; |
137 | 884k | s_partSet[4] = bcast1; |
138 | 884k | break; |
139 | 92.9k | case 5: |
140 | 92.9k | s_partSet[0] = bcast64; |
141 | 92.9k | s_partSet[1] = bcast16; |
142 | 92.9k | s_partSet[2] = bcast4; |
143 | 92.9k | s_partSet[3] = bcast1; |
144 | 92.9k | s_partSet[4] = NULL; |
145 | 92.9k | break; |
146 | 140k | case 4: |
147 | 140k | s_partSet[0] = bcast16; |
148 | 140k | s_partSet[1] = bcast4; |
149 | 140k | s_partSet[2] = bcast1; |
150 | 140k | s_partSet[3] = NULL; |
151 | 140k | s_partSet[4] = NULL; |
152 | 140k | break; |
153 | 0 | default: |
154 | 0 | X265_CHECK(0, "unexpected CTU size\n"); |
155 | 0 | break; |
156 | 1.11M | } |
157 | 1.11M | } |
158 | | |
159 | 1.11M | switch (m_numPartitions) |
160 | 1.11M | { |
161 | 225k | case 256: // 64x64 CU |
162 | 225k | m_partCopy = copy256; |
163 | 225k | m_partSet = bcast256; |
164 | 225k | m_subPartCopy = copy64; |
165 | 225k | m_subPartSet = bcast64; |
166 | 225k | break; |
167 | 251k | case 64: // 32x32 CU |
168 | 251k | m_partCopy = copy64; |
169 | 251k | m_partSet = bcast64; |
170 | 251k | m_subPartCopy = copy16; |
171 | 251k | m_subPartSet = bcast16; |
172 | 251k | break; |
173 | 324k | case 16: // 16x16 CU |
174 | 324k | m_partCopy = copy16; |
175 | 324k | m_partSet = bcast16; |
176 | 324k | m_subPartCopy = copy4; |
177 | 324k | m_subPartSet = bcast4; |
178 | 324k | break; |
179 | 317k | case 4: // 8x8 CU |
180 | 317k | m_partCopy = copy4; |
181 | 317k | m_partSet = bcast4; |
182 | 317k | m_subPartCopy = NULL; |
183 | 317k | m_subPartSet = NULL; |
184 | 317k | break; |
185 | 0 | default: |
186 | 0 | X265_CHECK(0, "unexpected CU partition count\n"); |
187 | 0 | break; |
188 | 1.11M | } |
189 | | |
190 | 1.11M | if (csp == X265_CSP_I400) |
191 | 0 | { |
192 | | /* Each CU's data is layed out sequentially within the charMemBlock */ |
193 | 0 | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * instance; |
194 | |
|
195 | 0 | m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; |
196 | 0 | m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; |
197 | 0 | m_log2CUSize = charBuf; charBuf += m_numPartitions; |
198 | 0 | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; |
199 | 0 | m_tqBypass = charBuf; charBuf += m_numPartitions; |
200 | 0 | m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; |
201 | 0 | m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; |
202 | 0 | m_cuDepth = charBuf; charBuf += m_numPartitions; |
203 | 0 | m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ |
204 | 0 | m_partSize = charBuf; charBuf += m_numPartitions; |
205 | 0 | m_skipFlag[0] = charBuf; charBuf += m_numPartitions; |
206 | 0 | m_skipFlag[1] = charBuf; charBuf += m_numPartitions; |
207 | 0 | m_mergeFlag = charBuf; charBuf += m_numPartitions; |
208 | 0 | m_interDir = charBuf; charBuf += m_numPartitions; |
209 | 0 | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; |
210 | 0 | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; |
211 | 0 | m_tuDepth = charBuf; charBuf += m_numPartitions; |
212 | 0 | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; |
213 | 0 | m_cbf[0] = charBuf; charBuf += m_numPartitions; |
214 | 0 | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; |
215 | |
|
216 | 0 | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * (BytesPerPartition - 4)) * (instance + 1), "CU data layout is broken\n"); //BytesPerPartition |
217 | |
|
218 | 0 | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; |
219 | 0 | m_mv[1] = m_mv[0] + m_numPartitions; |
220 | 0 | m_mvd[0] = m_mv[1] + m_numPartitions; |
221 | 0 | m_mvd[1] = m_mvd[0] + m_numPartitions; |
222 | |
|
223 | 0 | m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions; |
224 | |
|
225 | 0 | uint32_t cuSize = param.maxCUSize >> depth; |
226 | 0 | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (cuSize * cuSize); |
227 | 0 | m_trCoeff[1] = m_trCoeff[2] = 0; |
228 | 0 | m_transformSkip[1] = m_transformSkip[2] = m_cbf[1] = m_cbf[2] = 0; |
229 | 0 | m_fAc_den[0] = m_fDc_den[0] = 0; |
230 | 0 | } |
231 | 1.11M | else |
232 | 1.11M | { |
233 | | /* Each CU's data is layed out sequentially within the charMemBlock */ |
234 | 1.11M | uint8_t *charBuf = dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * instance; |
235 | | |
236 | 1.11M | m_qp = (int8_t*)charBuf; charBuf += m_numPartitions; |
237 | 1.11M | m_qpAnalysis = (int8_t*)charBuf; charBuf += m_numPartitions; |
238 | 1.11M | m_log2CUSize = charBuf; charBuf += m_numPartitions; |
239 | 1.11M | m_lumaIntraDir = charBuf; charBuf += m_numPartitions; |
240 | 1.11M | m_tqBypass = charBuf; charBuf += m_numPartitions; |
241 | 1.11M | m_refIdx[0] = (int8_t*)charBuf; charBuf += m_numPartitions; |
242 | 1.11M | m_refIdx[1] = (int8_t*)charBuf; charBuf += m_numPartitions; |
243 | 1.11M | m_cuDepth = charBuf; charBuf += m_numPartitions; |
244 | 1.11M | m_predMode = charBuf; charBuf += m_numPartitions; /* the order up to here is important in initCTU() and initSubCU() */ |
245 | 1.11M | m_partSize = charBuf; charBuf += m_numPartitions; |
246 | 1.11M | m_skipFlag[0] = charBuf; charBuf += m_numPartitions; |
247 | 1.11M | m_skipFlag[1] = charBuf; charBuf += m_numPartitions; |
248 | 1.11M | m_mergeFlag = charBuf; charBuf += m_numPartitions; |
249 | 1.11M | m_interDir = charBuf; charBuf += m_numPartitions; |
250 | 1.11M | m_mvpIdx[0] = charBuf; charBuf += m_numPartitions; |
251 | 1.11M | m_mvpIdx[1] = charBuf; charBuf += m_numPartitions; |
252 | 1.11M | m_tuDepth = charBuf; charBuf += m_numPartitions; |
253 | 1.11M | m_transformSkip[0] = charBuf; charBuf += m_numPartitions; |
254 | 1.11M | m_transformSkip[1] = charBuf; charBuf += m_numPartitions; |
255 | 1.11M | m_transformSkip[2] = charBuf; charBuf += m_numPartitions; |
256 | 1.11M | m_cbf[0] = charBuf; charBuf += m_numPartitions; |
257 | 1.11M | m_cbf[1] = charBuf; charBuf += m_numPartitions; |
258 | 1.11M | m_cbf[2] = charBuf; charBuf += m_numPartitions; |
259 | 1.11M | m_chromaIntraDir = charBuf; charBuf += m_numPartitions; |
260 | | |
261 | 1.11M | X265_CHECK(charBuf == dataPool.charMemBlock + (m_numPartitions * BytesPerPartition) * (instance + 1), "CU data layout is broken\n"); |
262 | | |
263 | 1.11M | m_mv[0] = dataPool.mvMemBlock + (instance * 4) * m_numPartitions; |
264 | 1.11M | m_mv[1] = m_mv[0] + m_numPartitions; |
265 | 1.11M | m_mvd[0] = m_mv[1] + m_numPartitions; |
266 | 1.11M | m_mvd[1] = m_mvd[0] + m_numPartitions; |
267 | | |
268 | 1.11M | m_distortion = dataPool.distortionMemBlock + instance * m_numPartitions; |
269 | | |
270 | 1.11M | uint32_t cuSize = param.maxCUSize >> depth; |
271 | 1.11M | uint32_t sizeL = cuSize * cuSize; |
272 | 1.11M | uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); // block chroma part |
273 | 1.11M | m_trCoeff[0] = dataPool.trCoeffMemBlock + instance * (sizeL + sizeC * 2); |
274 | 1.11M | m_trCoeff[1] = m_trCoeff[0] + sizeL; |
275 | 1.11M | m_trCoeff[2] = m_trCoeff[0] + sizeL + sizeC; |
276 | 4.47M | for (int i = 0; i < 3; i++) |
277 | 3.35M | m_fAc_den[i] = m_fDc_den[i] = 0; |
278 | 1.11M | } |
279 | 1.11M | } |
280 | | |
281 | | void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice) |
282 | 13.9k | { |
283 | 13.9k | m_encData = frame.m_encData; |
284 | 13.9k | m_slice = m_encData->m_slice; |
285 | 13.9k | m_cuAddr = cuAddr; |
286 | 13.9k | m_cuPelX = (cuAddr % m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize; |
287 | 13.9k | m_cuPelY = (cuAddr / m_slice->m_sps->numCuInWidth) << m_slice->m_param->maxLog2CUSize; |
288 | 13.9k | m_absIdxInCTU = 0; |
289 | 13.9k | m_numPartitions = m_encData->m_param->num4x4Partitions; |
290 | 13.9k | m_bFirstRowInSlice = (uint8_t)firstRowInSlice; |
291 | 13.9k | m_bLastRowInSlice = (uint8_t)lastRowInSlice; |
292 | 13.9k | m_bLastCuInSlice = (uint8_t)lastCuInSlice; |
293 | | |
294 | | /* sequential memsets */ |
295 | 13.9k | m_partSet((uint8_t*)m_qp, (uint8_t)qp); |
296 | 13.9k | m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp); |
297 | 13.9k | m_partSet(m_log2CUSize, (uint8_t)m_slice->m_param->maxLog2CUSize); |
298 | 13.9k | m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX); |
299 | 13.9k | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
300 | 13.9k | m_partSet(m_tqBypass, (uint8_t)frame.m_encData->m_param->bLossless); |
301 | 13.9k | if (m_slice->m_sliceType != I_SLICE) |
302 | 0 | { |
303 | 0 | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); |
304 | 0 | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); |
305 | 0 | } |
306 | | |
307 | 13.9k | X265_CHECK(!(frame.m_encData->m_param->bLossless && !m_slice->m_pps->bTransquantBypassEnabled), "lossless enabled without TQbypass in PPS\n"); |
308 | | |
309 | | /* initialize the remaining CU data in one memset */ |
310 | 13.9k | memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 12 : BytesPerPartition - 8) * m_numPartitions); |
311 | | |
312 | 307k | for (int8_t i = 0; i < NUM_TU_DEPTH; i++) |
313 | 293k | m_refTuDepth[i] = -1; |
314 | | |
315 | 13.9k | m_vbvAffected = false; |
316 | | |
317 | 13.9k | uint32_t widthInCU = m_slice->m_sps->numCuInWidth; |
318 | 13.9k | m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; |
319 | 13.9k | m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; |
320 | 13.9k | m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; |
321 | 13.9k | m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; |
322 | 13.9k | memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); |
323 | 13.9k | } |
324 | | |
325 | | // initialize Sub partition |
326 | | void CUData::initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp) |
327 | 890k | { |
328 | 890k | m_absIdxInCTU = cuGeom.absPartIdx; |
329 | 890k | m_encData = ctu.m_encData; |
330 | 890k | m_slice = ctu.m_slice; |
331 | 890k | m_cuAddr = ctu.m_cuAddr; |
332 | 890k | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; |
333 | 890k | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; |
334 | 890k | m_cuLeft = ctu.m_cuLeft; |
335 | 890k | m_cuAbove = ctu.m_cuAbove; |
336 | 890k | m_cuAboveLeft = ctu.m_cuAboveLeft; |
337 | 890k | m_cuAboveRight = ctu.m_cuAboveRight; |
338 | 890k | m_bFirstRowInSlice = ctu.m_bFirstRowInSlice; |
339 | 890k | m_bLastRowInSlice = ctu.m_bLastRowInSlice; |
340 | 890k | m_bLastCuInSlice = ctu.m_bLastCuInSlice; |
341 | 3.56M | for (int i = 0; i < 3; i++) |
342 | 2.67M | { |
343 | 2.67M | m_fAc_den[i] = ctu.m_fAc_den[i]; |
344 | 2.67M | m_fDc_den[i] = ctu.m_fDc_den[i]; |
345 | 2.67M | } |
346 | | |
347 | 890k | X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n"); |
348 | | |
349 | 890k | m_partSet((uint8_t*)m_qp, (uint8_t)qp); |
350 | 890k | m_partSet((uint8_t*)m_qpAnalysis, (uint8_t)qp); |
351 | | |
352 | 890k | m_partSet(m_log2CUSize, (uint8_t)cuGeom.log2CUSize); |
353 | 890k | m_partSet(m_lumaIntraDir, (uint8_t)ALL_IDX); |
354 | 890k | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
355 | 890k | m_partSet(m_tqBypass, (uint8_t)m_encData->m_param->bLossless); |
356 | 890k | m_partSet((uint8_t*)m_refIdx[0], (uint8_t)REF_NOT_VALID); |
357 | 890k | m_partSet((uint8_t*)m_refIdx[1], (uint8_t)REF_NOT_VALID); |
358 | 890k | m_partSet(m_cuDepth, (uint8_t)cuGeom.depth); |
359 | | |
360 | | /* initialize the remaining CU data in one memset */ |
361 | 890k | memset(m_predMode, 0, (ctu.m_chromaFormat == X265_CSP_I400 ? BytesPerPartition - 13 : BytesPerPartition - 9) * m_numPartitions); |
362 | 890k | memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); |
363 | 890k | } |
364 | | |
365 | | /* Copy the results of a sub-part (split) CU to the parent CU */ |
366 | | void CUData::copyPartFrom(const CUData& subCU, const CUGeom& childGeom, uint32_t subPartIdx) |
367 | 440k | { |
368 | 440k | X265_CHECK(subPartIdx < 4, "part unit should be less than 4\n"); |
369 | | |
370 | 440k | uint32_t offset = childGeom.numPartitions * subPartIdx; |
371 | | |
372 | 440k | m_bFirstRowInSlice = subCU.m_bFirstRowInSlice; |
373 | 440k | m_bLastCuInSlice = subCU.m_bLastCuInSlice; |
374 | | |
375 | 440k | m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp); |
376 | 440k | m_subPartCopy((uint8_t*)m_qpAnalysis + offset, (uint8_t*)subCU.m_qpAnalysis); |
377 | 440k | m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize); |
378 | 440k | m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir); |
379 | 440k | m_subPartCopy(m_tqBypass + offset, subCU.m_tqBypass); |
380 | 440k | m_subPartCopy((uint8_t*)m_refIdx[0] + offset, (uint8_t*)subCU.m_refIdx[0]); |
381 | 440k | m_subPartCopy((uint8_t*)m_refIdx[1] + offset, (uint8_t*)subCU.m_refIdx[1]); |
382 | 440k | m_subPartCopy(m_cuDepth + offset, subCU.m_cuDepth); |
383 | 440k | m_subPartCopy(m_predMode + offset, subCU.m_predMode); |
384 | 440k | m_subPartCopy(m_partSize + offset, subCU.m_partSize); |
385 | 440k | m_subPartCopy(m_mergeFlag + offset, subCU.m_mergeFlag); |
386 | 440k | m_subPartCopy(m_interDir + offset, subCU.m_interDir); |
387 | 440k | m_subPartCopy(m_mvpIdx[0] + offset, subCU.m_mvpIdx[0]); |
388 | 440k | m_subPartCopy(m_mvpIdx[1] + offset, subCU.m_mvpIdx[1]); |
389 | 440k | m_subPartCopy(m_tuDepth + offset, subCU.m_tuDepth); |
390 | | |
391 | 440k | m_subPartCopy(m_transformSkip[0] + offset, subCU.m_transformSkip[0]); |
392 | 440k | m_subPartCopy(m_cbf[0] + offset, subCU.m_cbf[0]); |
393 | | |
394 | 440k | memcpy(m_mv[0] + offset, subCU.m_mv[0], childGeom.numPartitions * sizeof(MV)); |
395 | 440k | memcpy(m_mv[1] + offset, subCU.m_mv[1], childGeom.numPartitions * sizeof(MV)); |
396 | 440k | memcpy(m_mvd[0] + offset, subCU.m_mvd[0], childGeom.numPartitions * sizeof(MV)); |
397 | 440k | memcpy(m_mvd[1] + offset, subCU.m_mvd[1], childGeom.numPartitions * sizeof(MV)); |
398 | | |
399 | 440k | memcpy(m_distortion + offset, subCU.m_distortion, childGeom.numPartitions * sizeof(sse_t)); |
400 | | |
401 | 440k | uint32_t tmp = 1 << ((m_slice->m_param->maxLog2CUSize - childGeom.depth) * 2); |
402 | 440k | uint32_t tmp2 = subPartIdx * tmp; |
403 | 440k | memcpy(m_trCoeff[0] + tmp2, subCU.m_trCoeff[0], sizeof(coeff_t)* tmp); |
404 | | |
405 | 440k | if (subCU.m_chromaFormat != X265_CSP_I400) |
406 | 440k | { |
407 | 440k | m_subPartCopy(m_transformSkip[1] + offset, subCU.m_transformSkip[1]); |
408 | 440k | m_subPartCopy(m_transformSkip[2] + offset, subCU.m_transformSkip[2]); |
409 | 440k | m_subPartCopy(m_cbf[1] + offset, subCU.m_cbf[1]); |
410 | 440k | m_subPartCopy(m_cbf[2] + offset, subCU.m_cbf[2]); |
411 | 440k | m_subPartCopy(m_chromaIntraDir + offset, subCU.m_chromaIntraDir); |
412 | | |
413 | 440k | uint32_t tmpC = tmp >> (m_hChromaShift + m_vChromaShift); |
414 | 440k | uint32_t tmpC2 = tmp2 >> (m_hChromaShift + m_vChromaShift); |
415 | 440k | memcpy(m_trCoeff[1] + tmpC2, subCU.m_trCoeff[1], sizeof(coeff_t) * tmpC); |
416 | 440k | memcpy(m_trCoeff[2] + tmpC2, subCU.m_trCoeff[2], sizeof(coeff_t) * tmpC); |
417 | 440k | } |
418 | 440k | } |
419 | | |
420 | | /* If a sub-CU part is not present (off the edge of the picture) its depth and |
421 | | * log2size should still be configured */ |
422 | | void CUData::setEmptyPart(const CUGeom& childGeom, uint32_t subPartIdx) |
423 | 22.7k | { |
424 | 22.7k | uint32_t offset = childGeom.numPartitions * subPartIdx; |
425 | 22.7k | m_subPartSet(m_cuDepth + offset, (uint8_t)childGeom.depth); |
426 | 22.7k | m_subPartSet(m_log2CUSize + offset, (uint8_t)childGeom.log2CUSize); |
427 | 22.7k | } |
428 | | |
429 | | /* Copy all CU data from one instance to the next, except set lossless flag |
430 | | * This will only get used when --cu-lossless is enabled but --lossless is not. */ |
431 | | void CUData::initLosslessCU(const CUData& cu, const CUGeom& cuGeom) |
432 | 0 | { |
433 | | /* Start by making an exact copy */ |
434 | 0 | m_encData = cu.m_encData; |
435 | 0 | m_slice = cu.m_slice; |
436 | 0 | m_cuAddr = cu.m_cuAddr; |
437 | 0 | m_cuPelX = cu.m_cuPelX; |
438 | 0 | m_cuPelY = cu.m_cuPelY; |
439 | 0 | m_cuLeft = cu.m_cuLeft; |
440 | 0 | m_cuAbove = cu.m_cuAbove; |
441 | 0 | m_cuAboveLeft = cu.m_cuAboveLeft; |
442 | 0 | m_cuAboveRight = cu.m_cuAboveRight; |
443 | 0 | m_absIdxInCTU = cuGeom.absPartIdx; |
444 | 0 | m_numPartitions = cuGeom.numPartitions; |
445 | 0 | memcpy(m_qp, cu.m_qp, BytesPerPartition * m_numPartitions); |
446 | 0 | memcpy(m_mv[0], cu.m_mv[0], m_numPartitions * sizeof(MV)); |
447 | 0 | memcpy(m_mv[1], cu.m_mv[1], m_numPartitions * sizeof(MV)); |
448 | 0 | memcpy(m_mvd[0], cu.m_mvd[0], m_numPartitions * sizeof(MV)); |
449 | 0 | memcpy(m_mvd[1], cu.m_mvd[1], m_numPartitions * sizeof(MV)); |
450 | 0 | memcpy(m_distortion, cu.m_distortion, m_numPartitions * sizeof(sse_t)); |
451 | | |
452 | | /* force TQBypass to true */ |
453 | 0 | m_partSet(m_tqBypass, true); |
454 | | |
455 | | /* clear residual coding flags */ |
456 | 0 | m_partSet(m_predMode, cu.m_predMode[0] & (MODE_INTRA | MODE_INTER)); |
457 | 0 | m_partSet(m_tuDepth, 0); |
458 | 0 | m_partSet(m_cbf[0], 0); |
459 | 0 | m_partSet(m_transformSkip[0], 0); |
460 | |
|
461 | 0 | if (cu.m_chromaFormat != X265_CSP_I400) |
462 | 0 | { |
463 | 0 | m_partSet(m_chromaIntraDir, (uint8_t)ALL_IDX); |
464 | 0 | m_partSet(m_cbf[1], 0); |
465 | 0 | m_partSet(m_cbf[2], 0); |
466 | 0 | m_partSet(m_transformSkip[1], 0); |
467 | 0 | m_partSet(m_transformSkip[2], 0); |
468 | 0 | } |
469 | 0 | } |
470 | | |
471 | | /* Copy completed predicted CU to CTU in picture */ |
472 | | void CUData::copyToPic(uint32_t depth) const |
473 | 454k | { |
474 | 454k | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); |
475 | | |
476 | 454k | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); |
477 | 454k | m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis); |
478 | 454k | m_partCopy(ctu.m_log2CUSize + m_absIdxInCTU, m_log2CUSize); |
479 | 454k | m_partCopy(ctu.m_lumaIntraDir + m_absIdxInCTU, m_lumaIntraDir); |
480 | 454k | m_partCopy(ctu.m_tqBypass + m_absIdxInCTU, m_tqBypass); |
481 | 454k | m_partCopy((uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU, (uint8_t*)m_refIdx[0]); |
482 | 454k | m_partCopy((uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU, (uint8_t*)m_refIdx[1]); |
483 | 454k | m_partCopy(ctu.m_cuDepth + m_absIdxInCTU, m_cuDepth); |
484 | 454k | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
485 | 454k | m_partCopy(ctu.m_partSize + m_absIdxInCTU, m_partSize); |
486 | 454k | m_partCopy(ctu.m_mergeFlag + m_absIdxInCTU, m_mergeFlag); |
487 | 454k | m_partCopy(ctu.m_interDir + m_absIdxInCTU, m_interDir); |
488 | 454k | m_partCopy(ctu.m_mvpIdx[0] + m_absIdxInCTU, m_mvpIdx[0]); |
489 | 454k | m_partCopy(ctu.m_mvpIdx[1] + m_absIdxInCTU, m_mvpIdx[1]); |
490 | 454k | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); |
491 | 454k | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); |
492 | 454k | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); |
493 | | |
494 | 454k | memcpy(ctu.m_mv[0] + m_absIdxInCTU, m_mv[0], m_numPartitions * sizeof(MV)); |
495 | 454k | memcpy(ctu.m_mv[1] + m_absIdxInCTU, m_mv[1], m_numPartitions * sizeof(MV)); |
496 | 454k | memcpy(ctu.m_mvd[0] + m_absIdxInCTU, m_mvd[0], m_numPartitions * sizeof(MV)); |
497 | 454k | memcpy(ctu.m_mvd[1] + m_absIdxInCTU, m_mvd[1], m_numPartitions * sizeof(MV)); |
498 | | |
499 | 454k | memcpy(ctu.m_distortion + m_absIdxInCTU, m_distortion, m_numPartitions * sizeof(sse_t)); |
500 | | |
501 | 454k | uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2); |
502 | 454k | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); |
503 | 454k | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY); |
504 | | |
505 | 454k | if (ctu.m_chromaFormat != X265_CSP_I400) |
506 | 453k | { |
507 | 453k | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); |
508 | 453k | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); |
509 | 453k | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); |
510 | 453k | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); |
511 | 453k | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); |
512 | | |
513 | 453k | uint32_t tmpC = tmpY >> (m_hChromaShift + m_vChromaShift); |
514 | 453k | uint32_t tmpC2 = tmpY2 >> (m_hChromaShift + m_vChromaShift); |
515 | 453k | memcpy(ctu.m_trCoeff[1] + tmpC2, m_trCoeff[1], sizeof(coeff_t) * tmpC); |
516 | 453k | memcpy(ctu.m_trCoeff[2] + tmpC2, m_trCoeff[2], sizeof(coeff_t) * tmpC); |
517 | 453k | } |
518 | 454k | } |
519 | | |
520 | | /* The reverse of copyToPic, called only by encodeResidue */ |
521 | | void CUData::copyFromPic(const CUData& ctu, const CUGeom& cuGeom, int csp, bool copyQp) |
522 | 0 | { |
523 | 0 | m_encData = ctu.m_encData; |
524 | 0 | m_slice = ctu.m_slice; |
525 | 0 | m_cuAddr = ctu.m_cuAddr; |
526 | 0 | m_cuPelX = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; |
527 | 0 | m_cuPelY = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; |
528 | 0 | m_absIdxInCTU = cuGeom.absPartIdx; |
529 | 0 | m_numPartitions = cuGeom.numPartitions; |
530 | | |
531 | | /* copy out all prediction info for this part */ |
532 | 0 | if (copyQp) |
533 | 0 | { |
534 | 0 | m_partCopy((uint8_t*)m_qp, (uint8_t*)ctu.m_qp + m_absIdxInCTU); |
535 | 0 | m_partCopy((uint8_t*)m_qpAnalysis, (uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU); |
536 | 0 | } |
537 | |
|
538 | 0 | m_partCopy(m_log2CUSize, ctu.m_log2CUSize + m_absIdxInCTU); |
539 | 0 | m_partCopy(m_lumaIntraDir, ctu.m_lumaIntraDir + m_absIdxInCTU); |
540 | 0 | m_partCopy(m_tqBypass, ctu.m_tqBypass + m_absIdxInCTU); |
541 | 0 | m_partCopy((uint8_t*)m_refIdx[0], (uint8_t*)ctu.m_refIdx[0] + m_absIdxInCTU); |
542 | 0 | m_partCopy((uint8_t*)m_refIdx[1], (uint8_t*)ctu.m_refIdx[1] + m_absIdxInCTU); |
543 | 0 | m_partCopy(m_cuDepth, ctu.m_cuDepth + m_absIdxInCTU); |
544 | 0 | m_partSet(m_predMode, ctu.m_predMode[m_absIdxInCTU] & (MODE_INTRA | MODE_INTER)); /* clear skip flag */ |
545 | 0 | m_partCopy(m_partSize, ctu.m_partSize + m_absIdxInCTU); |
546 | 0 | m_partCopy(m_mergeFlag, ctu.m_mergeFlag + m_absIdxInCTU); |
547 | 0 | m_partCopy(m_interDir, ctu.m_interDir + m_absIdxInCTU); |
548 | 0 | m_partCopy(m_mvpIdx[0], ctu.m_mvpIdx[0] + m_absIdxInCTU); |
549 | 0 | m_partCopy(m_mvpIdx[1], ctu.m_mvpIdx[1] + m_absIdxInCTU); |
550 | 0 | m_partCopy(m_chromaIntraDir, ctu.m_chromaIntraDir + m_absIdxInCTU); |
551 | |
|
552 | 0 | memcpy(m_mv[0], ctu.m_mv[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
553 | 0 | memcpy(m_mv[1], ctu.m_mv[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
554 | 0 | memcpy(m_mvd[0], ctu.m_mvd[0] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
555 | 0 | memcpy(m_mvd[1], ctu.m_mvd[1] + m_absIdxInCTU, m_numPartitions * sizeof(MV)); |
556 | |
|
557 | 0 | memcpy(m_distortion, ctu.m_distortion + m_absIdxInCTU, m_numPartitions * sizeof(sse_t)); |
558 | | |
559 | | /* clear residual coding flags */ |
560 | 0 | m_partSet(m_tuDepth, 0); |
561 | 0 | m_partSet(m_transformSkip[0], 0); |
562 | 0 | m_partSet(m_cbf[0], 0); |
563 | |
|
564 | 0 | if (csp != X265_CSP_I400) |
565 | 0 | { |
566 | 0 | m_partSet(m_transformSkip[1], 0); |
567 | 0 | m_partSet(m_transformSkip[2], 0); |
568 | 0 | m_partSet(m_cbf[1], 0); |
569 | 0 | m_partSet(m_cbf[2], 0); |
570 | 0 | } |
571 | 0 | } |
572 | | |
573 | | /* Only called by encodeResidue, these fields can be modified during inter/intra coding */ |
574 | | void CUData::updatePic(uint32_t depth, int picCsp) const |
575 | 0 | { |
576 | 0 | CUData& ctu = *m_encData->getPicCTU(m_cuAddr); |
577 | |
|
578 | 0 | m_partCopy((uint8_t*)ctu.m_qp + m_absIdxInCTU, (uint8_t*)m_qp); |
579 | 0 | m_partCopy((uint8_t*)ctu.m_qpAnalysis + m_absIdxInCTU, (uint8_t*)m_qpAnalysis); |
580 | 0 | m_partCopy(ctu.m_transformSkip[0] + m_absIdxInCTU, m_transformSkip[0]); |
581 | 0 | m_partCopy(ctu.m_predMode + m_absIdxInCTU, m_predMode); |
582 | 0 | m_partCopy(ctu.m_tuDepth + m_absIdxInCTU, m_tuDepth); |
583 | 0 | m_partCopy(ctu.m_cbf[0] + m_absIdxInCTU, m_cbf[0]); |
584 | |
|
585 | 0 | uint32_t tmpY = 1 << ((m_slice->m_param->maxLog2CUSize - depth) * 2); |
586 | 0 | uint32_t tmpY2 = m_absIdxInCTU << (LOG2_UNIT_SIZE * 2); |
587 | 0 | memcpy(ctu.m_trCoeff[0] + tmpY2, m_trCoeff[0], sizeof(coeff_t)* tmpY); |
588 | |
|
589 | 0 | if (ctu.m_chromaFormat != X265_CSP_I400 && picCsp != X265_CSP_I400) |
590 | 0 | { |
591 | 0 | m_partCopy(ctu.m_transformSkip[1] + m_absIdxInCTU, m_transformSkip[1]); |
592 | 0 | m_partCopy(ctu.m_transformSkip[2] + m_absIdxInCTU, m_transformSkip[2]); |
593 | |
|
594 | 0 | m_partCopy(ctu.m_cbf[1] + m_absIdxInCTU, m_cbf[1]); |
595 | 0 | m_partCopy(ctu.m_cbf[2] + m_absIdxInCTU, m_cbf[2]); |
596 | 0 | m_partCopy(ctu.m_chromaIntraDir + m_absIdxInCTU, m_chromaIntraDir); |
597 | |
|
598 | 0 | tmpY >>= m_hChromaShift + m_vChromaShift; |
599 | 0 | tmpY2 >>= m_hChromaShift + m_vChromaShift; |
600 | 0 | memcpy(ctu.m_trCoeff[1] + tmpY2, m_trCoeff[1], sizeof(coeff_t) * tmpY); |
601 | 0 | memcpy(ctu.m_trCoeff[2] + tmpY2, m_trCoeff[2], sizeof(coeff_t) * tmpY); |
602 | 0 | } |
603 | 0 | } |
604 | | |
605 | | const CUData* CUData::getPULeft(uint32_t& lPartUnitIdx, uint32_t curPartUnitIdx) const |
606 | 26.8M | { |
607 | 26.8M | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
608 | | |
609 | 26.8M | if (!isZeroCol(absPartIdx)) |
610 | 21.8M | { |
611 | 21.8M | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
612 | 21.8M | lPartUnitIdx = g_rasterToZscan[absPartIdx - 1]; |
613 | 21.8M | if (isEqualCol(absPartIdx, absZorderCUIdx)) |
614 | 15.8M | return m_encData->getPicCTU(m_cuAddr); |
615 | 6.04M | else |
616 | 6.04M | { |
617 | 6.04M | lPartUnitIdx -= m_absIdxInCTU; |
618 | 6.04M | return this; |
619 | 6.04M | } |
620 | 21.8M | } |
621 | | |
622 | 4.98M | lPartUnitIdx = g_rasterToZscan[absPartIdx + s_numPartInCUSize - 1]; |
623 | 4.98M | return m_cuLeft; |
624 | 26.8M | } |
625 | | |
626 | | const CUData* CUData::getPUAbove(uint32_t& aPartUnitIdx, uint32_t curPartUnitIdx) const |
627 | 25.7M | { |
628 | 25.7M | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
629 | | |
630 | 25.7M | if (!isZeroRow(absPartIdx)) |
631 | 21.7M | { |
632 | 21.7M | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
633 | 21.7M | aPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE]; |
634 | 21.7M | if (isEqualRow(absPartIdx, absZorderCUIdx)) |
635 | 15.7M | return m_encData->getPicCTU(m_cuAddr); |
636 | 5.99M | else |
637 | 5.99M | aPartUnitIdx -= m_absIdxInCTU; |
638 | 5.99M | return this; |
639 | 21.7M | } |
640 | | |
641 | 4.03M | aPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)]; |
642 | 4.03M | return m_cuAbove; |
643 | 25.7M | } |
644 | | |
645 | | const CUData* CUData::getPUAboveLeft(uint32_t& alPartUnitIdx, uint32_t curPartUnitIdx) const |
646 | 10.8M | { |
647 | 10.8M | uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx]; |
648 | | |
649 | 10.8M | if (!isZeroCol(absPartIdx)) |
650 | 9.27M | { |
651 | 9.27M | if (!isZeroRow(absPartIdx)) |
652 | 8.05M | { |
653 | 8.05M | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU]; |
654 | 8.05M | alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE - 1]; |
655 | 8.05M | if (isEqualRowOrCol(absPartIdx, absZorderCUIdx)) |
656 | 6.62M | return m_encData->getPicCTU(m_cuAddr); |
657 | 1.42M | else |
658 | 1.42M | { |
659 | 1.42M | alPartUnitIdx -= m_absIdxInCTU; |
660 | 1.42M | return this; |
661 | 1.42M | } |
662 | 8.05M | } |
663 | 1.21M | alPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1]; |
664 | 1.21M | return m_cuAbove; |
665 | 9.27M | } |
666 | | |
667 | 1.60M | if (!isZeroRow(absPartIdx)) |
668 | 1.22M | { |
669 | 1.22M | alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE + s_numPartInCUSize - 1]; |
670 | 1.22M | return m_cuLeft; |
671 | 1.22M | } |
672 | | |
673 | 382k | alPartUnitIdx = m_encData->m_param->num4x4Partitions - 1; |
674 | 382k | return m_cuAboveLeft; |
675 | 1.60M | } |
676 | | |
677 | | const CUData* CUData::getPUAboveRight(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx) const |
678 | 0 | { |
679 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picWidthInLumaSamples) |
680 | 0 | return NULL; |
681 | | |
682 | 0 | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; |
683 | |
|
684 | 0 | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1)) |
685 | 0 | { |
686 | 0 | if (!isZeroRow(absPartIdxRT)) |
687 | 0 | { |
688 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - RASTER_SIZE + 1]) |
689 | 0 | { |
690 | 0 | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; |
691 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - RASTER_SIZE + 1]; |
692 | 0 | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx)) |
693 | 0 | return m_encData->getPicCTU(m_cuAddr); |
694 | 0 | else |
695 | 0 | { |
696 | 0 | arPartUnitIdx -= m_absIdxInCTU; |
697 | 0 | return this; |
698 | 0 | } |
699 | 0 | } |
700 | 0 | return NULL; |
701 | 0 | } |
702 | 0 | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + 1]; |
703 | 0 | return m_cuAbove; |
704 | 0 | } |
705 | | |
706 | 0 | if (!isZeroRow(absPartIdxRT)) |
707 | 0 | return NULL; |
708 | | |
709 | 0 | arPartUnitIdx = g_rasterToZscan[(s_numPartInCUSize - 1) << LOG2_RASTER_SIZE]; |
710 | 0 | return m_cuAboveRight; |
711 | 0 | } |
712 | | |
713 | | const CUData* CUData::getPUBelowLeft(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx) const |
714 | 0 | { |
715 | 0 | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + UNIT_SIZE) >= m_slice->m_sps->picHeightInLumaSamples) |
716 | 0 | return NULL; |
717 | | |
718 | 0 | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; |
719 | |
|
720 | 0 | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - 1)) |
721 | 0 | { |
722 | 0 | if (!isZeroCol(absPartIdxLB)) |
723 | 0 | { |
724 | 0 | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + RASTER_SIZE - 1]) |
725 | 0 | { |
726 | 0 | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE); |
727 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + RASTER_SIZE - 1]; |
728 | 0 | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB)) |
729 | 0 | return m_encData->getPicCTU(m_cuAddr); |
730 | 0 | else |
731 | 0 | { |
732 | 0 | blPartUnitIdx -= m_absIdxInCTU; |
733 | 0 | return this; |
734 | 0 | } |
735 | 0 | } |
736 | 0 | return NULL; |
737 | 0 | } |
738 | 0 | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + RASTER_SIZE + s_numPartInCUSize - 1]; |
739 | 0 | return m_cuLeft; |
740 | 0 | } |
741 | | |
742 | 0 | return NULL; |
743 | 0 | } |
744 | | |
745 | | const CUData* CUData::getPUBelowLeftAdi(uint32_t& blPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const |
746 | 18.6M | { |
747 | 18.6M | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picHeightInLumaSamples) |
748 | 882k | return NULL; |
749 | | |
750 | 17.7M | uint32_t absPartIdxLB = g_zscanToRaster[curPartUnitIdx]; |
751 | | |
752 | 17.7M | if (lessThanRow(absPartIdxLB, s_numPartInCUSize - partUnitOffset)) |
753 | 15.0M | { |
754 | 15.0M | if (!isZeroCol(absPartIdxLB)) |
755 | 12.4M | { |
756 | 12.4M | if (curPartUnitIdx > g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) - 1]) |
757 | 3.45M | { |
758 | 3.45M | uint32_t absZorderCUIdxLB = g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1) << LOG2_RASTER_SIZE); |
759 | 3.45M | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) - 1]; |
760 | 3.45M | if (isEqualRowOrCol(absPartIdxLB, absZorderCUIdxLB)) |
761 | 3.45M | return m_encData->getPicCTU(m_cuAddr); |
762 | 18.4E | else |
763 | 18.4E | { |
764 | 18.4E | blPartUnitIdx -= m_absIdxInCTU; |
765 | 18.4E | return this; |
766 | 18.4E | } |
767 | 3.45M | } |
768 | 9.04M | return NULL; |
769 | 12.4M | } |
770 | 2.51M | blPartUnitIdx = g_rasterToZscan[absPartIdxLB + (partUnitOffset << LOG2_RASTER_SIZE) + s_numPartInCUSize - 1]; |
771 | 2.51M | return m_cuLeft; |
772 | 15.0M | } |
773 | | |
774 | 2.72M | return NULL; |
775 | 17.7M | } |
776 | | |
777 | | const CUData* CUData::getPUAboveRightAdi(uint32_t& arPartUnitIdx, uint32_t curPartUnitIdx, uint32_t partUnitOffset) const |
778 | 18.6M | { |
779 | 18.6M | if ((m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[curPartUnitIdx] + (partUnitOffset << LOG2_UNIT_SIZE)) >= m_slice->m_sps->picWidthInLumaSamples) |
780 | 837k | return NULL; |
781 | | |
782 | 17.8M | uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx]; |
783 | | |
784 | 17.8M | if (lessThanCol(absPartIdxRT, s_numPartInCUSize - partUnitOffset)) |
785 | 15.0M | { |
786 | 15.0M | if (!isZeroRow(absPartIdxRT)) |
787 | 12.5M | { |
788 | 12.5M | if (curPartUnitIdx > g_rasterToZscan[absPartIdxRT - RASTER_SIZE + partUnitOffset]) |
789 | 9.11M | { |
790 | 9.11M | uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1; |
791 | 9.11M | arPartUnitIdx = g_rasterToZscan[absPartIdxRT - RASTER_SIZE + partUnitOffset]; |
792 | 9.11M | if (isEqualRowOrCol(absPartIdxRT, absZorderCUIdx)) |
793 | 7.55M | return m_encData->getPicCTU(m_cuAddr); |
794 | 1.56M | else |
795 | 1.56M | { |
796 | 1.56M | arPartUnitIdx -= m_absIdxInCTU; |
797 | 1.56M | return this; |
798 | 1.56M | } |
799 | 9.11M | } |
800 | 3.40M | return NULL; |
801 | 12.5M | } |
802 | 2.55M | arPartUnitIdx = g_rasterToZscan[absPartIdxRT + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset]; |
803 | 2.55M | return m_cuAbove; |
804 | 15.0M | } |
805 | | |
806 | 2.73M | if (!isZeroRow(absPartIdxRT)) |
807 | 1.86M | return NULL; |
808 | | |
809 | 872k | arPartUnitIdx = g_rasterToZscan[((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) + partUnitOffset - 1]; |
810 | 872k | return m_cuAboveRight; |
811 | 2.73M | } |
812 | | |
813 | | /* Get left QpMinCu */ |
814 | | const CUData* CUData::getQpMinCuLeft(uint32_t& lPartUnitIdx, uint32_t curAbsIdxInCTU) const |
815 | 101k | { |
816 | 101k | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); |
817 | 101k | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; |
818 | | |
819 | | // check for left CTU boundary |
820 | 101k | if (isZeroCol(absRorderQpMinCUIdx)) |
821 | 68.4k | return NULL; |
822 | | |
823 | | // get index of left-CU relative to top-left corner of current quantization group |
824 | 32.7k | lPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - 1]; |
825 | | |
826 | | // return pointer to current CTU |
827 | 32.7k | return m_encData->getPicCTU(m_cuAddr); |
828 | 101k | } |
829 | | |
830 | | /* Get above QpMinCu */ |
831 | | const CUData* CUData::getQpMinCuAbove(uint32_t& aPartUnitIdx, uint32_t curAbsIdxInCTU) const |
832 | 101k | { |
833 | 101k | uint32_t absZorderQpMinCUIdx = curAbsIdxInCTU & (0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2); |
834 | 101k | uint32_t absRorderQpMinCUIdx = g_zscanToRaster[absZorderQpMinCUIdx]; |
835 | | |
836 | | // check for top CTU boundary |
837 | 101k | if (isZeroRow(absRorderQpMinCUIdx)) |
838 | 69.5k | return NULL; |
839 | | |
840 | | // get index of top-CU relative to top-left corner of current quantization group |
841 | 31.5k | aPartUnitIdx = g_rasterToZscan[absRorderQpMinCUIdx - RASTER_SIZE]; |
842 | | |
843 | | // return pointer to current CTU |
844 | 31.5k | return m_encData->getPicCTU(m_cuAddr); |
845 | 101k | } |
846 | | |
847 | | /* Get reference QP from left QpMinCu or latest coded QP */ |
848 | | int8_t CUData::getRefQP(uint32_t curAbsIdxInCTU) const |
849 | 101k | { |
850 | 101k | uint32_t lPartIdx = 0, aPartIdx = 0; |
851 | 101k | const CUData* cULeft = getQpMinCuLeft(lPartIdx, m_absIdxInCTU + curAbsIdxInCTU); |
852 | 101k | const CUData* cUAbove = getQpMinCuAbove(aPartIdx, m_absIdxInCTU + curAbsIdxInCTU); |
853 | | |
854 | 101k | return ((cULeft ? cULeft->m_qp[lPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + (cUAbove ? cUAbove->m_qp[aPartIdx] : getLastCodedQP(curAbsIdxInCTU)) + 1) >> 1; |
855 | 101k | } |
856 | | |
857 | | int CUData::getLastValidPartIdx(int absPartIdx) const |
858 | 228k | { |
859 | 228k | int lastValidPartIdx = absPartIdx - 1; |
860 | | |
861 | 288k | while (lastValidPartIdx >= 0 && m_predMode[lastValidPartIdx] == MODE_NONE) |
862 | 60.1k | { |
863 | 60.1k | uint32_t depth = m_cuDepth[lastValidPartIdx]; |
864 | 60.1k | lastValidPartIdx -= m_numPartitions >> (depth << 1); |
865 | 60.1k | } |
866 | | |
867 | 228k | return lastValidPartIdx; |
868 | 228k | } |
869 | | |
870 | | int8_t CUData::getLastCodedQP(uint32_t absPartIdx) const |
871 | 228k | { |
872 | 228k | uint32_t quPartIdxMask = 0xFF << (m_encData->m_param->unitSizeDepth - m_slice->m_pps->maxCuDQPDepth) * 2; |
873 | 228k | int lastValidPartIdx = getLastValidPartIdx(absPartIdx & quPartIdxMask); |
874 | | |
875 | 228k | if (lastValidPartIdx >= 0) |
876 | 110k | return m_qp[lastValidPartIdx]; |
877 | 117k | else |
878 | 117k | { |
879 | 117k | if (m_absIdxInCTU) |
880 | 13.3k | return m_encData->getPicCTU(m_cuAddr)->getLastCodedQP(m_absIdxInCTU); |
881 | 103k | else if (m_cuAddr > 0 && !(m_slice->m_pps->bEntropyCodingSyncEnabled && !(m_cuAddr % m_slice->m_sps->numCuInWidth))) |
882 | 76.7k | return m_encData->getPicCTU(m_cuAddr - 1)->getLastCodedQP(m_encData->m_param->num4x4Partitions); |
883 | 27.0k | else |
884 | 27.0k | return (int8_t)m_slice->m_sliceQp; |
885 | 117k | } |
886 | 228k | } |
887 | | |
888 | | /* Get allowed chroma intra modes */ |
889 | | void CUData::getAllowedChromaDir(uint32_t absPartIdx, uint32_t* modeList) const |
890 | 1.63M | { |
891 | 1.63M | modeList[0] = PLANAR_IDX; |
892 | 1.63M | modeList[1] = VER_IDX; |
893 | 1.63M | modeList[2] = HOR_IDX; |
894 | 1.63M | modeList[3] = DC_IDX; |
895 | 1.63M | modeList[4] = DM_CHROMA_IDX; |
896 | | |
897 | 1.63M | uint32_t lumaMode = m_lumaIntraDir[absPartIdx]; |
898 | | |
899 | 2.72M | for (int i = 0; i < NUM_CHROMA_MODE - 1; i++) |
900 | 2.72M | { |
901 | 2.72M | if (lumaMode == modeList[i]) |
902 | 1.63M | { |
903 | 1.63M | modeList[i] = 34; // VER+8 mode |
904 | 1.63M | break; |
905 | 1.63M | } |
906 | 2.72M | } |
907 | 1.63M | } |
908 | | |
909 | | /* Get most probable intra modes */ |
910 | | int CUData::getIntraDirLumaPredictor(uint32_t absPartIdx, uint32_t* intraDirPred) const |
911 | 7.65M | { |
912 | 7.65M | const CUData* tempCU; |
913 | 7.65M | uint32_t tempPartIdx; |
914 | 7.65M | uint32_t leftIntraDir, aboveIntraDir; |
915 | | |
916 | | // Get intra direction of left PU |
917 | 7.65M | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
918 | | |
919 | 7.65M | leftIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; |
920 | | |
921 | | // Get intra direction of above PU |
922 | 7.65M | tempCU = g_zscanToPelY[m_absIdxInCTU + absPartIdx] > 0 ? getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx) : NULL; |
923 | | |
924 | 7.65M | aboveIntraDir = (tempCU && tempCU->isIntra(tempPartIdx)) ? tempCU->m_lumaIntraDir[tempPartIdx] : DC_IDX; |
925 | | |
926 | 7.65M | if (leftIntraDir == aboveIntraDir) |
927 | 4.38M | { |
928 | 4.38M | if (leftIntraDir >= 2) // angular modes |
929 | 80.1k | { |
930 | 80.1k | intraDirPred[0] = leftIntraDir; |
931 | 80.1k | intraDirPred[1] = ((leftIntraDir - 2 + 31) & 31) + 2; |
932 | 80.1k | intraDirPred[2] = ((leftIntraDir - 2 + 1) & 31) + 2; |
933 | 80.1k | } |
934 | 4.30M | else //non-angular |
935 | 4.30M | { |
936 | 4.30M | intraDirPred[0] = PLANAR_IDX; |
937 | 4.30M | intraDirPred[1] = DC_IDX; |
938 | 4.30M | intraDirPred[2] = VER_IDX; |
939 | 4.30M | } |
940 | 4.38M | return 1; |
941 | 4.38M | } |
942 | 3.26M | else |
943 | 3.26M | { |
944 | 3.26M | intraDirPred[0] = leftIntraDir; |
945 | 3.26M | intraDirPred[1] = aboveIntraDir; |
946 | | |
947 | 3.26M | if (leftIntraDir && aboveIntraDir) //both modes are non-planar |
948 | 50.4k | intraDirPred[2] = PLANAR_IDX; |
949 | 3.21M | else |
950 | 18.4E | intraDirPred[2] = (leftIntraDir + aboveIntraDir) < 2 ? VER_IDX : DC_IDX; |
951 | 3.26M | return 2; |
952 | 3.26M | } |
953 | 7.65M | } |
954 | | |
955 | | uint32_t CUData::getCtxSplitFlag(uint32_t absPartIdx, uint32_t depth) const |
956 | 269k | { |
957 | 269k | const CUData* tempCU; |
958 | 269k | uint32_t tempPartIdx; |
959 | 269k | uint32_t ctx; |
960 | | |
961 | | // Get left split flag |
962 | 269k | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
963 | 269k | ctx = (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; |
964 | | |
965 | | // Get above split flag |
966 | 269k | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); |
967 | 269k | ctx += (tempCU) ? ((tempCU->m_cuDepth[tempPartIdx] > depth) ? 1 : 0) : 0; |
968 | | |
969 | 269k | return ctx; |
970 | 269k | } |
971 | | |
972 | | void CUData::getIntraTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const |
973 | 861k | { |
974 | 861k | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; |
975 | 861k | uint32_t splitFlag = m_partSize[absPartIdx] != SIZE_2Nx2N; |
976 | | |
977 | 861k | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; |
978 | 861k | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; |
979 | | |
980 | 861k | tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (m_slice->m_sps->quadtreeTUMaxDepthIntra - 1 + splitFlag)); |
981 | 861k | } |
982 | | |
983 | | void CUData::getInterTUQtDepthRange(uint32_t tuDepthRange[2], uint32_t absPartIdx) const |
984 | 0 | { |
985 | 0 | uint32_t log2CUSize = m_log2CUSize[absPartIdx]; |
986 | 0 | uint32_t quadtreeTUMaxDepth = m_slice->m_sps->quadtreeTUMaxDepthInter; |
987 | 0 | uint32_t splitFlag = quadtreeTUMaxDepth == 1 && m_partSize[absPartIdx] != SIZE_2Nx2N; |
988 | |
|
989 | 0 | tuDepthRange[0] = m_slice->m_sps->quadtreeTULog2MinSize; |
990 | 0 | tuDepthRange[1] = m_slice->m_sps->quadtreeTULog2MaxSize; |
991 | |
|
992 | 0 | tuDepthRange[0] = x265_clip3(tuDepthRange[0], tuDepthRange[1], log2CUSize - (quadtreeTUMaxDepth - 1 + splitFlag)); |
993 | 0 | } |
994 | | |
995 | | uint32_t CUData::getCtxSkipFlag(uint32_t absPartIdx) const |
996 | 0 | { |
997 | 0 | const CUData* tempCU; |
998 | 0 | uint32_t tempPartIdx; |
999 | 0 | uint32_t ctx; |
1000 | | |
1001 | | // Get BCBP of left PU |
1002 | 0 | tempCU = getPULeft(tempPartIdx, m_absIdxInCTU + absPartIdx); |
1003 | 0 | ctx = tempCU ? tempCU->isSkipped(tempPartIdx) : 0; |
1004 | | |
1005 | | // Get BCBP of above PU |
1006 | 0 | tempCU = getPUAbove(tempPartIdx, m_absIdxInCTU + absPartIdx); |
1007 | 0 | ctx += tempCU ? tempCU->isSkipped(tempPartIdx) : 0; |
1008 | |
|
1009 | 0 | return ctx; |
1010 | 0 | } |
1011 | | |
1012 | | bool CUData::setQPSubCUs(int8_t qp, uint32_t absPartIdx, uint32_t depth) |
1013 | 1.00k | { |
1014 | 1.00k | uint32_t curPartNumb = m_encData->m_param->num4x4Partitions >> (depth << 1); |
1015 | 1.00k | uint32_t curPartNumQ = curPartNumb >> 2; |
1016 | | |
1017 | 1.00k | if (m_cuDepth[absPartIdx] > depth) |
1018 | 503 | { |
1019 | 503 | for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++) |
1020 | 503 | if (setQPSubCUs(qp, absPartIdx + subPartIdx * curPartNumQ, depth + 1)) |
1021 | 503 | return true; |
1022 | 503 | } |
1023 | 503 | else |
1024 | 503 | { |
1025 | 503 | if (getQtRootCbf(absPartIdx)) |
1026 | 503 | return true; |
1027 | 0 | else |
1028 | 0 | setQPSubParts(qp, absPartIdx, depth); |
1029 | 503 | } |
1030 | | |
1031 | 0 | return false; |
1032 | 1.00k | } |
1033 | | |
/* Write `dir` into m_interDir for a prediction unit of the CU.
 *
 * dir        - value stored in every selected m_interDir entry
 * absPartIdx - base index into m_interDir; m_partSize[absPartIdx] selects the layout
 * puIdx      - PU number within the CU, must be 0 or 1 (checked); only the asymmetric
 *              part sizes (2NxnU, 2NxnD, nLx2N, nRx2N) consult it
 *
 * All ranges are expressed in terms of curPartNumQ, one quarter of m_numPartitions.
 * NOTE(review): the symmetric cases ignore puIdx, so absPartIdx presumably already points
 * at the first partition of the PU being set - confirm against the callers. */
void CUData::setPUInterDir(uint8_t dir, uint32_t absPartIdx, uint32_t puIdx)
{
    uint32_t curPartNumQ = m_numPartitions >> 2;
    X265_CHECK(puIdx < 2, "unexpected part unit index\n");

    switch (m_partSize[absPartIdx])
    {
    case SIZE_2Nx2N:
        /* all four quarters */
        memset(m_interDir + absPartIdx, dir, 4 * curPartNumQ);
        break;
    case SIZE_2NxN:
        /* two consecutive quarters */
        memset(m_interDir + absPartIdx, dir, 2 * curPartNumQ);
        break;
    case SIZE_Nx2N:
        /* one quarter, skip one, then another quarter */
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
        memset(m_interDir + absPartIdx + 2 * curPartNumQ, dir, curPartNumQ);
        break;
    case SIZE_NxN:
        /* a single quarter */
        memset(m_interDir + absPartIdx, dir, curPartNumQ);
        break;
    case SIZE_2NxnU:
        if (!puIdx)
        {
            /* first half of each of two consecutive quarters */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
        }
        else
        {
            /* half a quarter, then two and a half quarters starting one quarter further on */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
            memset(m_interDir + absPartIdx + curPartNumQ, dir, ((curPartNumQ >> 1) + (curPartNumQ << 1)));
        }
        break;
    case SIZE_2NxnD:
        if (!puIdx)
        {
            /* two and a half quarters, then half a quarter starting three quarters further on */
            memset(m_interDir + absPartIdx, dir, ((curPartNumQ << 1) + (curPartNumQ >> 1)));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ, dir, (curPartNumQ >> 1));
        }
        else
        {
            /* first half of each of two consecutive quarters */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 1));
            memset(m_interDir + absPartIdx + curPartNumQ, dir, (curPartNumQ >> 1));
        }
        break;
    case SIZE_nLx2N:
        if (!puIdx)
        {
            /* four runs of a quarter-of-a-quarter each, at offsets 0, Q/2, 2Q and 2Q + Q/2 */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
        }
        else
        {
            /* same four offsets, but the runs at Q/2 and 2Q + Q/2 are Q + Q/4 long */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
        }
        break;
    case SIZE_nRx2N:
        if (!puIdx)
        {
            /* runs of Q + Q/4 at offsets 0 and 2Q, runs of Q/4 at offsets Q + Q/2 and 3Q + Q/2 */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ + (curPartNumQ >> 2)));
            memset(m_interDir + absPartIdx + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ + (curPartNumQ >> 2)));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + curPartNumQ + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
        }
        else
        {
            /* four runs of Q/4 each, at offsets 0, Q/2, 2Q and 2Q + Q/2 */
            memset(m_interDir + absPartIdx, dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1), dir, (curPartNumQ >> 2));
            memset(m_interDir + absPartIdx + (curPartNumQ << 1) + (curPartNumQ >> 1), dir, (curPartNumQ >> 2));
        }
        break;
    default:
        X265_CHECK(0, "unexpected part type\n");
        break;
    }
}
1115 | | |
/* Store `val` into every element of `p` that belongs to one prediction unit.
 *
 * p          - per-partition array; it is advanced by absPartIdx before any write
 * val        - value to store
 * absPartIdx - base partition index; m_partSize[absPartIdx] selects the layout
 * puIdx      - PU number; only the asymmetric part sizes consult it
 *
 * Used by setPUMv (T = MV) and setPURefIdx (T = int8_t).
 * SIZE_NxN is not handled: it shares the default label, which only raises X265_CHECK. */
template<typename T>
void CUData::setAllPU(T* p, const T& val, int absPartIdx, int puIdx)
{
    int i;

    p += absPartIdx;
    int numElements = m_numPartitions;

    switch (m_partSize[absPartIdx])
    {
    case SIZE_2Nx2N:
        /* every partition of the CU */
        for (i = 0; i < numElements; i++)
            p[i] = val;
        break;

    case SIZE_2NxN:
        /* half of the partitions, contiguous */
        numElements >>= 1;
        for (i = 0; i < numElements; i++)
            p[i] = val;
        break;

    case SIZE_Nx2N:
        /* two quarter-sized runs, the second starting two quarters after the first */
        numElements >>= 2;
        for (i = 0; i < numElements; i++)
        {
            p[i] = val;
            p[i + 2 * numElements] = val;
        }
        break;

    case SIZE_2NxnU:
    {
        int curPartNumQ = numElements >> 2; /* one quarter of the CU's partitions */
        if (!puIdx)
        {
            /* first half of each of the first two quarters */
            T *pT = p;
            T *pT2 = p + curPartNumQ;
            for (i = 0; i < (curPartNumQ >> 1); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }
        }
        else
        {
            /* half a quarter at p ... */
            T *pT = p;
            for (i = 0; i < (curPartNumQ >> 1); i++)
                pT[i] = val;

            /* ... then two and a half quarters starting one quarter further on */
            pT = p + curPartNumQ;
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
                pT[i] = val;
        }
        break;
    }

    case SIZE_2NxnD:
    {
        int curPartNumQ = numElements >> 2; /* one quarter of the CU's partitions */
        if (!puIdx)
        {
            /* two and a half quarters at p ... */
            T *pT = p;
            for (i = 0; i < ((curPartNumQ >> 1) + (curPartNumQ << 1)); i++)
                pT[i] = val;

            /* ... then the first half of the last quarter */
            pT = p + (numElements - curPartNumQ);
            for (i = 0; i < (curPartNumQ >> 1); i++)
                pT[i] = val;
        }
        else
        {
            /* first half of each of two consecutive quarters */
            T *pT = p;
            T *pT2 = p + curPartNumQ;
            for (i = 0; i < (curPartNumQ >> 1); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }
        }
        break;
    }

    case SIZE_nLx2N:
    {
        int curPartNumQ = numElements >> 2; /* one quarter of the CU's partitions */
        if (!puIdx)
        {
            /* four runs of Q/4 elements at offsets 0, 2Q, Q/2 and 2Q + Q/2 */
            T *pT = p;
            T *pT2 = p + (curPartNumQ << 1);
            T *pT3 = p + (curPartNumQ >> 1);
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);

            for (i = 0; i < (curPartNumQ >> 2); i++)
            {
                pT[i] = val;
                pT2[i] = val;
                pT3[i] = val;
                pT4[i] = val;
            }
        }
        else
        {
            /* runs of Q/4 elements at offsets 0 and 2Q ... */
            T *pT = p;
            T *pT2 = p + (curPartNumQ << 1);
            for (i = 0; i < (curPartNumQ >> 2); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }

            /* ... and runs of Q + Q/4 elements at offsets Q/2 and 2Q + Q/2 */
            pT = p + (curPartNumQ >> 1);
            pT2 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }
        }
        break;
    }

    case SIZE_nRx2N:
    {
        int curPartNumQ = numElements >> 2; /* one quarter of the CU's partitions */
        if (!puIdx)
        {
            /* runs of Q + Q/4 elements at offsets 0 and 2Q ... */
            T *pT = p;
            T *pT2 = p + (curPartNumQ << 1);
            for (i = 0; i < ((curPartNumQ >> 2) + curPartNumQ); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }

            /* ... and runs of Q/4 elements at offsets Q + Q/2 and 3Q + Q/2 */
            pT = p + curPartNumQ + (curPartNumQ >> 1);
            pT2 = p + numElements - curPartNumQ + (curPartNumQ >> 1);
            for (i = 0; i < (curPartNumQ >> 2); i++)
            {
                pT[i] = val;
                pT2[i] = val;
            }
        }
        else
        {
            /* four runs of Q/4 elements at offsets 0, Q/2, 2Q and 2Q + Q/2 */
            T *pT = p;
            T *pT2 = p + (curPartNumQ >> 1);
            T *pT3 = p + (curPartNumQ << 1);
            T *pT4 = p + (curPartNumQ << 1) + (curPartNumQ >> 1);
            for (i = 0; i < (curPartNumQ >> 2); i++)
            {
                pT[i] = val;
                pT2[i] = val;
                pT3[i] = val;
                pT4[i] = val;
            }
        }
        break;
    }

    case SIZE_NxN:
    default:
        X265_CHECK(0, "unknown partition type\n");
        break;
    }
}
1281 | | |
1282 | | void CUData::setPUMv(int list, const MV& mv, int absPartIdx, int puIdx) |
1283 | 0 | { |
1284 | 0 | setAllPU(m_mv[list], mv, absPartIdx, puIdx); |
1285 | 0 | } |
1286 | | |
1287 | | void CUData::setPURefIdx(int list, int8_t refIdx, int absPartIdx, int puIdx) |
1288 | 0 | { |
1289 | 0 | setAllPU(m_refIdx[list], refIdx, absPartIdx, puIdx); |
1290 | 0 | } |
1291 | | |
1292 | | void CUData::getPartIndexAndSize(uint32_t partIdx, uint32_t& outPartAddr, int& outWidth, int& outHeight) const |
1293 | 0 | { |
1294 | 0 | int cuSize = 1 << m_log2CUSize[0]; |
1295 | 0 | int partType = m_partSize[0]; |
1296 | |
|
1297 | 0 | int tmp = partTable[partType][partIdx][0]; |
1298 | 0 | outWidth = ((tmp >> 4) * cuSize) >> 2; |
1299 | 0 | outHeight = ((tmp & 0xF) * cuSize) >> 2; |
1300 | 0 | outPartAddr = (partAddrTable[partType][partIdx] * m_numPartitions) >> 4; |
1301 | 0 | } |
1302 | | |
1303 | | void CUData::getMvField(const CUData* cu, uint32_t absPartIdx, int picList, MVField& outMvField) const |
1304 | 0 | { |
1305 | 0 | if (cu) |
1306 | 0 | { |
1307 | 0 | outMvField.mv = cu->m_mv[picList][absPartIdx]; |
1308 | 0 | outMvField.refIdx = cu->m_refIdx[picList][absPartIdx]; |
1309 | 0 | } |
1310 | 0 | else |
1311 | 0 | { |
1312 | | // OUT OF BOUNDARY |
1313 | 0 | outMvField.mv = 0; |
1314 | 0 | outMvField.refIdx = REF_NOT_VALID; |
1315 | 0 | } |
1316 | 0 | } |
1317 | | |
1318 | | void CUData::deriveLeftRightTopIdx(uint32_t partIdx, uint32_t& partIdxLT, uint32_t& partIdxRT) const |
1319 | 0 | { |
1320 | 0 | partIdxLT = m_absIdxInCTU; |
1321 | 0 | partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; |
1322 | |
|
1323 | 0 | switch (m_partSize[0]) |
1324 | 0 | { |
1325 | 0 | case SIZE_2Nx2N: break; |
1326 | 0 | case SIZE_2NxN: |
1327 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 1; |
1328 | 0 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 1; |
1329 | 0 | break; |
1330 | 0 | case SIZE_Nx2N: |
1331 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 2; |
1332 | 0 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 2; |
1333 | 0 | break; |
1334 | 0 | case SIZE_NxN: |
1335 | 0 | partIdxLT += (m_numPartitions >> 2) * partIdx; |
1336 | 0 | partIdxRT += (m_numPartitions >> 2) * (partIdx - 1); |
1337 | 0 | break; |
1338 | 0 | case SIZE_2NxnU: |
1339 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 3; |
1340 | 0 | partIdxRT += (partIdx == 0) ? 0 : m_numPartitions >> 3; |
1341 | 0 | break; |
1342 | 0 | case SIZE_2NxnD: |
1343 | 0 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); |
1344 | 0 | partIdxRT += (partIdx == 0) ? 0 : (m_numPartitions >> 1) + (m_numPartitions >> 3); |
1345 | 0 | break; |
1346 | 0 | case SIZE_nLx2N: |
1347 | 0 | partIdxLT += (partIdx == 0) ? 0 : m_numPartitions >> 4; |
1348 | 0 | partIdxRT -= (partIdx == 1) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); |
1349 | 0 | break; |
1350 | 0 | case SIZE_nRx2N: |
1351 | 0 | partIdxLT += (partIdx == 0) ? 0 : (m_numPartitions >> 2) + (m_numPartitions >> 4); |
1352 | 0 | partIdxRT -= (partIdx == 1) ? 0 : m_numPartitions >> 4; |
1353 | 0 | break; |
1354 | 0 | default: |
1355 | 0 | X265_CHECK(0, "unexpected part index\n"); |
1356 | 0 | break; |
1357 | 0 | } |
1358 | 0 | } |
1359 | | |
/* Returns a z-scan partition index for PU `puIdx`, derived in two steps: a base index
 * computed from the CU's position and size, then an offset chosen by m_partSize[0].
 * NOTE(review): by its name this is the PU's bottom-left partition - confirm with callers. */
uint32_t CUData::deriveLeftBottomIdx(uint32_t puIdx) const
{
    uint32_t outPartIdxLB;
    /* base: raster position of the CU's first partition, moved by (half the CU size in units - 1)
     * steps of the raster stride, converted back to z-scan order */
    outPartIdxLB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE)];

    /* offsets below are fractions of m_numPartitions, in z-scan order */
    switch (m_partSize[0])
    {
    case SIZE_2Nx2N:
        outPartIdxLB += m_numPartitions >> 1;
        break;
    case SIZE_2NxN:
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : 0;
        break;
    case SIZE_Nx2N:
        outPartIdxLB += puIdx ? (m_numPartitions >> 2) * 3 : m_numPartitions >> 1;
        break;
    case SIZE_NxN:
        outPartIdxLB += (m_numPartitions >> 2) * puIdx;
        break;
    case SIZE_2NxnU:
        /* the negative alternative is converted to unsigned, so the += acts as a subtraction */
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3);
        break;
    case SIZE_2NxnD:
        outPartIdxLB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3);
        break;
    case SIZE_nLx2N:
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 4) : m_numPartitions >> 1;
        break;
    case SIZE_nRx2N:
        outPartIdxLB += puIdx ? (m_numPartitions >> 1) + (m_numPartitions >> 2) + (m_numPartitions >> 4) : m_numPartitions >> 1;
        break;
    default:
        X265_CHECK(0, "unexpected part index\n");
        break;
    }
    return outPartIdxLB;
}
1397 | | |
1398 | | /* Derives the partition index of neighboring bottom right block */ |
1399 | | uint32_t CUData::deriveRightBottomIdx(uint32_t puIdx) const |
1400 | 0 | { |
1401 | 0 | uint32_t outPartIdxRB; |
1402 | 0 | outPartIdxRB = g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU] + |
1403 | 0 | (((1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE - 1)) - 1) << LOG2_RASTER_SIZE) + |
1404 | 0 | (1 << (m_log2CUSize[0] - LOG2_UNIT_SIZE)) - 1]; |
1405 | |
|
1406 | 0 | switch (m_partSize[0]) |
1407 | 0 | { |
1408 | 0 | case SIZE_2Nx2N: |
1409 | 0 | outPartIdxRB += m_numPartitions >> 1; |
1410 | 0 | break; |
1411 | 0 | case SIZE_2NxN: |
1412 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : 0; |
1413 | 0 | break; |
1414 | 0 | case SIZE_Nx2N: |
1415 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : m_numPartitions >> 2; |
1416 | 0 | break; |
1417 | 0 | case SIZE_NxN: |
1418 | 0 | outPartIdxRB += (m_numPartitions >> 2) * (puIdx - 1); |
1419 | 0 | break; |
1420 | 0 | case SIZE_2NxnU: |
1421 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : -((int)m_numPartitions >> 3); |
1422 | 0 | break; |
1423 | 0 | case SIZE_2NxnD: |
1424 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3); |
1425 | 0 | break; |
1426 | 0 | case SIZE_nLx2N: |
1427 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 3) + (m_numPartitions >> 4); |
1428 | 0 | break; |
1429 | 0 | case SIZE_nRx2N: |
1430 | 0 | outPartIdxRB += puIdx ? m_numPartitions >> 1 : (m_numPartitions >> 2) + (m_numPartitions >> 3) + (m_numPartitions >> 4); |
1431 | 0 | break; |
1432 | 0 | default: |
1433 | 0 | X265_CHECK(0, "unexpected part index\n"); |
1434 | 0 | break; |
1435 | 0 | } |
1436 | 0 | return outPartIdxRB; |
1437 | 0 | } |
1438 | | |
1439 | | bool CUData::hasEqualMotion(uint32_t absPartIdx, const CUData& candCU, uint32_t candAbsPartIdx) const |
1440 | 0 | { |
1441 | 0 | if (m_interDir[absPartIdx] != candCU.m_interDir[candAbsPartIdx]) |
1442 | 0 | return false; |
1443 | | |
1444 | 0 | for (uint32_t refListIdx = 0; refListIdx < 2; refListIdx++) |
1445 | 0 | { |
1446 | 0 | if (m_interDir[absPartIdx] & (1 << refListIdx)) |
1447 | 0 | { |
1448 | 0 | if (m_mv[refListIdx][absPartIdx] != candCU.m_mv[refListIdx][candAbsPartIdx] || |
1449 | 0 | m_refIdx[refListIdx][absPartIdx] != candCU.m_refIdx[refListIdx][candAbsPartIdx]) |
1450 | 0 | return false; |
1451 | 0 | } |
1452 | 0 | } |
1453 | | |
1454 | 0 | return true; |
1455 | 0 | } |
1456 | | |
1457 | | /* Construct list of merging candidates for PU puIdx, returns count of candidates written */ |
/* Candidates are appended to candMvField / candDir in the order below, and the
 * function returns as soon as m_slice->m_maxNumMergeCand entries exist:
 *   1. spatial neighbours: left (A1), above (B1), above right (B0), left bottom (A0)
 *      and above left (B2, only tried while fewer than four candidates were found)
 *   2. the temporal candidate, when the SPS enables temporal MVP
 *   3. combined bi-predictive candidates (B slices only)
 *   4. zero-motion candidates that fill every remaining entry
 * candMvField[i][list] receives the MV and reference index for each reference list;
 * candDir[i] receives the inter direction mask (bit 0 = list 0, bit 1 = list 1). */
1458 | | uint32_t CUData::getInterMergeCandidates(uint32_t absPartIdx, uint32_t puIdx, MVField(*candMvField)[2], uint8_t* candDir) const |
1459 | 0 | { |
1460 | 0 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; |
1461 | 0 | const bool isInterB = m_slice->isInterB(); |
1462 |

1463 | 0 | const uint32_t maxNumMergeCand = m_slice->m_maxNumMergeCand; |
1464 |

// start with every slot holding a zero MV and no valid reference in either list
1465 | 0 | for (uint32_t i = 0; i < maxNumMergeCand; ++i) |
1466 | 0 | { |
1467 | 0 | candMvField[i][0].mv = 0; |
1468 | 0 | candMvField[i][1].mv = 0; |
1469 | 0 | candMvField[i][0].refIdx = REF_NOT_VALID; |
1470 | 0 | candMvField[i][1].refIdx = REF_NOT_VALID; |
1471 | 0 | } |
1472 | |
1473 | | /* calculate the location of upper-left corner pixel and size of the current PU */ |
1474 | 0 | int xP, yP, nPSW, nPSH; |
1475 |

1476 | 0 | int cuSize = 1 << m_log2CUSize[0]; |
1477 | 0 | int partMode = m_partSize[0]; |
1478 |

// partTable packs two 4-bit fields per entry, each expressed in quarters of the CU size:
// entry [0] holds the PU width (high nibble) and height (low nibble),
// entry [1] holds the PU x offset (high nibble) and y offset (low nibble)
1479 | 0 | int tmp = partTable[partMode][puIdx][0]; |
1480 | 0 | nPSW = ((tmp >> 4) * cuSize) >> 2; |
1481 | 0 | nPSH = ((tmp & 0xF) * cuSize) >> 2; |
1482 |

1483 | 0 | tmp = partTable[partMode][puIdx][1]; |
1484 | 0 | xP = ((tmp >> 4) * cuSize) >> 2; |
1485 | 0 | yP = ((tmp & 0xF) * cuSize) >> 2; |
1486 |

1487 | 0 | uint32_t count = 0; |
1488 |

1489 | 0 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); |
1490 | 0 | PartSize curPS = (PartSize)m_partSize[absPartIdx]; |
1491 | |
1492 | | // left (A1); never used for the second PU of Nx2N / nLx2N / nRx2N partitions |
1493 | 0 | uint32_t leftPartIdx = 0; |
1494 | 0 | const CUData* cuLeft = getPULeft(leftPartIdx, partIdxLB); |
1495 | 0 | bool isAvailableA1 = cuLeft && |
1496 | 0 | cuLeft->isDiffMER(xP - 1, yP + nPSH - 1, xP, yP) && |
1497 | 0 | !(puIdx == 1 && (curPS == SIZE_Nx2N || curPS == SIZE_nLx2N || curPS == SIZE_nRx2N)) && |
1498 | 0 | cuLeft->isInter(leftPartIdx); |
1499 | 0 | if (isAvailableA1) |
1500 | 0 | { |
1501 | | // get Inter Dir |
1502 | 0 | candDir[count] = cuLeft->m_interDir[leftPartIdx]; |
1503 | | // get Mv from Left |
1504 | 0 | cuLeft->getMvField(cuLeft, leftPartIdx, 0, candMvField[count][0]); |
1505 | 0 | if (isInterB) |
1506 | 0 | cuLeft->getMvField(cuLeft, leftPartIdx, 1, candMvField[count][1]); |
1507 |

1508 | 0 | if (++count == maxNumMergeCand) |
1509 | 0 | return maxNumMergeCand; |
1510 | 0 | } |
1511 | |
1512 | 0 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); |
1513 | |
1514 | | // above (B1); never used for the second PU of 2NxN / 2NxnU / 2NxnD partitions, skipped when its motion equals A1 |
1515 | 0 | uint32_t abovePartIdx = 0; |
1516 | 0 | const CUData* cuAbove = getPUAbove(abovePartIdx, partIdxRT); |
1517 | 0 | bool isAvailableB1 = cuAbove && |
1518 | 0 | cuAbove->isDiffMER(xP + nPSW - 1, yP - 1, xP, yP) && |
1519 | 0 | !(puIdx == 1 && (curPS == SIZE_2NxN || curPS == SIZE_2NxnU || curPS == SIZE_2NxnD)) && |
1520 | 0 | cuAbove->isInter(abovePartIdx); |
1521 | 0 | if (isAvailableB1 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAbove, abovePartIdx))) |
1522 | 0 | { |
1523 | | // get Inter Dir |
1524 | 0 | candDir[count] = cuAbove->m_interDir[abovePartIdx]; |
1525 | | // get Mv from Above |
1526 | 0 | cuAbove->getMvField(cuAbove, abovePartIdx, 0, candMvField[count][0]); |
1527 | 0 | if (isInterB) |
1528 | 0 | cuAbove->getMvField(cuAbove, abovePartIdx, 1, candMvField[count][1]); |
1529 |

1530 | 0 | if (++count == maxNumMergeCand) |
1531 | 0 | return maxNumMergeCand; |
1532 | 0 | } |
1533 | |
1534 | | // above right (B0); skipped when its motion equals B1 |
1535 | 0 | uint32_t aboveRightPartIdx = 0; |
1536 | 0 | const CUData* cuAboveRight = getPUAboveRight(aboveRightPartIdx, partIdxRT); |
1537 | 0 | bool isAvailableB0 = cuAboveRight && |
1538 | 0 | cuAboveRight->isDiffMER(xP + nPSW, yP - 1, xP, yP) && |
1539 | 0 | cuAboveRight->isInter(aboveRightPartIdx); |
1540 | 0 | if (isAvailableB0 && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveRight, aboveRightPartIdx))) |
1541 | 0 | { |
1542 | | // get Inter Dir |
1543 | 0 | candDir[count] = cuAboveRight->m_interDir[aboveRightPartIdx]; |
1544 | | // get Mv from Above Right |
1545 | 0 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 0, candMvField[count][0]); |
1546 | 0 | if (isInterB) |
1547 | 0 | cuAboveRight->getMvField(cuAboveRight, aboveRightPartIdx, 1, candMvField[count][1]); |
1548 |

1549 | 0 | if (++count == maxNumMergeCand) |
1550 | 0 | return maxNumMergeCand; |
1551 | 0 | } |
1552 | |
1553 | | // left bottom (A0); skipped when its motion equals A1 |
1554 | 0 | uint32_t leftBottomPartIdx = 0; |
1555 | 0 | const CUData* cuLeftBottom = this->getPUBelowLeft(leftBottomPartIdx, partIdxLB); |
1556 | 0 | bool isAvailableA0 = cuLeftBottom && |
1557 | 0 | cuLeftBottom->isDiffMER(xP - 1, yP + nPSH, xP, yP) && |
1558 | 0 | cuLeftBottom->isInter(leftBottomPartIdx); |
1559 | 0 | if (isAvailableA0 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuLeftBottom, leftBottomPartIdx))) |
1560 | 0 | { |
1561 | | // get Inter Dir |
1562 | 0 | candDir[count] = cuLeftBottom->m_interDir[leftBottomPartIdx]; |
1563 | | // get Mv from Left Bottom |
1564 | 0 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 0, candMvField[count][0]); |
1565 | 0 | if (isInterB) |
1566 | 0 | cuLeftBottom->getMvField(cuLeftBottom, leftBottomPartIdx, 1, candMvField[count][1]); |
1567 |

1568 | 0 | if (++count == maxNumMergeCand) |
1569 | 0 | return maxNumMergeCand; |
1570 | 0 | } |
1571 | |
1572 | | // above left (B2); only tried while fewer than four candidates exist, skipped when its motion equals A1 or B1 |
1573 | 0 | if (count < 4) |
1574 | 0 | { |
1575 | 0 | uint32_t aboveLeftPartIdx = 0; |
1576 | 0 | const CUData* cuAboveLeft = getPUAboveLeft(aboveLeftPartIdx, absPartAddr); |
1577 | 0 | bool isAvailableB2 = cuAboveLeft && |
1578 | 0 | cuAboveLeft->isDiffMER(xP - 1, yP - 1, xP, yP) && |
1579 | 0 | cuAboveLeft->isInter(aboveLeftPartIdx); |
1580 | 0 | if (isAvailableB2 && (!isAvailableA1 || !cuLeft->hasEqualMotion(leftPartIdx, *cuAboveLeft, aboveLeftPartIdx)) |
1581 | 0 | && (!isAvailableB1 || !cuAbove->hasEqualMotion(abovePartIdx, *cuAboveLeft, aboveLeftPartIdx))) |
1582 | 0 | { |
1583 | | // get Inter Dir |
1584 | 0 | candDir[count] = cuAboveLeft->m_interDir[aboveLeftPartIdx]; |
1585 | | // get Mv from Above Left |
1586 | 0 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 0, candMvField[count][0]); |
1587 | 0 | if (isInterB) |
1588 | 0 | cuAboveLeft->getMvField(cuAboveLeft, aboveLeftPartIdx, 1, candMvField[count][1]); |
1589 |

1590 | 0 | if (++count == maxNumMergeCand) |
1591 | 0 | return maxNumMergeCand; |
1592 | 0 | } |
1593 | 0 | } |
// temporal candidate: first try the position below-right of the PU in the collocated
// picture, then fall back to the PU centre; always uses reference index 0 (refIdx below)
1594 | 0 | if (m_slice->m_sps->bTemporalMVPEnabled) |
1595 | 0 | { |
1596 | 0 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); |
1597 | 0 | MV colmv; |
1598 | 0 | int ctuIdx = -1; |
1599 | | |
1600 | | // image boundary check: the below-right position must lie inside the picture |
1601 | 0 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && |
1602 | 0 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) |
1603 | 0 | { |
1604 | 0 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; |
1605 | 0 | uint32_t numUnits = s_numPartInCUSize; |
1606 | 0 | bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU |
1607 | 0 | bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU |
1608 |

// ctuIdx is only set when the below-right position is in this CTU or the next one
// (m_cuAddr + 1); in the two last-row cases it stays -1 and the centre fallback is used
1609 | 0 | if (bNotLastCol && bNotLastRow) |
1610 | 0 | { |
1611 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1]; |
1612 | 0 | ctuIdx = m_cuAddr; |
1613 | 0 | } |
1614 | 0 | else if (bNotLastCol) |
1615 | 0 | absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)]; |
1616 | 0 | else if (bNotLastRow) |
1617 | 0 | { |
1618 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1]; |
1619 | 0 | ctuIdx = m_cuAddr + 1; |
1620 | 0 | } |
1621 | 0 | else // is the right bottom corner of CTU |
1622 | 0 | absPartAddr = 0; |
1623 | 0 | } |
1624 |

1625 | 0 | int maxList = isInterB ? 2 : 1; |
1626 | 0 | int dir = 0, refIdx = 0; |
1627 | 0 | for (int list = 0; list < maxList; list++) |
1628 | 0 | { |
1629 | 0 | bool bExistMV = ctuIdx >= 0 && getColMVP(colmv, refIdx, list, ctuIdx, absPartAddr); |
1630 | 0 | if (!bExistMV) |
1631 | 0 | { |
1632 | 0 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); |
1633 | 0 | bExistMV = getColMVP(colmv, refIdx, list, m_cuAddr, partIdxCenter); |
1634 | 0 | } |
1635 | 0 | if (bExistMV) |
1636 | 0 | { |
1637 | 0 | dir |= (1 << list); |
1638 | 0 | candMvField[count][list].mv = colmv; |
1639 | 0 | candMvField[count][list].refIdx = refIdx; |
1640 | 0 | } |
1641 | 0 | } |
1642 |

1643 | 0 | if (dir != 0) |
1644 | 0 | { |
1645 | 0 | candDir[count] = (uint8_t)dir; |
1646 |

1647 | 0 | if (++count == maxNumMergeCand) |
1648 | 0 | return maxNumMergeCand; |
1649 | 0 | } |
1650 | 0 | } |
1651 | |
// combined bi-predictive candidates: pair the list 0 motion of candidate i with the
// list 1 motion of candidate j, visiting count * (count - 1) ordered pairs; the pair
// indices are packed two bits per entry, least significant entry first
1652 | 0 | if (isInterB) |
1653 | 0 | { |
1654 | 0 | const uint32_t cutoff = count * (count - 1); |
1655 | 0 | uint32_t priorityList0 = 0xEDC984; // { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 } |
1656 | 0 | uint32_t priorityList1 = 0xB73621; // { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 } |
1657 |

1658 | 0 | for (uint32_t idx = 0; idx < cutoff; idx++, priorityList0 >>= 2, priorityList1 >>= 2) |
1659 | 0 | { |
1660 | 0 | int i = priorityList0 & 3; |
1661 | 0 | int j = priorityList1 & 3; |
1662 |

1663 | 0 | if ((candDir[i] & 0x1) && (candDir[j] & 0x2)) |
1664 | 0 | { |
1665 | | // get Mv from cand[i] and cand[j] |
1666 | 0 | int refIdxL0 = candMvField[i][0].refIdx; |
1667 | 0 | int refIdxL1 = candMvField[j][1].refIdx; |
1668 | 0 | int refPOCL0 = m_slice->m_refPOCList[0][refIdxL0]; |
1669 | 0 | int refPOCL1 = m_slice->m_refPOCList[1][refIdxL1]; |
// a pair whose two halves point at the same picture with the same MV is skipped
1670 | 0 | if (!(refPOCL0 == refPOCL1 && candMvField[i][0].mv == candMvField[j][1].mv)) |
1671 | 0 | { |
1672 | 0 | candMvField[count][0].mv = candMvField[i][0].mv; |
1673 | 0 | candMvField[count][0].refIdx = refIdxL0; |
1674 | 0 | candMvField[count][1].mv = candMvField[j][1].mv; |
1675 | 0 | candMvField[count][1].refIdx = refIdxL1; |
1676 | 0 | candDir[count] = 3; |
1677 |

1678 | 0 | if (++count == maxNumMergeCand) |
1679 | 0 | return maxNumMergeCand; |
1680 | 0 | } |
1681 | 0 | } |
1682 | 0 | } |
1683 | 0 | } |
// fill the remaining entries with zero-MV candidates: the reference index counts up
// from 0 to numRefIdx - 1, after which every further candidate uses reference index 0
1684 | 0 | int numRefIdx = (isInterB) ? X265_MIN(m_slice->m_numRefIdx[0], m_slice->m_numRefIdx[1]) : m_slice->m_numRefIdx[0]; |
1685 | 0 | int r = 0; |
1686 | 0 | int refcnt = 0; |
1687 | 0 | while (count < maxNumMergeCand) |
1688 | 0 | { |
1689 | 0 | candDir[count] = 1; |
1690 | 0 | candMvField[count][0].mv.word = 0; |
1691 | 0 | candMvField[count][0].refIdx = r; |
1692 |

1693 | 0 | if (isInterB) |
1694 | 0 | { |
1695 | 0 | candDir[count] = 3; |
1696 | 0 | candMvField[count][1].mv.word = 0; |
1697 | 0 | candMvField[count][1].refIdx = r; |
1698 | 0 | } |
1699 |

1700 | 0 | count++; |
1701 |

1702 | 0 | if (refcnt == numRefIdx - 1) |
1703 | 0 | r = 0; |
1704 | 0 | else |
1705 | 0 | { |
1706 | 0 | ++r; |
1707 | 0 | ++refcnt; |
1708 | 0 | } |
1709 | 0 | } |
1710 |

1711 | 0 | return count; |
1712 | 0 | } |
1713 | | |
1714 | | // Create the PMV list. Called for each reference index. |
1715 | | int CUData::getPMV(InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx, MV* amvpCand, MV* pmv) const |
1716 | 0 | { |
1717 | 0 | MV directMV[MD_ABOVE_LEFT + 1]; |
1718 | 0 | MV indirectMV[MD_ABOVE_LEFT + 1]; |
1719 | 0 | bool validDirect[MD_ABOVE_LEFT + 1]; |
1720 | 0 | bool validIndirect[MD_ABOVE_LEFT + 1]; |
1721 | | |
1722 | | // Left candidate. |
1723 | 0 | validDirect[MD_BELOW_LEFT] = getDirectPMV(directMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx); |
1724 | 0 | validDirect[MD_LEFT] = getDirectPMV(directMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx); |
1725 | | // Top candidate. |
1726 | 0 | validDirect[MD_ABOVE_RIGHT] = getDirectPMV(directMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx); |
1727 | 0 | validDirect[MD_ABOVE] = getDirectPMV(directMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx); |
1728 | 0 | validDirect[MD_ABOVE_LEFT] = getDirectPMV(directMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx); |
1729 | | |
1730 | | // Left candidate. |
1731 | 0 | validIndirect[MD_BELOW_LEFT] = getIndirectPMV(indirectMV[MD_BELOW_LEFT], neighbours + MD_BELOW_LEFT, picList, refIdx); |
1732 | 0 | validIndirect[MD_LEFT] = getIndirectPMV(indirectMV[MD_LEFT], neighbours + MD_LEFT, picList, refIdx); |
1733 | | // Top candidate. |
1734 | 0 | validIndirect[MD_ABOVE_RIGHT] = getIndirectPMV(indirectMV[MD_ABOVE_RIGHT], neighbours + MD_ABOVE_RIGHT, picList, refIdx); |
1735 | 0 | validIndirect[MD_ABOVE] = getIndirectPMV(indirectMV[MD_ABOVE], neighbours + MD_ABOVE, picList, refIdx); |
1736 | 0 | validIndirect[MD_ABOVE_LEFT] = getIndirectPMV(indirectMV[MD_ABOVE_LEFT], neighbours + MD_ABOVE_LEFT, picList, refIdx); |
1737 | |
|
1738 | 0 | int num = 0; |
1739 | | // Left predictor search |
1740 | 0 | if (validDirect[MD_BELOW_LEFT]) |
1741 | 0 | amvpCand[num++] = directMV[MD_BELOW_LEFT]; |
1742 | 0 | else if (validDirect[MD_LEFT]) |
1743 | 0 | amvpCand[num++] = directMV[MD_LEFT]; |
1744 | 0 | else if (validIndirect[MD_BELOW_LEFT]) |
1745 | 0 | amvpCand[num++] = indirectMV[MD_BELOW_LEFT]; |
1746 | 0 | else if (validIndirect[MD_LEFT]) |
1747 | 0 | amvpCand[num++] = indirectMV[MD_LEFT]; |
1748 | |
|
1749 | 0 | bool bAddedSmvp = num > 0; |
1750 | | |
1751 | | // Above predictor search |
1752 | 0 | if (validDirect[MD_ABOVE_RIGHT]) |
1753 | 0 | amvpCand[num++] = directMV[MD_ABOVE_RIGHT]; |
1754 | 0 | else if (validDirect[MD_ABOVE]) |
1755 | 0 | amvpCand[num++] = directMV[MD_ABOVE]; |
1756 | 0 | else if (validDirect[MD_ABOVE_LEFT]) |
1757 | 0 | amvpCand[num++] = directMV[MD_ABOVE_LEFT]; |
1758 | |
|
1759 | 0 | if (!bAddedSmvp) |
1760 | 0 | { |
1761 | 0 | if (validIndirect[MD_ABOVE_RIGHT]) |
1762 | 0 | amvpCand[num++] = indirectMV[MD_ABOVE_RIGHT]; |
1763 | 0 | else if (validIndirect[MD_ABOVE]) |
1764 | 0 | amvpCand[num++] = indirectMV[MD_ABOVE]; |
1765 | 0 | else if (validIndirect[MD_ABOVE_LEFT]) |
1766 | 0 | amvpCand[num++] = indirectMV[MD_ABOVE_LEFT]; |
1767 | 0 | } |
1768 | |
|
1769 | 0 | int numMvc = 0; |
1770 | 0 | for (int dir = MD_LEFT; dir <= MD_ABOVE_LEFT; dir++) |
1771 | 0 | { |
1772 | 0 | if (validDirect[dir] && directMV[dir].notZero()) |
1773 | 0 | pmv[numMvc++] = directMV[dir]; |
1774 | |
|
1775 | 0 | if (validIndirect[dir] && indirectMV[dir].notZero()) |
1776 | 0 | pmv[numMvc++] = indirectMV[dir]; |
1777 | 0 | } |
1778 | |
|
1779 | 0 | if (num == 2) |
1780 | 0 | num -= amvpCand[0] == amvpCand[1]; |
1781 | | |
1782 | | // Get the collocated candidate. At this step, either the first candidate |
1783 | | // was found or its value is 0. |
1784 | 0 | if (m_slice->m_sps->bTemporalMVPEnabled && num < 2) |
1785 | 0 | { |
1786 | 0 | int tempRefIdx = neighbours[MD_COLLOCATED].refIdx[picList]; |
1787 | 0 | if (tempRefIdx != -1) |
1788 | 0 | { |
1789 | 0 | uint32_t cuAddr = neighbours[MD_COLLOCATED].cuAddr[picList]; |
1790 | 0 | const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
1791 | 0 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); |
1792 | | |
1793 | | // Scale the vector |
1794 | 0 | int colRefPOC = colCU->m_slice->m_refPOCList[tempRefIdx >> 4][tempRefIdx & 0xf]; |
1795 | 0 | int colPOC = colCU->m_slice->m_poc; |
1796 | |
|
1797 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; |
1798 | 0 | int curPOC = m_slice->m_poc; |
1799 | 0 | pmv[numMvc++] = amvpCand[num++] = scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC, curRefPOC, colPOC, colRefPOC); |
1800 | 0 | } |
1801 | 0 | } |
1802 | |
|
1803 | 0 | while (num < AMVP_NUM_CANDS) |
1804 | 0 | amvpCand[num++] = 0; |
1805 | |
|
1806 | 0 | return numMvc; |
1807 | 0 | } |
1808 | | |
1809 | | /* Constructs a list of candidates for AMVP, and a larger list of motion candidates */ |
1810 | | void CUData::getNeighbourMV(uint32_t puIdx, uint32_t absPartIdx, InterNeighbourMV* neighbours) const |
1811 | 0 | { |
1812 | | // Set the temporal neighbour to unavailable by default. |
1813 | 0 | neighbours[MD_COLLOCATED].unifiedRef = -1; |
1814 | |
|
1815 | 0 | uint32_t partIdxLT, partIdxRT, partIdxLB = deriveLeftBottomIdx(puIdx); |
1816 | 0 | deriveLeftRightTopIdx(puIdx, partIdxLT, partIdxRT); |
1817 | | |
1818 | | // Load the spatial MVs. |
1819 | 0 | getInterNeighbourMV(neighbours + MD_BELOW_LEFT, partIdxLB, MD_BELOW_LEFT); |
1820 | 0 | getInterNeighbourMV(neighbours + MD_LEFT, partIdxLB, MD_LEFT); |
1821 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE_RIGHT,partIdxRT, MD_ABOVE_RIGHT); |
1822 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE, partIdxRT, MD_ABOVE); |
1823 | 0 | getInterNeighbourMV(neighbours + MD_ABOVE_LEFT, partIdxLT, MD_ABOVE_LEFT); |
1824 | |
|
1825 | 0 | if (m_slice->m_sps->bTemporalMVPEnabled) |
1826 | 0 | { |
1827 | 0 | uint32_t absPartAddr = m_absIdxInCTU + absPartIdx; |
1828 | 0 | uint32_t partIdxRB = deriveRightBottomIdx(puIdx); |
1829 | | |
1830 | | // co-located RightBottom temporal predictor (H) |
1831 | 0 | int ctuIdx = -1; |
1832 | | |
1833 | | // image boundary check |
1834 | 0 | if (m_encData->getPicCTU(m_cuAddr)->m_cuPelX + g_zscanToPelX[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picWidthInLumaSamples && |
1835 | 0 | m_encData->getPicCTU(m_cuAddr)->m_cuPelY + g_zscanToPelY[partIdxRB] + UNIT_SIZE < m_slice->m_sps->picHeightInLumaSamples) |
1836 | 0 | { |
1837 | 0 | uint32_t absPartIdxRB = g_zscanToRaster[partIdxRB]; |
1838 | 0 | uint32_t numUnits = s_numPartInCUSize; |
1839 | 0 | bool bNotLastCol = lessThanCol(absPartIdxRB, numUnits - 1); // is not at the last column of CTU |
1840 | 0 | bool bNotLastRow = lessThanRow(absPartIdxRB, numUnits - 1); // is not at the last row of CTU |
1841 | |
|
1842 | 0 | if (bNotLastCol && bNotLastRow) |
1843 | 0 | { |
1844 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE + 1]; |
1845 | 0 | ctuIdx = m_cuAddr; |
1846 | 0 | } |
1847 | 0 | else if (bNotLastCol) |
1848 | 0 | absPartAddr = g_rasterToZscan[(absPartIdxRB + 1) & (numUnits - 1)]; |
1849 | 0 | else if (bNotLastRow) |
1850 | 0 | { |
1851 | 0 | absPartAddr = g_rasterToZscan[absPartIdxRB + RASTER_SIZE - numUnits + 1]; |
1852 | 0 | ctuIdx = m_cuAddr + 1; |
1853 | 0 | } |
1854 | 0 | else // is the right bottom corner of CTU |
1855 | 0 | absPartAddr = 0; |
1856 | 0 | } |
1857 | |
|
1858 | 0 | if (!(ctuIdx >= 0 && getCollocatedMV(ctuIdx, absPartAddr, neighbours + MD_COLLOCATED))) |
1859 | 0 | { |
1860 | 0 | uint32_t partIdxCenter = deriveCenterIdx(puIdx); |
1861 | 0 | uint32_t curCTUIdx = m_cuAddr; |
1862 | 0 | getCollocatedMV(curCTUIdx, partIdxCenter, neighbours + MD_COLLOCATED); |
1863 | 0 | } |
1864 | 0 | } |
1865 | 0 | } |
1866 | | |
1867 | | void CUData::getInterNeighbourMV(InterNeighbourMV *neighbour, uint32_t partUnitIdx, MVP_DIR dir) const |
1868 | 0 | { |
1869 | 0 | const CUData* tmpCU = NULL; |
1870 | 0 | uint32_t idx = 0; |
1871 | |
|
1872 | 0 | switch (dir) |
1873 | 0 | { |
1874 | 0 | case MD_LEFT: |
1875 | 0 | tmpCU = getPULeft(idx, partUnitIdx); |
1876 | 0 | break; |
1877 | 0 | case MD_ABOVE: |
1878 | 0 | tmpCU = getPUAbove(idx, partUnitIdx); |
1879 | 0 | break; |
1880 | 0 | case MD_ABOVE_RIGHT: |
1881 | 0 | tmpCU = getPUAboveRight(idx, partUnitIdx); |
1882 | 0 | break; |
1883 | 0 | case MD_BELOW_LEFT: |
1884 | 0 | tmpCU = getPUBelowLeft(idx, partUnitIdx); |
1885 | 0 | break; |
1886 | 0 | case MD_ABOVE_LEFT: |
1887 | 0 | tmpCU = getPUAboveLeft(idx, partUnitIdx); |
1888 | 0 | break; |
1889 | 0 | default: |
1890 | 0 | break; |
1891 | 0 | } |
1892 | | |
1893 | 0 | if (!tmpCU) |
1894 | 0 | { |
1895 | | // Mark the PMV as unavailable. |
1896 | 0 | for (int i = 0; i < 2; i++) |
1897 | 0 | neighbour->refIdx[i] = -1; |
1898 | 0 | return; |
1899 | 0 | } |
1900 | | |
1901 | 0 | for (int i = 0; i < 2; i++) |
1902 | 0 | { |
1903 | | // Get the MV. |
1904 | 0 | neighbour->mv[i] = tmpCU->m_mv[i][idx]; |
1905 | | |
1906 | | // Get the reference idx. |
1907 | 0 | neighbour->refIdx[i] = tmpCU->m_refIdx[i][idx]; |
1908 | 0 | } |
1909 | 0 | } |
1910 | | |
1911 | | /* Clip motion vector to within slightly padded boundary of picture (the |
1912 | | * MV may reference a block that is completely within the padded area). |
1913 | | * Note this function is unaware of how much of this picture is actually |
1914 | | * available for use (re: frame parallelism) */ |
1915 | | void CUData::clipMv(MV& outMV) const |
1916 | 0 | { |
1917 | 0 | const uint32_t mvshift = 2; |
1918 | 0 | uint32_t offset = 8; |
1919 | |
|
1920 | 0 | int32_t xmax = (int32_t)((m_slice->m_sps->picWidthInLumaSamples + offset - m_cuPelX - 1) << mvshift); |
1921 | 0 | int32_t xmin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelX - 1) << mvshift); |
1922 | |
|
1923 | 0 | int32_t ymax = (int32_t)((m_slice->m_sps->picHeightInLumaSamples + offset - m_cuPelY - 1) << mvshift); |
1924 | 0 | int32_t ymin = -(int32_t)((m_encData->m_param->maxCUSize + offset + m_cuPelY - 1) << mvshift); |
1925 | |
|
1926 | 0 | outMV.x = X265_MIN(xmax, X265_MAX(xmin, outMV.x)); |
1927 | 0 | outMV.y = X265_MIN(ymax, X265_MAX(ymin, outMV.y)); |
1928 | 0 | } |
1929 | | |
1930 | | // Load direct spatial MV if available. |
1931 | | bool CUData::getDirectPMV(MV& pmv, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const |
1932 | 0 | { |
1933 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; |
1934 | 0 | for (int i = 0; i < 2; i++, picList = !picList) |
1935 | 0 | { |
1936 | 0 | int partRefIdx = neighbours->refIdx[picList]; |
1937 | 0 | if (partRefIdx >= 0 && curRefPOC == m_slice->m_refPOCList[picList][partRefIdx]) |
1938 | 0 | { |
1939 | 0 | pmv = neighbours->mv[picList]; |
1940 | 0 | return true; |
1941 | 0 | } |
1942 | 0 | } |
1943 | 0 | return false; |
1944 | 0 | } |
1945 | | |
1946 | | // Load indirect spatial MV if available. An indirect MV has to be scaled. |
1947 | | bool CUData::getIndirectPMV(MV& outMV, InterNeighbourMV *neighbours, uint32_t picList, uint32_t refIdx) const |
1948 | 0 | { |
1949 | 0 | int curPOC = m_slice->m_poc; |
1950 | 0 | int neibPOC = curPOC; |
1951 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][refIdx]; |
1952 | |
|
1953 | 0 | for (int i = 0; i < 2; i++, picList = !picList) |
1954 | 0 | { |
1955 | 0 | int partRefIdx = neighbours->refIdx[picList]; |
1956 | 0 | if (partRefIdx >= 0) |
1957 | 0 | { |
1958 | 0 | int neibRefPOC = m_slice->m_refPOCList[picList][partRefIdx]; |
1959 | 0 | MV mvp = neighbours->mv[picList]; |
1960 | |
|
1961 | 0 | outMV = scaleMvByPOCDist(mvp, curPOC, curRefPOC, neibPOC, neibRefPOC); |
1962 | 0 | return true; |
1963 | 0 | } |
1964 | 0 | } |
1965 | 0 | return false; |
1966 | 0 | } |
1967 | | |
1968 | | bool CUData::getColMVP(MV& outMV, int& outRefIdx, int picList, int cuAddr, int partUnitIdx) const |
1969 | 0 | { |
1970 | 0 | const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
1971 | 0 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); |
1972 | |
|
1973 | 0 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; |
1974 | 0 | if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) |
1975 | 0 | return false; |
1976 | | |
1977 | 0 | int colRefPicList = m_slice->m_bCheckLDC ? picList : m_slice->m_colFromL0Flag; |
1978 | |
|
1979 | 0 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
1980 | |
|
1981 | 0 | if (colRefIdx < 0) |
1982 | 0 | { |
1983 | 0 | colRefPicList = !colRefPicList; |
1984 | 0 | colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
1985 | |
|
1986 | 0 | if (colRefIdx < 0) |
1987 | 0 | return false; |
1988 | 0 | } |
1989 | | |
1990 | | // Scale the vector |
1991 | 0 | int colRefPOC = colCU->m_slice->m_refPOCList[colRefPicList][colRefIdx]; |
1992 | 0 | int colPOC = colCU->m_slice->m_poc; |
1993 | 0 | MV colmv = colCU->m_mv[colRefPicList][absPartAddr]; |
1994 | |
|
1995 | 0 | int curRefPOC = m_slice->m_refPOCList[picList][outRefIdx]; |
1996 | 0 | int curPOC = m_slice->m_poc; |
1997 | |
|
1998 | 0 | outMV = scaleMvByPOCDist(colmv, curPOC, curRefPOC, colPOC, colRefPOC); |
1999 | 0 | return true; |
2000 | 0 | } |
2001 | | |
2002 | | // Cache the collocated MV. |
2003 | | bool CUData::getCollocatedMV(int cuAddr, int partUnitIdx, InterNeighbourMV *neighbour) const |
2004 | 0 | { |
2005 | 0 | const Frame* colPic = m_slice->m_refFrameList[m_slice->isInterB() && !m_slice->m_colFromL0Flag][m_slice->m_colRefIdx]; |
2006 | 0 | const CUData* colCU = colPic->m_encData->getPicCTU(cuAddr); |
2007 | |
|
2008 | 0 | uint32_t absPartAddr = partUnitIdx & TMVP_UNIT_MASK; |
2009 | 0 | if (colCU->m_predMode[partUnitIdx] == MODE_NONE || colCU->isIntra(absPartAddr)) |
2010 | 0 | return false; |
2011 | | |
2012 | 0 | for (int list = 0; list < 2; list++) |
2013 | 0 | { |
2014 | 0 | neighbour->cuAddr[list] = cuAddr; |
2015 | 0 | int colRefPicList = m_slice->m_bCheckLDC ? list : m_slice->m_colFromL0Flag; |
2016 | 0 | int colRefIdx = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2017 | |
|
2018 | 0 | if (colRefIdx < 0) |
2019 | 0 | colRefPicList = !colRefPicList; |
2020 | |
|
2021 | 0 | neighbour->refIdx[list] = colCU->m_refIdx[colRefPicList][absPartAddr]; |
2022 | 0 | neighbour->refIdx[list] |= colRefPicList << 4; |
2023 | |
|
2024 | 0 | neighbour->mv[list] = colCU->m_mv[colRefPicList][absPartAddr]; |
2025 | 0 | } |
2026 | |
|
2027 | 0 | return neighbour->unifiedRef != -1; |
2028 | 0 | } |
2029 | | |
2030 | | MV CUData::scaleMvByPOCDist(const MV& inMV, int curPOC, int curRefPOC, int colPOC, int colRefPOC) const |
2031 | 0 | { |
2032 | 0 | int diffPocD = colPOC - colRefPOC; |
2033 | 0 | int diffPocB = curPOC - curRefPOC; |
2034 | |
|
2035 | 0 | if (diffPocD == diffPocB) |
2036 | 0 | return inMV; |
2037 | 0 | else |
2038 | 0 | { |
2039 | 0 | int tdb = x265_clip3(-128, 127, diffPocB); |
2040 | 0 | int tdd = x265_clip3(-128, 127, diffPocD); |
2041 | 0 | int x = (0x4000 + abs(tdd / 2)) / tdd; |
2042 | 0 | int scale = x265_clip3(-4096, 4095, (tdb * x + 32) >> 6); |
2043 | 0 | return scaleMv(inMV, scale); |
2044 | 0 | } |
2045 | 0 | } |
2046 | | |
2047 | | uint32_t CUData::deriveCenterIdx(uint32_t puIdx) const |
2048 | 0 | { |
2049 | 0 | uint32_t absPartIdx; |
2050 | 0 | int puWidth, puHeight; |
2051 | |
|
2052 | 0 | getPartIndexAndSize(puIdx, absPartIdx, puWidth, puHeight); |
2053 | |
|
2054 | 0 | return g_rasterToZscan[g_zscanToRaster[m_absIdxInCTU + absPartIdx] |
2055 | 0 | + ((puHeight >> (LOG2_UNIT_SIZE + 1)) << LOG2_RASTER_SIZE) |
2056 | 0 | + (puWidth >> (LOG2_UNIT_SIZE + 1))]; |
2057 | 0 | } |
2058 | | |
2059 | | void CUData::getTUEntropyCodingParameters(TUEntropyCodingParameters &result, uint32_t absPartIdx, uint32_t log2TrSize, bool bIsLuma) const |
2060 | 117k | { |
2061 | 117k | bool bIsIntra = isIntra(absPartIdx); |
2062 | | |
2063 | | // set the group layout |
2064 | 117k | const uint32_t log2TrSizeCG = log2TrSize - 2; |
2065 | | |
2066 | | // set the scan orders |
2067 | 117k | if (bIsIntra) |
2068 | 117k | { |
2069 | 117k | uint32_t dirMode; |
2070 | | |
2071 | 117k | if (bIsLuma) |
2072 | 51.7k | dirMode = m_lumaIntraDir[absPartIdx]; |
2073 | 65.8k | else |
2074 | 65.8k | { |
2075 | 65.8k | dirMode = m_chromaIntraDir[absPartIdx]; |
2076 | 65.8k | if (dirMode == DM_CHROMA_IDX) |
2077 | 16.3k | { |
2078 | 16.3k | dirMode = m_lumaIntraDir[(m_chromaFormat == X265_CSP_I444) ? absPartIdx : absPartIdx & 0xFC]; |
2079 | 16.3k | dirMode = (m_chromaFormat == X265_CSP_I422) ? g_chroma422IntraAngleMappingTable[dirMode] : dirMode; |
2080 | 16.3k | } |
2081 | 65.8k | } |
2082 | | |
2083 | 117k | if (log2TrSize <= (MDCS_LOG2_MAX_SIZE - m_hChromaShift) || (bIsLuma && log2TrSize == MDCS_LOG2_MAX_SIZE)) |
2084 | 59.9k | result.scanType = dirMode >= 22 && dirMode <= 30 ? SCAN_HOR : dirMode >= 6 && dirMode <= 14 ? SCAN_VER : SCAN_DIAG; |
2085 | 57.6k | else |
2086 | 57.6k | result.scanType = SCAN_DIAG; |
2087 | 117k | } |
2088 | 18.4E | else |
2089 | 18.4E | result.scanType = SCAN_DIAG; |
2090 | | |
2091 | 117k | result.scan = g_scanOrder[result.scanType][log2TrSize - 2]; |
2092 | 117k | result.scanCG = g_scanOrderCG[result.scanType][log2TrSizeCG]; |
2093 | | |
2094 | 117k | if (log2TrSize == 2) |
2095 | 47.1k | result.firstSignificanceMapContext = 0; |
2096 | 70.4k | else if (log2TrSize == 3) |
2097 | 36.9k | result.firstSignificanceMapContext = (result.scanType != SCAN_DIAG && bIsLuma) ? 15 : 9; |
2098 | 33.4k | else |
2099 | 33.4k | result.firstSignificanceMapContext = bIsLuma ? 21 : 12; |
2100 | 117k | } |
2101 | | |
/* Branch-free set/clear of the bits of 'flag' in 'bitfield':
 *   value == 1 : (value) - 1 is 0, its complement is all ones  -> bits of 'flag' are set
 *   value == 0 : (value) - 1 is all ones, its complement is 0  -> bits of 'flag' are cleared
 * Intended for a value of exactly 0 or 1. 'bitfield' and 'flag' are each evaluated
 * twice, so neither may have side effects. The whole expansion is parenthesized so the
 * macro also behaves correctly when it is used inside a larger expression. */
#define CU_SET_FLAG(bitfield, flag, value) ((bitfield) = ((bitfield) & (~(flag))) | ((~((value) - 1)) & (flag)))
2103 | | |
2104 | | void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) |
2105 | 1.98k | { |
2106 | 1.98k | uint32_t num4x4Partition = (1U << ((g_log2Size[maxCUSize] - LOG2_UNIT_SIZE) << 1)); |
2107 | | |
2108 | | // Initialize the coding blocks inside the CTB |
2109 | 9.39k | for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; log2CUSize >= g_log2Size[minCUSize]; log2CUSize--) |
2110 | 7.40k | { |
2111 | 7.40k | uint32_t blockSize = 1 << log2CUSize; |
2112 | 7.40k | uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); |
2113 | 7.40k | int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize]; |
2114 | | |
2115 | 33.7k | for (uint32_t sbY = 0; sbY < sbWidth; sbY++) |
2116 | 26.3k | { |
2117 | 170k | for (uint32_t sbX = 0; sbX < sbWidth; sbX++) |
2118 | 144k | { |
2119 | 144k | uint32_t depthIdx = g_depthScanIdx[sbY][sbX]; |
2120 | 144k | uint32_t cuIdx = rangeCUIdx + depthIdx; |
2121 | 144k | uint32_t childIdx = rangeCUIdx + sbWidth * sbWidth + (depthIdx << 2); |
2122 | 144k | uint32_t px = sbX * blockSize; |
2123 | 144k | uint32_t py = sbY * blockSize; |
2124 | 144k | int32_t presentFlag = px < ctuWidth && py < ctuHeight; |
2125 | 144k | int32_t splitMandatoryFlag = presentFlag && !lastLevelFlag && (px + blockSize > ctuWidth || py + blockSize > ctuHeight); |
2126 | | |
2127 | | /* Offset of the luma CU in the X, Y direction in terms of pixels from the CTU origin */ |
2128 | 144k | uint32_t xOffset = (sbX * blockSize) >> 3; |
2129 | 144k | uint32_t yOffset = (sbY * blockSize) >> 3; |
2130 | 144k | X265_CHECK(cuIdx < CUGeom::MAX_GEOMS, "CU geom index bug\n"); |
2131 | | |
2132 | 144k | CUGeom *cu = cuDataArray + cuIdx; |
2133 | 144k | cu->log2CUSize = log2CUSize; |
2134 | 144k | cu->childOffset = childIdx - cuIdx; |
2135 | 144k | cu->absPartIdx = g_depthScanIdx[yOffset][xOffset] * 4; |
2136 | 144k | cu->numPartitions = (num4x4Partition >> ((g_log2Size[maxCUSize] - cu->log2CUSize) * 2)); |
2137 | 144k | cu->depth = g_log2Size[maxCUSize] - log2CUSize; |
2138 | 144k | cu->geomRecurId = cuIdx; |
2139 | | |
2140 | 144k | cu->flags = 0; |
2141 | 144k | CU_SET_FLAG(cu->flags, CUGeom::PRESENT, presentFlag); |
2142 | 144k | CU_SET_FLAG(cu->flags, CUGeom::SPLIT_MANDATORY | CUGeom::SPLIT, splitMandatoryFlag); |
2143 | 144k | CU_SET_FLAG(cu->flags, CUGeom::LEAF, lastLevelFlag); |
2144 | 144k | } |
2145 | 26.3k | } |
2146 | 7.40k | rangeCUIdx += sbWidth * sbWidth; |
2147 | 7.40k | } |
2148 | 1.98k | } |