/src/vvenc/source/Lib/CommonLib/InterPrediction.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file InterPrediction.cpp |
45 | | \brief inter prediction class |
46 | | */ |
47 | | |
48 | | #include "InterPrediction.h" |
49 | | #include "Unit.h" |
50 | | #include "UnitTools.h" |
51 | | #include "dtrace_next.h" |
52 | | #include "dtrace_buffer.h" |
53 | | #include "CommonLib/TimeProfiler.h" |
54 | | |
55 | | #include <memory.h> |
56 | | #include <algorithm> |
57 | | |
58 | | //! \ingroup CommonLib |
59 | | //! \{ |
60 | | |
61 | | namespace vvenc { |
62 | | |
63 | | void addBDOFAvgCore(const Pel* src0, const ptrdiff_t src0Stride, const Pel* src1, const ptrdiff_t src1Stride, Pel* dst, const ptrdiff_t dstStride, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel*gradY1, const ptrdiff_t gradStride, int width, int height, int tmpx, int tmpy, unsigned shift, int offset, const ClpRng& clpRng) |
64 | 0 | { |
65 | 0 | int b = 0; |
66 | |
|
67 | 0 | for (int y = 0; y < height; y++) |
68 | 0 | { |
69 | 0 | for (int x = 0; x < width; x += 4) |
70 | 0 | { |
71 | 0 | b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]); |
72 | 0 | dst[x] = ClipPel((int16_t)rightShiftU((src0[x] + src1[x] + b + offset), shift), clpRng); |
73 | |
|
74 | 0 | b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]); |
75 | 0 | dst[x + 1] = ClipPel((int16_t)rightShiftU((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng); |
76 | |
|
77 | 0 | b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]); |
78 | 0 | dst[x + 2] = ClipPel((int16_t)rightShiftU((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng); |
79 | |
|
80 | 0 | b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]); |
81 | 0 | dst[x + 3] = ClipPel((int16_t)rightShiftU((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng); |
82 | 0 | } |
83 | 0 | dst += dstStride; src0 += src0Stride; src1 += src1Stride; |
84 | 0 | gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride; |
85 | 0 | } |
86 | 0 | } |
87 | | |
88 | | void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng) |
89 | 0 | { |
90 | 0 | int idx = 0; |
91 | 0 | const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13); |
92 | 0 | for (int h = 0; h < height; h++) |
93 | 0 | { |
94 | 0 | for (int w = 0; w < width; w++) |
95 | 0 | { |
96 | 0 | int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w]; |
97 | 0 | dI = Clip3(-dILimit, dILimit - 1, dI); |
98 | 0 | dst[w] = src[w] + dI; |
99 | 0 | if (!bi) |
100 | 0 | { |
101 | 0 | dst[w] = (dst[w] + offset) >> shiftNum; |
102 | 0 | dst[w] = ClipPel(dst[w], clpRng); |
103 | 0 | } |
104 | 0 | idx++; |
105 | 0 | } |
106 | 0 | gradX += gradStride; |
107 | 0 | gradY += gradStride; |
108 | 0 | dst += dstStride; |
109 | 0 | src += srcStride; |
110 | 0 | } |
111 | 0 | } |
112 | | |
113 | | template<bool PAD = true> |
114 | | void gradFilterCore(const Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth) |
115 | 0 | { |
116 | 0 | const Pel* srcTmp = pSrc + srcStride + 1; |
117 | 0 | Pel* gradXTmp = gradX + gradStride + 1; |
118 | 0 | Pel* gradYTmp = gradY + gradStride + 1; |
119 | 0 | int shift1 = 6; |
120 | |
|
121 | 0 | for (int y = 0; y < (height - 2 * BDOF_EXTEND_SIZE); y++) |
122 | 0 | { |
123 | 0 | for (int x = 0; x < (width - 2 * BDOF_EXTEND_SIZE); x++) |
124 | 0 | { |
125 | 0 | gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 ); |
126 | 0 | gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 ); |
127 | 0 | } |
128 | 0 | gradXTmp += gradStride; |
129 | 0 | gradYTmp += gradStride; |
130 | 0 | srcTmp += srcStride; |
131 | 0 | } |
132 | |
|
133 | 0 | if (PAD) |
134 | 0 | { |
135 | 0 | gradXTmp = gradX + gradStride + 1; |
136 | 0 | gradYTmp = gradY + gradStride + 1; |
137 | 0 | for (int y = 0; y < (height - 2 * BDOF_EXTEND_SIZE); y++) |
138 | 0 | { |
139 | 0 | gradXTmp[-1] = gradXTmp[0]; |
140 | 0 | gradXTmp[width - 2 * BDOF_EXTEND_SIZE] = gradXTmp[width - 2 * BDOF_EXTEND_SIZE - 1]; |
141 | 0 | gradXTmp += gradStride; |
142 | |
|
143 | 0 | gradYTmp[-1] = gradYTmp[0]; |
144 | 0 | gradYTmp[width - 2 * BDOF_EXTEND_SIZE] = gradYTmp[width - 2 * BDOF_EXTEND_SIZE - 1]; |
145 | 0 | gradYTmp += gradStride; |
146 | 0 | } |
147 | |
|
148 | 0 | gradXTmp = gradX + gradStride; |
149 | 0 | gradYTmp = gradY + gradStride; |
150 | 0 | ::memcpy(gradXTmp - gradStride, gradXTmp, sizeof(Pel)*(width)); |
151 | 0 | ::memcpy(gradXTmp + (height - 2 * BDOF_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BDOF_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); |
152 | 0 | ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width)); |
153 | 0 | ::memcpy(gradYTmp + (height - 2 * BDOF_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BDOF_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width)); |
154 | 0 | } |
155 | 0 | } Unexecuted instantiation: void vvenc::gradFilterCore<true>(short const*, int, int, int, int, short*, short*, int) Unexecuted instantiation: void vvenc::gradFilterCore<false>(short const*, int, int, int, int, short*, short*, int) |
156 | | |
157 | | void calcBDOFSumsCore( const Pel* srcY0Tmp, const Pel* srcY1Tmp, const Pel* gradX0, const Pel* gradX1, |
158 | | const Pel* gradY0, const Pel* gradY1, int xu, int yu, const ptrdiff_t src0Stride, const ptrdiff_t src1Stride, |
159 | | const ptrdiff_t widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, |
160 | | int* sumSignGY_GX ) |
161 | 0 | { |
162 | 0 | int shift4 = 4; |
163 | 0 | int shift5 = 1; |
164 | |
|
165 | 0 | for (int y = 0; y < 6; y++) |
166 | 0 | { |
167 | 0 | for (int x = 0; x < 6; x++) |
168 | 0 | { |
169 | 0 | int tmpGX = (gradX0[x] + gradX1[x]) >> shift5; |
170 | 0 | int tmpGY = (gradY0[x] + gradY1[x]) >> shift5; |
171 | 0 | int tmpDI = (int)((srcY1Tmp[x] >> shift4) - (srcY0Tmp[x] >> shift4)); |
172 | 0 | *sumAbsGX += (tmpGX < 0 ? -tmpGX : tmpGX); |
173 | 0 | *sumAbsGY += (tmpGY < 0 ? -tmpGY : tmpGY); |
174 | 0 | *sumDIX += (tmpGX < 0 ? -tmpDI : (tmpGX == 0 ? 0 : tmpDI)); |
175 | 0 | *sumDIY += (tmpGY < 0 ? -tmpDI : (tmpGY == 0 ? 0 : tmpDI)); |
176 | 0 | *sumSignGY_GX += (tmpGY < 0 ? -tmpGX : (tmpGY == 0 ? 0 : tmpGX)); |
177 | |
|
178 | 0 | } |
179 | 0 | srcY1Tmp += src1Stride; |
180 | 0 | srcY0Tmp += src0Stride; |
181 | 0 | gradX0 += widthG; |
182 | 0 | gradX1 += widthG; |
183 | 0 | gradY0 += widthG; |
184 | 0 | gradY1 += widthG; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | |
189 | | template<int padSize> |
190 | | void paddingCore(Pel *ptr, int stride, int width, int height) |
191 | 0 | { |
192 | | /*left and right padding*/ |
193 | 0 | Pel *ptrTemp1 = ptr; |
194 | 0 | Pel *ptrTemp2 = ptr + (width - 1); |
195 | 0 | ptrdiff_t offset = 0; |
196 | 0 | for (int i = 0; i < height; i++) |
197 | 0 | { |
198 | 0 | offset = stride * i; |
199 | 0 | for (int j = 1; j <= padSize; j++) |
200 | 0 | { |
201 | 0 | *(ptrTemp1 - j + offset) = *(ptrTemp1 + offset); |
202 | 0 | *(ptrTemp2 + j + offset) = *(ptrTemp2 + offset); |
203 | 0 | } |
204 | 0 | } |
205 | | /*Top and Bottom padding*/ |
206 | 0 | int numBytes = (width + padSize + padSize) * sizeof(Pel); |
207 | 0 | ptrTemp1 = (ptr - padSize); |
208 | 0 | ptrTemp2 = (ptr + (stride * (height - 1)) - padSize); |
209 | 0 | for (int i = 1; i <= padSize; i++) |
210 | 0 | { |
211 | 0 | memcpy(ptrTemp1 - (i * stride), (ptrTemp1), numBytes); |
212 | 0 | memcpy(ptrTemp2 + (i * stride), (ptrTemp2), numBytes); |
213 | 0 | } |
214 | 0 | } Unexecuted instantiation: void vvenc::paddingCore<1>(short*, int, int, int) Unexecuted instantiation: void vvenc::paddingCore<2>(short*, int, int, int) |
215 | | |
216 | | void padDmvrCore( const Pel* src, const int srcStride, Pel* dst, const int dstStride, int width, int height, int padSize ) |
217 | 0 | { |
218 | 0 | g_pelBufOP.copyBuffer( ( const char* ) src, srcStride * sizeof( Pel ), ( char* ) dst, dstStride * sizeof( Pel ), width * sizeof( Pel ), height ); |
219 | 0 | if( padSize == 1 ) |
220 | 0 | paddingCore<1>( dst, dstStride, width, height ); |
221 | 0 | else |
222 | 0 | paddingCore<2>( dst, dstStride, width, height ); |
223 | 0 | } |
224 | | |
225 | | // ==================================================================================================================== |
226 | | // Constructor / destructor / initialize |
227 | | // ==================================================================================================================== |
228 | | |
229 | | InterPrediction::InterPrediction() |
230 | 0 | : m_currChromaFormat( NUM_CHROMA_FORMAT ) |
231 | 0 | , m_subPuMC(false) |
232 | 0 | , m_IBCBufferWidth(0) |
233 | 0 | { |
234 | 0 | } |
235 | | |
236 | | InterPrediction::~InterPrediction() |
237 | 0 | { |
238 | 0 | destroy(); |
239 | 0 | } |
240 | | |
241 | | void InterPrediction::destroy() |
242 | 0 | { |
243 | 0 | for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ ) |
244 | 0 | { |
245 | 0 | m_yuvPred[i].destroy(); |
246 | 0 | } |
247 | 0 | m_geoPartBuf[0].destroy(); |
248 | 0 | m_geoPartBuf[1].destroy(); |
249 | 0 | m_IBCBuffer.destroy(); |
250 | 0 | } |
251 | | |
252 | | void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chFormat, const int ctuSize, const int ifpLines ) |
253 | 0 | { |
254 | | // if it has been initialised before, but the chroma format has changed, release the memory and start again. |
255 | 0 | if( m_yuvPred[L0].getOrigin( COMP_Y ) != nullptr && m_currChromaFormat != chFormat ) |
256 | 0 | { |
257 | 0 | destroy(); |
258 | 0 | DMVR::destroy(); |
259 | 0 | InterPredInterpolation::destroy(); |
260 | 0 | } |
261 | |
|
262 | 0 | m_currChromaFormat = chFormat; |
263 | |
|
264 | 0 | if( m_yuvPred[L0].getOrigin( COMP_Y ) == nullptr ) |
265 | 0 | { |
266 | 0 | for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ ) |
267 | 0 | { |
268 | 0 | m_yuvPred[i].create( chFormat, Area{ 0, 0, (int)MAX_CU_SIZE, (int)MAX_CU_SIZE }, 0, 0, 32 ); |
269 | 0 | } |
270 | |
|
271 | 0 | InterPredInterpolation::init(); |
272 | 0 | DMVR::init( pcRdCost, chFormat ); |
273 | 0 | m_geoPartBuf[0].create(UnitArea(chFormat, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); |
274 | 0 | m_geoPartBuf[1].create(UnitArea(chFormat, Area(0, 0, MAX_CU_SIZE, MAX_CU_SIZE))); |
275 | 0 | } |
276 | 0 | if (m_IBCBufferWidth != g_IBCBufferSize / ctuSize) |
277 | 0 | { |
278 | 0 | m_IBCBuffer.destroy(); |
279 | 0 | } |
280 | 0 | if (m_IBCBuffer.bufs.empty()) |
281 | 0 | { |
282 | 0 | m_IBCBufferWidth = g_IBCBufferSize / ctuSize; |
283 | 0 | m_IBCBuffer.create(UnitArea(chFormat, Area(0, 0, m_IBCBufferWidth, ctuSize))); |
284 | 0 | } |
285 | 0 | InterPredInterpolation::m_ifpLines = ifpLines; |
286 | 0 | } |
287 | | |
288 | | // ==================================================================================================================== |
289 | | // Public member functions |
290 | | // ==================================================================================================================== |
291 | | |
292 | | bool InterPrediction::xCheckIdenticalMotion( const CodingUnit& cu ) const |
293 | 0 | { |
294 | 0 | const Slice &slice = *cu.cs->slice; |
295 | |
|
296 | 0 | if( slice.isInterB() && !cu.cs->pps->weightedBiPred ) |
297 | 0 | { |
298 | 0 | if( cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 ) |
299 | 0 | { |
300 | 0 | int RefPOCL0 = slice.getRefPic( REF_PIC_LIST_0, cu.refIdx[0] )->getPOC(); |
301 | 0 | int RefPOCL1 = slice.getRefPic( REF_PIC_LIST_1, cu.refIdx[1] )->getPOC(); |
302 | |
|
303 | 0 | if( RefPOCL0 == RefPOCL1 ) |
304 | 0 | { |
305 | 0 | if( !cu.affine ) |
306 | 0 | { |
307 | 0 | if( cu.mv[0][0] == cu.mv[1][0] ) |
308 | 0 | { |
309 | 0 | return true; |
310 | 0 | } |
311 | 0 | } |
312 | 0 | else |
313 | 0 | { |
314 | 0 | if( cu.mv[0][0] == cu.mv[1][0] && cu.mv[0][1] == cu.mv[1][1] && ( cu.affineType == AFFINEMODEL_4PARAM || cu.mv[0][2] == cu.mv[1][2] ) ) |
315 | 0 | { |
316 | 0 | return true; |
317 | 0 | } |
318 | 0 | } |
319 | 0 | } |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | 0 | return false; |
324 | 0 | } |
325 | | |
326 | | void InterPrediction::xSubPuBDOF( const CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& refPicList /*= REF_PIC_LIST_X*/) |
327 | 0 | { |
328 | 0 | Position puPos = cu.lumaPos(); |
329 | 0 | Size puSize = cu.lumaSize(); |
330 | |
|
331 | 0 | CodingUnit subCu = cu; // th we do not need all that stuff |
332 | 0 | subCu.cs = cu.cs; |
333 | 0 | subCu.mergeType = cu.mergeType; |
334 | 0 | subCu.mmvdMergeFlag = cu.mmvdMergeFlag; |
335 | 0 | subCu.mcControl = cu.mcControl; |
336 | 0 | subCu.mergeFlag = cu.mergeFlag; |
337 | 0 | subCu.ciip = cu.ciip; |
338 | 0 | subCu.mvRefine = cu.mvRefine; |
339 | 0 | subCu.refIdx[0] = cu.refIdx[0]; |
340 | 0 | subCu.refIdx[1] = cu.refIdx[1]; |
341 | |
|
342 | 0 | const int yEnd = puPos.y + puSize.height; |
343 | 0 | const int xEnd = puPos.x + puSize.width; |
344 | 0 | const int dy = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.height); |
345 | 0 | const int dx = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.width); |
346 | 0 | for (int y = puPos.y; y < yEnd; y += dy) |
347 | 0 | { |
348 | 0 | for (int x = puPos.x; x < xEnd; x += dx) |
349 | 0 | { |
350 | 0 | const MotionInfo &curMi = cu.getMotionInfo(Position{ x, y }); |
351 | |
|
352 | 0 | subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(x, y, dx, dy))); |
353 | 0 | subCu = curMi; |
354 | 0 | PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(cu, subCu)); |
355 | |
|
356 | 0 | motionCompensation(subCu, subPredBuf, refPicList); |
357 | 0 | } |
358 | 0 | } |
359 | 0 | } |
360 | | void InterPrediction::xPredInterUni( const CodingUnit &cu, const RefPicList &refPicList, PelUnitBuf &pcYuvPred, const bool bi, const bool bdofApplied ) |
361 | 0 | { |
362 | 0 | int iRefIdx = cu.refIdx[refPicList]; |
363 | 0 | Mv mv[3]; |
364 | 0 | bool isIBC = false; |
365 | 0 | CHECK(!CU::isIBC(cu) && cu.lwidth() == 4 && cu.lheight() == 4, "invalid 4x4 inter blocks"); |
366 | 0 | if (CU::isIBC(cu)) |
367 | 0 | { |
368 | 0 | isIBC = true; |
369 | 0 | } |
370 | 0 | if (cu.affine) |
371 | 0 | { |
372 | 0 | CHECK(iRefIdx < 0, "iRefIdx incorrect."); |
373 | |
|
374 | 0 | mv[0] = cu.mv[refPicList][0]; |
375 | 0 | mv[1] = cu.mv[refPicList][1]; |
376 | 0 | mv[2] = cu.mv[refPicList][2]; |
377 | 0 | } |
378 | 0 | else |
379 | 0 | { |
380 | 0 | mv[0] = cu.mv[refPicList][0]; |
381 | 0 | if (!isIBC ) |
382 | 0 | clipMv(mv[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv); |
383 | 0 | } |
384 | | |
385 | 0 | for( uint32_t comp = COMP_Y; comp < pcYuvPred.bufs.size(); comp++ ) |
386 | 0 | { |
387 | 0 | const ComponentID compID = ComponentID( comp ); |
388 | |
|
389 | 0 | bool luma = !cu.mccNoLuma (); |
390 | 0 | bool chroma = !cu.mccNoChroma(); |
391 | |
|
392 | 0 | if( compID == COMP_Y && !luma ) |
393 | 0 | continue; |
394 | 0 | if( compID != COMP_Y && !chroma ) |
395 | 0 | continue; |
396 | | |
397 | 0 | if( cu.affine ) |
398 | 0 | { |
399 | 0 | xPredAffineBlk( compID, cu, cu.slice->getRefPic( refPicList, iRefIdx ), mv, pcYuvPred, bi, cu.slice->clpRngs[ compID ], refPicList ); |
400 | 0 | } |
401 | 0 | else |
402 | 0 | { |
403 | 0 | if( isIBC ) |
404 | 0 | { |
405 | 0 | xPredInterBlk( compID, cu, cu.slice->pic, mv[ 0 ], pcYuvPred, bi, cu.slice->clpRngs[ compID ], bdofApplied, isIBC ); |
406 | 0 | } |
407 | 0 | else |
408 | 0 | { |
409 | 0 | xPredInterBlk( compID, cu, cu.slice->getRefPic( refPicList, iRefIdx ), mv[ 0 ], pcYuvPred, bi, cu.slice->clpRngs[ compID ], bdofApplied, isIBC, refPicList ); |
410 | 0 | } |
411 | 0 | } |
412 | 0 | } |
413 | 0 | } |
414 | | |
415 | | void InterPrediction::xPredInterBi( const CodingUnit& cu, PelUnitBuf& yuvPred, const bool bdofApplied, PelUnitBuf *yuvPredTmp ) |
416 | 0 | { |
417 | 0 | CHECK( !cu.affine && cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 && ( cu.lwidth() + cu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" ); |
418 | |
|
419 | 0 | PelUnitBuf puBuf[NUM_REF_PIC_LIST_01]; |
420 | 0 | for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) |
421 | 0 | { |
422 | 0 | if( cu.refIdx[refList] < 0) |
423 | 0 | { |
424 | 0 | continue; |
425 | 0 | } |
426 | | |
427 | 0 | RefPicList refPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0); |
428 | |
|
429 | 0 | CHECK(CU::isIBC(cu) && refPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode"); |
430 | 0 | CHECK(CU::isIBC(cu) && cu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode"); |
431 | 0 | CHECK((CU::isInter(cu) && cu.refIdx[refList] >= cu.cs->slice->numRefIdx[ refPicList ]), "Invalid reference index"); |
432 | |
|
433 | 0 | puBuf[refList] = m_yuvPred[refList].getCompactBuf( cu ); |
434 | |
|
435 | 0 | if( cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 ) |
436 | 0 | { |
437 | 0 | xPredInterUni ( cu, refPicList, puBuf[refList], true, bdofApplied ); |
438 | 0 | } |
439 | 0 | else |
440 | 0 | { |
441 | 0 | xPredInterUni( cu, refPicList, puBuf[refList], cu.geo, bdofApplied ); |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | 0 | xWeightedAverage( cu, puBuf[0], puBuf[1], yuvPred, bdofApplied, yuvPredTmp ); |
446 | 0 | } |
447 | | |
448 | | void InterPrediction::motionCompensationIBC( CodingUnit& cu, PelUnitBuf& predBuf ) |
449 | 0 | { |
450 | | // dual tree handling for IBC as the only ref |
451 | 0 | xPredInterUni( cu, REF_PIC_LIST_0, predBuf, false, false ); |
452 | 0 | } |
453 | | |
454 | | bool InterPrediction::motionCompensation( CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& refPicList, PelUnitBuf* predBufDfltWght ) |
455 | 0 | { |
456 | 0 | bool ret = false; |
457 | 0 | if( refPicList != REF_PIC_LIST_X ) |
458 | 0 | { |
459 | 0 | xPredInterUni( cu, refPicList, predBuf, false, false ); |
460 | 0 | } |
461 | 0 | else |
462 | 0 | { |
463 | 0 | CHECK( !cu.affine && cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 && ( cu.lwidth() + cu.lheight() == 12 ), "Invalid 4x8/8x4 bi-predicted blocks" ); |
464 | |
|
465 | 0 | bool bdofApplied = false; |
466 | 0 | if( cu.cs->sps->BDOF && ( !cu.cs->picHeader->disBdofFlag ) ) |
467 | 0 | { |
468 | 0 | if( cu.affine || m_subPuMC || cu.ciip ) |
469 | 0 | { |
470 | 0 | bdofApplied = false; |
471 | 0 | } |
472 | 0 | else |
473 | 0 | { |
474 | 0 | if( CU::isBiPredFromDifferentDirEqDistPoc( cu ) |
475 | 0 | && cu.Y().minDim() >= 8 |
476 | 0 | && cu.Y().area() >= 128 |
477 | 0 | && !cu.smvdMode |
478 | 0 | && !( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT ) |
479 | 0 | && !( cu.mccNoBdof() && cu.mmvdMergeFlag ) ) |
480 | 0 | { |
481 | 0 | bdofApplied = true; |
482 | 0 | } |
483 | 0 | } |
484 | 0 | } |
485 | |
|
486 | 0 | bool dmvrApplied = cu.mvRefine && CU::checkDMVRCondition( cu ); |
487 | 0 | if( cu.lumaSize().maxDim() > MAX_BDOF_APPLICATION_REGION && cu.mergeType != MRG_TYPE_SUBPU_ATMVP && bdofApplied && !dmvrApplied ) |
488 | 0 | { |
489 | 0 | xSubPuBDOF( cu, predBuf, refPicList ); |
490 | 0 | } |
491 | 0 | else if (cu.mergeType != MRG_TYPE_DEFAULT_N && cu.mergeType != MRG_TYPE_IBC) |
492 | 0 | { |
493 | 0 | xSubPuMC(cu, predBuf, refPicList); |
494 | 0 | } |
495 | 0 | else if( xCheckIdenticalMotion( cu ) ) |
496 | 0 | { |
497 | 0 | xPredInterUni( cu, REF_PIC_LIST_0, predBuf, false, false ); |
498 | |
|
499 | 0 | if( predBufDfltWght ) |
500 | 0 | { |
501 | 0 | predBufDfltWght->copyFrom( predBuf ); |
502 | 0 | } |
503 | 0 | } |
504 | 0 | else if( dmvrApplied ) |
505 | 0 | { |
506 | 0 | xProcessDMVR( cu, predBuf, cu.slice->clpRngs, bdofApplied ); |
507 | 0 | } |
508 | 0 | else |
509 | 0 | { |
510 | 0 | xPredInterBi( cu, predBuf, bdofApplied, predBufDfltWght ); |
511 | 0 | } |
512 | |
|
513 | 0 | DTRACE( g_trace_ctx, D_MOT_COMP, "BDOF=%d, DMVR=%d\n", bdofApplied, dmvrApplied ); |
514 | 0 | ret = bdofApplied || dmvrApplied; |
515 | 0 | } |
516 | | |
517 | 0 | DTRACE( g_trace_ctx, D_MOT_COMP, "MV=%d,%d\n", cu.mv[0][0].hor, cu.mv[0][0].ver ); |
518 | 0 | DTRACE( g_trace_ctx, D_MOT_COMP, "MV=%d,%d\n", cu.mv[1][0].hor, cu.mv[1][0].ver ); |
519 | 0 | DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Y(), cu, cu.predMode, COMP_Y ); |
520 | 0 | if( cu.chromaFormat != VVENC_CHROMA_400 ) |
521 | 0 | { |
522 | 0 | DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Cb(), cu, cu.predMode, COMP_Cb ); |
523 | 0 | DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Cr(), cu, cu.predMode, COMP_Cr ); |
524 | 0 | } |
525 | |
|
526 | 0 | return ret; |
527 | 0 | } |
528 | | |
529 | | void InterPrediction::xSubPuMC(CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& eRefPicList /*= REF_PIC_LIST_X*/) |
530 | 0 | { |
531 | 0 | Position puPos = cu.lumaPos(); |
532 | 0 | Size puSize = cu.lumaSize(); |
533 | |
|
534 | 0 | int numPartLine = std::max( puSize.width >> ATMVP_SUB_BLOCK_SIZE, 1u ); |
535 | 0 | int numPartCol = std::max( puSize.height >> ATMVP_SUB_BLOCK_SIZE, 1u ); |
536 | 0 | int puHeight = numPartCol == 1 ? puSize.height : 1 << ATMVP_SUB_BLOCK_SIZE; |
537 | 0 | int puWidth = numPartLine == 1 ? puSize.width : 1 << ATMVP_SUB_BLOCK_SIZE; |
538 | |
|
539 | 0 | CodingUnit subCu = cu; |
540 | 0 | subCu.cs = cu.cs; |
541 | 0 | subCu.mergeType = MRG_TYPE_DEFAULT_N; |
542 | |
|
543 | 0 | bool isAffine = cu.affine; |
544 | 0 | subCu.affine = false; |
545 | | |
546 | | // join sub-pus containing the same motion |
547 | 0 | bool verMC = puSize.height > puSize.width; |
548 | 0 | int fstStart = (!verMC ? puPos.y : puPos.x); |
549 | 0 | int secStart = (!verMC ? puPos.x : puPos.y); |
550 | 0 | int fstEnd = (!verMC ? puPos.y + puSize.height : puPos.x + puSize.width); |
551 | 0 | int secEnd = (!verMC ? puPos.x + puSize.width : puPos.y + puSize.height); |
552 | 0 | int fstStep = (!verMC ? puHeight : puWidth); |
553 | 0 | int secStep = (!verMC ? puWidth : puHeight); |
554 | |
|
555 | 0 | cu.refIdx[0] = 0; |
556 | 0 | cu.refIdx[1] = cu.cs->slice->sliceType == VVENC_B_SLICE ? 0 : -1; |
557 | 0 | bool scaled = false;//!CU::isRefPicSameSize(cu); |
558 | |
|
559 | 0 | m_subPuMC = true; |
560 | |
|
561 | 0 | for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep) |
562 | 0 | { |
563 | 0 | for (int secDim = secStart; secDim < secEnd; secDim += secStep) |
564 | 0 | { |
565 | 0 | int x = !verMC ? secDim : fstDim; |
566 | 0 | int y = !verMC ? fstDim : secDim; |
567 | 0 | const MotionInfo &curMi = cu.getMotionInfo(Position{ x, y }); |
568 | |
|
569 | 0 | int length = secStep; |
570 | 0 | int later = secDim + secStep; |
571 | |
|
572 | 0 | while (later < secEnd) |
573 | 0 | { |
574 | 0 | const MotionInfo &laterMi = !verMC ? cu.getMotionInfo(Position{ later, fstDim }) : cu.getMotionInfo(Position{ fstDim, later }); |
575 | 0 | if (!scaled && laterMi == curMi) |
576 | 0 | { |
577 | 0 | length += secStep; |
578 | 0 | } |
579 | 0 | else |
580 | 0 | { |
581 | 0 | break; |
582 | 0 | } |
583 | 0 | later += secStep; |
584 | 0 | } |
585 | 0 | int dx = !verMC ? length : puWidth; |
586 | 0 | int dy = !verMC ? puHeight : length; |
587 | |
|
588 | 0 | subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(x, y, dx, dy))); |
589 | 0 | subCu = curMi; |
590 | 0 | PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(cu, subCu)); |
591 | 0 | subCu.mcControl = (cu.mcControl >> 1) << 1; |
592 | 0 | subCu.mvRefine = false; |
593 | 0 | motionCompensation(subCu, subPredBuf, eRefPicList); |
594 | 0 | secDim = later - secStep; |
595 | 0 | } |
596 | 0 | } |
597 | 0 | m_subPuMC = false; |
598 | |
|
599 | 0 | cu.affine = isAffine; |
600 | 0 | } |
601 | | |
602 | | static inline int xRightShiftMSB( int numer, int denom ) |
603 | 0 | { |
604 | 0 | return numer >> floorLog2( denom ); |
605 | 0 | } |
606 | | |
607 | | void xFpBiDirOptFlowCore( const Pel* srcY0, const Pel* srcY1, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, |
608 | | const Pel* gradY1, const int width, const int height, Pel* dstY, const ptrdiff_t dstStride, |
609 | | const int shiftNum, const int offset, const int limit, const ClpRng& clpRng, |
610 | | const int bitDepth ) |
611 | 0 | { |
612 | 0 | int xUnit = width >> 2; |
613 | 0 | int yUnit = height >> 2; |
614 | 0 | ptrdiff_t widthG = width + 2 * BDOF_EXTEND_SIZE; |
615 | |
|
616 | 0 | ptrdiff_t offsetPos = widthG * BDOF_EXTEND_SIZE + BDOF_EXTEND_SIZE; |
617 | 0 | ptrdiff_t stridePredMC = widthG + 2; |
618 | |
|
619 | 0 | const ptrdiff_t src0Stride = stridePredMC; |
620 | 0 | const ptrdiff_t src1Stride = stridePredMC; |
621 | |
|
622 | 0 | const Pel* srcY0Temp = srcY0; |
623 | 0 | const Pel* srcY1Temp = srcY1; |
624 | |
|
625 | 0 | for( int yu = 0; yu < yUnit; yu++ ) |
626 | 0 | { |
627 | 0 | for( int xu = 0; xu < xUnit; xu++ ) |
628 | 0 | { |
629 | 0 | int tmpx = 0, tmpy = 0; |
630 | 0 | int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0; |
631 | 0 | int sumSignGY_GX = 0; |
632 | |
|
633 | 0 | const Pel* pGradX0Tmp = gradX0 + ( xu << 2 ) + ( yu << 2 ) * widthG; |
634 | 0 | const Pel* pGradX1Tmp = gradX1 + ( xu << 2 ) + ( yu << 2 ) * widthG; |
635 | 0 | const Pel* pGradY0Tmp = gradY0 + ( xu << 2 ) + ( yu << 2 ) * widthG; |
636 | 0 | const Pel* pGradY1Tmp = gradY1 + ( xu << 2 ) + ( yu << 2 ) * widthG; |
637 | 0 | const Pel* SrcY1Tmp = srcY1 + ( xu << 2 ) + ( yu << 2 ) * src1Stride; |
638 | 0 | const Pel* SrcY0Tmp = srcY0 + ( xu << 2 ) + ( yu << 2 ) * src0Stride; |
639 | |
|
640 | 0 | calcBDOFSumsCore( SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride, |
641 | 0 | src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX ); |
642 | 0 | tmpx = ( sumAbsGX == 0 ? 0 : xRightShiftMSB( 4 * sumDIX, sumAbsGX ) ); |
643 | 0 | tmpx = Clip3( -limit, limit, tmpx ); |
644 | |
|
645 | 0 | const int tmpData = sumSignGY_GX * tmpx >> 1; |
646 | 0 | tmpy = ( sumAbsGY == 0 ? 0 : xRightShiftMSB( ( 4 * sumDIY - tmpData ), sumAbsGY ) ); |
647 | 0 | tmpy = Clip3( -limit, limit, tmpy ); |
648 | |
|
649 | 0 | srcY0Temp = srcY0 + ( stridePredMC + 1 ) + ( ( yu * src0Stride + xu ) << 2 ); |
650 | 0 | srcY1Temp = srcY1 + ( stridePredMC + 1 ) + ( ( yu * src0Stride + xu ) << 2 ); |
651 | 0 | pGradX0Tmp = gradX0 + offsetPos + ( ( yu * widthG + xu ) << 2 ); |
652 | 0 | pGradX1Tmp = gradX1 + offsetPos + ( ( yu * widthG + xu ) << 2 ); |
653 | 0 | pGradY0Tmp = gradY0 + offsetPos + ( ( yu * widthG + xu ) << 2 ); |
654 | 0 | pGradY1Tmp = gradY1 + offsetPos + ( ( yu * widthG + xu ) << 2 ); |
655 | |
|
656 | 0 | Pel* dstY0 = dstY + ( ( yu * dstStride + xu ) << 2 ); |
657 | 0 | addBDOFAvgCore( srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, pGradX0Tmp, pGradX1Tmp, |
658 | 0 | pGradY0Tmp, pGradY1Tmp, widthG, ( 1 << 2 ), ( 1 << 2 ), tmpx, tmpy, shiftNum, offset, clpRng ); |
659 | 0 | } // xu |
660 | 0 | } // yu |
661 | 0 | } |
662 | | |
663 | | InterPredInterpolation::InterPredInterpolation() |
664 | 0 | : m_storedMv(nullptr) |
665 | 0 | , m_skipPROF(false) |
666 | 0 | , m_encOnly(false) |
667 | 0 | , m_isBi(false) |
668 | 0 | , m_ifpLines(0) |
669 | 0 | { |
670 | |
|
671 | 0 | } |
672 | | |
673 | | InterPredInterpolation::~InterPredInterpolation() |
674 | 0 | { |
675 | 0 | destroy(); |
676 | 0 | } |
677 | | |
678 | | void InterPredInterpolation::destroy() |
679 | 0 | { |
680 | 0 | for( uint32_t c = 0; c < MAX_NUM_COMP; c++ ) |
681 | 0 | { |
682 | 0 | for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) |
683 | 0 | { |
684 | 0 | for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ ) |
685 | 0 | { |
686 | 0 | xFree( m_filteredBlock[i][j][c] ); |
687 | 0 | m_filteredBlock[i][j][c] = nullptr; |
688 | 0 | } |
689 | |
|
690 | 0 | xFree( m_filteredBlockTmp[i][c] ); |
691 | 0 | m_filteredBlockTmp[i][c] = nullptr; |
692 | 0 | } |
693 | 0 | } |
694 | 0 | xFree(m_gradX0); m_gradX0 = nullptr; |
695 | 0 | xFree(m_gradY0); m_gradY0 = nullptr; |
696 | 0 | xFree(m_gradX1); m_gradX1 = nullptr; |
697 | 0 | xFree(m_gradY1); m_gradY1 = nullptr; |
698 | |
|
699 | 0 | if (m_storedMv != nullptr) |
700 | 0 | { |
701 | 0 | delete[] m_storedMv; |
702 | 0 | m_storedMv = nullptr; |
703 | 0 | } |
704 | 0 | } |
705 | | |
706 | | void InterPredInterpolation::init( bool enableOpt ) |
707 | 0 | { |
708 | 0 | for( uint32_t c = 0; c < MAX_NUM_COMP; c++ ) |
709 | 0 | { |
710 | 0 | int extWidth = MAX_CU_SIZE + (2 * BDOF_EXTEND_SIZE + 2) + 16; |
711 | 0 | int extHeight = MAX_CU_SIZE + (2 * BDOF_EXTEND_SIZE + 2) + 1; |
712 | 0 | extWidth = extWidth > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16) ? extWidth : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16; |
713 | 0 | extHeight = extHeight > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1) ? extHeight : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1; |
714 | 0 | for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ ) |
715 | 0 | { |
716 | 0 | m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) ); |
717 | 0 | VALGRIND_MEMCLEAR( m_filteredBlockTmp[i][c], sizeof( Pel ) * (extWidth + 4) * (extHeight + 7 + 4) ); |
718 | |
|
719 | 0 | for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ ) |
720 | 0 | { |
721 | 0 | m_filteredBlock[i][j][c] = ( Pel* ) xMalloc( Pel, extWidth * extHeight ); |
722 | 0 | VALGRIND_MEMCLEAR( m_filteredBlock[i][j][c], sizeof( Pel ) * extWidth * extHeight ); |
723 | 0 | } |
724 | 0 | } |
725 | 0 | } |
726 | |
|
727 | 0 | m_gradX0 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE); |
728 | 0 | m_gradY0 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE); |
729 | 0 | m_gradX1 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE); |
730 | 0 | m_gradY1 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE); |
731 | |
|
732 | 0 | VALGRIND_MEMCLEAR( m_gradX0, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE ); |
733 | 0 | VALGRIND_MEMCLEAR( m_gradY0, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE ); |
734 | 0 | VALGRIND_MEMCLEAR( m_gradX1, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE ); |
735 | 0 | VALGRIND_MEMCLEAR( m_gradY1, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE ); |
736 | |
|
737 | 0 | m_if.initInterpolationFilter( true ); |
738 | |
|
739 | 0 | xFpBiDirOptFlow = xFpBiDirOptFlowCore; |
740 | 0 | xFpBDOFGradFilter = gradFilterCore; |
741 | 0 | xFpProfGradFilter = gradFilterCore<false>; |
742 | 0 | xFpApplyPROF = applyPROFCore; |
743 | 0 | xFpPadDmvr = padDmvrCore; |
744 | |
|
745 | 0 | if( enableOpt ) |
746 | 0 | { |
747 | 0 | #if ENABLE_SIMD_OPT_BDOF && defined( TARGET_SIMD_X86 ) |
748 | 0 | initInterPredictionX86(); |
749 | 0 | #endif |
750 | | #if ENABLE_SIMD_OPT_BDOF && defined( TARGET_SIMD_ARM ) |
751 | | initInterPredictionARM(); |
752 | | #endif |
753 | 0 | } |
754 | |
|
755 | 0 | if (m_storedMv == nullptr) |
756 | 0 | { |
757 | 0 | const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; |
758 | 0 | m_storedMv = new Mv[MVBUFFER_SIZE*MVBUFFER_SIZE]; |
759 | | #if ENABLE_VALGRIND_CODE |
760 | | for( int i = 0; i < MVBUFFER_SIZE * MVBUFFER_SIZE; i++ ) |
761 | | { |
762 | | m_storedMv[i].setZero(); |
763 | | } |
764 | | #endif |
765 | 0 | } |
766 | 0 | } |
767 | | |
// Motion-compensated prediction of one colour component of `cu` into dstPic.bufs[compID].
//
// The source is either the reconstruction buffer of `refPic`, displaced by the integer
// part of `_mv`, or - when `srcPadBuf` is non-null - the caller-supplied buffer
// `srcPadBuf` with stride `srcPadStride` (in that case `refPic` is not dereferenced).
// The fractional part of the vector selects the interpolation phase; it is forced to
// zero for IBC. A non-zero `dmvrWidth` overrides the block size taken from the
// destination buffer with dmvrWidth x dmvrHeight. `bilinearMC` selects filterN2_2D
// instead of the regular filters.
//
// With `bdofApplied` and a luma component, the output is redirected into
// m_filteredBlockTmp[2 + refPicList][COMP_Y], a border of samples derived from unfiltered
// reference samples is written around the block, and `dstPic`'s buffer pointer and
// stride are restored before returning.
768 | | void InterPredInterpolation::xPredInterBlk( const ComponentID compID, const CodingUnit &cu, |
769 | | const Picture *refPic, const Mv &_mv, PelUnitBuf &dstPic, |
770 | | const bool bi, const ClpRng &clpRng, const bool bdofApplied, const bool isIBC, const RefPicList refPicList, |
771 | | const SizeType dmvrWidth, const SizeType dmvrHeight, |
772 | | const bool bilinearMC, const Pel *srcPadBuf, const int32_t srcPadStride ) |
773 | 0 | { |
774 | 0 | const ChromaFormat chFmt = cu.chromaFormat; |
// rndRes is forwarded to the filter calls; it is set only when `bi` is false.
775 | 0 | const bool rndRes = !bi; |
776 | |
|
// Number of fractional MV bits for this component (internal precision plus chroma scale).
777 | 0 | int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleX(compID, chFmt); |
778 | 0 | int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleY(compID, chFmt); |
779 | |
|
780 | 0 | Mv mv(_mv); |
781 | |
|
782 | 0 | CHECKD( m_ifpLines && !srcPadBuf && cu.cs->picture != refPic && !CU::isMvInRangeFPP( cu[compID].y, cu[compID].height, mv.ver, m_ifpLines, *cu.cs->pcv, getComponentScaleY(compID, chFmt) ), "xPredInterBlk: CTU line-wise FPP MV restriction failed!\n" ); |
783 | |
|
// Fractional phase of the vector; IBC always uses integer positions.
784 | 0 | int xFrac = mv.hor & ((1 << shiftHor) - 1); |
785 | 0 | int yFrac = mv.ver & ((1 << shiftVer) - 1); |
786 | 0 | if (isIBC) |
787 | 0 | { |
788 | 0 | xFrac = yFrac = 0; |
789 | 0 | } |
790 | |
|
791 | 0 | PelBuf& dstBuf = dstPic.bufs[compID]; |
792 | 0 | unsigned width = dstBuf.width; |
793 | 0 | unsigned height = dstBuf.height; |
794 | |
|
795 | 0 | const Pel* refBufPtr; |
796 | 0 | int refBufStride; |
797 | |
|
// Select the source: the padded buffer if supplied, otherwise the reconstruction of
// refPic at the block position displaced by the integer part of the vector.
798 | 0 | if( srcPadBuf ) |
799 | 0 | { |
800 | 0 | refBufPtr = srcPadBuf; |
801 | 0 | refBufStride = srcPadStride; |
802 | 0 | } |
803 | 0 | else |
804 | 0 | { |
805 | 0 | Position offset = cu.blocks[compID].pos().offset( mv.hor >> shiftHor, mv.ver >> shiftVer ); |
806 | 0 | refBufPtr = refPic->getRecoBufPtr ( compID ); |
807 | 0 | refBufStride = refPic->getRecoBufStride( compID ); |
808 | 0 | refBufPtr += offset.x; |
809 | 0 | refBufPtr += offset.y * refBufStride; |
810 | 0 | } |
811 | |
|
812 | 0 | if( dmvrWidth ) |
813 | 0 | { |
814 | 0 | width = dmvrWidth; |
815 | 0 | height = dmvrHeight; |
816 | 0 | } |
817 | | // backup data |
818 | 0 | const int backupWidth = width; |
819 | 0 | const int backupHeight = height; |
820 | 0 | Pel* backupDstBufPtr = dstBuf.buf; |
821 | 0 | int backupDstBufStride = dstBuf.stride; |
// BDOF (luma only): enlarge the working size and redirect the output into the per-list
// temporary block, leaving two rows and two columns in front of the first output sample.
822 | 0 | if( bdofApplied && compID == COMP_Y ) |
823 | 0 | { |
824 | 0 | width = width + 2 * BDOF_EXTEND_SIZE + 2; |
825 | 0 | height = height + 2 * BDOF_EXTEND_SIZE + 2; |
826 | | |
827 | | // change MC output |
828 | 0 | CHECK( refPicList >= NUM_REF_PIC_LIST_01, "Wrong refpiclist" ); |
829 | 0 | dstBuf.stride = width; |
830 | 0 | dstBuf.buf = m_filteredBlockTmp[2 + refPicList][compID] + 2 * dstBuf.stride + 2; |
831 | 0 | } |
832 | 0 | bool useAltHpelIf = cu.imv == IMV_HPEL; |
833 | |
|
// Filter dispatch. Only the bilinear path uses width/height; every other path filters
// the un-extended backupWidth x backupHeight block.
834 | 0 | if( bilinearMC ) |
835 | 0 | { |
836 | 0 | m_if.filterN2_2D( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, width, height, xFrac, yFrac, clpRng ); |
837 | 0 | } |
838 | 0 | else if( yFrac == 0 ) |
839 | 0 | { |
840 | 0 | m_if.filterHor(compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, useAltHpelIf, 0); |
841 | 0 | } |
842 | 0 | else if( xFrac == 0 ) |
843 | 0 | { |
844 | 0 | m_if.filterVer(compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, useAltHpelIf, 0); |
845 | 0 | } |
846 | 0 | else if( backupWidth == 4 && backupHeight == 4 ) |
847 | 0 | { |
848 | 0 | m_if.filter4x4( compID, refBufPtr, refBufStride ,(Pel*)dstBuf.buf, dstBuf.stride, 4, 4, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf ); |
849 | 0 | } |
850 | 0 | else if( backupWidth == 16 ) |
851 | 0 | { |
852 | 0 | m_if.filter16xH( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, 16, backupHeight, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf ); |
853 | 0 | } |
854 | 0 | else if( backupWidth == 8 ) |
855 | 0 | { |
856 | 0 | m_if.filter8xH( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, 8, backupHeight, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf ); |
857 | 0 | } |
858 | 0 | else |
859 | 0 | { |
// Generic two-pass path: horizontal pass into m_filteredBlockTmp[0] over
// backupHeight + vFilterSize - 1 rows, then vertical pass into the destination.
860 | 0 | const int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA; |
861 | |
|
862 | 0 | PelBuf tmpBuf( m_filteredBlockTmp[0][compID], dmvrWidth ? dmvrWidth : dstBuf.stride, dmvrWidth ? Size( dmvrWidth, dmvrHeight ) : cu.blocks[compID].size() ); |
863 | |
|
864 | 0 | m_if.filterHor(compID, refBufPtr - ((vFilterSize >> 1) - 1) * refBufStride, refBufStride, tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, useAltHpelIf, 0); |
865 | 0 | m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, useAltHpelIf, 0); |
866 | 0 | } |
867 | |
|
// BDOF border: fill the row above, the left/right columns and the row below the
// filtered block from reference samples, each left-shifted by `shift` and reduced by
// IF_INTERNAL_OFFS. xOffset/yOffset move the source one sample left/up when the
// fractional phase is below 8.
868 | 0 | if (bdofApplied && compID == COMP_Y) |
869 | 0 | { |
870 | 0 | const unsigned shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); |
871 | 0 | int xOffset = (xFrac < 8) ? 1 : 0; |
872 | 0 | int yOffset = (yFrac < 8) ? 1 : 0; |
// Row above the block.
873 | 0 | const Pel* refPel = refBufPtr - yOffset * refBufStride - xOffset; |
874 | 0 | Pel* dstPel = m_filteredBlockTmp[2 + refPicList][compID] + dstBuf.stride + 1; |
875 | 0 | for (int w = 0; w < (width - 2 * BDOF_EXTEND_SIZE); w++) |
876 | 0 | { |
877 | 0 | Pel val = leftShiftU(refPel[w], shift); |
878 | 0 | dstPel[w] = val - (Pel)IF_INTERNAL_OFFS; |
879 | 0 | } |
880 | |
|
// First and last column of every block row.
881 | 0 | refPel = refBufPtr + (1 - yOffset)*refBufStride - xOffset; |
882 | 0 | dstPel = m_filteredBlockTmp[2 + refPicList][compID] + 2 * dstBuf.stride + 1; |
883 | 0 | for (int h = 0; h < (height - 2 * BDOF_EXTEND_SIZE - 2); h++) |
884 | 0 | { |
885 | 0 | Pel val = leftShiftU(refPel[0], shift); |
886 | 0 | dstPel[0] = val - (Pel)IF_INTERNAL_OFFS; |
887 | |
|
888 | 0 | val = leftShiftU(refPel[width - 3], shift); |
889 | 0 | dstPel[width - 3] = val - (Pel)IF_INTERNAL_OFFS; |
890 | |
|
891 | 0 | refPel += refBufStride; |
892 | 0 | dstPel += dstBuf.stride; |
893 | 0 | } |
894 | |
|
// Row below the block.
895 | 0 | refPel = refBufPtr + (height - 2 * BDOF_EXTEND_SIZE - 2 + 1 - yOffset)*refBufStride - xOffset; |
896 | 0 | dstPel = m_filteredBlockTmp[2 + refPicList][compID] + (height - 2 * BDOF_EXTEND_SIZE)*dstBuf.stride + 1; |
897 | 0 | for (int w = 0; w < (width - 2 * BDOF_EXTEND_SIZE); w++) |
898 | 0 | { |
899 | 0 | Pel val = leftShiftU(refPel[w], shift); |
900 | 0 | dstPel[w] = val - (Pel)IF_INTERNAL_OFFS; |
901 | 0 | } |
902 | | |
903 | | // restore data |
904 | 0 | width = backupWidth; |
905 | 0 | height = backupHeight; |
906 | 0 | dstBuf.buf = backupDstBufPtr; |
907 | 0 | dstBuf.stride = backupDstBufStride; |
908 | 0 | } |
909 | 0 | } |
910 | | |
911 | | void InterPredInterpolation::xApplyBDOF( PelBuf& yuvDst, const ClpRng& clpRng ) |
912 | 0 | { |
913 | 0 | const int bitDepth = clpRng.bd; |
914 | |
|
915 | 0 | const int height = yuvDst.height; |
916 | 0 | const int width = yuvDst.width; |
917 | 0 | int heightG = height + 2 * BDOF_EXTEND_SIZE; |
918 | 0 | int widthG = width + 2 * BDOF_EXTEND_SIZE; |
919 | |
|
920 | 0 | Pel* gradX0 = m_gradX0; |
921 | 0 | Pel* gradX1 = m_gradX1; |
922 | 0 | Pel* gradY0 = m_gradY0; |
923 | 0 | Pel* gradY1 = m_gradY1; |
924 | |
|
925 | 0 | int stridePredMC = widthG + 2; |
926 | 0 | const Pel* srcY0 = m_filteredBlockTmp[2][COMP_Y] + stridePredMC + 1; |
927 | 0 | const Pel* srcY1 = m_filteredBlockTmp[3][COMP_Y] + stridePredMC + 1; |
928 | |
|
929 | 0 | Pel* dstY = yuvDst.buf; |
930 | 0 | const int dstStride = yuvDst.stride; |
931 | |
|
932 | 0 | for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++) |
933 | 0 | { |
934 | 0 | Pel* dstTempPtr = m_filteredBlockTmp[2 + refList][COMP_Y] + stridePredMC + 1; |
935 | 0 | Pel* gradY = (refList == 0) ? m_gradY0 : m_gradY1; |
936 | 0 | Pel* gradX = (refList == 0) ? m_gradX0 : m_gradX1; |
937 | |
|
938 | 0 | xFpBDOFGradFilter(dstTempPtr, stridePredMC, widthG, heightG, widthG, gradX, gradY, bitDepth ); |
939 | 0 | Pel* padStr = m_filteredBlockTmp[2 + refList][COMP_Y] + 2 * stridePredMC + 2; |
940 | 0 | for (int y = 0; y< height; y++) |
941 | 0 | { |
942 | 0 | padStr[-1] = padStr[0]; |
943 | 0 | padStr[width] = padStr[width - 1]; |
944 | 0 | padStr += stridePredMC; |
945 | 0 | } |
946 | |
|
947 | 0 | padStr = m_filteredBlockTmp[2 + refList][COMP_Y] + 2 * stridePredMC + 1; |
948 | 0 | ::memcpy(padStr - stridePredMC, padStr, sizeof(Pel)*(widthG)); |
949 | 0 | ::memcpy(padStr + height*stridePredMC, padStr + (height - 1)*stridePredMC, sizeof(Pel)*(widthG)); |
950 | 0 | } |
951 | |
|
952 | 0 | const unsigned shiftNum = IF_INTERNAL_PREC + 1 - bitDepth; |
953 | 0 | const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; |
954 | 0 | const int limit = (1 << 4) - 1; |
955 | |
|
956 | 0 | xFpBiDirOptFlow( srcY0, srcY1, gradX0, gradX1, gradY0, gradY1, width, height, dstY, dstStride, shiftNum, offset, |
957 | 0 | limit, clpRng, bitDepth ); |
958 | 0 | } |
959 | | |
960 | | void InterPredInterpolation::xWeightedAverage( const CodingUnit& cu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const bool bdofApplied, PelUnitBuf *yuvPredTmp ) |
961 | 0 | { |
962 | 0 | const bool lumaOnly = cu.mccNoChroma(); |
963 | 0 | const bool chromaOnly = cu.mccNoLuma (); |
964 | |
|
965 | 0 | CHECK( chromaOnly && lumaOnly, "should not happen" ); |
966 | |
|
967 | 0 | const ClpRngs& clpRngs = cu.slice->clpRngs; |
968 | 0 | const int iRefIdx0 = cu.refIdx[0]; |
969 | 0 | const int iRefIdx1 = cu.refIdx[1]; |
970 | |
|
971 | 0 | if( iRefIdx0 >= 0 && iRefIdx1 >= 0 ) |
972 | 0 | { |
973 | 0 | if( cu.BcwIdx != BCW_DEFAULT && ( yuvPredTmp || !cu.ciip ) ) |
974 | 0 | { |
975 | 0 | CHECK( bdofApplied, "BCW is disallowed with BIO" ); |
976 | |
|
977 | 0 | pcYuvDst.addWeightedAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, cu.BcwIdx, chromaOnly, lumaOnly ); |
978 | |
|
979 | 0 | if( yuvPredTmp ) |
980 | 0 | { |
981 | 0 | yuvPredTmp->addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly ); |
982 | 0 | } |
983 | 0 | return; |
984 | 0 | } |
985 | | |
986 | 0 | if( bdofApplied && !chromaOnly ) |
987 | 0 | { |
988 | 0 | xApplyBDOF( pcYuvDst.Y(), clpRngs[COMP_Y] ); |
989 | 0 | } |
990 | 0 | if( !bdofApplied && ( lumaOnly || chromaOnly ) ) |
991 | 0 | { |
992 | 0 | pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly ); |
993 | 0 | } |
994 | 0 | else |
995 | 0 | { |
996 | 0 | pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, bdofApplied ); |
997 | 0 | } |
998 | 0 | } |
999 | 0 | else |
1000 | 0 | { |
1001 | 0 | if( cu.geo ) |
1002 | 0 | { |
1003 | 0 | pcYuvDst.copyFrom( iRefIdx0 >= 0 ? pcYuvSrc0 : pcYuvSrc1 ); |
1004 | 0 | } |
1005 | 0 | else |
1006 | 0 | { |
1007 | 0 | pcYuvDst.copyClip( iRefIdx0 >= 0 ? pcYuvSrc0 : pcYuvSrc1, clpRngs, lumaOnly, chromaOnly ); |
1008 | 0 | } |
1009 | 0 | } |
1010 | 0 | } |
1011 | | |
1012 | | void InterPrediction::motionCompensationGeo( CodingUnit &cu, PelUnitBuf &predBuf, const MergeCtx &geoMrgCtx ) |
1013 | 0 | { |
1014 | 0 | const ClpRngs &clpRngs = cu.slice->clpRngs; |
1015 | 0 | const UnitArea localUnitArea( cu.chromaFormat, Area( 0, 0, cu.lwidth(), cu.lheight() ) ); |
1016 | |
|
1017 | 0 | PelUnitBuf tmpGeoBuf0 = m_geoPartBuf[0].getBuf( localUnitArea ); |
1018 | 0 | PelUnitBuf tmpGeoBuf1 = m_geoPartBuf[1].getBuf( localUnitArea ); |
1019 | |
|
1020 | 0 | geoMrgCtx.setMergeInfo( cu, cu.geoMergeIdx[0] ); |
1021 | 0 | CU::spanMotionInfo ( cu ); |
1022 | 0 | motionCompensation ( cu, tmpGeoBuf0, REF_PIC_LIST_X ); // TODO: check 4:0:0 interaction with weighted prediction. |
1023 | |
|
1024 | 0 | geoMrgCtx.setMergeInfo( cu, cu.geoMergeIdx[1] ); |
1025 | 0 | CU::spanMotionInfo ( cu ); |
1026 | 0 | motionCompensation ( cu, tmpGeoBuf1, REF_PIC_LIST_X ); // TODO: check 4:0:0 interaction with weighted prediction. |
1027 | |
|
1028 | 0 | weightedGeoBlk( clpRngs, cu, cu.geoSplitDir, isChromaEnabled( cu.chromaFormat ) ? MAX_NUM_CH : CH_L, predBuf, tmpGeoBuf0, tmpGeoBuf1 ); |
1029 | 0 | } |
1030 | | |
1031 | | void InterPredInterpolation::weightedGeoBlk(const ClpRngs &clpRngs, CodingUnit& cu, const uint8_t splitDir, |
1032 | | int32_t channel, PelUnitBuf &predDst, PelUnitBuf &predSrc0, PelUnitBuf &predSrc1) |
1033 | 0 | { |
1034 | 0 | if( channel != CH_C ) |
1035 | 0 | { |
1036 | 0 | m_if.weightedGeoBlk( clpRngs, cu, cu.lumaSize().width, cu.lumaSize().height, COMP_Y, splitDir, predDst, predSrc0, predSrc1 ); |
1037 | 0 | } |
1038 | |
|
1039 | 0 | if( channel != CH_L && isChromaEnabled( cu.chromaFormat ) ) |
1040 | 0 | { |
1041 | 0 | m_if.weightedGeoBlk( clpRngs, cu, cu.chromaSize().width, cu.chromaSize().height, COMP_Cb, splitDir, predDst, predSrc0, predSrc1 ); |
1042 | 0 | m_if.weightedGeoBlk( clpRngs, cu, cu.chromaSize().width, cu.chromaSize().height, COMP_Cr, splitDir, predDst, predSrc0, predSrc1 ); |
1043 | 0 | } |
1044 | 0 | } |
1045 | | |
1046 | 0 | DMVR::DMVR() : m_pcRdCost( nullptr ) |
1047 | 0 | { |
1048 | 0 | } |
1049 | | |
1050 | | DMVR::~DMVR() |
1051 | 0 | { |
1052 | 0 | destroy(); |
1053 | 0 | } |
1054 | | |
1055 | | void DMVR::destroy() |
1056 | 0 | { |
1057 | 0 | for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ ) |
1058 | 0 | { |
1059 | 0 | m_yuvPred[i].destroy(); |
1060 | 0 | m_yuvPad[i].destroy(); |
1061 | 0 | m_yuvTmp[i].destroy(); |
1062 | 0 | } |
1063 | 0 | m_pcRdCost = nullptr; |
1064 | 0 | } |
1065 | | |
1066 | | void DMVR::init( RdCost* pcRdCost, const ChromaFormat chFormat ) |
1067 | 0 | { |
1068 | 0 | if( m_pcRdCost == nullptr ) |
1069 | 0 | { |
1070 | 0 | m_pcRdCost = pcRdCost; |
1071 | |
|
1072 | 0 | Area predArea = Area( 0, 0, DMVR_SUBCU_SIZE, DMVR_SUBCU_SIZE ); |
1073 | 0 | Area refArea = Area( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE ); |
1074 | 0 | for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ ) |
1075 | 0 | { |
1076 | 0 | m_yuvPred[i].create( chFormat, predArea ); |
1077 | 0 | m_yuvTmp[i].create( CHROMA_400, refArea, 0, DMVR_NUM_ITERATION ); |
1078 | 0 | m_yuvPad[i].create( chFormat, predArea, 0, DMVR_NUM_ITERATION + (NTAPS_LUMA>>1), 32 ); |
1079 | | // the buffer m_yuvPad[i].bufs[0].buf is aligned to 32 |
1080 | | // the actual begin of the written to buffer is m_yuvPad[i].bufs[0].buf - 3 * stride - 3 = m_yuvPad[i].bufs[0].buf - 99, |
1081 | | // which is not aligned with int. Since the margin on the left side is 1 sample too big, moving the buffer within the |
1082 | | // allocated memory 1 to the left doesn't cause problems |
1083 | 0 | m_yuvPad[i].bufs[0].buf--; |
1084 | 0 | } |
1085 | 0 | } |
1086 | 0 | } |
1087 | | |
1088 | | void DMVR::xCopyAndPad( const CodingUnit& cu, PelUnitBuf& pcPad, RefPicList refId, bool forLuma) |
1089 | 0 | { |
1090 | 0 | int width, height; |
1091 | 0 | Mv cMv; |
1092 | |
|
1093 | 0 | const Picture* refPic = cu.slice->getRefPic(refId, cu.refIdx[refId]); |
1094 | |
|
1095 | 0 | static constexpr int mvShift = MV_FRACTIONAL_BITS_INTERNAL; |
1096 | |
|
1097 | 0 | const int start = forLuma ? 0 : 1; |
1098 | 0 | const int end = forLuma ? 1 : MAX_NUM_COMP; |
1099 | |
|
1100 | 0 | for (int compID = start; compID < end; compID++) |
1101 | 0 | { |
1102 | 0 | int filtersize = compID == COMP_Y ? NTAPS_LUMA : NTAPS_CHROMA; |
1103 | 0 | cMv = cu.mv[refId][0]; |
1104 | 0 | width = pcPad.bufs[compID].width; |
1105 | 0 | height = pcPad.bufs[compID].height; |
1106 | |
|
1107 | 0 | int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, cu.chromaFormat); |
1108 | |
|
1109 | 0 | width += filtersize - 1; |
1110 | 0 | height += filtersize - 1; |
1111 | 0 | cMv += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp)); |
1112 | |
|
1113 | 0 | clipMv(cMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv); |
1114 | | |
1115 | | /* Pre-fetch similar to HEVC*/ |
1116 | 0 | { |
1117 | 0 | CPelBuf refBuf = refPic->getRecoBuf(ComponentID(compID)); |
1118 | 0 | Position Rec_offset = cu.blocks[compID].pos().offset(cMv.hor >> mvshiftTemp, cMv.ver >> mvshiftTemp); |
1119 | 0 | const Pel* refBufPtr = refBuf.bufAt(Rec_offset); |
1120 | |
|
1121 | 0 | PelBuf& dstBuf = pcPad.bufs[compID]; |
1122 | |
|
1123 | 0 | const int leftTopFilterExt = ((filtersize >> 1) - 1); |
1124 | 0 | const int padOffset = leftTopFilterExt * dstBuf.stride + leftTopFilterExt; |
1125 | 0 | const int padSize = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, cu.chromaFormat); |
1126 | |
|
1127 | 0 | xFpPadDmvr( refBufPtr, refBuf.stride, dstBuf.buf - padOffset, dstBuf.stride, width, height, padSize ); |
1128 | 0 | } |
1129 | 0 | } |
1130 | 0 | } |
1131 | | |
// Three-step restoring division producing a quotient magnitude of at most 7.
//
// |N| is compared against 8*D, 4*D and 2*D in turn (for non-negative D); each successful
// comparison sets the next quotient bit and subtracts the threshold from the remainder.
// The sign of N is applied to the result.
//
// N: numerator (its sign determines the sign of the result)
// D: denominator
// Returns the signed 3-bit quotient.
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
  const bool negative = N < 0;
  int64_t remainder   = negative ? -N : N;

  int32_t quotient  = 0;
  int64_t threshold = D << 3;

  for( int step = 0; step < 3; step++ )
  {
    quotient <<= 1;
    if( remainder >= threshold )
    {
      remainder -= threshold;
      quotient  |= 1;
    }
    threshold >>= 1;
  }

  return negative ? -quotient : quotient;
}
1166 | | |
1167 | | void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv) |
1168 | 0 | { |
1169 | 0 | for( int hv = 0; hv < 2; hv++) |
1170 | 0 | { |
1171 | 0 | const int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/ |
1172 | 0 | int64_t numerator = (int64_t)((sadBuffer[hv+1] - sadBuffer[hv+3]) << mvSubPelLvl); |
1173 | 0 | int64_t denominator = (int64_t)((sadBuffer[hv+1] + sadBuffer[hv+3] - (sadBuffer[0] << 1))); |
1174 | |
|
1175 | 0 | if (0 != denominator) |
1176 | 0 | { |
1177 | 0 | if ((sadBuffer[hv+1] != sadBuffer[0]) && (sadBuffer[hv+3] != sadBuffer[0])) |
1178 | 0 | { |
1179 | 0 | deltaMv[hv] = div_for_maxq7(numerator, denominator); |
1180 | 0 | } |
1181 | 0 | else |
1182 | 0 | { |
1183 | 0 | deltaMv[hv] = (sadBuffer[hv+1] == sadBuffer[0]) ? -8 : 8; |
1184 | 0 | } |
1185 | 0 | } |
1186 | 0 | } |
1187 | 0 | } |
1188 | | |
1189 | | void DMVR::xFinalPaddedMCForDMVR( const CodingUnit& cu, PelUnitBuf* dstBuf, const PelUnitBuf *refBuf, const bool bioApplied, const Mv mergeMv[NUM_REF_PIC_LIST_01], const Mv& refMv ) |
1190 | 0 | { |
1191 | 0 | int mvShift = MV_FRACTIONAL_BITS_INTERNAL; |
1192 | 0 | Mv mv[2]; |
1193 | 0 | mv[L0] = mergeMv[L0] + refMv; mv[L0].clipToStorageBitDepth(); |
1194 | 0 | mv[L1] = mergeMv[L1] - refMv; mv[L1].clipToStorageBitDepth(); |
1195 | |
|
1196 | 0 | for (int k = 0; k < NUM_REF_PIC_LIST_01; k++) |
1197 | 0 | { |
1198 | 0 | RefPicList refId = (RefPicList)k; |
1199 | 0 | const Mv& cMv = mv[refId]; |
1200 | 0 | Mv cMvClipped( cMv ); |
1201 | 0 | clipMv(cMvClipped, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv); |
1202 | 0 | const Picture* refPic = cu.slice->getRefPic(refId, cu.refIdx[refId]); |
1203 | 0 | const Mv& startMv = mergeMv[refId]; |
1204 | 0 | for (int compID = 0; compID < getNumberValidComponents(cu.chromaFormat); compID++) |
1205 | 0 | { |
1206 | 0 | int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, cu.chromaFormat); |
1207 | 0 | int deltaIntMvX = (cMv.hor >> mvshiftTemp) - (startMv.hor >> mvshiftTemp); |
1208 | 0 | int deltaIntMvY = (cMv.ver >> mvshiftTemp) - (startMv.ver >> mvshiftTemp); |
1209 | |
|
1210 | 0 | CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement"); |
1211 | |
|
1212 | 0 | if (deltaIntMvX || deltaIntMvY) |
1213 | 0 | { |
1214 | 0 | const PelBuf& srcBuf = refBuf[refId].bufs[compID]; |
1215 | 0 | int offset = (deltaIntMvY)*srcBuf.stride + (deltaIntMvX); |
1216 | |
|
1217 | 0 | xPredInterBlk( ( ComponentID ) compID, cu, nullptr, cMvClipped, dstBuf[refId], true, cu.cs->slice->clpRngs[compID], bioApplied, false, refId, 0, 0, 0, srcBuf.buf + offset, srcBuf.stride ); |
1218 | 0 | } |
1219 | 0 | else |
1220 | 0 | { |
1221 | 0 | xPredInterBlk( ( ComponentID ) compID, cu, refPic, cMvClipped, dstBuf[refId], true, cu.cs->slice->clpRngs[compID], bioApplied, false, refId ); |
1222 | 0 | } |
1223 | 0 | } |
1224 | 0 | } |
1225 | 0 | } |
1226 | | |
1227 | | static void xDMVRSubPixelErrorSurface( int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray ) |
1228 | 0 | { |
1229 | 0 | int sadStride = (((2 * DMVR_NUM_ITERATION) + 1)); |
1230 | 0 | uint64_t sadbuffer[5]; |
1231 | 0 | if( ( abs( totalDeltaMV[ 0 ] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) ) |
1232 | 0 | && ( abs( totalDeltaMV[ 1 ] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) ) ) |
1233 | 0 | { |
1234 | 0 | int32_t tempDeltaMv[2] = { 0,0 }; |
1235 | 0 | sadbuffer[0] = pSADsArray[0]; |
1236 | 0 | sadbuffer[1] = pSADsArray[-1]; |
1237 | 0 | sadbuffer[2] = pSADsArray[-sadStride]; |
1238 | 0 | sadbuffer[3] = pSADsArray[1]; |
1239 | 0 | sadbuffer[4] = pSADsArray[sadStride]; |
1240 | 0 | xSubPelErrorSrfc(sadbuffer, tempDeltaMv); |
1241 | 0 | totalDeltaMV[0] += tempDeltaMv[0]; |
1242 | 0 | totalDeltaMV[1] += tempDeltaMv[1]; |
1243 | 0 | } |
1244 | 0 | } |
1245 | | |
// Decoder-side motion vector refinement for the bi-predicted `cu`, followed by the final
// prediction into pcYuvDst.
//
// Stage 1 (refinement search): both lists are predicted bilinearly into m_yuvTmp with an
// extra DMVR_NUM_ITERATION samples on every side. The CU is split into sub-blocks of at
// most DMVR_SUBCU_SIZE x DMVR_SUBCU_SIZE; for each sub-block the SAD between the two
// predictions is evaluated over integer offsets -2..2 in both directions (the list-1
// offset is mirrored), the best offset is refined to sub-sample accuracy and the result
// is written to cu.mvdL0SubPu[num].
// Stage 2 (final MC): each sub-block is predicted with mergeMv +/- its refinement, using
// padded reference samples when the integer vector changed, and the two lists are
// combined by xWeightedAverage.
//
// bioApplied: BDOF request for the CU; it is switched off per sub-block when the
//             sub-block's best cost is below 2 * dx * dy.
1246 | | void DMVR::xProcessDMVR( const CodingUnit& cu, PelUnitBuf& pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied ) |
1247 | 0 | { |
1248 | 0 | PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MRG_DMVR, cu.cs, CH_L ); |
1249 | | /*Always High Precision*/ |
1250 | 0 | const int csx = getChannelTypeScaleX( CH_C, cu.chromaFormat ); |
1251 | 0 | const int csy = getChannelTypeScaleY( CH_C, cu.chromaFormat ); |
1252 | 0 | const int mvShift = MV_FRACTIONAL_BITS_INTERNAL; |
// Chroma MV shift; note that the horizontal scale is used for both vector components below.
1253 | 0 | const int mvShiftC = mvShift + csx; |
1254 | | |
1255 | | /*use merge MV as starting MV*/ |
1256 | 0 | const Mv mergeMv[] = { cu.mv[REF_PIC_LIST_0][0], cu.mv[REF_PIC_LIST_1][0] }; |
1257 | | |
1258 | |
|
// Sub-block size: the CU size capped at DMVR_SUBCU_SIZE.
1259 | 0 | const int dy = std::min<int>(cu.lumaSize().height, DMVR_SUBCU_SIZE); |
1260 | 0 | const int dx = std::min<int>(cu.lumaSize().width, DMVR_SUBCU_SIZE); |
1261 | |
|
1262 | 0 | const Position& puPos = cu.lumaPos(); |
1263 | |
|
// Per sub-block BDOF decision, filled in by the search and consumed by the final MC.
1264 | 0 | bool bioAppliedType[MAX_NUM_SUBCU_DMVR]; |
1265 | | |
1266 | | // Do refinement search |
1267 | 0 | { |
// The bilinear predictions are written starting DMVR_NUM_ITERATION rows and columns
// before the m_yuvTmp buffer origin, so the origin itself corresponds to the zero offset.
1268 | 0 | const int bilinearBufStride = (cu.Y().width + (2 * DMVR_NUM_ITERATION)); |
1269 | 0 | const int padSize = DMVR_NUM_ITERATION << 1; |
1270 | 0 | const int dstOffset = -( DMVR_NUM_ITERATION * bilinearBufStride + DMVR_NUM_ITERATION ); |
1271 | | |
1272 | | /*use merge MV as starting MV*/ |
1273 | 0 | Mv mergeMVL0 = cu.mv[L0][0]; |
1274 | 0 | Mv mergeMVL1 = cu.mv[L1][0]; |
1275 | | |
1276 | | /*Clip the starting MVs*/ |
1277 | 0 | clipMv(mergeMVL0, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv); |
1278 | 0 | clipMv(mergeMVL1, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv); |
1279 | | |
1280 | | /*L0 MC for refinement*/ |
1281 | 0 | { |
1282 | 0 | const Picture* refPic = cu.slice->getRefPic(L0, cu.refIdx[L0]); |
1283 | |
|
1284 | 0 | PelUnitBuf yuvTmp = PelUnitBuf(cu.chromaFormat, PelBuf(m_yuvTmp[L0].getBuf(COMP_Y).buf + dstOffset, bilinearBufStride, cu.lwidth() + padSize, cu.lheight() + padSize)); |
1285 | |
|
// Start the enlarged block DMVR_NUM_ITERATION full samples up and to the left.
1286 | 0 | mergeMVL0.hor -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL); |
1287 | 0 | mergeMVL0.ver -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL); |
1288 | |
|
1289 | 0 | xPredInterBlk(COMP_Y, cu, refPic, mergeMVL0, yuvTmp, true, clpRngs[COMP_Y], false, false, L0, cu.lwidth() + padSize, cu.lheight() + padSize, true); |
1290 | 0 | } |
1291 | | |
1292 | | /*L1 MC for refinement*/ |
1293 | 0 | { |
1294 | 0 | const Picture* refPic = cu.slice->getRefPic(L1, cu.refIdx[L1]); |
1295 | |
|
1296 | 0 | PelUnitBuf yuvTmp = PelUnitBuf(cu.chromaFormat, PelBuf(m_yuvTmp[L1].getBuf(COMP_Y).buf + dstOffset, bilinearBufStride, cu.lwidth() + padSize, cu.lheight() + padSize)); |
1297 | |
|
1298 | 0 | mergeMVL1.hor -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL); |
1299 | 0 | mergeMVL1.ver -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL); |
1300 | |
|
1301 | 0 | xPredInterBlk(COMP_Y, cu, refPic, mergeMVL1, yuvTmp, true, clpRngs[COMP_Y], false, false, L1, cu.lwidth() + padSize, cu.lheight() + padSize, true); |
1302 | 0 | } |
1303 | | |
1304 | | // point mc buffer to center point to avoid multiplication to reach each iteration to the beginning |
1305 | 0 | const Pel* biLinearPredL0 = m_yuvTmp[0].getBuf( COMP_Y ).buf; |
1306 | 0 | const Pel* biLinearPredL1 = m_yuvTmp[1].getBuf( COMP_Y ).buf; |
1307 | 0 | const int bioEnabledThres = 2 * dy * dx; |
1308 | 0 | const int bd = cu.cs->slice->clpRngs[COMP_Y].bd; |
1309 | |
|
1310 | 0 | DistParam distParam = m_pcRdCost->setDistParam( nullptr, nullptr, bilinearBufStride, bilinearBufStride, bd, COMP_Y, dx, dy, 1, true ); |
1311 | |
|
// num counts sub-blocks in raster order; sadArray holds one cost per integer offset.
// NOTE(review): the search loops below hard-code the range -2..2 while sadArray is sized
// from DMVR_NUM_ITERATION - presumably DMVR_NUM_ITERATION == 2; confirm.
1312 | 0 | int num = 0; |
1313 | 0 | int yStart = 0; |
1314 | 0 | uint64_t sadArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)]; |
1315 | |
|
1316 | 0 | for( int y = puPos.y; y < ( puPos.y + cu.lumaSize().height ); y = y + dy, yStart = yStart + dy ) |
1317 | 0 | { |
1318 | 0 | for( int x = puPos.x, xStart = 0; x < ( puPos.x + cu.lumaSize().width ); x = x + dx, xStart = xStart + dx ) |
1319 | 0 | { |
1320 | 0 | uint64_t minCost = MAX_UINT64; |
1321 | | |
1322 | | // point pSADsArray at the centre entry of the SAD grid (no entries are initialised here) |
1323 | 0 | uint64_t *pSADsArray = &sadArray[( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) * ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) >> 1]; |
1324 | |
|
1325 | 0 | const Pel* addrL0Centre = biLinearPredL0 + yStart * bilinearBufStride + xStart; |
1326 | 0 | const Pel* addrL1Centre = biLinearPredL1 + yStart * bilinearBufStride + xStart; |
1327 | |
|
1328 | 0 | const Pel* addrL0 = addrL0Centre; |
1329 | 0 | const Pel* addrL1 = addrL1Centre; |
1330 | |
|
// Cost of the unrefined position: the distortion is halved, then reduced by a quarter.
1331 | 0 | distParam.org.buf = addrL0; |
1332 | 0 | distParam.cur.buf = addrL1; |
1333 | 0 | minCost = distParam.distFunc( distParam ) >> 1; |
1334 | 0 | minCost -= ( minCost >> 2 ); |
1335 | |
|
// Early termination: a cost below one per sample keeps the merge vector unchanged.
1336 | 0 | if( minCost < ( dx * dy ) ) |
1337 | 0 | { |
1338 | 0 | cu.mvdL0SubPu[num] = Mv( 0, 0 ); |
1339 | 0 | } |
1340 | 0 | else |
1341 | 0 | { |
1342 | 0 | int16_t totalDeltaMV[2] = { 0, 0 }; |
1343 | 0 | int16_t deltaMV[2] = { 0, 0 }; |
1344 | |
|
// Store the centre cost, then walk the grid from its first entry, row by row.
1345 | 0 | pSADsArray[0] = minCost; |
1346 | 0 | pSADsArray = sadArray; |
1347 | |
|
1348 | 0 | for( int ver = -2; ver <= 2; ver++ ) |
1349 | 0 | { |
1350 | 0 | const int initHor = -2; |
1351 | 0 | const ptrdiff_t offset = initHor + ver * bilinearBufStride; |
1352 | | |
// List 1 is displaced by the mirrored offset.
1353 | 0 | distParam.org.buf = addrL0 + offset; |
1354 | 0 | distParam.cur.buf = addrL1 - offset; |
1355 | | |
// NOTE(review): the flag is false only for the centre row, presumably so that the
// already stored centre cost is not recomputed - confirm in dmvrSadX5.
1356 | 0 | distParam.dmvrSadX5( distParam, pSADsArray, ver != 0 ); |
1357 | |
|
1358 | 0 | for( int hor = -2; hor <= 2; hor++, pSADsArray++ ) |
1359 | 0 | { |
1360 | 0 | Distortion cost = *pSADsArray; |
1361 | |
|
1362 | 0 | if( cost < minCost ) |
1363 | 0 | { |
1364 | 0 | minCost = cost; |
1365 | 0 | deltaMV[0] = hor; |
1366 | 0 | deltaMV[1] = ver; |
1367 | 0 | } |
1368 | 0 | } |
1369 | 0 | } |
1370 | |
|
// Re-point pSADsArray at the winning entry, convert the integer offset to MV units and
// add the sub-sample refinement.
1371 | 0 | pSADsArray = &sadArray[( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) * ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) >> 1]; |
1372 | |
|
1373 | 0 | totalDeltaMV[0] += deltaMV[0]; |
1374 | 0 | totalDeltaMV[1] += deltaMV[1]; |
1375 | 0 | pSADsArray += ( ( deltaMV[1] * ( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) ) + deltaMV[0] ); |
1376 | 0 | totalDeltaMV[0] = totalDeltaMV[0] * ( 1 << mvShift ); |
1377 | 0 | totalDeltaMV[1] = totalDeltaMV[1] * ( 1 << mvShift ); |
1378 | |
|
1379 | 0 | xDMVRSubPixelErrorSurface( totalDeltaMV, deltaMV, pSADsArray ); |
1380 | |
|
1381 | 0 | cu.mvdL0SubPu[num] = Mv( totalDeltaMV[0], totalDeltaMV[1] ); |
1382 | 0 | } |
1383 | |
|
// BDOF is disabled for this sub-block when its best cost is below the threshold.
1384 | 0 | bioAppliedType[num] = ( minCost < bioEnabledThres ) ? false : bioApplied; |
1385 | |
|
1386 | 0 | num++; |
1387 | 0 | } |
1388 | 0 | } |
1389 | 0 | } |
1390 | | |
1391 | | // Final MC |
// subCu is a copy of cu whose area is narrowed to one sub-block; its position is updated
// inside the loops below.
1392 | 0 | CodingUnit subCu = cu; |
1393 | 0 | subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(puPos.x, puPos.y, dx, dy))); |
1394 | 0 | PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(cu, subCu)); |
1395 | |
|
1396 | 0 | PelUnitBuf predBuf[NUM_REF_PIC_LIST_01]; |
1397 | 0 | predBuf[L0] = m_yuvPred[L0].getCompactBuf( subCu ); |
1398 | 0 | predBuf[L1] = m_yuvPred[L1].getCompactBuf( subCu ); |
1399 | | /* For padding */ |
1400 | 0 | PelUnitBuf padBuf[NUM_REF_PIC_LIST_01]; |
1401 | 0 | padBuf[L0] = m_yuvPad[L0].getBufPart(subCu); |
1402 | 0 | padBuf[L1] = m_yuvPad[L1].getBufPart(subCu); |
1403 | |
|
1404 | 0 | int x = 0, y = 0; |
1405 | 0 | int xStart = 0, yStart = 0; |
1406 | 0 | int num = 0; |
1407 | 0 | const int scaleX = getComponentScaleX(COMP_Cb, cu.chromaFormat); |
1408 | 0 | const int scaleY = getComponentScaleY(COMP_Cb, cu.chromaFormat); |
1409 | |
|
1410 | 0 | const ptrdiff_t dstStride[MAX_NUM_COMP] = { pcYuvDst.bufs[COMP_Y].stride, cu.chromaFormat != CHROMA_400 ? pcYuvDst.bufs[COMP_Cb].stride : 0, cu.chromaFormat != CHROMA_400 ? pcYuvDst.bufs[COMP_Cr].stride : 0 }; |
// Same raster order over the sub-blocks as in the search, so num indexes matching entries.
1411 | 0 | for( y = puPos.y; y < ( puPos.y + cu.lumaSize().height ); y = y + dy, yStart = yStart + dy ) |
1412 | 0 | { |
1413 | 0 | for( x = puPos.x, xStart = 0; x < ( puPos.x + cu.lumaSize().width ); x = x + dx, xStart = xStart + dx ) |
1414 | 0 | { |
1415 | 0 | subCu.Y().x = x; |
1416 | 0 | subCu.Y().y = y; |
1417 | |
|
1418 | 0 | if( cu.chromaFormat != CHROMA_400 ) |
1419 | 0 | { |
1420 | 0 | subCu.Cb().x = subCu.Cr().x = x >> csx; |
1421 | 0 | subCu.Cb().y = subCu.Cr().y = y >> csy; |
1422 | 0 | } |
1423 | |
|
// Refined vectors: the refinement is added for list 0 and subtracted for list 1.
1424 | 0 | Mv mv0 = mergeMv[REF_PIC_LIST_0] + cu.mvdL0SubPu[num]; mv0.clipToStorageBitDepth(); |
1425 | 0 | Mv mv1 = mergeMv[REF_PIC_LIST_1] - cu.mvdL0SubPu[num]; mv1.clipToStorageBitDepth(); |
1426 | |
|
// Padded reference samples are needed only where the integer part of the vector changed.
1427 | 0 | bool padBufL0 = (mv0.hor >> mvShift) != (mergeMv[0].hor >> mvShift) || (mv0.ver >> mvShift) != (mergeMv[0].ver >> mvShift); |
1428 | 0 | bool padBufL0C = (mv0.hor >> mvShiftC) != (mergeMv[0].hor >> mvShiftC) || (mv0.ver >> mvShiftC) != (mergeMv[0].ver >> mvShiftC); |
1429 | | |
1430 | 0 | bool padBufL1 = (mv1.hor >> mvShift) != (mergeMv[1].hor >> mvShift) || (mv1.ver >> mvShift) != (mergeMv[1].ver >> mvShift); |
1431 | 0 | bool padBufL1C = (mv1.hor >> mvShiftC) != (mergeMv[1].hor >> mvShiftC) || (mv1.ver >> mvShiftC) != (mergeMv[1].ver >> mvShiftC); |
1432 | |
|
1433 | 0 | padBufL0C &= cu.chromaFormat != CHROMA_400; |
1434 | 0 | padBufL1C &= cu.chromaFormat != CHROMA_400; |
1435 | |
|
1436 | 0 | if (padBufL0) xCopyAndPad(subCu, padBuf[L0], L0, true); |
1437 | 0 | if (padBufL0C) xCopyAndPad(subCu, padBuf[L0], L0, false); |
1438 | 0 | if (padBufL1) xCopyAndPad(subCu, padBuf[L1], L1, true); |
1439 | 0 | if (padBufL1C) xCopyAndPad(subCu, padBuf[L1], L1, false); |
1440 | |
|
1441 | 0 | xFinalPaddedMCForDMVR( subCu, predBuf, padBuf, bioAppliedType[num], mergeMv, cu.mvdL0SubPu[num] ); |
1442 | |
|
// Point the destination view at this sub-block inside pcYuvDst.
1443 | 0 | subPredBuf.bufs[COMP_Y].buf = pcYuvDst.bufs[COMP_Y].buf + xStart + yStart * dstStride[COMP_Y]; |
1444 | 0 | if( cu.chromaFormat != CHROMA_400 ) |
1445 | 0 | { |
1446 | 0 | subPredBuf.bufs[COMP_Cb].buf = pcYuvDst.bufs[COMP_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMP_Cb]); |
1447 | 0 | subPredBuf.bufs[COMP_Cr].buf = pcYuvDst.bufs[COMP_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMP_Cr]); |
1448 | 0 | } |
1449 | |
|
1450 | 0 | xWeightedAverage( subCu, predBuf[L0], predBuf[L1], subPredBuf, bioAppliedType[num] ); |
1451 | |
|
1452 | 0 | num++; |
1453 | 0 | } |
1454 | 0 | } |
1455 | 0 | } |
1456 | | |
1457 | | bool InterPredInterpolation::isSubblockVectorSpreadOverLimit(int a, int b, int c, int d, int predType) |
1458 | 0 | { |
1459 | 0 | int s4 = (4 << 11); |
1460 | 0 | int filterTap = 6; |
1461 | |
|
1462 | 0 | if (predType == 3) |
1463 | 0 | { |
1464 | 0 | int refBlkWidth = std::max(std::max(0, 4 * a + s4), std::max(4 * c, 4 * a + 4 * c + s4)) - std::min(std::min(0, 4 * a + s4), std::min(4 * c, 4 * a + 4 * c + s4)); |
1465 | 0 | int refBlkHeight = std::max(std::max(0, 4 * b), std::max(4 * d + s4, 4 * b + 4 * d + s4)) - std::min(std::min(0, 4 * b), std::min(4 * d + s4, 4 * b + 4 * d + s4)); |
1466 | 0 | refBlkWidth = (refBlkWidth >> 11) + filterTap + 3; |
1467 | 0 | refBlkHeight = (refBlkHeight >> 11) + filterTap + 3; |
1468 | |
|
1469 | 0 | if (refBlkWidth * refBlkHeight > (filterTap + 9) * (filterTap + 9)) |
1470 | 0 | { |
1471 | 0 | return true; |
1472 | 0 | } |
1473 | 0 | } |
1474 | 0 | else |
1475 | 0 | { |
1476 | 0 | int refBlkWidth = std::max(0, 4 * a + s4) - std::min(0, 4 * a + s4); |
1477 | 0 | int refBlkHeight = std::max(0, 4 * b) - std::min(0, 4 * b); |
1478 | 0 | refBlkWidth = (refBlkWidth >> 11) + filterTap + 3; |
1479 | 0 | refBlkHeight = (refBlkHeight >> 11) + filterTap + 3; |
1480 | 0 | if (refBlkWidth * refBlkHeight > (filterTap + 9) * (filterTap + 5)) |
1481 | 0 | { |
1482 | 0 | return true; |
1483 | 0 | } |
1484 | | |
1485 | 0 | refBlkWidth = std::max(0, 4 * c) - std::min(0, 4 * c); |
1486 | 0 | refBlkHeight = std::max(0, 4 * d + s4) - std::min(0, 4 * d + s4); |
1487 | 0 | refBlkWidth = (refBlkWidth >> 11) + filterTap + 3; |
1488 | 0 | refBlkHeight = (refBlkHeight >> 11) + filterTap + 3; |
1489 | 0 | if (refBlkWidth * refBlkHeight > (filterTap + 5) * (filterTap + 9)) |
1490 | 0 | { |
1491 | 0 | return true; |
1492 | 0 | } |
1493 | 0 | } |
1494 | 0 | return false; |
1495 | 0 | } |
1496 | | |
// Affine motion-compensated prediction of one component (compID) of cu from refPic.
// The control-point MVs _mv[0..2] (named mvLT, mvRT, mvLB below) are expanded into one MV per
// AFFINE_MIN_BLOCK_SIZE x AFFINE_MIN_BLOCK_SIZE sub-block, and every sub-block is interpolated
// into dstPic.bufs[compID]. For luma, PROF (xFpProfGradFilter + xFpApplyPROF) refines the
// sub-block prediction when enablePROF ends up true.
//   bi         - isLast is !bi when PROF is off; also forwarded to xFpApplyPROF
//   clpRng     - forwarded to the interpolation filters and PROF; clpRng.bd sets the PROF shifts
//   refPicList - selects the m_dMvBuf entry that holds the per-sample PROF MV deltas
// Side effects: writes derived sub-block MVs to m_storedMv (luma / 4:4:4 path and the chroma-only
// pre-pass) and uses m_dMvBuf, m_gradBuf and m_filteredBlockTmp as scratch storage.
1497 | | void InterPredInterpolation::xPredAffineBlk(const ComponentID compID, const CodingUnit& cu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool bi, const ClpRng& clpRng, const RefPicList refPicList) |
1498 | 0 | { |
1499 | 0 | const ChromaFormat chFmt = cu.chromaFormat; |
1500 | 0 | int iScaleX = getComponentScaleX(compID, chFmt); |
1501 | 0 | int iScaleY = getComponentScaleY(compID, chFmt); |
1502 | |

1503 | 0 | Mv mvLT = _mv[0]; |
1504 | 0 | Mv mvRT = _mv[1]; |
1505 | 0 | Mv mvLB = _mv[2]; |
1506 | | 
1507 | | // get affine sub-block width and height |
1508 | 0 | const int width = cu.Y().width; |
1509 | 0 | const int height = cu.Y().height; |
1510 | 0 | int blockWidth = AFFINE_MIN_BLOCK_SIZE; |
1511 | 0 | int blockHeight = AFFINE_MIN_BLOCK_SIZE; |
1512 | |

1513 | 0 | CHECK(blockWidth > (width >> iScaleX), "Sub Block width > Block width"); |
1514 | 0 | CHECK(blockHeight > (height >> iScaleY), "Sub Block height > Block height"); |
// Row pitch used when indexing m_storedMv below.
1515 | 0 | const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE; |
1516 | |

// CU size in samples of the current component.
1517 | 0 | const int cxWidth = width >> iScaleX; |
1518 | 0 | const int cxHeight = height >> iScaleY; |
1519 | 0 | const int iHalfBW = blockWidth >> 1; |
1520 | 0 | const int iHalfBH = blockHeight >> 1; |
1521 | |

1522 | 0 | const int iBit = MAX_CU_DEPTH; |
1523 | 0 | int iDMvHorX = 0; |
1524 | 0 | int iDMvHorY = 0; |
1525 | 0 | int iDMvVerX = 0; |
1526 | 0 | int iDMvVerY = 0; |
1527 | |

// Per-sample MV gradients, scaled up by iBit bits: the horizontal gradient comes from (mvRT - mvLT).
// The vertical gradient comes from (mvLB - mvLT) for the 6-parameter model; otherwise it is the
// horizontal gradient rotated by 90 degrees (-HorY, HorX).
1528 | 0 | iDMvHorX = (mvRT - mvLT).hor * (1 <<(iBit - Log2(cxWidth))); |
1529 | 0 | iDMvHorY = (mvRT - mvLT).ver * (1 <<(iBit - Log2(cxWidth))); |
1530 | 0 | if (cu.affineType == AFFINEMODEL_6PARAM) |
1531 | 0 | { |
1532 | 0 | iDMvVerX = (mvLB - mvLT).hor * (1 <<(iBit - Log2(cxHeight))); |
1533 | 0 | iDMvVerY = (mvLB - mvLT).ver * (1 <<(iBit - Log2(cxHeight))); |
1534 | 0 | } |
1535 | 0 | else |
1536 | 0 | { |
1537 | 0 | iDMvVerX = -iDMvHorY; |
1538 | 0 | iDMvVerY = iDMvHorX; |
1539 | 0 | } |
1540 | |

// Top-left control-point MV in the same iBit-scaled precision as the gradients.
1541 | 0 | int iMvScaleHor = mvLT.hor * (1 << iBit); |
1542 | 0 | int iMvScaleVer = mvLT.ver * (1 << iBit); |
1543 | 0 | const PPS &pps = *cu.cs->pps; |
1544 | 0 | const SPS &sps = *cu.cs->sps; |
// Clamping bounds for the sub-block MVs, relative to the CU's luma position and scaled by
// (1 << iMvShift): at most iOffset samples beyond the picture on the right/bottom, and
// maxCUSize + iOffset samples beyond it on the left/top.
1545 | 0 | const int iMvShift = 4; |
1546 | 0 | const int iOffset = 8; |
1547 | 0 | const int iHorMax = (pps.picWidthInLumaSamples + iOffset - cu.Y().x - 1) << iMvShift; |
1548 | 0 | const int iHorMin = (-(int)cu.cs->pcv->maxCUSize - iOffset - (int)cu.Y().x + 1) * (1 << iMvShift); |
1549 | 0 | const int iVerMax = (pps.picHeightInLumaSamples + iOffset - cu.Y().y - 1) << iMvShift; |
1550 | 0 | const int iVerMin = (-(int)cu.cs->pcv->maxCUSize - iOffset - (int)cu.Y().y + 1) * (1 << iMvShift); |
1551 | | 
// Right shift that brings the iBit-scaled MVs back to stored MV precision (used by roundAffineMv).
1552 | 0 | const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; |
// When the spread is over the limit, every sub-block uses the single MV evaluated at the CU centre.
1553 | 0 | const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, cu.interDir); |
1554 | |

// PROF is considered for luma only and is disabled by any of the following: SPS / picture header
// switches, m_skipPROF, identical control-point MVs, MV spread over the limit, the encoder-only
// small-gradient shortcut (m_encOnly without checkLDC and all gradients within +-profThres),
// or a reference picture whose size differs from the current picture.
1555 | 0 | bool enablePROF = sps.PROF && (!m_skipPROF) && (compID == COMP_Y); |
1556 | 0 | enablePROF &= (!cu.cs->picHeader->disProfFlag); |
1557 | 0 | enablePROF &= !((cu.affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (cu.affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1])); |
1558 | 0 | enablePROF &= !subblkMVSpreadOverLimit; |
1559 | 0 | const int profThres = 1 << (iBit + (m_isBi ? 1 : 0)); |
1560 | 0 | enablePROF &= !m_encOnly || cu.slice->checkLDC || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres; |
1561 | 0 | enablePROF &= pps.picWidthInLumaSamples == refPic->cs->pps->picWidthInLumaSamples && pps.picHeightInLumaSamples == refPic->cs->pps->picHeightInLumaSamples; |
1562 | |

// With PROF the interpolation output is always treated as intermediate (isLast = false);
// without PROF it is final only for uni-prediction.
1563 | 0 | bool isLast = enablePROF ? false : !bi; |
1564 | |

// Scratch views for PROF: gradient planes and the sub-block prediction extended by a border.
1565 | 0 | const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2; |
1566 | 0 | const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2; |
1567 | |

1568 | 0 | PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH); |
1569 | 0 | PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH); |
1570 | |

// Extended width is rounded up to a multiple of 8.
1571 | 0 | int dstExtW = (((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3); |
1572 | 0 | int dstExtH = (blockHeight + PROF_BORDER_EXT_H * 2); |
1573 | |

1574 | 0 | PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH); |
1575 | |

1576 | 0 | PelBuf& dstBuf = dstPic.bufs[compID]; |
1577 | |

// Horizontal deltas occupy the first 16 ints of m_dMvBuf[refPicList], vertical deltas the next 16.
1578 | 0 | int *dMvScaleHor = m_dMvBuf[refPicList]; |
1579 | 0 | int *dMvScaleVer = m_dMvBuf[refPicList] + 16; |
1580 | |

1581 | 0 | if (enablePROF) |
1582 | 0 | { |
// Build the per-sample MV delta table of one sub-block: entry [0] is seeded from the gradients,
// the rest of the first row is accumulated sample by sample, and every further row is derived
// from the row above it.
1583 | 0 | int* dMvH = dMvScaleHor; |
1584 | 0 | int* dMvV = dMvScaleVer; |
1585 | 0 | int quadHorX = 4 * iDMvHorX ; |
1586 | 0 | int quadHorY = 4 * iDMvHorY ; |
1587 | 0 | int quadVerX = 4 * iDMvVerX ; |
1588 | 0 | int quadVerY = 4 * iDMvVerY ; |
1589 | |

1590 | 0 | dMvH[0] = ((iDMvHorX + iDMvVerX) * 2) - ((quadHorX + quadVerX) * 2); |
1591 | 0 | dMvV[0] = ((iDMvHorY + iDMvVerY) * 2) - ((quadHorY + quadVerY) * 2); |
1592 | |

1593 | 0 | for (int w = 1; w < blockWidth; w++) |
1594 | 0 | { |
1595 | 0 | dMvH[w] = dMvH[w - 1] + quadHorX; |
1596 | 0 | dMvV[w] = dMvV[w - 1] + quadHorY; |
1597 | 0 | } |
1598 | |

1599 | 0 | dMvH += blockWidth; |
1600 | 0 | dMvV += blockWidth; |
1601 | 0 | for (int h = 1; h < blockHeight; h++) |
1602 | 0 | { |
1603 | 0 | for (int w = 0; w < blockWidth; w++) |
1604 | 0 | { |
1605 | 0 | dMvH[w] = dMvH[w - blockWidth] + quadVerX; |
1606 | 0 | dMvV[w] = dMvV[w - blockWidth] + quadVerY; |
1607 | 0 | } |
1608 | 0 | dMvH += blockWidth; |
1609 | 0 | dMvV += blockWidth; |
1610 | 0 | } |
1611 | |

1612 | 0 | const int mvShift = 8; |
1613 | 0 | const int dmvLimit = ( 1 << 5 ) - 1; |
1614 | |

// Round by mvShift and clamp to [-dmvLimit, dmvLimit]. The scalar loop is the fallback when no
// g_pelBufOP.roundIntVector is installed.
// NOTE(review): roundIntVector is presumably an equivalent optimized implementation - confirm.
1615 | 0 | if (!g_pelBufOP.roundIntVector) |
1616 | 0 | { |
1617 | 0 | for (int idx = 0; idx < blockWidth * blockHeight; idx++) |
1618 | 0 | { |
1619 | 0 | roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift); |
1620 | 0 | dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit, dMvScaleHor[idx]); |
1621 | 0 | dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit, dMvScaleVer[idx]); |
1622 | 0 | } |
1623 | 0 | } |
1624 | 0 | else |
1625 | 0 | { |
1626 | 0 | int sz = blockWidth * blockHeight; |
1627 | 0 | g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit); |
1628 | 0 | g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit); |
1629 | 0 | } |
1630 | 0 | } |
1631 | |

// Chroma-only pre-pass: the sub-sampled chroma path further down reads luma sub-block MVs from
// m_storedMv, so when cu.mccNoLuma() holds they are generated here, once, on the Cb call.
// NOTE(review): mccNoLuma() presumably means luma motion compensation is skipped for this CU - confirm.
1632 | 0 | int scaleXLuma = getComponentScaleX(COMP_Y, chFmt); |
1633 | 0 | int scaleYLuma = getComponentScaleY(COMP_Y, chFmt); |
1634 | 0 | if( cu.mccNoLuma() && ( compID == COMP_Cb ) && cu.chromaFormat != CHROMA_444 ) |
1635 | 0 | { |
1636 | 0 | CHECK(compID == COMP_Y, "Chroma only subblock MV calculation should not apply to Luma"); |
1637 | 0 | int lumaBlockWidth = AFFINE_MIN_BLOCK_SIZE; |
1638 | 0 | int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE; |
1639 | |

1640 | 0 | CHECK(lumaBlockWidth > (width >> scaleXLuma), "Sub Block width > Block width"); |
1641 | 0 | CHECK(lumaBlockHeight > (height >> scaleYLuma), "Sub Block height > Block height"); |
1642 | |

1643 | 0 | const int cxWidthLuma = width >> scaleXLuma; |
1644 | 0 | const int cxHeightLuma = height >> scaleYLuma; |
1645 | 0 | const int halfBWLuma = lumaBlockWidth >> 1; |
1646 | 0 | const int halfBHLuma = lumaBlockHeight >> 1; |
1647 | |

// Same gradient derivation as above, but on the luma CU dimensions.
1648 | 0 | int dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma; |
1649 | 0 | dMvHorXLuma = (mvRT - mvLT).hor * (1 << (iBit - floorLog2(cxWidthLuma))); |
1650 | 0 | dMvHorYLuma = (mvRT - mvLT).ver * (1 << (iBit - floorLog2(cxWidthLuma))); |
1651 | 0 | if (cu.affineType == AFFINEMODEL_6PARAM) |
1652 | 0 | { |
1653 | 0 | dMvVerXLuma = (mvLB - mvLT).hor * (1 << (iBit - floorLog2(cxHeightLuma))); |
1654 | 0 | dMvVerYLuma = (mvLB - mvLT).ver * (1 << (iBit - floorLog2(cxHeightLuma))); |
1655 | 0 | } |
1656 | 0 | else |
1657 | 0 | { |
1658 | 0 | dMvVerXLuma = -dMvHorYLuma; |
1659 | 0 | dMvVerYLuma = dMvHorXLuma; |
1660 | 0 | } |
1661 | |

1662 | 0 | const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma, cu.interDir); |
1663 | | 
1664 | | // get luma MV block by block |
1665 | 0 | for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight) |
1666 | 0 | { |
1667 | 0 | for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth) |
1668 | 0 | { |
1669 | 0 | int mvScaleTmpHor, mvScaleTmpVer; |
1670 | 0 | if (!subblkMVSpreadOverLimitLuma) |
1671 | 0 | { |
1672 | 0 | mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (halfBWLuma + w) + dMvVerXLuma * (halfBHLuma + h); |
1673 | 0 | mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (halfBWLuma + w) + dMvVerYLuma * (halfBHLuma + h); |
1674 | 0 | } |
1675 | 0 | else |
1676 | 0 | { |
1677 | 0 | mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (cxWidthLuma >> 1) + dMvVerXLuma * (cxHeightLuma >> 1); |
1678 | 0 | mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (cxWidthLuma >> 1) + dMvVerYLuma * (cxHeightLuma >> 1); |
1679 | 0 | } |
1680 | |

1681 | 0 | roundAffineMv(mvScaleTmpHor, mvScaleTmpVer, shift); |
1682 | 0 | Mv tmpMv(mvScaleTmpHor, mvScaleTmpVer); |
1683 | 0 | tmpMv.clipToStorageBitDepth(); |
1684 | 0 | mvScaleTmpHor = tmpMv.hor; |
1685 | 0 | mvScaleTmpVer = tmpMv.ver; |
1686 | |

1687 | 0 | m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(mvScaleTmpHor, mvScaleTmpVer); |
1688 | 0 | } |
1689 | 0 | } |
1690 | 0 | } |
1691 | | // get prediction block by block |
1692 | 0 | const CPelBuf refBuf = refPic->getRecoBuf(compID); |
1693 | |

1694 | 0 | const int puX = cu.blocks[compID].x; |
1695 | 0 | const int puY = cu.blocks[compID].y; |
1696 | |

1697 | 0 | for (int h = 0; h < cxHeight; h += blockHeight) |
1698 | 0 | { |
1699 | 0 | for (int w = 0; w < cxWidth; w += blockWidth) |
1700 | 0 | { |
1701 | 0 | int iMvScaleTmpHor, iMvScaleTmpVer; |
// Luma (or any component in 4:4:4): evaluate the affine model at the sub-block centre, or at the
// CU centre when the spread limit is exceeded, then round, store and clamp.
1702 | 0 | if (compID == COMP_Y || cu.chromaFormat == CHROMA_444) |
1703 | 0 | { |
1704 | 0 | if (!subblkMVSpreadOverLimit) |
1705 | 0 | { |
1706 | 0 | iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h); |
1707 | 0 | iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h); |
1708 | 0 | } |
1709 | 0 | else |
1710 | 0 | { |
1711 | 0 | iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (cxWidth >> 1) + iDMvVerX * (cxHeight >> 1); |
1712 | 0 | iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1); |
1713 | 0 | } |
1714 | |

1715 | 0 | roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift); |
1716 | 0 | Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer); |
1717 | 0 | tmpMv.clipToStorageBitDepth(); |
1718 | 0 | iMvScaleTmpHor = tmpMv.hor; |
1719 | 0 | iMvScaleTmpVer = tmpMv.ver; |
1720 | | 
1721 | | // clip and scale |
// The MV is stored before the picture-boundary clamp below is applied to the local copy.
1722 | 0 | m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer); |
1723 | | // if( scalingRatio == SCALE_1X ) |
1724 | 0 | { |
1725 | 0 | iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor)); |
1726 | 0 | iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer)); |
1727 | 0 | } |
1728 | 0 | } |
1729 | 0 | else |
1730 | 0 | { |
// Sub-sampled chroma: average two stored luma sub-block MVs - the co-located one and the one
// offset by (iScaleX, iScaleY) sub-blocks - then clamp. This relies on m_storedMv having been
// filled by an earlier luma call or by the chroma-only pre-pass above.
1731 | 0 | Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] + |
1732 | 0 | m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)]; |
1733 | 0 | roundAffineMv(curMv.hor, curMv.ver, 1); |
1734 | |

1735 | 0 | curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor)); |
1736 | 0 | curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver)); |
1737 | |

1738 | 0 | iMvScaleTmpHor = curMv.hor; |
1739 | 0 | iMvScaleTmpVer = curMv.ver; |
1740 | 0 | } |
1741 | |

1742 | 0 | CHECKD( m_ifpLines && !CU::isMvInRangeFPP( puY + h, blockHeight, iMvScaleTmpVer, m_ifpLines, *pps.pcv, iScaleY ), "xPredAffineBlk: FPP MV restriction failed!\n" ); |
1743 | | // get the MV in high precision |
// Split into integer and fractional parts; a sub-sampled axis carries one more fractional bit.
1744 | 0 | int xFrac, yFrac, xInt, yInt; |
1745 | |

1746 | 0 | if (!iScaleX) |
1747 | 0 | { |
1748 | 0 | xInt = iMvScaleTmpHor >> 4; |
1749 | 0 | xFrac = iMvScaleTmpHor & 15; |
1750 | 0 | } |
1751 | 0 | else |
1752 | 0 | { |
1753 | 0 | xInt = iMvScaleTmpHor >> 5; |
1754 | 0 | xFrac = iMvScaleTmpHor & 31; |
1755 | 0 | } |
1756 | 0 | if (!iScaleY) |
1757 | 0 | { |
1758 | 0 | yInt = iMvScaleTmpVer >> 4; |
1759 | 0 | yFrac = iMvScaleTmpVer & 15; |
1760 | 0 | } |
1761 | 0 | else |
1762 | 0 | { |
1763 | 0 | yInt = iMvScaleTmpVer >> 5; |
1764 | 0 | yFrac = iMvScaleTmpVer & 31; |
1765 | 0 | } |
1766 | |

1767 | 0 | const Pel* ref = refBuf.buf; |
1768 | 0 | ref += puX + xInt + w; |
1769 | 0 | ref += ( puY + yInt + h ) * refBuf.stride; |
1770 | 0 | Pel* dst = dstBuf.buf + w + h * dstBuf.stride; |
1771 | |

1772 | 0 | int refStride = refBuf.stride; |
1773 | 0 | int dstStride = dstBuf.stride; |
1774 | |

1775 | 0 | int bw = blockWidth; |
1776 | 0 | int bh = blockHeight; |
1777 | |

// With PROF the interpolation writes into the interior of the bordered scratch block instead of
// the destination picture.
1778 | 0 | if( enablePROF ) |
1779 | 0 | { |
1780 | 0 | dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); |
1781 | 0 | dstStride = dstExtBuf.stride; |
1782 | 0 | } |
1783 | |

// Filter selection: both fractions non-zero -> filter4x4; yFrac == 0 -> horizontal filter (this
// also covers the full-sample case with xFrac == 0); otherwise xFrac == 0 -> vertical filter.
1784 | 0 | if( xFrac && yFrac ) |
1785 | 0 | { |
1786 | 0 | m_if.filter4x4( compID, ref, refStride, dst, dstStride, 4, 4, xFrac, yFrac, isLast, chFmt, clpRng ); |
1787 | 0 | } |
1788 | 0 | else if( !yFrac ) |
1789 | 0 | { |
1790 | 0 | m_if.filterHor( compID, ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng ); |
1791 | 0 | } |
1792 | 0 | else if( xFrac == 0 ) |
1793 | 0 | { |
1794 | 0 | m_if.filterVer( compID, ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng ); |
1795 | 0 | } |
1796 | | 
1797 | 0 | if (enablePROF) |
1798 | 0 | { |
// Note: this 'shift' shadows the function-level 'shift' for the rest of this scope.
1799 | 0 | const unsigned shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); |
// PROF runs for luma only, so xFrac/yFrac are in [0, 15] here and '>> 3' yields 0 or 1: the
// border samples are taken from the nearest integer reference position.
1800 | 0 | const int xOffset = xFrac >> 3; |
1801 | 0 | const int yOffset = yFrac >> 3; |
1802 | |

1803 | 0 | const int refOffset = (blockHeight + 1) * refStride; |
1804 | 0 | const int dstOffset = (blockHeight + 1)* dstStride; |
1805 | |

// Fill the top and bottom border rows (blockWidth + 2 samples each) from reference samples,
// converted to the intermediate sample format (left shift, minus IF_INTERNAL_OFFS).
1806 | 0 | const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1; |
1807 | 0 | Pel* dstPel = dst - dstStride - 1; |
1808 | 0 | for (int pw = 0; pw < blockWidth + 2; pw++) |
1809 | 0 | { |
1810 | 0 | dstPel[pw] = leftShiftU(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS; |
1811 | 0 | dstPel[pw + dstOffset] = leftShiftU(refPel[pw + refOffset], shift) - (Pel)IF_INTERNAL_OFFS; |
1812 | 0 | } |
1813 | |

// Fill the left and right border columns for the interior rows.
1814 | 0 | refPel = ref + yOffset * refBuf.stride + xOffset; |
1815 | 0 | dstPel = dst; |
1816 | 0 | for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride) |
1817 | 0 | { |
1818 | 0 | dstPel[-1] = leftShiftU(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS; |
1819 | 0 | dstPel[blockWidth] = leftShiftU(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS; |
1820 | 0 | } |
1821 | |

// Gradients are computed over the bordered block.
1822 | 0 | PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); |
1823 | 0 | PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2); |
1824 | |

1825 | 0 | xFpProfGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd); |
1826 | |

1827 | 0 | const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd)); |
1828 | 0 | const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS; |
1829 | 0 | Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); |
1830 | 0 | Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); |
1831 | 0 | Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H); |
1832 | |

1833 | 0 | Pel* dstY = dstBuf.bufAt(w, h); |
1834 | |

// Combine the interior prediction with the gradients and the per-sample MV deltas, and write the
// refined sub-block to its final position in the destination buffer.
1835 | 0 | xFpApplyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng); |
1836 | 0 | } |
1837 | 0 | } |
1838 | 0 | } |
1839 | 0 | } |
1840 | | |
1841 | | bool InterPredInterpolation::xIsAffineMvInRangeFPP( const CodingUnit &cu, const Mv* _mv, const int ifpLines, const int mvPrecShift ) |
1842 | 0 | { |
1843 | 0 | const PreCalcValues& pcv = *cu.cs->pcv; |
1844 | 0 | if( cu.ly() >= ( ( pcv.heightInCtus - 1 - ifpLines ) << pcv.maxCUSizeLog2 ) ) |
1845 | 0 | return true; |
1846 | | |
1847 | 0 | const ChromaFormat chFmt = cu.chromaFormat; |
1848 | 0 | const int width = cu.Y().width; |
1849 | 0 | const int height = cu.Y().height; |
1850 | 0 | const int nBW = AFFINE_MIN_BLOCK_SIZE; |
1851 | 0 | const int nBH = AFFINE_MIN_BLOCK_SIZE; |
1852 | 0 | const int iHalfBW = nBW >> 1; |
1853 | 0 | const int iHalfBH = nBH >> 1; |
1854 | 0 | const int iBit = MAX_CU_DEPTH; |
1855 | 0 | const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL; |
1856 | 0 | Mv mvLT = _mv[0]; |
1857 | 0 | Mv mvRT = _mv[1]; |
1858 | 0 | Mv mvLB = _mv[2]; |
1859 | |
|
1860 | 0 | int iDMvHorX = 0; |
1861 | 0 | int iDMvHorY = 0; |
1862 | 0 | int iDMvVerX = 0; |
1863 | 0 | int iDMvVerY = 0; |
1864 | |
|
1865 | 0 | const int iMvScaleVer = mvLT.ver * (1 << iBit); |
1866 | |
|
1867 | 0 | int iScaleX = getChannelTypeScaleX(CH_C, chFmt); |
1868 | 0 | int iScaleY = getChannelTypeScaleY(CH_C, chFmt); |
1869 | 0 | const int cxWidth = width; |
1870 | 0 | const int cxHeight = height; |
1871 | |
|
1872 | 0 | iDMvHorX = (mvRT - mvLT).hor * (1 << (iBit - Log2(cxWidth))); |
1873 | 0 | iDMvHorY = (mvRT - mvLT).ver * (1 <<(iBit - Log2(cxWidth))); |
1874 | 0 | if (cu.affineType == AFFINEMODEL_6PARAM) |
1875 | 0 | { |
1876 | 0 | iDMvVerX = (mvLB - mvLT).hor * (1 <<(iBit - Log2(cxHeight))); |
1877 | 0 | iDMvVerY = (mvLB - mvLT).ver * (1 <<(iBit - Log2(cxHeight))); |
1878 | 0 | } |
1879 | 0 | else |
1880 | 0 | { |
1881 | 0 | iDMvVerX = -iDMvHorY; |
1882 | 0 | iDMvVerY = iDMvHorX; |
1883 | 0 | } |
1884 | 0 | const bool subblkMVSpreadOverLimit = InterPrediction::isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, cu.interDir); |
1885 | |
|
1886 | 0 | const int yRefMax = ( ( ( cu.ly() >> pcv.maxCUSizeLog2 ) + ifpLines + 1 ) << pcv.maxCUSizeLog2 ) - 1; |
1887 | 0 | const int dctifMarginVerBot = 4; |
1888 | |
|
1889 | 0 | auto roundMvVal = [&](int mvVal, int shift) |
1890 | 0 | { |
1891 | 0 | const int nOffset = 1 << (shift - 1); |
1892 | 0 | mvVal = (mvVal + nOffset - (mvVal >= 0)) >> shift; |
1893 | 0 | return mvVal; |
1894 | 0 | }; |
1895 | 0 | auto calcAffineMv = [&](int w, int h) |
1896 | 0 | { |
1897 | 0 | int iMvScaleTmpVer; |
1898 | 0 | if(!subblkMVSpreadOverLimit) |
1899 | 0 | { |
1900 | 0 | iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h); |
1901 | 0 | } |
1902 | 0 | else |
1903 | 0 | { |
1904 | 0 | iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1); |
1905 | 0 | } |
1906 | 0 | iMvScaleTmpVer = roundMvVal(iMvScaleTmpVer, shift); |
1907 | 0 | return iMvScaleTmpVer; |
1908 | 0 | }; |
1909 | |
|
1910 | 0 | auto checkMvLineSync = [&](int yMv, int blkBot, const int scaleVer = 0 ) |
1911 | 0 | { |
1912 | 0 | if( ( ( blkBot ) + (yMv >> ( mvPrecShift + scaleVer) ) > ( yRefMax >> scaleVer ) ) ) |
1913 | 0 | return false; |
1914 | 0 | return true; |
1915 | 0 | }; |
1916 | |
|
1917 | 0 | const int filterMargin = dctifMarginVerBot - 1; |
1918 | 0 | int x = cu.lx(); |
1919 | 0 | int y = cu.ly(); |
1920 | | |
1921 | | // luma |
1922 | 0 | Position off00 (0, 0); |
1923 | 0 | Position blk00 (x + off00.x, y + off00.y); |
1924 | 0 | int mvVer00 = calcAffineMv(off00.x, off00.y); |
1925 | 0 | if( !checkMvLineSync( mvVer00, blk00.y + filterMargin + nBH ) ) |
1926 | 0 | return false; |
1927 | | |
1928 | 0 | Position off01 (width - nBW, 0); |
1929 | 0 | Position blk01 (x + off01.x, y + off01.y); |
1930 | 0 | int mvVer01 = calcAffineMv(off01.x, off01.y); |
1931 | 0 | if( !checkMvLineSync( mvVer01, blk00.y + filterMargin + nBH ) ) |
1932 | 0 | return false; |
1933 | | |
1934 | 0 | Position off10 (0, height - nBH); |
1935 | 0 | Position blk10 (x + off10.x, y + off10.y); |
1936 | 0 | int mvVer10 = calcAffineMv(off10.x, off10.y); |
1937 | 0 | if( !checkMvLineSync( mvVer10, blk10.y + filterMargin + nBH ) ) |
1938 | 0 | return false; |
1939 | | |
1940 | 0 | Position off11 (width - nBW, height - nBH); |
1941 | 0 | Position blk11 (x + off11.x, y + off11.y); |
1942 | 0 | int mvVer11 = calcAffineMv(off11.x, off11.y); |
1943 | 0 | if( !checkMvLineSync( mvVer11, blk10.y + filterMargin + nBH ) ) |
1944 | 0 | return false; |
1945 | | |
1946 | | // chroma |
1947 | 0 | if( cu.lwidth() == 8 && cu.lheight() == 8 ) |
1948 | 0 | { |
1949 | 0 | if(iScaleX || iScaleY) |
1950 | 0 | { |
1951 | 0 | if(iScaleY) |
1952 | 0 | { |
1953 | 0 | const int blkBot = ((blk00.y + filterMargin) >> iScaleY) + nBH; |
1954 | 0 | if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer11, 1 ), blkBot, iScaleY )) |
1955 | 0 | return false; |
1956 | 0 | } |
1957 | 0 | else |
1958 | 0 | { |
1959 | 0 | if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer01, 1 ), blk00.y + filterMargin + nBH, iScaleY )) |
1960 | 0 | return false; |
1961 | 0 | if(!checkMvLineSync( roundMvVal( mvVer10 + mvVer11, 1 ), blk10.y + filterMargin + nBH, iScaleY )) |
1962 | 0 | return false; |
1963 | 0 | } |
1964 | 0 | } |
1965 | 0 | } |
1966 | 0 | else |
1967 | 0 | { |
1968 | 0 | if(iScaleX || iScaleY) |
1969 | 0 | { |
1970 | 0 | int blkBot = ((blk00.y + filterMargin) >> iScaleY) + nBH; |
1971 | 0 | int mvVer00_ = calcAffineMv(off00.x + nBW, off00.y + (iScaleY ? nBH: 0) ); |
1972 | 0 | if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer00_, 1 ), blkBot, iScaleY )) |
1973 | 0 | return false; |
1974 | | |
1975 | 0 | int _mvVer01 = calcAffineMv( off01.x - nBW, off01.y ); |
1976 | 0 | int mvVer01_ = iScaleY ? calcAffineMv( off01.x , off01.y + nBH ): mvVer01; |
1977 | 0 | if(!checkMvLineSync( roundMvVal( _mvVer01 + mvVer01_, 1 ), blkBot, iScaleY )) |
1978 | 0 | return false; |
1979 | | |
1980 | 0 | blkBot = ((blk10.y - (iScaleY ? nBH: 0) + filterMargin) >> iScaleY) + nBH; |
1981 | 0 | int _mvVer10 = iScaleY ? calcAffineMv( off10.x , off10.y - nBH ): mvVer10; |
1982 | 0 | int mvVer10_ = calcAffineMv( off10.x + nBW, off10.y ); |
1983 | 0 | if(!checkMvLineSync( roundMvVal( _mvVer10 + mvVer10_, 1 ), blkBot, iScaleY )) |
1984 | 0 | return false; |
1985 | | |
1986 | 0 | int _mvVer11 = calcAffineMv( off11.x - nBW, off11.y - (iScaleY ? nBH: 0) ); |
1987 | 0 | if(!checkMvLineSync( roundMvVal( _mvVer11 + mvVer11, 1 ), blkBot, iScaleY )) |
1988 | 0 | return false; |
1989 | 0 | } |
1990 | 0 | } |
1991 | | |
1992 | 0 | return true; |
1993 | 0 | } |
1994 | | |
1995 | | void InterPrediction::xFillIBCBuffer(CodingUnit& cu) |
1996 | 0 | { |
1997 | 0 | for (auto& currPU : CU::traverseTUs(cu)) |
1998 | 0 | { |
1999 | 0 | for (const CompArea& area : currPU.blocks) |
2000 | 0 | { |
2001 | 0 | if (!area.valid()) |
2002 | 0 | { |
2003 | 0 | continue; |
2004 | 0 | } |
2005 | 0 | const unsigned int lcuWidth = cu.cs->slice->sps->CTUSize; |
2006 | 0 | const int shiftSampleHor = getComponentScaleX(area.compID, cu.chromaFormat); |
2007 | 0 | const int shiftSampleVer = getComponentScaleY(area.compID, cu.chromaFormat); |
2008 | 0 | const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; |
2009 | 0 | const int pux = area.x & ((m_IBCBufferWidth >> shiftSampleHor) - 1); |
2010 | 0 | const int puy = area.y & ((1 << ctuSizeLog2Ver) - 1); |
2011 | 0 | const CompArea dstArea = CompArea(area.compID, cu.chromaFormat, Position(pux, puy), Size(area.width, area.height)); |
2012 | 0 | CPelBuf srcBuf = cu.cs->getRecoBuf(area); |
2013 | 0 | PelBuf dstBuf = m_IBCBuffer.getBuf(dstArea); |
2014 | |
|
2015 | 0 | dstBuf.copyFrom(srcBuf); |
2016 | 0 | } |
2017 | 0 | } |
2018 | 0 | } |
2019 | | |
2020 | | void InterPrediction::xIntraBlockCopyIBC(CodingUnit& cu, PelUnitBuf& predBuf, const ComponentID compID) |
2021 | 0 | { |
2022 | 0 | const unsigned int lcuWidth = cu.cs->slice->sps->CTUSize; |
2023 | 0 | const int shiftSampleHor = getComponentScaleX(compID, cu.chromaFormat); |
2024 | 0 | const int shiftSampleVer = getComponentScaleY(compID, cu.chromaFormat); |
2025 | 0 | const int ctuSizeLog2Ver = floorLog2(lcuWidth) - shiftSampleVer; |
2026 | 0 | Mv bv = cu.mv[REF_PIC_LIST_0][0]; |
2027 | 0 | bv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT ); |
2028 | 0 | int refx, refy; |
2029 | 0 | if (compID == COMP_Y) |
2030 | 0 | { |
2031 | 0 | refx = cu.Y().x + bv.hor; |
2032 | 0 | refy = cu.Y().y + bv.ver; |
2033 | 0 | } |
2034 | 0 | else |
2035 | 0 | {//Cb or Cr |
2036 | 0 | refx = cu.Cb().x + (bv.hor >> shiftSampleHor); |
2037 | 0 | refy = cu.Cb().y + (bv.ver >> shiftSampleVer); |
2038 | 0 | } |
2039 | 0 | refx &= ((m_IBCBufferWidth >> shiftSampleHor) - 1); |
2040 | 0 | refy &= ((1 << ctuSizeLog2Ver) - 1); |
2041 | |
|
2042 | 0 | if (refx + predBuf.bufs[compID].width <= (m_IBCBufferWidth >> shiftSampleHor)) |
2043 | 0 | { |
2044 | 0 | const CompArea srcArea = CompArea(compID, cu.chromaFormat, Position(refx, refy), Size(predBuf.bufs[compID].width, predBuf.bufs[compID].height)); |
2045 | 0 | const CPelBuf refBuf = m_IBCBuffer.getBuf(srcArea); |
2046 | 0 | predBuf.bufs[compID].copyFrom(refBuf); |
2047 | 0 | } |
2048 | 0 | else |
2049 | 0 | {//wrap around |
2050 | 0 | int width = (m_IBCBufferWidth >> shiftSampleHor) - refx; |
2051 | 0 | CompArea srcArea = CompArea(compID, cu.chromaFormat, Position(refx, refy), Size(width, predBuf.bufs[compID].height)); |
2052 | 0 | CPelBuf srcBuf = m_IBCBuffer.getBuf(srcArea); |
2053 | 0 | PelBuf dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position(0, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); |
2054 | 0 | dstBuf.copyFrom(srcBuf); |
2055 | |
|
2056 | 0 | width = refx + predBuf.bufs[compID].width - (m_IBCBufferWidth >> shiftSampleHor); |
2057 | 0 | srcArea = CompArea(compID, cu.chromaFormat, Position(0, refy), Size(width, predBuf.bufs[compID].height)); |
2058 | 0 | srcBuf = m_IBCBuffer.getBuf(srcArea); |
2059 | 0 | dstBuf = PelBuf(predBuf.bufs[compID].bufAt(Position((m_IBCBufferWidth >> shiftSampleHor) - refx, 0)), predBuf.bufs[compID].stride, Size(width, predBuf.bufs[compID].height)); |
2060 | 0 | dstBuf.copyFrom(srcBuf); |
2061 | 0 | } |
2062 | 0 | } |
2063 | | |
2064 | | void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize) |
2065 | 0 | { |
2066 | 0 | const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize)); |
2067 | 0 | m_IBCBuffer.getBuf(area).fill(-1); |
2068 | 0 | } |
2069 | | |
2070 | | void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos) |
2071 | 0 | { |
2072 | 0 | const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize)); |
2073 | 0 | m_IBCBuffer.getBuf(area).fill(-1); |
2074 | 0 | } |
2075 | | bool InterPrediction::isLumaBvValidIBC(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv) |
2076 | 0 | { |
2077 | 0 | if (((yCb + yBv) & (ctuSize - 1)) + height > ctuSize) |
2078 | 0 | { |
2079 | 0 | return false; |
2080 | 0 | } |
2081 | 0 | int refTLx = xCb + xBv; |
2082 | 0 | int refTLy = (yCb + yBv) & (ctuSize - 1); |
2083 | 0 | PelBuf buf = m_IBCBuffer.Y(); |
2084 | 0 | for (int x = 0; x < width; x += 4) |
2085 | 0 | { |
2086 | 0 | for (int y = 0; y < height; y += 4) |
2087 | 0 | { |
2088 | 0 | if (buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false; |
2089 | 0 | if (buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false; |
2090 | 0 | if (buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false; |
2091 | 0 | if (buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false; |
2092 | 0 | } |
2093 | 0 | } |
2094 | 0 | return true; |
2095 | 0 | } |
2096 | | |
2097 | | } // namespace vvenc |
2098 | | |
2099 | | //! \} |
2100 | | |