/src/vvdec/source/Lib/CommonLib/IntraPrediction.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /** \file IntraPrediction.cpp |
44 | | \brief intra prediction class |
45 | | */ |
46 | | |
47 | | #define DONT_UNDEF_SIZE_AWARE_PER_EL_OP |
48 | | |
49 | | #include "IntraPrediction.h" |
50 | | |
51 | | #include "Unit.h" |
52 | | #include "UnitTools.h" |
53 | | |
54 | | #include "Buffer.h" |
55 | | |
56 | | #include "dtrace_next.h" |
57 | | #include "Rom.h" |
58 | | |
59 | | #include <memory.h> |
60 | | #include <array> |
61 | | |
62 | | #include "CommonLib/InterpolationFilter.h" |
63 | | #include "CommonLib/TimeProfiler.h" |
64 | | |
65 | | namespace vvdec |
66 | | { |
67 | | |
68 | | // ==================================================================================================================== |
69 | | // Tables |
70 | | // ==================================================================================================================== |
71 | | |
72 | | const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] = |
73 | | { |
74 | | { // Luma |
75 | | 24, // 1xn |
76 | | 24, // 2xn |
77 | | 24, // 4xn |
78 | | 14, // 8xn |
79 | | 2, // 16xn |
80 | | 0, // 32xn |
81 | | 0, // 64xn |
82 | | 0, // 128xn |
83 | | }, |
84 | | { // Chroma |
85 | | 40, // 1xn |
86 | | 40, // 2xn |
87 | | 40, // 4xn |
88 | | 28, // 8xn |
89 | | 4, // 16xn |
90 | | 0, // 32xn |
91 | | 0, // 64xn |
92 | | 0, // 128xn |
93 | | } |
94 | | }; |
95 | | |
96 | | const TFilterCoeff g_intraGaussFilter[32][4] = { |
97 | | { 16, 32, 16, 0 }, |
98 | | { 16, 32, 16, 0 }, |
99 | | { 15, 31, 17, 1 }, |
100 | | { 15, 31, 17, 1 }, |
101 | | { 14, 30, 18, 2 }, |
102 | | { 14, 30, 18, 2 }, |
103 | | { 13, 29, 19, 3 }, |
104 | | { 13, 29, 19, 3 }, |
105 | | { 12, 28, 20, 4 }, |
106 | | { 12, 28, 20, 4 }, |
107 | | { 11, 27, 21, 5 }, |
108 | | { 11, 27, 21, 5 }, |
109 | | { 10, 26, 22, 6 }, |
110 | | { 10, 26, 22, 6 }, |
111 | | { 9, 25, 23, 7 }, |
112 | | { 9, 25, 23, 7 }, |
113 | | { 8, 24, 24, 8 }, |
114 | | { 8, 24, 24, 8 }, |
115 | | { 7, 23, 25, 9 }, |
116 | | { 7, 23, 25, 9 }, |
117 | | { 6, 22, 26, 10 }, |
118 | | { 6, 22, 26, 10 }, |
119 | | { 5, 21, 27, 11 }, |
120 | | { 5, 21, 27, 11 }, |
121 | | { 4, 20, 28, 12 }, |
122 | | { 4, 20, 28, 12 }, |
123 | | { 3, 19, 29, 13 }, |
124 | | { 3, 19, 29, 13 }, |
125 | | { 2, 18, 30, 14 }, |
126 | | { 2, 18, 30, 14 }, |
127 | | { 1, 17, 31, 15 }, |
128 | | { 1, 17, 31, 15 }, |
129 | | }; |
130 | | |
131 | | void GetLumaRecPixel420Core (const int width,const int height, const Pel* pRecSrc0,const ptrdiff_t iRecStride,Pel* pDst0,const ptrdiff_t iDstStride) |
132 | 0 | { |
133 | 0 | for( int y = 0; y < height; y++ ) |
134 | 0 | { |
135 | 0 | for( int x = 0; x < width; x ++ ) |
136 | 0 | { |
137 | 0 | pDst0[x + 0 ] = ( pRecSrc0[( (x + 0 ) << 1 ) ] * 2 |
138 | 0 | + pRecSrc0[( (x + 0 ) << 1 ) + 1] * 1 |
139 | 0 | + pRecSrc0[( (x + 0 ) << 1 ) - 1] * 1 |
140 | 0 | + pRecSrc0[( (x + 0 ) << 1 ) + iRecStride] * 2 |
141 | 0 | + pRecSrc0[( (x + 0 ) << 1 ) + 1 + iRecStride] * 1 |
142 | 0 | + pRecSrc0[( (x + 0 ) << 1 ) - 1 + iRecStride] * 1 |
143 | 0 | + 4 ) >> 3; |
144 | 0 | } |
145 | 0 | pDst0 += iDstStride; |
146 | 0 | pRecSrc0 += (iRecStride<<1); |
147 | 0 | } |
148 | 0 | } |
149 | | |
150 | | /** Function for deriving planar intra prediction. This function derives the prediction samples for planar mode (intra coding). |
151 | | */ |
152 | | |
153 | | //NOTE: Bit-Limit - 24-bit source |
154 | | void xPredIntraPlanarCore( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps ) |
155 | 0 | { |
156 | | // with some optimizations gcc gives spurious "-Wmaybe-uninitialized" warnings here |
157 | | GCC_WARNING_DISABLE_maybe_uninitialized |
158 | |
|
159 | 0 | const uint32_t width = pDst.width; |
160 | 0 | const uint32_t height = pDst.height; |
161 | 0 | const uint32_t log2W = getLog2( width ); |
162 | 0 | const uint32_t log2H = getLog2( height ); |
163 | 0 | int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE]; |
164 | 0 | const uint32_t offset = 1 << (log2W + log2H); |
165 | | // Get left and above reference column and row |
166 | 0 | for( int k = 0; k < width + 1; k++ ) |
167 | 0 | { |
168 | 0 | topRow[k] = pSrc.at( k + 1, 0 ); |
169 | 0 | } |
170 | |
|
171 | 0 | for( int k = 0; k < height + 1; k++ ) |
172 | 0 | { |
173 | 0 | leftColumn[k] = pSrc.at( 0, k + 1 ); |
174 | 0 | } |
175 | | |
176 | | // Prepare intermediate variables used in interpolation |
177 | 0 | int bottomLeft = leftColumn[height]; |
178 | 0 | int topRight = topRow[width]; |
179 | |
|
180 | 0 | for( int k = 0; k < width; k++ ) |
181 | 0 | { |
182 | 0 | bottomRow[k] = bottomLeft - topRow[k]; |
183 | 0 | topRow[k] = topRow[k] << log2H; |
184 | 0 | } |
185 | |
|
186 | 0 | for( int k = 0; k < height; k++ ) |
187 | 0 | { |
188 | 0 | rightColumn[k] = topRight - leftColumn[k]; |
189 | 0 | leftColumn[k] = leftColumn[k] << log2W; |
190 | 0 | } |
191 | |
|
192 | 0 | const uint32_t finalShift = 1 + log2W + log2H; |
193 | 0 | const ptrdiff_t stride = pDst.stride; |
194 | 0 | Pel* pred = pDst.buf; |
195 | 0 | for( int y = 0; y < height; y++, pred += stride ) |
196 | 0 | { |
197 | 0 | int horPred = leftColumn[y]; |
198 | |
|
199 | 0 | for( int x = 0; x < width; x++ ) |
200 | 0 | { |
201 | 0 | horPred += rightColumn[y]; |
202 | 0 | topRow[x] += bottomRow[x]; |
203 | |
|
204 | 0 | int vertPred = topRow[x]; |
205 | 0 | pred[x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift; |
206 | |
|
207 | 0 | } |
208 | 0 | } |
209 | 0 | GCC_WARNING_RESET |
210 | 0 | } |
211 | | |
212 | | void IntraPredSampleFilterCore(Pel *ptrSrc,const ptrdiff_t srcStride,PelBuf &piPred,const uint32_t uiDirMode,const ClpRng& clpRng) |
213 | 0 | { |
214 | 0 | const CPelBuf srcBuf ( ptrSrc, ( SizeType ) srcStride, ( SizeType ) srcStride ); |
215 | 0 | const int iWidth = piPred.width; |
216 | 0 | const int iHeight = piPred.height; |
217 | 0 | PelBuf dstBuf = piPred; |
218 | |
|
219 | 0 | const int scale = ((getLog2(iWidth) - 2 + getLog2(iHeight) - 2 + 2) >> 2); |
220 | 0 | CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31"); |
221 | |
|
222 | 0 | #if 1 |
223 | 0 | if( uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX ) |
224 | 0 | { |
225 | 0 | for( int y = 0; y < iHeight; y++ ) |
226 | 0 | { |
227 | 0 | const int wT = 32 >> std::min(31, ((y << 1) >> scale)); |
228 | | // const Pel left = srcBuf.at(y + 1, 1); |
229 | 0 | const Pel left = srcBuf.at(0, y + 1 ); |
230 | 0 | for (int x = 0; x < iWidth; x++) |
231 | 0 | { |
232 | 0 | const int wL = 32 >> std::min(31, ((x << 1) >> scale)); |
233 | 0 | const Pel top = srcBuf.at(x + 1, 0); |
234 | 0 | const Pel val = dstBuf.at(x, y); |
235 | 0 | dstBuf.at(x, y) = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6); |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | | #else |
240 | | const int lev[4]={std::min(3,iWidth),std::min(6,iWidth),std::min(12,iWidth),std::min(24,iWidth)}; |
241 | | if (uiDirMode == PLANAR_IDX) |
242 | | { |
243 | | for (int y = 0; y < iHeight; y++) |
244 | | { |
245 | | int wT = 32 >> std::min(31, ((y << 1) >> scale)); |
246 | | const Pel left = srcBuf.at(0, y + 1); |
247 | | if (wT) |
248 | | { |
249 | | for (int x = 0; x < iWidth; x++) |
250 | | { |
251 | | const Pel top = srcBuf.at(x + 1, 0); |
252 | | int wL = 32 >> std::min(31, ((x << 1) >> scale)); |
253 | | dstBuf.at(x, y) = ClipPel((wL * left + wT * top + (64 - wL - wT) * dstBuf.at(x, y) + 32) >> 6, clpRng); |
254 | | |
255 | | } |
256 | | } |
257 | | else |
258 | | { |
259 | | for (int x = 0; x < lev[scale]; x++) // bis wL 0 ist, das ist bei x lev[scale] |
260 | | { |
261 | | int wL = 32 >> std::min(31, ((x << 1) >> scale)); |
262 | | dstBuf.at(x, y) = ClipPel((wL * left + (64 - wL) * dstBuf.at(x, y) + 32) >> 6, clpRng); |
263 | | } |
264 | | } |
265 | | } |
266 | | } |
267 | | else if (uiDirMode == DC_IDX) |
268 | | { |
269 | | const Pel topLeft = srcBuf.at(0, 0); |
270 | | for (int y = 0; y < iHeight; y++) |
271 | | { |
272 | | int wT = 32 >> std::min(31, ((y << 1) >> scale)); |
273 | | const Pel left = srcBuf.at(0, y + 1); |
274 | | if (wT) |
275 | | { |
276 | | |
277 | | for (int x = 0; x < iWidth; x++) |
278 | | { |
279 | | const Pel top = srcBuf.at(x + 1, 0); |
280 | | int wL = 32 >> std::min(31, ((x << 1) >> scale)); |
281 | | int wTL = (wL >> 4) + (wT >> 4); |
282 | | dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng); |
283 | | } |
284 | | } |
285 | | else |
286 | | { |
287 | | for (int x = 0; x < lev[scale]; x++) |
288 | | { |
289 | | const Pel top = srcBuf.at(x + 1, 0); |
290 | | int wL = 32 >> std::min(31, ((x << 1) >> scale)); |
291 | | int wTL = (wL >> 4) + (wT >> 4); |
292 | | dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng); |
293 | | } |
294 | | |
295 | | } |
296 | | } |
297 | | } |
298 | | #endif |
299 | 0 | } |
300 | | |
301 | | template<typename T> |
302 | | void IntraPredAngleCore(T* pDstBuf,const ptrdiff_t dstStride,T* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng) |
303 | 0 | { |
304 | 0 | for (int y = 0; y<height; y++ ) |
305 | 0 | { |
306 | 0 | const int deltaInt = deltaPos >> 5; |
307 | 0 | const int deltaFract = deltaPos & ( 32 - 1 ); |
308 | |
|
309 | 0 | Pel p[4]; |
310 | |
|
311 | 0 | int refMainIndex = deltaInt + 1; |
312 | |
|
313 | 0 | const TFilterCoeff *f = &ff[deltaFract << 2]; |
314 | |
|
315 | 0 | for( int x = 0; x < width; x++, refMainIndex++ ) |
316 | 0 | { |
317 | 0 | p[0] = refMain[refMainIndex - 1]; |
318 | 0 | p[1] = refMain[refMainIndex ]; |
319 | 0 | p[2] = refMain[refMainIndex + 1]; |
320 | 0 | p[3] = refMain[refMainIndex + 2]; |
321 | |
|
322 | 0 | pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6); |
323 | |
|
324 | 0 | if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping |
325 | 0 | { |
326 | 0 | pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng ); |
327 | 0 | } |
328 | 0 | } |
329 | 0 | deltaPos += intraPredAngle; |
330 | 0 | } |
331 | 0 | } |
332 | | |
333 | | template<typename T> |
334 | | void IntraPredAngleChroma(T* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle) |
335 | 0 | { |
336 | 0 | for (int y = 0; y<height; y++) |
337 | 0 | { |
338 | 0 | const int deltaInt = deltaPos >> 5; |
339 | 0 | const int deltaFract = deltaPos & (32 - 1); |
340 | | |
341 | | // Do linear filtering |
342 | 0 | const Pel *pRM = pBorder + deltaInt + 1; |
343 | 0 | int lastRefMainPel = *pRM++; |
344 | |
|
345 | 0 | for( int x = 0; x < width; pRM++, x++ ) |
346 | 0 | { |
347 | 0 | int thisRefMainPel = *pRM; |
348 | 0 | pDstBuf[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 ); |
349 | 0 | lastRefMainPel = thisRefMainPel; |
350 | 0 | } |
351 | 0 | deltaPos += intraPredAngle; |
352 | 0 | pDstBuf += dstStride; |
353 | 0 | } |
354 | |
|
355 | 0 | } |
356 | | |
357 | | // ==================================================================================================================== |
358 | | // Constructor / destructor / initialize |
359 | | // ==================================================================================================================== |
360 | | |
361 | 0 | IntraPrediction::IntraPrediction() : m_currChromaFormat( NUM_CHROMA_FORMAT ) |
362 | 0 | { |
363 | 0 | IntraPredAngleCore4 = IntraPredAngleCore; |
364 | 0 | IntraPredAngleCore8 = IntraPredAngleCore; |
365 | 0 | IntraPredAngleChroma4 = IntraPredAngleChroma; |
366 | 0 | IntraPredAngleChroma8 = IntraPredAngleChroma; |
367 | |
|
368 | 0 | IntraPredSampleFilter8 = IntraPredSampleFilterCore; |
369 | 0 | IntraPredSampleFilter16 = IntraPredSampleFilterCore; |
370 | |
|
371 | 0 | xPredIntraPlanar = xPredIntraPlanarCore; |
372 | |
|
373 | 0 | GetLumaRecPixel420 = GetLumaRecPixel420Core; |
374 | 0 | } |
375 | | |
376 | | IntraPrediction::~IntraPrediction() |
377 | 0 | { |
378 | 0 | destroy(); |
379 | 0 | } |
380 | | |
381 | | void IntraPrediction::destroy() |
382 | 0 | { |
383 | 0 | } |
384 | | |
385 | | void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY) |
386 | 0 | { |
387 | | // if it has been initialised before, but the chroma format has changed, release the memory and start again. |
388 | 0 | if (m_currChromaFormat != chromaFormatIDC) |
389 | 0 | { |
390 | 0 | destroy(); |
391 | 0 | } |
392 | |
|
393 | 0 | m_currChromaFormat = chromaFormatIDC; |
394 | |
|
395 | 0 | std::fill_n( m_neighborSize, 3, 0 ); |
396 | 0 | m_lastCUidx = -1; |
397 | |
|
398 | 0 | #if ENABLE_SIMD_OPT_INTRAPRED && defined( TARGET_SIMD_X86 ) |
399 | 0 | initIntraPredictionX86(); |
400 | 0 | #endif |
401 | 0 | } |
402 | | |
403 | | // ==================================================================================================================== |
404 | | // Public member functions |
405 | | // ==================================================================================================================== |
406 | | |
407 | | // Function for calculating DC value of the reference samples used in Intra prediction |
408 | | //NOTE: Bit-Limit - 25-bit source |
409 | | Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize, const int mrlIdx ) |
410 | 0 | { |
411 | 0 | CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" ); |
412 | |
|
413 | 0 | int idx, sum = 0; |
414 | 0 | Pel dcVal; |
415 | 0 | const int width = dstSize.width; |
416 | 0 | const int height = dstSize.height; |
417 | 0 | const auto denom = (width == height) ? (width << 1) : std::max(width,height); |
418 | 0 | const auto divShift = getLog2(denom); |
419 | 0 | const auto divOffset = (denom >> 1); |
420 | |
|
421 | 0 | if( width >= height ) |
422 | 0 | { |
423 | 0 | for( idx = 0; idx < width; idx++ ) |
424 | 0 | { |
425 | 0 | sum += pSrc.at( mrlIdx + 1 + idx, 0 ); |
426 | 0 | } |
427 | 0 | } |
428 | 0 | if( width <= height ) |
429 | 0 | { |
430 | 0 | for( idx = 0; idx < height; idx++ ) |
431 | 0 | { |
432 | 0 | sum += pSrc.at( 0, mrlIdx + 1 + idx ); |
433 | 0 | } |
434 | 0 | } |
435 | |
|
436 | 0 | dcVal = (sum + divOffset) >> divShift; |
437 | 0 | return dcVal; |
438 | 0 | } |
439 | | |
440 | | int IntraPrediction::getWideAngle( int width, int height, int predMode ) |
441 | 0 | { |
442 | 0 | if ( predMode > DC_IDX && predMode <= VDIA_IDX ) |
443 | 0 | { |
444 | 0 | int modeShift[] = { 0, 6, 10, 12, 14, 15 }; |
445 | 0 | int deltaSize = abs(getLog2(width) - getLog2(height)); |
446 | 0 | if (width > height && predMode < 2 + modeShift[deltaSize]) |
447 | 0 | { |
448 | 0 | predMode += (VDIA_IDX - 1); |
449 | 0 | } |
450 | 0 | else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize]) |
451 | 0 | { |
452 | 0 | predMode -= (VDIA_IDX - 1); |
453 | 0 | } |
454 | 0 | } |
455 | 0 | return predMode; |
456 | 0 | } |
457 | | |
458 | | void IntraPrediction::setReferenceArrayLengths( const CompArea &area ) |
459 | 0 | { |
460 | | // set Top and Left reference samples length |
461 | 0 | const int width = area.width; |
462 | 0 | const int height = area.height; |
463 | |
|
464 | 0 | m_leftRefLength = (height << 1); |
465 | 0 | m_topRefLength = (width << 1); |
466 | |
|
467 | 0 | } |
468 | | |
469 | | |
470 | | |
471 | | void IntraPrediction::predIntraAng( const ComponentID compID, PelBuf &piPred, const CodingUnit &cu, const bool useFilteredPredSamples ) |
472 | 0 | { |
473 | 0 | const ChannelType channelType = toChannelType( compID ); |
474 | 0 | const int iWidth = piPred.width; |
475 | 0 | const int iHeight = piPred.height; |
476 | 0 | const Size cuSize = Size( cu.blocks[compID].width, cu.blocks[compID].height ); |
477 | 0 | CHECK( CU::isMIP(cu, toChannelType(compID)), "We should not get here for MIP." ); |
478 | 0 | const uint32_t uiDirMode = isLuma( compID ) && cu.bdpcmMode() ? BDPCM_IDX : !isLuma(compID) && cu.bdpcmModeChroma() ? BDPCM_IDX : PU::getFinalIntraMode(cu, channelType); |
479 | |
|
480 | 0 | CHECKD( iWidth == 2, "Width of 2 is not supported" ); |
481 | |
|
482 | 0 | const int multiRefIdx = ( compID == COMPONENT_Y ) ? cu.multiRefIdx() : 0; |
483 | 0 | const bool useISP = cu.ispMode() && isLuma( compID ); |
484 | 0 | const int srcStride = m_topRefLength + 1 + multiRefIdx; |
485 | 0 | const int srcHStride = m_leftRefLength + 1 + multiRefIdx; |
486 | 0 | const ClpRng& clpRng ( cu.slice->clpRng( compID ) ); |
487 | 0 | bool doPDPC = ( iWidth >= MIN_TB_SIZEY && iHeight >= MIN_TB_SIZEY ) && multiRefIdx == 0; |
488 | |
|
489 | 0 | const PelBuf& srcBuf = cu.ispMode() && isLuma(compID) ? getISPBuffer( useFilteredPredSamples ) : PelBuf(getPredictorPtr(compID, useFilteredPredSamples), srcStride, srcHStride); |
490 | |
|
491 | 0 | switch (uiDirMode) |
492 | 0 | { |
493 | 0 | case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred, *cu.sps); break; |
494 | 0 | case(DC_IDX): xPredIntraDc (srcBuf, piPred, channelType, false, multiRefIdx); break; |
495 | 0 | case(BDPCM_IDX): xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? cu.bdpcmMode() : cu.bdpcmModeChroma(), clpRng); break; |
496 | 0 | case(2): |
497 | 0 | case(DIA_IDX): |
498 | 0 | case(VDIA_IDX): |
499 | 0 | if (getWideAngle(useISP ? cuSize.width : iWidth, useISP ? cuSize.height : iHeight, uiDirMode) == static_cast<int>(uiDirMode)) // check if uiDirMode is not wide-angle |
500 | 0 | { |
501 | 0 | xPredIntraAng(srcBuf, piPred, channelType, uiDirMode, clpRng, *cu.sps, multiRefIdx, useFilteredPredSamples, doPDPC, useISP, cuSize ); |
502 | 0 | break; |
503 | 0 | } |
504 | 0 | default: xPredIntraAng(srcBuf, piPred, channelType, uiDirMode, clpRng, *cu.sps, multiRefIdx, useFilteredPredSamples, doPDPC, useISP, cuSize); break; |
505 | 0 | } |
506 | | |
507 | 0 | if( doPDPC && (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX ) ) |
508 | 0 | { |
509 | 0 | if (iWidth>8) |
510 | 0 | IntraPredSampleFilter16(srcBuf.buf,srcBuf.stride,piPred,uiDirMode,clpRng); |
511 | 0 | else |
512 | 0 | IntraPredSampleFilter8(srcBuf.buf,srcBuf.stride,piPred,uiDirMode,clpRng); |
513 | 0 | } |
514 | 0 | } |
515 | | |
516 | | void IntraPrediction::predIntraChromaLM( const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir ) |
517 | 0 | { |
518 | 0 | int iLumaStride = 0; |
519 | 0 | PelBuf Temp; |
520 | 0 | if( (intraDir == MDLM_L_IDX) || (intraDir == MDLM_T_IDX) ) |
521 | 0 | { |
522 | 0 | iLumaStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1; |
523 | 0 | Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) ); |
524 | 0 | } |
525 | 0 | else |
526 | 0 | { |
527 | 0 | iLumaStride = MAX_TU_SIZE_FOR_PROFILE + 1; |
528 | 0 | Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) ); |
529 | 0 | } |
530 | 0 | int a, b, iShift; |
531 | 0 | xGetLMParameters( cu, compID, chromaArea, a, b, iShift ); |
532 | | |
533 | | ////// final prediction |
534 | 0 | piPred.copyFrom( Temp ); |
535 | 0 | piPred.linearTransform( a, iShift, b, true, cu.slice->clpRng( compID ) ); |
536 | 0 | } |
537 | | |
538 | | void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter, const int mrlIdx ) |
539 | 0 | { |
540 | 0 | const Pel dcval = xGetPredValDc( pSrc, pDst, mrlIdx ); |
541 | 0 | pDst.fill( dcval ); |
542 | 0 | } |
543 | | |
544 | | // Function for deriving the angular Intra predictions |
545 | | void IntraPredAngleCore(Pel *pDstBuf,const int dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng) |
546 | 0 | { |
547 | 0 | for (int y = 0; y<height; y++ ) |
548 | 0 | { |
549 | 0 | const int deltaInt = deltaPos >> 5; |
550 | 0 | const int deltaFract = deltaPos & ( 32 - 1 ); |
551 | |
|
552 | 0 | Pel p[4]; |
553 | |
|
554 | 0 | int refMainIndex = deltaInt + 1; |
555 | |
|
556 | 0 | const TFilterCoeff *f = &ff[deltaFract << 2]; |
557 | |
|
558 | 0 | for( int x = 0; x < width; x++, refMainIndex++ ) |
559 | 0 | { |
560 | 0 | p[0] = refMain[refMainIndex - 1]; |
561 | 0 | p[1] = refMain[refMainIndex ]; |
562 | 0 | p[2] = refMain[refMainIndex + 1]; |
563 | 0 | p[3] = refMain[refMainIndex + 2]; |
564 | |
|
565 | 0 | pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6); |
566 | |
|
567 | 0 | if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping |
568 | 0 | { |
569 | 0 | pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng ); |
570 | 0 | } |
571 | 0 | } |
572 | 0 | deltaPos += intraPredAngle; |
573 | 0 | } |
574 | 0 | } |
575 | | |
576 | | |
577 | | /** Function for deriving the simplified angular intra predictions. |
578 | | * |
579 | | * This function derives the prediction samples for the angular mode based on the prediction direction indicated by |
580 | | * the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and |
581 | | * the reference row above the block in the case of vertical prediction or displacement of the rightmost column |
582 | | * of the block and reference column left from the block in the case of the horizontal prediction. The displacement |
583 | | * is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls inbetween reference samples, |
584 | | * the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken |
585 | | * from the extended main reference. |
586 | | */ |
587 | | //NOTE: Bit-Limit - 25-bit source |
588 | | |
589 | | void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps, |
590 | | int multiRefIdx, |
591 | | const bool useFilteredPredSamples , |
592 | | bool &doPDPC, |
593 | | const bool useISP, |
594 | | const Size cuSize |
595 | | ) |
596 | 0 | { |
597 | 0 | int width =int(pDst.width); |
598 | 0 | int height=int(pDst.height); |
599 | |
|
600 | 0 | CHECK( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" ); |
601 | 0 | int predMode = useISP ? getWideAngle( cuSize.width, cuSize.height, dirMode ) : getWideAngle( width, height, dirMode ); |
602 | 0 | const bool bIsModeVer = predMode >= DIA_IDX; |
603 | 0 | const int intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX); |
604 | 0 | const int absAngMode = abs(intraPredAngleMode); |
605 | 0 | const int signAng = intraPredAngleMode < 0 ? -1 : 1; |
606 | | |
607 | | // Set bitshifts and scale the angle parameter to block size |
608 | 0 | static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; |
609 | 0 | static const int invAngTable[32] = { |
610 | 0 | 0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565, |
611 | 0 | 512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16 |
612 | 0 | }; // (512 * 32) / Angle |
613 | 0 | int invAngle = invAngTable[absAngMode]; |
614 | 0 | int absAng = angTable [absAngMode]; |
615 | 0 | int intraPredAngle = signAng * absAng; |
616 | |
|
617 | 0 | Pel* refMain; |
618 | 0 | Pel* refSide; |
619 | |
|
620 | 0 | Pel refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX]; |
621 | 0 | Pel refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX]; |
622 | | |
623 | | // Initialize the Main and Left reference array. |
624 | 0 | if (intraPredAngle < 0) |
625 | 0 | { |
626 | 0 | for (int x = 0; x <= width + 1 + multiRefIdx; x++) |
627 | 0 | { |
628 | 0 | refAbove[x + height] = pSrc.at(x, 0); |
629 | 0 | } |
630 | 0 | for (int y = 0; y <= height + 1 + multiRefIdx; y++) |
631 | 0 | { |
632 | 0 | refLeft[y + width] = pSrc.at(0, y); |
633 | 0 | } |
634 | 0 | refMain = bIsModeVer ? refAbove + height : refLeft + width; |
635 | 0 | refSide = bIsModeVer ? refLeft + width : refAbove + height; |
636 | | |
637 | | // Extend the Main reference to the left. |
638 | 0 | int sizeSide = bIsModeVer ? height : width; |
639 | 0 | for (int k = -sizeSide; k <= -1; k++) |
640 | 0 | { |
641 | 0 | refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)]; |
642 | 0 | } |
643 | 0 | } |
644 | 0 | else |
645 | 0 | { |
646 | | //for (int x = 0; x <= m_topRefLength + multiRefIdx; x++) |
647 | | //{ |
648 | | // refAbove[x] = pSrc.at(x, 0); |
649 | | //} |
650 | 0 | memcpy( refAbove, pSrc.buf, ( m_topRefLength + multiRefIdx + 1 ) * sizeof( Pel ) ); |
651 | 0 | for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++) |
652 | 0 | { |
653 | 0 | refLeft[y] = pSrc.at(0, y); |
654 | 0 | } |
655 | |
|
656 | 0 | refMain = bIsModeVer ? refAbove : refLeft; |
657 | 0 | refSide = bIsModeVer ? refLeft : refAbove; |
658 | | |
659 | | // Extend main reference to right using replication |
660 | 0 | const int log2Ratio = getLog2(width) - getLog2(height); |
661 | 0 | const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio); |
662 | 0 | const int maxIndex = (multiRefIdx << s) + 2; |
663 | 0 | const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength; |
664 | 0 | const Pel val = refMain[refLength + multiRefIdx]; |
665 | 0 | for (int z = 1; z <= maxIndex; z++) |
666 | 0 | { |
667 | 0 | refMain[refLength + multiRefIdx + z] = val; |
668 | 0 | } |
669 | 0 | } |
670 | | |
671 | | // swap width/height if we are doing a horizontal mode: |
672 | 0 | Pel tempArray[MAX_TB_SIZEY*MAX_TB_SIZEY]; |
673 | 0 | const ptrdiff_t dstStride = bIsModeVer ? pDst.stride : MAX_TB_SIZEY; |
674 | 0 | Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray; |
675 | 0 | if (!bIsModeVer) |
676 | 0 | { |
677 | 0 | std::swap(width, height); |
678 | 0 | } |
679 | | |
680 | | // compensate for line offset in reference line buffers |
681 | 0 | refMain += multiRefIdx; |
682 | 0 | refSide += multiRefIdx; |
683 | |
|
684 | 0 | if( intraPredAngle == 0 ) // pure vertical or pure horizontal |
685 | 0 | { |
686 | 0 | if( doPDPC ) |
687 | 0 | { |
688 | 0 | const int scale = ( ( getLog2( width ) - 2 + getLog2( height ) - 2 + 2 ) >> 2 ); |
689 | 0 | CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31"); |
690 | 0 | const int lev[4]={std::min(3,width),std::min(6,width),std::min(12,width),std::min(24,width)}; |
691 | |
|
692 | 0 | const Pel topLeft = pSrc.at(0, 0); |
693 | 0 | for( int y = 0; y < height; y++ ) |
694 | 0 | { |
695 | 0 | const Pel left = refSide[y + 1]; |
696 | 0 | Pel *line = &pDstBuf[y * dstStride]; |
697 | 0 | for( int x = 0; x < lev[scale]; x++ ) |
698 | 0 | { |
699 | 0 | int wL = 32 >> std::min( 31, ( ( x << 1 ) >> scale ) ); |
700 | 0 | *line++ = ClipPel( ( wL * ( left - topLeft ) + ( refMain[x + 1] << 6 ) + 32 ) >> 6, clpRng ); |
701 | 0 | } |
702 | 0 | memcpy( line, refMain + lev[scale] + 1, ( width - lev[scale] ) * sizeof( Pel ) ); |
703 | 0 | } |
704 | 0 | } |
705 | 0 | else |
706 | 0 | { |
707 | 0 | for( int y = 0; y < height; y++ ) |
708 | 0 | { |
709 | 0 | memcpy( pDstBuf + y * dstStride, refMain + 1, width * sizeof( Pel ) ); |
710 | 0 | } |
711 | |
|
712 | 0 | } |
713 | 0 | } |
714 | 0 | else |
715 | 0 | { |
716 | 0 | Pel *pDsty=pDstBuf; |
717 | |
|
718 | 0 | if( !(0 == (absAng & 0x1F)) ) |
719 | 0 | { |
720 | 0 | if( isLuma(channelType) ) |
721 | 0 | { |
722 | 0 | int deltaPos = intraPredAngle * (1 + multiRefIdx); |
723 | 0 | bool interpolationFlag = false, filterFlag = false; |
724 | 0 | const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) ); |
725 | 0 | const int log2Size = ((getLog2(width) + getLog2(height)) >> 1); |
726 | 0 | CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" ); |
727 | 0 | filterFlag = (diff > m_aucIntraFilter[channelType][log2Size]); |
728 | |
|
729 | 0 | if( filterFlag ) |
730 | 0 | { |
731 | 0 | const bool isRefFilter = 0 == ( absAng & 0x1F ); |
732 | 0 | interpolationFlag = !isRefFilter; |
733 | 0 | } |
734 | 0 | const bool useCubicFilter = useISP ? true : ( !interpolationFlag || multiRefIdx > 0 ); |
735 | 0 | const TFilterCoeff *f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(0) : g_intraGaussFilter[0]; |
736 | 0 | if( ( width & 7 ) == 0 ) |
737 | 0 | { |
738 | 0 | IntraPredAngleCore8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng); |
739 | |
|
740 | 0 | } |
741 | 0 | else if( ( width & 3 ) == 0 ) |
742 | 0 | { |
743 | 0 | IntraPredAngleCore4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng); |
744 | 0 | } |
745 | 0 | else |
746 | 0 | { |
747 | 0 | CHECK( !useISP, "should not happen" ); |
748 | |
|
749 | 0 | for (int y = 0; y<height; y++ ) |
750 | 0 | { |
751 | 0 | const int deltaInt = deltaPos >> 5; |
752 | 0 | const int deltaFract = deltaPos & ( 32 - 1 ); |
753 | |
|
754 | 0 | Pel p[4]; |
755 | |
|
756 | 0 | int refMainIndex = deltaInt + 1; |
757 | |
|
758 | 0 | const TFilterCoeff *ff = &f[deltaFract << 2]; |
759 | |
|
760 | 0 | for( int x = 0; x < width; x++, refMainIndex++ ) |
761 | 0 | { |
762 | 0 | p[0] = refMain[refMainIndex - 1]; |
763 | 0 | p[1] = refMain[refMainIndex ]; |
764 | 0 | p[2] = refMain[refMainIndex + 1]; |
765 | 0 | p[3] = refMain[refMainIndex + 2]; |
766 | |
|
767 | 0 | pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(ff[0] * p[0]) + static_cast<int>(ff[1] * p[1]) + static_cast<int>(ff[2] * p[2]) + static_cast<int>(ff[3] * p[3]) + 32) >> 6); |
768 | |
|
769 | 0 | if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping |
770 | 0 | { |
771 | 0 | pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng ); |
772 | 0 | } |
773 | 0 | } |
774 | 0 | deltaPos += intraPredAngle; |
775 | 0 | } |
776 | 0 | } |
777 | |
|
778 | 0 | } |
779 | 0 | else |
780 | 0 | { |
781 | 0 | int deltaPos = intraPredAngle * (1 + multiRefIdx); |
782 | 0 | if ( width >=8 ) |
783 | 0 | { |
784 | 0 | IntraPredAngleChroma8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle); |
785 | 0 | } |
786 | 0 | else if( width == 4 ) |
787 | 0 | { |
788 | 0 | IntraPredAngleChroma4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle); |
789 | 0 | } |
790 | 0 | else |
791 | 0 | { |
792 | 0 | IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle); |
793 | 0 | } |
794 | 0 | } |
795 | |
|
796 | 0 | } |
797 | 0 | else |
798 | 0 | { |
799 | 0 | for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride) |
800 | 0 | { |
801 | 0 | const int deltaInt = deltaPos >> 5; |
802 | | // Just copy the integer samples |
803 | 0 | memcpy(pDsty,refMain + deltaInt + 1,width*sizeof(Pel)); |
804 | 0 | } |
805 | 0 | } |
806 | | |
807 | 0 | pDsty=pDstBuf; |
808 | 0 | for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride) |
809 | 0 | { |
810 | 0 | int angularScale = 0; |
811 | 0 | if( intraPredAngle < 0 ) |
812 | 0 | { |
813 | 0 | doPDPC = false; |
814 | 0 | } |
815 | 0 | else if( intraPredAngle > 0 ) |
816 | 0 | { |
817 | 0 | const int sideSize = predMode >= DIA_IDX ? pDst.height : pDst.width; |
818 | 0 | const int maxScale = 2; |
819 | | |
820 | 0 | angularScale = std::min(maxScale, getLog2(sideSize) - (getLog2(3 * invAngle - 2) - 8)); |
821 | 0 | doPDPC &= angularScale >= 0; |
822 | 0 | } |
823 | | |
824 | 0 | if( doPDPC ) |
825 | 0 | { |
826 | 0 | int invAngleSum = 256; |
827 | |
|
828 | 0 | for (int x = 0; x < std::min(3 << angularScale, width); x++) |
829 | 0 | { |
830 | 0 | invAngleSum += invAngle; |
831 | |
|
832 | 0 | int wL = 32 >> (2 * x >> angularScale); |
833 | 0 | Pel left = refSide[y + (invAngleSum >> 9) + 1]; |
834 | 0 | pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6); |
835 | 0 | } |
836 | 0 | } |
837 | 0 | } |
838 | 0 | } |
839 | | |
840 | | // Flip the block if this is the horizontal mode |
841 | 0 | if( !bIsModeVer ) |
842 | 0 | { |
843 | 0 | pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height ) ); |
844 | 0 | } |
845 | 0 | } |
846 | | |
847 | | void IntraPrediction::xPredIntraBDPCM(const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng ) |
848 | 0 | { |
849 | 0 | const int wdt = pDst.width; |
850 | 0 | const int hgt = pDst.height; |
851 | |
|
852 | 0 | const ptrdiff_t strideP = pDst.stride; |
853 | 0 | const ptrdiff_t strideS = pSrc.stride; |
854 | |
|
855 | 0 | CHECK( !( dirMode == 1 || dirMode == 2 ), "Incorrect BDPCM mode parameter." ); |
856 | |
|
857 | 0 | Pel* pred = &pDst.buf[0]; |
858 | 0 | if( dirMode == 1 ) |
859 | 0 | { |
860 | 0 | Pel val; |
861 | 0 | for( int y = 0; y < hgt; y++ ) |
862 | 0 | { |
863 | 0 | val = pSrc.buf[(y + 1) * strideS]; |
864 | 0 | for( int x = 0; x < wdt; x++ ) |
865 | 0 | { |
866 | 0 | pred[x] = val; |
867 | 0 | } |
868 | 0 | pred += strideP; |
869 | 0 | } |
870 | 0 | } |
871 | 0 | else |
872 | 0 | { |
873 | 0 | for( int y = 0; y < hgt; y++ ) |
874 | 0 | { |
875 | 0 | for( int x = 0; x < wdt; x++ ) |
876 | 0 | { |
877 | 0 | pred[x] = pSrc.buf[x + 1]; |
878 | 0 | } |
879 | 0 | pred += strideP; |
880 | 0 | } |
881 | 0 | } |
882 | 0 | } |
883 | | |
884 | | void IntraPrediction::predBlendIntraCiip( PelUnitBuf &predUnit, const CodingUnit &cu ) |
885 | 0 | { |
886 | 0 | int maxCompID = 1; |
887 | |
|
888 | 0 | if( isChromaEnabled( cu.chromaFormat ) && cu.chromaSize().width > 2 ) |
889 | 0 | { |
890 | 0 | maxCompID = MAX_NUM_COMPONENT; |
891 | 0 | } |
892 | |
|
893 | 0 | for( int currCompID = 0; currCompID < maxCompID; currCompID++ ) |
894 | 0 | { |
895 | 0 | PelBuf& pred = predUnit.bufs[ currCompID ]; |
896 | 0 | const int width = pred.width; |
897 | 0 | const int height = pred.height; |
898 | 0 | const ptrdiff_t srcStride = width; |
899 | 0 | const ptrdiff_t dstStride = pred.stride; |
900 | 0 | Pel* dstBuf = pred.buf; |
901 | 0 | const bool isUseFilter = currCompID == 0 && IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, cu, cu ); |
902 | 0 | Pel* srcBuf = m_piYuvExt[!isUseFilter]; |
903 | 0 | PelBuf srcAreaBuf( srcBuf, srcStride, width, height ); |
904 | |
|
905 | 0 | { |
906 | 0 | PROFILER_SCOPE_AND_STAGE_EXT( 1, g_timeProfiler, P_INTRAPRED, *cu.cs, compID ); |
907 | |
|
908 | 0 | initIntraPatternChType( cu.firstTU, cu.blocks[currCompID], isUseFilter ); |
909 | 0 | predIntraAng( ComponentID( currCompID ), srcAreaBuf, cu, isUseFilter ); |
910 | 0 | } |
911 | |
|
912 | 0 | CHECKD( width == 2, "Width of 2 is not supported" ); |
913 | |
|
914 | 0 | const Position posBL = cu.Y().bottomLeft(); |
915 | 0 | const Position posTR = cu.Y().topRight(); |
916 | |
|
917 | 0 | const CodingUnit* cuLeft = cu.cs->getCURestricted( posBL.offset( -1, 0 ), cu, CHANNEL_TYPE_LUMA, cu.left ); |
918 | 0 | const CodingUnit* cuAbove = cu.cs->getCURestricted( posTR.offset( 0, -1 ), cu, CHANNEL_TYPE_LUMA, cu.above ); |
919 | |
|
920 | 0 | const bool isNeigh0Intra = cuLeft && ( CU::isIntra( *cuLeft ) ); |
921 | 0 | const bool isNeigh1Intra = cuAbove && ( CU::isIntra( *cuAbove ) ); |
922 | |
|
923 | 0 | const int wIntra = 3 - !isNeigh0Intra - !isNeigh1Intra; |
924 | 0 | const int wMerge = 3 - !!isNeigh0Intra - !!isNeigh1Intra; |
925 | |
|
926 | 0 | for( int y = 0; y < height; y++ ) |
927 | 0 | { |
928 | 0 | for( int x = 0; x < width; x += 4 ) |
929 | 0 | { |
930 | 0 | dstBuf[y * dstStride + x + 0] = ( wMerge * dstBuf[y * dstStride + x + 0] + wIntra * srcBuf[y * srcStride + x + 0] + 2 ) >> 2; |
931 | 0 | dstBuf[y * dstStride + x + 1] = ( wMerge * dstBuf[y * dstStride + x + 1] + wIntra * srcBuf[y * srcStride + x + 1] + 2 ) >> 2; |
932 | 0 | dstBuf[y * dstStride + x + 2] = ( wMerge * dstBuf[y * dstStride + x + 2] + wIntra * srcBuf[y * srcStride + x + 2] + 2 ) >> 2; |
933 | 0 | dstBuf[y * dstStride + x + 3] = ( wMerge * dstBuf[y * dstStride + x + 3] + wIntra * srcBuf[y * srcStride + x + 3] + 2 ) >> 2; |
934 | 0 | } |
935 | 0 | } |
936 | 0 | } |
937 | 0 | } |
938 | | |
939 | | inline int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT, |
940 | | const uint32_t uiNumUnitsInPU, const uint32_t unitWidth); |
941 | | inline int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT, |
942 | | const uint32_t uiNumUnitsInPU, const uint32_t unitWidth); |
943 | | |
944 | | void IntraPrediction::initIntraPatternChType(const TransformUnit &tu, const CompArea &area, const bool bFilterRefSamples) |
945 | 0 | { |
946 | 0 | CHECK( area.width == 2, "Width of 2 is not supported" ); |
947 | 0 | const CodingStructure& cs = *tu.cu->cs; |
948 | |
|
949 | 0 | Pel *refBufUnfiltered = m_piYuvExt[PRED_BUF_UNFILTERED]; |
950 | 0 | Pel *refBufFiltered = m_piYuvExt[PRED_BUF_FILTERED]; |
951 | |
|
952 | 0 | setReferenceArrayLengths( area ); |
953 | | |
954 | | // ----- Step 1: unfiltered reference samples ----- |
955 | 0 | xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, tu ); |
956 | | // ----- Step 2: filtered reference samples ----- |
957 | 0 | if( bFilterRefSamples ) |
958 | 0 | { |
959 | 0 | xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps , tu.cu->multiRefIdx() ); |
960 | 0 | } |
961 | 0 | } |
962 | | |
963 | | void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf) |
964 | 0 | { |
965 | 0 | const CodingStructure& cs = *cu.cs; |
966 | |
|
967 | 0 | const Position &posLT = area.pos(); |
968 | 0 | bool isLeftAvail = nullptr != cs.getCURestricted( posLT.offset( -1, 0 ), cu, CH_L, posLT.x == cu.lx() ? cu.left : &cu ); |
969 | 0 | bool isAboveAvail = nullptr != cs.getCURestricted( posLT.offset( 0, -1 ), cu, CH_L, posLT.y == cu.ly() ? cu.left : &cu ); |
970 | | |
971 | | // ----- Step 1: unfiltered reference samples ----- |
972 | 0 | if( cu.blocks[area.compID()].x == area.x && cu.blocks[area.compID()].y == area.y ) |
973 | 0 | { |
974 | 0 | Pel* refBufUnfiltered = m_piYuvExt[PRED_BUF_UNFILTERED]; |
975 | | // With the first subpartition all the CU reference samples are fetched at once in a single call to xFillReferenceSamples |
976 | 0 | if( cu.ispMode() == HOR_INTRA_SUBPARTITIONS ) |
977 | 0 | { |
978 | 0 | m_leftRefLength = cu.Y().height << 1; |
979 | 0 | m_topRefLength = cu.Y().width + area.width; |
980 | 0 | } |
981 | 0 | else //if (cu.ispMode() == VER_INTRA_SUBPARTITIONS) |
982 | 0 | { |
983 | 0 | m_leftRefLength = cu.Y().height + area.height; |
984 | 0 | m_topRefLength = cu.Y().width << 1; |
985 | 0 | } |
986 | |
|
987 | 0 | const int srcStride = m_topRefLength + 1; |
988 | 0 | const int srcHStride = m_leftRefLength + 1; |
989 | |
|
990 | 0 | m_pelBufISP[0] = m_pelBufISPBase[0] = PelBuf(m_piYuvExt[PRED_BUF_UNFILTERED], srcStride, srcHStride); |
991 | 0 | m_pelBufISP[1] = m_pelBufISPBase[1] = PelBuf(m_piYuvExt[PRED_BUF_FILTERED], srcStride, srcHStride); |
992 | |
|
993 | 0 | xFillReferenceSamples( cs.picture->getRecoBuf( cu.Y() ), refBufUnfiltered, cu.Y(), isLuma( area.compID() ) ? cu.firstTU : *cu.lastTU ); |
994 | | |
995 | | // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the current subpartition |
996 | 0 | m_topRefLength = cu.blocks[area.compID()].width + area.width; |
997 | 0 | m_leftRefLength = cu.blocks[area.compID()].height + area.height; |
998 | 0 | } |
999 | 0 | else |
1000 | 0 | { |
1001 | | //Now we only need to fetch the newly available reconstructed samples from the previously coded TU |
1002 | 0 | Position tuPos = area; |
1003 | 0 | tuPos.relativeTo(cu.Y()); |
1004 | 0 | m_pelBufISP[0] = m_pelBufISPBase[0].subBuf(tuPos, area.size()); |
1005 | 0 | m_pelBufISP[1] = m_pelBufISPBase[1].subBuf(tuPos, area.size()); |
1006 | |
|
1007 | 0 | PelBuf& dstBuf = m_pelBufISP[0]; |
1008 | |
|
1009 | 0 | m_topRefLength = cu.blocks[area.compID()].width + area.width; |
1010 | 0 | m_leftRefLength = cu.blocks[area.compID()].height + area.height; |
1011 | |
|
1012 | 0 | const int predSizeHor = m_topRefLength; |
1013 | 0 | const int predSizeVer = m_leftRefLength; |
1014 | 0 | if (cu.ispMode() == HOR_INTRA_SUBPARTITIONS) |
1015 | 0 | { |
1016 | 0 | Pel* src = recBuf.bufAt(0, -1); |
1017 | 0 | Pel* dst = dstBuf.bufAt(1, 0); |
1018 | 0 | for (int i = 0; i < area.width; i++) |
1019 | 0 | { |
1020 | 0 | dst[i] = src[i]; |
1021 | 0 | } |
1022 | 0 | Pel sample = src[area.width - 1]; |
1023 | 0 | dst += area.width; |
1024 | 0 | for (int i = 0; i < predSizeHor - area.width; i++) |
1025 | 0 | { |
1026 | 0 | dst[i] = sample; |
1027 | 0 | } |
1028 | 0 | if (!isLeftAvail) //if left is not avaible, then it is necessary to fetch these samples for each subpartition |
1029 | 0 | { |
1030 | 0 | Pel* dst = dstBuf.bufAt(0, 0); |
1031 | 0 | Pel sample = src[0]; |
1032 | 0 | for (int i = 0; i < predSizeVer + 1; i++) |
1033 | 0 | { |
1034 | 0 | *dst = sample; |
1035 | 0 | dst += dstBuf.stride; |
1036 | 0 | } |
1037 | 0 | } |
1038 | 0 | } |
1039 | 0 | else |
1040 | 0 | { |
1041 | 0 | Pel* src = recBuf.bufAt(-1, 0); |
1042 | 0 | Pel* dst = dstBuf.bufAt(0, 1); |
1043 | 0 | for (int i = 0; i < area.height; i++) |
1044 | 0 | { |
1045 | 0 | *dst = *src; |
1046 | 0 | src += recBuf.stride; |
1047 | 0 | dst += dstBuf.stride; |
1048 | 0 | } |
1049 | 0 | Pel sample = src[-recBuf.stride]; |
1050 | 0 | for (int i = 0; i < predSizeVer - area.height; i++) |
1051 | 0 | { |
1052 | 0 | *dst = sample; |
1053 | 0 | dst += dstBuf.stride; |
1054 | 0 | } |
1055 | |
|
1056 | 0 | if (!isAboveAvail) //if above is not avaible, then it is necessary to fetch these samples for each subpartition |
1057 | 0 | { |
1058 | 0 | Pel* dst = dstBuf.bufAt(0, 0); |
1059 | 0 | Pel sample = recBuf.at(-1, 0); |
1060 | 0 | for (int i = 0; i < predSizeHor + 1; i++) |
1061 | 0 | { |
1062 | 0 | dst[i] = sample; |
1063 | 0 | } |
1064 | 0 | } |
1065 | 0 | } |
1066 | 0 | } |
1067 | 0 | } |
1068 | | |
1069 | | void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const TransformUnit &tu ) |
1070 | 0 | { |
1071 | 0 | const ChannelType chType = toChannelType( area.compID() ); |
1072 | 0 | const CodingUnit &cu = *tu.cu; |
1073 | 0 | const CodingStructure &cs = *cu.cs; |
1074 | 0 | const SPS &sps = *cs.sps; |
1075 | 0 | const PreCalcValues &pcv = *cs.pcv; |
1076 | |
|
1077 | 0 | const int multiRefIdx = (area.compID() == COMPONENT_Y) ? cu.multiRefIdx() : 0; |
1078 | |
|
1079 | 0 | const int tuWidth = area.width; |
1080 | 0 | const int tuHeight = area.height; |
1081 | 0 | const int predSize = m_topRefLength; |
1082 | 0 | const int predHSize = m_leftRefLength; |
1083 | 0 | const int predStride = predSize + 1 + multiRefIdx; |
1084 | |
|
1085 | 0 | const int csx = getChannelTypeScaleX( chType, pcv.chrFormat ); |
1086 | 0 | const int csy = getChannelTypeScaleY( chType, pcv.chrFormat ); |
1087 | |
|
1088 | 0 | const int unitWidth = pcv.minCUWidth >> csx; |
1089 | 0 | const int unitHeight = pcv.minCUHeight >> csy; |
1090 | |
|
1091 | 0 | const int totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth; |
1092 | 0 | const int totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight; |
1093 | 0 | const int totalUnits = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left |
1094 | 0 | const int numAboveUnits = tuWidth / unitWidth; |
1095 | 0 | const int numLeftUnits = tuHeight / unitHeight; |
1096 | 0 | const int numAboveRightUnits = totalAboveUnits - numAboveUnits; |
1097 | 0 | const int numLeftBelowUnits = totalLeftUnits - numLeftUnits; |
1098 | |
|
1099 | 0 | CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" ); |
1100 | | |
1101 | | // ----- Step 1: analyze neighborhood ----- |
1102 | 0 | if( m_lastCUidx == tu.cu->idx && area.compID() != getFirstComponentOfChannel( tu.cu->chType() ) ) |
1103 | 0 | { |
1104 | 0 | } |
1105 | 0 | else |
1106 | 0 | { |
1107 | 0 | const Position posLT = area.pos(); |
1108 | | |
1109 | 0 | const bool sameCTUx = !!( posLT.x & ( pcv.maxCUWidthMask >> csx ) ); |
1110 | 0 | const bool sameCTUy = !!( posLT.y & ( pcv.maxCUHeightMask >> csy ) ); |
1111 | 0 | const bool sameCTU = sameCTUx && sameCTUy; |
1112 | |
|
1113 | 0 | m_neighborSize[0] = sameCTU ? 1 : !!cu.cs->getCURestricted( posLT.offset( -1, -1 ), cu, chType, cu.left ? cu.left : cu.above ); |
1114 | |
|
1115 | 0 | if( cu.above || area.y > tu.cu->blocks[chType].y ) |
1116 | 0 | { |
1117 | 0 | m_neighborSize[1] = numAboveUnits; |
1118 | |
|
1119 | 0 | Position posAR{ posLT.x + ( PosType ) area.width, posLT.y }; |
1120 | 0 | m_neighborSize[1] += isAboveAvailable( tu, chType, posAR, numAboveRightUnits, unitWidth ); |
1121 | 0 | } |
1122 | 0 | else |
1123 | 0 | m_neighborSize[1] = 0; |
1124 | |
|
1125 | 0 | if( cu.left || area.x > tu.cu->blocks[chType].x ) |
1126 | 0 | { |
1127 | 0 | m_neighborSize[2] = numLeftUnits; |
1128 | |
|
1129 | 0 | Position posLB{ posLT.x, posLT.y + ( PosType ) area.height }; |
1130 | 0 | m_neighborSize[2] += isLeftAvailable( tu, chType, posLB, numLeftBelowUnits, unitHeight ); |
1131 | 0 | } |
1132 | 0 | else |
1133 | 0 | m_neighborSize[2] = 0; |
1134 | |
|
1135 | 0 | m_lastCUidx = tu.cu->idx; |
1136 | 0 | } |
1137 | |
|
1138 | 0 | int numIntraNeighbor = m_neighborSize[0] + m_neighborSize[1] + m_neighborSize[2]; |
1139 | | |
1140 | | // ----- Step 2: fill reference samples (depending on neighborhood) ----- |
1141 | 0 | const Pel* srcBuf = recoBuf.buf; |
1142 | 0 | const ptrdiff_t srcStride = recoBuf.stride; |
1143 | 0 | Pel* ptrDst = refBufUnfiltered; |
1144 | 0 | const Pel* ptrSrc; |
1145 | 0 | const Pel valueDC = 1 << (sps.getBitDepth() - 1); |
1146 | |
|
1147 | 0 | if( numIntraNeighbor == 0 ) |
1148 | 0 | { |
1149 | | // Fill border with DC value |
1150 | 0 | for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; } |
1151 | 0 | for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = valueDC; } |
1152 | 0 | } |
1153 | 0 | else if( numIntraNeighbor == totalUnits ) |
1154 | 0 | { |
1155 | | // Fill top-left border and top and top right with rec. samples |
1156 | 0 | ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx); |
1157 | 0 | for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; } |
1158 | 0 | ptrSrc = srcBuf - multiRefIdx * srcStride - (1 + multiRefIdx); |
1159 | 0 | for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; } |
1160 | 0 | } |
1161 | 0 | else // reference samples are partially available |
1162 | 0 | { |
1163 | | // Fill top-left sample(s) if available |
1164 | 0 | if ( m_neighborSize[2] > 0) { // left is available |
1165 | | // Fill left & below-left samples if available (downwards) |
1166 | 0 | ptrSrc = srcBuf - (1 + multiRefIdx); |
1167 | 0 | ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride; |
1168 | 0 | int tmpSize = m_neighborSize[2] * unitHeight; |
1169 | 0 | tmpSize = std::min(tmpSize, predHSize); |
1170 | 0 | for (int i = 0; i < tmpSize; i++) { |
1171 | 0 | ptrDst[i * predStride] = ptrSrc[i * srcStride]; |
1172 | 0 | } |
1173 | | |
1174 | | // pad |
1175 | 0 | Pel tmpPixel = ptrDst[(tmpSize - 1) * predStride]; |
1176 | 0 | for (int i = tmpSize; i < predHSize; i++) { |
1177 | 0 | ptrDst[i * predStride] = tmpPixel; |
1178 | 0 | } |
1179 | | |
1180 | | // Fill top-left sample(s) if available |
1181 | 0 | if ( m_neighborSize[0]) { |
1182 | 0 | ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx); |
1183 | 0 | ptrDst = refBufUnfiltered; |
1184 | 0 | memcpy(ptrDst, ptrSrc, sizeof(Pel) * (multiRefIdx + 1)); |
1185 | 0 | for (int i = 1; i <= multiRefIdx; i++) { |
1186 | 0 | ptrDst[i * predStride] = ptrSrc[i * srcStride]; |
1187 | 0 | } |
1188 | 0 | } else { // pad |
1189 | 0 | ptrSrc = srcBuf - (1 + multiRefIdx); // left pixel |
1190 | 0 | ptrDst = refBufUnfiltered; |
1191 | 0 | tmpPixel = ptrSrc[0]; |
1192 | 0 | ptrDst[0] = tmpPixel; |
1193 | 0 | for (int i = 1; i <= multiRefIdx; i++) { |
1194 | 0 | ptrDst[i] = tmpPixel; |
1195 | 0 | ptrDst[i * predStride] = tmpPixel; |
1196 | 0 | } |
1197 | 0 | } |
1198 | | |
1199 | | // Fill above & above-right samples if available (left-to-right) |
1200 | 0 | if ( m_neighborSize[1]) { |
1201 | 0 | ptrSrc = srcBuf - srcStride * (1 + multiRefIdx); |
1202 | 0 | ptrDst = refBufUnfiltered + 1 + multiRefIdx; |
1203 | 0 | tmpSize = m_neighborSize[1] * unitWidth; |
1204 | 0 | tmpSize = std::min(tmpSize, predSize); |
1205 | 0 | memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel)); |
1206 | | // pad |
1207 | 0 | Pel tmpPixel = ptrDst[tmpSize - 1]; |
1208 | 0 | for (int i = tmpSize; i < predSize; i++) { |
1209 | 0 | ptrDst[i] = tmpPixel; |
1210 | 0 | } |
1211 | 0 | } else { // all not available, pad |
1212 | 0 | ptrSrc = srcBuf - srcStride * (1 + multiRefIdx); |
1213 | 0 | ptrDst = refBufUnfiltered + 1 + multiRefIdx; |
1214 | 0 | Pel tmpPixel = ptrDst[-1]; |
1215 | 0 | std::fill_n(ptrDst, predSize, tmpPixel); |
1216 | 0 | } |
1217 | 0 | } else { // left is not available, top must be available |
1218 | | // Fill above & above-right samples (left-to-right) |
1219 | 0 | ptrSrc = srcBuf - srcStride * (1 + multiRefIdx); |
1220 | 0 | ptrDst = refBufUnfiltered + 1 + multiRefIdx; |
1221 | 0 | int tmpSize = m_neighborSize[1] * unitWidth; |
1222 | 0 | tmpSize = std::min(tmpSize, predSize); |
1223 | 0 | memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel)); |
1224 | | // pad |
1225 | 0 | Pel tmpPixel = ptrDst[tmpSize - 1]; |
1226 | 0 | for (int i = tmpSize; i < predSize; i++) { |
1227 | 0 | ptrDst[i] = tmpPixel; |
1228 | 0 | } |
1229 | |
|
1230 | 0 | tmpPixel = ptrSrc[0]; |
1231 | | // pad top-left sample(s) |
1232 | 0 | ptrDst = refBufUnfiltered; |
1233 | 0 | ptrDst[0] = tmpPixel; |
1234 | 0 | for (int i = 1; i <= multiRefIdx; i++) { |
1235 | 0 | ptrDst[i] = tmpPixel; |
1236 | 0 | ptrDst[i * predStride] = tmpPixel; |
1237 | 0 | } |
1238 | | |
1239 | | // pad left sample(s) |
1240 | 0 | ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride; |
1241 | 0 | for (int i = 0; i < predHSize; i++) { |
1242 | 0 | ptrDst[i * predStride] = tmpPixel; |
1243 | 0 | } |
1244 | 0 | } |
1245 | 0 | } |
1246 | 0 | } |
1247 | | |
1248 | | void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps, int multiRefIdx, ptrdiff_t stride ) const |
1249 | 0 | { |
1250 | 0 | if (area.compID() != COMPONENT_Y) |
1251 | 0 | { |
1252 | 0 | multiRefIdx = 0; |
1253 | 0 | } |
1254 | 0 | const int predSize = m_topRefLength + multiRefIdx; |
1255 | 0 | const int predHSize = m_leftRefLength + multiRefIdx; |
1256 | 0 | const ptrdiff_t predStride = stride == 0 ? predSize + 1 : stride; |
1257 | | |
1258 | | |
1259 | | |
1260 | | // Regular reference sample filter |
1261 | 0 | const Pel *piSrcPtr = refBufUnfiltered + (predStride * predHSize); // bottom left |
1262 | 0 | Pel *piDestPtr = refBufFiltered + (predStride * predHSize); // bottom left |
1263 | | |
1264 | | // bottom left (not filtered) |
1265 | 0 | *piDestPtr = *piSrcPtr; |
1266 | 0 | piDestPtr -= predStride; |
1267 | 0 | piSrcPtr -= predStride; |
1268 | | //left column (bottom to top) |
1269 | 0 | for( int i = 1; i < predHSize; i++, piDestPtr -= predStride, piSrcPtr -= predStride) |
1270 | 0 | { |
1271 | 0 | *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[-predStride] + 2) >> 2; |
1272 | 0 | } |
1273 | | //top-left |
1274 | 0 | *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[1] + 2) >> 2; |
1275 | 0 | piDestPtr++; |
1276 | 0 | piSrcPtr++; |
1277 | | //top row (left-to-right) |
1278 | 0 | for( uint32_t i=1; i < predSize; i++, piDestPtr++, piSrcPtr++ ) |
1279 | 0 | { |
1280 | 0 | *piDestPtr = (piSrcPtr[1] + 2 * piSrcPtr[0] + piSrcPtr[-1] + 2) >> 2; |
1281 | 0 | } |
1282 | | // top right (not filtered) |
1283 | 0 | *piDestPtr=*piSrcPtr; |
1284 | 0 | } |
1285 | | |
1286 | | static bool getUseFilterRef( const int predMode ) |
1287 | 0 | { |
1288 | 0 | static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 }; |
1289 | |
|
1290 | 0 | const int intraPredAngleMode = ( predMode >= DIA_IDX ) ? predMode - VER_IDX : -( predMode - HOR_IDX ); |
1291 | |
|
1292 | 0 | const int absAngMode = abs(intraPredAngleMode); |
1293 | 0 | const int absAng = angTable [absAngMode]; |
1294 | |
|
1295 | 0 | return 0 == ( absAng & 0x1F ); |
1296 | 0 | } |
1297 | | |
1298 | | bool IntraPrediction::useFilteredIntraRefSamples( const ComponentID &compID, const CodingUnit &cu, const UnitArea &tuArea ) |
1299 | 0 | { |
1300 | | //const SPS &sps = *cu.sps; |
1301 | 0 | const ChannelType chType = toChannelType( compID ); |
1302 | | |
1303 | | // high level conditions |
1304 | | //if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() ) { return false; } |
1305 | | //if( !isLuma( chType ) ) { return false; } |
1306 | | //if( cu.ispMode() && isLuma(compID) ) { return false; } |
1307 | | //if( CU::isMIP( cu, chType ) ) { return false; } |
1308 | 0 | if( cu.multiRefIdx() ) { return false; } |
1309 | 0 | if( cu.bdpcmMode() ) { return false; } |
1310 | | |
1311 | | // pred. mode related conditions |
1312 | 0 | const int dirMode = PU::getFinalIntraMode( cu, chType ); |
1313 | 0 | if (dirMode == DC_IDX) { return false; } |
1314 | 0 | if (dirMode == PLANAR_IDX) |
1315 | 0 | { |
1316 | 0 | return tuArea.blocks[compID].area() > 32 ? true : false; |
1317 | 0 | } |
1318 | | |
1319 | 0 | const int predMode = getWideAngle(tuArea.blocks[compID].width, tuArea.blocks[compID].height, dirMode); |
1320 | 0 | const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) ); |
1321 | 0 | const int log2Size = ( ( getLog2( tuArea.blocks[compID].width ) + getLog2( tuArea.blocks[compID].height ) ) >> 1 ); |
1322 | 0 | CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" ); |
1323 | 0 | return diff > m_aucIntraFilter[ chType ][ log2Size ] && getUseFilterRef( predMode ); |
1324 | 0 | } |
1325 | | |
1326 | | static inline TransformUnit const* getTU( const CodingUnit& cu, const Position& pos, const ChannelType chType ) |
1327 | 0 | { |
1328 | 0 | const TransformUnit* ptu = &cu.firstTU; |
1329 | |
|
1330 | 0 | if( !ptu->next ) return ptu; |
1331 | | |
1332 | 0 | while( !( ptu->blocks[chType].x + ptu->blocks[chType].width > pos.x && ptu->blocks[chType].y + ptu->blocks[chType].height > pos.y ) ) |
1333 | 0 | { |
1334 | 0 | ptu = ptu->next; |
1335 | 0 | } |
1336 | |
|
1337 | 0 | return ptu; |
1338 | 0 | } |
1339 | | |
1340 | | int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT, |
1341 | | const uint32_t uiNumUnitsInPU, const uint32_t unitWidth) |
1342 | 0 | { |
1343 | 0 | const CodingUnit &cu = *tu.cu; |
1344 | 0 | const CodingStructure &cs = *cu.cs; |
1345 | |
|
1346 | 0 | int maxDx = uiNumUnitsInPU * unitWidth; |
1347 | 0 | Position refPos = posLT.offset(0, -1); |
1348 | 0 | const TransformUnit *pcTUAbove = nullptr; |
1349 | 0 | const int currTUIdx = tu.idx; |
1350 | 0 | int dx = 0; |
1351 | |
|
1352 | 0 | while( dx < maxDx ) |
1353 | 0 | { |
1354 | 0 | const CodingUnit* cuAbove = cs.getCURestricted( refPos, cu, chType, pcTUAbove ? nullptr : cu.above ); |
1355 | |
|
1356 | 0 | if( !cuAbove ) break; |
1357 | 0 | pcTUAbove = getTU( *cuAbove, refPos, chType ); |
1358 | 0 | if( cuAbove->ctuData == cu.ctuData && pcTUAbove->idx >= currTUIdx ) break; |
1359 | | |
1360 | 0 | int diff = ( int ) pcTUAbove->blocks[chType].width - refPos.x + pcTUAbove->blocks[chType].x; |
1361 | 0 | dx += diff; |
1362 | 0 | refPos.x += diff; |
1363 | 0 | } |
1364 | |
|
1365 | 0 | int neighborSize = dx / unitWidth; |
1366 | 0 | neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU ); |
1367 | 0 | return neighborSize; |
1368 | 0 | } |
1369 | | |
1370 | | int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT, |
1371 | | const uint32_t uiNumUnitsInPU, const uint32_t unitHeight) |
1372 | 0 | { |
1373 | 0 | const CodingUnit &cu = *tu.cu; |
1374 | 0 | const CodingStructure &cs = *cu.cs; |
1375 | |
|
1376 | 0 | int maxDy = uiNumUnitsInPU * unitHeight; |
1377 | 0 | Position refPos = posLT.offset(-1, 0); |
1378 | 0 | const TransformUnit *pcTULeft = nullptr; |
1379 | 0 | int currTUIdx = tu.idx; |
1380 | 0 | int dy = 0; |
1381 | |
|
1382 | 0 | while( dy < maxDy ) |
1383 | 0 | { |
1384 | 0 | const CodingUnit* cuLeft = cs.getCURestricted( refPos, cu, chType, pcTULeft ? nullptr : cu.left ); |
1385 | |
|
1386 | 0 | if( !cuLeft ) break; |
1387 | 0 | pcTULeft = getTU( *cuLeft, refPos, chType ); |
1388 | 0 | if( cuLeft->ctuData == cu.ctuData && pcTULeft->idx >= currTUIdx ) break; |
1389 | | |
1390 | 0 | int diff = ( int ) pcTULeft->blocks[chType].height - refPos.y + pcTULeft->blocks[chType].y; |
1391 | 0 | dy += diff; |
1392 | 0 | refPos.y += diff; |
1393 | 0 | } |
1394 | |
|
1395 | 0 | int neighborSize = dy / unitHeight; |
1396 | 0 | neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU ); |
1397 | 0 | return neighborSize; |
1398 | 0 | } |
1399 | | // LumaRecPixels |
1400 | | NO_THREAD_SANITIZE void IntraPrediction::xGetLumaRecPixels(const CodingUnit &cu, CompArea chromaArea) |
1401 | 0 | { |
1402 | 0 | int iDstStride = 0; |
1403 | 0 | Pel* pDst0 = 0; |
1404 | 0 | int curChromaMode = cu.intraDir[1]; |
1405 | 0 | if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX)) |
1406 | 0 | { |
1407 | 0 | iDstStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1; |
1408 | 0 | pDst0 = m_piYuvExt[1] + iDstStride + 1; |
1409 | 0 | } |
1410 | 0 | else |
1411 | 0 | { |
1412 | 0 | iDstStride = MAX_TU_SIZE_FOR_PROFILE + 1; |
1413 | 0 | pDst0 = m_piYuvExt[1] + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES; |
1414 | 0 | } |
1415 | | //assert 420 chroma subsampling |
1416 | 0 | CompArea lumaArea = CompArea( COMPONENT_Y, chromaArea.lumaPos( cu.chromaFormat), |
1417 | 0 | recalcSize( cu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus) |
1418 | | |
1419 | |
|
1420 | 0 | CHECK( lumaArea.width == chromaArea.width && CHROMA_444 != cu.chromaFormat, "" ); |
1421 | 0 | CHECK( lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "" ); |
1422 | |
|
1423 | 0 | const SizeType uiCWidth = chromaArea.width; |
1424 | 0 | const SizeType uiCHeight = chromaArea.height; |
1425 | |
|
1426 | 0 | CPelBuf Src = cu.cs->picture->getRecoBuf( lumaArea ); |
1427 | 0 | Pel const* pRecSrc0 = Src.bufAt( 0, 0 ); |
1428 | 0 | ptrdiff_t iRecStride = Src.stride; |
1429 | 0 | int logSubWidthC = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, cu.chromaFormat); |
1430 | 0 | int logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, cu.chromaFormat); |
1431 | | |
1432 | |
|
1433 | 0 | ptrdiff_t iRecStride2 = iRecStride << logSubHeightC; // TODO: really Height here? not Width? |
1434 | 0 | const int mult = 1 << logSubWidthC ; |
1435 | |
|
1436 | 0 | const CompArea& area = isChroma( cu.chType() ) ? chromaArea : lumaArea; |
1437 | |
|
1438 | 0 | const uint32_t uiTuWidth = area.width; |
1439 | 0 | const uint32_t uiTuHeight = area.height; |
1440 | |
|
1441 | 0 | int iBaseUnitSize = ( 1 << MIN_CU_LOG2 ); |
1442 | |
|
1443 | 0 | const int iUnitWidth = iBaseUnitSize >> getComponentScaleX( area.compID(), cu.chromaFormat ); |
1444 | 0 | const int iUnitHeight = iBaseUnitSize >> getComponentScaleY( area.compID(), cu.chromaFormat ); |
1445 | 0 | const int iTUWidthInUnits = uiTuWidth / iUnitWidth; |
1446 | 0 | const int iTUHeightInUnits = uiTuHeight / iUnitHeight; |
1447 | 0 | const int iAboveUnits = iTUWidthInUnits; |
1448 | 0 | const int iLeftUnits = iTUHeightInUnits; |
1449 | 0 | const int chromaUnitWidth = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, cu.chromaFormat); |
1450 | 0 | const int chromaUnitHeight = iBaseUnitSize >> getComponentScaleY(COMPONENT_Cb, cu.chromaFormat); |
1451 | 0 | const int topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H. |
1452 | 0 | const int leftTemplateSampNum = 2 * uiCHeight; |
1453 | 0 | CHECKD( !( m_topRefLength >= topTemplateSampNum ), "Error!" ); |
1454 | 0 | CHECKD( !( m_leftRefLength >= leftTemplateSampNum ), "Error!" ); |
1455 | 0 | int totalAboveUnits = (curChromaMode == MDLM_T_IDX) ? (topTemplateSampNum + (chromaUnitWidth - 1)) / chromaUnitWidth : iAboveUnits; |
1456 | 0 | int totalLeftUnits = (curChromaMode == MDLM_L_IDX) ? (leftTemplateSampNum + (chromaUnitHeight - 1)) / chromaUnitHeight : iLeftUnits; |
1457 | |
|
1458 | 0 | const int availlableLeftUnit = ( cu.left || chromaArea.x > cu.blocks[CH_C].x ) ? totalLeftUnits : 0; |
1459 | 0 | const bool bLeftAvaillable = availlableLeftUnit >= iTUHeightInUnits; |
1460 | | |
1461 | 0 | const int availlableAboveUnit = ( cu.above || chromaArea.y > cu.blocks[CH_C].y ) ? totalAboveUnits : 0; |
1462 | 0 | const bool bAboveAvaillable = availlableAboveUnit >= iTUWidthInUnits; |
1463 | |
|
1464 | 0 | Pel* pDst = nullptr; |
1465 | 0 | Pel const* piSrc = nullptr; |
1466 | |
|
1467 | 0 | bool isFirstRowOfCtu = ( lumaArea.y & ( cu.sps->getCTUSize() - 1) ) == 0; |
1468 | 0 | const ptrdiff_t strOffset = ( CHROMA_444 == cu.chromaFormat ) ? 0 : iRecStride; |
1469 | |
|
1470 | 0 | int c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4 |
1471 | 0 | int c0_5tap = 1, c1_5tap = 4, c2_5tap = 1, c3_5tap = 1, c4_5tap = 1, offset_5tap = 4, shift_5tap = 3; //sum = 8 |
1472 | 0 | int c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 2, c4_6tap = 1, c5_6tap = 1, offset_6tap = 4, shift_6tap = 3; //sum = 8 |
1473 | |
|
1474 | 0 | switch (cu.chromaFormat) |
1475 | 0 | { |
1476 | 0 | case CHROMA_422: //overwrite filter coefficient values for 422 |
1477 | 0 | c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4 |
1478 | 0 | c0_5tap = 0, c1_5tap = 2, c2_5tap = 1, c3_5tap = 1, c4_5tap = 0, offset_5tap = 2, shift_5tap = 2; //sum = 4 |
1479 | 0 | c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 2, shift_6tap = 2; //sum = 4 |
1480 | 0 | break; |
1481 | | |
1482 | 0 | case CHROMA_444: //overwrite filter coefficient values for 444 |
1483 | 0 | c0_3tap = 1, c1_3tap = 0, c2_3tap = 0, offset_3tap = 0, shift_3tap = 0; //sum = 1 |
1484 | 0 | c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0, offset_5tap = 0, shift_5tap = 0; //sum = 1 |
1485 | 0 | c0_6tap = 1, c1_6tap = 0, c2_6tap = 0, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 0, shift_6tap = 0; //sum = 1 |
1486 | 0 | break; |
1487 | | |
1488 | 0 | default: |
1489 | 0 | break; |
1490 | 0 | } |
1491 | | |
1492 | 0 | if( bAboveAvaillable ) |
1493 | 0 | { |
1494 | 0 | pDst = pDst0 - iDstStride; |
1495 | 0 | int avaiAboveSizes = availlableAboveUnit * chromaUnitWidth; |
1496 | 0 | for (int i = 0; i < avaiAboveSizes; i++) |
1497 | 0 | { |
1498 | 0 | if (isFirstRowOfCtu) |
1499 | 0 | { |
1500 | 0 | piSrc = pRecSrc0 - iRecStride; |
1501 | |
|
1502 | 0 | if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC)) |
1503 | 0 | { |
1504 | 0 | pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap; |
1505 | 0 | } |
1506 | 0 | else |
1507 | 0 | { |
1508 | 0 | pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - 1] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap; |
1509 | 0 | } |
1510 | 0 | } |
1511 | 0 | else if( cu.sps->getCclmCollocatedChromaFlag() ) |
1512 | 0 | { |
1513 | 0 | piSrc = pRecSrc0 - iRecStride2; |
1514 | |
|
1515 | 0 | if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC)) |
1516 | 0 | { |
1517 | 0 | pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap |
1518 | 0 | + piSrc[mult * i] * c1_5tap + piSrc[mult * i] * c2_5tap + piSrc[mult * i + 1] * c3_5tap |
1519 | 0 | + piSrc[mult * i + strOffset] * c4_5tap |
1520 | 0 | + offset_5tap) >> shift_5tap; |
1521 | 0 | } |
1522 | 0 | else |
1523 | 0 | { |
1524 | 0 | pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap |
1525 | 0 | + piSrc[mult * i] * c1_5tap + piSrc[mult * i - 1] * c2_5tap + piSrc[mult * i + 1] * c3_5tap |
1526 | 0 | + piSrc[mult * i + strOffset] * c4_5tap |
1527 | 0 | + offset_5tap) >> shift_5tap; |
1528 | 0 | } |
1529 | 0 | } |
1530 | 0 | else |
1531 | 0 | { |
1532 | 0 | piSrc = pRecSrc0 - iRecStride2; |
1533 | |
|
1534 | 0 | if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC)) |
1535 | 0 | { |
1536 | 0 | pDst[i] = ((piSrc[mult * i] * c0_6tap + piSrc[mult * i] * c1_6tap + piSrc[mult * i + 1] * c2_6tap) |
1537 | 0 | + (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap) |
1538 | 0 | + offset_6tap) >> shift_6tap; |
1539 | 0 | } |
1540 | 0 | else |
1541 | 0 | { |
1542 | 0 | pDst[i] = ((piSrc[mult * i] * c0_6tap + piSrc[mult * i - 1] * c1_6tap + piSrc[mult * i + 1] * c2_6tap) |
1543 | 0 | + (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i - 1 + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap) |
1544 | 0 | + offset_6tap) >> shift_6tap; |
1545 | 0 | } |
1546 | 0 | } |
1547 | 0 | } |
1548 | 0 | } |
1549 | |
|
1550 | 0 | if( bLeftAvaillable ) |
1551 | 0 | { |
1552 | 0 | pDst = pDst0 - 1; |
1553 | |
|
1554 | 0 | piSrc = pRecSrc0 - 2 - logSubWidthC; |
1555 | |
|
1556 | 0 | int availlableLeftSizes = availlableLeftUnit * chromaUnitHeight; |
1557 | 0 | for (int j = 0; j < availlableLeftSizes; j++) |
1558 | 0 | { |
1559 | 0 | if( cu.sps->getCclmCollocatedChromaFlag() ) |
1560 | 0 | { |
1561 | 0 | if ((j == 0 && !bAboveAvaillable) || (j == uiCHeight + availlableLeftSizes - 1 + logSubWidthC)) |
1562 | 0 | { |
1563 | 0 | pDst[0] = ( piSrc[1 ] * c0_5tap |
1564 | 0 | + piSrc[1 ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap |
1565 | 0 | + piSrc[1 + strOffset] * c4_5tap |
1566 | 0 | + offset_5tap ) >> shift_5tap; |
1567 | 0 | } |
1568 | 0 | else |
1569 | 0 | { |
1570 | 0 | pDst[0] = ( piSrc[1 - strOffset] * c0_5tap |
1571 | 0 | + piSrc[1 ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap |
1572 | 0 | + piSrc[1 + strOffset] * c4_5tap |
1573 | 0 | + offset_5tap ) >> shift_5tap; |
1574 | 0 | } |
1575 | 0 | } |
1576 | 0 | else |
1577 | 0 | { |
1578 | 0 | pDst[0] = ((piSrc[1] * c0_6tap + piSrc[0] * c1_6tap + piSrc[2] * c2_6tap) |
1579 | 0 | + (piSrc[1 + strOffset] * c3_6tap + piSrc[strOffset] * c4_6tap + piSrc[2 + strOffset] * c5_6tap) |
1580 | 0 | + offset_6tap) >> shift_6tap; |
1581 | 0 | } |
1582 | |
|
1583 | 0 | piSrc += iRecStride2; |
1584 | 0 | pDst += iDstStride; |
1585 | 0 | } |
1586 | 0 | } |
1587 | |
|
1588 | 0 | if( cu.sps->getCclmCollocatedChromaFlag() ) |
1589 | 0 | { |
1590 | | // TODO: unroll loop |
1591 | 0 | for( int j = 0; j < uiCHeight; j++ ) |
1592 | 0 | { |
1593 | 0 | for( int i = 0; i < uiCWidth; i++ ) |
1594 | 0 | { |
1595 | 0 | if( i == 0 && !bLeftAvaillable ) |
1596 | 0 | { |
1597 | 0 | if( j == 0 && !bAboveAvaillable ) |
1598 | 0 | { |
1599 | 0 | pDst0[i] = (pRecSrc0[mult * i] * c0_5tap |
1600 | 0 | + pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap |
1601 | 0 | + pRecSrc0[mult * i + strOffset] * c4_5tap |
1602 | 0 | + offset_5tap) >> shift_5tap; |
1603 | 0 | } |
1604 | 0 | else |
1605 | 0 | { |
1606 | 0 | pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap |
1607 | 0 | + pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap |
1608 | 0 | + pRecSrc0[mult * i + strOffset] * c4_5tap |
1609 | 0 | + offset_5tap) >> shift_5tap; |
1610 | 0 | } |
1611 | 0 | } |
1612 | 0 | else if( j == 0 && !bAboveAvaillable ) |
1613 | 0 | { |
1614 | 0 | pDst0[i] = (pRecSrc0[mult * i] * c0_5tap |
1615 | 0 | + pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap |
1616 | 0 | + pRecSrc0[mult * i + strOffset] * c4_5tap |
1617 | 0 | + offset_5tap) >> shift_5tap; |
1618 | 0 | } |
1619 | 0 | else |
1620 | 0 | { |
1621 | 0 | pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap |
1622 | 0 | + pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap |
1623 | 0 | + pRecSrc0[mult * i + strOffset] * c4_5tap |
1624 | 0 | + offset_5tap) >> shift_5tap; |
1625 | 0 | } |
1626 | 0 | } |
1627 | 0 | pDst0 += iDstStride; |
1628 | 0 | pRecSrc0 += iRecStride2; |
1629 | 0 | } |
1630 | 0 | return; |
1631 | 0 | } |
1632 | | |
1633 | | |
1634 | | |
1635 | | |
1636 | 0 | #define GET_LUMA_REC_PIX_INC \ |
1637 | 0 | pDst0 += iDstStride; \ |
1638 | 0 | pRecSrc0 += iRecStride2 |
1639 | | |
1640 | 0 | #define GET_LUMA_REC_PIX_OP2(ADDR) \ |
1641 | 0 | pDst0[ADDR] = ( pRecSrc0[( (ADDR) << logSubWidthC ) ] * c0_6tap \ |
1642 | 0 | + pRecSrc0[( (ADDR) << logSubWidthC ) + 1] * c1_6tap \ |
1643 | 0 | + pRecSrc0[( (ADDR) << logSubWidthC ) - 1] * c2_6tap \ |
1644 | 0 | + pRecSrc0[( (ADDR) << logSubWidthC ) + iRecStride] * c3_6tap \ |
1645 | 0 | + pRecSrc0[( (ADDR) << logSubWidthC ) + 1 + iRecStride] * c4_6tap \ |
1646 | 0 | + pRecSrc0[( (ADDR) << logSubWidthC ) - 1 + iRecStride] * c5_6tap \ |
1647 | 0 | + offset_6tap ) >> shift_6tap |
1648 | | |
1649 | 0 | #define GET_LUMA_REC_PIX_OP1(ADDR) \ |
1650 | 0 | if( !(ADDR) ) \ |
1651 | 0 | { \ |
1652 | 0 | pDst0[0] = ( pRecSrc0[0 ] * c0_6tap \ |
1653 | 0 | + pRecSrc0[0 + 1] * c1_6tap \ |
1654 | 0 | + pRecSrc0[0] * c2_6tap \ |
1655 | 0 | + pRecSrc0[0 + iRecStride] * c3_6tap \ |
1656 | 0 | + pRecSrc0[0 + 1 + iRecStride] * c4_6tap \ |
1657 | 0 | + pRecSrc0[0 + iRecStride] * c5_6tap \ |
1658 | 0 | + offset_6tap ) >> shift_6tap; \ |
1659 | 0 | } \ |
1660 | 0 | else \ |
1661 | 0 | { \ |
1662 | 0 | GET_LUMA_REC_PIX_OP2(ADDR); \ |
1663 | 0 | } |
1664 | | |
1665 | 0 | int width = uiCWidth; |
1666 | 0 | int height = uiCHeight; |
1667 | |
|
1668 | 0 | if( bLeftAvaillable ) |
1669 | 0 | { |
1670 | 0 | if( cu.chromaFormat == CHROMA_420 ) |
1671 | 0 | { |
1672 | 0 | GetLumaRecPixel420( width, height, pRecSrc0, iRecStride, pDst0, iDstStride ); |
1673 | | // SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC ); |
1674 | 0 | } |
1675 | 0 | else //TODO add SIMD for 422,444 |
1676 | 0 | { |
1677 | 0 | SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC ); |
1678 | 0 | } |
1679 | 0 | } |
1680 | 0 | else |
1681 | 0 | { |
1682 | 0 | SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP1, GET_LUMA_REC_PIX_INC ); |
1683 | 0 | } |
1684 | 0 | } |
1685 | | |
1686 | | #undef GET_LUMA_REC_PIX_INC |
1687 | | #undef GET_LUMA_REC_PIX_OP1 |
1688 | | #undef GET_LUMA_REC_PIX_OP2 |
1689 | | #undef SIZE_AWARE_PER_EL_OP |
1690 | | |
1691 | | void IntraPrediction::xGetLMParameters(const CodingUnit &cu, const ComponentID compID, |
1692 | | const CompArea &chromaArea, |
1693 | | int &a, int &b, int &iShift) |
1694 | 0 | { |
1695 | 0 | CHECK(compID == COMPONENT_Y, ""); |
1696 | |
|
1697 | 0 | const SizeType cWidth = chromaArea.width; |
1698 | 0 | const SizeType cHeight = chromaArea.height; |
1699 | |
|
1700 | 0 | const Position posLT = chromaArea; |
1701 | |
|
1702 | 0 | const CodingStructure &cs = *cu.cs; |
1703 | |
|
1704 | 0 | const SPS & sps = *cs.sps; |
1705 | 0 | const uint32_t tuWidth = chromaArea.width; |
1706 | 0 | const uint32_t tuHeight = chromaArea.height; |
1707 | 0 | const ChromaFormat nChromaFormat = sps.getChromaFormatIdc(); |
1708 | |
|
1709 | 0 | const int baseUnitSize = 1 << MIN_CU_LOG2; |
1710 | 0 | const int unitWidth = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat); |
1711 | 0 | const int unitHeight = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat); |
1712 | |
|
1713 | 0 | const int tuWidthInUnits = tuWidth / unitWidth; |
1714 | 0 | const int tuHeightInUnits = tuHeight / unitHeight; |
1715 | 0 | const int aboveUnits = tuWidthInUnits; |
1716 | 0 | const int leftUnits = tuHeightInUnits; |
1717 | 0 | int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H; |
1718 | 0 | int leftTemplateSampNum = 2 * cHeight; |
1719 | 0 | CHECKD( !(m_topRefLength >= topTemplateSampNum), "Error!" ); |
1720 | 0 | CHECKD( !(m_leftRefLength >= leftTemplateSampNum), "Error!" ); |
1721 | 0 | int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) / unitWidth; |
1722 | 0 | int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) / unitHeight; |
1723 | 0 | int aboveRightUnits = totalAboveUnits - aboveUnits; |
1724 | 0 | int leftBelowUnits = totalLeftUnits - leftUnits; |
1725 | |
|
1726 | 0 | int curChromaMode = cu.intraDir[1]; |
1727 | 0 | bool aboveAvailable = 0, leftAvailable = 0; |
1728 | |
|
1729 | 0 | const TransformUnit& tu = *getTU( cu, chromaArea.pos(), CH_C ); |
1730 | |
|
1731 | 0 | Pel *srcColor0, *curChroma0; |
1732 | 0 | int srcStride, curStride; |
1733 | |
|
1734 | 0 | PelBuf temp; |
1735 | 0 | if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX)) |
1736 | 0 | { |
1737 | 0 | srcStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1; |
1738 | 0 | temp = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea)); |
1739 | 0 | } |
1740 | 0 | else |
1741 | 0 | { |
1742 | 0 | srcStride = MAX_TU_SIZE_FOR_PROFILE + 1; |
1743 | 0 | temp = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea)); |
1744 | 0 | } |
1745 | 0 | srcColor0 = temp.bufAt(0, 0); |
1746 | 0 | curChroma0 = getPredictorPtr(compID); |
1747 | |
|
1748 | 0 | curStride = m_topRefLength + 1; |
1749 | |
|
1750 | 0 | curChroma0 += curStride + 1; |
1751 | |
|
1752 | 0 | unsigned internalBitDepth = sps.getBitDepth(); |
1753 | |
|
1754 | 0 | int minLuma[2] = { MAX_INT, 0 }; |
1755 | 0 | int maxLuma[2] = { -MAX_INT, 0 }; |
1756 | |
|
1757 | 0 | Pel *src = srcColor0 - srcStride; |
1758 | 0 | Pel *cur = curChroma0 - curStride; |
1759 | 0 | int actualTopTemplateSampNum = 0; |
1760 | 0 | int actualLeftTemplateSampNum = 0; |
1761 | 0 | if( curChromaMode == MDLM_T_IDX ) |
1762 | 0 | { |
1763 | 0 | int avaiAboveUnits = 0; |
1764 | |
|
1765 | 0 | if( tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y ) |
1766 | 0 | { |
1767 | 0 | avaiAboveUnits = aboveUnits; |
1768 | 0 | aboveRightUnits = aboveRightUnits > ( cHeight / unitWidth ) ? cHeight / unitWidth : aboveRightUnits; |
1769 | 0 | avaiAboveUnits += isAboveAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x + ( PosType ) cWidth, posLT.y }, aboveRightUnits, unitWidth ); |
1770 | 0 | } |
1771 | |
|
1772 | 0 | aboveAvailable = avaiAboveUnits >= tuWidthInUnits; |
1773 | 0 | actualTopTemplateSampNum = unitWidth * avaiAboveUnits; |
1774 | 0 | } |
1775 | 0 | else if( curChromaMode == MDLM_L_IDX ) |
1776 | 0 | { |
1777 | 0 | int avaiLeftUnits = 0; |
1778 | | |
1779 | 0 | if( tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x ) |
1780 | 0 | { |
1781 | 0 | avaiLeftUnits = leftUnits; |
1782 | 0 | leftBelowUnits = leftBelowUnits > ( cWidth / unitHeight ) ? cWidth / unitHeight : leftBelowUnits; |
1783 | 0 | avaiLeftUnits += isLeftAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x, posLT.y + ( PosType ) cHeight }, leftBelowUnits, unitHeight ); |
1784 | 0 | } |
1785 | | |
1786 | 0 | leftAvailable = avaiLeftUnits >= tuHeightInUnits; |
1787 | 0 | actualLeftTemplateSampNum = unitHeight * avaiLeftUnits; |
1788 | 0 | } |
1789 | 0 | else if( curChromaMode == LM_CHROMA_IDX ) |
1790 | 0 | { |
1791 | 0 | aboveAvailable = tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y; |
1792 | 0 | leftAvailable = tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x; |
1793 | 0 | actualTopTemplateSampNum = cWidth; |
1794 | 0 | actualLeftTemplateSampNum = cHeight; |
1795 | 0 | } |
1796 | 0 | int startPos[2]; //0:Above, 1: Left |
1797 | 0 | int pickStep[2]; |
1798 | |
|
1799 | 0 | int aboveIs4 = leftAvailable ? 0 : 1; |
1800 | 0 | int leftIs4 = aboveAvailable ? 0 : 1; |
1801 | |
|
1802 | 0 | startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4); |
1803 | 0 | pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4)); |
1804 | |
|
1805 | 0 | startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4); |
1806 | 0 | pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4)); |
1807 | |
|
1808 | 0 | Pel selectLumaPix[4] = { 0, 0, 0, 0 }; |
1809 | 0 | Pel selectChromaPix[4] = { 0, 0, 0, 0 }; |
1810 | |
|
1811 | 0 | int cntT, cntL; |
1812 | 0 | cntT = cntL = 0; |
1813 | 0 | int cnt = 0; |
1814 | 0 | if (aboveAvailable) |
1815 | 0 | { |
1816 | 0 | cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1); |
1817 | 0 | src = srcColor0 - srcStride; |
1818 | 0 | cur = curChroma0 - curStride; |
1819 | 0 | for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++) |
1820 | 0 | { |
1821 | 0 | selectLumaPix[cnt] = src[pos]; |
1822 | 0 | selectChromaPix[cnt] = cur[pos]; |
1823 | 0 | } |
1824 | 0 | } |
1825 | |
|
1826 | 0 | if (leftAvailable) |
1827 | 0 | { |
1828 | 0 | cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 ); |
1829 | 0 | src = srcColor0 - 1; |
1830 | 0 | cur = curChroma0 - 1; |
1831 | 0 | for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++) |
1832 | 0 | { |
1833 | 0 | selectLumaPix[cnt + cntT] = src[pos * srcStride]; |
1834 | 0 | selectChromaPix[cnt+ cntT] = cur[pos * curStride]; |
1835 | 0 | } |
1836 | 0 | } |
1837 | 0 | cnt = cntL + cntT; |
1838 | |
|
1839 | 0 | if (cnt == 2) |
1840 | 0 | { |
1841 | 0 | selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0]; |
1842 | 0 | selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1]; |
1843 | 0 | selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1]; |
1844 | 0 | selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3]; |
1845 | 0 | } |
1846 | |
|
1847 | 0 | int minGrpIdx[2] = { 0, 2 }; |
1848 | 0 | int maxGrpIdx[2] = { 1, 3 }; |
1849 | 0 | int *tmpMinGrp = minGrpIdx; |
1850 | 0 | int *tmpMaxGrp = maxGrpIdx; |
1851 | 0 | if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]); |
1852 | 0 | if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]); |
1853 | 0 | if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp); // TODO: really? not std::swap(tmpMinGrp[0], tmpMaxGrp[1]); ? |
1854 | 0 | if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]); |
1855 | |
|
1856 | 0 | minLuma[0] = (selectLumaPix [tmpMinGrp[0]] + selectLumaPix [tmpMinGrp[1]] + 1) >> 1; |
1857 | 0 | minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1; |
1858 | 0 | maxLuma[0] = (selectLumaPix [tmpMaxGrp[0]] + selectLumaPix [tmpMaxGrp[1]] + 1) >> 1; |
1859 | 0 | maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1; |
1860 | |
|
1861 | 0 | if (leftAvailable || aboveAvailable) |
1862 | 0 | { |
1863 | 0 | int diff = maxLuma[0] - minLuma[0]; |
1864 | 0 | if (diff > 0) |
1865 | 0 | { |
1866 | 0 | int diffC = maxLuma[1] - minLuma[1]; |
1867 | 0 | int x = getLog2( diff ); |
1868 | 0 | static const uint8_t DivSigTable[1 << 4] = { |
1869 | | // 4bit significands - 8 ( MSB is omitted ) |
1870 | 0 | 0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0 |
1871 | 0 | }; |
1872 | 0 | int normDiff = (diff << 4 >> x) & 15; |
1873 | 0 | int v = DivSigTable[normDiff] | 8; |
1874 | 0 | x += normDiff != 0; |
1875 | |
|
1876 | 0 | int y = diffC == 0 ? 0 : getLog2( abs( diffC ) ) + 1; |
1877 | 0 | int add = 1 << y >> 1; |
1878 | 0 | a = (diffC * v + add) >> y; |
1879 | 0 | iShift = 3 + x - y; |
1880 | 0 | if ( iShift < 1 ) { |
1881 | 0 | iShift = 1; |
1882 | 0 | a = ( (a == 0)? 0: (a < 0)? -15 : 15 ); // a=Sign(a)*15 |
1883 | 0 | } |
1884 | 0 | b = minLuma[1] - ((a * minLuma[0]) >> iShift); |
1885 | 0 | } |
1886 | 0 | else |
1887 | 0 | { |
1888 | 0 | a = 0; |
1889 | 0 | b = minLuma[1]; |
1890 | 0 | iShift = 0; |
1891 | 0 | } |
1892 | 0 | } |
1893 | 0 | else |
1894 | 0 | { |
1895 | 0 | a = 0; |
1896 | |
|
1897 | 0 | b = 1 << (internalBitDepth - 1); |
1898 | |
|
1899 | 0 | iShift = 0; |
1900 | 0 | } |
1901 | 0 | } |
1902 | | |
1903 | | void IntraPrediction::initIntraMip( const CodingUnit &cu, const CompArea &area ) |
1904 | 0 | { |
1905 | 0 | CHECK( area.width > MIP_MAX_WIDTH || area.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" ); |
1906 | | |
1907 | | // prepare input (boundary) data for prediction |
1908 | | // CHECK( m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP" ); |
1909 | 0 | Pel *ptrSrc = getPredictorPtr( area.compID() ); |
1910 | 0 | const int srcStride = m_topRefLength + 1; //TODO: check this if correct |
1911 | 0 | const int srcHStride = m_leftRefLength + 1; |
1912 | |
|
1913 | 0 | m_matrixIntraPred.prepareInputForPred( CPelBuf( ptrSrc, srcStride, srcHStride ), area, cu.sps->getBitDepth(), area.compID() ); |
1914 | 0 | } |
1915 | | |
1916 | | void IntraPrediction::predIntraMip( const ComponentID compId, PelBuf &piPred, const CodingUnit &cu ) |
1917 | 0 | { |
1918 | 0 | CHECK( piPred.width > MIP_MAX_WIDTH || piPred.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" ); |
1919 | 0 | CHECK( piPred.width != (1 << getLog2(piPred.width)) || piPred.height != (1 << getLog2(piPred.height)), "Error: expecting blocks of size 2^M x 2^N" ); |
1920 | | |
1921 | | // generate mode-specific prediction |
1922 | 0 | uint32_t modeIdx = MAX_NUM_MIP_MODE; |
1923 | 0 | bool transposeFlag = false; |
1924 | 0 | if( compId == COMPONENT_Y ) |
1925 | 0 | { |
1926 | 0 | modeIdx = cu.intraDir[CHANNEL_TYPE_LUMA]; |
1927 | 0 | transposeFlag = cu.mipTransposedFlag(); |
1928 | 0 | } |
1929 | 0 | else |
1930 | 0 | { |
1931 | 0 | const CodingUnit &coLocatedLumaPU = PU::getCoLocatedLumaPU(cu); |
1932 | |
|
1933 | 0 | CHECK(cu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "Error: MIP is only supported for chroma with DM_CHROMA."); |
1934 | 0 | CHECK(!coLocatedLumaPU.mipFlag(), "Error: Co-located luma CU should use MIP."); |
1935 | |
|
1936 | 0 | modeIdx = coLocatedLumaPU.intraDir[CHANNEL_TYPE_LUMA]; |
1937 | 0 | transposeFlag = coLocatedLumaPU.mipTransposedFlag(); |
1938 | 0 | } |
1939 | | |
1940 | 0 | CHECK(modeIdx >= getNumModesMip(piPred), "Error: Wrong MIP mode index"); |
1941 | |
|
1942 | 0 | const int bitDepth = cu.sps->getBitDepth(); |
1943 | 0 | m_matrixIntraPred.predBlock( piPred, modeIdx, piPred, transposeFlag, bitDepth, compId, m_piYuvExt[0] ); |
1944 | 0 | } |
1945 | | |
1946 | | } |