/work/vvenc/source/Lib/CommonLib/MatrixIntraPrediction.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file MatrixIntraPrediction.cpp |
45 | | \brief matrix-based intra prediction class |
46 | | */ |
47 | | |
48 | | |
49 | | #include "MatrixIntraPrediction.h" |
50 | | #include "dtrace_next.h" |
51 | | |
52 | | #include "UnitTools.h" |
53 | | #include "MipData.h" |
54 | | |
55 | | namespace vvenc { |
56 | | |
// Number of samples allocated for each reduced-boundary buffer by the constructor
// (prepareInputForPred() fills 2 * m_reducedBdrySize entries, i.e. at most 2 * 4).
static constexpr int MIP_MAX_INPUT_SIZE             = 8;
// Number of samples in the temporary reduced-prediction buffer that predBlock()
// keeps on the stack when upsampling is required.
static constexpr int MIP_MAX_REDUCED_OUTPUT_SAMPLES = 64;
59 | | |
60 | | MatrixIntraPrediction::MatrixIntraPrediction() |
61 | 17.3k | : m_reducedBoundary (nullptr) |
62 | 17.3k | , m_reducedBoundaryTransp (nullptr) |
63 | 17.3k | , m_inputOffset ( 0 ) |
64 | 17.3k | , m_inputOffsetTransp ( 0 ) |
65 | 17.3k | , m_refSamplesTop (nullptr) |
66 | 17.3k | , m_refSamplesLeft (nullptr) |
67 | 17.3k | , m_blockSize ( 0, 0 ) |
68 | 17.3k | , m_sizeId ( 0 ) |
69 | 17.3k | , m_reducedBdrySize ( 0 ) |
70 | 17.3k | , m_reducedPredSize ( 0 ) |
71 | 17.3k | , m_upsmpFactorHor ( 0 ) |
72 | 17.3k | , m_upsmpFactorVer ( 0 ) |
73 | 17.3k | { |
74 | 17.3k | m_reducedBoundary = (Pel*)xMalloc( Pel, MIP_MAX_INPUT_SIZE ); |
75 | 17.3k | m_reducedBoundaryTransp = (Pel*)xMalloc( Pel, MIP_MAX_INPUT_SIZE ); |
76 | 17.3k | } |
77 | | |
78 | | MatrixIntraPrediction::~MatrixIntraPrediction() |
79 | 17.3k | { |
80 | 17.3k | xFree( m_reducedBoundary ); m_reducedBoundary = nullptr; |
81 | 17.3k | xFree( m_reducedBoundaryTransp ); m_reducedBoundaryTransp = nullptr; |
82 | 17.3k | } |
83 | | |
84 | | void MatrixIntraPrediction::prepareInputForPred(const CPelBuf &pSrc, const Area& block, const int bitDepth) |
85 | 38.6k | { |
86 | | // Step 1: Save block size and calculate dependent values |
87 | 38.6k | initPredBlockParams(block); |
88 | | |
89 | 38.6k | m_refSamplesTop = pSrc.bufAt(1, 0); |
90 | 38.6k | m_refSamplesLeft = pSrc.bufAt(1, 1); |
91 | | |
92 | | // Step 3: Compute the reduced boundary via Haar-downsampling (input for the prediction) |
93 | 38.6k | const int inputSize = 2 * m_reducedBdrySize; |
94 | | |
95 | 38.6k | Pel* const topReduced = m_reducedBoundary; |
96 | 38.6k | boundaryDownsampling1D( topReduced, m_refSamplesTop, block.width, m_reducedBdrySize ); |
97 | | |
98 | 38.6k | Pel* const leftReduced = m_reducedBoundary + m_reducedBdrySize; |
99 | 38.6k | boundaryDownsampling1D( leftReduced, m_refSamplesLeft, block.height, m_reducedBdrySize ); |
100 | | |
101 | 38.6k | Pel* const leftReducedTransposed = m_reducedBoundaryTransp; |
102 | 38.6k | Pel* const topReducedTransposed = m_reducedBoundaryTransp + m_reducedBdrySize; |
103 | 193k | for( int x = 0; x < m_reducedBdrySize; x++ ) |
104 | 154k | { |
105 | 154k | topReducedTransposed[x] = topReduced[x]; |
106 | 154k | } |
107 | 193k | for( int y = 0; y < m_reducedBdrySize; y++ ) |
108 | 154k | { |
109 | 154k | leftReducedTransposed[y] = leftReduced[y]; |
110 | 154k | } |
111 | | |
112 | | // Step 4: Rebase the reduced boundary |
113 | 38.6k | m_inputOffset = m_reducedBoundary[0]; |
114 | 38.6k | m_inputOffsetTransp = m_reducedBoundaryTransp[0]; |
115 | | |
116 | 38.6k | const bool hasFirstCol = (m_sizeId < 2); |
117 | 38.6k | m_reducedBoundary [0] = hasFirstCol ? ((1 << (bitDepth - 1)) - m_inputOffset ) : 0; // first column of matrix not needed for large blocks |
118 | 38.6k | m_reducedBoundaryTransp[0] = hasFirstCol ? ((1 << (bitDepth - 1)) - m_inputOffsetTransp) : 0; |
119 | 309k | for (int i = 1; i < inputSize; i++) |
120 | 270k | { |
121 | 270k | m_reducedBoundary [i] -= m_inputOffset; |
122 | 270k | m_reducedBoundaryTransp[i] -= m_inputOffsetTransp; |
123 | 270k | } |
124 | 38.6k | } |
125 | | |
126 | | void MatrixIntraPrediction::predBlock(Pel* const result, const int modeIdx, const bool transpose, const int bitDepth) |
127 | 231k | { |
128 | 231k | ALIGN_DATA( MEMORY_ALIGN_DEF_SIZE, Pel bufReducedPred[MIP_MAX_REDUCED_OUTPUT_SAMPLES] ); |
129 | | |
130 | 231k | const bool needUpsampling = ( m_upsmpFactorHor > 1 ) || ( m_upsmpFactorVer > 1 ); |
131 | 231k | Pel* const reducedPred = needUpsampling ? bufReducedPred : result; |
132 | 231k | const Pel* const reducedBoundary = transpose ? m_reducedBoundaryTransp : m_reducedBoundary; |
133 | | |
134 | 231k | { |
135 | 231k | const int outputSize = m_reducedPredSize; |
136 | 231k | const int inputSize = 2 * m_reducedBdrySize; |
137 | 231k | const int offset = transpose ? m_inputOffsetTransp : m_inputOffset; |
138 | 231k | const int maxVal = ( 1 << bitDepth ) - 1; |
139 | | |
140 | 231k | if( outputSize == 8) |
141 | 225k | { |
142 | 225k | g_pelBufOP.mipMatrixMul_8_8( reducedPred, reducedBoundary, &mipMatrix16x16[modeIdx][0][0], maxVal, offset, transpose ); |
143 | 225k | } |
144 | 5.66k | else |
145 | 5.66k | { |
146 | 5.66k | if( inputSize == 4) |
147 | 0 | { |
148 | 0 | g_pelBufOP.mipMatrixMul_4_4( reducedPred, reducedBoundary, &mipMatrix4x4[modeIdx][0][0], maxVal, offset, transpose ); |
149 | 0 | } |
150 | 5.66k | else |
151 | 5.66k | { |
152 | 5.66k | g_pelBufOP.mipMatrixMul_8_4( reducedPred, reducedBoundary, &mipMatrix8x8[modeIdx][0][0], maxVal, offset, transpose ); |
153 | 5.66k | } |
154 | 5.66k | } |
155 | 231k | } |
156 | | |
157 | | // Reduced prediction is transposed if ( transpose && needUpsampling ). |
158 | 231k | if( needUpsampling ) |
159 | 231k | { |
160 | 231k | const Pel* verSrc = reducedPred; |
161 | 231k | SizeType verSrcStep = m_blockSize.width; |
162 | | |
163 | 231k | if( m_upsmpFactorHor > 1 ) |
164 | 223k | { |
165 | 223k | Pel* const horDst = result + (m_upsmpFactorVer - 1) * m_blockSize.width; |
166 | 223k | verSrc = horDst; |
167 | 223k | verSrcStep *= m_upsmpFactorVer; |
168 | | |
169 | 223k | if( m_reducedPredSize == 4) |
170 | 5.66k | { |
171 | 5.66k | if( m_upsmpFactorHor == 2 ) |
172 | 5.66k | predictionUpsampling1DHor<4,1>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
173 | 0 | else if( m_upsmpFactorHor == 4 ) |
174 | 0 | predictionUpsampling1DHor<4,2>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
175 | 0 | else |
176 | 0 | predictionUpsampling1DHor<4,3>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
177 | 5.66k | } |
178 | 217k | else |
179 | 217k | { |
180 | 217k | if( m_upsmpFactorHor == 2 ) |
181 | 72.5k | predictionUpsampling1DHor<8,1>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
182 | 144k | else if( m_upsmpFactorHor == 4 ) |
183 | 88.0k | predictionUpsampling1DHor<8,2>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
184 | 56.8k | else |
185 | 56.8k | predictionUpsampling1DHor<8,3>( horDst, reducedPred, &m_refSamplesLeft[0], verSrcStep, m_upsmpFactorVer ); |
186 | 217k | } |
187 | 223k | } |
188 | | |
189 | 231k | if( m_upsmpFactorVer > 1 ) |
190 | 221k | { |
191 | 221k | if( m_reducedPredSize == 4) |
192 | 5.66k | { |
193 | 5.66k | if( m_upsmpFactorVer == 2 ) |
194 | 5.66k | predictionUpsampling1DVer<4,1>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
195 | 0 | else if( m_upsmpFactorVer == 4 ) |
196 | 0 | predictionUpsampling1DVer<4,2>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
197 | 0 | else |
198 | 0 | predictionUpsampling1DVer<4,3>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
199 | 5.66k | } |
200 | 215k | else |
201 | 215k | { |
202 | 215k | if( m_upsmpFactorVer == 2 ) |
203 | 68.7k | predictionUpsampling1DVer<8,1>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
204 | 147k | else if( m_upsmpFactorVer == 4 ) |
205 | 90.1k | predictionUpsampling1DVer<8,2>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
206 | 56.8k | else |
207 | 56.8k | predictionUpsampling1DVer<8,3>( result, verSrc, &m_refSamplesTop[0], m_blockSize.width, verSrcStep ); |
208 | 215k | } |
209 | 221k | } |
210 | 231k | } |
211 | 231k | } |
212 | | |
213 | | void MatrixIntraPrediction::initPredBlockParams(const Size& block) |
214 | 38.6k | { |
215 | 38.6k | m_blockSize = block; |
216 | | // init size index |
217 | 38.6k | m_sizeId = getMipSizeId( m_blockSize ); |
218 | | |
219 | | // init reduced boundary size |
220 | 38.6k | m_reducedBdrySize = (m_sizeId == 0) ? 2 : 4; |
221 | | |
222 | | // init reduced prediction size |
223 | 38.6k | m_reducedPredSize = ( m_sizeId < 2 ) ? 4 : 8; |
224 | | |
225 | | // init upsampling factors |
226 | 38.6k | m_upsmpFactorHor = m_blockSize.width / m_reducedPredSize; |
227 | 38.6k | m_upsmpFactorVer = m_blockSize.height / m_reducedPredSize; |
228 | | |
229 | 38.6k | CHECKD( (m_upsmpFactorHor < 1) || ((m_upsmpFactorHor & (m_upsmpFactorHor - 1)) != 0), "Need power of two horizontal upsampling factor." ); |
230 | 38.6k | CHECKD( (m_upsmpFactorVer < 1) || ((m_upsmpFactorVer & (m_upsmpFactorVer - 1)) != 0), "Need power of two vertical upsampling factor." ); |
231 | 38.6k | } |
232 | | |
233 | | void MatrixIntraPrediction::boundaryDownsampling1D(Pel* reducedDst, const Pel* const fullSrc, const SizeType srcLen, const SizeType dstLen) |
234 | 77.3k | { |
235 | 77.3k | if (dstLen < srcLen) |
236 | 77.3k | { |
237 | | // Create reduced boundary by downsampling |
238 | 77.3k | const SizeType downsmpFactor = srcLen / dstLen; |
239 | 77.3k | const int log2DownsmpFactor = floorLog2(downsmpFactor); |
240 | 77.3k | const int roundingOffset = (1 << (log2DownsmpFactor - 1)); |
241 | | |
242 | 77.3k | SizeType srcIdx = 0; |
243 | 386k | for( SizeType dstIdx = 0; dstIdx < dstLen; dstIdx++ ) |
244 | 309k | { |
245 | 309k | int sum = 0; |
246 | 2.98M | for( int k = 0; k < downsmpFactor; k++ ) |
247 | 2.67M | { |
248 | 2.67M | sum += fullSrc[srcIdx++]; |
249 | 2.67M | } |
250 | 309k | reducedDst[dstIdx] = (sum + roundingOffset) >> log2DownsmpFactor; |
251 | 309k | } |
252 | 77.3k | } |
253 | 0 | else |
254 | 0 | { |
255 | | // Copy boundary if no downsampling is needed |
256 | 0 | for (SizeType i = 0; i < dstLen; ++i) |
257 | 0 | { |
258 | 0 | reducedDst[i] = fullSrc[i]; |
259 | 0 | } |
260 | 0 | } |
261 | 77.3k | } |
262 | | |
263 | | template< SizeType predPredSize, unsigned log2UpsmpFactor> |
264 | | void MatrixIntraPrediction::predictionUpsampling1DHor(Pel* const dst, const Pel* const src, const Pel* const bndry, const SizeType dstStride, const SizeType bndryStep ) |
265 | 223k | { |
266 | 223k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); |
267 | 223k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; |
268 | | |
269 | 223k | Pel* dstLine = dst; |
270 | 223k | const Pel* srcLine = src; |
271 | 223k | const Pel* bndryLine = bndry + bndryStep - 1; |
272 | | |
273 | 1.98M | for( SizeType idxOrthDim = 0; idxOrthDim < predPredSize; idxOrthDim++ ) |
274 | 1.76M | { |
275 | 1.76M | const Pel* before = bndryLine; |
276 | 1.76M | const Pel* behind = srcLine; |
277 | 1.76M | Pel* currDst = dstLine; |
278 | 15.7M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < predPredSize; idxUpsmpDim++ ) |
279 | 14.0M | { |
280 | 14.0M | const int valDiff = *behind - *before; |
281 | 14.0M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; |
282 | 75.1M | for( SizeType pos = 0; pos < upsmpFactor; pos++) |
283 | 61.1M | { |
284 | 61.1M | scaledVal += valDiff; |
285 | 61.1M | *currDst = scaledVal >> log2UpsmpFactor; |
286 | 61.1M | currDst++; |
287 | 61.1M | } |
288 | 14.0M | before = behind; |
289 | 14.0M | behind ++; |
290 | 14.0M | } |
291 | | |
292 | 1.76M | srcLine += predPredSize; |
293 | 1.76M | dstLine += dstStride; |
294 | 1.76M | bndryLine += bndryStep; |
295 | 1.76M | } |
296 | 223k | } void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<4u, 1u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 265 | 5.66k | { | 266 | 5.66k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 267 | 5.66k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 268 | | | 269 | 5.66k | Pel* dstLine = dst; | 270 | 5.66k | const Pel* srcLine = src; | 271 | 5.66k | const Pel* bndryLine = bndry + bndryStep - 1; | 272 | | | 273 | 28.3k | for( SizeType idxOrthDim = 0; idxOrthDim < predPredSize; idxOrthDim++ ) | 274 | 22.6k | { | 275 | 22.6k | const Pel* before = bndryLine; | 276 | 22.6k | const Pel* behind = srcLine; | 277 | 22.6k | Pel* currDst = dstLine; | 278 | 113k | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < predPredSize; idxUpsmpDim++ ) | 279 | 90.6k | { | 280 | 90.6k | const int valDiff = *behind - *before; | 281 | 90.6k | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 282 | 271k | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 283 | 181k | { | 284 | 181k | scaledVal += valDiff; | 285 | 181k | *currDst = scaledVal >> log2UpsmpFactor; | 286 | 181k | currDst++; | 287 | 181k | } | 288 | 90.6k | before = behind; | 289 | 90.6k | behind ++; | 290 | 90.6k | } | 291 | | | 292 | 22.6k | srcLine += predPredSize; | 293 | 22.6k | dstLine += dstStride; | 294 | 22.6k | bndryLine += bndryStep; | 295 | 22.6k | } | 296 | 5.66k | } |
Unexecuted instantiation: void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<4u, 2u>(short*, short const*, short const*, unsigned int, unsigned int) Unexecuted instantiation: void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<4u, 3u>(short*, short const*, short const*, unsigned int, unsigned int) void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<8u, 1u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 265 | 72.5k | { | 266 | 72.5k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 267 | 72.5k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 268 | | | 269 | 72.5k | Pel* dstLine = dst; | 270 | 72.5k | const Pel* srcLine = src; | 271 | 72.5k | const Pel* bndryLine = bndry + bndryStep - 1; | 272 | | | 273 | 652k | for( SizeType idxOrthDim = 0; idxOrthDim < predPredSize; idxOrthDim++ ) | 274 | 580k | { | 275 | 580k | const Pel* before = bndryLine; | 276 | 580k | const Pel* behind = srcLine; | 277 | 580k | Pel* currDst = dstLine; | 278 | 5.22M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < predPredSize; idxUpsmpDim++ ) | 279 | 4.64M | { | 280 | 4.64M | const int valDiff = *behind - *before; | 281 | 4.64M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 282 | 13.9M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 283 | 9.28M | { | 284 | 9.28M | scaledVal += valDiff; | 285 | 9.28M | *currDst = scaledVal >> log2UpsmpFactor; | 286 | 9.28M | currDst++; | 287 | 9.28M | } | 288 | 4.64M | before = behind; | 289 | 4.64M | behind ++; | 290 | 4.64M | } | 291 | | | 292 | 580k | srcLine += predPredSize; | 293 | 580k | dstLine += dstStride; | 294 | 580k | bndryLine += bndryStep; | 295 | 580k | } | 296 | 72.5k | } |
void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<8u, 2u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 265 | 88.0k | { | 266 | 88.0k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 267 | 88.0k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 268 | | | 269 | 88.0k | Pel* dstLine = dst; | 270 | 88.0k | const Pel* srcLine = src; | 271 | 88.0k | const Pel* bndryLine = bndry + bndryStep - 1; | 272 | | | 273 | 792k | for( SizeType idxOrthDim = 0; idxOrthDim < predPredSize; idxOrthDim++ ) | 274 | 704k | { | 275 | 704k | const Pel* before = bndryLine; | 276 | 704k | const Pel* behind = srcLine; | 277 | 704k | Pel* currDst = dstLine; | 278 | 6.34M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < predPredSize; idxUpsmpDim++ ) | 279 | 5.63M | { | 280 | 5.63M | const int valDiff = *behind - *before; | 281 | 5.63M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 282 | 28.1M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 283 | 22.5M | { | 284 | 22.5M | scaledVal += valDiff; | 285 | 22.5M | *currDst = scaledVal >> log2UpsmpFactor; | 286 | 22.5M | currDst++; | 287 | 22.5M | } | 288 | 5.63M | before = behind; | 289 | 5.63M | behind ++; | 290 | 5.63M | } | 291 | | | 292 | 704k | srcLine += predPredSize; | 293 | 704k | dstLine += dstStride; | 294 | 704k | bndryLine += bndryStep; | 295 | 704k | } | 296 | 88.0k | } |
void vvenc::MatrixIntraPrediction::predictionUpsampling1DHor<8u, 3u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 265 | 56.8k | { | 266 | 56.8k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 267 | 56.8k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 268 | | | 269 | 56.8k | Pel* dstLine = dst; | 270 | 56.8k | const Pel* srcLine = src; | 271 | 56.8k | const Pel* bndryLine = bndry + bndryStep - 1; | 272 | | | 273 | 511k | for( SizeType idxOrthDim = 0; idxOrthDim < predPredSize; idxOrthDim++ ) | 274 | 454k | { | 275 | 454k | const Pel* before = bndryLine; | 276 | 454k | const Pel* behind = srcLine; | 277 | 454k | Pel* currDst = dstLine; | 278 | 4.09M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < predPredSize; idxUpsmpDim++ ) | 279 | 3.63M | { | 280 | 3.63M | const int valDiff = *behind - *before; | 281 | 3.63M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 282 | 32.7M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 283 | 29.1M | { | 284 | 29.1M | scaledVal += valDiff; | 285 | 29.1M | *currDst = scaledVal >> log2UpsmpFactor; | 286 | 29.1M | currDst++; | 287 | 29.1M | } | 288 | 3.63M | before = behind; | 289 | 3.63M | behind ++; | 290 | 3.63M | } | 291 | | | 292 | 454k | srcLine += predPredSize; | 293 | 454k | dstLine += dstStride; | 294 | 454k | bndryLine += bndryStep; | 295 | 454k | } | 296 | 56.8k | } |
|
297 | | |
298 | | template< SizeType inHeight, unsigned log2UpsmpFactor> |
299 | | void MatrixIntraPrediction::predictionUpsampling1DVer(Pel* const dst, const Pel* const src, const Pel* const bndry, const SizeType outWidth, const SizeType srcStep ) |
300 | 221k | { |
301 | 221k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); |
302 | 221k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; |
303 | | |
304 | 221k | Pel* dstLine = dst; |
305 | 221k | const Pel* srcLine = src; |
306 | 221k | const Pel* bndryLine = bndry; |
307 | | |
308 | 7.79M | for( SizeType idxOrthDim = 0; idxOrthDim < outWidth; idxOrthDim++ ) |
309 | 7.56M | { |
310 | 7.56M | const Pel* before = bndryLine; |
311 | 7.56M | const Pel* behind = srcLine; |
312 | 7.56M | Pel* currDst = dstLine; |
313 | 67.9M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < inHeight; idxUpsmpDim++ ) |
314 | 60.3M | { |
315 | 60.3M | const int valDiff = *behind - *before; |
316 | 60.3M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; |
317 | | |
318 | 391M | for( SizeType pos = 0; pos < upsmpFactor; pos++) |
319 | 331M | { |
320 | 331M | scaledVal += valDiff; |
321 | 331M | *currDst = scaledVal >> log2UpsmpFactor; |
322 | 331M | currDst += outWidth; |
323 | 331M | } |
324 | 60.3M | before = behind; |
325 | 60.3M | behind += srcStep; |
326 | 60.3M | } |
327 | | |
328 | 7.56M | srcLine ++; |
329 | 7.56M | dstLine ++; |
330 | 7.56M | bndryLine ++; |
331 | 7.56M | } |
332 | 221k | } void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<4u, 1u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 300 | 5.66k | { | 301 | 5.66k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 302 | 5.66k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 303 | | | 304 | 5.66k | Pel* dstLine = dst; | 305 | 5.66k | const Pel* srcLine = src; | 306 | 5.66k | const Pel* bndryLine = bndry; | 307 | | | 308 | 50.9k | for( SizeType idxOrthDim = 0; idxOrthDim < outWidth; idxOrthDim++ ) | 309 | 45.3k | { | 310 | 45.3k | const Pel* before = bndryLine; | 311 | 45.3k | const Pel* behind = srcLine; | 312 | 45.3k | Pel* currDst = dstLine; | 313 | 226k | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < inHeight; idxUpsmpDim++ ) | 314 | 181k | { | 315 | 181k | const int valDiff = *behind - *before; | 316 | 181k | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 317 | | | 318 | 543k | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 319 | 362k | { | 320 | 362k | scaledVal += valDiff; | 321 | 362k | *currDst = scaledVal >> log2UpsmpFactor; | 322 | 362k | currDst += outWidth; | 323 | 362k | } | 324 | 181k | before = behind; | 325 | 181k | behind += srcStep; | 326 | 181k | } | 327 | | | 328 | 45.3k | srcLine ++; | 329 | 45.3k | dstLine ++; | 330 | 45.3k | bndryLine ++; | 331 | 45.3k | } | 332 | 5.66k | } |
Unexecuted instantiation: void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<4u, 2u>(short*, short const*, short const*, unsigned int, unsigned int) Unexecuted instantiation: void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<4u, 3u>(short*, short const*, short const*, unsigned int, unsigned int) void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<8u, 1u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 300 | 68.7k | { | 301 | 68.7k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 302 | 68.7k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 303 | | | 304 | 68.7k | Pel* dstLine = dst; | 305 | 68.7k | const Pel* srcLine = src; | 306 | 68.7k | const Pel* bndryLine = bndry; | 307 | | | 308 | 1.71M | for( SizeType idxOrthDim = 0; idxOrthDim < outWidth; idxOrthDim++ ) | 309 | 1.65M | { | 310 | 1.65M | const Pel* before = bndryLine; | 311 | 1.65M | const Pel* behind = srcLine; | 312 | 1.65M | Pel* currDst = dstLine; | 313 | 14.8M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < inHeight; idxUpsmpDim++ ) | 314 | 13.2M | { | 315 | 13.2M | const int valDiff = *behind - *before; | 316 | 13.2M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 317 | | | 318 | 39.6M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 319 | 26.4M | { | 320 | 26.4M | scaledVal += valDiff; | 321 | 26.4M | *currDst = scaledVal >> log2UpsmpFactor; | 322 | 26.4M | currDst += outWidth; | 323 | 26.4M | } | 324 | 13.2M | before = behind; | 325 | 13.2M | behind += srcStep; | 326 | 13.2M | } | 327 | | | 328 | 1.65M | srcLine ++; | 329 | 1.65M | dstLine ++; | 330 | 1.65M | bndryLine ++; | 331 | 1.65M | } | 332 | 68.7k | } |
void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<8u, 2u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 300 | 90.1k | { | 301 | 90.1k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 302 | 90.1k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 303 | | | 304 | 90.1k | Pel* dstLine = dst; | 305 | 90.1k | const Pel* srcLine = src; | 306 | 90.1k | const Pel* bndryLine = bndry; | 307 | | | 308 | 2.32M | for( SizeType idxOrthDim = 0; idxOrthDim < outWidth; idxOrthDim++ ) | 309 | 2.23M | { | 310 | 2.23M | const Pel* before = bndryLine; | 311 | 2.23M | const Pel* behind = srcLine; | 312 | 2.23M | Pel* currDst = dstLine; | 313 | 20.1M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < inHeight; idxUpsmpDim++ ) | 314 | 17.8M | { | 315 | 17.8M | const int valDiff = *behind - *before; | 316 | 17.8M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 317 | | | 318 | 89.3M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 319 | 71.4M | { | 320 | 71.4M | scaledVal += valDiff; | 321 | 71.4M | *currDst = scaledVal >> log2UpsmpFactor; | 322 | 71.4M | currDst += outWidth; | 323 | 71.4M | } | 324 | 17.8M | before = behind; | 325 | 17.8M | behind += srcStep; | 326 | 17.8M | } | 327 | | | 328 | 2.23M | srcLine ++; | 329 | 2.23M | dstLine ++; | 330 | 2.23M | bndryLine ++; | 331 | 2.23M | } | 332 | 90.1k | } |
void vvenc::MatrixIntraPrediction::predictionUpsampling1DVer<8u, 3u>(short*, short const*, short const*, unsigned int, unsigned int) Line | Count | Source | 300 | 56.8k | { | 301 | 56.8k | const int roundingOffset = 1 << (log2UpsmpFactor - 1); | 302 | 56.8k | const SizeType upsmpFactor = 1 << log2UpsmpFactor; | 303 | | | 304 | 56.8k | Pel* dstLine = dst; | 305 | 56.8k | const Pel* srcLine = src; | 306 | 56.8k | const Pel* bndryLine = bndry; | 307 | | | 308 | 3.69M | for( SizeType idxOrthDim = 0; idxOrthDim < outWidth; idxOrthDim++ ) | 309 | 3.63M | { | 310 | 3.63M | const Pel* before = bndryLine; | 311 | 3.63M | const Pel* behind = srcLine; | 312 | 3.63M | Pel* currDst = dstLine; | 313 | 32.7M | for( SizeType idxUpsmpDim = 0; idxUpsmpDim < inHeight; idxUpsmpDim++ ) | 314 | 29.1M | { | 315 | 29.1M | const int valDiff = *behind - *before; | 316 | 29.1M | int scaledVal = ( ( *before ) << log2UpsmpFactor ) + roundingOffset; | 317 | | | 318 | 261M | for( SizeType pos = 0; pos < upsmpFactor; pos++) | 319 | 232M | { | 320 | 232M | scaledVal += valDiff; | 321 | 232M | *currDst = scaledVal >> log2UpsmpFactor; | 322 | 232M | currDst += outWidth; | 323 | 232M | } | 324 | 29.1M | before = behind; | 325 | 29.1M | behind += srcStep; | 326 | 29.1M | } | 327 | | | 328 | 3.63M | srcLine ++; | 329 | 3.63M | dstLine ++; | 330 | 3.63M | bndryLine ++; | 331 | 3.63M | } | 332 | 56.8k | } |
|
333 | | |
334 | | |
335 | | } // namespace vvenc |
336 | | |
337 | | //! \} |