/src/vvenc/source/Lib/CommonLib/Buffer.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file Buffer.cpp |
45 | | * \brief Low-overhead class describing 2D memory layout |
46 | | */ |
47 | | |
48 | | #define DONT_UNDEF_SIZE_AWARE_PER_EL_OP |
49 | | |
50 | | // unit needs to come first due to a forward declaration |
51 | | #include "Unit.h" |
52 | | #include "Slice.h" |
53 | | #include "InterpolationFilter.h" |
54 | | |
55 | | //! \ingroup CommonLib |
56 | | //! \{ |
57 | | |
58 | | namespace vvenc { |
59 | | |
60 | | void weightCiipCore( Pel* res, const Pel* src, const int numSamples, int numIntra ) |
61 | 0 | { |
62 | 0 | if( numIntra == 1 ) |
63 | 0 | { |
64 | 0 | for (int n = 0; n < numSamples; n+=2) |
65 | 0 | { |
66 | 0 | res[n ] = (res[n ] + src[n ] + 1) >> 1; |
67 | 0 | res[n+1] = (res[n+1] + src[n+1] + 1) >> 1; |
68 | 0 | } |
69 | 0 | } |
70 | 0 | else |
71 | 0 | { |
72 | 0 | const Pel* scale = numIntra ? src : res; |
73 | 0 | const Pel* unscale = numIntra ? res : src; |
74 | |
|
75 | 0 | for (int n = 0; n < numSamples; n+=2) |
76 | 0 | { |
77 | 0 | res[n ] = (unscale[n ] + 3*scale[n ] + 2) >> 2; |
78 | 0 | res[n+1] = (unscale[n+1] + 3*scale[n+1] + 2) >> 2; |
79 | 0 | } |
80 | 0 | } |
81 | 0 | } |
82 | | |
// Matrix-vector multiply for MIP (matrix-based intra prediction).
// Computes an outputSize x outputSize block from the reduced boundary
// vector 'input' (inputSize entries, 4 or 8) and the row-major weight
// matrix 'weight'. Each result is clipped to [0, maxVal]; if 'transpose'
// is set, the block is transposed on the way into 'res'.
template< unsigned inputSize, unsigned outputSize >
void mipMatrixMulCore( Pel* res, const Pel* input, const uint8_t* weight, const int maxVal, const int inputOffset, bool transpose )
{
  // Scratch area used only when the output must be transposed at the end.
  Pel buffer[ outputSize*outputSize];

  int sum = 0;
  for( int i = 0; i < inputSize; i++ )
  {
    sum += input[i];
  }
  // Rounding term plus a per-row correction: the -MIP_OFFSET_MATRIX * sum
  // term presumably compensates a constant bias stored in the weights
  // (TODO confirm against the MIP weight tables); inputOffset is pre-shifted
  // into the same fixed-point domain.
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );

  // Write either directly into the result or into the scratch buffer.
  Pel* mat = transpose ? buffer : res;
  unsigned posRes = 0;
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
  {
    // Dot product of the input vector with one matrix row, unrolled by four.
    int tmp0 = input[0] * weight[0];
    int tmp1 = input[1] * weight[1];
    int tmp2 = input[2] * weight[2];
    int tmp3 = input[3] * weight[3];
    if( 8 == inputSize )
    {
      tmp0 += input[4] * weight[4];
      tmp1 += input[5] * weight[5];
      tmp2 += input[6] * weight[6];
      tmp3 += input[7] * weight[7];
    }
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );

    weight += inputSize;  // advance to the next matrix row
  }

  if( transpose )
  {
    // Transpose the scratch buffer into the final result.
    for( int j = 0; j < outputSize; j++ )
    {
      for( int i = 0; i < outputSize; i++ )
      {
        res[j * outputSize + i] = buffer[i * outputSize + j];
      }
    }
  }
}
127 | | |
// Strided bi-prediction average: dest = clip( (src1 + src2 + offset) >> rshift ).
// Traversal of the width x height area is delegated to SIZE_AWARE_PER_EL_OP;
// the INC macro advances all three row pointers by their strides.
template< typename T >
void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, unsigned rshift, int offset, const ClpRng& clpRng )
{
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
#define ADD_AVG_CORE_INC    \
  src1 += src1Stride;       \
  src2 += src2Stride;       \
  dest += dstStride;        \

  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );

#undef ADD_AVG_CORE_OP
#undef ADD_AVG_CORE_INC
}
142 | | |
// Strided weighted average: dest = clip( (src1*w0 + src2*w1 + offset) >> rshift ).
// Used with BCW-style weight pairs; traversal via SIZE_AWARE_PER_EL_OP.
template<typename T>
void addWeightedAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int destStride, int width, int height, unsigned rshift, int offset, int w0, int w1, const ClpRng& clpRng )
{
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
#define ADD_WGHT_AVG_INC    \
  src1 += src1Stride;       \
  src2 += src2Stride;       \
  dest += destStride;       \

  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );

#undef ADD_WGHT_AVG_OP
#undef ADD_WGHT_AVG_INC
}
157 | | |
// Strided per-sample difference: dest = src0 - src1 (no clipping).
// Traversal via SIZE_AWARE_PER_EL_OP.
template<typename T>
void subsCore( const T* src0, int src0Stride, const T* src1, int src1Stride, T* dest, int destStride, int width, int height )
{
#define SUBS_INC        \
  dest += destStride;   \
  src0 += src0Stride;   \
  src1 += src1Stride;   \

#define SUBS_OP( ADDR ) dest[ADDR] = src0[ADDR] - src1[ADDR]

  SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );

#undef SUBS_OP
#undef SUBS_INC
}
173 | | |
174 | | void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height) |
175 | 0 | { |
176 | 0 | #define REM_HF_INC \ |
177 | 0 | src += srcStride; \ |
178 | 0 | dst += dstStride; \ |
179 | 0 |
|
180 | 0 | #define REM_HF_OP( ADDR ) dst[ADDR] = 2 * dst[ADDR] - src[ADDR] |
181 | |
|
182 | 0 | SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC); |
183 | |
|
184 | 0 | #undef REM_HF_INC |
185 | 0 | #undef REM_HF_OP |
186 | 0 | #undef REM_HF_OP_CLIP |
187 | 0 | } |
188 | | |
// Strided reconstruction: dest = clip( src1 + src2 ), typically prediction
// plus residual. Traversal via SIZE_AWARE_PER_EL_OP.
template<typename T>
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
{
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
#define RECO_CORE_INC   \
  src1 += src1Stride;   \
  src2 += src2Stride;   \
  dest += dstStride;    \

  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );

#undef RECO_CORE_OP
#undef RECO_CORE_INC
}
203 | | |
204 | | template<typename T> |
205 | | void recoCore( const T* src1, const T* src2, T* dest, int numSamples, const ClpRng& clpRng ) |
206 | 0 | { |
207 | 0 | for( int n = 0; n < numSamples; n+=2) |
208 | 0 | { |
209 | 0 | dest[n] = ClipPel( src1[n] + src2[n], clpRng ); |
210 | 0 | dest[n+1] = ClipPel( src1[n+1] + src2[n+1], clpRng ); |
211 | 0 | } |
212 | 0 | } |
213 | | |
214 | | template<typename T> |
215 | | void copyClipCore( const T* src, Pel* dst, int numSamples, const ClpRng& clpRng ) |
216 | 0 | { |
217 | 0 | for( int n = 0; n < numSamples; n+=2) |
218 | 0 | { |
219 | 0 | dst[n] = ClipPel( src[n] , clpRng ); |
220 | 0 | dst[n+1] = ClipPel( src[n+1] , clpRng ); |
221 | 0 | } |
222 | 0 | } |
223 | | |
224 | | template< typename T > |
225 | | void addAvgCore( const T* src1, const T* src2, T* dest, int numSamples, unsigned rshift, int offset, const ClpRng& clpRng ) |
226 | 0 | { |
227 | 0 | for( int n = 0; n < numSamples; n+=2) |
228 | 0 | { |
229 | 0 | dest[n] = ClipPel( rightShiftU( ( src1[n] + src2[n] + offset ), rshift ), clpRng ); |
230 | 0 | dest[n+1] = ClipPel( rightShiftU( ( src1[n+1] + src2[n+1] + offset ), rshift ), clpRng ); |
231 | 0 | } |
232 | 0 | } |
233 | | |
234 | | template< typename T > |
235 | | void roundGeoCore( const T* src, T* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng) |
236 | 0 | { |
237 | 0 | for( int i = 0; i < numSamples; i+=2) |
238 | 0 | { |
239 | 0 | dest[i] = ClipPel(rightShiftU(src[i ] + offset, rshift), clpRng); |
240 | 0 | dest[i+1] = ClipPel(rightShiftU(src[i+1] + offset, rshift), clpRng); |
241 | 0 | } |
242 | 0 | } |
243 | | |
// Strided linear transform: dst = (scale * src >> shift) + offset, with an
// optional clip to clpRng. The clip decision is hoisted out of the pixel
// loop by instantiating SIZE_AWARE_PER_EL_OP once per branch.
template<typename T>
void linTfCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, int scale, unsigned shift, int offset, const ClpRng& clpRng, bool bClip )
{
#define LINTF_CORE_INC  \
  src += srcStride;     \
  dst += dstStride;     \

  if( bClip )
  {
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ClipPel( rightShiftU( scale * src[ADDR], shift ) + offset, clpRng )

    SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );

#undef LINTF_CORE_OP
  }
  else
  {
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ( rightShiftU( scale * src[ADDR], shift ) + offset )

    SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );

#undef LINTF_CORE_OP
  }
#undef LINTF_CORE_INC
}
269 | | |
270 | | template<typename T, int N> |
271 | | void transposeNxNCore( const Pel* src, int srcStride, Pel* dst, int dstStride ) |
272 | 0 | { |
273 | 0 | for( int i = 0; i < N; i++ ) |
274 | 0 | { |
275 | 0 | for( int j = 0; j < N; j++ ) |
276 | 0 | { |
277 | 0 | dst[j * dstStride] = src[j]; |
278 | 0 | } |
279 | |
|
280 | 0 | dst++; |
281 | 0 | src += srcStride; |
282 | 0 | } |
283 | 0 | } Unexecuted instantiation: void vvenc::transposeNxNCore<short, 4>(short const*, int, short*, int) Unexecuted instantiation: void vvenc::transposeNxNCore<short, 8>(short const*, int, short*, int) |
284 | | |
// Strided clipped copy: dst = clip( src ) per sample.
// Traversal via SIZE_AWARE_PER_EL_OP.
template<typename T>
void copyClipCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, const ClpRng& clpRng )
{
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
#define RECO_INC    \
  src += srcStride; \
  dst += dstStride; \

  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );

#undef RECO_OP
#undef RECO_INC
}
298 | | |
// Row-wise raw byte copy between two strided buffers: copies 'numBytes'
// bytes per row for 'height' rows; strides are also in bytes.
void copyBufferCore( const char* src, int srcStride, char* dst, int dstStride, int numBytes, int height)
{
  for( int row = 0; row < height; row++ )
  {
    memcpy( dst, src, numBytes );
    src += srcStride;
    dst += dstStride;
  }
}
306 | | |
// Per-sample lookup-table mapping: dst = lut[src]; src sample values are
// used directly as indices into 'lut'. Traversal via SIZE_AWARE_PER_EL_OP.
void applyLutCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height, const Pel* lut )
{
#define RSP_SGNL_OP( ADDR ) dst[ADDR] = lut[src[ADDR]]
#define RSP_SGNL_INC        src += srcStride; dst += dstStride;

  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )

#undef RSP_SGNL_OP
#undef RSP_SGNL_INC
}
317 | | |
// Fills a strided width x height map of pointers with 'val'.
// When rows are contiguous (stride == width) a single flat fill is used.
void fillMapPtr_Core( void** ptrMap, const ptrdiff_t mapStride, int width, int height, void* val )
{
  if( width == mapStride )
  {
    std::fill_n( ptrMap, width * height, val );
    return;
  }

  for( int row = 0; row < height; row++, ptrMap += mapStride )
  {
    std::fill_n( ptrMap, width, val );
  }
}
333 | | |
334 | | uint64_t AvgHighPassCore( const int width, const int height, const Pel* pSrc, const int iSrcStride) |
335 | 0 | { |
336 | 0 | uint64_t saAct = 0; |
337 | 0 | for (int y = 1; y < height - 1; y++) |
338 | 0 | { |
339 | 0 | for (int x = 1; x < width - 1; x++) // center cols |
340 | 0 | { |
341 | 0 | const int s = 12 * (int) pSrc[x ] - 2 * ((int) pSrc[x-1] + (int) pSrc[x+1] + (int) pSrc[x -iSrcStride] + (int) pSrc[x +iSrcStride]) |
342 | 0 | - ((int) pSrc[x-1-iSrcStride] + (int) pSrc[x+1-iSrcStride] + (int) pSrc[x-1+iSrcStride] + (int) pSrc[x+1+iSrcStride]); |
343 | 0 | saAct += abs (s); |
344 | 0 | } |
345 | 0 | pSrc += iSrcStride; |
346 | 0 | } |
347 | 0 | return saAct; |
348 | 0 | } |
349 | | |
350 | | uint64_t HDHighPassCore (const int width, const int height,const Pel* pSrc,const Pel* pSM1,const int iSrcStride,const int iSM1Stride) |
351 | 0 | { |
352 | 0 | uint64_t taAct = 0; |
353 | 0 | for (int y = 1; y < height - 1; y++) |
354 | 0 | { |
355 | 0 | for (int x = 1; x < width - 1; x++) // cnt cols |
356 | 0 | { |
357 | 0 | const int t = (int) pSrc[x] - (int) pSM1[x]; |
358 | 0 | taAct += (1 + 3 * abs (t)) >> 1; |
359 | 0 | } |
360 | 0 | pSrc += iSrcStride; |
361 | 0 | pSM1 += iSM1Stride; |
362 | 0 | } |
363 | 0 | return taAct; |
364 | 0 | } |
365 | | |
366 | | uint64_t HDHighPass2Core (const int width, const int height,const Pel* pSrc,const Pel* pSM1,const Pel* pSM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride) |
367 | 0 | { |
368 | 0 | uint64_t taAct = 0; |
369 | 0 | for (int y = 1; y < height - 1; y++) |
370 | 0 | { |
371 | 0 | for (int x = 1; x < width - 1; x++) // cnt cols |
372 | 0 | { |
373 | 0 | const int t = (int) pSrc[x] - 2 * (int) pSM1[x] + (int) pSM2[x]; |
374 | 0 | taAct += abs (t); |
375 | 0 | } |
376 | 0 | pSrc += iSrcStride; |
377 | 0 | pSM1 += iSM1Stride; |
378 | 0 | pSM2 += iSM2Stride; |
379 | 0 | } |
380 | 0 | return taAct; |
381 | 0 | } |
// Spatial activity with an implicit 2x downsampling: for every 2x2 block
// (stepping by 2, excluding a two-sample border) a high-pass stencil is
// evaluated over the surrounding 6x6 neighbourhood and the absolute
// responses are summed. The pointer is rewound by two rows up front so
// the y/x indices (starting at 2) address the buffer directly.
uint64_t AvgHighPassWithDownsamplingCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
{
  uint64_t saAct = 0;
  pSrc -= iSrcStride;
  pSrc -= iSrcStride;
  for (int y = 2; y < height - 2; y += 2)
  {
    for (int x = 2; x < width - 2; x += 2)
    {
      // Weights: +12 on the 2x2 center, -3 on edge-adjacent pairs,
      // -2 on corner pairs, -1 on the outer ring samples.
      const int f = 12 * ((int)pSrc[ y   *iSrcStride + x  ] + (int)pSrc[ y   *iSrcStride + x+1] + (int)pSrc[(y+1)*iSrcStride + x  ] + (int)pSrc[(y+1)*iSrcStride + x+1])
                  -  3 * ((int)pSrc[(y-1)*iSrcStride + x  ] + (int)pSrc[(y-1)*iSrcStride + x+1] + (int)pSrc[(y+2)*iSrcStride + x  ] + (int)pSrc[(y+2)*iSrcStride + x+1])
                  -  3 * ((int)pSrc[ y   *iSrcStride + x-1] + (int)pSrc[ y   *iSrcStride + x+2] + (int)pSrc[(y+1)*iSrcStride + x-1] + (int)pSrc[(y+1)*iSrcStride + x+2])
                  -  2 * ((int)pSrc[(y-1)*iSrcStride + x-1] + (int)pSrc[(y-1)*iSrcStride + x+2] + (int)pSrc[(y+2)*iSrcStride + x-1] + (int)pSrc[(y+2)*iSrcStride + x+2])
                  -      ((int)pSrc[(y-2)*iSrcStride + x-1] + (int)pSrc[(y-2)*iSrcStride + x  ] + (int)pSrc[(y-2)*iSrcStride + x+1] + (int)pSrc[(y-2)*iSrcStride + x+2]
                        + (int)pSrc[(y+3)*iSrcStride + x-1] + (int)pSrc[(y+3)*iSrcStride + x  ] + (int)pSrc[(y+3)*iSrcStride + x+1] + (int)pSrc[(y+3)*iSrcStride + x+2]
                        + (int)pSrc[(y-1)*iSrcStride + x-2] + (int)pSrc[ y   *iSrcStride + x-2] + (int)pSrc[(y+1)*iSrcStride + x-2] + (int)pSrc[(y+2)*iSrcStride + x-2]
                        + (int)pSrc[(y-1)*iSrcStride + x+3] + (int)pSrc[ y   *iSrcStride + x+3] + (int)pSrc[(y+1)*iSrcStride + x+3] + (int)pSrc[(y+2)*iSrcStride + x+3]);
      saAct += (uint64_t) abs(f);
    }
  }
  return saAct;
}
404 | | uint64_t AvgHighPassWithDownsamplingDiff1stCore (const int width, const int height, const Pel* pSrc,const Pel* pSrcM1, const int iSrcStride, const int iSrcM1Stride) |
405 | 0 | { |
406 | 0 | uint64_t taAct = 0; |
407 | 0 | pSrc -= iSrcStride; |
408 | 0 | pSrc -= iSrcStride; |
409 | 0 | pSrcM1-=iSrcM1Stride; |
410 | 0 | pSrcM1-=iSrcM1Stride; |
411 | |
|
412 | 0 | for (uint32_t y = 2; y < height-2; y += 2) |
413 | 0 | { |
414 | 0 | for (uint32_t x = 2; x < width-2; x += 2) |
415 | 0 | { |
416 | 0 | const int t = (int)pSrc [y*iSrcStride + x] + (int)pSrc [y*iSrcStride + x+1] + (int)pSrc [(y+1)*iSrcStride + x] + (int)pSrc [(y+1)*iSrcStride + x+1] |
417 | 0 | - ((int)pSrcM1[y*iSrcM1Stride + x] + (int)pSrcM1[y*iSrcM1Stride + x+1] + (int)pSrcM1[(y+1)*iSrcM1Stride + x] + (int)pSrcM1[(y+1)*iSrcM1Stride + x+1]); |
418 | 0 | taAct += (1 + 3 * abs (t)) >> 1; |
419 | 0 | } |
420 | 0 | } |
421 | 0 | return (taAct ); |
422 | 0 | } |
423 | | |
424 | | uint64_t AvgHighPassWithDownsamplingDiff2ndCore (const int width,const int height,const Pel* pSrc,const Pel* pSrcM1,const Pel* pSrcM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride) |
425 | 0 | { |
426 | 0 | uint64_t taAct = 0; |
427 | |
|
428 | 0 | pSrc -= iSrcStride; |
429 | 0 | pSrc -= iSrcStride; |
430 | 0 | pSrcM1-=iSM1Stride; |
431 | 0 | pSrcM1-=iSM1Stride; |
432 | 0 | pSrcM2-=iSM2Stride; |
433 | 0 | pSrcM2-=iSM2Stride; |
434 | |
|
435 | 0 | for (uint32_t y = 2; y < height-2; y += 2) |
436 | 0 | { |
437 | 0 | for (uint32_t x = 2; x < width-2; x += 2) |
438 | 0 | { |
439 | 0 | const int t = (int)pSrc [y*iSrcStride + x] + (int)pSrc [y*iSrcStride + x+1] + (int)pSrc [(y+1)*iSrcStride + x] + (int)pSrc [(y+1)*iSrcStride + x+1] |
440 | 0 | - 2 * ((int)pSrcM1[y*iSM1Stride + x] + (int)pSrcM1[y*iSM1Stride + x+1] + (int)pSrcM1[(y+1)*iSM1Stride + x] + (int)pSrcM1[(y+1)*iSM1Stride + x+1]) |
441 | 0 | + (int)pSrcM2[y*iSM2Stride + x] + (int)pSrcM2[y*iSM2Stride + x+1] + (int)pSrcM2[(y+1)*iSM2Stride + x] + (int)pSrcM2[(y+1)*iSM2Stride + x+1]; |
442 | 0 | taAct += (uint64_t) abs(t); |
443 | 0 | } |
444 | 0 | } |
445 | 0 | return (taAct); |
446 | 0 | } |
447 | | |
448 | | PelBufferOps::PelBufferOps() |
449 | 256 | { |
450 | 256 | isInitX86Done = false; |
451 | | |
452 | 256 | addAvg = addAvgCore<Pel>; |
453 | 256 | reco = recoCore<Pel>; |
454 | 256 | copyClip = copyClipCore<Pel>; |
455 | 256 | roundGeo = roundGeoCore<Pel>; |
456 | | |
457 | 256 | addAvg4 = addAvgCore<Pel>; |
458 | 256 | addAvg8 = addAvgCore<Pel>; |
459 | 256 | addAvg16 = addAvgCore<Pel>; |
460 | | |
461 | 256 | sub4 = subsCore<Pel>; |
462 | 256 | sub8 = subsCore<Pel>; |
463 | | |
464 | 256 | wghtAvg4 = addWeightedAvgCore<Pel>; |
465 | 256 | wghtAvg8 = addWeightedAvgCore<Pel>; |
466 | | |
467 | 256 | copyClip4 = copyClipCore<Pel>; |
468 | 256 | copyClip8 = copyClipCore<Pel>; |
469 | | |
470 | 256 | reco4 = reconstructCore<Pel>; |
471 | 256 | reco8 = reconstructCore<Pel>; |
472 | | |
473 | 256 | linTf4 = linTfCore<Pel>; |
474 | 256 | linTf8 = linTfCore<Pel>; |
475 | | |
476 | 256 | copyBuffer = copyBufferCore; |
477 | | |
478 | 256 | removeHighFreq8 = removeHighFreq; |
479 | 256 | removeHighFreq4 = removeHighFreq; |
480 | | |
481 | 256 | transpose4x4 = transposeNxNCore<Pel,4>; |
482 | 256 | transpose8x8 = transposeNxNCore<Pel,8>; |
483 | 256 | mipMatrixMul_4_4 = mipMatrixMulCore<4,4>; |
484 | 256 | mipMatrixMul_8_4 = mipMatrixMulCore<8,4>; |
485 | 256 | mipMatrixMul_8_8 = mipMatrixMulCore<8,8>; |
486 | 256 | weightCiip = weightCiipCore; |
487 | 256 | roundIntVector = nullptr; |
488 | | |
489 | 256 | applyLut = applyLutCore; |
490 | | |
491 | 256 | fillPtrMap = fillMapPtr_Core; |
492 | 256 | AvgHighPassWithDownsampling = AvgHighPassWithDownsamplingCore; |
493 | 256 | AvgHighPass = AvgHighPassCore; |
494 | 256 | AvgHighPassWithDownsamplingDiff1st = AvgHighPassWithDownsamplingDiff1stCore; |
495 | 256 | AvgHighPassWithDownsamplingDiff2nd = AvgHighPassWithDownsamplingDiff2ndCore; |
496 | 256 | HDHighPass = HDHighPassCore; |
497 | 256 | HDHighPass2 = HDHighPass2Core; |
498 | 256 | } |
499 | | |
500 | | PelBufferOps g_pelBufOP = PelBufferOps(); |
501 | | |
// Weighted bi-prediction average of other1 and other2 into this buffer,
// using the BCW weight pair selected by BcwIdx:
//   dest = clip( (src0*w0 + src2*w1 + offset) >> shiftNum ).
// Dispatches to the width-multiple-of-8 or -4 kernel; otherwise falls back
// to a scalar macro loop.
template<>
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng, const int8_t BcwIdx)
{
  const int8_t w0 = getBcwWeight( BcwIdx, REF_PIC_LIST_0 );
  const int8_t w1 = getBcwWeight( BcwIdx, REF_PIC_LIST_1 );
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
  const Pel* src0 = other1.buf;
  const Pel* src2 = other2.buf;
  Pel* dest = buf;

  const int src1Stride = other1.stride;
  const int src2Stride = other2.stride;
  const int destStride = stride;
  const int clipbd     = clpRng.bd;
  // Shift undoes both the interpolation-filter headroom and the weight
  // scaling; offset adds the rounding term and removes IF_INTERNAL_OFFS.
  const int shiftNum   = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
  const int offset     = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );

  if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
  }
  else
  {
    // Scalar fallback for odd widths.
#define WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
#define WGHT_AVG_INC    \
    src0 += src1Stride; \
    src2 += src2Stride; \
    dest += destStride; \

    SIZE_AWARE_PER_EL_OP( WGHT_AVG_OP, WGHT_AVG_INC );

#undef WGHT_AVG_OP
#undef WGHT_AVG_INC
  }
}
541 | | |
// Maps every sample of this buffer through pLUT, in place.
template<>
void AreaBuf<Pel>::rspSignal( const Pel* pLUT)
{
  g_pelBufOP.applyLut( buf, stride, buf, stride, width, height, pLUT );
}
547 | | |
548 | | |
// Maps every sample of 'other' through pLUT into this buffer.
template<>
void AreaBuf<Pel>::rspSignal( const AreaBuf<const Pel>& other, const Pel* pLUT)
{
  g_pelBufOP.applyLut( other.buf, other.stride, buf, stride, width, height, pLUT );
}
554 | | |
// In-place chroma residual scaling in CSCALE_FP_PREC fixed point.
// dir == true (forward): divide each sample's magnitude by 'scale' with
// rounding, keeping the sign; result clipped to +/-((1<<bd)-1).
// dir == false (inverse): clip the input to the bit-depth range, multiply
// the magnitude by 'scale' with rounding, restore the sign, and clamp to
// int16 when Pel is 16 bit.
template<>
void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& clpRng)
{
  Pel* dst = buf;
  const Pel* src = buf;
  const int maxAbsclipBD = (1<<clpRng.bd) - 1;

  if (dir) // forward
  {
    if (width == 1)
    {
      THROW("Blocks of width = 1 not supported");
    }
    else
    {
      for (unsigned y = 0; y < height; y++)
      {
        for (unsigned x = 0; x < width; x++)
        {
          // Work on the magnitude so the rounding term is applied
          // symmetrically for negative samples.
          int sign   = src[x] >= 0 ? 1 : -1;
          int absval = sign * src[x];
          dst[x] = (Pel)Clip3(-maxAbsclipBD, maxAbsclipBD, sign * (((absval << CSCALE_FP_PREC) + (scale >> 1)) / scale));
        }
        dst += stride;
        src += stride;
      }
    }
  }
  else // inverse
  {
    for (unsigned y = 0; y < height; y++)
    {
      for (unsigned x = 0; x < width; x++)
      {
        // Clip first so the multiply below cannot overflow int.
        int val    = Clip3<int>((-maxAbsclipBD - 1), maxAbsclipBD, (int)src[x]);
        int sign   = src[x] >= 0 ? 1 : -1;
        int absval = sign * val;
        val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
        if (sizeof(Pel) == 2) // avoid overflow when storing data
        {
          val = Clip3<int>(-32768, 32767, val);
        }
        dst[x] = (Pel)val;
      }
      dst += stride;
      src += stride;
    }
  }
}
604 | | |
605 | | template<> |
606 | | void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng) |
607 | 0 | { |
608 | 0 | const Pel* src0 = other1.buf; |
609 | 0 | const Pel* src2 = other2.buf; |
610 | 0 | Pel* dest = buf; |
611 | |
|
612 | 0 | const unsigned src1Stride = other1.stride; |
613 | 0 | const unsigned src2Stride = other2.stride; |
614 | 0 | const unsigned destStride = stride; |
615 | 0 | const int clipbd = clpRng.bd; |
616 | 0 | const unsigned shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1; |
617 | 0 | const int offset = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS; |
618 | |
|
619 | 0 | #if ENABLE_SIMD_OPT_BUFFER |
620 | 0 | if( destStride == width ) |
621 | 0 | { |
622 | 0 | g_pelBufOP.addAvg(src0, src2, dest, width * height, shiftNum, offset, clpRng); |
623 | 0 | } |
624 | 0 | else if ((width & 15) == 0) |
625 | 0 | { |
626 | 0 | g_pelBufOP.addAvg16(src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng); |
627 | 0 | } |
628 | 0 | else if( ( width & 7 ) == 0 ) |
629 | 0 | { |
630 | 0 | g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng ); |
631 | 0 | } |
632 | 0 | else if( ( width & 3 ) == 0 ) |
633 | 0 | { |
634 | 0 | g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng ); |
635 | 0 | } |
636 | 0 | else |
637 | 0 | #endif |
638 | 0 | { |
639 | 0 | #define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng ) |
640 | 0 | #define ADD_AVG_INC \ |
641 | 0 | src0 += src1Stride; \ |
642 | 0 | src2 += src2Stride; \ |
643 | 0 | dest += destStride; \ |
644 | 0 |
|
645 | 0 | SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC ); |
646 | |
|
647 | 0 | #undef ADD_AVG_OP |
648 | 0 | #undef ADD_AVG_INC |
649 | 0 | } |
650 | 0 | } |
651 | | |
// Per-sample difference into this buffer: dest = minuend - subtrahend.
// All three areas must have identical dimensions. Width multiples of
// 8 / 4 use the strided kernels; otherwise a scalar macro loop.
template<>
void AreaBuf<Pel>::subtract( const AreaBuf<const Pel>& minuend, const AreaBuf<const Pel>& subtrahend )
{
  CHECKD( width  != minuend.width,     "Incompatible size" );
  CHECKD( height != minuend.height,    "Incompatible size" );
  CHECKD( width  != subtrahend.width,  "Incompatible size");
  CHECKD( height != subtrahend.height, "Incompatible size");

  Pel* dest       = buf;
  const Pel* mins = minuend   .buf;
  const Pel* subs = subtrahend.buf;

#if ENABLE_SIMD_OPT_BUFFER
  const unsigned destStride = stride;
  const unsigned minsStride = minuend.   stride;
  const unsigned subsStride = subtrahend.stride;

  if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.sub8( mins, minsStride, subs, subsStride, dest, destStride, width, height );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.sub4( mins, minsStride, subs, subsStride, dest, destStride, width, height );
  }
  else
#endif
  {
    // Scalar fallback.
#define SUBS_INC                \
    dest += stride;             \
    mins += minuend   .stride;  \
    subs += subtrahend.stride;  \

#define SUBS_OP( ADDR ) dest[ADDR] = mins[ADDR] - subs[ADDR]

    SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );

#undef SUBS_OP
#undef SUBS_INC
  }
}
694 | | |
695 | | template<> |
696 | | void AreaBuf<const Pel>::calcVarianceSplit( const AreaBuf<const Pel>& Org, const uint32_t size, int& varh,int& varv) const |
697 | 0 | { |
698 | 0 | CHECK( Org.width != Org.height, "Incompatible size!" ); |
699 | 0 | int stride = Org.stride; |
700 | 0 | const Pel* src; |
701 | 0 | Pel data; |
702 | 0 | double variance=0; |
703 | 0 | double mean=0; |
704 | 0 | int64_t sum[4]={0,0,0,0}; |
705 | 0 | int64_t sum_sqr[4]={0,0,0,0}; |
706 | 0 | uint32_t halfsize =size>>1; |
707 | 0 | uint32_t off[4]={0,halfsize,size*halfsize,size*halfsize+halfsize}; |
708 | 0 | int n,x,y; |
709 | |
|
710 | 0 | for( n = 0; n < 4; n++) |
711 | 0 | { |
712 | 0 | src = Org.buf+off[n]; |
713 | 0 | for( y = 0; y < halfsize; y++) |
714 | 0 | { |
715 | 0 | for(x = 0; x < halfsize; x++) |
716 | 0 | { |
717 | 0 | data=src[y*stride+x]; |
718 | 0 | sum[n]+=data; |
719 | 0 | sum_sqr[n]+= data*data; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | } |
723 | 0 | int num=size*(size>>1); |
724 | | // varhu |
725 | 0 | mean=(double)(sum[0]+sum[1])/(num); |
726 | 0 | variance = (double)(sum_sqr[0]+sum_sqr[1])/(num) - (mean*mean); |
727 | 0 | varh =(int)(variance+0.5); |
728 | | // varhl |
729 | 0 | mean=(double)(sum[2]+sum[3])/(num); |
730 | 0 | variance = (double)(sum_sqr[2]+sum_sqr[3])/(num) - (mean*mean); |
731 | 0 | varh +=(int)(variance+0.5); |
732 | | // varvl |
733 | 0 | mean=(double)(sum[0]+sum[2])/(num); |
734 | 0 | variance = (double)(sum_sqr[0]+sum_sqr[2])/(num) - (mean*mean); |
735 | 0 | varv =(int)(variance+0.5); |
736 | | // varvr |
737 | 0 | mean=(double)(sum[1]+sum[3])/(num); |
738 | 0 | variance = (double)(sum_sqr[1]+sum_sqr[3])/(num) - (mean*mean); |
739 | 0 | varv +=(int)(variance+0.5); |
740 | 0 | } |
741 | | |
742 | | template<> |
743 | | void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel>& src, const ClpRng& clpRng ) |
744 | 0 | { |
745 | 0 | const Pel* srcp = src.buf; |
746 | 0 | Pel* dest = buf; |
747 | |
|
748 | 0 | const unsigned srcStride = src.stride; |
749 | 0 | const unsigned destStride = stride; |
750 | |
|
751 | 0 | if( destStride == width) |
752 | 0 | { |
753 | 0 | g_pelBufOP.copyClip(srcp, dest, width * height, clpRng); |
754 | 0 | } |
755 | 0 | else if ((width & 7) == 0) |
756 | 0 | { |
757 | 0 | g_pelBufOP.copyClip8(srcp, srcStride, dest, destStride, width, height, clpRng); |
758 | 0 | } |
759 | 0 | else if ((width & 3) == 0) |
760 | 0 | { |
761 | 0 | g_pelBufOP.copyClip4(srcp, srcStride, dest, destStride, width, height, clpRng); |
762 | 0 | } |
763 | 0 | else |
764 | 0 | { |
765 | 0 | for( int y = 0; y < height; y++ ) |
766 | 0 | { |
767 | 0 | dest[0] = ClipPel( srcp[0], clpRng); |
768 | 0 | dest[1] = ClipPel( srcp[1], clpRng); |
769 | 0 | srcp += srcStride; |
770 | 0 | dest += destStride; |
771 | 0 | } \ |
772 | 0 | } |
773 | 0 | } |
774 | | |
// Reconstruction into this buffer: dest = clip( pred + resi ).
// Dispatch: contiguous residual -> linear kernel; width multiples of
// 8 / 4 -> strided kernels; width 2 and width 1 get explicit scalar loops.
template<>
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel>& pred, const AreaBuf<const Pel>& resi, const ClpRng& clpRng )
{
  const Pel* src1 = pred.buf;
  const Pel* src2 = resi.buf;
  Pel* dest = buf;

  const unsigned src1Stride = pred.stride;
  const unsigned src2Stride = resi.stride;
  const unsigned destStride = stride;
  if( src2Stride == width )
  {
    // NOTE(review): only the residual stride is tested here; the linear
    // kernel also reads pred.buf and writes buf linearly, so this relies
    // on pred/dest being contiguous whenever resi is — confirm callers.
    g_pelBufOP.reco( pred.buf, resi.buf, buf, width * height, clpRng );
  }
  else if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
  }
  else if( ( width & 1 ) == 0 )
  {
    // width == 2 scalar path.
    for( int y = 0; y < height; y++ )
    {
      dest[0] = ClipPel( src1[0] + src2[0], clpRng);
      dest[1] = ClipPel( src1[1] + src2[1], clpRng);
      src1 += src1Stride;
      src2 += src2Stride;
      dest += destStride;
    }
  }
  else
  {
    CHECKD( width != 1, "Expecting width to be '1'!" );

    for( int y = 0; y < height; y++ )
    {
      dest[0] = ClipPel( src1[0] + src2[0], clpRng );

      src1 += src1Stride;
      src2 += src2Stride;
      dest += destStride;
    }
  }
}
822 | | |
// Applies the affine mapping x -> rightShiftU( scale * x, shift ) + offset to
// every sample of the block in place, optionally clipping to 'clpRng'.
template<>
void AreaBuf<Pel>::linearTransform( const int scale, const unsigned shift, const int offset, bool bClip, const ClpRng& clpRng )
{
  // In-place transform: source and destination alias the same buffer.
  const Pel* src = buf;
  Pel* dst = buf;

  if( stride == width)
  {
    // Contiguous block: fold several lines into one wider virtual line so a
    // wider SIMD kernel can be used (linTf8 consumes 4 lines per call line,
    // linTf4 consumes 2).
    // NOTE(review): this assumes height is divisible by 4 (resp. 2) on these
    // paths -- presumably guaranteed by the callers; confirm.
    if( width > 2 && height > 2 )
    {
      g_pelBufOP.linTf8( src, stride<<2, dst, stride<<2, width<<2, height>>2, scale, shift, offset, clpRng, bClip );
    }
    else
    {
      g_pelBufOP.linTf4( src, stride<<1, dst, stride<<1, width<<1, height>>1, scale, shift, offset, clpRng, bClip );
    }
  }
  else if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
  }
  else
  {
    // Scalar fallback: only the first two columns are processed, so this
    // path expects width == 2 (width is neither a multiple of 4 nor 8 here).
    if( bClip )
    {
      for( int y = 0; y < height; y++ )
      {
        dst[0] = ( Pel ) ClipPel( rightShiftU( scale * src[0], shift ) + offset, clpRng );
        dst[1] = ( Pel ) ClipPel( rightShiftU( scale * src[1], shift ) + offset, clpRng );
        src += stride;
        dst += stride;
      }
    }
    else
    {
      for( int y = 0; y < height; y++ )
      {
        dst[0] = ( Pel ) ( rightShiftU( scale * src[0], shift ) + offset );
        dst[1] = ( Pel ) ( rightShiftU( scale * src[1], shift ) + offset );
        src += stride;
        dst += stride;
      }
    }
  }
}
872 | | |
873 | | #if ENABLE_SIMD_OPT_BUFFER |
874 | | |
// Fills this buffer with the transpose of 'other' (rows become columns).
template<>
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel>& other )
{
  CHECK( width != other.height || height != other.width, "Incompatible size" );

  if( ( ( width | height ) & 7 ) == 0 )
  {
    // Both dimensions are multiples of 8: transpose in 8x8 SIMD tiles.
    const Pel* src = other.buf;

    for( unsigned y = 0; y < other.height; y += 8 )
    {
      // Source row band y maps to destination column band y.
      Pel* dst = buf + y;

      for( unsigned x = 0; x < other.width; x += 8 )
      {
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );

        dst += 8 * stride;
      }

      src += 8 * other.stride;
    }
  }
  else if( ( ( width | height ) & 3 ) == 0 )
  {
    // Both dimensions are multiples of 4: transpose in 4x4 tiles.
    const Pel* src = other.buf;

    for( unsigned y = 0; y < other.height; y += 4 )
    {
      Pel* dst = buf + y;

      for( unsigned x = 0; x < other.width; x += 4 )
      {
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );

        dst += 4 * stride;
      }

      src += 4 * other.stride;
    }
  }
  else
  {
    // Scalar fallback.
    // NOTE(review): the CHECK above already enforces width == other.height
    // and height == other.width, so the two assignments below look redundant;
    // the stride widening only takes effect if the current stride is smaller
    // than the width -- confirm intent.
    Pel* dst = buf;
    const Pel* src = other.buf;
    width = other.height;
    height = other.width;
    stride = stride < width ? width : stride;

    for( unsigned y = 0; y < other.height; y++ )
    {
      for( unsigned x = 0; x < other.width; x++ )
      {
        dst[y + x*stride] = src[x + y * other.stride];
      }
    }
  }
}
933 | | #endif |
934 | | |
935 | | template<> |
936 | | void AreaBuf<Pel>::weightCiip( const AreaBuf<const Pel>& intra, const int numIntra ) |
937 | 0 | { |
938 | 0 | CHECK(width == 2, "Width of 2 is not supported"); |
939 | 0 | g_pelBufOP.weightCiip( buf, intra.buf, width * height, numIntra ); |
940 | 0 | } |
941 | | |
942 | | template<> |
943 | | void AreaBuf<MotionInfo>::fill( const MotionInfo& val ) |
944 | 0 | { |
945 | 0 | if( width == stride ) |
946 | 0 | { |
947 | 0 | std::fill_n( buf, width * height, val ); |
948 | 0 | } |
949 | 0 | else |
950 | 0 | { |
951 | 0 | MotionInfo* dst = buf; |
952 | |
|
953 | 0 | for( int y = 0; y < height; y++, dst += stride ) |
954 | 0 | { |
955 | 0 | std::fill_n( dst, width, val ); |
956 | 0 | } |
957 | 0 | } |
958 | 0 | } |
959 | | |
960 | | PelStorage::PelStorage() |
961 | 0 | { |
962 | 0 | for( uint32_t i = 0; i < MAX_NUM_COMP; i++ ) |
963 | 0 | { |
964 | 0 | m_origin[i] = nullptr; |
965 | 0 | } |
966 | 0 | } |
967 | | |
// Releases every allocation owned by this storage.
PelStorage::~PelStorage()
{
  destroy();
}
972 | | |
// Creates the storage sized for the given unit area (single shared
// allocation, see the two-argument overload).
void PelStorage::create( const UnitArea& _UnitArea )
{
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
  // The delegate derives m_maxArea from the luma block only; overwrite it
  // with the caller's exact unit area.
  m_maxArea = _UnitArea;
}
978 | | |
979 | | void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area ) |
980 | 0 | { |
981 | 0 | CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" ); |
982 | |
|
983 | 0 | chromaFormat = _chromaFormat; |
984 | |
|
985 | 0 | const uint32_t numComp = getNumberValidComponents( _chromaFormat ); |
986 | |
|
987 | 0 | uint32_t bufSize = 0; |
988 | 0 | for( uint32_t i = 0; i < numComp; i++ ) |
989 | 0 | { |
990 | 0 | const ComponentID compID = ComponentID( i ); |
991 | 0 | const unsigned totalWidth = _area.width >> getComponentScaleX( compID, _chromaFormat ); |
992 | 0 | const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat ); |
993 | |
|
994 | 0 | const uint32_t area = totalWidth * totalHeight; |
995 | 0 | CHECK( !area, "Trying to create a buffer with zero area" ); |
996 | 0 | bufSize += area; |
997 | 0 | } |
998 | | |
999 | 0 | bufSize += 1; // for SIMD DMVR on the bottom right corner, which overreads the lines by 1 sample |
1000 | | |
1001 | | //allocate one buffer |
1002 | 0 | m_origin[0] = ( Pel* ) xMalloc( Pel, bufSize ); |
1003 | |
|
1004 | 0 | Pel* topLeft = m_origin[0]; |
1005 | 0 | for( uint32_t i = 0; i < numComp; i++ ) |
1006 | 0 | { |
1007 | 0 | const ComponentID compID = ComponentID( i ); |
1008 | 0 | const unsigned totalWidth = _area.width >> getComponentScaleX( compID, _chromaFormat ); |
1009 | 0 | const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat ); |
1010 | 0 | const uint32_t area = totalWidth * totalHeight; |
1011 | |
|
1012 | 0 | bufs.push_back( PelBuf( topLeft, totalWidth, totalWidth, totalHeight ) ); |
1013 | 0 | topLeft += area; |
1014 | 0 | } |
1015 | |
|
1016 | 0 | m_maxArea = UnitArea( _chromaFormat, _area ); |
1017 | 0 | } |
1018 | | |
// Creates one allocation per component with a surrounding margin and an
// optionally alignment-padded stride. The picture size may be rounded up to
// whole CTUs; the exposed view keeps the original (unextended) dimensions.
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignment, const bool _scaleChromaMargin )
{
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );

  chromaFormat = _chromaFormat;

  const uint32_t numComp = getNumberValidComponents( _chromaFormat );

  unsigned extHeight = _area.height;
  unsigned extWidth = _area.width;

  if( _maxCUSize )
  {
    // Round the picture size up to a whole number of max-size CUs.
    extHeight = ( ( _area.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
    extWidth = ( ( _area.width + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
  }

  for( uint32_t i = 0; i < numComp; i++ )
  {
    const ComponentID compID = ComponentID( i );
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );

    unsigned scaledHeight = extHeight >> scaleY;
    unsigned scaledWidth = extWidth >> scaleX;
    // Margin may optionally be scaled down for subsampled chroma planes.
    unsigned ymargin = _margin >> (_scaleChromaMargin?scaleY:0);
    unsigned xmargin = _margin >> (_scaleChromaMargin?scaleX:0);
    unsigned totalWidth = scaledWidth + 2*xmargin;
    unsigned totalHeight = scaledHeight +2*ymargin;

    if( _alignment )
    {
      // make sure buffer lines are align
      CHECK( _alignment != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
    }
    uint32_t area = totalWidth * totalHeight;
    CHECK( !area, "Trying to create a buffer with zero area" );

    m_origin[i] = ( Pel* ) xMalloc( Pel, area );
    // The exposed view starts inside the margin; the stride spans the margins
    // while width/height stay the caller's (unextended) plane dimensions.
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
    bufs.push_back( PelBuf( topLeft, totalWidth, _area.width >> scaleX, _area.height >> scaleY ) );
  }

  m_maxArea = UnitArea( _chromaFormat, _area );
}
1065 | | |
1066 | | void PelStorage::createFromBuf( PelUnitBuf buf ) |
1067 | 0 | { |
1068 | 0 | chromaFormat = buf.chromaFormat; |
1069 | |
|
1070 | 0 | const uint32_t numCh = getNumberValidComponents( chromaFormat ); |
1071 | |
|
1072 | 0 | bufs.resize(numCh); |
1073 | |
|
1074 | 0 | for( uint32_t i = 0; i < numCh; i++ ) |
1075 | 0 | { |
1076 | 0 | PelBuf cPelBuf = buf.get( ComponentID( i ) ); |
1077 | 0 | bufs[i] = PelBuf( cPelBuf.bufAt( 0, 0 ), cPelBuf.stride, cPelBuf.width, cPelBuf.height ); |
1078 | 0 | } |
1079 | 0 | } |
1080 | | |
1081 | | void PelStorage::compactResize( const UnitArea& area ) |
1082 | 0 | { |
1083 | 0 | CHECK( bufs.size() < area.blocks.size(), "Cannot increase buffer size when compacting!" ); |
1084 | |
|
1085 | 0 | for( uint32_t i = 0; i < area.blocks.size(); i++ ) |
1086 | 0 | { |
1087 | 0 | CHECK( m_maxArea.blocks[i].area() < area.blocks[i].area(), "Cannot increase buffer size when compacting!" ); |
1088 | |
|
1089 | 0 | bufs[i].Size::operator=( area.blocks[i].size() ); |
1090 | 0 | bufs[i].stride = bufs[i].width; |
1091 | 0 | } |
1092 | 0 | } |
1093 | | |
// Steals the pixel memory of 'other': copies its plane descriptors, swaps the
// ownership pointers over, and finally destroys 'other' (which then frees our
// previous allocations, received via the swap).
void PelStorage::takeOwnership( PelStorage& other )
{
  chromaFormat = other.chromaFormat;

  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  bufs.resize(numCh);

  for( uint32_t i = 0; i < numCh; i++ )
  {
    PelBuf cPelBuf = other.get( ComponentID( i ) );
    bufs[i] = PelBuf( cPelBuf.bufAt( 0, 0 ), cPelBuf.stride, cPelBuf.width, cPelBuf.height );
    // Swap (not copy) the origin so other.destroy() releases our old memory
    // rather than the memory we just took over.
    std::swap( m_origin[i], other.m_origin[i]);
  }

  m_maxArea = other.m_maxArea;

  other.destroy();
}
1113 | | |
1114 | | |
// Exchanges buffer pointers, strides and memory ownership with 'other'.
// Both storages must have the same chroma format and identically sized
// planes; plane geometry (width/height) is not swapped.
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < numCh; i++ )
  {
    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat != other.chromaFormat , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ) != other.get( ComponentID( i ) ) , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ).stride != other.get( ComponentID( i ) ).stride, "Incompatible formats" );

    std::swap( bufs[i].buf, other.bufs[i].buf );
    std::swap( bufs[i].stride, other.bufs[i].stride );
    std::swap( m_origin[i], other.m_origin[i] );
  }
}
1131 | | |
1132 | | void PelStorage::destroy() |
1133 | 0 | { |
1134 | 0 | chromaFormat = NUM_CHROMA_FORMAT; |
1135 | 0 | for( uint32_t i = 0; i < MAX_NUM_COMP; i++ ) |
1136 | 0 | { |
1137 | 0 | if( m_origin[i] ) |
1138 | 0 | { |
1139 | 0 | xFree( m_origin[i] ); |
1140 | 0 | m_origin[i] = nullptr; |
1141 | 0 | } |
1142 | 0 | } |
1143 | 0 | bufs.clear(); |
1144 | 0 | } |
1145 | | |
// Returns the full stored plane view for the given component.
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  return bufs[CompID];
}
1150 | | |
// Returns the full stored plane view for the given component (read-only).
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  return bufs[CompID];
}
1155 | | |
1156 | | PelBuf PelStorage::getBuf( const CompArea& blk ) |
1157 | 0 | { |
1158 | 0 | const PelBuf& r = bufs[blk.compID]; |
1159 | 0 | return PelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk ); |
1160 | 0 | } |
1161 | | |
1162 | | const CPelBuf PelStorage::getBuf( const CompArea& blk ) const |
1163 | 0 | { |
1164 | 0 | const PelBuf& r = bufs[blk.compID]; |
1165 | 0 | return CPelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk ); |
1166 | 0 | } |
1167 | | |
1168 | | PelUnitBuf PelStorage::getBuf( const UnitArea& unit ) |
1169 | 0 | { |
1170 | 0 | return ( chromaFormat == CHROMA_400 ) ? PelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) ); |
1171 | 0 | } |
1172 | | |
1173 | | const CPelUnitBuf PelStorage::getBuf( const UnitArea& unit ) const |
1174 | 0 | { |
1175 | 0 | return ( chromaFormat == CHROMA_400 ) ? CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) ); |
1176 | 0 | } |
1177 | | |
1178 | | PelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) |
1179 | 0 | { |
1180 | 0 | CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" ); |
1181 | 0 | CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" ); |
1182 | 0 | CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" ); |
1183 | 0 | CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" ); |
1184 | 0 | return (chromaFormat == CHROMA_400) ? PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y())) : PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y()), PelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()), PelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr())); |
1185 | 0 | } |
1186 | | |
1187 | | const CPelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) const |
1188 | 0 | { |
1189 | 0 | CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" ); |
1190 | 0 | CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" ); |
1191 | 0 | CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" ); |
1192 | 0 | CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" ); |
1193 | 0 | return (chromaFormat == CHROMA_400) ? CPelUnitBuf(chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y())) : CPelUnitBuf(chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y()), CPelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()), CPelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr())); |
1194 | 0 | } |
1195 | | |
1196 | | PelUnitBuf PelStorage::getBufPart(const UnitArea& unit) |
1197 | 0 | { |
1198 | 0 | CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" ); |
1199 | 0 | return (chromaFormat == CHROMA_400) ? PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y())) : PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y()), PelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb()), PelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr())); |
1200 | 0 | } |
1201 | | |
// Read-only multi-plane view of size 'unit' anchored at the plane origins.
// NOTE(review): unlike the non-const overload, the strides here are the
// requested block widths (compact view), not the stored plane strides --
// confirm this asymmetry is intentional.
// NOTE(review): the '&&' in the size check only fires when BOTH dimensions
// exceed the stored plane; presumably '||' was intended -- confirm.
const CPelUnitBuf PelStorage::getBufPart(const UnitArea& unit) const
{
  CHECKD(unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request");
  return (chromaFormat == CHROMA_400) ? CPelUnitBuf(chromaFormat, CPelBuf(bufs[COMP_Y].buf, unit.Y().width, unit.Y())) : CPelUnitBuf(chromaFormat, CPelBuf(bufs[COMP_Y].buf, unit.Y().width, unit.Y()), CPelBuf(bufs[COMP_Cb].buf, unit.Cb().width, unit.Cb()), CPelBuf(bufs[COMP_Cr].buf, unit.Cr().width, unit.Cr()));
}
1207 | | |
1208 | | const CPelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) const |
1209 | 0 | { |
1210 | 0 | CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" ); |
1211 | |
|
1212 | 0 | PelUnitBuf ret; |
1213 | 0 | ret.chromaFormat = chromaFormat; |
1214 | 0 | ret.bufs.resize_noinit( chromaFormat == CHROMA_400 ? 1 : 3 ); |
1215 | | |
1216 | 0 | ret.Y ().buf = bufs[COMP_Y ].buf; ret.Y ().width = ret.Y ().stride = unit.Y ().width; ret.Y ().height = unit.Y ().height; |
1217 | 0 | if( chromaFormat != CHROMA_400 ) |
1218 | 0 | { |
1219 | 0 | ret.Cb().buf = bufs[COMP_Cb].buf; ret.Cb().width = ret.Cb().stride = unit.Cb().width; ret.Cb().height = unit.Cb().height; |
1220 | 0 | ret.Cr().buf = bufs[COMP_Cr].buf; ret.Cr().width = ret.Cr().stride = unit.Cr().width; ret.Cr().height = unit.Cr().height; |
1221 | 0 | } |
1222 | |
|
1223 | 0 | return ret; |
1224 | 0 | } |
1225 | | |
1226 | | PelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) |
1227 | 0 | { |
1228 | 0 | CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" ); |
1229 | |
|
1230 | 0 | PelUnitBuf ret; |
1231 | 0 | ret.chromaFormat = chromaFormat; |
1232 | 0 | ret.bufs.resize_noinit( chromaFormat == CHROMA_400 ? 1 : 3 ); |
1233 | |
|
1234 | 0 | ret.Y ().buf = bufs[COMP_Y ].buf; ret.Y ().width = ret.Y ().stride = unit.Y ().width; ret.Y ().height = unit.Y ().height; |
1235 | 0 | if( chromaFormat != CHROMA_400 ) |
1236 | 0 | { |
1237 | 0 | ret.Cb().buf = bufs[COMP_Cb].buf; ret.Cb().width = ret.Cb().stride = unit.Cb().width; ret.Cb().height = unit.Cb().height; |
1238 | 0 | ret.Cr().buf = bufs[COMP_Cr].buf; ret.Cr().width = ret.Cr().stride = unit.Cr().width; ret.Cr().height = unit.Cr().height; |
1239 | 0 | } |
1240 | |
|
1241 | 0 | return ret; |
1242 | 0 | } |
1243 | | |
1244 | | const CPelBuf PelStorage::getCompactBuf(const CompArea& carea) const |
1245 | 0 | { |
1246 | 0 | return CPelBuf( bufs[carea.compID].buf, carea.width, carea); |
1247 | 0 | } |
1248 | | |
1249 | | PelBuf PelStorage::getCompactBuf(const CompArea& carea) |
1250 | 0 | { |
1251 | 0 | return PelBuf( bufs[carea.compID].buf, carea.width, carea); |
1252 | 0 | } |
1253 | | |
// Downsamples the input plane into 'dest' by box-averaging
// downsampleStep x downsampleStep source blocks.
void downsampleYuv(PelBuf& dest, const vvencYUVPlane& yuvPlaneIn, int downsampleStep)
{
  const int widthd = dest.width;
  const int heightd = dest.height;
  int difStride = dest.stride - dest.width;   // line padding of the destination

  const int16_t* src = yuvPlaneIn.ptr;
  const int instride = yuvPlaneIn.stride;
  const int width = yuvPlaneIn.width;
  int n = 0;                                  // linear write index into dest.buf
  for (int j = 0; j < heightd; j++)
  {
    int i = 0;
    for (i = 0; i < widthd; i++)
    {
      long int b = 0;                         // accumulator for one source block
      // Sum the downsampleStep x downsampleStep block.
      // NOTE(review): rows are addressed with 'width', not 'instride' -- only
      // correct when the source has no line padding; confirm with callers.
      for (int r = 0; r < downsampleStep; r++)
      {
        int posr = width * r;
        for (int n = 0; n < downsampleStep; n++)   // NOTE: shadows the outer 'n'
        {
          b += src[posr + n];
        }
      }
      src += downsampleStep;
      // NOTE(review): rounding divisor is downsampleStep << 1 (== 4, the 2x2
      // block size, only for step 2); for other steps this is not the number
      // of summed samples -- presumably only step 2 is ever used; confirm.
      dest.buf[n] = (int16_t)((b + 2) / (downsampleStep << 1));
      n++;
    }
    n += difStride;
    // Rewind to the row start, then advance past the consumed source lines.
    src = src - downsampleStep * i + width;

    src += (instride * (downsampleStep - 1));
  }
}
1288 | | |
// Copies the planes of 'yuvBuffer' into 'pelUnitBuf'. A destination plane
// narrower than its source is filled by 2:1 box downsampling; otherwise the
// data is copied and the last column/row replicated to pad up to the
// destination size.
void copyPadToPelUnitBuf( PelUnitBuf pelUnitBuf, const vvencYUVBuffer& yuvBuffer, const ChromaFormat& chFmt )
{
  CHECK( pelUnitBuf.bufs.size() == 0, "pelUnitBuf not initialized" );
  pelUnitBuf.chromaFormat = chFmt;
  const int numComp = getNumberValidComponents( chFmt );
  for ( int i = 0; i < numComp; i++ )
  {
    const vvencYUVPlane& src = yuvBuffer.planes[ i ];
    CHECK( src.ptr == nullptr, "yuvBuffer not setup" );
    PelBuf& dest = pelUnitBuf.bufs[i];
    CHECK( dest.buf == nullptr, "yuvBuffer not setup" );

    if (dest.width < src.width)
    {
      // Destination is smaller: 2:1 box downsampling.
      downsampleYuv(dest, src, 2);
    }
    else
    {
      for (int y = 0; y < src.height; y++)
      {
        ::memcpy(dest.buf + y * dest.stride, src.ptr + y * src.stride, src.width * sizeof(int16_t));

        // pad right if required (replicate the last source column)
        for (int x = src.width; x < dest.width; x++)
        {
          dest.buf[x + y * dest.stride] = dest.buf[src.width - 1 + y * dest.stride];
        }
      }

      // pad bottom if required (replicate the last written row)
      for (int y = src.height; y < dest.height; y++)
      {
        ::memcpy(dest.buf + y * dest.stride, dest.buf + (src.height - 1) * dest.stride, dest.width * sizeof(int16_t));
      }
    }
  }
}
1326 | | |
1327 | | /* |
1328 | | void setupPelUnitBuf( const YUVBuffer& yuvBuffer, PelUnitBuf& pelUnitBuf, const ChromaFormat& chFmt ) |
1329 | | { |
1330 | | CHECK( pelUnitBuf.bufs.size() != 0, "pelUnitBuf already in use" ); |
1331 | | pelUnitBuf.chromaFormat = chFmt; |
1332 | | const int numComp = getNumberValidComponents( chFmt ); |
1333 | | for ( int i = 0; i < numComp; i++ ) |
1334 | | { |
1335 | | const YUVBuffer::Plane& yuvPlane = yuvBuffer.planes[ i ]; |
1336 | | CHECK( yuvPlane.ptr == nullptr, "yuvBuffer not setup" ); |
1337 | | PelBuf area( yuvPlane.ptr, yuvPlane.stride, yuvPlane.width, yuvPlane.height ); |
1338 | | pelUnitBuf.bufs.push_back( area ); |
1339 | | } |
1340 | | } |
1341 | | */ |
// Exposes the (conformance-window-cropped) planes of 'pelUnitBuf' through the
// external vvencYUVBuffer interface. No pixel data is copied; the yuvBuffer
// planes must not yet be in use.
void setupYuvBuffer ( const PelUnitBuf& pelUnitBuf, vvencYUVBuffer& yuvBuffer, const Window* confWindow )
{
  const ChromaFormat chFmt = pelUnitBuf.chromaFormat;
  const int numComp = getNumberValidComponents( chFmt );
  for ( int i = 0; i < numComp; i++ )
  {
    const ComponentID compId = ComponentID( i );
    PelBuf area = pelUnitBuf.get( compId );
    const int sx = getComponentScaleX( compId, chFmt );
    const int sy = getComponentScaleY( compId, chFmt );
    vvencYUVPlane& yuvPlane = yuvBuffer.planes[ i ];
    CHECK( yuvPlane.ptr != nullptr, "yuvBuffer already in use" );
    // Window offsets are given in luma samples; shift to this plane's grid.
    yuvPlane.ptr = area.bufAt( confWindow->winLeftOffset >> sx, confWindow->winTopOffset >> sy );
    yuvPlane.width = ( ( area.width << sx ) - ( confWindow->winLeftOffset + confWindow->winRightOffset ) ) >> sx;
    yuvPlane.height = ( ( area.height << sy ) - ( confWindow->winTopOffset + confWindow->winBottomOffset ) ) >> sy;
    yuvPlane.stride = area.stride;
  }
}
1360 | | |
1361 | | } // namespace vvenc |
1362 | | |
1363 | | //! \} |
1364 | | |