/src/vvdec/source/Lib/CommonLib/Buffer.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /** \file Buffer.cpp |
44 | | * \brief Low-overhead class describing 2D memory layout |
45 | | */ |
46 | | |
47 | | #define DONT_UNDEF_SIZE_AWARE_PER_EL_OP |
48 | | |
49 | | // unit needs to come first due to a forward declaration |
50 | | |
#include "Unit.h"
#include "Buffer.h"
#include "InterpolationFilter.h"
#include "Picture.h"
#include "Slice.h"

#include <vector>
56 | | |
57 | | #if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 ) |
58 | | #include "CommonDefX86.h" |
59 | | #include <simde/x86/sse.h> |
60 | | #endif |
61 | | |
62 | | namespace vvdec |
63 | | { |
64 | | |
// Scalar fallback for bi-prediction averaging:
//   dest = clip( ( src1 + src2 + offset ) >> rshift )
// The per-sample operation and per-row pointer advance are handed to
// SIZE_AWARE_PER_EL_OP as macros, which expands them over the width x height block.
template< typename T >
void addAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
{
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
#define ADD_AVG_CORE_INC \
  src1 += src1Stride;    \
  src2 += src2Stride;    \
  dest += dstStride;     \

  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );

#undef ADD_AVG_CORE_OP
#undef ADD_AVG_CORE_INC
}
79 | | |
// Scalar fallback for sample reconstruction: dest = clip( pred + residual ).
template<typename T>
void reconstructCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
{
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
#define RECO_CORE_INC  \
  src1 += src1Stride;  \
  src2 += src2Stride;  \
  dest += dstStride;   \

  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );

#undef RECO_CORE_OP
#undef RECO_CORE_INC
}
94 | | |
95 | | |
96 | | template<typename T> |
97 | | void linTfCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip ) |
98 | 0 | { |
99 | 0 | #define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset ) |
100 | 0 | #define LINTF_CORE_INC \ |
101 | 0 | src += srcStride; \ |
102 | 0 | dst += dstStride; \ |
103 | 0 |
|
104 | 0 | SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC ); |
105 | |
|
106 | 0 | #undef LINTF_CORE_OP |
107 | 0 | #undef LINTF_CORE_INC |
108 | 0 | } |
109 | | |
110 | | template<typename T> |
111 | | void transpose4x4Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride ) |
112 | 0 | { |
113 | 0 | for( int i = 0; i < 4; i++ ) |
114 | 0 | { |
115 | 0 | for( int j = 0; j < 4; j++ ) |
116 | 0 | { |
117 | 0 | dst[j * dstStride] = src[j]; |
118 | 0 | } |
119 | |
|
120 | 0 | dst++; |
121 | 0 | src += srcStride; |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | template<typename T> |
126 | | void transpose8x8Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride ) |
127 | 0 | { |
128 | 0 | for( int i = 0; i < 8; i++ ) |
129 | 0 | { |
130 | 0 | for( int j = 0; j < 8; j++ ) |
131 | 0 | { |
132 | 0 | dst[j * dstStride] = src[j]; |
133 | 0 | } |
134 | |
|
135 | 0 | dst++; |
136 | 0 | src += srcStride; |
137 | 0 | } |
138 | 0 | } |
139 | | |
// Scalar fallback: copy a block while clipping every sample to the valid range.
template<typename T>
void copyClipCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
{
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
#define RECO_INC    \
  src += srcStride; \
  dst += dstStride; \

  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );

#undef RECO_OP
#undef RECO_INC
}
153 | | |
// Scalar fallback for weighted bi-prediction:
//   dest = clip( ( src1*w0 + src2*w1 + offset ) >> rshift )
// w0/w1 are the per-list weights (BCW); offset provides the rounding term.
template<typename T>
void addWeightedAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t destStride, int width, int height, int rshift, int offset, int w0, int w1, const ClpRng& clpRng )
{
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
#define ADD_WGHT_AVG_INC \
  src1 += src1Stride;    \
  src2 += src2Stride;    \
  dest += destStride;    \

  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );

#undef ADD_WGHT_AVG_OP
#undef ADD_WGHT_AVG_INC
}
168 | | |
// Copy a width x height region of raw bytes between two strided buffers.
// Fast path: when both buffers are fully contiguous (stride == width), a
// single memcpy covers the whole area.
//
// Fix: the fast-path byte count was computed as int * int, which can overflow
// for planes larger than 2 GiB; widen to size_t before multiplying.
void copyBufferCore( const char *src, ptrdiff_t srcStride, char *dst, ptrdiff_t dstStride, int width, int height )
{
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
  _mm_prefetch( (const char *) ( src ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
  if( width == srcStride && width == dstStride )
  {
    memcpy( dst, src, size_t( width ) * size_t( height ) );
    return;
  }

  for( int i = 0; i < height; i++ )
  {
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
    // Prefetch the next row of each buffer while copying the current one.
    _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
    _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
    memcpy( dst, src, width );

    src += srcStride;
    dst += dstStride;
  }
}
197 | | |
// Apply a lookup table in place to every sample of the block
// (ptr[x] = lut[ptr[x]]); used for the LMCS/reshaper sample mapping.
void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut )
{
  // Kept from upstream: equivalent lambda-based formulation for reference.
  // const auto rsp_sgnl_op = [=, &dst]( int ADDR ){ dst[ADDR] = lut[dst[ADDR]]; };
  // const auto rsp_sgnl_inc = [=, &dst] { dst += stride; };

  // size_aware_pel_op( rsp_sgnl_op, rsp_sgnl_inc, width, height );

#define RSP_SGNL_OP( ADDR ) ptr[ADDR] = lut[ptr[ADDR]]
#define RSP_SGNL_INC ptr += ptrStride;

  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )

#undef RSP_SGNL_OP
#undef RSP_SGNL_INC

  return;
}
215 | | |
216 | | void fillN_CuCore( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr ) |
217 | 0 | { |
218 | 0 | if( width == ptrStride ) |
219 | 0 | { |
220 | 0 | std::fill_n( ptr, width * height, cuPtr ); |
221 | 0 | } |
222 | 0 | else |
223 | 0 | { |
224 | 0 | CodingUnit** dst = ptr; |
225 | |
|
226 | 0 | for( int y = 0; y < height; y++, dst += ptrStride ) |
227 | 0 | { |
228 | 0 | std::fill_n( dst, width, cuPtr ); |
229 | 0 | } |
230 | 0 | } |
231 | 0 | } |
232 | | |
233 | | void sampleRateConvCore( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale, |
234 | | const Pel* orgSrc, const ptrdiff_t orgStride, const int orgWidth, const int orgHeight, |
235 | | const int beforeScaleLeftOffset, const int beforeScaleTopOffset, |
236 | | Pel* scaledSrc, const ptrdiff_t scaledStride, const int scaledWidth, const int scaledHeight, |
237 | | const int afterScaleLeftOffset, const int afterScaleTopOffset, |
238 | | const int bitDepth, const bool useLumaFilter, |
239 | | const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag ) |
240 | 0 | { |
241 | 0 | if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && !beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset ) |
242 | 0 | { |
243 | 0 | g_pelBufOP.copyBuffer( ( const char * ) orgSrc, orgStride * sizeof( Pel ), ( char* ) scaledSrc, scaledStride * sizeof( Pel ), orgWidth * sizeof( Pel ), orgHeight ); |
244 | |
|
245 | 0 | return; |
246 | 0 | } |
247 | | |
248 | 0 | const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0]; |
249 | 0 | const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0]; |
250 | |
|
251 | 0 | const int numFracPositions = useLumaFilter ? 15 : 31; |
252 | 0 | const int numFracShift = useLumaFilter ? 4 : 5; |
253 | 0 | const int posShiftX = SCALE_RATIO_BITS - numFracShift + compScale.first; |
254 | 0 | const int posShiftY = SCALE_RATIO_BITS - numFracShift + compScale.second; |
255 | 0 | int addX = (1 << (posShiftX - 1)) + (beforeScaleLeftOffset << SCALE_RATIO_BITS) + ((int( 1 - horCollocatedPositionFlag ) * 8 * (scalingRatio.first - SCALE_1X.first) + (1 << (2 + compScale.first))) >> (3 + compScale.first)); |
256 | 0 | int addY = (1 << (posShiftY - 1)) + (beforeScaleTopOffset << SCALE_RATIO_BITS) + ((int( 1 - verCollocatedPositionFlag ) * 8 * (scalingRatio.second - SCALE_1X.second) + (1 << (2 + compScale.second))) >> (3 + compScale.second)); |
257 | |
|
258 | 0 | const int filterLength = useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA; |
259 | 0 | const int log2Norm = 12; |
260 | |
|
261 | 0 | int* buf = new int[orgHeight * scaledWidth]; |
262 | 0 | int maxVal = (1 << bitDepth) - 1; |
263 | |
|
264 | 0 | CHECK( bitDepth > 17, "Overflow may happen!" ); |
265 | |
|
266 | 0 | for( int i = 0; i < scaledWidth; i++ ) |
267 | 0 | { |
268 | 0 | const Pel* org = orgSrc; |
269 | 0 | int refPos = (((i << compScale.first) - afterScaleLeftOffset) * scalingRatio.first + addX) >> posShiftX; |
270 | 0 | int integer = refPos >> numFracShift; |
271 | 0 | int frac = refPos & numFracPositions; |
272 | 0 | int* tmp = buf + i; |
273 | |
|
274 | 0 | for( int j = 0; j < orgHeight; j++ ) |
275 | 0 | { |
276 | 0 | int sum = 0; |
277 | 0 | const TFilterCoeff* f = filterHor + frac * filterLength; |
278 | |
|
279 | 0 | for( int k = 0; k < filterLength; k++ ) |
280 | 0 | { |
281 | 0 | int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 ); |
282 | 0 | sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering |
283 | 0 | } |
284 | |
|
285 | 0 | *tmp = sum; |
286 | |
|
287 | 0 | tmp += scaledWidth; |
288 | 0 | org += orgStride; |
289 | 0 | } |
290 | 0 | } |
291 | |
|
292 | 0 | Pel* dst = scaledSrc; |
293 | |
|
294 | 0 | for( int j = 0; j < scaledHeight; j++ ) |
295 | 0 | { |
296 | 0 | int refPos = (((j << compScale.second) - afterScaleTopOffset) * scalingRatio.second + addY) >> posShiftY; |
297 | 0 | int integer = refPos >> numFracShift; |
298 | 0 | int frac = refPos & numFracPositions; |
299 | |
|
300 | 0 | for( int i = 0; i < scaledWidth; i++ ) |
301 | 0 | { |
302 | 0 | int sum = 0; |
303 | 0 | int* tmp = buf + i; |
304 | 0 | const TFilterCoeff* f = filterVer + frac * filterLength; |
305 | |
|
306 | 0 | for( int k = 0; k < filterLength; k++ ) |
307 | 0 | { |
308 | 0 | int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 ); |
309 | 0 | sum += f[k] * tmp[yInt * scaledWidth]; |
310 | 0 | } |
311 | |
|
312 | 0 | dst[i] = std::min<int>( std::max( 0, (sum + (1 << (log2Norm - 1))) >> log2Norm ), maxVal ); |
313 | 0 | } |
314 | |
|
315 | 0 | dst += scaledStride; |
316 | 0 | } |
317 | |
|
318 | 0 | delete[] buf; |
319 | 0 | } |
320 | | |
// Forward LMCS (luma mapping) applied in place: each sample selects its
// piecewise-linear segment via idxY = sample >> log2(OrgCW), then is mapped as
//   LmcsPivot[idxY] + ( ScaleCoeff[idxY] * ( sample - InputPivot[idxY] ) + 2^10 ) >> 11
// and clipped to the bit depth 'bd'.
void rspFwdCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const Pel OrgCW, const Pel* LmcsPivot, const Pel* ScaleCoeff, const Pel* InputPivot )
{
  int idxY;
  int shift = getLog2( OrgCW );

  // Kept from upstream: equivalent lambda-based formulation for reference.
  // const auto rsp_sgnl_op = [=, &dst]( int ADDR ){ idxY = ( dst[ADDR] >> shift ); dst[ADDR] = static_cast<Pel>( ClipBD<int>( LmcsPivot[idxY] + ( ( ScaleCoeff[idxY] * ( dst[ADDR] - InputPivot[idxY] ) + ( 1 << 10 ) ) >> 11 ), bd ) ); };
  // const auto rsp_sgnl_inc = [=, &dst] { dst += stride; };

  // size_aware_pel_op( rsp_sgnl_op, rsp_sgnl_inc, width, height );

#define RSP_FWD_OP( ADDR ) { idxY = ( ptr[ADDR] >> shift ); ptr[ADDR] = static_cast<Pel>( ClipBD<int>( LmcsPivot[idxY] + ( ( ScaleCoeff[idxY] * ( ptr[ADDR] - InputPivot[idxY] ) + ( 1 << 10 ) ) >> 11 ), bd ) ); }
#define RSP_FWD_INC ptr += ptrStride;

  SIZE_AWARE_PER_EL_OP( RSP_FWD_OP, RSP_FWD_INC )

#undef RSP_FWD_OP
#undef RSP_FWD_INC
}
339 | | |
// Populate the dispatch table with the scalar (C++) reference kernels.
// SIMD-optimized builds overwrite individual pointers afterwards; rspBcw has
// no scalar implementation here and stays null.
PelBufferOps::PelBufferOps()
{
  addAvg4  = addAvgCore<Pel>;
  addAvg8  = addAvgCore<Pel>;
  addAvg16 = addAvgCore<Pel>;

  reco4 = reconstructCore<Pel>;
  reco8 = reconstructCore<Pel>;

  linTf4 = linTfCore<Pel>;
  linTf8 = linTfCore<Pel>;

  wghtAvg4 = addWeightedAvgCore<Pel>;
  wghtAvg8 = addWeightedAvgCore<Pel>;

  copyBuffer = copyBufferCore;

  transpose4x4 = transpose4x4Core<Pel>;
  transpose8x8 = transpose8x8Core<Pel>;

  applyLut = applyLutCore;
  rspFwd   = rspFwdCore;
  rspBcw   = nullptr;

  fillN_CU = fillN_CuCore;

  sampleRateConv = sampleRateConvCore;
}
368 | | |
369 | | PelBufferOps g_pelBufOP = PelBufferOps(); |
370 | | |
// Weighted bi-prediction (BCW) into this buffer:
//   this = clip( ( other1*w0 + other2*w1 + offset ) >> shiftNum )
// with weights taken from the BCW index. Dispatches to the width-8/width-4
// kernels when possible, otherwise falls back to the generic macro loop.
template<>
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx)
{
  const int8_t w0             = getBcwWeight(bcwIdx, REF_PIC_LIST_0);
  const int8_t w1             = getBcwWeight(bcwIdx, REF_PIC_LIST_1);
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
  const Pel* src0             = other1.buf;
  const Pel* src2             = other2.buf;
  Pel* dest                   = buf;

  const ptrdiff_t src1Stride = other1.stride;
  const ptrdiff_t src2Stride = other2.stride;
  const ptrdiff_t destStride = stride;
  const int clipbd           = clpRng.bd;
  // Shift removes both the interpolation-filter headroom and the weight scale;
  // offset provides rounding plus removal of the internal sample offset.
  const int shiftNum         = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
  const int offset           = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );

  if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
  }
  else
  {
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
#define ADD_AVG_INC    \
    src0 += src1Stride; \
    src2 += src2Stride; \
    dest += destStride; \

    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );

#undef ADD_AVG_OP
#undef ADD_AVG_INC
  }
}
410 | | |
411 | | template<> |
412 | | void AreaBuf<Pel>::scaleSignal(const int scale, const ClpRng& clpRng) |
413 | 0 | { |
414 | 0 | Pel* dst = buf; |
415 | 0 | Pel* src = buf; |
416 | 0 | int sign, absval; |
417 | 0 | int maxAbsclipBD = ( 1 << clpRng.bd ) - 1; |
418 | |
|
419 | 0 | for (unsigned y = 0; y < height; y++) |
420 | 0 | { |
421 | 0 | for (unsigned x = 0; x < width; x++) |
422 | 0 | { |
423 | 0 | src[x] = Clip3<Pel>( -maxAbsclipBD - 1, maxAbsclipBD, src[x] ); |
424 | 0 | sign = src[x] >= 0 ? 1 : -1; |
425 | 0 | absval = sign * src[x]; |
426 | |
|
427 | 0 | int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC); |
428 | |
|
429 | 0 | if( sizeof( Pel ) == 2 ) // avoid overflow when storing data |
430 | 0 | { |
431 | 0 | val = Clip3<int>(-32768, 32767, val); |
432 | 0 | } |
433 | 0 | dst[x] = (Pel)val; |
434 | 0 | } |
435 | 0 | dst += stride; |
436 | 0 | src += stride; |
437 | 0 | } |
438 | 0 | } |
439 | | |
// Unweighted bi-prediction average into this buffer:
//   this = clip( ( other1 + other2 + offset ) >> shiftNum )
// Dispatches to the width-16/8/4 kernels when possible, otherwise falls back
// to the generic macro loop.
template<>
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng)
{
  const Pel* src0 = other1.buf;
  const Pel* src2 = other2.buf;
  Pel* dest       = buf;

  const ptrdiff_t src1Stride = other1.stride;
  const ptrdiff_t src2Stride = other2.stride;
  const ptrdiff_t destStride = stride;
  const int clipbd           = clpRng.bd;
  // +1 in the shift accounts for the averaging of two predictions; the offset
  // rounds and removes the doubled internal sample offset.
  const int shiftNum         = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
  const int offset           = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;

  if( ( width & 15 ) == 0 )
  {
    g_pelBufOP.addAvg16( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
  }
  else if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
  }
  else
  {
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
#define ADD_AVG_INC    \
    src0 += src1Stride; \
    src2 += src2Stride; \
    dest += destStride; \

    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );

#undef ADD_AVG_OP
#undef ADD_AVG_INC
  }
}
480 | | |
// Reconstruct samples into this buffer: this = clip( pred + resi ).
// Dispatches to the width-8/width-4 kernels when possible, otherwise falls
// back to the generic macro loop.
template<>
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng )
{
  const Pel* src1 = pred.buf;
  const Pel* src2 = resi.buf;
  Pel* dest       = buf;

  const ptrdiff_t src1Stride = pred.stride;
  const ptrdiff_t src2Stride = resi.stride;
  const ptrdiff_t destStride = stride;

  if( ( width & 7 ) == 0 )
  {
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
  }
  else if( ( width & 3 ) == 0 )
  {
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
  }
  else
  {
#define RECO_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
#define RECO_INC      \
    src1 += src1Stride; \
    src2 += src2Stride; \
    dest += destStride; \

    SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );

#undef RECO_OP
#undef RECO_INC
  }
}
514 | | |
515 | | template<> |
516 | | void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng ) |
517 | 0 | { |
518 | 0 | const Pel* src = buf; |
519 | 0 | Pel* dst = buf; |
520 | |
|
521 | 0 | if( width == 1 ) |
522 | 0 | { |
523 | 0 | THROW_FATAL( "Blocks of width = 1 not supported" ); |
524 | 0 | } |
525 | 0 | else if( ( width & 7 ) == 0 ) |
526 | 0 | { |
527 | 0 | g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip ); |
528 | 0 | } |
529 | 0 | else if( ( width & 3 ) == 0 ) |
530 | 0 | { |
531 | 0 | g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip ); |
532 | 0 | } |
533 | 0 | else |
534 | 0 | { |
535 | 0 | #define LINTF_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset ) |
536 | 0 | #define LINTF_INC \ |
537 | 0 | src += stride; \ |
538 | 0 | dst += stride; \ |
539 | 0 |
|
540 | 0 | SIZE_AWARE_PER_EL_OP( LINTF_OP, LINTF_INC ); |
541 | |
|
542 | 0 | #undef RECO_OP |
543 | 0 | #undef RECO_INC |
544 | 0 | } |
545 | 0 | } |
546 | | |
#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
// Fill this buffer with the transpose of 'other'. Uses the (possibly SIMD)
// 8x8 or 4x4 tile kernels when both dimensions allow it; otherwise falls back
// to a scalar element-by-element copy.
template<>
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
{
  CHECK( width != other.height || height != other.width, "Incompatible size" );

  if( ( ( width | height ) & 7 ) == 0 )
  {
    // Both dimensions are multiples of 8: transpose in 8x8 tiles.
    const Pel* src = other.buf;

    for( unsigned y = 0; y < other.height; y += 8 )
    {
      Pel* dst = buf + y;

      for( unsigned x = 0; x < other.width; x += 8 )
      {
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );

        dst += 8 * stride;
      }

      src += 8 * other.stride;
    }
  }
  else if( ( ( width | height ) & 3 ) == 0 )
  {
    // Both dimensions are multiples of 4: transpose in 4x4 tiles.
    const Pel* src = other.buf;

    for( unsigned y = 0; y < other.height; y += 4 )
    {
      Pel* dst = buf + y;

      for( unsigned x = 0; x < other.width; x += 4 )
      {
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );

        dst += 4 * stride;
      }

      src += 4 * other.stride;
    }
  }
  else
  {
    Pel* dst       = buf;
    const Pel* src = other.buf;
    // NOTE(review): the generic path also re-dimensions this buffer to the
    // transposed geometry and may widen the stride -- the tiled paths above do
    // not; confirm callers always pre-size the destination for those.
    width  = other.height;
    height = other.width;
    stride = stride < width ? width : stride;

    for( unsigned y = 0; y < other.height; y++ )
    {
      for( unsigned x = 0; x < other.width; x++ )
      {
        dst[y + x*stride] = src[x + y * other.stride];
      }
    }
  }
}
#endif
607 | | |
608 | | template<> |
609 | | void AreaBuf<MotionInfo>::fill( const MotionInfo& val ) |
610 | 0 | { |
611 | 0 | if( width == stride ) |
612 | 0 | { |
613 | 0 | std::fill_n( buf, width * height, val ); |
614 | 0 | } |
615 | 0 | else |
616 | 0 | { |
617 | 0 | MotionInfo* dst = buf; |
618 | |
|
619 | 0 | for( int y = 0; y < height; y++, dst += stride ) |
620 | 0 | { |
621 | 0 | std::fill_n( dst, width, val ); |
622 | 0 | } |
623 | 0 | } |
624 | 0 | } |
625 | | |
626 | | PelStorage::PelStorage() |
627 | 0 | { |
628 | 0 | for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ ) |
629 | 0 | { |
630 | 0 | m_origin[i] = nullptr; |
631 | 0 | m_allocator[i] = nullptr; |
632 | 0 | } |
633 | 0 | } |
634 | | |
// Release all owned plane memory (see destroy() for the internal-vs-external
// allocator handling).
PelStorage::~PelStorage()
{
  destroy();
}
639 | | |
// Convenience overload: allocate planes sized from the unit area's luma block,
// with default margin/alignment settings.
void PelStorage::create( const UnitArea &_UnitArea )
{
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
}
644 | | |
// Allocate the per-component sample planes.
//  - _maxCUSize  : if non-zero, width/height are rounded up to a CU multiple.
//  - _margin     : border added on all four sides (optionally chroma-scaled).
//  - _alignmentByte / userAlloc : line alignment and optional external
//    allocator callback; with an external allocator, ownership stays outside
//    (m_externAllocator) and destroy() unrefs instead of freeing.
// The PelBuf entries point at the top-left of the *visible* area inside the
// allocated (margined, aligned) plane.
void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignmentByte, const bool _scaleChromaMargin, const UserAllocator* userAlloc )
{
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );

  chromaFormat = _chromaFormat;

  const uint32_t numCh = getNumberValidComponents( _chromaFormat );

  unsigned extHeight = _size.height;
  unsigned extWidth  = _size.width;

  if( _maxCUSize )
  {
    // Round the allocated extent up to whole CUs.
    extHeight = ( ( _size.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
    extWidth  = ( ( _size.width + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
  }

  // Alignment expressed in samples rather than bytes.
  const unsigned _alignment = _alignmentByte / sizeof( Pel );

  for( uint32_t i = 0; i < numCh; i++ )
  {
    const ComponentID compID = ComponentID( i );
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );

    unsigned scaledHeight = extHeight >> scaleY;
    unsigned scaledWidth  = extWidth >> scaleX;
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);

#if 1
    // Round the left margin up so the visible area itself starts aligned.
    if( _alignment && xmargin )
    {
      xmargin = ( ( xmargin + _alignment - 1 ) / _alignment ) * _alignment;
    }

#endif
    SizeType totalWidth  = scaledWidth + 2 * xmargin;
    SizeType totalHeight = scaledHeight +2 * ymargin;

    if( _alignment )
    {
      // make sure buffer lines are align
      CHECK( _alignmentByte != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
    }

#if ENABLE_SIMD_OPT_INTER
    uint32_t area = totalWidth * totalHeight + 1; // +1 for the extra Pel overread in prefetchPad_SSE, in case reading from the very bottom right of the picture
#else
    uint32_t area = totalWidth * totalHeight;
#endif
    CHECK( !area, "Trying to create a buffer with zero area" );

    m_origSi[i] = Size{ totalWidth, totalHeight };
    if( userAlloc && userAlloc->enabled )
    {
      m_origin[i] = ( Pel* ) userAlloc->create( userAlloc->opaque, (vvdecComponentType)i, sizeof(Pel)*area, MEMORY_ALIGN_DEF_SIZE, &m_allocator[i] );
      CHECK( m_origin[i] == nullptr, "external allocator callback failed (returned NULL)." );
      m_externAllocator = true;
      m_userAlloc       = userAlloc;
    }
    else
    {
      m_origin[i] = ( Pel* ) xMalloc( Pel, area );
    }
    // Point the visible buffer at the first sample inside the margins.
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
    bufs.push_back( PelBuf( topLeft, totalWidth, _size.width >> scaleX, _size.height >> scaleY ) );
  }
}
715 | | |
716 | | void PelStorage::createFromBuf( PelUnitBuf buf ) |
717 | 0 | { |
718 | 0 | chromaFormat = buf.chromaFormat; |
719 | |
|
720 | 0 | const uint32_t numCh = getNumberValidComponents( chromaFormat ); |
721 | |
|
722 | 0 | bufs.resize(numCh); |
723 | |
|
724 | 0 | for( uint32_t i = 0; i < numCh; i++ ) |
725 | 0 | { |
726 | 0 | PelBuf cPelBuf = buf.get( ComponentID( i ) ); |
727 | 0 | bufs[i] = PelBuf( cPelBuf.bufAt( 0, 0 ), cPelBuf.stride, cPelBuf.width, cPelBuf.height ); |
728 | 0 | } |
729 | 0 | } |
730 | | |
// Exchange plane pointers, origins and allocator handles with 'other'.
// Only valid for storages of identical format and layout (checked below);
// dimensions themselves are not swapped.
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < numCh; i++ )
  {
    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat != other.chromaFormat , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ) != other.get( ComponentID( i ) ) , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ).stride != other.get( ComponentID( i ) ).stride, "Incompatible formats" );

    std::swap( bufs[i].buf, other.bufs[i].buf );
    std::swap( bufs[i].stride, other.bufs[i].stride );
    std::swap( m_origin[i], other.m_origin[i] );
    std::swap( m_allocator[i], other.m_allocator[i] );
  }
  std::swap( m_externAllocator, other.m_externAllocator );
  std::swap( m_userAlloc, other.m_userAlloc );
}
750 | | |
// Free all owned plane memory and reset the storage to the empty state.
// Internally allocated planes are xFree'd; externally allocated ones are
// returned to the user allocator via its unref callback.
void PelStorage::destroy()
{
  chromaFormat = NUM_CHROMA_FORMAT;
  for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
  {
    if( m_origin[i] )
    {
      if ( !m_externAllocator )
      {
        xFree( m_origin[i] );
      }
      else if( m_allocator[i])
      {
        CHECK( m_userAlloc->unref == nullptr, "vvdecUnrefBufferCallback not valid, cannot unref picture buffer" )
        m_userAlloc->unref( m_userAlloc->opaque, m_allocator[i] );
      }
      m_origin[i] = nullptr;
    }
  }
  bufs.clear();
}
772 | | |
// Return the full plane of one component (mutable view).
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  return bufs[CompID];
}
777 | | |
// Return the full plane of one component (read-only view).
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  return bufs[CompID];
}
782 | | |
// Return a mutable sub-view of the component plane covering 'blk',
// sharing the plane's stride.
PelBuf PelStorage::getBuf( const CompArea &blk )
{
  const PelBuf& r = bufs[blk.compID()];

  CHECKD( rsAddr( blk.bottomRight(), r.stride ) >= ( ( r.height - 1 ) * r.stride + r.width ), "Trying to access a buf outside of bound!" );

  return PelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk );
}
791 | | |
// Return a read-only sub-view of the component plane covering 'blk'.
const CPelBuf PelStorage::getBuf( const CompArea &blk ) const
{
  const PelBuf& r = bufs[blk.compID()];
  return CPelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk );
}
797 | | |
// Return mutable sub-views for all components of 'unit' (luma only for 4:0:0).
PelUnitBuf PelStorage::getBuf( const UnitArea &unit )
{
  return ( chromaFormat == CHROMA_400 ) ? PelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
802 | | |
// Return read-only sub-views for all components of 'unit' (luma only for 4:0:0).
const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const
{
  return ( chromaFormat == CHROMA_400 ) ? CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
807 | | |
// Inverse colour-space transform from this buffer into 'other'. The math
// (t = Y - Cg/2; out0 = Cg + t; out1 = t - Co/2; out2 = Co + out1) matches the
// lifting-based inverse YCgCo transform used by adaptive colour transform.
// Requires 4:4:4 content: all three planes share one stride and one size
// (asserted below). Inputs are clipped to a signed (bd+1)-bit range first.
template<>
void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng )
{
  const Pel* pOrg0         = bufs[COMPONENT_Y ].buf;
  const Pel* pOrg1         = bufs[COMPONENT_Cb].buf;
  const Pel* pOrg2         = bufs[COMPONENT_Cr].buf;
  const ptrdiff_t strideOrg = bufs[COMPONENT_Y ].stride;

  Pel* pDst0               = other.bufs[COMPONENT_Y ].buf;
  Pel* pDst1               = other.bufs[COMPONENT_Cb].buf;
  Pel* pDst2               = other.bufs[COMPONENT_Cr].buf;
  const ptrdiff_t strideDst = other.bufs[COMPONENT_Y ].stride;

  int width         = bufs[COMPONENT_Y].width;
  int height        = bufs[COMPONENT_Y].height;
  int maxAbsclipBD  = (1 << (clpRng.bd + 1)) - 1;
  int y0, cg, co;

  CHECKD( bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
  CHECKD( other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
  CHECKD( bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size" );

  for( int y = 0; y < height; y++ )
  {
    for( int x = 0; x < width; x++ )
    {
      y0 = pOrg0[x];
      cg = pOrg1[x];
      co = pOrg2[x];

      y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
      cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
      co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);

      int t = y0 - (cg >> 1);
      pDst0[x] = cg + t;
      pDst1[x] = t - (co >> 1);
      pDst2[x] = co + pDst1[x];
    }

    pOrg0 += strideOrg;
    pOrg1 += strideOrg;
    pOrg2 += strideOrg;
    pDst0 += strideDst;
    pDst1 += strideDst;
    pDst2 += strideDst;
  }
}
856 | | |
// Explicit instantiation for Pel, so the out-of-line template definition below
// is emitted in this translation unit for external users.
template void UnitBuf<Pel>::writeToFile( std::string filename ) const;
858 | | |
859 | | template<typename T> |
860 | | void UnitBuf<T>::writeToFile( std::string filename ) const |
861 | 0 | { |
862 | 0 | FILE* f = fopen( filename.c_str(), "w" ); |
863 | 0 | CHECK_FATAL( f == nullptr, "writeToFile() cannot open file for writing" ) |
864 | | |
865 | 0 | for( auto& b: bufs ) |
866 | 0 | { |
867 | 0 | for( unsigned y = 0; y < b.height; y++ ) |
868 | 0 | { |
869 | 0 | fwrite( b.bufAt( 0, y ), sizeof( T ), b.width, f ); |
870 | 0 | } |
871 | 0 | } |
872 | |
|
873 | 0 | fclose( f ); |
874 | 0 | } |
875 | | |
876 | | } |