/src/ffmpeg/libavcodec/ivi_dsp.c
Line | Count | Source |
1 | | /* |
2 | | * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5) |
3 | | * |
4 | | * Copyright (c) 2009-2011 Maxim Poliakovski |
5 | | * |
6 | | * This file is part of FFmpeg. |
7 | | * |
8 | | * FFmpeg is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public |
10 | | * License as published by the Free Software Foundation; either |
11 | | * version 2.1 of the License, or (at your option) any later version. |
12 | | * |
13 | | * FFmpeg is distributed in the hope that it will be useful, |
14 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public |
19 | | * License along with FFmpeg; if not, write to the Free Software |
20 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | | */ |
22 | | |
23 | | /** |
24 | | * @file |
25 | | * DSP functions (inverse transforms, motion compensation, wavelet recompositions) |
26 | | * for Indeo Video Interactive codecs. |
27 | | */ |
28 | | |
29 | | #include <string.h> |
30 | | #include "libavutil/common.h" |
31 | | #include "ivi.h" |
32 | | #include "ivi_dsp.h" |
33 | | |
34 | | void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst, |
35 | | const ptrdiff_t dst_pitch) |
36 | 534 | { |
37 | 534 | int x, y, indx; |
38 | 534 | int32_t p0, p1, p2, p3, tmp0, tmp1, tmp2; |
39 | 534 | int32_t b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6; |
40 | 534 | int32_t b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9; |
41 | 534 | ptrdiff_t pitch, back_pitch; |
42 | 534 | const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr; |
43 | 534 | const int num_bands = 4; |
44 | | |
45 | | /* all bands should have the same pitch */ |
46 | 534 | pitch = plane->bands[0].pitch; |
47 | | |
48 | | /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */ |
49 | 534 | back_pitch = 0; |
50 | | |
51 | | /* get pointers to the wavelet bands */ |
52 | 534 | b0_ptr = plane->bands[0].buf; |
53 | 534 | b1_ptr = plane->bands[1].buf; |
54 | 534 | b2_ptr = plane->bands[2].buf; |
55 | 534 | b3_ptr = plane->bands[3].buf; |
56 | | |
57 | 73.8k | for (y = 0; y < plane->height; y += 2) { |
58 | | |
59 | 73.3k | if (y+2 >= plane->height) |
60 | 534 | pitch= 0; |
61 | | /* load storage variables with values */ |
62 | 73.3k | if (num_bands > 0) { |
63 | 73.3k | b0_1 = b0_ptr[0]; |
64 | 73.3k | b0_2 = b0_ptr[pitch]; |
65 | 73.3k | } |
66 | | |
67 | 73.3k | if (num_bands > 1) { |
68 | 73.3k | b1_1 = b1_ptr[back_pitch]; |
69 | 73.3k | b1_2 = b1_ptr[0]; |
70 | 73.3k | b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch]; |
71 | 73.3k | } |
72 | | |
73 | 73.3k | if (num_bands > 2) { |
74 | 73.3k | b2_2 = b2_ptr[0]; // b2[x, y ] |
75 | 73.3k | b2_3 = b2_2; // b2[x+1,y ] = b2[x,y] |
76 | 73.3k | b2_5 = b2_ptr[pitch]; // b2[x ,y+1] |
77 | 73.3k | b2_6 = b2_5; // b2[x+1,y+1] = b2[x,y+1] |
78 | 73.3k | } |
79 | | |
80 | 73.3k | if (num_bands > 3) { |
81 | 73.3k | b3_2 = b3_ptr[back_pitch]; // b3[x ,y-1] |
82 | 73.3k | b3_3 = b3_2; // b3[x+1,y-1] = b3[x ,y-1] |
83 | 73.3k | b3_5 = b3_ptr[0]; // b3[x ,y ] |
84 | 73.3k | b3_6 = b3_5; // b3[x+1,y ] = b3[x ,y ] |
85 | 73.3k | b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch]; |
86 | 73.3k | b3_9 = b3_8; |
87 | 73.3k | } |
88 | | |
89 | 30.2M | for (x = 0, indx = 0; x < plane->width; x+=2, indx++) { |
90 | 30.1M | if (x+2 >= plane->width) { |
91 | 73.3k | b0_ptr --; |
92 | 73.3k | b1_ptr --; |
93 | 73.3k | b2_ptr --; |
94 | 73.3k | b3_ptr --; |
95 | 73.3k | } |
96 | | |
97 | | /* some values calculated in the previous iterations can */ |
98 | | /* be reused in the next ones, so do appropriate copying */ |
99 | 30.1M | b2_1 = b2_2; // b2[x-1,y ] = b2[x, y ] |
100 | 30.1M | b2_2 = b2_3; // b2[x ,y ] = b2[x+1,y ] |
101 | 30.1M | b2_4 = b2_5; // b2[x-1,y+1] = b2[x ,y+1] |
102 | 30.1M | b2_5 = b2_6; // b2[x ,y+1] = b2[x+1,y+1] |
103 | 30.1M | b3_1 = b3_2; // b3[x-1,y-1] = b3[x ,y-1] |
104 | 30.1M | b3_2 = b3_3; // b3[x ,y-1] = b3[x+1,y-1] |
105 | 30.1M | b3_4 = b3_5; // b3[x-1,y ] = b3[x ,y ] |
106 | 30.1M | b3_5 = b3_6; // b3[x ,y ] = b3[x+1,y ] |
107 | 30.1M | b3_7 = b3_8; // vert_HPF(x-1) |
108 | 30.1M | b3_8 = b3_9; // vert_HPF(x ) |
109 | | |
110 | 30.1M | p0 = p1 = p2 = p3 = 0; |
111 | | |
112 | | /* process the LL-band by applying LPF both vertically and horizontally */ |
113 | 30.1M | if (num_bands > 0) { |
114 | 30.1M | tmp0 = b0_1; |
115 | 30.1M | tmp2 = b0_2; |
116 | 30.1M | b0_1 = b0_ptr[indx+1]; |
117 | 30.1M | b0_2 = b0_ptr[pitch+indx+1]; |
118 | 30.1M | tmp1 = tmp0 + b0_1; |
119 | | |
120 | 30.1M | p0 = tmp0 * 16; |
121 | 30.1M | p1 = tmp1 * 8; |
122 | 30.1M | p2 = (tmp0 + tmp2) * 8; |
123 | 30.1M | p3 = (tmp1 + tmp2 + b0_2) * 4; |
124 | 30.1M | } |
125 | | |
126 | | /* process the HL-band by applying HPF vertically and LPF horizontally */ |
127 | 30.1M | if (num_bands > 1) { |
128 | 30.1M | tmp0 = b1_2; |
129 | 30.1M | tmp1 = b1_1; |
130 | 30.1M | b1_2 = b1_ptr[indx+1]; |
131 | 30.1M | b1_1 = b1_ptr[back_pitch+indx+1]; |
132 | | |
133 | 30.1M | tmp2 = tmp1 - tmp0*6 + b1_3; |
134 | 30.1M | b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1]; |
135 | | |
136 | 30.1M | p0 += (tmp0 + tmp1) * 8; |
137 | 30.1M | p1 += (tmp0 + tmp1 + b1_1 + b1_2) * 4; |
138 | 30.1M | p2 += tmp2 * 4; |
139 | 30.1M | p3 += (tmp2 + b1_3) * 2; |
140 | 30.1M | } |
141 | | |
142 | | /* process the LH-band by applying LPF vertically and HPF horizontally */ |
143 | 30.1M | if (num_bands > 2) { |
144 | 30.1M | b2_3 = b2_ptr[indx+1]; |
145 | 30.1M | b2_6 = b2_ptr[pitch+indx+1]; |
146 | | |
147 | 30.1M | tmp0 = b2_1 + b2_2; |
148 | 30.1M | tmp1 = b2_1 - b2_2*6 + b2_3; |
149 | | |
150 | 30.1M | p0 += tmp0 * 8; |
151 | 30.1M | p1 += tmp1 * 4; |
152 | 30.1M | p2 += (tmp0 + b2_4 + b2_5) * 4; |
153 | 30.1M | p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) * 2; |
154 | 30.1M | } |
155 | | |
156 | | /* process the HH-band by applying HPF both vertically and horizontally */ |
157 | 30.1M | if (num_bands > 3) { |
158 | 30.1M | b3_6 = b3_ptr[indx+1]; // b3[x+1,y ] |
159 | 30.1M | b3_3 = b3_ptr[back_pitch+indx+1]; // b3[x+1,y-1] |
160 | | |
161 | 30.1M | tmp0 = b3_1 + b3_4; |
162 | 30.1M | tmp1 = b3_2 + b3_5; |
163 | 30.1M | tmp2 = b3_3 + b3_6; |
164 | | |
165 | 30.1M | b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1]; |
166 | | |
167 | 30.1M | p0 += (tmp0 + tmp1) * 4; |
168 | 30.1M | p1 += (tmp0 - tmp1*6 + tmp2) * 2; |
169 | 30.1M | p2 += (b3_7 + b3_8) * 2; |
170 | 30.1M | p3 += b3_7 - b3_8*6 + b3_9; |
171 | 30.1M | } |
172 | | |
173 | | /* output four pixels */ |
174 | 30.1M | dst[x] = av_clip_uint8((p0 >> 6) + 128); |
175 | 30.1M | dst[x+1] = av_clip_uint8((p1 >> 6) + 128); |
176 | 30.1M | dst[dst_pitch+x] = av_clip_uint8((p2 >> 6) + 128); |
177 | 30.1M | dst[dst_pitch+x+1] = av_clip_uint8((p3 >> 6) + 128); |
178 | 30.1M | }// for x |
179 | | |
180 | 73.3k | dst += dst_pitch << 1; |
181 | | |
182 | 73.3k | back_pitch = -pitch; |
183 | | |
184 | 73.3k | b0_ptr += pitch + 1; |
185 | 73.3k | b1_ptr += pitch + 1; |
186 | 73.3k | b2_ptr += pitch + 1; |
187 | 73.3k | b3_ptr += pitch + 1; |
188 | 73.3k | } |
189 | 534 | } |
190 | | |
191 | | void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst, |
192 | | const ptrdiff_t dst_pitch) |
193 | 0 | { |
194 | 0 | int x, y, indx, b0, b1, b2, b3, p0, p1, p2, p3; |
195 | 0 | const short *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr; |
196 | 0 | ptrdiff_t pitch; |
197 | | |
198 | | /* all bands should have the same pitch */ |
199 | 0 | pitch = plane->bands[0].pitch; |
200 | | |
201 | | /* get pointers to the wavelet bands */ |
202 | 0 | b0_ptr = plane->bands[0].buf; |
203 | 0 | b1_ptr = plane->bands[1].buf; |
204 | 0 | b2_ptr = plane->bands[2].buf; |
205 | 0 | b3_ptr = plane->bands[3].buf; |
206 | |
|
207 | 0 | for (y = 0; y < plane->height; y += 2) { |
208 | 0 | for (x = 0, indx = 0; x < plane->width; x += 2, indx++) { |
209 | | /* load coefficients */ |
210 | 0 | b0 = b0_ptr[indx]; //should be: b0 = (num_bands > 0) ? b0_ptr[indx] : 0; |
211 | 0 | b1 = b1_ptr[indx]; //should be: b1 = (num_bands > 1) ? b1_ptr[indx] : 0; |
212 | 0 | b2 = b2_ptr[indx]; //should be: b2 = (num_bands > 2) ? b2_ptr[indx] : 0; |
213 | 0 | b3 = b3_ptr[indx]; //should be: b3 = (num_bands > 3) ? b3_ptr[indx] : 0; |
214 | | |
215 | | /* haar wavelet recomposition */ |
216 | 0 | p0 = (b0 + b1 + b2 + b3 + 2) >> 2; |
217 | 0 | p1 = (b0 + b1 - b2 - b3 + 2) >> 2; |
218 | 0 | p2 = (b0 - b1 + b2 - b3 + 2) >> 2; |
219 | 0 | p3 = (b0 - b1 - b2 + b3 + 2) >> 2; |
220 | | |
221 | | /* bias, convert and output four pixels */ |
222 | 0 | dst[x] = av_clip_uint8(p0 + 128); |
223 | 0 | dst[x + 1] = av_clip_uint8(p1 + 128); |
224 | 0 | dst[dst_pitch + x] = av_clip_uint8(p2 + 128); |
225 | 0 | dst[dst_pitch + x + 1] = av_clip_uint8(p3 + 128); |
226 | 0 | }// for x |
227 | |
|
228 | 0 | dst += dst_pitch << 1; |
229 | |
|
230 | 0 | b0_ptr += pitch; |
231 | 0 | b1_ptr += pitch; |
232 | 0 | b2_ptr += pitch; |
233 | 0 | b3_ptr += pitch; |
234 | 0 | }// for y |
235 | 0 | } |
236 | | |
237 | | /** butterfly operation for the inverse Haar transform */ |
238 | | #define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \ |
239 | 946k | t = ((s1) - (s2)) >> 1;\ |
240 | 946k | o1 = ((s1) + (s2)) >> 1;\ |
241 | 946k | o2 = (t);\ |
242 | | |
243 | | /** inverse 8-point Haar transform */ |
244 | | #define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\ |
245 | | d1, d2, d3, d4, d5, d6, d7, d8,\ |
246 | 128k | t0, t1, t2, t3, t4, t5, t6, t7, t8) {\ |
247 | 128k | t1 = (s1) * 2; t5 = (s5) * 2;\ |
248 | 128k | IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\ |
249 | 128k | IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\ |
250 | 128k | IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\ |
251 | 128k | IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\ |
252 | 128k | d1 = COMPENSATE(t1);\ |
253 | 128k | d2 = COMPENSATE(t2);\ |
254 | 128k | d3 = COMPENSATE(t3);\ |
255 | 128k | d4 = COMPENSATE(t4);\ |
256 | 128k | d5 = COMPENSATE(t5);\ |
257 | 128k | d6 = COMPENSATE(t6);\ |
258 | 128k | d7 = COMPENSATE(t7);\ |
259 | 128k | d8 = COMPENSATE(t8); } |
260 | | |
261 | | /** inverse 4-point Haar transform */ |
262 | 16.4k | #define INV_HAAR4(s1, s3, s5, s7, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\ |
263 | 16.4k | IVI_HAAR_BFLY(s1, s3, t0, t1, t4);\ |
264 | 16.4k | IVI_HAAR_BFLY(t0, s5, t2, t3, t4);\ |
265 | 16.4k | d1 = COMPENSATE(t2);\ |
266 | 16.4k | d2 = COMPENSATE(t3);\ |
267 | 16.4k | IVI_HAAR_BFLY(t1, s7, t2, t3, t4);\ |
268 | 16.4k | d3 = COMPENSATE(t2);\ |
269 | 16.4k | d4 = COMPENSATE(t3); } |
270 | | |
271 | | void ff_ivi_inverse_haar_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
272 | | const uint8_t *flags) |
273 | 5.65k | { |
274 | 5.65k | int i, shift, sp1, sp2, sp3, sp4; |
275 | 5.65k | const int32_t *src; |
276 | 5.65k | int32_t *dst; |
277 | 5.65k | int tmp[64]; |
278 | 5.65k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
279 | | |
280 | | /* apply the InvHaar8 to all columns */ |
281 | 134k | #define COMPENSATE(x) (x) |
282 | 5.65k | src = in; |
283 | 5.65k | dst = tmp; |
284 | 50.8k | for (i = 0; i < 8; i++) { |
285 | 45.2k | if (flags[i]) { |
286 | | /* pre-scaling */ |
287 | 16.8k | shift = !(i & 4); |
288 | 16.8k | sp1 = src[ 0] * (1 << shift); |
289 | 16.8k | sp2 = src[ 8] * (1 << shift); |
290 | 16.8k | sp3 = src[16] * (1 << shift); |
291 | 16.8k | sp4 = src[24] * (1 << shift); |
292 | 16.8k | INV_HAAR8( sp1, sp2, sp3, sp4, |
293 | 16.8k | src[32], src[40], src[48], src[56], |
294 | 16.8k | dst[ 0], dst[ 8], dst[16], dst[24], |
295 | 16.8k | dst[32], dst[40], dst[48], dst[56], |
296 | 16.8k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
297 | 16.8k | } else |
298 | 28.4k | dst[ 0] = dst[ 8] = dst[16] = dst[24] = |
299 | 28.4k | dst[32] = dst[40] = dst[48] = dst[56] = 0; |
300 | | |
301 | 45.2k | src++; |
302 | 45.2k | dst++; |
303 | 45.2k | } |
304 | 5.65k | #undef COMPENSATE |
305 | | |
306 | | /* apply the InvHaar8 to all rows */ |
307 | 345k | #define COMPENSATE(x) (x) |
308 | 5.65k | src = tmp; |
309 | 50.8k | for (i = 0; i < 8; i++) { |
310 | 45.2k | if ( !src[0] && !src[1] && !src[2] && !src[3] |
311 | 3.41k | && !src[4] && !src[5] && !src[6] && !src[7]) { |
312 | 2.07k | memset(out, 0, 8 * sizeof(out[0])); |
313 | 43.1k | } else { |
314 | 43.1k | INV_HAAR8(src[0], src[1], src[2], src[3], |
315 | 43.1k | src[4], src[5], src[6], src[7], |
316 | 43.1k | out[0], out[1], out[2], out[3], |
317 | 43.1k | out[4], out[5], out[6], out[7], |
318 | 43.1k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
319 | 43.1k | } |
320 | 45.2k | src += 8; |
321 | 45.2k | out += pitch; |
322 | 45.2k | } |
323 | 5.65k | #undef COMPENSATE |
324 | 5.65k | } |
325 | | |
326 | | void ff_ivi_row_haar8(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
327 | | const uint8_t *flags) |
328 | 5.56k | { |
329 | 5.56k | int i; |
330 | 5.56k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
331 | | |
332 | | /* apply the InvHaar8 to all rows */ |
333 | 78.0k | #define COMPENSATE(x) (x) |
334 | 50.0k | for (i = 0; i < 8; i++) { |
335 | 44.5k | if ( !in[0] && !in[1] && !in[2] && !in[3] |
336 | 36.4k | && !in[4] && !in[5] && !in[6] && !in[7]) { |
337 | 34.7k | memset(out, 0, 8 * sizeof(out[0])); |
338 | 34.7k | } else { |
339 | 9.75k | INV_HAAR8(in[0], in[1], in[2], in[3], |
340 | 9.75k | in[4], in[5], in[6], in[7], |
341 | 9.75k | out[0], out[1], out[2], out[3], |
342 | 9.75k | out[4], out[5], out[6], out[7], |
343 | 9.75k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
344 | 9.75k | } |
345 | 44.5k | in += 8; |
346 | 44.5k | out += pitch; |
347 | 44.5k | } |
348 | 5.56k | #undef COMPENSATE |
349 | 5.56k | } |
350 | | |
351 | | void ff_ivi_col_haar8(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
352 | | const uint8_t *flags) |
353 | 55.6k | { |
354 | 55.6k | int i; |
355 | 55.6k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
356 | | |
357 | | /* apply the InvHaar8 to all columns */ |
358 | 467k | #define COMPENSATE(x) (x) |
359 | 501k | for (i = 0; i < 8; i++) { |
360 | 445k | if (flags[i]) { |
361 | 58.3k | INV_HAAR8(in[ 0], in[ 8], in[16], in[24], |
362 | 58.3k | in[32], in[40], in[48], in[56], |
363 | 58.3k | out[0 * pitch], out[1 * pitch], |
364 | 58.3k | out[2 * pitch], out[3 * pitch], |
365 | 58.3k | out[4 * pitch], out[5 * pitch], |
366 | 58.3k | out[6 * pitch], out[7 * pitch], |
367 | 58.3k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
368 | 58.3k | } else |
369 | 387k | out[0 * pitch] = out[1 * pitch] = |
370 | 387k | out[2 * pitch] = out[3 * pitch] = |
371 | 387k | out[4 * pitch] = out[5 * pitch] = |
372 | 387k | out[6 * pitch] = out[7 * pitch] = 0; |
373 | | |
374 | 445k | in++; |
375 | 445k | out++; |
376 | 445k | } |
377 | 55.6k | #undef COMPENSATE |
378 | 55.6k | } |
379 | | |
380 | | void ff_ivi_inverse_haar_4x4(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
381 | | const uint8_t *flags) |
382 | 2.43k | { |
383 | 2.43k | int i, shift, sp1, sp2; |
384 | 2.43k | const int32_t *src; |
385 | 2.43k | int32_t *dst; |
386 | 2.43k | int tmp[16]; |
387 | 2.43k | int t0, t1, t2, t3, t4; |
388 | | |
389 | | /* apply the InvHaar4 to all columns */ |
390 | 10.6k | #define COMPENSATE(x) (x) |
391 | 2.43k | src = in; |
392 | 2.43k | dst = tmp; |
393 | 12.1k | for (i = 0; i < 4; i++) { |
394 | 9.74k | if (flags[i]) { |
395 | | /* pre-scaling */ |
396 | 2.65k | shift = !(i & 2); |
397 | 2.65k | sp1 = src[0] * (1 << shift); |
398 | 2.65k | sp2 = src[4] * (1 << shift); |
399 | 2.65k | INV_HAAR4( sp1, sp2, src[8], src[12], |
400 | 2.65k | dst[0], dst[4], dst[8], dst[12], |
401 | 2.65k | t0, t1, t2, t3, t4); |
402 | 2.65k | } else |
403 | 7.09k | dst[0] = dst[4] = dst[8] = dst[12] = 0; |
404 | | |
405 | 9.74k | src++; |
406 | 9.74k | dst++; |
407 | 9.74k | } |
408 | 2.43k | #undef COMPENSATE |
409 | | |
410 | | /* apply the InvHaar8 to all rows */ |
411 | 32.2k | #define COMPENSATE(x) (x) |
412 | 2.43k | src = tmp; |
413 | 12.1k | for (i = 0; i < 4; i++) { |
414 | 9.74k | if (!src[0] && !src[1] && !src[2] && !src[3]) { |
415 | 1.67k | memset(out, 0, 4 * sizeof(out[0])); |
416 | 8.07k | } else { |
417 | 8.07k | INV_HAAR4(src[0], src[1], src[2], src[3], |
418 | 8.07k | out[0], out[1], out[2], out[3], |
419 | 8.07k | t0, t1, t2, t3, t4); |
420 | 8.07k | } |
421 | 9.74k | src += 4; |
422 | 9.74k | out += pitch; |
423 | 9.74k | } |
424 | 2.43k | #undef COMPENSATE |
425 | 2.43k | } |
426 | | |
427 | | void ff_ivi_row_haar4(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
428 | | const uint8_t *flags) |
429 | 2.08k | { |
430 | 2.08k | int i; |
431 | 2.08k | int t0, t1, t2, t3, t4; |
432 | | |
433 | | /* apply the InvHaar4 to all rows */ |
434 | 14.7k | #define COMPENSATE(x) (x) |
435 | 10.4k | for (i = 0; i < 4; i++) { |
436 | 8.34k | if (!in[0] && !in[1] && !in[2] && !in[3]) { |
437 | 4.65k | memset(out, 0, 4 * sizeof(out[0])); |
438 | 4.65k | } else { |
439 | 3.68k | INV_HAAR4(in[0], in[1], in[2], in[3], |
440 | 3.68k | out[0], out[1], out[2], out[3], |
441 | 3.68k | t0, t1, t2, t3, t4); |
442 | 3.68k | } |
443 | 8.34k | in += 4; |
444 | 8.34k | out += pitch; |
445 | 8.34k | } |
446 | 2.08k | #undef COMPENSATE |
447 | 2.08k | } |
448 | | |
449 | | void ff_ivi_col_haar4(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
450 | | const uint8_t *flags) |
451 | 1.77k | { |
452 | 1.77k | int i; |
453 | 1.77k | int t0, t1, t2, t3, t4; |
454 | | |
455 | | /* apply the InvHaar8 to all columns */ |
456 | 8.33k | #define COMPENSATE(x) (x) |
457 | 8.85k | for (i = 0; i < 4; i++) { |
458 | 7.08k | if (flags[i]) { |
459 | 2.08k | INV_HAAR4(in[0], in[4], in[8], in[12], |
460 | 2.08k | out[0 * pitch], out[1 * pitch], |
461 | 2.08k | out[2 * pitch], out[3 * pitch], |
462 | 2.08k | t0, t1, t2, t3, t4); |
463 | 2.08k | } else |
464 | 4.99k | out[0 * pitch] = out[1 * pitch] = |
465 | 4.99k | out[2 * pitch] = out[3 * pitch] = 0; |
466 | | |
467 | 7.08k | in++; |
468 | 7.08k | out++; |
469 | 7.08k | } |
470 | 1.77k | #undef COMPENSATE |
471 | 1.77k | } |
472 | | |
473 | | void ff_ivi_dc_haar_2d(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
474 | | int blk_size) |
475 | 36.9k | { |
476 | 36.9k | int x, y; |
477 | 36.9k | int16_t dc_coeff; |
478 | | |
479 | 36.9k | dc_coeff = (*in + 0) >> 3; |
480 | | |
481 | 327k | for (y = 0; y < blk_size; out += pitch, y++) { |
482 | 2.58M | for (x = 0; x < blk_size; x++) |
483 | 2.29M | out[x] = dc_coeff; |
484 | 290k | } |
485 | 36.9k | } |
486 | | |
487 | | /** butterfly operation for the inverse slant transform */ |
488 | | #define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \ |
489 | 2.21M | t = (s1) - (s2);\ |
490 | 2.21M | o1 = (s1) + (s2);\ |
491 | 2.21M | o2 = (t);\ |
492 | | |
493 | | /** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */ |
494 | | #define IVI_IREFLECT(s1, s2, o1, o2, t) \ |
495 | 478k | t = (((s1) + (s2)*2 + 2) >> 2) + (s1);\ |
496 | 478k | o2 = (((s1)*2 - (s2) + 2) >> 2) - (s2);\ |
497 | 478k | o1 = (t);\ |
498 | | |
499 | | /** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */ |
500 | | #define IVI_SLANT_PART4(s1, s2, o1, o2, t) \ |
501 | 194k | t = (s2) + (((s1)*4 - (s2) + 4) >> 3);\ |
502 | 194k | o2 = (s1) + ((-(s1) - (s2)*4 + 4) >> 3);\ |
503 | 194k | o1 = (t);\ |
504 | | |
505 | | /** inverse slant8 transform */ |
506 | | #define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\ |
507 | | d1, d2, d3, d4, d5, d6, d7, d8,\ |
508 | 194k | t0, t1, t2, t3, t4, t5, t6, t7, t8) {\ |
509 | 194k | IVI_SLANT_PART4(s4, s5, t4, t5, t0);\ |
510 | 194k | \ |
511 | 194k | IVI_SLANT_BFLY(s1, t5, t1, t5, t0); IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\ |
512 | 194k | IVI_SLANT_BFLY(s7, s3, t7, t3, t0); IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\ |
513 | 194k | \ |
514 | 194k | IVI_SLANT_BFLY(t1, t2, t1, t2, t0); IVI_IREFLECT (t4, t3, t4, t3, t0);\ |
515 | 194k | IVI_SLANT_BFLY(t5, t6, t5, t6, t0); IVI_IREFLECT (t8, t7, t8, t7, t0);\ |
516 | 194k | IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\ |
517 | 194k | IVI_SLANT_BFLY(t5, t8, t5, t8, t0); IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\ |
518 | 194k | d1 = COMPENSATE(t1);\ |
519 | 194k | d2 = COMPENSATE(t2);\ |
520 | 194k | d3 = COMPENSATE(t3);\ |
521 | 194k | d4 = COMPENSATE(t4);\ |
522 | 194k | d5 = COMPENSATE(t5);\ |
523 | 194k | d6 = COMPENSATE(t6);\ |
524 | 194k | d7 = COMPENSATE(t7);\ |
525 | 194k | d8 = COMPENSATE(t8);} |
526 | | |
527 | | /** inverse slant4 transform */ |
528 | 89.5k | #define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\ |
529 | 89.5k | IVI_SLANT_BFLY(s1, s2, t1, t2, t0); IVI_IREFLECT (s4, s3, t4, t3, t0);\ |
530 | 89.5k | \ |
531 | 89.5k | IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\ |
532 | 89.5k | d1 = COMPENSATE(t1);\ |
533 | 89.5k | d2 = COMPENSATE(t2);\ |
534 | 89.5k | d3 = COMPENSATE(t3);\ |
535 | 89.5k | d4 = COMPENSATE(t4);} |
536 | | |
537 | | void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
538 | 14.2k | { |
539 | 14.2k | int i; |
540 | 14.2k | const int32_t *src; |
541 | 14.2k | int32_t *dst; |
542 | 14.2k | int tmp[64]; |
543 | 14.2k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
544 | | |
545 | 221k | #define COMPENSATE(x) (x) |
546 | 14.2k | src = in; |
547 | 14.2k | dst = tmp; |
548 | 128k | for (i = 0; i < 8; i++) { |
549 | 113k | if (flags[i]) { |
550 | 27.6k | IVI_INV_SLANT8(src[0], src[8], src[16], src[24], src[32], src[40], src[48], src[56], |
551 | 27.6k | dst[0], dst[8], dst[16], dst[24], dst[32], dst[40], dst[48], dst[56], |
552 | 27.6k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
553 | 27.6k | } else |
554 | 86.1k | dst[0] = dst[8] = dst[16] = dst[24] = dst[32] = dst[40] = dst[48] = dst[56] = 0; |
555 | | |
556 | 113k | src++; |
557 | 113k | dst++; |
558 | 113k | } |
559 | 14.2k | #undef COMPENSATE |
560 | | |
561 | 900k | #define COMPENSATE(x) (((x) + 1)>>1) |
562 | 14.2k | src = tmp; |
563 | 128k | for (i = 0; i < 8; i++) { |
564 | 113k | if (!src[0] && !src[1] && !src[2] && !src[3] && !src[4] && !src[5] && !src[6] && !src[7]) { |
565 | 1.27k | memset(out, 0, 8*sizeof(out[0])); |
566 | 112k | } else { |
567 | 112k | IVI_INV_SLANT8(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7], |
568 | 112k | out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7], |
569 | 112k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
570 | 112k | } |
571 | 113k | src += 8; |
572 | 113k | out += pitch; |
573 | 113k | } |
574 | 14.2k | #undef COMPENSATE |
575 | 14.2k | } |
576 | | |
577 | | void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
578 | 17.1k | { |
579 | 17.1k | int i; |
580 | 17.1k | const int32_t *src; |
581 | 17.1k | int32_t *dst; |
582 | 17.1k | int tmp[16]; |
583 | 17.1k | int t0, t1, t2, t3, t4; |
584 | | |
585 | 74.7k | #define COMPENSATE(x) (x) |
586 | 17.1k | src = in; |
587 | 17.1k | dst = tmp; |
588 | 85.9k | for (i = 0; i < 4; i++) { |
589 | 68.7k | if (flags[i]) { |
590 | 18.6k | IVI_INV_SLANT4(src[0], src[4], src[8], src[12], |
591 | 18.6k | dst[0], dst[4], dst[8], dst[12], |
592 | 18.6k | t0, t1, t2, t3, t4); |
593 | 18.6k | } else |
594 | 50.1k | dst[0] = dst[4] = dst[8] = dst[12] = 0; |
595 | | |
596 | 68.7k | src++; |
597 | 68.7k | dst++; |
598 | 68.7k | } |
599 | 17.1k | #undef COMPENSATE |
600 | | |
601 | 270k | #define COMPENSATE(x) (((x) + 1)>>1) |
602 | 17.1k | src = tmp; |
603 | 85.9k | for (i = 0; i < 4; i++) { |
604 | 68.7k | if (!src[0] && !src[1] && !src[2] && !src[3]) { |
605 | 1.23k | out[0] = out[1] = out[2] = out[3] = 0; |
606 | 67.5k | } else { |
607 | 67.5k | IVI_INV_SLANT4(src[0], src[1], src[2], src[3], |
608 | 67.5k | out[0], out[1], out[2], out[3], |
609 | 67.5k | t0, t1, t2, t3, t4); |
610 | 67.5k | } |
611 | 68.7k | src += 4; |
612 | 68.7k | out += pitch; |
613 | 68.7k | } |
614 | 17.1k | #undef COMPENSATE |
615 | 17.1k | } |
616 | | |
617 | | void ff_ivi_dc_slant_2d(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size) |
618 | 1.62M | { |
619 | 1.62M | int x, y; |
620 | 1.62M | int16_t dc_coeff; |
621 | | |
622 | 1.62M | dc_coeff = (*in + 1) >> 1; |
623 | | |
624 | 14.6M | for (y = 0; y < blk_size; out += pitch, y++) { |
625 | 116M | for (x = 0; x < blk_size; x++) |
626 | 103M | out[x] = dc_coeff; |
627 | 12.9M | } |
628 | 1.62M | } |
629 | | |
630 | | void ff_ivi_row_slant8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
631 | 12.3k | { |
632 | 12.3k | int i; |
633 | 12.3k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
634 | | |
635 | 400k | #define COMPENSATE(x) (((x) + 1)>>1) |
636 | 111k | for (i = 0; i < 8; i++) { |
637 | 99.1k | if (!in[0] && !in[1] && !in[2] && !in[3] && !in[4] && !in[5] && !in[6] && !in[7]) { |
638 | 49.0k | memset(out, 0, 8*sizeof(out[0])); |
639 | 50.1k | } else { |
640 | 50.1k | IVI_INV_SLANT8( in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], |
641 | 50.1k | out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7], |
642 | 50.1k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
643 | 50.1k | } |
644 | 99.1k | in += 8; |
645 | 99.1k | out += pitch; |
646 | 99.1k | } |
647 | 12.3k | #undef COMPENSATE |
648 | 12.3k | } |
649 | | |
650 | | void ff_ivi_dc_row_slant(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size) |
651 | 28.3k | { |
652 | 28.3k | int x, y; |
653 | 28.3k | int16_t dc_coeff; |
654 | | |
655 | 28.3k | dc_coeff = (*in + 1) >> 1; |
656 | | |
657 | 252k | for (x = 0; x < blk_size; x++) |
658 | 224k | out[x] = dc_coeff; |
659 | | |
660 | 28.3k | out += pitch; |
661 | | |
662 | 224k | for (y = 1; y < blk_size; out += pitch, y++) { |
663 | 1.75M | for (x = 0; x < blk_size; x++) |
664 | 1.56M | out[x] = 0; |
665 | 195k | } |
666 | 28.3k | } |
667 | | |
668 | | void ff_ivi_col_slant8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
669 | 2.58k | { |
670 | 2.58k | int i, row2, row4, row8; |
671 | 2.58k | int t0, t1, t2, t3, t4, t5, t6, t7, t8; |
672 | | |
673 | 2.58k | row2 = pitch << 1; |
674 | 2.58k | row4 = pitch << 2; |
675 | 2.58k | row8 = pitch << 3; |
676 | | |
677 | 31.8k | #define COMPENSATE(x) (((x) + 1)>>1) |
678 | 23.2k | for (i = 0; i < 8; i++) { |
679 | 20.6k | if (flags[i]) { |
680 | 3.98k | IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56], |
681 | 3.98k | out[0], out[pitch], out[row2], out[row2 + pitch], out[row4], |
682 | 3.98k | out[row4 + pitch], out[row4 + row2], out[row8 - pitch], |
683 | 3.98k | t0, t1, t2, t3, t4, t5, t6, t7, t8); |
684 | 16.6k | } else { |
685 | 16.6k | out[0] = out[pitch] = out[row2] = out[row2 + pitch] = out[row4] = |
686 | 16.6k | out[row4 + pitch] = out[row4 + row2] = out[row8 - pitch] = 0; |
687 | 16.6k | } |
688 | | |
689 | 20.6k | in++; |
690 | 20.6k | out++; |
691 | 20.6k | } |
692 | 2.58k | #undef COMPENSATE |
693 | 2.58k | } |
694 | | |
695 | | void ff_ivi_dc_col_slant(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size) |
696 | 2.83k | { |
697 | 2.83k | int x, y; |
698 | 2.83k | int16_t dc_coeff; |
699 | | |
700 | 2.83k | dc_coeff = (*in + 1) >> 1; |
701 | | |
702 | 23.8k | for (y = 0; y < blk_size; out += pitch, y++) { |
703 | 21.0k | out[0] = dc_coeff; |
704 | 161k | for (x = 1; x < blk_size; x++) |
705 | 140k | out[x] = 0; |
706 | 21.0k | } |
707 | 2.83k | } |
708 | | |
709 | | void ff_ivi_row_slant4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
710 | 1.61k | { |
711 | 1.61k | int i; |
712 | 1.61k | int t0, t1, t2, t3, t4; |
713 | | |
714 | 10.4k | #define COMPENSATE(x) (((x) + 1)>>1) |
715 | 8.05k | for (i = 0; i < 4; i++) { |
716 | 6.44k | if (!in[0] && !in[1] && !in[2] && !in[3]) { |
717 | 3.84k | memset(out, 0, 4*sizeof(out[0])); |
718 | 3.84k | } else { |
719 | 2.60k | IVI_INV_SLANT4( in[0], in[1], in[2], in[3], |
720 | 2.60k | out[0], out[1], out[2], out[3], |
721 | 2.60k | t0, t1, t2, t3, t4); |
722 | 2.60k | } |
723 | 6.44k | in += 4; |
724 | 6.44k | out += pitch; |
725 | 6.44k | } |
726 | 1.61k | #undef COMPENSATE |
727 | 1.61k | } |
728 | | |
729 | | void ff_ivi_col_slant4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags) |
730 | 602 | { |
731 | 602 | int i, row2; |
732 | 602 | int t0, t1, t2, t3, t4; |
733 | | |
734 | 602 | row2 = pitch << 1; |
735 | | |
736 | 3.04k | #define COMPENSATE(x) (((x) + 1)>>1) |
737 | 3.01k | for (i = 0; i < 4; i++) { |
738 | 2.40k | if (flags[i]) { |
739 | 760 | IVI_INV_SLANT4(in[0], in[4], in[8], in[12], |
740 | 760 | out[0], out[pitch], out[row2], out[row2 + pitch], |
741 | 760 | t0, t1, t2, t3, t4); |
742 | 1.64k | } else { |
743 | 1.64k | out[0] = out[pitch] = out[row2] = out[row2 + pitch] = 0; |
744 | 1.64k | } |
745 | | |
746 | 2.40k | in++; |
747 | 2.40k | out++; |
748 | 2.40k | } |
749 | 602 | #undef COMPENSATE |
750 | 602 | } |
751 | | |
752 | | void ff_ivi_put_pixels_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
753 | | const uint8_t *flags) |
754 | 1.28k | { |
755 | 1.28k | int x, y; |
756 | | |
757 | 11.5k | for (y = 0; y < 8; out += pitch, in += 8, y++) |
758 | 92.3k | for (x = 0; x < 8; x++) |
759 | 82.1k | out[x] = in[x]; |
760 | 1.28k | } |
761 | | |
762 | | void ff_ivi_put_dc_pixel_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch, |
763 | | int blk_size) |
764 | 4.74k | { |
765 | 4.74k | int y; |
766 | | |
767 | 4.74k | out[0] = in[0]; |
768 | 4.74k | memset(out + 1, 0, 7*sizeof(out[0])); |
769 | 4.74k | out += pitch; |
770 | | |
771 | 37.9k | for (y = 1; y < 8; out += pitch, y++) |
772 | 33.2k | memset(out, 0, 8*sizeof(out[0])); |
773 | 4.74k | } |
774 | | |
775 | | #define IVI_MC_TEMPLATE(size, suffix, OP) \ |
776 | | static void ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, \ |
777 | | ptrdiff_t dpitch, \ |
778 | | const int16_t *ref_buf, \ |
779 | 1.95M | ptrdiff_t pitch, int mc_type) \ |
780 | 1.95M | { \ |
781 | 1.95M | int i, j; \ |
782 | 1.95M | const int16_t *wptr; \ |
783 | 1.95M | \ |
784 | 1.95M | switch (mc_type) { \ |
785 | 1.88M | case 0: /* fullpel (no interpolation) */ \ |
786 | 11.3M | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \ |
787 | 62.0M | for (j = 0; j < size; j++) {\ |
788 | 52.5M | OP(buf[j], ref_buf[j]); \ |
789 | 52.5M | } \ |
790 | 9.41M | } \ |
791 | 1.88M | break; \ |
792 | 20.9k | case 1: /* horizontal halfpel interpolation */ \ |
793 | 156k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \ |
794 | 1.09M | for (j = 0; j < size; j++) \ |
795 | 963k | OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \ |
796 | 20.9k | break; \ |
797 | 24.0k | case 2: /* vertical halfpel interpolation */ \ |
798 | 24.0k | wptr = ref_buf + pitch; \ |
799 | 188k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ |
800 | 1.36M | for (j = 0; j < size; j++) \ |
801 | 1.20M | OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \ |
802 | 24.0k | break; \ |
803 | 22.4k | case 3: /* vertical and horizontal halfpel interpolation */ \ |
804 | 22.4k | wptr = ref_buf + pitch; \ |
805 | 173k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ |
806 | 1.24M | for (j = 0; j < size; j++) \ |
807 | 1.09M | OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \ |
808 | 22.4k | break; \ |
809 | 1.95M | } \ |
810 | 1.95M | } \ ivi_dsp.c:ivi_mc_8x8_no_delta Line | Count | Source | 779 | 452k | ptrdiff_t pitch, int mc_type) \ | 780 | 452k | { \ | 781 | 452k | int i, j; \ | 782 | 452k | const int16_t *wptr; \ | 783 | 452k | \ | 784 | 452k | switch (mc_type) { \ | 785 | 422k | case 0: /* fullpel (no interpolation) */ \ | 786 | 3.80M | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \ | 787 | 30.4M | for (j = 0; j < size; j++) {\ | 788 | 27.0M | OP(buf[j], ref_buf[j]); \ | 789 | 27.0M | } \ | 790 | 3.37M | } \ | 791 | 422k | break; \ | 792 | 10.3k | case 1: /* horizontal halfpel interpolation */ \ | 793 | 93.1k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \ | 794 | 745k | for (j = 0; j < size; j++) \ | 795 | 662k | OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \ | 796 | 10.3k | break; \ | 797 | 11.4k | case 2: /* vertical halfpel interpolation */ \ | 798 | 11.4k | wptr = ref_buf + pitch; \ | 799 | 102k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 800 | 822k | for (j = 0; j < size; j++) \ | 801 | 730k | OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \ | 802 | 11.4k | break; \ | 803 | 5.69k | case 3: /* vertical and horizontal halfpel interpolation */ \ | 804 | 5.69k | wptr = ref_buf + pitch; \ | 805 | 51.2k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 806 | 410k | for (j = 0; j < size; j++) \ | 807 | 364k | OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \ | 808 | 5.69k | break; \ | 809 | 452k | } \ | 810 | 452k | } \ |
ivi_dsp.c:ivi_mc_8x8_delta Line | Count | Source | 779 | 62.9k | ptrdiff_t pitch, int mc_type) \ | 780 | 62.9k | { \ | 781 | 62.9k | int i, j; \ | 782 | 62.9k | const int16_t *wptr; \ | 783 | 62.9k | \ | 784 | 62.9k | switch (mc_type) { \ | 785 | 44.4k | case 0: /* fullpel (no interpolation) */ \ | 786 | 400k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \ | 787 | 3.20M | for (j = 0; j < size; j++) {\ | 788 | 2.84M | OP(buf[j], ref_buf[j]); \ | 789 | 2.84M | } \ | 790 | 355k | } \ | 791 | 44.4k | break; \ | 792 | 2.76k | case 1: /* horizontal halfpel interpolation */ \ | 793 | 24.8k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \ | 794 | 199k | for (j = 0; j < size; j++) \ | 795 | 176k | OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \ | 796 | 2.76k | break; \ | 797 | 5.61k | case 2: /* vertical halfpel interpolation */ \ | 798 | 5.61k | wptr = ref_buf + pitch; \ | 799 | 50.5k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 800 | 404k | for (j = 0; j < size; j++) \ | 801 | 359k | OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \ | 802 | 5.61k | break; \ | 803 | 9.61k | case 3: /* vertical and horizontal halfpel interpolation */ \ | 804 | 9.61k | wptr = ref_buf + pitch; \ | 805 | 86.5k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 806 | 692k | for (j = 0; j < size; j++) \ | 807 | 615k | OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \ | 808 | 9.61k | break; \ | 809 | 62.9k | } \ | 810 | 62.9k | } \ |
ivi_dsp.c:ivi_mc_4x4_no_delta Line | Count | Source | 779 | 1.41M | ptrdiff_t pitch, int mc_type) \ | 780 | 1.41M | { \ | 781 | 1.41M | int i, j; \ | 782 | 1.41M | const int16_t *wptr; \ | 783 | 1.41M | \ | 784 | 1.41M | switch (mc_type) { \ | 785 | 1.40M | case 0: /* fullpel (no interpolation) */ \ | 786 | 7.00M | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \ | 787 | 28.0M | for (j = 0; j < size; j++) {\ | 788 | 22.4M | OP(buf[j], ref_buf[j]); \ | 789 | 22.4M | } \ | 790 | 5.60M | } \ | 791 | 1.40M | break; \ | 792 | 5.07k | case 1: /* horizontal halfpel interpolation */ \ | 793 | 25.3k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \ | 794 | 101k | for (j = 0; j < size; j++) \ | 795 | 81.2k | OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \ | 796 | 5.07k | break; \ | 797 | 5.05k | case 2: /* vertical halfpel interpolation */ \ | 798 | 5.05k | wptr = ref_buf + pitch; \ | 799 | 25.2k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 800 | 101k | for (j = 0; j < size; j++) \ | 801 | 80.8k | OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \ | 802 | 5.05k | break; \ | 803 | 4.57k | case 3: /* vertical and horizontal halfpel interpolation */ \ | 804 | 4.57k | wptr = ref_buf + pitch; \ | 805 | 22.8k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 806 | 91.4k | for (j = 0; j < size; j++) \ | 807 | 73.1k | OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \ | 808 | 4.57k | break; \ | 809 | 1.41M | } \ | 810 | 1.41M | } \ |
ivi_dsp.c:ivi_mc_4x4_delta Line | Count | Source | 779 | 26.6k | ptrdiff_t pitch, int mc_type) \ | 780 | 26.6k | { \ | 781 | 26.6k | int i, j; \ | 782 | 26.6k | const int16_t *wptr; \ | 783 | 26.6k | \ | 784 | 26.6k | switch (mc_type) { \ | 785 | 19.1k | case 0: /* fullpel (no interpolation) */ \ | 786 | 95.9k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \ | 787 | 383k | for (j = 0; j < size; j++) {\ | 788 | 307k | OP(buf[j], ref_buf[j]); \ | 789 | 307k | } \ | 790 | 76.7k | } \ | 791 | 19.1k | break; \ | 792 | 2.71k | case 1: /* horizontal halfpel interpolation */ \ | 793 | 13.5k | for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \ | 794 | 54.2k | for (j = 0; j < size; j++) \ | 795 | 43.4k | OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \ | 796 | 2.71k | break; \ | 797 | 1.99k | case 2: /* vertical halfpel interpolation */ \ | 798 | 1.99k | wptr = ref_buf + pitch; \ | 799 | 9.95k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 800 | 39.8k | for (j = 0; j < size; j++) \ | 801 | 31.8k | OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \ | 802 | 1.99k | break; \ | 803 | 2.52k | case 3: /* vertical and horizontal halfpel interpolation */ \ | 804 | 2.52k | wptr = ref_buf + pitch; \ | 805 | 12.6k | for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \ | 806 | 50.4k | for (j = 0; j < size; j++) \ | 807 | 40.3k | OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \ | 808 | 2.52k | break; \ | 809 | 26.6k | } \ | 810 | 26.6k | } \ |
|
811 | | \ |
812 | | void ff_ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, const int16_t *ref_buf, \ |
813 | 1.90M | ptrdiff_t pitch, int mc_type) \ |
814 | 1.90M | { \ |
815 | 1.90M | ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \ |
816 | 1.90M | } \ Line | Count | Source | 813 | 431k | ptrdiff_t pitch, int mc_type) \ | 814 | 431k | { \ | 815 | 431k | ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \ | 816 | 431k | } \ |
Line | Count | Source | 813 | 41.6k | ptrdiff_t pitch, int mc_type) \ | 814 | 41.6k | { \ | 815 | 41.6k | ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \ | 816 | 41.6k | } \ |
Line | Count | Source | 813 | 1.41M | ptrdiff_t pitch, int mc_type) \ | 814 | 1.41M | { \ | 815 | 1.41M | ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \ | 816 | 1.41M | } \ |
Line | Count | Source | 813 | 20.5k | ptrdiff_t pitch, int mc_type) \ | 814 | 20.5k | { \ | 815 | 20.5k | ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \ | 816 | 20.5k | } \ |
|
817 | | |
818 | | #define IVI_MC_AVG_TEMPLATE(size, suffix, OP) \ |
819 | | void ff_ivi_mc_avg_ ## size ##x## size ## suffix(int16_t *buf, \ |
820 | | const int16_t *ref_buf, \ |
821 | | const int16_t *ref_buf2, \ |
822 | | ptrdiff_t pitch, \ |
823 | 27.3k | int mc_type, int mc_type2) \ |
824 | 27.3k | { \ |
825 | 27.3k | int16_t tmp[size * size]; \ |
826 | 27.3k | int i, j; \ |
827 | 27.3k | \ |
828 | 27.3k | ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \ |
829 | 27.3k | ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \ |
830 | 221k | for (i = 0; i < size; i++, buf += pitch) { \ |
831 | 1.65M | for (j = 0; j < size; j++) {\ |
832 | 1.45M | OP(buf[j], tmp[i * size + j] >> 1); \ |
833 | 1.45M | } \ |
834 | 194k | } \ |
835 | 27.3k | } \ ff_ivi_mc_avg_8x8_no_delta Line | Count | Source | 823 | 8.85k | int mc_type, int mc_type2) \ | 824 | 8.85k | { \ | 825 | 8.85k | int16_t tmp[size * size]; \ | 826 | 8.85k | int i, j; \ | 827 | 8.85k | \ | 828 | 8.85k | ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \ | 829 | 8.85k | ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \ | 830 | 79.7k | for (i = 0; i < size; i++, buf += pitch) { \ | 831 | 637k | for (j = 0; j < size; j++) {\ | 832 | 566k | OP(buf[j], tmp[i * size + j] >> 1); \ | 833 | 566k | } \ | 834 | 70.8k | } \ | 835 | 8.85k | } \ |
Line | Count | Source | 823 | 12.4k | int mc_type, int mc_type2) \ | 824 | 12.4k | { \ | 825 | 12.4k | int16_t tmp[size * size]; \ | 826 | 12.4k | int i, j; \ | 827 | 12.4k | \ | 828 | 12.4k | ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \ | 829 | 12.4k | ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \ | 830 | 111k | for (i = 0; i < size; i++, buf += pitch) { \ | 831 | 895k | for (j = 0; j < size; j++) {\ | 832 | 795k | OP(buf[j], tmp[i * size + j] >> 1); \ | 833 | 795k | } \ | 834 | 99.4k | } \ | 835 | 12.4k | } \ |
ff_ivi_mc_avg_4x4_no_delta Line | Count | Source | 823 | 3.64k | int mc_type, int mc_type2) \ | 824 | 3.64k | { \ | 825 | 3.64k | int16_t tmp[size * size]; \ | 826 | 3.64k | int i, j; \ | 827 | 3.64k | \ | 828 | 3.64k | ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \ | 829 | 3.64k | ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \ | 830 | 18.2k | for (i = 0; i < size; i++, buf += pitch) { \ | 831 | 72.9k | for (j = 0; j < size; j++) {\ | 832 | 58.3k | OP(buf[j], tmp[i * size + j] >> 1); \ | 833 | 58.3k | } \ | 834 | 14.5k | } \ | 835 | 3.64k | } \ |
Line | Count | Source | 823 | 2.41k | int mc_type, int mc_type2) \ | 824 | 2.41k | { \ | 825 | 2.41k | int16_t tmp[size * size]; \ | 826 | 2.41k | int i, j; \ | 827 | 2.41k | \ | 828 | 2.41k | ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \ | 829 | 2.41k | ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \ | 830 | 12.0k | for (i = 0; i < size; i++, buf += pitch) { \ | 831 | 48.2k | for (j = 0; j < size; j++) {\ | 832 | 38.6k | OP(buf[j], tmp[i * size + j] >> 1); \ | 833 | 38.6k | } \ | 834 | 9.65k | } \ | 835 | 2.41k | } \ |
|
836 | | |
837 | 52.1M | #define OP_PUT(a, b) (a) = (b) |
838 | 5.27M | #define OP_ADD(a, b) (a) += (b) |
839 | | |
840 | 28.7M | IVI_MC_TEMPLATE(8, _no_delta, OP_PUT) |
841 | 3.99M | IVI_MC_TEMPLATE(8, _delta, OP_ADD) |
842 | 22.6M | IVI_MC_TEMPLATE(4, _no_delta, OP_PUT) |
843 | 422k | IVI_MC_TEMPLATE(4, _delta, OP_ADD) |
844 | 566k | IVI_MC_AVG_TEMPLATE(8, _no_delta, OP_PUT) |
845 | 795k | IVI_MC_AVG_TEMPLATE(8, _delta, OP_ADD) |
846 | 58.3k | IVI_MC_AVG_TEMPLATE(4, _no_delta, OP_PUT) |
847 | 38.6k | IVI_MC_AVG_TEMPLATE(4, _delta, OP_ADD) |