Coverage Report

Created: 2026-02-14 06:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/ffmpeg/libavcodec/ivi_dsp.c
Line
Count
Source
1
/*
2
 * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5)
3
 *
4
 * Copyright (c) 2009-2011 Maxim Poliakovski
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
23
/**
24
 * @file
25
 * DSP functions (inverse transforms, motion compensation, wavelet recompositions)
26
 * for Indeo Video Interactive codecs.
27
 */
28
29
#include <string.h>
30
#include "libavutil/common.h"
31
#include "ivi.h"
32
#include "ivi_dsp.h"
33
34
void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
35
                        const ptrdiff_t dst_pitch)
36
534
{
37
534
    int             x, y, indx;
38
534
    int32_t         p0, p1, p2, p3, tmp0, tmp1, tmp2;
39
534
    int32_t         b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
40
534
    int32_t         b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
41
534
    ptrdiff_t       pitch, back_pitch;
42
534
    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
43
534
    const int       num_bands = 4;
44
45
    /* all bands should have the same pitch */
46
534
    pitch = plane->bands[0].pitch;
47
48
    /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */
49
534
    back_pitch = 0;
50
51
    /* get pointers to the wavelet bands */
52
534
    b0_ptr = plane->bands[0].buf;
53
534
    b1_ptr = plane->bands[1].buf;
54
534
    b2_ptr = plane->bands[2].buf;
55
534
    b3_ptr = plane->bands[3].buf;
56
57
73.8k
    for (y = 0; y < plane->height; y += 2) {
58
59
73.3k
        if (y+2 >= plane->height)
60
534
            pitch= 0;
61
        /* load storage variables with values */
62
73.3k
        if (num_bands > 0) {
63
73.3k
            b0_1 = b0_ptr[0];
64
73.3k
            b0_2 = b0_ptr[pitch];
65
73.3k
        }
66
67
73.3k
        if (num_bands > 1) {
68
73.3k
            b1_1 = b1_ptr[back_pitch];
69
73.3k
            b1_2 = b1_ptr[0];
70
73.3k
            b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch];
71
73.3k
        }
72
73
73.3k
        if (num_bands > 2) {
74
73.3k
            b2_2 = b2_ptr[0];     // b2[x,  y  ]
75
73.3k
            b2_3 = b2_2;          // b2[x+1,y  ] = b2[x,y]
76
73.3k
            b2_5 = b2_ptr[pitch]; // b2[x  ,y+1]
77
73.3k
            b2_6 = b2_5;          // b2[x+1,y+1] = b2[x,y+1]
78
73.3k
        }
79
80
73.3k
        if (num_bands > 3) {
81
73.3k
            b3_2 = b3_ptr[back_pitch]; // b3[x  ,y-1]
82
73.3k
            b3_3 = b3_2;               // b3[x+1,y-1] = b3[x  ,y-1]
83
73.3k
            b3_5 = b3_ptr[0];          // b3[x  ,y  ]
84
73.3k
            b3_6 = b3_5;               // b3[x+1,y  ] = b3[x  ,y  ]
85
73.3k
            b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch];
86
73.3k
            b3_9 = b3_8;
87
73.3k
        }
88
89
30.2M
        for (x = 0, indx = 0; x < plane->width; x+=2, indx++) {
90
30.1M
            if (x+2 >= plane->width) {
91
73.3k
                b0_ptr --;
92
73.3k
                b1_ptr --;
93
73.3k
                b2_ptr --;
94
73.3k
                b3_ptr --;
95
73.3k
            }
96
97
            /* some values calculated in the previous iterations can */
98
            /* be reused in the next ones, so do appropriate copying */
99
30.1M
            b2_1 = b2_2; // b2[x-1,y  ] = b2[x,  y  ]
100
30.1M
            b2_2 = b2_3; // b2[x  ,y  ] = b2[x+1,y  ]
101
30.1M
            b2_4 = b2_5; // b2[x-1,y+1] = b2[x  ,y+1]
102
30.1M
            b2_5 = b2_6; // b2[x  ,y+1] = b2[x+1,y+1]
103
30.1M
            b3_1 = b3_2; // b3[x-1,y-1] = b3[x  ,y-1]
104
30.1M
            b3_2 = b3_3; // b3[x  ,y-1] = b3[x+1,y-1]
105
30.1M
            b3_4 = b3_5; // b3[x-1,y  ] = b3[x  ,y  ]
106
30.1M
            b3_5 = b3_6; // b3[x  ,y  ] = b3[x+1,y  ]
107
30.1M
            b3_7 = b3_8; // vert_HPF(x-1)
108
30.1M
            b3_8 = b3_9; // vert_HPF(x  )
109
110
30.1M
            p0 = p1 = p2 = p3 = 0;
111
112
            /* process the LL-band by applying LPF both vertically and horizontally */
113
30.1M
            if (num_bands > 0) {
114
30.1M
                tmp0 = b0_1;
115
30.1M
                tmp2 = b0_2;
116
30.1M
                b0_1 = b0_ptr[indx+1];
117
30.1M
                b0_2 = b0_ptr[pitch+indx+1];
118
30.1M
                tmp1 = tmp0 + b0_1;
119
120
30.1M
                p0 =  tmp0 * 16;
121
30.1M
                p1 =  tmp1 * 8;
122
30.1M
                p2 = (tmp0 + tmp2) * 8;
123
30.1M
                p3 = (tmp1 + tmp2 + b0_2) * 4;
124
30.1M
            }
125
126
            /* process the HL-band by applying HPF vertically and LPF horizontally */
127
30.1M
            if (num_bands > 1) {
128
30.1M
                tmp0 = b1_2;
129
30.1M
                tmp1 = b1_1;
130
30.1M
                b1_2 = b1_ptr[indx+1];
131
30.1M
                b1_1 = b1_ptr[back_pitch+indx+1];
132
133
30.1M
                tmp2 = tmp1 - tmp0*6 + b1_3;
134
30.1M
                b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1];
135
136
30.1M
                p0 += (tmp0 + tmp1) * 8;
137
30.1M
                p1 += (tmp0 + tmp1 + b1_1 + b1_2) * 4;
138
30.1M
                p2 +=  tmp2 * 4;
139
30.1M
                p3 += (tmp2 + b1_3) * 2;
140
30.1M
            }
141
142
            /* process the LH-band by applying LPF vertically and HPF horizontally */
143
30.1M
            if (num_bands > 2) {
144
30.1M
                b2_3 = b2_ptr[indx+1];
145
30.1M
                b2_6 = b2_ptr[pitch+indx+1];
146
147
30.1M
                tmp0 = b2_1 + b2_2;
148
30.1M
                tmp1 = b2_1 - b2_2*6 + b2_3;
149
150
30.1M
                p0 += tmp0 * 8;
151
30.1M
                p1 += tmp1 * 4;
152
30.1M
                p2 += (tmp0 + b2_4 + b2_5) * 4;
153
30.1M
                p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) * 2;
154
30.1M
            }
155
156
            /* process the HH-band by applying HPF both vertically and horizontally */
157
30.1M
            if (num_bands > 3) {
158
30.1M
                b3_6 = b3_ptr[indx+1];            // b3[x+1,y  ]
159
30.1M
                b3_3 = b3_ptr[back_pitch+indx+1]; // b3[x+1,y-1]
160
161
30.1M
                tmp0 = b3_1 + b3_4;
162
30.1M
                tmp1 = b3_2 + b3_5;
163
30.1M
                tmp2 = b3_3 + b3_6;
164
165
30.1M
                b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1];
166
167
30.1M
                p0 += (tmp0 + tmp1) * 4;
168
30.1M
                p1 += (tmp0 - tmp1*6 + tmp2) * 2;
169
30.1M
                p2 += (b3_7 + b3_8) * 2;
170
30.1M
                p3 +=  b3_7 - b3_8*6 + b3_9;
171
30.1M
            }
172
173
            /* output four pixels */
174
30.1M
            dst[x]             = av_clip_uint8((p0 >> 6) + 128);
175
30.1M
            dst[x+1]           = av_clip_uint8((p1 >> 6) + 128);
176
30.1M
            dst[dst_pitch+x]   = av_clip_uint8((p2 >> 6) + 128);
177
30.1M
            dst[dst_pitch+x+1] = av_clip_uint8((p3 >> 6) + 128);
178
30.1M
        }// for x
179
180
73.3k
        dst += dst_pitch << 1;
181
182
73.3k
        back_pitch = -pitch;
183
184
73.3k
        b0_ptr += pitch + 1;
185
73.3k
        b1_ptr += pitch + 1;
186
73.3k
        b2_ptr += pitch + 1;
187
73.3k
        b3_ptr += pitch + 1;
188
73.3k
    }
189
534
}
190
191
void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
192
                           const ptrdiff_t dst_pitch)
193
0
{
194
0
    int             x, y, indx, b0, b1, b2, b3, p0, p1, p2, p3;
195
0
    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
196
0
    ptrdiff_t       pitch;
197
198
    /* all bands should have the same pitch */
199
0
    pitch = plane->bands[0].pitch;
200
201
    /* get pointers to the wavelet bands */
202
0
    b0_ptr = plane->bands[0].buf;
203
0
    b1_ptr = plane->bands[1].buf;
204
0
    b2_ptr = plane->bands[2].buf;
205
0
    b3_ptr = plane->bands[3].buf;
206
207
0
    for (y = 0; y < plane->height; y += 2) {
208
0
        for (x = 0, indx = 0; x < plane->width; x += 2, indx++) {
209
            /* load coefficients */
210
0
            b0 = b0_ptr[indx]; //should be: b0 = (num_bands > 0) ? b0_ptr[indx] : 0;
211
0
            b1 = b1_ptr[indx]; //should be: b1 = (num_bands > 1) ? b1_ptr[indx] : 0;
212
0
            b2 = b2_ptr[indx]; //should be: b2 = (num_bands > 2) ? b2_ptr[indx] : 0;
213
0
            b3 = b3_ptr[indx]; //should be: b3 = (num_bands > 3) ? b3_ptr[indx] : 0;
214
215
            /* haar wavelet recomposition */
216
0
            p0 = (b0 + b1 + b2 + b3 + 2) >> 2;
217
0
            p1 = (b0 + b1 - b2 - b3 + 2) >> 2;
218
0
            p2 = (b0 - b1 + b2 - b3 + 2) >> 2;
219
0
            p3 = (b0 - b1 - b2 + b3 + 2) >> 2;
220
221
            /* bias, convert and output four pixels */
222
0
            dst[x]                 = av_clip_uint8(p0 + 128);
223
0
            dst[x + 1]             = av_clip_uint8(p1 + 128);
224
0
            dst[dst_pitch + x]     = av_clip_uint8(p2 + 128);
225
0
            dst[dst_pitch + x + 1] = av_clip_uint8(p3 + 128);
226
0
        }// for x
227
228
0
        dst += dst_pitch << 1;
229
230
0
        b0_ptr += pitch;
231
0
        b1_ptr += pitch;
232
0
        b2_ptr += pitch;
233
0
        b3_ptr += pitch;
234
0
    }// for y
235
0
}
236
237
/** butterfly operation for the inverse Haar transform */
238
#define IVI_HAAR_BFLY(s1, s2, o1, o2, t) \
239
946k
    t  = ((s1) - (s2)) >> 1;\
240
946k
    o1 = ((s1) + (s2)) >> 1;\
241
946k
    o2 = (t);\
242
243
/** inverse 8-point Haar transform */
244
#define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
245
                  d1, d2, d3, d4, d5, d6, d7, d8,\
246
128k
                  t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
247
128k
    t1 = (s1) * 2; t5 = (s5) * 2;\
248
128k
    IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
249
128k
    IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
250
128k
    IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
251
128k
    IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\
252
128k
    d1 = COMPENSATE(t1);\
253
128k
    d2 = COMPENSATE(t2);\
254
128k
    d3 = COMPENSATE(t3);\
255
128k
    d4 = COMPENSATE(t4);\
256
128k
    d5 = COMPENSATE(t5);\
257
128k
    d6 = COMPENSATE(t6);\
258
128k
    d7 = COMPENSATE(t7);\
259
128k
    d8 = COMPENSATE(t8); }
260
261
/** inverse 4-point Haar transform */
262
16.4k
#define INV_HAAR4(s1, s3, s5, s7, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
263
16.4k
    IVI_HAAR_BFLY(s1, s3, t0, t1, t4);\
264
16.4k
    IVI_HAAR_BFLY(t0, s5, t2, t3, t4);\
265
16.4k
    d1 = COMPENSATE(t2);\
266
16.4k
    d2 = COMPENSATE(t3);\
267
16.4k
    IVI_HAAR_BFLY(t1, s7, t2, t3, t4);\
268
16.4k
    d3 = COMPENSATE(t2);\
269
16.4k
    d4 = COMPENSATE(t3); }
270
271
void ff_ivi_inverse_haar_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch,
272
                             const uint8_t *flags)
273
5.65k
{
274
5.65k
    int     i, shift, sp1, sp2, sp3, sp4;
275
5.65k
    const int32_t *src;
276
5.65k
    int32_t *dst;
277
5.65k
    int     tmp[64];
278
5.65k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
279
280
    /* apply the InvHaar8 to all columns */
281
134k
#define COMPENSATE(x) (x)
282
5.65k
    src = in;
283
5.65k
    dst = tmp;
284
50.8k
    for (i = 0; i < 8; i++) {
285
45.2k
        if (flags[i]) {
286
            /* pre-scaling */
287
16.8k
            shift = !(i & 4);
288
16.8k
            sp1 = src[ 0] * (1 << shift);
289
16.8k
            sp2 = src[ 8] * (1 << shift);
290
16.8k
            sp3 = src[16] * (1 << shift);
291
16.8k
            sp4 = src[24] * (1 << shift);
292
16.8k
            INV_HAAR8(    sp1,     sp2,     sp3,     sp4,
293
16.8k
                      src[32], src[40], src[48], src[56],
294
16.8k
                      dst[ 0], dst[ 8], dst[16], dst[24],
295
16.8k
                      dst[32], dst[40], dst[48], dst[56],
296
16.8k
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
297
16.8k
        } else
298
28.4k
            dst[ 0] = dst[ 8] = dst[16] = dst[24] =
299
28.4k
            dst[32] = dst[40] = dst[48] = dst[56] = 0;
300
301
45.2k
        src++;
302
45.2k
        dst++;
303
45.2k
    }
304
5.65k
#undef  COMPENSATE
305
306
    /* apply the InvHaar8 to all rows */
307
345k
#define COMPENSATE(x) (x)
308
5.65k
    src = tmp;
309
50.8k
    for (i = 0; i < 8; i++) {
310
45.2k
        if (   !src[0] && !src[1] && !src[2] && !src[3]
311
3.41k
            && !src[4] && !src[5] && !src[6] && !src[7]) {
312
2.07k
            memset(out, 0, 8 * sizeof(out[0]));
313
43.1k
        } else {
314
43.1k
            INV_HAAR8(src[0], src[1], src[2], src[3],
315
43.1k
                      src[4], src[5], src[6], src[7],
316
43.1k
                      out[0], out[1], out[2], out[3],
317
43.1k
                      out[4], out[5], out[6], out[7],
318
43.1k
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
319
43.1k
        }
320
45.2k
        src += 8;
321
45.2k
        out += pitch;
322
45.2k
    }
323
5.65k
#undef  COMPENSATE
324
5.65k
}
325
326
void ff_ivi_row_haar8(const int32_t *in, int16_t *out, ptrdiff_t pitch,
327
                      const uint8_t *flags)
328
5.56k
{
329
5.56k
    int     i;
330
5.56k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
331
332
    /* apply the InvHaar8 to all rows */
333
78.0k
#define COMPENSATE(x) (x)
334
50.0k
    for (i = 0; i < 8; i++) {
335
44.5k
        if (   !in[0] && !in[1] && !in[2] && !in[3]
336
36.4k
            && !in[4] && !in[5] && !in[6] && !in[7]) {
337
34.7k
            memset(out, 0, 8 * sizeof(out[0]));
338
34.7k
        } else {
339
9.75k
            INV_HAAR8(in[0],  in[1],  in[2],  in[3],
340
9.75k
                      in[4],  in[5],  in[6],  in[7],
341
9.75k
                      out[0], out[1], out[2], out[3],
342
9.75k
                      out[4], out[5], out[6], out[7],
343
9.75k
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
344
9.75k
        }
345
44.5k
        in  += 8;
346
44.5k
        out += pitch;
347
44.5k
    }
348
5.56k
#undef  COMPENSATE
349
5.56k
}
350
351
void ff_ivi_col_haar8(const int32_t *in, int16_t *out, ptrdiff_t pitch,
352
                      const uint8_t *flags)
353
55.6k
{
354
55.6k
    int     i;
355
55.6k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
356
357
    /* apply the InvHaar8 to all columns */
358
467k
#define COMPENSATE(x) (x)
359
501k
    for (i = 0; i < 8; i++) {
360
445k
        if (flags[i]) {
361
58.3k
            INV_HAAR8(in[ 0], in[ 8], in[16], in[24],
362
58.3k
                      in[32], in[40], in[48], in[56],
363
58.3k
                      out[0 * pitch], out[1 * pitch],
364
58.3k
                      out[2 * pitch], out[3 * pitch],
365
58.3k
                      out[4 * pitch], out[5 * pitch],
366
58.3k
                      out[6 * pitch], out[7 * pitch],
367
58.3k
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
368
58.3k
        } else
369
387k
            out[0 * pitch] = out[1 * pitch] =
370
387k
            out[2 * pitch] = out[3 * pitch] =
371
387k
            out[4 * pitch] = out[5 * pitch] =
372
387k
            out[6 * pitch] = out[7 * pitch] = 0;
373
374
445k
        in++;
375
445k
        out++;
376
445k
    }
377
55.6k
#undef  COMPENSATE
378
55.6k
}
379
380
void ff_ivi_inverse_haar_4x4(const int32_t *in, int16_t *out, ptrdiff_t pitch,
381
                             const uint8_t *flags)
382
2.43k
{
383
2.43k
    int     i, shift, sp1, sp2;
384
2.43k
    const int32_t *src;
385
2.43k
    int32_t *dst;
386
2.43k
    int     tmp[16];
387
2.43k
    int     t0, t1, t2, t3, t4;
388
389
    /* apply the InvHaar4 to all columns */
390
10.6k
#define COMPENSATE(x) (x)
391
2.43k
    src = in;
392
2.43k
    dst = tmp;
393
12.1k
    for (i = 0; i < 4; i++) {
394
9.74k
        if (flags[i]) {
395
            /* pre-scaling */
396
2.65k
            shift = !(i & 2);
397
2.65k
            sp1 = src[0] * (1 << shift);
398
2.65k
            sp2 = src[4] * (1 << shift);
399
2.65k
            INV_HAAR4(   sp1,    sp2, src[8], src[12],
400
2.65k
                      dst[0], dst[4], dst[8], dst[12],
401
2.65k
                      t0, t1, t2, t3, t4);
402
2.65k
        } else
403
7.09k
            dst[0] = dst[4] = dst[8] = dst[12] = 0;
404
405
9.74k
        src++;
406
9.74k
        dst++;
407
9.74k
    }
408
2.43k
#undef  COMPENSATE
409
410
    /* apply the InvHaar8 to all rows */
411
32.2k
#define COMPENSATE(x) (x)
412
2.43k
    src = tmp;
413
12.1k
    for (i = 0; i < 4; i++) {
414
9.74k
        if (!src[0] && !src[1] && !src[2] && !src[3]) {
415
1.67k
            memset(out, 0, 4 * sizeof(out[0]));
416
8.07k
        } else {
417
8.07k
            INV_HAAR4(src[0], src[1], src[2], src[3],
418
8.07k
                      out[0], out[1], out[2], out[3],
419
8.07k
                      t0, t1, t2, t3, t4);
420
8.07k
        }
421
9.74k
        src += 4;
422
9.74k
        out += pitch;
423
9.74k
    }
424
2.43k
#undef  COMPENSATE
425
2.43k
}
426
427
void ff_ivi_row_haar4(const int32_t *in, int16_t *out, ptrdiff_t pitch,
428
                      const uint8_t *flags)
429
2.08k
{
430
2.08k
    int     i;
431
2.08k
    int     t0, t1, t2, t3, t4;
432
433
    /* apply the InvHaar4 to all rows */
434
14.7k
#define COMPENSATE(x) (x)
435
10.4k
    for (i = 0; i < 4; i++) {
436
8.34k
        if (!in[0] && !in[1] && !in[2] && !in[3]) {
437
4.65k
            memset(out, 0, 4 * sizeof(out[0]));
438
4.65k
        } else {
439
3.68k
            INV_HAAR4(in[0], in[1], in[2], in[3],
440
3.68k
                      out[0], out[1], out[2], out[3],
441
3.68k
                      t0, t1, t2, t3, t4);
442
3.68k
        }
443
8.34k
        in  += 4;
444
8.34k
        out += pitch;
445
8.34k
    }
446
2.08k
#undef  COMPENSATE
447
2.08k
}
448
449
void ff_ivi_col_haar4(const int32_t *in, int16_t *out, ptrdiff_t pitch,
450
                      const uint8_t *flags)
451
1.77k
{
452
1.77k
    int     i;
453
1.77k
    int     t0, t1, t2, t3, t4;
454
455
    /* apply the InvHaar8 to all columns */
456
8.33k
#define COMPENSATE(x) (x)
457
8.85k
    for (i = 0; i < 4; i++) {
458
7.08k
        if (flags[i]) {
459
2.08k
            INV_HAAR4(in[0], in[4], in[8], in[12],
460
2.08k
                      out[0 * pitch], out[1 * pitch],
461
2.08k
                      out[2 * pitch], out[3 * pitch],
462
2.08k
                      t0, t1, t2, t3, t4);
463
2.08k
        } else
464
4.99k
            out[0 * pitch] = out[1 * pitch] =
465
4.99k
            out[2 * pitch] = out[3 * pitch] = 0;
466
467
7.08k
        in++;
468
7.08k
        out++;
469
7.08k
    }
470
1.77k
#undef  COMPENSATE
471
1.77k
}
472
473
void ff_ivi_dc_haar_2d(const int32_t *in, int16_t *out, ptrdiff_t pitch,
474
                       int blk_size)
475
36.9k
{
476
36.9k
    int     x, y;
477
36.9k
    int16_t dc_coeff;
478
479
36.9k
    dc_coeff = (*in + 0) >> 3;
480
481
327k
    for (y = 0; y < blk_size; out += pitch, y++) {
482
2.58M
        for (x = 0; x < blk_size; x++)
483
2.29M
            out[x] = dc_coeff;
484
290k
    }
485
36.9k
}
486
487
/** butterfly operation for the inverse slant transform */
488
#define IVI_SLANT_BFLY(s1, s2, o1, o2, t) \
489
2.21M
    t  = (s1) - (s2);\
490
2.21M
    o1 = (s1) + (s2);\
491
2.21M
    o2 = (t);\
492
493
/** This is a reflection a,b = 1/2, 5/4 for the inverse slant transform */
494
#define IVI_IREFLECT(s1, s2, o1, o2, t) \
495
478k
    t  = (((s1) + (s2)*2 + 2) >> 2) + (s1);\
496
478k
    o2 = (((s1)*2 - (s2) + 2) >> 2) - (s2);\
497
478k
    o1 = (t);\
498
499
/** This is a reflection a,b = 1/2, 7/8 for the inverse slant transform */
500
#define IVI_SLANT_PART4(s1, s2, o1, o2, t) \
501
194k
    t  = (s2) + (((s1)*4  - (s2) + 4) >> 3);\
502
194k
    o2 = (s1) + ((-(s1) - (s2)*4 + 4) >> 3);\
503
194k
    o1 = (t);\
504
505
/** inverse slant8 transform */
506
#define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
507
                       d1, d2, d3, d4, d5, d6, d7, d8,\
508
194k
                       t0, t1, t2, t3, t4, t5, t6, t7, t8) {\
509
194k
    IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
510
194k
\
511
194k
    IVI_SLANT_BFLY(s1, t5, t1, t5, t0); IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
512
194k
    IVI_SLANT_BFLY(s7, s3, t7, t3, t0); IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
513
194k
\
514
194k
    IVI_SLANT_BFLY(t1, t2, t1, t2, t0); IVI_IREFLECT  (t4, t3, t4, t3, t0);\
515
194k
    IVI_SLANT_BFLY(t5, t6, t5, t6, t0); IVI_IREFLECT  (t8, t7, t8, t7, t0);\
516
194k
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
517
194k
    IVI_SLANT_BFLY(t5, t8, t5, t8, t0); IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
518
194k
    d1 = COMPENSATE(t1);\
519
194k
    d2 = COMPENSATE(t2);\
520
194k
    d3 = COMPENSATE(t3);\
521
194k
    d4 = COMPENSATE(t4);\
522
194k
    d5 = COMPENSATE(t5);\
523
194k
    d6 = COMPENSATE(t6);\
524
194k
    d7 = COMPENSATE(t7);\
525
194k
    d8 = COMPENSATE(t8);}
526
527
/** inverse slant4 transform */
528
89.5k
#define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) {\
529
89.5k
    IVI_SLANT_BFLY(s1, s2, t1, t2, t0); IVI_IREFLECT  (s4, s3, t4, t3, t0);\
530
89.5k
\
531
89.5k
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0); IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
532
89.5k
    d1 = COMPENSATE(t1);\
533
89.5k
    d2 = COMPENSATE(t2);\
534
89.5k
    d3 = COMPENSATE(t3);\
535
89.5k
    d4 = COMPENSATE(t4);}
536
537
void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
538
14.2k
{
539
14.2k
    int     i;
540
14.2k
    const int32_t *src;
541
14.2k
    int32_t *dst;
542
14.2k
    int     tmp[64];
543
14.2k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
544
545
221k
#define COMPENSATE(x) (x)
546
14.2k
    src = in;
547
14.2k
    dst = tmp;
548
128k
    for (i = 0; i < 8; i++) {
549
113k
        if (flags[i]) {
550
27.6k
            IVI_INV_SLANT8(src[0], src[8], src[16], src[24], src[32], src[40], src[48], src[56],
551
27.6k
                           dst[0], dst[8], dst[16], dst[24], dst[32], dst[40], dst[48], dst[56],
552
27.6k
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
553
27.6k
        } else
554
86.1k
            dst[0] = dst[8] = dst[16] = dst[24] = dst[32] = dst[40] = dst[48] = dst[56] = 0;
555
556
113k
        src++;
557
113k
        dst++;
558
113k
    }
559
14.2k
#undef COMPENSATE
560
561
900k
#define COMPENSATE(x) (((x) + 1)>>1)
562
14.2k
    src = tmp;
563
128k
    for (i = 0; i < 8; i++) {
564
113k
        if (!src[0] && !src[1] && !src[2] && !src[3] && !src[4] && !src[5] && !src[6] && !src[7]) {
565
1.27k
            memset(out, 0, 8*sizeof(out[0]));
566
112k
        } else {
567
112k
            IVI_INV_SLANT8(src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
568
112k
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
569
112k
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
570
112k
        }
571
113k
        src += 8;
572
113k
        out += pitch;
573
113k
    }
574
14.2k
#undef COMPENSATE
575
14.2k
}
576
577
void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
578
17.1k
{
579
17.1k
    int     i;
580
17.1k
    const int32_t *src;
581
17.1k
    int32_t *dst;
582
17.1k
    int     tmp[16];
583
17.1k
    int     t0, t1, t2, t3, t4;
584
585
74.7k
#define COMPENSATE(x) (x)
586
17.1k
    src = in;
587
17.1k
    dst = tmp;
588
85.9k
    for (i = 0; i < 4; i++) {
589
68.7k
        if (flags[i]) {
590
18.6k
            IVI_INV_SLANT4(src[0], src[4], src[8], src[12],
591
18.6k
                           dst[0], dst[4], dst[8], dst[12],
592
18.6k
                           t0, t1, t2, t3, t4);
593
18.6k
        } else
594
50.1k
            dst[0] = dst[4] = dst[8] = dst[12] = 0;
595
596
68.7k
        src++;
597
68.7k
        dst++;
598
68.7k
    }
599
17.1k
#undef COMPENSATE
600
601
270k
#define COMPENSATE(x) (((x) + 1)>>1)
602
17.1k
    src = tmp;
603
85.9k
    for (i = 0; i < 4; i++) {
604
68.7k
        if (!src[0] && !src[1] && !src[2] && !src[3]) {
605
1.23k
            out[0] = out[1] = out[2] = out[3] = 0;
606
67.5k
        } else {
607
67.5k
            IVI_INV_SLANT4(src[0], src[1], src[2], src[3],
608
67.5k
                           out[0], out[1], out[2], out[3],
609
67.5k
                           t0, t1, t2, t3, t4);
610
67.5k
        }
611
68.7k
        src += 4;
612
68.7k
        out += pitch;
613
68.7k
    }
614
17.1k
#undef COMPENSATE
615
17.1k
}
616
617
void ff_ivi_dc_slant_2d(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size)
618
1.62M
{
619
1.62M
    int     x, y;
620
1.62M
    int16_t dc_coeff;
621
622
1.62M
    dc_coeff = (*in + 1) >> 1;
623
624
14.6M
    for (y = 0; y < blk_size; out += pitch, y++) {
625
116M
        for (x = 0; x < blk_size; x++)
626
103M
            out[x] = dc_coeff;
627
12.9M
    }
628
1.62M
}
629
630
void ff_ivi_row_slant8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
631
12.3k
{
632
12.3k
    int     i;
633
12.3k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
634
635
400k
#define COMPENSATE(x) (((x) + 1)>>1)
636
111k
    for (i = 0; i < 8; i++) {
637
99.1k
        if (!in[0] && !in[1] && !in[2] && !in[3] && !in[4] && !in[5] && !in[6] && !in[7]) {
638
49.0k
            memset(out, 0, 8*sizeof(out[0]));
639
50.1k
        } else {
640
50.1k
            IVI_INV_SLANT8( in[0],  in[1],  in[2],  in[3],  in[4],  in[5],  in[6],  in[7],
641
50.1k
                           out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
642
50.1k
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
643
50.1k
        }
644
99.1k
        in += 8;
645
99.1k
        out += pitch;
646
99.1k
    }
647
12.3k
#undef COMPENSATE
648
12.3k
}
649
650
void ff_ivi_dc_row_slant(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size)
651
28.3k
{
652
28.3k
    int     x, y;
653
28.3k
    int16_t dc_coeff;
654
655
28.3k
    dc_coeff = (*in + 1) >> 1;
656
657
252k
    for (x = 0; x < blk_size; x++)
658
224k
        out[x] = dc_coeff;
659
660
28.3k
    out += pitch;
661
662
224k
    for (y = 1; y < blk_size; out += pitch, y++) {
663
1.75M
        for (x = 0; x < blk_size; x++)
664
1.56M
            out[x] = 0;
665
195k
    }
666
28.3k
}
667
668
void ff_ivi_col_slant8(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
669
2.58k
{
670
2.58k
    int     i, row2, row4, row8;
671
2.58k
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;
672
673
2.58k
    row2 = pitch << 1;
674
2.58k
    row4 = pitch << 2;
675
2.58k
    row8 = pitch << 3;
676
677
31.8k
#define COMPENSATE(x) (((x) + 1)>>1)
678
23.2k
    for (i = 0; i < 8; i++) {
679
20.6k
        if (flags[i]) {
680
3.98k
            IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56],
681
3.98k
                           out[0], out[pitch], out[row2], out[row2 + pitch], out[row4],
682
3.98k
                           out[row4 + pitch],  out[row4 + row2], out[row8 - pitch],
683
3.98k
                           t0, t1, t2, t3, t4, t5, t6, t7, t8);
684
16.6k
        } else {
685
16.6k
            out[0] = out[pitch] = out[row2] = out[row2 + pitch] = out[row4] =
686
16.6k
            out[row4 + pitch] =  out[row4 + row2] = out[row8 - pitch] = 0;
687
16.6k
        }
688
689
20.6k
        in++;
690
20.6k
        out++;
691
20.6k
    }
692
2.58k
#undef COMPENSATE
693
2.58k
}
694
695
void ff_ivi_dc_col_slant(const int32_t *in, int16_t *out, ptrdiff_t pitch, int blk_size)
696
2.83k
{
697
2.83k
    int     x, y;
698
2.83k
    int16_t dc_coeff;
699
700
2.83k
    dc_coeff = (*in + 1) >> 1;
701
702
23.8k
    for (y = 0; y < blk_size; out += pitch, y++) {
703
21.0k
        out[0] = dc_coeff;
704
161k
        for (x = 1; x < blk_size; x++)
705
140k
            out[x] = 0;
706
21.0k
    }
707
2.83k
}
708
709
void ff_ivi_row_slant4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
710
1.61k
{
711
1.61k
    int     i;
712
1.61k
    int     t0, t1, t2, t3, t4;
713
714
10.4k
#define COMPENSATE(x) (((x) + 1)>>1)
715
8.05k
    for (i = 0; i < 4; i++) {
716
6.44k
        if (!in[0] && !in[1] && !in[2] && !in[3]) {
717
3.84k
            memset(out, 0, 4*sizeof(out[0]));
718
3.84k
        } else {
719
2.60k
            IVI_INV_SLANT4( in[0],  in[1],  in[2],  in[3],
720
2.60k
                           out[0], out[1], out[2], out[3],
721
2.60k
                           t0, t1, t2, t3, t4);
722
2.60k
        }
723
6.44k
        in  += 4;
724
6.44k
        out += pitch;
725
6.44k
    }
726
1.61k
#undef COMPENSATE
727
1.61k
}
728
729
void ff_ivi_col_slant4(const int32_t *in, int16_t *out, ptrdiff_t pitch, const uint8_t *flags)
730
602
{
731
602
    int     i, row2;
732
602
    int     t0, t1, t2, t3, t4;
733
734
602
    row2 = pitch << 1;
735
736
3.04k
#define COMPENSATE(x) (((x) + 1)>>1)
737
3.01k
    for (i = 0; i < 4; i++) {
738
2.40k
        if (flags[i]) {
739
760
            IVI_INV_SLANT4(in[0], in[4], in[8], in[12],
740
760
                           out[0], out[pitch], out[row2], out[row2 + pitch],
741
760
                           t0, t1, t2, t3, t4);
742
1.64k
        } else {
743
1.64k
            out[0] = out[pitch] = out[row2] = out[row2 + pitch] = 0;
744
1.64k
        }
745
746
2.40k
        in++;
747
2.40k
        out++;
748
2.40k
    }
749
602
#undef COMPENSATE
750
602
}
751
752
void ff_ivi_put_pixels_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch,
753
                           const uint8_t *flags)
754
1.28k
{
755
1.28k
    int     x, y;
756
757
11.5k
    for (y = 0; y < 8; out += pitch, in += 8, y++)
758
92.3k
        for (x = 0; x < 8; x++)
759
82.1k
            out[x] = in[x];
760
1.28k
}
761
762
void ff_ivi_put_dc_pixel_8x8(const int32_t *in, int16_t *out, ptrdiff_t pitch,
763
                             int blk_size)
764
4.74k
{
765
4.74k
    int     y;
766
767
4.74k
    out[0] = in[0];
768
4.74k
    memset(out + 1, 0, 7*sizeof(out[0]));
769
4.74k
    out += pitch;
770
771
37.9k
    for (y = 1; y < 8; out += pitch, y++)
772
33.2k
        memset(out, 0, 8*sizeof(out[0]));
773
4.74k
}
774
775
#define IVI_MC_TEMPLATE(size, suffix, OP) \
776
static void ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, \
777
                                                 ptrdiff_t dpitch, \
778
                                                 const int16_t *ref_buf, \
779
1.95M
                                                 ptrdiff_t pitch, int mc_type) \
780
1.95M
{ \
781
1.95M
    int     i, j; \
782
1.95M
    const int16_t *wptr; \
783
1.95M
\
784
1.95M
    switch (mc_type) { \
785
1.88M
    case 0: /* fullpel (no interpolation) */ \
786
11.3M
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787
62.0M
            for (j = 0; j < size; j++) {\
788
52.5M
                OP(buf[j], ref_buf[j]); \
789
52.5M
            } \
790
9.41M
        } \
791
1.88M
        break; \
792
20.9k
    case 1: /* horizontal halfpel interpolation */ \
793
156k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794
1.09M
            for (j = 0; j < size; j++) \
795
963k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
796
20.9k
        break; \
797
24.0k
    case 2: /* vertical halfpel interpolation */ \
798
24.0k
        wptr = ref_buf + pitch; \
799
188k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800
1.36M
            for (j = 0; j < size; j++) \
801
1.20M
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
802
24.0k
        break; \
803
22.4k
    case 3: /* vertical and horizontal halfpel interpolation */ \
804
22.4k
        wptr = ref_buf + pitch; \
805
173k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806
1.24M
            for (j = 0; j < size; j++) \
807
1.09M
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
808
22.4k
        break; \
809
1.95M
    } \
810
1.95M
} \
ivi_dsp.c:ivi_mc_8x8_no_delta
Line
Count
Source
779
452k
                                                 ptrdiff_t pitch, int mc_type) \
780
452k
{ \
781
452k
    int     i, j; \
782
452k
    const int16_t *wptr; \
783
452k
\
784
452k
    switch (mc_type) { \
785
422k
    case 0: /* fullpel (no interpolation) */ \
786
3.80M
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787
30.4M
            for (j = 0; j < size; j++) {\
788
27.0M
                OP(buf[j], ref_buf[j]); \
789
27.0M
            } \
790
3.37M
        } \
791
422k
        break; \
792
10.3k
    case 1: /* horizontal halfpel interpolation */ \
793
93.1k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794
745k
            for (j = 0; j < size; j++) \
795
662k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
796
10.3k
        break; \
797
11.4k
    case 2: /* vertical halfpel interpolation */ \
798
11.4k
        wptr = ref_buf + pitch; \
799
102k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800
822k
            for (j = 0; j < size; j++) \
801
730k
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
802
11.4k
        break; \
803
5.69k
    case 3: /* vertical and horizontal halfpel interpolation */ \
804
5.69k
        wptr = ref_buf + pitch; \
805
51.2k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806
410k
            for (j = 0; j < size; j++) \
807
364k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
808
5.69k
        break; \
809
452k
    } \
810
452k
} \
ivi_dsp.c:ivi_mc_8x8_delta
Line
Count
Source
779
62.9k
                                                 ptrdiff_t pitch, int mc_type) \
780
62.9k
{ \
781
62.9k
    int     i, j; \
782
62.9k
    const int16_t *wptr; \
783
62.9k
\
784
62.9k
    switch (mc_type) { \
785
44.4k
    case 0: /* fullpel (no interpolation) */ \
786
400k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787
3.20M
            for (j = 0; j < size; j++) {\
788
2.84M
                OP(buf[j], ref_buf[j]); \
789
2.84M
            } \
790
355k
        } \
791
44.4k
        break; \
792
2.76k
    case 1: /* horizontal halfpel interpolation */ \
793
24.8k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794
199k
            for (j = 0; j < size; j++) \
795
176k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
796
2.76k
        break; \
797
5.61k
    case 2: /* vertical halfpel interpolation */ \
798
5.61k
        wptr = ref_buf + pitch; \
799
50.5k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800
404k
            for (j = 0; j < size; j++) \
801
359k
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
802
5.61k
        break; \
803
9.61k
    case 3: /* vertical and horizontal halfpel interpolation */ \
804
9.61k
        wptr = ref_buf + pitch; \
805
86.5k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806
692k
            for (j = 0; j < size; j++) \
807
615k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
808
9.61k
        break; \
809
62.9k
    } \
810
62.9k
} \
ivi_dsp.c:ivi_mc_4x4_no_delta
Line
Count
Source
779
1.41M
                                                 ptrdiff_t pitch, int mc_type) \
780
1.41M
{ \
781
1.41M
    int     i, j; \
782
1.41M
    const int16_t *wptr; \
783
1.41M
\
784
1.41M
    switch (mc_type) { \
785
1.40M
    case 0: /* fullpel (no interpolation) */ \
786
7.00M
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787
28.0M
            for (j = 0; j < size; j++) {\
788
22.4M
                OP(buf[j], ref_buf[j]); \
789
22.4M
            } \
790
5.60M
        } \
791
1.40M
        break; \
792
5.07k
    case 1: /* horizontal halfpel interpolation */ \
793
25.3k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794
101k
            for (j = 0; j < size; j++) \
795
81.2k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
796
5.07k
        break; \
797
5.05k
    case 2: /* vertical halfpel interpolation */ \
798
5.05k
        wptr = ref_buf + pitch; \
799
25.2k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800
101k
            for (j = 0; j < size; j++) \
801
80.8k
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
802
5.05k
        break; \
803
4.57k
    case 3: /* vertical and horizontal halfpel interpolation */ \
804
4.57k
        wptr = ref_buf + pitch; \
805
22.8k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806
91.4k
            for (j = 0; j < size; j++) \
807
73.1k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
808
4.57k
        break; \
809
1.41M
    } \
810
1.41M
} \
ivi_dsp.c:ivi_mc_4x4_delta
Line
Count
Source
779
26.6k
                                                 ptrdiff_t pitch, int mc_type) \
780
26.6k
{ \
781
26.6k
    int     i, j; \
782
26.6k
    const int16_t *wptr; \
783
26.6k
\
784
26.6k
    switch (mc_type) { \
785
19.1k
    case 0: /* fullpel (no interpolation) */ \
786
95.9k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) { \
787
383k
            for (j = 0; j < size; j++) {\
788
307k
                OP(buf[j], ref_buf[j]); \
789
307k
            } \
790
76.7k
        } \
791
19.1k
        break; \
792
2.71k
    case 1: /* horizontal halfpel interpolation */ \
793
13.5k
        for (i = 0; i < size; i++, buf += dpitch, ref_buf += pitch) \
794
54.2k
            for (j = 0; j < size; j++) \
795
43.4k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1]) >> 1); \
796
2.71k
        break; \
797
1.99k
    case 2: /* vertical halfpel interpolation */ \
798
1.99k
        wptr = ref_buf + pitch; \
799
9.95k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
800
39.8k
            for (j = 0; j < size; j++) \
801
31.8k
                OP(buf[j], (ref_buf[j] + wptr[j]) >> 1); \
802
1.99k
        break; \
803
2.52k
    case 3: /* vertical and horizontal halfpel interpolation */ \
804
2.52k
        wptr = ref_buf + pitch; \
805
12.6k
        for (i = 0; i < size; i++, buf += dpitch, wptr += pitch, ref_buf += pitch) \
806
50.4k
            for (j = 0; j < size; j++) \
807
40.3k
                OP(buf[j], (ref_buf[j] + ref_buf[j+1] + wptr[j] + wptr[j+1]) >> 2); \
808
2.52k
        break; \
809
26.6k
    } \
810
26.6k
} \
811
\
812
void ff_ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, const int16_t *ref_buf, \
813
1.90M
                                             ptrdiff_t pitch, int mc_type) \
814
1.90M
{ \
815
1.90M
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
816
1.90M
} \
ff_ivi_mc_8x8_no_delta
Line
Count
Source
813
431k
                                             ptrdiff_t pitch, int mc_type) \
814
431k
{ \
815
431k
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
816
431k
} \
ff_ivi_mc_8x8_delta
Line
Count
Source
813
41.6k
                                             ptrdiff_t pitch, int mc_type) \
814
41.6k
{ \
815
41.6k
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
816
41.6k
} \
ff_ivi_mc_4x4_no_delta
Line
Count
Source
813
1.41M
                                             ptrdiff_t pitch, int mc_type) \
814
1.41M
{ \
815
1.41M
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
816
1.41M
} \
ff_ivi_mc_4x4_delta
Line
Count
Source
813
20.5k
                                             ptrdiff_t pitch, int mc_type) \
814
20.5k
{ \
815
20.5k
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
816
20.5k
} \
817
818
#define IVI_MC_AVG_TEMPLATE(size, suffix, OP) \
819
void ff_ivi_mc_avg_ ## size ##x## size ## suffix(int16_t *buf, \
820
                                                 const int16_t *ref_buf, \
821
                                                 const int16_t *ref_buf2, \
822
                                                 ptrdiff_t pitch, \
823
27.3k
                                                 int mc_type, int mc_type2) \
824
27.3k
{ \
825
27.3k
    int16_t tmp[size * size]; \
826
27.3k
    int i, j; \
827
27.3k
\
828
27.3k
    ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829
27.3k
    ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830
221k
    for (i = 0; i < size; i++, buf += pitch) { \
831
1.65M
        for (j = 0; j < size; j++) {\
832
1.45M
            OP(buf[j], tmp[i * size + j] >> 1); \
833
1.45M
        } \
834
194k
    } \
835
27.3k
} \
ff_ivi_mc_avg_8x8_no_delta
Line
Count
Source
823
8.85k
                                                 int mc_type, int mc_type2) \
824
8.85k
{ \
825
8.85k
    int16_t tmp[size * size]; \
826
8.85k
    int i, j; \
827
8.85k
\
828
8.85k
    ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829
8.85k
    ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830
79.7k
    for (i = 0; i < size; i++, buf += pitch) { \
831
637k
        for (j = 0; j < size; j++) {\
832
566k
            OP(buf[j], tmp[i * size + j] >> 1); \
833
566k
        } \
834
70.8k
    } \
835
8.85k
} \
ff_ivi_mc_avg_8x8_delta
Line
Count
Source
823
12.4k
                                                 int mc_type, int mc_type2) \
824
12.4k
{ \
825
12.4k
    int16_t tmp[size * size]; \
826
12.4k
    int i, j; \
827
12.4k
\
828
12.4k
    ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829
12.4k
    ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830
111k
    for (i = 0; i < size; i++, buf += pitch) { \
831
895k
        for (j = 0; j < size; j++) {\
832
795k
            OP(buf[j], tmp[i * size + j] >> 1); \
833
795k
        } \
834
99.4k
    } \
835
12.4k
} \
ff_ivi_mc_avg_4x4_no_delta
Line
Count
Source
823
3.64k
                                                 int mc_type, int mc_type2) \
824
3.64k
{ \
825
3.64k
    int16_t tmp[size * size]; \
826
3.64k
    int i, j; \
827
3.64k
\
828
3.64k
    ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829
3.64k
    ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830
18.2k
    for (i = 0; i < size; i++, buf += pitch) { \
831
72.9k
        for (j = 0; j < size; j++) {\
832
58.3k
            OP(buf[j], tmp[i * size + j] >> 1); \
833
58.3k
        } \
834
14.5k
    } \
835
3.64k
} \
ff_ivi_mc_avg_4x4_delta
Line
Count
Source
823
2.41k
                                                 int mc_type, int mc_type2) \
824
2.41k
{ \
825
2.41k
    int16_t tmp[size * size]; \
826
2.41k
    int i, j; \
827
2.41k
\
828
2.41k
    ivi_mc_ ## size ##x## size ## _no_delta(tmp, size, ref_buf, pitch, mc_type); \
829
2.41k
    ivi_mc_ ## size ##x## size ## _delta(tmp, size, ref_buf2, pitch, mc_type2); \
830
12.0k
    for (i = 0; i < size; i++, buf += pitch) { \
831
48.2k
        for (j = 0; j < size; j++) {\
832
38.6k
            OP(buf[j], tmp[i * size + j] >> 1); \
833
38.6k
        } \
834
9.65k
    } \
835
2.41k
} \
836
837
52.1M
#define OP_PUT(a, b)  (a) = (b)
838
5.27M
#define OP_ADD(a, b)  (a) += (b)
839
840
28.7M
IVI_MC_TEMPLATE(8, _no_delta, OP_PUT)
841
3.99M
IVI_MC_TEMPLATE(8, _delta,    OP_ADD)
842
22.6M
IVI_MC_TEMPLATE(4, _no_delta, OP_PUT)
843
422k
IVI_MC_TEMPLATE(4, _delta,    OP_ADD)
844
566k
IVI_MC_AVG_TEMPLATE(8, _no_delta, OP_PUT)
845
795k
IVI_MC_AVG_TEMPLATE(8, _delta,    OP_ADD)
846
58.3k
IVI_MC_AVG_TEMPLATE(4, _no_delta, OP_PUT)
847
38.6k
IVI_MC_AVG_TEMPLATE(4, _delta,    OP_ADD)