Coverage Report

Created: 2024-09-06 07:53

/src/ffmpeg/libavcodec/x86/hpeldsp_init.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * SIMD-optimized halfpel functions
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 *
22
 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23
 */
24
25
#include "libavutil/attributes.h"
26
#include "libavutil/cpu.h"
27
#include "libavutil/x86/cpu.h"
28
#include "libavcodec/avcodec.h"
29
#include "libavcodec/hpeldsp.h"
30
#include "libavcodec/pixels.h"
31
#include "fpel.h"
32
#include "hpeldsp.h"
33
34
void ff_put_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
35
                              ptrdiff_t line_size, int h);
36
void ff_put_pixels16_x2_mmxext(uint8_t *block, const uint8_t *pixels,
37
                               ptrdiff_t line_size, int h);
38
void ff_put_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
39
                             ptrdiff_t line_size, int h);
40
void ff_avg_pixels16_x2_sse2(uint8_t *block, const uint8_t *pixels,
41
                             ptrdiff_t line_size, int h);
42
void ff_put_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
43
                             ptrdiff_t line_size, int h);
44
void ff_avg_pixels16_y2_sse2(uint8_t *block, const uint8_t *pixels,
45
                             ptrdiff_t line_size, int h);
46
void ff_put_no_rnd_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
47
                                     ptrdiff_t line_size, int h);
48
void ff_put_no_rnd_pixels8_x2_exact_mmxext(uint8_t *block,
49
                                           const uint8_t *pixels,
50
                                           ptrdiff_t line_size, int h);
51
void ff_put_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
52
                              ptrdiff_t line_size, int h);
53
void ff_put_no_rnd_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
54
                                     ptrdiff_t line_size, int h);
55
void ff_put_no_rnd_pixels8_y2_exact_mmxext(uint8_t *block,
56
                                           const uint8_t *pixels,
57
                                           ptrdiff_t line_size, int h);
58
void ff_avg_pixels8_x2_mmxext(uint8_t *block, const uint8_t *pixels,
59
                              ptrdiff_t line_size, int h);
60
void ff_avg_pixels8_y2_mmxext(uint8_t *block, const uint8_t *pixels,
61
                              ptrdiff_t line_size, int h);
62
void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels,
63
                                      ptrdiff_t line_size, int h);
64
65
51.2k
#define put_pixels8_mmx         ff_put_pixels8_mmx
66
51.2k
#define put_pixels16_mmx        ff_put_pixels16_mmx
67
51.2k
#define put_pixels8_xy2_mmx     ff_put_pixels8_xy2_mmx
68
51.2k
#define put_no_rnd_pixels8_mmx  ff_put_pixels8_mmx
69
51.2k
#define put_no_rnd_pixels16_mmx ff_put_pixels16_mmx
70
71
#if HAVE_INLINE_ASM
72
73
/***********************************/
74
/* MMX no rounding */
75
#define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
76
0
#define SET_RND  MOVQ_WONE
77
#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
78
#define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
79
#define STATIC static
80
81
#include "rnd_template.c"
82
#include "hpeldsp_rnd_template.c"
83
84
#undef DEF
85
#undef SET_RND
86
#undef PAVGBP
87
#undef PAVGB
88
#undef STATIC
89
90
#if HAVE_MMX
91
CALL_2X_PIXELS(avg_no_rnd_pixels16_y2_mmx, avg_no_rnd_pixels8_y2_mmx, 8)
92
CALL_2X_PIXELS(put_no_rnd_pixels16_y2_mmx, put_no_rnd_pixels8_y2_mmx, 8)
93
94
CALL_2X_PIXELS(avg_no_rnd_pixels16_xy2_mmx, avg_no_rnd_pixels8_xy2_mmx, 8)
95
CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8)
96
#endif
97
98
/***********************************/
99
/* MMX rounding */
100
101
0
#define SET_RND  MOVQ_WTWO
102
#define DEF(x, y) ff_ ## x ## _ ## y ## _mmx
103
#define STATIC
104
#define NO_AVG
105
106
#include "rnd_template.c"
107
108
#undef NO_AVG
109
#undef DEF
110
#undef SET_RND
111
112
#if HAVE_MMX
113
CALL_2X_PIXELS(put_pixels16_xy2_mmx, ff_put_pixels8_xy2_mmx, 8)
114
#endif
115
116
#endif /* HAVE_INLINE_ASM */
117
118
119
#if HAVE_X86ASM
120
121
#define HPELDSP_AVG_PIXELS16(CPUEXT)                      \
122
    CALL_2X_PIXELS(put_no_rnd_pixels16_x2 ## CPUEXT, ff_put_no_rnd_pixels8_x2 ## CPUEXT, 8) \
123
    CALL_2X_PIXELS(put_pixels16_y2        ## CPUEXT, ff_put_pixels8_y2        ## CPUEXT, 8) \
124
    CALL_2X_PIXELS(put_no_rnd_pixels16_y2 ## CPUEXT, ff_put_no_rnd_pixels8_y2 ## CPUEXT, 8) \
125
    CALL_2X_PIXELS(avg_pixels16           ## CPUEXT, ff_avg_pixels8           ## CPUEXT, 8) \
126
    CALL_2X_PIXELS(avg_pixels16_x2        ## CPUEXT, ff_avg_pixels8_x2        ## CPUEXT, 8) \
127
    CALL_2X_PIXELS(avg_pixels16_y2        ## CPUEXT, ff_avg_pixels8_y2        ## CPUEXT, 8) \
128
    CALL_2X_PIXELS(avg_pixels16_xy2       ## CPUEXT, ff_avg_pixels8_xy2       ## CPUEXT, 8) \
129
    CALL_2X_PIXELS(avg_approx_pixels16_xy2## CPUEXT, ff_avg_approx_pixels8_xy2## CPUEXT, 8)
130
131
HPELDSP_AVG_PIXELS16(_mmxext)
132
133
#endif /* HAVE_X86ASM */
134
135
#define SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU)                             \
136
205k
    if (HAVE_MMX_EXTERNAL)                                                  \
137
205k
        c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU
138
139
#define SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU)                                   \
140
205k
    do {                                                                        \
141
205k
        SET_HPEL_FUNCS_EXT(PFX, IDX, SIZE, CPU);                                \
142
205k
        c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
143
205k
    } while (0)
144
#define SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU)                                   \
145
153k
    do {                                                                        \
146
153k
        c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_  ## CPU; \
147
153k
        c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_  ## CPU; \
148
153k
    } while (0)
149
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU)                                     \
150
102k
    do {                                                                        \
151
102k
        SET_HPEL_FUNCS03(PFX, IDX, SIZE, CPU);                                  \
152
102k
        SET_HPEL_FUNCS12(PFX, IDX, SIZE, CPU);                                  \
153
102k
    } while (0)
154
155
static void hpeldsp_init_mmx(HpelDSPContext *c, int flags)
156
51.2k
{
157
51.2k
#if HAVE_MMX_INLINE
158
51.2k
    SET_HPEL_FUNCS03(put,      [0], 16, mmx);
159
51.2k
    SET_HPEL_FUNCS(put_no_rnd, [0], 16, mmx);
160
51.2k
    SET_HPEL_FUNCS12(avg_no_rnd,  , 16, mmx);
161
51.2k
    c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_mmx;
162
51.2k
    SET_HPEL_FUNCS03(put,      [1],  8, mmx);
163
51.2k
    SET_HPEL_FUNCS(put_no_rnd, [1],  8, mmx);
164
51.2k
#endif
165
51.2k
}
166
167
static void hpeldsp_init_mmxext(HpelDSPContext *c, int flags)
168
51.2k
{
169
51.2k
#if HAVE_MMXEXT_EXTERNAL
170
51.2k
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_mmxext;
171
51.2k
    c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext;
172
173
51.2k
    c->avg_pixels_tab[0][0] = avg_pixels16_mmxext;
174
51.2k
    c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext;
175
51.2k
    c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext;
176
51.2k
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext;
177
178
51.2k
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_mmxext;
179
51.2k
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_mmxext;
180
181
51.2k
    c->avg_pixels_tab[1][0] = ff_avg_pixels8_mmxext;
182
51.2k
    c->avg_pixels_tab[1][1] = ff_avg_pixels8_x2_mmxext;
183
51.2k
    c->avg_pixels_tab[1][2] = ff_avg_pixels8_y2_mmxext;
184
51.2k
    c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_mmxext;
185
186
51.2k
    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_mmxext;
187
51.2k
    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
188
189
51.2k
    c->avg_no_rnd_pixels_tab[0] = ff_avg_pixels16_mmxext;
190
191
51.2k
    if (!(flags & AV_CODEC_FLAG_BITEXACT)) {
192
51.2k
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext;
193
51.2k
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext;
194
51.2k
        c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_mmxext;
195
51.2k
        c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_mmxext;
196
197
51.2k
        c->avg_pixels_tab[0][3] = avg_approx_pixels16_xy2_mmxext;
198
51.2k
        c->avg_pixels_tab[1][3] = ff_avg_approx_pixels8_xy2_mmxext;
199
51.2k
    }
200
51.2k
#endif /* HAVE_MMXEXT_EXTERNAL */
201
51.2k
}
202
203
static void hpeldsp_init_sse2_fast(HpelDSPContext *c, int flags)
204
51.2k
{
205
51.2k
#if HAVE_SSE2_EXTERNAL
206
51.2k
    c->put_pixels_tab[0][0]        = ff_put_pixels16_sse2;
207
51.2k
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_sse2;
208
51.2k
    c->put_pixels_tab[0][1]        = ff_put_pixels16_x2_sse2;
209
51.2k
    c->put_pixels_tab[0][2]        = ff_put_pixels16_y2_sse2;
210
51.2k
    c->put_pixels_tab[0][3]        = ff_put_pixels16_xy2_sse2;
211
51.2k
    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_sse2;
212
51.2k
    c->avg_pixels_tab[0][1]        = ff_avg_pixels16_x2_sse2;
213
51.2k
    c->avg_pixels_tab[0][2]        = ff_avg_pixels16_y2_sse2;
214
51.2k
    c->avg_pixels_tab[0][3]        = ff_avg_pixels16_xy2_sse2;
215
51.2k
    c->avg_no_rnd_pixels_tab[0]    = ff_avg_pixels16_sse2;
216
51.2k
#endif /* HAVE_SSE2_EXTERNAL */
217
51.2k
}
218
219
static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags)
220
51.2k
{
221
51.2k
#if HAVE_SSSE3_EXTERNAL
222
51.2k
    c->put_pixels_tab[0][3]            = ff_put_pixels16_xy2_ssse3;
223
51.2k
    c->avg_pixels_tab[0][3]            = ff_avg_pixels16_xy2_ssse3;
224
51.2k
    c->put_pixels_tab[1][3]            = ff_put_pixels8_xy2_ssse3;
225
51.2k
    c->avg_pixels_tab[1][3]            = ff_avg_pixels8_xy2_ssse3;
226
51.2k
#endif
227
51.2k
}
228
229
av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
230
197k
{
231
197k
    int cpu_flags = av_get_cpu_flags();
232
233
197k
    if (INLINE_MMX(cpu_flags))
234
51.2k
        hpeldsp_init_mmx(c, flags);
235
236
197k
    if (EXTERNAL_MMXEXT(cpu_flags))
237
51.2k
        hpeldsp_init_mmxext(c, flags);
238
239
197k
    if (EXTERNAL_SSE2_FAST(cpu_flags))
240
51.2k
        hpeldsp_init_sse2_fast(c, flags);
241
242
197k
    if (EXTERNAL_SSSE3(cpu_flags))
243
51.2k
        hpeldsp_init_ssse3(c, flags);
244
197k
}