Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/idctdsp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * This file is part of FFmpeg.
3
 *
4
 * FFmpeg is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2.1 of the License, or (at your option) any later version.
8
 *
9
 * FFmpeg is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with FFmpeg; if not, write to the Free Software
16
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
 */
18
19
#include "config.h"
20
#include "config_components.h"
21
#include "libavutil/attributes.h"
22
#include "libavutil/common.h"
23
#include "avcodec.h"
24
#include "dct.h"
25
#include "faanidct.h"
26
#include "idctdsp.h"
27
#include "simple_idct.h"
28
#include "xvididct.h"
29
30
/**
 * Compose a scan table with a coefficient permutation.
 *
 * For each of the 64 positions, in ascending order, the entry read from
 * src is used as an index into permutation and the looked-up value is
 * stored at the same position of dst.
 *
 * @param dst         output table, 64 entries are written
 * @param src         input scan table, 64 entries are read
 * @param permutation lookup table indexed by the entries of src
 *                    (presumably every src entry is below 64 -- verify
 *                    against callers)
 */
av_cold void ff_permute_scantable(uint8_t dst[64], const uint8_t src[64],
                                  const uint8_t permutation[64])
{
    const uint8_t *const src_end = src + 64;

    while (src < src_end) {
        const int coeff = *src++;

        *dst++ = permutation[coeff];
    }
}
38
39
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
40
                                           enum idct_permutation_type perm_type)
41
568k
{
42
568k
    int i;
43
44
568k
#if ARCH_X86
45
568k
    if (ff_init_scantable_permutation_x86(idct_permutation,
46
568k
                                          perm_type))
47
1.18k
        return;
48
567k
#endif
49
50
567k
    switch (perm_type) {
51
450k
    case FF_IDCT_PERM_NONE:
52
29.2M
        for (i = 0; i < 64; i++)
53
28.8M
            idct_permutation[i] = i;
54
450k
        break;
55
49.6k
    case FF_IDCT_PERM_LIBMPEG2:
56
3.22M
        for (i = 0; i < 64; i++)
57
3.17M
            idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
58
49.6k
        break;
59
67.7k
    case FF_IDCT_PERM_TRANSPOSE:
60
4.40M
        for (i = 0; i < 64; i++)
61
4.33M
            idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
62
67.7k
        break;
63
0
    case FF_IDCT_PERM_PARTTRANS:
64
0
        for (i = 0; i < 64; i++)
65
0
            idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
66
0
        break;
67
0
    default:
68
0
        av_log(NULL, AV_LOG_ERROR,
69
0
               "Internal error, IDCT permutation not set\n");
70
567k
    }
71
567k
}
72
73
/**
 * Store an 8x8 block of 16-bit values as pixels, passing each value
 * through av_clip_uint8().
 *
 * @param block     64 source values, stored row after row (8 per row)
 * @param pixels    destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 */
void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                             ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        /* write one row of eight clipped pixels */
        for (int col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);

        block  += 8;
        pixels += line_size;
    }
}
93
94
/**
 * Store the top-left 4x4 part of a block as pixels, passing each value
 * through av_clip_uint8().
 *
 * The stride is a ptrdiff_t, like in ff_put_pixels_clamped_c() and in the
 * caller ff_jref_idct4_put(), so that it is not narrowed to int on the way
 * in.
 *
 * @param block     source values; consecutive rows are 8 elements apart
 * @param pixels    destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    /* write the pixels */
    for (int row = 0; row < 4; row++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);

        pixels += line_size;
        block  += 8;
    }
}
110
111
/**
 * Store the top-left 2x2 part of a block as pixels, passing each value
 * through av_clip_uint8().
 *
 * The stride is a ptrdiff_t, like in ff_put_pixels_clamped_c() and in the
 * caller ff_jref_idct2_put(), so that it is not narrowed to int on the way
 * in.
 *
 * @param block     source values; consecutive rows are 8 elements apart
 * @param pixels    destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    /* write the pixels */
    for (int row = 0; row < 2; row++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);

        pixels += line_size;
        block  += 8;
    }
}
125
126
/**
 * Store an 8x8 block of signed 16-bit values as unsigned pixels.
 *
 * Every value is offset by +128 and saturated to the 0..255 range:
 * inputs below -128 give 0, inputs above 127 give 255.
 *
 * @param block     64 source values, stored row after row (8 per row)
 * @param pixels    destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        for (int col = 0; col < 8; col++) {
            /* int16_t + 128 cannot overflow int */
            int biased = block[col] + 128;

            if (biased < 0)
                biased = 0;
            else if (biased > 255)
                biased = 255;

            pixels[col] = (uint8_t) biased;
        }
        block  += 8;
        pixels += line_size;
    }
}
146
147
/**
 * Add an 8x8 block of 16-bit values to the pixels already in the
 * destination, passing each sum through av_clip_uint8().
 *
 * @param block     64 values to add, stored row after row (8 per row)
 * @param pixels    top-left pixel of the area that is read and rewritten
 * @param line_size offset between the starts of consecutive pixel rows
 */
void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                             ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        /* update one row of eight pixels in place */
        for (int col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        block  += 8;
        pixels += line_size;
    }
}
166
167
/**
 * Add the top-left 4x4 part of a block to the pixels already in the
 * destination, passing each sum through av_clip_uint8().
 *
 * The stride is a ptrdiff_t, like in ff_add_pixels_clamped_c() and in the
 * caller ff_jref_idct4_add(), so that it is not narrowed to int on the way
 * in.
 *
 * @param block     values to add; consecutive rows are 8 elements apart
 * @param pixels    top-left pixel of the area that is read and rewritten
 * @param line_size offset between the starts of consecutive pixel rows
 */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    /* update the pixels in place */
    for (int row = 0; row < 4; row++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);

        pixels += line_size;
        block  += 8;
    }
}
182
183
/**
 * Add the top-left 2x2 part of a block to the pixels already in the
 * destination, passing each sum through av_clip_uint8().
 *
 * The stride is a ptrdiff_t, like in ff_add_pixels_clamped_c() and in the
 * caller ff_jref_idct2_add(), so that it is not narrowed to int on the way
 * in.
 *
 * @param block     values to add; consecutive rows are 8 elements apart
 * @param pixels    top-left pixel of the area that is read and rewritten
 * @param line_size offset between the starts of consecutive pixel rows
 */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    /* update the pixels in place */
    for (int row = 0; row < 2; row++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);

        pixels += line_size;
        block  += 8;
    }
}
196
197
/**
 * Run ff_j_rev_dct4() on block in place, then store the result into dest
 * via put_pixels_clamped4_c().
 *
 * @param dest      destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 * @param block     coefficient block; it is overwritten by the transform
 */
static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct4(coeffs);
    put_pixels_clamped4_c(coeffs, dest, line_size);
}
202
/**
 * Run ff_j_rev_dct4() on block in place, then add the result to the pixels
 * in dest via add_pixels_clamped4_c().
 *
 * @param dest      top-left pixel of the area that is updated
 * @param line_size offset between the starts of consecutive pixel rows
 * @param block     coefficient block; it is overwritten by the transform
 */
static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct4(coeffs);
    add_pixels_clamped4_c(coeffs, dest, line_size);
}
207
208
/**
 * Run ff_j_rev_dct2() on block in place, then store the result into dest
 * via put_pixels_clamped2_c().
 *
 * @param dest      destination of the top-left pixel
 * @param line_size offset between the starts of consecutive destination rows
 * @param block     coefficient block; it is overwritten by the transform
 */
static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct2(coeffs);
    put_pixels_clamped2_c(coeffs, dest, line_size);
}
213
/**
 * Run ff_j_rev_dct2() on block in place, then add the result to the pixels
 * in dest via add_pixels_clamped2_c().
 *
 * @param dest      top-left pixel of the area that is updated
 * @param line_size offset between the starts of consecutive pixel rows
 * @param block     coefficient block; it is overwritten by the transform
 */
static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    int16_t *const coeffs = block;

    ff_j_rev_dct2(coeffs);
    add_pixels_clamped2_c(coeffs, dest, line_size);
}
218
219
/**
 * Single-pixel variant: store the rounded, clipped first coefficient.
 *
 * Only block[0] is read; it is divided by 8 with rounding ((x + 4) >> 3)
 * and passed through av_clip_uint8() before being written to dest[0].
 *
 * @param dest      the one pixel that is written
 * @param line_size unused
 * @param block     coefficient block; only its first element is read
 */
static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    *dest = av_clip_uint8(dc);
}
223
/**
 * Single-pixel variant: add the rounded first coefficient to one pixel.
 *
 * Only block[0] is read; it is divided by 8 with rounding ((x + 4) >> 3),
 * added to dest[0], and the sum is passed through av_clip_uint8().
 *
 * @param dest      the one pixel that is read and rewritten
 * @param line_size unused
 * @param block     coefficient block; only its first element is read
 */
static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int dc = (block[0] + 4) >> 3;

    *dest = av_clip_uint8(*dest + dc);
}
227
228
/**
 * Fill an IDCTDSPContext with function pointers and a permutation table.
 *
 * Selection order:
 *  - lowres 1, 2 or 3 picks the reduced-size jref IDCT wrappers;
 *  - otherwise bits_per_raw_sample 9/10 or 12 picks the matching
 *    simple_idct variants;
 *  - otherwise idct_algo chooses between the jref, FAAN (if compiled in),
 *    Xvid (if the MPEG-4 decoder is compiled in) and 8-bit simple IDCTs,
 *    the latter being the fallback.
 * The C pixel-clamping helpers are installed next, then the
 * architecture-specific initializer for the build target (if any) is called,
 * and finally idct_permutation is generated from the resulting perm_type.
 *
 * @param c     context to initialize; c->mpeg4_studio_profile is read
 * @param avctx codec context providing lowres, bits_per_raw_sample and
 *              idct_algo
 */
av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
{
    av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    switch (avctx->lowres) {
    case 1:
        c->idct_put  = ff_jref_idct4_put;
        c->idct_add  = ff_jref_idct4_add;
        c->idct      = ff_j_rev_dct4;
        c->perm_type = FF_IDCT_PERM_NONE;
        break;
    case 2:
        c->idct_put  = ff_jref_idct2_put;
        c->idct_add  = ff_jref_idct2_add;
        c->idct      = ff_j_rev_dct2;
        c->perm_type = FF_IDCT_PERM_NONE;
        break;
    case 3:
        c->idct_put  = ff_jref_idct1_put;
        c->idct_add  = ff_jref_idct1_add;
        c->idct      = ff_j_rev_dct1;
        c->perm_type = FF_IDCT_PERM_NONE;
        break;
    default:
        if (avctx->bits_per_raw_sample == 10 ||
            avctx->bits_per_raw_sample == 9) {
            if (c->mpeg4_studio_profile) {
                /* 10-bit MPEG-4 Simple Studio Profile requires a higher
                 * precision IDCT; however, it only uses idct_put. */
                c->idct_put = ff_simple_idct_put_int32_10bit;
                c->idct_add = NULL;
                c->idct     = NULL;
            } else {
                c->idct_put = ff_simple_idct_put_int16_10bit;
                c->idct_add = ff_simple_idct_add_int16_10bit;
                c->idct     = ff_simple_idct_int16_10bit;
            }
            c->perm_type = FF_IDCT_PERM_NONE;
        } else if (avctx->bits_per_raw_sample == 12) {
            c->idct_put  = ff_simple_idct_put_int16_12bit;
            c->idct_add  = ff_simple_idct_add_int16_12bit;
            c->idct      = ff_simple_idct_int16_12bit;
            c->perm_type = FF_IDCT_PERM_NONE;
        } else {
            switch (avctx->idct_algo) {
            case FF_IDCT_INT:
                c->idct_put  = ff_jref_idct_put;
                c->idct_add  = ff_jref_idct_add;
                c->idct      = ff_j_rev_dct;
                c->perm_type = FF_IDCT_PERM_LIBMPEG2;
                break;
#if CONFIG_FAANIDCT
            case FF_IDCT_FAAN:
                c->idct_put  = ff_faanidct_put;
                c->idct_add  = ff_faanidct_add;
                c->idct      = ff_faanidct;
                c->perm_type = FF_IDCT_PERM_NONE;
                break;
#endif /* CONFIG_FAANIDCT */
#if CONFIG_MPEG4_DECODER
            case FF_IDCT_XVID:
                ff_xvid_idct_init(c);
                break;
#endif
            default: // accurate/default
                c->idct_put  = ff_simple_idct_put_int16_8bit;
                c->idct_add  = ff_simple_idct_add_int16_8bit;
                c->idct      = ff_simple_idct_int16_8bit;
                c->perm_type = FF_IDCT_PERM_NONE;
                break;
            }
        }
        break;
    }

    /* C pixel helpers; installed before the architecture-specific init runs */
    c->put_pixels_clamped        = ff_put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped        = ff_add_pixels_clamped_c;

#if ARCH_AARCH64
    ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
#elif ARCH_ARM
    ff_idctdsp_init_arm(c, avctx, high_bit_depth);
#elif ARCH_PPC
    ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
#elif ARCH_RISCV
    ff_idctdsp_init_riscv(c, avctx, high_bit_depth);
#elif ARCH_X86
    ff_idctdsp_init_x86(c, avctx, high_bit_depth);
#elif ARCH_MIPS
    ff_idctdsp_init_mips(c, avctx, high_bit_depth);
#elif ARCH_LOONGARCH
    ff_idctdsp_init_loongarch(c, avctx, high_bit_depth);
#endif

    /* built last so that it reflects the final perm_type */
    ff_init_scantable_permutation(c->idct_permutation,
                                  c->perm_type);
}