/src/ffmpeg/libavcodec/idctdsp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * This file is part of FFmpeg. |
3 | | * |
4 | | * FFmpeg is free software; you can redistribute it and/or |
5 | | * modify it under the terms of the GNU Lesser General Public |
6 | | * License as published by the Free Software Foundation; either |
7 | | * version 2.1 of the License, or (at your option) any later version. |
8 | | * |
9 | | * FFmpeg is distributed in the hope that it will be useful, |
10 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | | * Lesser General Public License for more details. |
13 | | * |
14 | | * You should have received a copy of the GNU Lesser General Public |
15 | | * License along with FFmpeg; if not, write to the Free Software |
16 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17 | | */ |
18 | | |
19 | | #include "config.h" |
20 | | #include "config_components.h" |
21 | | #include "libavutil/attributes.h" |
22 | | #include "libavutil/common.h" |
23 | | #include "avcodec.h" |
24 | | #include "dct.h" |
25 | | #include "faanidct.h" |
26 | | #include "idctdsp.h" |
27 | | #include "simple_idct.h" |
28 | | #include "xvididct.h" |
29 | | |
30 | | av_cold void ff_permute_scantable(uint8_t dst[64], const uint8_t src[64], |
31 | | const uint8_t permutation[64]) |
32 | 1.80M | { |
33 | 117M | for (int i = 0; i < 64; i++) { |
34 | 115M | int j = src[i]; |
35 | 115M | dst[i] = permutation[j]; |
36 | 115M | } |
37 | 1.80M | } |
38 | | |
39 | | av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation, |
40 | | enum idct_permutation_type perm_type) |
41 | 568k | { |
42 | 568k | int i; |
43 | | |
44 | 568k | #if ARCH_X86 |
45 | 568k | if (ff_init_scantable_permutation_x86(idct_permutation, |
46 | 568k | perm_type)) |
47 | 1.18k | return; |
48 | 567k | #endif |
49 | | |
50 | 567k | switch (perm_type) { |
51 | 450k | case FF_IDCT_PERM_NONE: |
52 | 29.2M | for (i = 0; i < 64; i++) |
53 | 28.8M | idct_permutation[i] = i; |
54 | 450k | break; |
55 | 49.6k | case FF_IDCT_PERM_LIBMPEG2: |
56 | 3.22M | for (i = 0; i < 64; i++) |
57 | 3.17M | idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
58 | 49.6k | break; |
59 | 67.7k | case FF_IDCT_PERM_TRANSPOSE: |
60 | 4.40M | for (i = 0; i < 64; i++) |
61 | 4.33M | idct_permutation[i] = ((i & 7) << 3) | (i >> 3); |
62 | 67.7k | break; |
63 | 0 | case FF_IDCT_PERM_PARTTRANS: |
64 | 0 | for (i = 0; i < 64; i++) |
65 | 0 | idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3); |
66 | 0 | break; |
67 | 0 | default: |
68 | 0 | av_log(NULL, AV_LOG_ERROR, |
69 | 0 | "Internal error, IDCT permutation not set\n"); |
70 | 567k | } |
71 | 567k | } |
72 | | |
/**
 * Store an 8x8 coefficient block as pixels.
 *
 * Each coefficient is narrowed to a byte with av_clip_uint8() and written
 * to the destination, overwriting what was there.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    destination of the top-left pixel
 * @param line_size distance in bytes between consecutive destination rows
 */
void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                             ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        for (int col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);

        /* advance to the next output row and the next coefficient row */
        pixels += line_size;
        block  += 8;
    }
}
93 | | |
/**
 * Store the top-left 4x4 corner of a coefficient block as pixels.
 *
 * Coefficient rows are 8 entries apart; only the first 4 entries of the
 * first 4 rows are read. Each value is narrowed to a byte with
 * av_clip_uint8() and overwrites the destination.
 *
 * @param block     coefficients, 8 per row
 * @param pixels    destination of the top-left pixel
 * @param line_size distance in bytes between consecutive destination rows;
 *                  ptrdiff_t so that the stride handed in by the
 *                  ptrdiff_t-based IDCT wrapper is not narrowed to int
 */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    for (int row = 0; row < 4; row++) {
        for (int col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(block[col]);

        pixels += line_size;
        block  += 8;
    }
}
110 | | |
/**
 * Store the top-left 2x2 corner of a coefficient block as pixels.
 *
 * Coefficient rows are 8 entries apart; only the first 2 entries of the
 * first 2 rows are read. Each value is narrowed to a byte with
 * av_clip_uint8() and overwrites the destination.
 *
 * @param block     coefficients, 8 per row
 * @param pixels    destination of the top-left pixel
 * @param line_size distance in bytes between consecutive destination rows;
 *                  ptrdiff_t so that the stride handed in by the
 *                  ptrdiff_t-based IDCT wrapper is not narrowed to int
 */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    for (int row = 0; row < 2; row++) {
        for (int col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(block[col]);

        pixels += line_size;
        block  += 8;
    }
}
125 | | |
/**
 * Store an 8x8 block of signed coefficients as unsigned pixels.
 *
 * Each coefficient is biased by +128 and saturated: values below -128
 * become 0, values above 127 become 255, everything else becomes
 * coefficient + 128.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    destination of the top-left pixel
 * @param line_size distance in bytes between consecutive destination rows
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        for (int col = 0; col < 8; col++) {
            const int coeff = block[col];
            uint8_t sample;

            if (coeff > 127)
                sample = 255;
            else if (coeff < -128)
                sample = 0;
            else
                sample = (uint8_t) (coeff + 128);

            pixels[col] = sample;
        }
        block  += 8;
        pixels += line_size;
    }
}
146 | | |
/**
 * Add an 8x8 coefficient block onto existing pixels.
 *
 * Each destination byte is replaced by av_clip_uint8() of the sum of its
 * current value and the matching coefficient.
 *
 * @param block     64 coefficients, 8 per row
 * @param pixels    top-left pixel of the area that is read and updated
 * @param line_size distance in bytes between consecutive pixel rows
 */
void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                             ptrdiff_t line_size)
{
    for (int row = 0; row < 8; row++) {
        for (int col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        /* advance to the next pixel row and the next coefficient row */
        pixels += line_size;
        block  += 8;
    }
}
166 | | |
/**
 * Add the top-left 4x4 corner of a coefficient block onto existing pixels.
 *
 * Coefficient rows are 8 entries apart; only the first 4 entries of the
 * first 4 rows are read. Each destination byte is replaced by
 * av_clip_uint8() of the sum of its current value and the coefficient.
 *
 * @param block     coefficients, 8 per row
 * @param pixels    top-left pixel of the area that is read and updated
 * @param line_size distance in bytes between consecutive pixel rows;
 *                  ptrdiff_t so that the stride handed in by the
 *                  ptrdiff_t-based IDCT wrapper is not narrowed to int
 */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    for (int row = 0; row < 4; row++) {
        for (int col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        pixels += line_size;
        block  += 8;
    }
}
182 | | |
/**
 * Add the top-left 2x2 corner of a coefficient block onto existing pixels.
 *
 * Coefficient rows are 8 entries apart; only the first 2 entries of the
 * first 2 rows are read. Each destination byte is replaced by
 * av_clip_uint8() of the sum of its current value and the coefficient.
 *
 * @param block     coefficients, 8 per row
 * @param pixels    top-left pixel of the area that is read and updated
 * @param line_size distance in bytes between consecutive pixel rows;
 *                  ptrdiff_t so that the stride handed in by the
 *                  ptrdiff_t-based IDCT wrapper is not narrowed to int
 */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
                                  ptrdiff_t line_size)
{
    for (int row = 0; row < 2; row++) {
        for (int col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);

        pixels += line_size;
        block  += 8;
    }
}
196 | | |
/* Reduced-size IDCT "put" used for lowres == 1: runs ff_j_rev_dct4() on
 * block, then stores the 4x4 result into dest, overwriting it.
 * line_size is the distance in bytes between destination rows. */
static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* Reduced-size IDCT "add" used for lowres == 1: runs ff_j_rev_dct4() on
 * block, then adds the 4x4 result onto the pixels already in dest.
 * line_size is the distance in bytes between destination rows. */
static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
207 | | |
/* Reduced-size IDCT "put" used for lowres == 2: runs ff_j_rev_dct2() on
 * block, then stores the 2x2 result into dest, overwriting it.
 * line_size is the distance in bytes between destination rows. */
static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* Reduced-size IDCT "add" used for lowres == 2: runs ff_j_rev_dct2() on
 * block, then adds the 2x2 result onto the pixels already in dest.
 * line_size is the distance in bytes between destination rows. */
static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
218 | | |
/* Single-pixel "put" used for lowres == 3: writes (block[0] + 4) >> 3,
 * narrowed to a byte with av_clip_uint8(), to dest[0].
 * line_size is unused because only one pixel is written. */
static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int rounded = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(rounded);
}
/* Single-pixel "add" used for lowres == 3: adds (block[0] + 4) >> 3 to
 * dest[0] and narrows the sum to a byte with av_clip_uint8().
 * line_size is unused because only one pixel is touched. */
static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    const int rounded = (block[0] + 4) >> 3;

    dest[0] = av_clip_uint8(dest[0] + rounded);
}
227 | | |
/**
 * Populate an IDCTDSPContext with IDCT and pixel-store function pointers.
 *
 * Selection order:
 *  - avctx->lowres 1, 2 or 3 picks the reduced-size jref IDCTs defined in
 *    this file (4x4, 2x2, single pixel);
 *  - otherwise avctx->bits_per_raw_sample 9/10 or 12 picks the matching
 *    simple IDCT variant;
 *  - otherwise avctx->idct_algo picks between the jref, FAAN (if compiled
 *    in), Xvid (if the MPEG-4 decoder is compiled in) and the 8-bit simple
 *    IDCT, which is the fallback.
 * The C clamped put/add helpers are then installed, the architecture
 * specific init for the build target is called, and finally
 * c->idct_permutation is generated from c->perm_type.
 *
 * @param c     context to fill; c->mpeg4_studio_profile is read here, so it
 *              has to be set by the caller beforehand
 * @param avctx codec context supplying lowres, bits_per_raw_sample and
 *              idct_algo
 */
av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
{
    /* Only consumed by the arch-specific init calls below, hence av_unused
     * for builds where none of them is compiled in. */
    av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    if (avctx->lowres==1) {
        /* reduced-size decoding: 4x4 output per block */
        c->idct_put = ff_jref_idct4_put;
        c->idct_add = ff_jref_idct4_add;
        c->idct = ff_j_rev_dct4;
        c->perm_type = FF_IDCT_PERM_NONE;
    } else if (avctx->lowres==2) {
        /* reduced-size decoding: 2x2 output per block */
        c->idct_put = ff_jref_idct2_put;
        c->idct_add = ff_jref_idct2_add;
        c->idct = ff_j_rev_dct2;
        c->perm_type = FF_IDCT_PERM_NONE;
    } else if (avctx->lowres==3) {
        /* reduced-size decoding: a single output pixel per block */
        c->idct_put = ff_jref_idct1_put;
        c->idct_add = ff_jref_idct1_add;
        c->idct = ff_j_rev_dct1;
        c->perm_type = FF_IDCT_PERM_NONE;
    } else {
        if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
            /* 10-bit MPEG-4 Simple Studio Profile requires a higher
             * precision IDCT. However, it only uses idct_put, so idct_add
             * and idct are left NULL in that case. */
            if (c->mpeg4_studio_profile) {
                c->idct_put = ff_simple_idct_put_int32_10bit;
                c->idct_add = NULL;
                c->idct = NULL;
            } else {
                c->idct_put = ff_simple_idct_put_int16_10bit;
                c->idct_add = ff_simple_idct_add_int16_10bit;
                c->idct = ff_simple_idct_int16_10bit;
            }
            c->perm_type = FF_IDCT_PERM_NONE;
        } else if (avctx->bits_per_raw_sample == 12) {
            c->idct_put = ff_simple_idct_put_int16_12bit;
            c->idct_add = ff_simple_idct_add_int16_12bit;
            c->idct = ff_simple_idct_int16_12bit;
            c->perm_type = FF_IDCT_PERM_NONE;
        } else {
            /* Any other bit depth: choose by the requested algorithm. */
            if (avctx->idct_algo == FF_IDCT_INT) {
                c->idct_put = ff_jref_idct_put;
                c->idct_add = ff_jref_idct_add;
                c->idct = ff_j_rev_dct;
                c->perm_type = FF_IDCT_PERM_LIBMPEG2;
#if CONFIG_FAANIDCT
            } else if (avctx->idct_algo == FF_IDCT_FAAN) {
                c->idct_put = ff_faanidct_put;
                c->idct_add = ff_faanidct_add;
                c->idct = ff_faanidct;
                c->perm_type = FF_IDCT_PERM_NONE;
#endif /* CONFIG_FAANIDCT */
#if CONFIG_MPEG4_DECODER
            } else if (avctx->idct_algo == FF_IDCT_XVID) {
                /* NOTE(review): no pointers or perm_type are assigned here,
                 * so ff_xvid_idct_init() presumably sets them - confirm. */
                ff_xvid_idct_init(c);
#endif
            } else { // accurate/default
                c->idct_put = ff_simple_idct_put_int16_8bit;
                c->idct_add = ff_simple_idct_add_int16_8bit;
                c->idct = ff_simple_idct_int16_8bit;
                c->perm_type = FF_IDCT_PERM_NONE;
            }
        }
    }

    /* Portable C implementations of the clamped store helpers. */
    c->put_pixels_clamped = ff_put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = ff_add_pixels_clamped_c;

    /* At most one architecture-specific init runs; it receives the context
     * after the C defaults are in place (presumably so it can replace them
     * with optimised versions - confirm in the per-arch sources). */
#if ARCH_AARCH64
    ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
#elif ARCH_ARM
    ff_idctdsp_init_arm(c, avctx, high_bit_depth);
#elif ARCH_PPC
    ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
#elif ARCH_RISCV
    ff_idctdsp_init_riscv(c, avctx, high_bit_depth);
#elif ARCH_X86
    ff_idctdsp_init_x86(c, avctx, high_bit_depth);
#elif ARCH_MIPS
    ff_idctdsp_init_mips(c, avctx, high_bit_depth);
#elif ARCH_LOONGARCH
    ff_idctdsp_init_loongarch(c, avctx, high_bit_depth);
#endif

    /* Build the permutation table last, from whatever perm_type is set on
     * the context once the arch-specific init has run. */
    ff_init_scantable_permutation(c->idct_permutation,
                                  c->perm_type);
}