/src/ffmpeg/libavcodec/ac3dsp.c
Line | Count | Source |
1 | | /* |
2 | | * AC-3 DSP functions |
3 | | * Copyright (c) 2011 Justin Ruggles |
4 | | * |
5 | | * This file is part of FFmpeg. |
6 | | * |
7 | | * FFmpeg is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * FFmpeg is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with FFmpeg; if not, write to the Free Software |
19 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | | */ |
21 | | |
22 | | #include <math.h> |
23 | | #include <stdlib.h> |
24 | | #include <string.h> |
25 | | |
26 | | #include "config.h" |
27 | | #include "libavutil/attributes.h" |
28 | | #include "libavutil/common.h" |
29 | | #include "libavutil/intmath.h" |
30 | | #include "libavutil/mem_internal.h" |
31 | | |
32 | | #include "ac3defs.h" |
33 | | #include "ac3dsp.h" |
34 | | #include "ac3tab.h" |
35 | | #include "mathops.h" |
36 | | |
/**
 * Replace each exponent in the first block with the minimum of itself and
 * the exponents at the same coefficient position in the following blocks.
 *
 * Consecutive blocks are laid out 256 bytes apart in @p exp.
 *
 * @param exp              exponents of the first block; updated in place
 * @param num_reuse_blocks number of following blocks to compare against;
 *                         when 0 the function returns immediately
 * @param nb_coefs         number of coefficient positions to process
 */
static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
{
    int coef, n;

    if (num_reuse_blocks == 0)
        return;

    for (coef = 0; coef < nb_coefs; coef++) {
        const uint8_t *later = &exp[coef];
        uint8_t lowest       = exp[coef];

        /* step through the same coefficient slot of each following block */
        for (n = 0; n < num_reuse_blocks; n++) {
            later += 256;
            if (*later < lowest)
                lowest = *later;
        }
        exp[coef] = lowest;
    }
}
56 | | |
57 | | static void float_to_fixed24_c(int32_t *dst, const float *src, size_t len) |
58 | 0 | { |
59 | 0 | const float scale = 1 << 24; |
60 | 0 | do { |
61 | 0 | *dst++ = lrintf(*src++ * scale); |
62 | 0 | *dst++ = lrintf(*src++ * scale); |
63 | 0 | *dst++ = lrintf(*src++ * scale); |
64 | 0 | *dst++ = lrintf(*src++ * scale); |
65 | 0 | *dst++ = lrintf(*src++ * scale); |
66 | 0 | *dst++ = lrintf(*src++ * scale); |
67 | 0 | *dst++ = lrintf(*src++ * scale); |
68 | 0 | *dst++ = lrintf(*src++ * scale); |
69 | 0 | len -= 8; |
70 | 0 | } while (len > 0); |
71 | 0 | } |
72 | | |
73 | | static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd, |
74 | | int start, int end, |
75 | | int snr_offset, int floor, |
76 | | const uint8_t *bap_tab, uint8_t *bap) |
77 | 2.04M | { |
78 | 2.04M | int bin, band, band_end; |
79 | | |
80 | | /* special case, if snr offset is -960, set all bap's to zero */ |
81 | 2.04M | if (snr_offset == -960) { |
82 | 421k | memset(bap, 0, AC3_MAX_COEFS); |
83 | 421k | return; |
84 | 421k | } |
85 | | |
86 | 1.62M | bin = start; |
87 | 1.62M | band = ff_ac3_bin_to_band_tab[start]; |
88 | 43.3M | do { |
89 | 43.3M | int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor; |
90 | 43.3M | band_end = ff_ac3_band_start_tab[++band]; |
91 | 43.3M | band_end = FFMIN(band_end, end); |
92 | | |
93 | 160M | for (; bin < band_end; bin++) { |
94 | 116M | int address = av_clip_uintp2((psd[bin] - m) >> 5, 6); |
95 | 116M | bap[bin] = bap_tab[address]; |
96 | 116M | } |
97 | 43.3M | } while (end > band_end); |
98 | 1.62M | } |
99 | | |
/**
 * Add a histogram of bap values to @p mant_cnt.
 *
 * For each of the first @p len entries of @p bap, the counter indexed by
 * that entry's value is incremented. Existing counts are kept and added to.
 *
 * @param mant_cnt counters indexed by bap value
 * @param bap      bap values to count
 * @param len      number of entries of @p bap to count; nothing is done
 *                 when it is not positive
 */
static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
                                    int len)
{
    int idx;

    /* walk from the last entry down to the first */
    for (idx = len - 1; idx >= 0; idx--)
        mant_cnt[bap[idx]] += 1;
}
106 | | |
107 | | DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = { |
108 | | 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 |
109 | | }; |
110 | | |
111 | | static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16]) |
112 | 0 | { |
113 | 0 | int blk, bap; |
114 | 0 | int bits = 0; |
115 | |
|
116 | 0 | for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { |
117 | | // bap=1 : 3 mantissas in 5 bits |
118 | 0 | bits += (mant_cnt[blk][1] / 3) * 5; |
119 | | // bap=2 : 3 mantissas in 7 bits |
120 | | // bap=4 : 2 mantissas in 7 bits |
121 | 0 | bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7; |
122 | | // bap=3 : 1 mantissa in 3 bits |
123 | 0 | bits += mant_cnt[blk][3] * 3; |
124 | | // bap=5 to 15 : get bits per mantissa from table |
125 | 0 | for (bap = 5; bap < 16; bap++) |
126 | 0 | bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap]; |
127 | 0 | } |
128 | 0 | return bits; |
129 | 0 | } |
130 | | |
/**
 * Derive one exponent per coefficient.
 *
 * A zero coefficient gets exponent 24; any other coefficient gets
 * 23 - av_log2(|coef|).
 *
 * @param exp      output exponents, one per coefficient
 * @param coef     input coefficients
 * @param nb_coefs number of coefficients to process
 */
static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
{
    int n;

    for (n = 0; n < nb_coefs; n++) {
        int magnitude = abs(coef[n]);

        if (magnitude)
            exp[n] = 23 - av_log2(magnitude);
        else
            exp[n] = 24;
    }
}
140 | | |
/**
 * Accumulate the sums of squares of two coefficient arrays and of their
 * element-wise sum and difference.
 *
 * On return:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 *
 * @param sum   output sums; overwritten (zeroed first)
 * @param coef0 first coefficient array
 * @param coef1 second coefficient array
 * @param len   number of coefficients in each array
 */
static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
                                             const int32_t *coef0,
                                             const int32_t *coef1,
                                             int len)
{
    int n;

    for (n = 0; n < 4; n++)
        sum[n] = 0;

    for (n = 0; n < len; n++) {
        int first  = coef0[n];
        int second = coef1[n];
        int total  = first + second;
        int diff   = first - second;

        MAC64(sum[0], first,  first);
        MAC64(sum[1], second, second);
        MAC64(sum[2], total,  total);
        MAC64(sum[3], diff,   diff);
    }
}
161 | | |
/**
 * Float version of the sum-of-squares butterfly.
 *
 * On return:
 *   sum[0] = sum of coef0[i]^2
 *   sum[1] = sum of coef1[i]^2
 *   sum[2] = sum of (coef0[i] + coef1[i])^2
 *   sum[3] = sum of (coef0[i] - coef1[i])^2
 *
 * @param sum   output sums; overwritten (zeroed first)
 * @param coef0 first coefficient array
 * @param coef1 second coefficient array
 * @param len   number of coefficients in each array
 */
static void ac3_sum_square_butterfly_float_c(float sum[4],
                                             const float *coef0,
                                             const float *coef1,
                                             int len)
{
    const float *p0 = coef0;
    const float *p1 = coef1;
    int remaining;

    sum[3] = 0;
    sum[2] = sum[3];
    sum[1] = sum[2];
    sum[0] = sum[1];

    for (remaining = len; remaining > 0; remaining--) {
        float first  = *p0++;
        float second = *p1++;
        float total  = first + second;
        float diff   = first - second;

        /* accumulate straight into the outputs, one element at a time */
        sum[0] += first  * first;
        sum[1] += second * second;
        sum[2] += total  * total;
        sum[3] += diff   * diff;
    }
}
182 | | |
/**
 * Downmix 5 input channels to 2 output channels, in place, using only three
 * coefficients taken from the first matrix row.
 *
 *   out0 = in0 * matrix[0][0] + in1 * matrix[0][1] + in3 * matrix[0][3]
 *   out1 = in1 * matrix[0][1] + in2 * matrix[0][0] + in4 * matrix[0][3]
 *
 * The results are written to samples[0] and samples[1].
 *
 * @param samples array of 5 channel buffers
 * @param matrix  downmix matrix; only matrix[0][0], [0][1] and [0][3] are read
 * @param len     number of samples per channel
 */
static void ac3_downmix_5_to_2_symmetric_c(float **samples, float **matrix,
                                           int len)
{
    const float *row   = matrix[0];
    float front_mix    = row[0];
    float center_mix   = row[1];
    float surround_mix = row[3];
    float *ch0 = samples[0];
    float *ch1 = samples[1];
    float *ch2 = samples[2];
    float *ch3 = samples[3];
    float *ch4 = samples[4];
    int n;

    for (n = 0; n < len; n++) {
        /* both outputs are computed before either input is overwritten */
        float out0 = ch0[n] * front_mix  +
                     ch1[n] * center_mix +
                     ch3[n] * surround_mix;
        float out1 = ch1[n] * center_mix +
                     ch2[n] * front_mix  +
                     ch4[n] * surround_mix;

        ch0[n] = out0;
        ch1[n] = out1;
    }
}
205 | | |
/**
 * Downmix 5 input channels to 1 output channel, in place, using only three
 * coefficients taken from the first matrix row.
 *
 *   out = in0 * matrix[0][0] + in1 * matrix[0][1] + in2 * matrix[0][0] +
 *         in3 * matrix[0][3] + in4 * matrix[0][3]
 *
 * The result is written to samples[0].
 *
 * @param samples array of 5 channel buffers
 * @param matrix  downmix matrix; only matrix[0][0], [0][1] and [0][3] are read
 * @param len     number of samples per channel
 */
static void ac3_downmix_5_to_1_symmetric_c(float **samples, float **matrix,
                                           int len)
{
    const float *row   = matrix[0];
    float front_mix    = row[0];
    float center_mix   = row[1];
    float surround_mix = row[3];
    float *ch0 = samples[0];
    float *ch1 = samples[1];
    float *ch2 = samples[2];
    float *ch3 = samples[3];
    float *ch4 = samples[4];
    int n;

    for (n = 0; n < len; n++) {
        ch0[n] = ch0[n] * front_mix    +
                 ch1[n] * center_mix   +
                 ch2[n] * front_mix    +
                 ch3[n] * surround_mix +
                 ch4[n] * surround_mix;
    }
}
222 | | |
/**
 * Generic in-place float downmix to 1 or 2 output channels.
 *
 * Output channel k is the sum over all input channels j of
 * samples[j][i] * matrix[k][j], written back to samples[k]. Any other value
 * of @p out_ch leaves the samples untouched.
 *
 * @param samples array of @p in_ch channel buffers
 * @param matrix  downmix matrix, indexed [output channel][input channel]
 * @param out_ch  number of output channels (1 or 2 are handled)
 * @param in_ch   number of input channels
 * @param len     number of samples per channel
 */
static void ac3_downmix_c(float **samples, float **matrix,
                          int out_ch, int in_ch, int len)
{
    int n, ch;

    switch (out_ch) {
    case 2:
        for (n = 0; n < len; n++) {
            float acc0 = 0.0f;
            float acc1 = 0.0f;

            for (ch = 0; ch < in_ch; ch++) {
                acc0 += samples[ch][n] * matrix[0][ch];
                acc1 += samples[ch][n] * matrix[1][ch];
            }
            /* store only after every input of this sample has been read */
            samples[0][n] = acc0;
            samples[1][n] = acc1;
        }
        break;
    case 1:
        for (n = 0; n < len; n++) {
            float acc = 0.0f;

            for (ch = 0; ch < in_ch; ch++)
                acc += samples[ch][n] * matrix[0][ch];
            samples[0][n] = acc;
        }
        break;
    default:
        break;
    }
}
248 | | |
/**
 * Fixed-point 5-to-2 in-place downmix using only three coefficients taken
 * from the first matrix row.
 *
 *   out0 = in0 * matrix[0][0] + in1 * matrix[0][1] + in3 * matrix[0][3]
 *   out1 = in1 * matrix[0][1] + in2 * matrix[0][0] + in4 * matrix[0][3]
 *
 * Sums are formed in 64 bits, then 2048 is added and the result is shifted
 * right by 12 before being stored to samples[0] and samples[1].
 *
 * @param samples array of 5 channel buffers
 * @param matrix  downmix matrix; only matrix[0][0], [0][1] and [0][3] are read
 * @param len     number of samples per channel
 */
static void ac3_downmix_5_to_2_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
                                           int len)
{
    const int16_t *row   = matrix[0];
    int16_t front_mix    = row[0];
    int16_t center_mix   = row[1];
    int16_t surround_mix = row[3];
    int n;

    for (n = 0; n < len; n++) {
        int64_t center = (int64_t)samples[1][n] * center_mix;
        int64_t out0   = (int64_t)samples[0][n] * front_mix + center +
                         (int64_t)samples[3][n] * surround_mix;
        int64_t out1   = center +
                         (int64_t)samples[2][n] * front_mix +
                         (int64_t)samples[4][n] * surround_mix;

        samples[0][n] = (out0 + 2048) >> 12;
        samples[1][n] = (out1 + 2048) >> 12;
    }
}
271 | | |
/**
 * Fixed-point 5-to-1 in-place downmix using only three coefficients taken
 * from the first matrix row.
 *
 *   out = in0 * matrix[0][0] + in1 * matrix[0][1] + in2 * matrix[0][0] +
 *         in3 * matrix[0][3] + in4 * matrix[0][3]
 *
 * The sum is formed in 64 bits, then 2048 is added and the result is shifted
 * right by 12 before being stored to samples[0].
 *
 * @param samples array of 5 channel buffers
 * @param matrix  downmix matrix; only matrix[0][0], [0][1] and [0][3] are read
 * @param len     number of samples per channel
 */
static void ac3_downmix_5_to_1_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
                                                 int len)
{
    const int16_t *row   = matrix[0];
    int16_t front_mix    = row[0];
    int16_t center_mix   = row[1];
    int16_t surround_mix = row[3];
    int n;

    for (n = 0; n < len; n++) {
        int64_t acc = (int64_t)samples[0][n] * front_mix;

        acc += (int64_t)samples[1][n] * center_mix;
        acc += (int64_t)samples[2][n] * front_mix;
        acc += (int64_t)samples[3][n] * surround_mix;
        acc += (int64_t)samples[4][n] * surround_mix;

        samples[0][n] = (acc + 2048) >> 12;
    }
}
291 | | |
/**
 * Generic in-place fixed-point downmix to 1 or 2 output channels.
 *
 * Output channel k is the 64-bit sum over all input channels j of
 * samples[j][i] * matrix[k][j]; 2048 is added and the sum is shifted right
 * by 12 before being stored to samples[k]. Any other value of @p out_ch
 * leaves the samples untouched.
 *
 * @param samples array of @p in_ch channel buffers
 * @param matrix  downmix matrix, indexed [output channel][input channel]
 * @param out_ch  number of output channels (1 or 2 are handled)
 * @param in_ch   number of input channels
 * @param len     number of samples per channel
 */
static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix,
                                int out_ch, int in_ch, int len)
{
    int n, ch;

    switch (out_ch) {
    case 2:
        for (n = 0; n < len; n++) {
            int64_t acc0 = 0;
            int64_t acc1 = 0;

            for (ch = 0; ch < in_ch; ch++) {
                const int64_t in = samples[ch][n];

                acc0 += in * matrix[0][ch];
                acc1 += in * matrix[1][ch];
            }
            /* store only after every input of this sample has been read */
            samples[0][n] = (acc0 + 2048) >> 12;
            samples[1][n] = (acc1 + 2048) >> 12;
        }
        break;
    case 1:
        for (n = 0; n < len; n++) {
            int64_t acc = 0;

            for (ch = 0; ch < in_ch; ch++)
                acc += (int64_t)samples[ch][n] * matrix[0][ch];
            samples[0][n] = (acc + 2048) >> 12;
        }
        break;
    default:
        break;
    }
}
316 | | |
317 | | void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matrix, |
318 | | int out_ch, int in_ch, int len) |
319 | 33.2k | { |
320 | 33.2k | if (c->in_channels != in_ch || c->out_channels != out_ch) { |
321 | 3.67k | c->in_channels = in_ch; |
322 | 3.67k | c->out_channels = out_ch; |
323 | 3.67k | c->downmix_fixed = NULL; |
324 | | |
325 | 3.67k | if (in_ch == 5 && out_ch == 2 && |
326 | 362 | !(matrix[1][0] | matrix[0][2] | |
327 | 362 | matrix[1][3] | matrix[0][4] | |
328 | 362 | (matrix[0][1] ^ matrix[1][1]) | |
329 | 362 | (matrix[0][0] ^ matrix[1][2]))) { |
330 | 362 | c->downmix_fixed = ac3_downmix_5_to_2_symmetric_c_fixed; |
331 | 3.31k | } else if (in_ch == 5 && out_ch == 1 && |
332 | 888 | matrix[0][0] == matrix[0][2] && |
333 | 888 | matrix[0][3] == matrix[0][4]) { |
334 | 888 | c->downmix_fixed = ac3_downmix_5_to_1_symmetric_c_fixed; |
335 | 888 | } |
336 | 3.67k | } |
337 | | |
338 | 33.2k | if (c->downmix_fixed) |
339 | 8.00k | c->downmix_fixed(samples, matrix, len); |
340 | 25.2k | else |
341 | 25.2k | ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len); |
342 | 33.2k | } |
343 | | |
344 | | void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix, |
345 | | int out_ch, int in_ch, int len) |
346 | 74.0k | { |
347 | 74.0k | if (c->in_channels != in_ch || c->out_channels != out_ch) { |
348 | 7.24k | int **matrix_cmp = (int **)matrix; |
349 | | |
350 | 7.24k | c->in_channels = in_ch; |
351 | 7.24k | c->out_channels = out_ch; |
352 | 7.24k | c->downmix = NULL; |
353 | | |
354 | 7.24k | if (in_ch == 5 && out_ch == 2 && |
355 | 710 | !(matrix_cmp[1][0] | matrix_cmp[0][2] | |
356 | 710 | matrix_cmp[1][3] | matrix_cmp[0][4] | |
357 | 710 | (matrix_cmp[0][1] ^ matrix_cmp[1][1]) | |
358 | 710 | (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) { |
359 | 710 | c->downmix = ac3_downmix_5_to_2_symmetric_c; |
360 | 6.53k | } else if (in_ch == 5 && out_ch == 1 && |
361 | 1.18k | matrix_cmp[0][0] == matrix_cmp[0][2] && |
362 | 1.18k | matrix_cmp[0][3] == matrix_cmp[0][4]) { |
363 | 1.18k | c->downmix = ac3_downmix_5_to_1_symmetric_c; |
364 | 1.18k | } |
365 | | |
366 | | #if ARCH_X86 && HAVE_X86ASM |
367 | | ff_ac3dsp_set_downmix_x86(c); |
368 | | #endif |
369 | 7.24k | } |
370 | | |
371 | 74.0k | if (c->downmix) |
372 | 15.2k | c->downmix(samples, matrix, len); |
373 | 58.7k | else |
374 | 58.7k | ac3_downmix_c(samples, matrix, out_ch, in_ch, len); |
375 | 74.0k | } |
376 | | |
377 | | av_cold void ff_ac3dsp_init(AC3DSPContext *c) |
378 | 20.8k | { |
379 | 20.8k | c->ac3_exponent_min = ac3_exponent_min_c; |
380 | 20.8k | c->float_to_fixed24 = float_to_fixed24_c; |
381 | 20.8k | c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c; |
382 | 20.8k | c->update_bap_counts = ac3_update_bap_counts_c; |
383 | 20.8k | c->compute_mantissa_size = ac3_compute_mantissa_size_c; |
384 | 20.8k | c->extract_exponents = ac3_extract_exponents_c; |
385 | 20.8k | c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c; |
386 | 20.8k | c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c; |
387 | 20.8k | c->in_channels = 0; |
388 | 20.8k | c->out_channels = 0; |
389 | 20.8k | c->downmix = NULL; |
390 | 20.8k | c->downmix_fixed = NULL; |
391 | | |
392 | | #if ARCH_AARCH64 |
393 | | ff_ac3dsp_init_aarch64(c); |
394 | | #elif ARCH_ARM |
395 | | ff_ac3dsp_init_arm(c); |
396 | | #elif ARCH_X86 && HAVE_X86ASM |
397 | | ff_ac3dsp_init_x86(c); |
398 | | #elif ARCH_MIPS |
399 | | ff_ac3dsp_init_mips(c); |
400 | | #elif ARCH_RISCV |
401 | | ff_ac3dsp_init_riscv(c); |
402 | | #endif |
403 | 20.8k | } |