/src/ffmpeg/libavcodec/dcadsp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2016 foo86 |
3 | | * |
4 | | * This file is part of FFmpeg. |
5 | | * |
6 | | * FFmpeg is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * FFmpeg is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with FFmpeg; if not, write to the Free Software |
18 | | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "libavutil/mem_internal.h" |
22 | | |
23 | | #include "dcadsp.h" |
24 | | #include "dcamath.h" |
25 | | |
/**
 * Fill subbands [sb_start, sb_end) of dst from vector codebook rows.
 *
 * For each subband the row hf_vq[vq_index[band]] is multiplied by the
 * first scale factor of that subband, rounded (+8), shifted right by 4
 * and passed through clip23().
 *
 * @param dst            per-subband output arrays
 * @param vq_index       codebook row index for each subband
 * @param hf_vq          codebook table
 * @param scale_factors  per-subband scale factors; only entry [band][0] is read
 * @param sb_start       first subband to process
 * @param sb_end         one past the last subband to process
 * @param ofs            offset added to the sample index when writing dst
 * @param len            number of samples written per subband
 */
static void decode_hf_c(int32_t **dst,
                        const int32_t *vq_index,
                        const int8_t hf_vq[1024][32],
                        int32_t scale_factors[32][2],
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
                        ptrdiff_t ofs, ptrdiff_t len)
{
    for (int band = sb_start; band < sb_end; band++) {
        const int8_t *codebook = hf_vq[vq_index[band]];
        int32_t       gain     = scale_factors[band][0];

        for (int n = 0; n < len; n++)
            dst[band][n + ofs] = clip23((codebook[n] * gain + (1 << 3)) >> 4);
    }
}
42 | | |
/**
 * Derive subbands [sb_start, sb_end) of dst from the matching subbands
 * of src: each source sample is combined with the subband's scale factor
 * via mul17() and the result is passed through clip23().
 *
 * @param dst            per-subband output arrays
 * @param src            per-subband input arrays
 * @param scale_factors  one scale factor per subband
 * @param sb_start       first subband to process
 * @param sb_end         one past the last subband to process
 * @param ofs            offset added to the sample index in both src and dst
 * @param len            number of samples processed per subband
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
                           const int32_t *scale_factors,
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
                           ptrdiff_t ofs, ptrdiff_t len)
{
    for (int band = sb_start; band < sb_end; band++) {
        int32_t gain = scale_factors[band];

        for (int n = 0; n < len; n++)
            dst[band][n + ofs] = clip23(mul17(src[band][n + ofs], gain));
    }
}
56 | | |
/**
 * Interpolating FIR filter for the LFE channel, floating point.
 *
 * Each decimated input sample produces (64 << dec_select) output samples.
 * The first half of every output group is computed with the coefficient
 * table read forwards, the second half with the table read backwards
 * starting at index 255.
 *
 * @param pcm_samples   output buffer
 * @param lfe_samples   points at the current input sample; the preceding
 *                      (8 >> dec_select) - 1 samples are read as history
 * @param filter_coeff  coefficient table, indexed 0..255
 * @param npcmblocks    input count is npcmblocks >> (dec_select + 1)
 * @param dec_select    selects the decimation factor
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                            const float *filter_coeff, ptrdiff_t npcmblocks,
                            int dec_select)
{
    const int factor      = 64 << dec_select;
    const int half        = factor / 2;
    const int ncoeffs     = 8 >> dec_select;
    const int nlfesamples = npcmblocks >> (dec_select + 1);

    for (int blk = 0; blk < nlfesamples; blk++) {
        // One decimated sample generates 'factor' interpolated ones
        for (int phase = 0; phase < half; phase++) {
            float fwd = 0;
            float rev = 0;

            for (int tap = 0; tap < ncoeffs; tap++) {
                fwd += filter_coeff[      phase * ncoeffs + tap] * lfe_samples[-tap];
                rev += filter_coeff[255 - phase * ncoeffs - tap] * lfe_samples[-tap];
            }

            pcm_samples[       phase] = fwd;
            pcm_samples[half + phase] = rev;
        }

        lfe_samples += 1;
        pcm_samples += factor;
    }
}
86 | | |
/**
 * LFE interpolation wrapper that calls lfe_fir_float_c() with
 * dec_select = 0.
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 0;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
92 | | |
/**
 * LFE interpolation wrapper that calls lfe_fir_float_c() with
 * dec_select = 1.
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 1;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
98 | | |
/**
 * Double the number of samples by linear interpolation, floating point.
 *
 * Every input sample yields two outputs: a 1/4 : 3/4 and a 3/4 : 1/4
 * blend of the current and the previous input sample.
 *
 * @param dst   output buffer, receives 2 * len samples
 * @param src   input buffer of len samples
 * @param hist  previous input sample; read on entry, updated on exit
 * @param len   number of input samples
 */
static void lfe_x96_float_c(float *dst, const float *src,
                            float *hist, ptrdiff_t len)
{
    float last = *hist;

    for (int n = 0; n < len; n++) {
        const float cur    = src[n];
        const float first  = 0.25f * cur + 0.75f * last;
        const float second = 0.75f * cur + 0.25f * last;

        dst[2 * n    ] = first;
        dst[2 * n + 1] = second;
        last = cur;
    }

    *hist = last;
}
115 | | |
116 | | static void sub_qmf32_float_c(SynthFilterContext *synth, |
117 | | AVTXContext *imdct, |
118 | | av_tx_fn imdct_fn, |
119 | | float *pcm_samples, |
120 | | int32_t **subband_samples_lo, |
121 | | int32_t **subband_samples_hi, |
122 | | float *hist1, int *offset, float *hist2, |
123 | | const float *filter_coeff, ptrdiff_t npcmblocks, |
124 | | float scale) |
125 | 36.6k | { |
126 | 36.6k | LOCAL_ALIGNED_32(float, input, [32]); |
127 | 36.6k | int i, j; |
128 | | |
129 | 3.53M | for (j = 0; j < npcmblocks; j++) { |
130 | | // Load in one sample from each subband |
131 | 115M | for (i = 0; i < 32; i++) { |
132 | 112M | if ((i - 1) & 2) |
133 | 56.0M | input[i] = -subband_samples_lo[i][j]; |
134 | 56.0M | else |
135 | 56.0M | input[i] = subband_samples_lo[i][j]; |
136 | 112M | } |
137 | | |
138 | | // One subband sample generates 32 interpolated ones |
139 | 3.50M | synth->synth_filter_float(imdct, hist1, offset, |
140 | 3.50M | hist2, filter_coeff, |
141 | 3.50M | pcm_samples, input, scale, imdct_fn); |
142 | 3.50M | pcm_samples += 32; |
143 | 3.50M | } |
144 | 36.6k | } |
145 | | |
146 | | static void sub_qmf64_float_c(SynthFilterContext *synth, |
147 | | AVTXContext *imdct, |
148 | | av_tx_fn imdct_fn, |
149 | | float *pcm_samples, |
150 | | int32_t **subband_samples_lo, |
151 | | int32_t **subband_samples_hi, |
152 | | float *hist1, int *offset, float *hist2, |
153 | | const float *filter_coeff, ptrdiff_t npcmblocks, |
154 | | float scale) |
155 | 0 | { |
156 | 0 | LOCAL_ALIGNED_32(float, input, [64]); |
157 | 0 | int i, j; |
158 | |
|
159 | 0 | if (!subband_samples_hi) |
160 | 0 | memset(&input[32], 0, sizeof(input[0]) * 32); |
161 | |
|
162 | 0 | for (j = 0; j < npcmblocks; j++) { |
163 | | // Load in one sample from each subband |
164 | 0 | if (subband_samples_hi) { |
165 | | // Full 64 subbands, first 32 are residual coded |
166 | 0 | for (i = 0; i < 32; i++) { |
167 | 0 | if ((i - 1) & 2) |
168 | 0 | input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j]; |
169 | 0 | else |
170 | 0 | input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; |
171 | 0 | } |
172 | 0 | for (i = 32; i < 64; i++) { |
173 | 0 | if ((i - 1) & 2) |
174 | 0 | input[i] = -subband_samples_hi[i][j]; |
175 | 0 | else |
176 | 0 | input[i] = subband_samples_hi[i][j]; |
177 | 0 | } |
178 | 0 | } else { |
179 | | // Only first 32 subbands |
180 | 0 | for (i = 0; i < 32; i++) { |
181 | 0 | if ((i - 1) & 2) |
182 | 0 | input[i] = -subband_samples_lo[i][j]; |
183 | 0 | else |
184 | 0 | input[i] = subband_samples_lo[i][j]; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | // One subband sample generates 64 interpolated ones |
189 | 0 | synth->synth_filter_float_64(imdct, hist1, offset, |
190 | 0 | hist2, filter_coeff, |
191 | 0 | pcm_samples, input, scale, imdct_fn); |
192 | 0 | pcm_samples += 64; |
193 | 0 | } |
194 | 0 | } |
195 | | |
/**
 * Interpolating FIR filter for the LFE channel, fixed point.
 *
 * npcmblocks >> 1 input samples are consumed; each produces 64 outputs.
 * The first 32 use the coefficient table read forwards, the last 32 use
 * it read backwards from index 255. Sums are accumulated in 64 bits and
 * reduced with norm23() followed by clip23().
 *
 * @param pcm_samples   output buffer
 * @param lfe_samples   points at the current input sample; the preceding
 *                      7 samples are read as history
 * @param filter_coeff  coefficient table, indexed 0..255
 * @param npcmblocks    twice the number of input samples to process
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    const int nlfesamples = npcmblocks >> 1;

    for (int blk = 0; blk < nlfesamples; blk++) {
        // One decimated sample generates 64 interpolated ones
        for (int phase = 0; phase < 32; phase++) {
            int64_t fwd = 0;
            int64_t rev = 0;

            for (int tap = 0; tap < 8; tap++) {
                fwd += (int64_t)filter_coeff[      phase * 8 + tap] * lfe_samples[-tap];
                rev += (int64_t)filter_coeff[255 - phase * 8 - tap] * lfe_samples[-tap];
            }

            pcm_samples[     phase] = clip23(norm23(fwd));
            pcm_samples[32 + phase] = clip23(norm23(rev));
        }

        lfe_samples += 1;
        pcm_samples += 64;
    }
}
222 | | |
/**
 * Double the number of samples by interpolation, fixed point.
 *
 * Every input sample yields two outputs, each a weighted sum of the
 * current and the previous input using the integer weights 2097471 and
 * 6291137, reduced with norm23() and clip23().
 *
 * @param dst   output buffer, receives 2 * len samples
 * @param src   input buffer of len samples
 * @param hist  previous input sample; read on entry, updated on exit
 * @param len   number of input samples
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                            int32_t *hist, ptrdiff_t len)
{
    int32_t last = *hist;

    for (int n = 0; n < len; n++) {
        const int32_t cur    = src[n];
        const int64_t first  = INT64_C(2097471) * cur + INT64_C(6291137) * last;
        const int64_t second = INT64_C(6291137) * cur + INT64_C(2097471) * last;

        dst[2 * n    ] = clip23(norm23(first));
        dst[2 * n + 1] = clip23(norm23(second));
        last = cur;
    }

    *hist = last;
}
239 | | |
240 | | static void sub_qmf32_fixed_c(SynthFilterContext *synth, |
241 | | DCADCTContext *imdct, |
242 | | int32_t *pcm_samples, |
243 | | int32_t **subband_samples_lo, |
244 | | int32_t **subband_samples_hi, |
245 | | int32_t *hist1, int *offset, int32_t *hist2, |
246 | | const int32_t *filter_coeff, ptrdiff_t npcmblocks) |
247 | 4.89k | { |
248 | 4.89k | LOCAL_ALIGNED_32(int32_t, input, [32]); |
249 | 4.89k | int i, j; |
250 | | |
251 | 462k | for (j = 0; j < npcmblocks; j++) { |
252 | | // Load in one sample from each subband |
253 | 15.0M | for (i = 0; i < 32; i++) |
254 | 14.6M | input[i] = subband_samples_lo[i][j]; |
255 | | |
256 | | // One subband sample generates 32 interpolated ones |
257 | 457k | synth->synth_filter_fixed(imdct, hist1, offset, |
258 | 457k | hist2, filter_coeff, |
259 | 457k | pcm_samples, input); |
260 | 457k | pcm_samples += 32; |
261 | 457k | } |
262 | 4.89k | } |
263 | | |
264 | | static void sub_qmf64_fixed_c(SynthFilterContext *synth, |
265 | | DCADCTContext *imdct, |
266 | | int32_t *pcm_samples, |
267 | | int32_t **subband_samples_lo, |
268 | | int32_t **subband_samples_hi, |
269 | | int32_t *hist1, int *offset, int32_t *hist2, |
270 | | const int32_t *filter_coeff, ptrdiff_t npcmblocks) |
271 | 0 | { |
272 | 0 | LOCAL_ALIGNED_32(int32_t, input, [64]); |
273 | 0 | int i, j; |
274 | |
|
275 | 0 | if (!subband_samples_hi) |
276 | 0 | memset(&input[32], 0, sizeof(input[0]) * 32); |
277 | |
|
278 | 0 | for (j = 0; j < npcmblocks; j++) { |
279 | | // Load in one sample from each subband |
280 | 0 | if (subband_samples_hi) { |
281 | | // Full 64 subbands, first 32 are residual coded |
282 | 0 | for (i = 0; i < 32; i++) |
283 | 0 | input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j]; |
284 | 0 | for (i = 32; i < 64; i++) |
285 | 0 | input[i] = subband_samples_hi[i][j]; |
286 | 0 | } else { |
287 | | // Only first 32 subbands |
288 | 0 | for (i = 0; i < 32; i++) |
289 | 0 | input[i] = subband_samples_lo[i][j]; |
290 | 0 | } |
291 | | |
292 | | // One subband sample generates 64 interpolated ones |
293 | 0 | synth->synth_filter_fixed_64(imdct, hist1, offset, |
294 | 0 | hist2, filter_coeff, |
295 | 0 | pcm_samples, input); |
296 | 0 | pcm_samples += 64; |
297 | 0 | } |
298 | 0 | } |
299 | | |
300 | | static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len) |
301 | 954 | { |
302 | 954 | int i; |
303 | | |
304 | 50.7k | for (i = 0; i < len; i++) |
305 | 49.8k | dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3); |
306 | 954 | } |
307 | | |
/**
 * Subtract src, scaled via mul23() by 5931520, from both dst1 and dst2.
 *
 * The subtraction is carried out in unsigned arithmetic, matching
 * dmix_sub_c() and dmix_add_c(), so that extreme sample values wrap
 * instead of triggering signed-overflow undefined behaviour. Results are
 * unchanged for inputs that do not overflow.
 *
 * @param dst1  first buffer updated in place
 * @param dst2  second buffer updated in place
 * @param src   source samples
 * @param len   number of samples
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                           const int32_t *src, ptrdiff_t len)
{
    int i;

    for (i = 0; i < len; i++) {
        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
        dst1[i] -= (unsigned)cs;
        dst2[i] -= (unsigned)cs;
    }
}
319 | | |
/**
 * Subtract src, scaled by coeff via mul15(), from dst in place.
 * The subtraction is performed in unsigned arithmetic.
 *
 * @param dst    buffer updated in place
 * @param src    source samples
 * @param coeff  coefficient passed to mul15()
 * @param len    number of samples
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    for (int n = 0; n < len; n++) {
        const unsigned scaled = (unsigned)mul15(src[n], coeff);

        dst[n] -= scaled;
    }
}
327 | | |
/**
 * Add src, scaled by coeff via mul15(), to dst in place.
 * The addition is performed in unsigned arithmetic.
 *
 * @param dst    buffer updated in place
 * @param src    source samples
 * @param coeff  coefficient passed to mul15()
 * @param len    number of samples
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    for (int n = 0; n < len; n++) {
        const unsigned scaled = (unsigned)mul15(src[n], coeff);

        dst[n] += scaled;
    }
}
335 | | |
/**
 * Replace every sample of dst with mul15(sample, scale).
 *
 * @param dst    buffer updated in place
 * @param scale  coefficient passed to mul15()
 * @param len    number of samples
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
{
    for (int n = 0; n < len; n++) {
        const int32_t sample = dst[n];

        dst[n] = mul15(sample, scale);
    }
}
343 | | |
/**
 * Replace every sample of dst with mul16(sample, scale_inv).
 *
 * @param dst        buffer updated in place
 * @param scale_inv  coefficient passed to mul16()
 * @param len        number of samples
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
{
    for (int n = 0; n < len; n++) {
        const int32_t sample = dst[n];

        dst[n] = mul16(sample, scale_inv);
    }
}
351 | | |
352 | | static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len) |
353 | 42.7k | { |
354 | 42.7k | int i; |
355 | | |
356 | 2.21M | for (i = 0; i < len; i++) |
357 | 2.17M | dst[i] -= mul22(src[i], coeff); |
358 | 42.7k | } |
359 | | |
360 | | static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len) |
361 | 256k | { |
362 | 256k | int i; |
363 | | |
364 | 13.3M | for (i = 0; i < len; i++) |
365 | 13.0M | dst[i] -= mul23(src[i], coeff); |
366 | 256k | } |
367 | | |
/**
 * Combine two bands into one interleaved output of 2 * len samples.
 *
 * src0 and src1 are modified in place: first by four alternating
 * filter0() passes using coeff[0..3], then by eight rounds of three
 * filter1() passes using coeff[4..11] and coeff[12..19]. Round k works
 * on src0 shifted back by k samples, so src0[-7 .. len-1] is accessed.
 * Finally dst receives src1[n] and src0[n - 7] interleaved.
 *
 * @param dst    output buffer, receives 2 * len samples
 * @param src0   first band; must be preceded by at least 7 valid samples
 * @param src1   second band
 * @param coeff  20 filter coefficients
 * @param len    number of samples per band
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                  const int32_t *coeff, ptrdiff_t len)
{
    // Four alternating passes between the two bands
    for (int pass = 0; pass < 4; pass += 2) {
        filter0(src0, src1, coeff[pass    ], len);
        filter0(src1, src0, coeff[pass + 1], len);
    }

    // Eight rounds, each one sample further back in src0
    for (int tap = 0; tap < 8; tap++) {
        int32_t *shifted = src0 - tap;

        filter1(shifted, src1,    coeff[tap +  4], len);
        filter1(src1,    shifted, coeff[tap + 12], len);
        filter1(shifted, src1,    coeff[tap +  4], len);
    }

    // Interleave, taking src0 with a delay of 7 samples
    for (int n = 0; n < len; n++) {
        dst[2 * n    ] = src1[n];
        dst[2 * n + 1] = src0[n - 7];
    }
}
389 | | |
/**
 * Short window and 8 point forward MDCT over len input rows, followed by
 * aliasing cancellation between neighbouring rows from index 12 upwards.
 *
 * @param output  four values are written per processed row
 * @param input   per-row sample arrays; samples [ofs - 4, ofs + 3] are read
 * @param coeff   10 coefficients: 4 window values, 4 transform values and
 *                2 aliasing-cancellation values
 * @param ofs     sample offset into every input row
 * @param len     number of rows to process
 */
static void lbr_bank_c(float output[32][4], float **input,
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
{
    const float w0 = coeff[0], w1 = coeff[1], w2 = coeff[2], w3 = coeff[3];
    const float t1 = coeff[4], t2 = coeff[5], t3 = coeff[6], t4 = coeff[7];
    const float al1 = coeff[8];
    const float al2 = coeff[9];

    // Short window and 8 point forward MDCT
    for (int row = 0; row < len; row++) {
        const float *src = input[row] + ofs;
        float       *out = output[row];

        const float a = src[-4] * w0 - src[-1] * w3;
        const float b = src[-3] * w1 - src[-2] * w2;
        const float c = src[ 2] * w1 + src[ 1] * w2;
        const float d = src[ 3] * w0 + src[ 0] * w3;

        out[0] = t1 * b - t2 * c + t4 * a - t3 * d;
        out[1] = t1 * d - t2 * a - t4 * b - t3 * c;
        out[2] = t3 * b + t2 * d - t4 * c + t1 * a;
        out[3] = t3 * a - t2 * b + t4 * d - t1 * c;
    }

    // Aliasing cancellation for high frequencies
    for (int row = 12; row < len - 1; row++) {
        float *cur  = output[row];
        float *next = output[row + 1];
        float  p, q;

        p = cur[3]  * al1;
        q = next[0] * al1;
        cur[3]  += q - p;
        next[0] -= q + p;

        p = cur[2]  * al2;
        q = next[1] * al2;
        cur[2]  += q - p;
        next[1] -= q + p;
    }
}
435 | | |
/**
 * Interpolate 64 input samples through a cascade of five IIR stages.
 *
 * For every input sample 'factor' output samples are produced: the first
 * filter step is fed the input sample, the remaining factor - 1 steps are
 * fed zero. Each stage keeps two history values that are updated in place.
 *
 * @param output  receives 64 * factor samples
 * @param input   64 input samples
 * @param iir     four coefficients for each of the five stages
 * @param hist    two history values for each of the five stages
 * @param factor  number of output samples per input sample
 */
static void lfe_iir_c(float *output, const float *input,
                      const float iir[5][4], float hist[5][2],
                      ptrdiff_t factor)
{
    for (int n = 0; n < 64; n++) {
        float acc = input[n];

        for (int rep = 0; rep < factor; rep++) {
            for (int stage = 0; stage < 5; stage++) {
                const float *c = iir[stage];
                float       *h = hist[stage];
                const float mid = h[0] * c[0] + h[1] * c[1] + acc;

                acc  = h[0] * c[2] + h[1] * c[3] + mid;
                h[0] = h[1];
                h[1] = mid;
            }

            *output++ = acc;
            // Only the first step of each group sees the input sample
            acc = 0;
        }
    }
}
460 | | |
461 | | av_cold void ff_dcadsp_init(DCADSPContext *s) |
462 | 10.3k | { |
463 | 10.3k | s->decode_hf = decode_hf_c; |
464 | 10.3k | s->decode_joint = decode_joint_c; |
465 | | |
466 | 10.3k | s->lfe_fir_float[0] = lfe_fir0_float_c; |
467 | 10.3k | s->lfe_fir_float[1] = lfe_fir1_float_c; |
468 | 10.3k | s->lfe_x96_float = lfe_x96_float_c; |
469 | 10.3k | s->sub_qmf_float[0] = sub_qmf32_float_c; |
470 | 10.3k | s->sub_qmf_float[1] = sub_qmf64_float_c; |
471 | | |
472 | 10.3k | s->lfe_fir_fixed = lfe_fir_fixed_c; |
473 | 10.3k | s->lfe_x96_fixed = lfe_x96_fixed_c; |
474 | 10.3k | s->sub_qmf_fixed[0] = sub_qmf32_fixed_c; |
475 | 10.3k | s->sub_qmf_fixed[1] = sub_qmf64_fixed_c; |
476 | | |
477 | 10.3k | s->decor = decor_c; |
478 | | |
479 | 10.3k | s->dmix_sub_xch = dmix_sub_xch_c; |
480 | 10.3k | s->dmix_sub = dmix_sub_c; |
481 | 10.3k | s->dmix_add = dmix_add_c; |
482 | 10.3k | s->dmix_scale = dmix_scale_c; |
483 | 10.3k | s->dmix_scale_inv = dmix_scale_inv_c; |
484 | | |
485 | 10.3k | s->assemble_freq_bands = assemble_freq_bands_c; |
486 | | |
487 | 10.3k | s->lbr_bank = lbr_bank_c; |
488 | 10.3k | s->lfe_iir = lfe_iir_c; |
489 | | |
490 | 10.3k | #if ARCH_X86 |
491 | 10.3k | ff_dcadsp_init_x86(s); |
492 | 10.3k | #endif |
493 | 10.3k | } |