Coverage Report

Created: 2025-08-28 07:12

/src/ffmpeg/libavcodec/dcadsp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) 2016 foo86
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "libavutil/mem_internal.h"
22
23
#include "dcadsp.h"
24
#include "dcamath.h"
25
26
/**
 * Fill a range of subbands from vector-quantization codebook rows.
 *
 * For every subband b in [sb_start, sb_end) the codebook row
 * hf_vq[vq_index[b]] is multiplied element-wise by scale_factors[b][0],
 * offset by 8, shifted right by 4 and passed through clip23(). The results
 * are stored in dst[b][ofs] .. dst[b][ofs + len - 1].
 *
 * @param dst           per-subband output buffers
 * @param vq_index      codebook row index for each subband
 * @param hf_vq         codebook, 1024 rows of 32 entries
 * @param scale_factors per-subband scale factors; only element [0] is used
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first output sample in each subband
 * @param len           number of samples to produce per subband
 */
static void decode_hf_c(int32_t **dst,
                        const int32_t *vq_index,
                        const int8_t hf_vq[1024][32],
                        int32_t scale_factors[32][2],
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
                        ptrdiff_t ofs, ptrdiff_t len)
{
    int band, n;

    for (band = sb_start; band < sb_end; band++) {
        const int8_t *codeword = hf_vq[vq_index[band]];
        int32_t gain = scale_factors[band][0];

        for (n = 0; n < len; n++) {
            // Add half of the divisor (8) before dropping the low 4 bits
            dst[band][n + ofs] = clip23((codeword[n] * gain + (1 << 3)) >> 4);
        }
    }
}
42
43
/**
 * Compute subbands of dst from the matching subbands of src.
 *
 * For every subband b in [sb_start, sb_end) and every position p in
 * [ofs, ofs + len): dst[b][p] = clip23(mul17(src[b][p], scale_factors[b])).
 *
 * @param dst           per-subband output buffers
 * @param src           per-subband input buffers
 * @param scale_factors one scale factor per subband
 * @param sb_start      first subband to process
 * @param sb_end        one past the last subband to process
 * @param ofs           offset of the first sample in each subband
 * @param len           number of samples to process per subband
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
                           const int32_t *scale_factors,
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
                           ptrdiff_t ofs, ptrdiff_t len)
{
    int band, n;

    for (band = sb_start; band < sb_end; band++) {
        int32_t gain = scale_factors[band];

        for (n = 0; n < len; n++) {
            dst[band][n + ofs] = clip23(mul17(src[band][n + ofs], gain));
        }
    }
}
56
57
/**
 * Interpolate decimated LFE samples with a FIR filter (floating point).
 *
 * dec_select picks the interpolation factor: 64 << dec_select output samples
 * are produced per input sample, using 8 >> dec_select coefficients per
 * output. npcmblocks >> (dec_select + 1) input samples are consumed.
 *
 * For each input sample the routine reads lfe_samples[0] back to
 * lfe_samples[-(ncoeffs - 1)], so that much history must be readable in
 * front of the pointer. Coefficients are read from filter_coeff[0..255]:
 * the first half of each output group walks the table forwards, the second
 * half walks it backwards from index 255.
 *
 * @param pcm_samples  output buffer
 * @param lfe_samples  pointer to the first LFE sample to process
 * @param filter_coeff coefficient table
 * @param npcmblocks   number of PCM blocks
 * @param dec_select   decimation selector
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                            const float *filter_coeff, ptrdiff_t npcmblocks,
                            int dec_select)
{
    // Select decimation factor
    const int factor      = 64 << dec_select;
    const int half        = factor / 2;
    const int ncoeffs     = 8 >> dec_select;
    const int nlfesamples = npcmblocks >> (dec_select + 1);
    int n, phase, tap;

    // One decimated sample generates 64 or 128 interpolated ones
    for (n = 0; n < nlfesamples; n++, lfe_samples++, pcm_samples += factor) {
        for (phase = 0; phase < half; phase++) {
            const int base = phase * ncoeffs;
            float fwd = 0;
            float rev = 0;

            for (tap = 0; tap < ncoeffs; tap++) {
                fwd += filter_coeff[      base + tap] * lfe_samples[-tap];
                rev += filter_coeff[255 - base - tap] * lfe_samples[-tap];
            }

            pcm_samples[       phase] = fwd;
            pcm_samples[half + phase] = rev;
        }
    }
}
86
87
/**
 * LFE FIR interpolation with decimation selector 0.
 *
 * Forwards to lfe_fir_float_c() with dec_select = 0, i.e. 64 output samples
 * per LFE sample and 8 coefficients per output.
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 0;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
92
93
/**
 * LFE FIR interpolation with decimation selector 1.
 *
 * Forwards to lfe_fir_float_c() with dec_select = 1, i.e. 128 output samples
 * per LFE sample and 4 coefficients per output.
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    const int dec_select = 1;

    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                    dec_select);
}
98
99
/**
 * Double the number of samples by linear interpolation (floating point).
 *
 * Each input sample yields two outputs that blend it with the previous
 * input sample using weights 0.25/0.75 and 0.75/0.25. The previous sample
 * is carried across calls through *hist.
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input samples
 * @param hist in: last input sample of the previous call;
 *             out: last input sample of this call (unchanged if len <= 0)
 * @param len  number of input samples
 */
static void lfe_x96_float_c(float *dst, const float *src,
                            float *hist, ptrdiff_t len)
{
    float last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const float cur   = src[n];
        const float early = 0.25f * cur + 0.75f * last;
        const float late  = 0.75f * cur + 0.25f * last;

        last   = cur;
        dst[0] = early;
        dst[1] = late;
        dst   += 2;
    }

    *hist = last;
}
115
116
static void sub_qmf32_float_c(SynthFilterContext *synth,
117
                              AVTXContext *imdct,
118
                              av_tx_fn imdct_fn,
119
                              float *pcm_samples,
120
                              int32_t **subband_samples_lo,
121
                              int32_t **subband_samples_hi,
122
                              float *hist1, int *offset, float *hist2,
123
                              const float *filter_coeff, ptrdiff_t npcmblocks,
124
                              float scale)
125
36.6k
{
126
36.6k
    LOCAL_ALIGNED_32(float, input, [32]);
127
36.6k
    int i, j;
128
129
3.53M
    for (j = 0; j < npcmblocks; j++) {
130
        // Load in one sample from each subband
131
115M
        for (i = 0; i < 32; i++) {
132
112M
            if ((i - 1) & 2)
133
56.0M
                input[i] = -subband_samples_lo[i][j];
134
56.0M
            else
135
56.0M
                input[i] =  subband_samples_lo[i][j];
136
112M
        }
137
138
        // One subband sample generates 32 interpolated ones
139
3.50M
        synth->synth_filter_float(imdct, hist1, offset,
140
3.50M
                                  hist2, filter_coeff,
141
3.50M
                                  pcm_samples, input, scale, imdct_fn);
142
3.50M
        pcm_samples += 32;
143
3.50M
    }
144
36.6k
}
145
146
static void sub_qmf64_float_c(SynthFilterContext *synth,
147
                              AVTXContext *imdct,
148
                              av_tx_fn imdct_fn,
149
                              float *pcm_samples,
150
                              int32_t **subband_samples_lo,
151
                              int32_t **subband_samples_hi,
152
                              float *hist1, int *offset, float *hist2,
153
                              const float *filter_coeff, ptrdiff_t npcmblocks,
154
                              float scale)
155
0
{
156
0
    LOCAL_ALIGNED_32(float, input, [64]);
157
0
    int i, j;
158
159
0
    if (!subband_samples_hi)
160
0
        memset(&input[32], 0, sizeof(input[0]) * 32);
161
162
0
    for (j = 0; j < npcmblocks; j++) {
163
        // Load in one sample from each subband
164
0
        if (subband_samples_hi) {
165
            // Full 64 subbands, first 32 are residual coded
166
0
            for (i =  0; i < 32; i++) {
167
0
                if ((i - 1) & 2)
168
0
                    input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
169
0
                else
170
0
                    input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
171
0
            }
172
0
            for (i = 32; i < 64; i++) {
173
0
                if ((i - 1) & 2)
174
0
                    input[i] = -subband_samples_hi[i][j];
175
0
                else
176
0
                    input[i] =  subband_samples_hi[i][j];
177
0
            }
178
0
        } else {
179
            // Only first 32 subbands
180
0
            for (i =  0; i < 32; i++) {
181
0
                if ((i - 1) & 2)
182
0
                    input[i] = -subband_samples_lo[i][j];
183
0
                else
184
0
                    input[i] =  subband_samples_lo[i][j];
185
0
            }
186
0
        }
187
188
        // One subband sample generates 64 interpolated ones
189
0
        synth->synth_filter_float_64(imdct, hist1, offset,
190
0
                                     hist2, filter_coeff,
191
0
                                     pcm_samples, input, scale, imdct_fn);
192
0
        pcm_samples += 64;
193
0
    }
194
0
}
195
196
/**
 * Interpolate decimated LFE samples with a FIR filter (fixed point).
 *
 * npcmblocks >> 1 input samples are consumed and each one produces 64
 * output samples, 8 coefficients per output. Products are accumulated in
 * 64 bits and the sums are passed through norm23() and clip23() before
 * being stored.
 *
 * For each input sample the routine reads lfe_samples[0] back to
 * lfe_samples[-7], so 7 samples of history must be readable in front of the
 * pointer. Coefficients are read from filter_coeff[0..255]: the first 32
 * outputs of a group walk the table forwards, the last 32 walk it backwards
 * from index 255.
 *
 * @param pcm_samples  output buffer
 * @param lfe_samples  pointer to the first LFE sample to process
 * @param filter_coeff coefficient table
 * @param npcmblocks   number of PCM blocks
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    // Select decimation factor
    const int nlfesamples = npcmblocks >> 1;
    int n, phase, tap;

    // One decimated sample generates 64 interpolated ones
    for (n = 0; n < nlfesamples; n++, lfe_samples++, pcm_samples += 64) {
        for (phase = 0; phase < 32; phase++) {
            const int base = phase * 8;
            int64_t fwd = 0;
            int64_t rev = 0;

            for (tap = 0; tap < 8; tap++) {
                fwd += (int64_t)filter_coeff[      base + tap] * lfe_samples[-tap];
                rev += (int64_t)filter_coeff[255 - base - tap] * lfe_samples[-tap];
            }

            pcm_samples[     phase] = clip23(norm23(fwd));
            pcm_samples[32 + phase] = clip23(norm23(rev));
        }
    }
}
222
223
/**
 * Double the number of samples by interpolation (fixed point).
 *
 * Each input sample yields two outputs that blend it with the previous
 * input sample using the integer weights 2097471 and 6291137 (swapped for
 * the second output). The 64-bit sums are passed through norm23() and
 * clip23(). The previous sample is carried across calls through *hist.
 *
 * @param dst  output buffer, receives 2 * len samples
 * @param src  input samples
 * @param hist in: last input sample of the previous call;
 *             out: last input sample of this call (unchanged if len <= 0)
 * @param len  number of input samples
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                            int32_t *hist, ptrdiff_t len)
{
    int32_t last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const int64_t early = INT64_C(2097471) * src[n] + INT64_C(6291137) * last;
        const int64_t late  = INT64_C(6291137) * src[n] + INT64_C(2097471) * last;

        last   = src[n];
        dst[0] = clip23(norm23(early));
        dst[1] = clip23(norm23(late));
        dst   += 2;
    }

    *hist = last;
}
239
240
static void sub_qmf32_fixed_c(SynthFilterContext *synth,
241
                              DCADCTContext *imdct,
242
                              int32_t *pcm_samples,
243
                              int32_t **subband_samples_lo,
244
                              int32_t **subband_samples_hi,
245
                              int32_t *hist1, int *offset, int32_t *hist2,
246
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
247
4.89k
{
248
4.89k
    LOCAL_ALIGNED_32(int32_t, input, [32]);
249
4.89k
    int i, j;
250
251
462k
    for (j = 0; j < npcmblocks; j++) {
252
        // Load in one sample from each subband
253
15.0M
        for (i = 0; i < 32; i++)
254
14.6M
            input[i] = subband_samples_lo[i][j];
255
256
        // One subband sample generates 32 interpolated ones
257
457k
        synth->synth_filter_fixed(imdct, hist1, offset,
258
457k
                                  hist2, filter_coeff,
259
457k
                                  pcm_samples, input);
260
457k
        pcm_samples += 32;
261
457k
    }
262
4.89k
}
263
264
static void sub_qmf64_fixed_c(SynthFilterContext *synth,
265
                              DCADCTContext *imdct,
266
                              int32_t *pcm_samples,
267
                              int32_t **subband_samples_lo,
268
                              int32_t **subband_samples_hi,
269
                              int32_t *hist1, int *offset, int32_t *hist2,
270
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
271
0
{
272
0
    LOCAL_ALIGNED_32(int32_t, input, [64]);
273
0
    int i, j;
274
275
0
    if (!subband_samples_hi)
276
0
        memset(&input[32], 0, sizeof(input[0]) * 32);
277
278
0
    for (j = 0; j < npcmblocks; j++) {
279
        // Load in one sample from each subband
280
0
        if (subband_samples_hi) {
281
            // Full 64 subbands, first 32 are residual coded
282
0
            for (i =  0; i < 32; i++)
283
0
                input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
284
0
            for (i = 32; i < 64; i++)
285
0
                input[i] = subband_samples_hi[i][j];
286
0
        } else {
287
            // Only first 32 subbands
288
0
            for (i =  0; i < 32; i++)
289
0
                input[i] = subband_samples_lo[i][j];
290
0
        }
291
292
        // One subband sample generates 64 interpolated ones
293
0
        synth->synth_filter_fixed_64(imdct, hist1, offset,
294
0
                                     hist2, filter_coeff,
295
0
                                     pcm_samples, input);
296
0
        pcm_samples += 64;
297
0
    }
298
0
}
299
300
static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
301
954
{
302
954
    int i;
303
304
50.7k
    for (i = 0; i < len; i++)
305
49.8k
        dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
306
954
}
307
308
/**
 * Subtract a scaled copy of src from two destination buffers.
 *
 * For each of the len samples, cs = mul23(src[i], 5931520) is computed once
 * and subtracted from both dst1[i] and dst2[i]. The constant is
 * M_SQRT1_2 * (1 << 23).
 *
 * The subtraction is carried out in unsigned arithmetic, the same way
 * dmix_sub_c() and dmix_add_c() accumulate, so that a result outside the
 * int32_t range does not trigger signed-overflow undefined behaviour.
 * Results are unchanged whenever the signed subtraction would have been
 * well defined.
 *
 * @param dst1 first buffer updated in place
 * @param dst2 second buffer updated in place
 * @param src  input samples
 * @param len  number of samples
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                           const int32_t *src, ptrdiff_t len)
{
    int i;

    for (i = 0; i < len; i++) {
        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
        dst1[i] -= (unsigned)cs;
        dst2[i] -= (unsigned)cs;
    }
}
319
320
/**
 * Subtract a scaled copy of src from dst, in place.
 *
 * For each of the len samples: dst[i] -= mul15(src[i], coeff), with the
 * subtraction carried out in unsigned arithmetic.
 *
 * @param dst   buffer updated in place
 * @param src   input samples
 * @param coeff scaling coefficient passed to mul15()
 * @param len   number of samples
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] -= (unsigned)mul15(src[n], coeff);
        n++;
    }
}
327
328
/**
 * Add a scaled copy of src to dst, in place.
 *
 * For each of the len samples: dst[i] += mul15(src[i], coeff), with the
 * addition carried out in unsigned arithmetic.
 *
 * @param dst   buffer updated in place
 * @param src   input samples
 * @param coeff scaling coefficient passed to mul15()
 * @param len   number of samples
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] += (unsigned)mul15(src[n], coeff);
        n++;
    }
}
335
336
/**
 * Scale a buffer in place: dst[i] = mul15(dst[i], scale) for len samples.
 *
 * @param dst   buffer updated in place
 * @param scale scaling coefficient passed to mul15()
 * @param len   number of samples
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] = mul15(dst[n], scale);
        n++;
    }
}
343
344
/**
 * Scale a buffer in place: dst[i] = mul16(dst[i], scale_inv) for len samples.
 *
 * @param dst       buffer updated in place
 * @param scale_inv scaling coefficient passed to mul16()
 * @param len       number of samples
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] = mul16(dst[n], scale_inv);
        n++;
    }
}
351
352
/**
 * In-place update dst[i] -= mul22(src[i], coeff) for len samples.
 *
 * @param dst   buffer updated in place
 * @param src   input samples
 * @param coeff coefficient passed to mul22()
 * @param len   number of samples
 */
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] -= mul22(src[n], coeff);
        n++;
    }
}
359
360
/**
 * In-place update dst[i] -= mul23(src[i], coeff) for len samples.
 *
 * @param dst   buffer updated in place
 * @param src   input samples
 * @param coeff coefficient passed to mul23()
 * @param len   number of samples
 */
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] -= mul23(src[n], coeff);
        n++;
    }
}
367
368
/**
 * Combine two bands into one interleaved output of 2 * len samples.
 *
 * src0 and src1 are modified in place by a fixed sequence of filter0() and
 * filter1() passes that alternately update one buffer from the other:
 *  - four filter0() passes using coeff[0..3];
 *  - eight groups of three filter1() passes using coeff[4 + k] and
 *    coeff[12 + k]; after each group src0 is moved back by one sample, so
 *    samples src0[-7] .. src0[len - 1] of the caller's buffer are touched.
 * The output then alternates src1[n] and the src0 sample 7 positions
 * earlier: dst[2n] = src1[n], dst[2n + 1] = src0[n - 7].
 *
 * @param dst   output buffer, receives 2 * len samples
 * @param src0  first band; modified, must be readable and writable from index -7
 * @param src1  second band; modified
 * @param coeff 20 filter coefficients
 * @param len   number of samples per band
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                  const int32_t *coeff, ptrdiff_t len)
{
    int k, n;

    for (k = 0; k < 4; k += 2) {
        filter0(src0, src1, coeff[k    ], len);
        filter0(src1, src0, coeff[k + 1], len);
    }

    for (k = 0; k < 8; k++) {
        filter1(src0, src1, coeff[k +  4], len);
        filter1(src1, src0, coeff[k + 12], len);
        filter1(src0, src1, coeff[k +  4], len);
        src0--;
    }

    // src0 now sits 8 samples before its starting point; the pre-increment
    // below makes the first sample taken the one 7 before the start
    for (n = 0; n < len; n++) {
        dst[0] = *src1++;
        dst[1] = *++src0;
        dst   += 2;
    }
}
389
390
/**
 * Produce four output values per subband from eight input samples.
 *
 * For each subband i in [0, len) the samples input[i][ofs - 4] ..
 * input[i][ofs + 3] are combined using the window values coeff[0..3] and
 * the transform constants coeff[4..7] into output[i][0..3]. Afterwards,
 * for subbands 12 .. len - 2, neighbouring outputs are cross-mixed using
 * coeff[8] and coeff[9].
 *
 * @param output result, four values per subband
 * @param input  per-subband sample buffers
 * @param coeff  ten coefficients: SW0..SW3, C1..C4, AL1, AL2
 * @param ofs    position of the centre of the 8-sample span in each buffer
 * @param len    number of subbands to process
 */
static void lbr_bank_c(float output[32][4], float **input,
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
{
    const float sw0 = coeff[0];
    const float sw1 = coeff[1];
    const float sw2 = coeff[2];
    const float sw3 = coeff[3];

    const float c1  = coeff[4];
    const float c2  = coeff[5];
    const float c3  = coeff[6];
    const float c4  = coeff[7];

    const float al1 = coeff[8];
    const float al2 = coeff[9];

    int sb;

    // Short window and 8 point forward MDCT
    for (sb = 0; sb < len; sb++) {
        const float *x = input[sb] + ofs;
        float       *y = output[sb];

        const float a = x[-4] * sw0 - x[-1] * sw3;
        const float b = x[-3] * sw1 - x[-2] * sw2;
        const float c = x[ 2] * sw1 + x[ 1] * sw2;
        const float d = x[ 3] * sw0 + x[ 0] * sw3;

        y[0] = c1 * b - c2 * c + c4 * a - c3 * d;
        y[1] = c1 * d - c2 * a - c4 * b - c3 * c;
        y[2] = c3 * b + c2 * d - c4 * c + c1 * a;
        y[3] = c3 * a - c2 * b + c4 * d - c1 * c;
    }

    // Aliasing cancellation for high frequencies
    for (sb = 12; sb < len - 1; sb++) {
        float *cur = output[sb];
        float *nxt = output[sb + 1];
        float a, b;

        a = cur[3] * al1;
        b = nxt[0] * al1;
        cur[3] += b - a;
        nxt[0] -= b + a;

        a = cur[2] * al2;
        b = nxt[1] * al2;
        cur[2] += b - a;
        nxt[1] -= b + a;
    }
}
435
436
/**
 * Interpolate 64 input samples through a cascade of five IIR sections.
 *
 * Each input sample produces `factor` output samples: the input value is
 * fed into the cascade once, followed by factor - 1 zeros. Every section k
 * keeps two state values in hist[k] and uses the four coefficients in
 * iir[k]; the state is updated in place and carries over between calls.
 *
 * @param output receives 64 * factor samples
 * @param input  64 input samples
 * @param iir    coefficients, four per section
 * @param hist   filter state, two values per section; updated in place
 * @param factor number of output samples per input sample
 */
static void lfe_iir_c(float *output, const float *input,
                      const float iir[5][4], float hist[5][2],
                      ptrdiff_t factor)
{
    int n, rep, sec;

    for (n = 0; n < 64; n++) {
        float acc = input[n];

        for (rep = 0; rep < factor; rep++) {
            for (sec = 0; sec < 5; sec++) {
                const float *c = iir[sec];
                float       *h = hist[sec];
                const float mid = h[0] * c[0] + h[1] * c[1] + acc;

                acc  = h[0] * c[2] + h[1] * c[3] + mid;

                h[0] = h[1];
                h[1] = mid;
            }

            *output++ = acc;
            // Only the first output of each group sees the input sample
            acc = 0;
        }
    }
}
460
461
/**
 * Fill a DCADSPContext with the C reference implementations from this file.
 *
 * Every function pointer in the context is set to its *_c counterpart.
 * When built with ARCH_X86, ff_dcadsp_init_x86() is called afterwards and
 * may replace some of them.
 *
 * @param s context to initialise
 */
av_cold void ff_dcadsp_init(DCADSPContext *s)
{
    // Subband decoding helpers
    s->decode_hf           = decode_hf_c;
    s->decode_joint        = decode_joint_c;
    s->decor               = decor_c;
    s->assemble_freq_bands = assemble_freq_bands_c;

    // Floating-point LFE and QMF paths
    s->lfe_fir_float[0] = lfe_fir0_float_c;
    s->lfe_fir_float[1] = lfe_fir1_float_c;
    s->lfe_x96_float    = lfe_x96_float_c;
    s->sub_qmf_float[0] = sub_qmf32_float_c;
    s->sub_qmf_float[1] = sub_qmf64_float_c;

    // Fixed-point LFE and QMF paths
    s->lfe_fir_fixed    = lfe_fir_fixed_c;
    s->lfe_x96_fixed    = lfe_x96_fixed_c;
    s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
    s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;

    // Downmix helpers
    s->dmix_sub_xch   = dmix_sub_xch_c;
    s->dmix_sub       = dmix_sub_c;
    s->dmix_add       = dmix_add_c;
    s->dmix_scale     = dmix_scale_c;
    s->dmix_scale_inv = dmix_scale_inv_c;

    // LBR filter bank and LFE IIR interpolation
    s->lbr_bank = lbr_bank_c;
    s->lfe_iir  = lfe_iir_c;

    // Architecture-specific setup runs last so it can replace the defaults
#if ARCH_X86
    ff_dcadsp_init_x86(s);
#endif
}