/src/ffmpeg/libavcodec/dcadsp.c

Source (jump to first uncovered line)
/*
 * Copyright (C) 2016 foo86
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/mem_internal.h"

#include "dcadsp.h"
#include "dcamath.h"

/*
 * Fill high-frequency subbands from vector-quantisation codebook rows.
 *
 * For each subband sb in [sb_start, sb_end), row hf_vq[vq_index[sb]] is
 * multiplied element-wise by scale_factors[sb][0], rounded, shifted right
 * by 4 and passed through clip23(); the len results are stored starting at
 * dst[sb][ofs].
 */
static void decode_hf_c(int32_t **dst,
                        const int32_t *vq_index,
                        const int8_t hf_vq[1024][32],
                        int32_t scale_factors[32][2],
                        ptrdiff_t sb_start, ptrdiff_t sb_end,
                        ptrdiff_t ofs, ptrdiff_t len)
{
    int sb, n;

    for (sb = sb_start; sb < sb_end; sb++) {
        const int8_t *codeword = hf_vq[vq_index[sb]];
        const int32_t gain     = scale_factors[sb][0];

        for (n = 0; n < len; n++) {
            // Adding 1 << 3 rounds to nearest before the shift by 4
            const int32_t scaled = (codeword[n] * gain + (1 << 3)) >> 4;

            dst[sb][n + ofs] = clip23(scaled);
        }
    }
}

/*
 * Produce scaled copies of source subbands.
 *
 * For every subband sb in [sb_start, sb_end), the len samples starting at
 * src[sb][ofs] are multiplied by scale_factors[sb] through mul17(), passed
 * through clip23() and written to the same positions of dst[sb].
 */
static void decode_joint_c(int32_t **dst, int32_t **src,
                           const int32_t *scale_factors,
                           ptrdiff_t sb_start, ptrdiff_t sb_end,
                           ptrdiff_t ofs, ptrdiff_t len)
{
    int sb, n;

    for (sb = sb_start; sb < sb_end; sb++) {
        const int32_t gain = scale_factors[sb];

        for (n = 0; n < len; n++) {
            const int32_t scaled = mul17(src[sb][n + ofs], gain);

            dst[sb][n + ofs] = clip23(scaled);
        }
    }
}

/*
 * Interpolate decimated LFE samples with a 256-entry FIR table (float output).
 *
 * dec_select picks the interpolation factor: 0 gives 64 outputs per input
 * using 8 taps per phase, 1 gives 128 outputs per input using 4 taps per
 * phase.  npcmblocks >> (dec_select + 1) input samples are consumed.  Each
 * output phase reads the current input sample and the ncoeffs - 1 samples
 * stored before it (lfe_samples[0], lfe_samples[-1], ...).
 */
static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                            const float *filter_coeff, ptrdiff_t npcmblocks,
                            int dec_select)
{
    const int factor      = 64 << dec_select;
    const int half        = factor / 2;
    const int ncoeffs     = 8 >> dec_select;
    const int nlfesamples = npcmblocks >> (dec_select + 1);
    int n, phase, tap;

    for (n = 0; n < nlfesamples; n++) {
        for (phase = 0; phase < half; phase++) {
            // The second half of the outputs walks the table backwards
            const float *fwd = &filter_coeff[      phase * ncoeffs];
            const float *rev = &filter_coeff[255 - phase * ncoeffs];
            float lo = 0;
            float hi = 0;

            for (tap = 0; tap < ncoeffs; tap++) {
                const int32_t sample = lfe_samples[-tap];

                lo += fwd[ tap] * sample;
                hi += rev[-tap] * sample;
            }

            pcm_samples[       phase] = lo;
            pcm_samples[half + phase] = hi;
        }

        lfe_samples++;
        pcm_samples += factor;
    }
}

/*
 * Float LFE interpolation with dec_select fixed to 0, i.e. lfe_fir_float_c()
 * producing 64 output samples per input sample with 8 taps per phase.
 */
static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
}

/*
 * Float LFE interpolation with dec_select fixed to 1, i.e. lfe_fir_float_c()
 * producing 128 output samples per input sample with 4 taps per phase.
 */
static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                             const float *filter_coeff, ptrdiff_t npcmblocks)
{
    lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
}

/*
 * Double the sample count of src by linear interpolation (float).
 *
 * Every input sample yields two outputs: a 1:3 and a 3:1 blend of the
 * current and the previous input sample.  *hist supplies the sample
 * preceding src[0] and receives the last input sample on return.
 */
static void lfe_x96_float_c(float *dst, const float *src,
                            float *hist, ptrdiff_t len)
{
    float last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const float cur = src[n];

        dst[0] = 0.25f * cur + 0.75f * last;
        dst[1] = 0.75f * cur + 0.25f * last;
        dst   += 2;
        last   = cur;
    }

    *hist = last;
}

/*
 * Feed npcmblocks blocks of 32 subband samples to the float synthesis filter.
 *
 * For each block the samples subband_samples_lo[0..31][block] are converted
 * to float, negated for subbands where (sb - 1) & 2 is non-zero, and passed
 * to synth->synth_filter_float(); pcm_samples advances by 32 per block.
 * subband_samples_hi is not used by this variant.
 */
static void sub_qmf32_float_c(SynthFilterContext *synth,
                              AVTXContext *imdct,
                              av_tx_fn imdct_fn,
                              float *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              float *hist1, int *offset, float *hist2,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              float scale)
{
    LOCAL_ALIGNED_32(float, input, [32]);
    int sb, blk;

    for (blk = 0; blk < npcmblocks; blk++) {
        // Gather one sample per subband, flipping the sign where required
        for (sb = 0; sb < 32; sb++) {
            const int32_t v = subband_samples_lo[sb][blk];

            input[sb] = ((sb - 1) & 2) ? -v : v;
        }

        // One subband sample generates 32 interpolated ones
        synth->synth_filter_float(imdct, hist1, offset,
                                  hist2, filter_coeff,
                                  pcm_samples, input, scale, imdct_fn);
        pcm_samples += 32;
    }
}

/*
 * Feed npcmblocks blocks of 64 subband samples to the float synthesis filter.
 *
 * With subband_samples_hi present, the first 32 inputs are the sum of the
 * lo and hi samples and the upper 32 come from hi alone.  Without it, only
 * the first 32 inputs are loaded from lo and the upper 32 stay zero.  In
 * both cases inputs for subbands where (sb - 1) & 2 is non-zero are negated.
 * Each block is passed to synth->synth_filter_float_64(); pcm_samples
 * advances by 64 per block.
 */
static void sub_qmf64_float_c(SynthFilterContext *synth,
                              AVTXContext *imdct,
                              av_tx_fn imdct_fn,
                              float *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              float *hist1, int *offset, float *hist2,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              float scale)
{
    LOCAL_ALIGNED_32(float, input, [64]);
    int sb, blk;

    // The upper half is never written below when there is no hi data
    if (!subband_samples_hi)
        memset(&input[32], 0, sizeof(input[0]) * 32);

    for (blk = 0; blk < npcmblocks; blk++) {
        if (subband_samples_hi) {
            // Full 64 subbands, first 32 are residual coded
            for (sb = 0; sb < 32; sb++) {
                const int32_t lo = subband_samples_lo[sb][blk];
                const int32_t hi = subband_samples_hi[sb][blk];

                input[sb] = ((sb - 1) & 2) ? -lo - hi : lo + hi;
            }
            for (sb = 32; sb < 64; sb++) {
                const int32_t hi = subband_samples_hi[sb][blk];

                input[sb] = ((sb - 1) & 2) ? -hi : hi;
            }
        } else {
            // Only first 32 subbands
            for (sb = 0; sb < 32; sb++) {
                const int32_t lo = subband_samples_lo[sb][blk];

                input[sb] = ((sb - 1) & 2) ? -lo : lo;
            }
        }

        // One subband sample generates 64 interpolated ones
        synth->synth_filter_float_64(imdct, hist1, offset,
                                     hist2, filter_coeff,
                                     pcm_samples, input, scale, imdct_fn);
        pcm_samples += 64;
    }
}

/*
 * Interpolate decimated LFE samples with a 256-entry FIR table (fixed point).
 *
 * npcmblocks >> 1 input samples are consumed; each yields 64 outputs built
 * from 8 taps per phase.  Products are accumulated in 64 bits and reduced
 * with norm23() and clip23().  Each phase reads the current input sample and
 * the 7 samples stored before it (lfe_samples[0] .. lfe_samples[-7]).
 */
static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                            const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    const int nlfesamples = npcmblocks >> 1;
    int n, phase, tap;

    for (n = 0; n < nlfesamples; n++) {
        // One decimated sample generates 64 interpolated ones
        for (phase = 0; phase < 32; phase++) {
            // The second half of the outputs walks the table backwards
            const int32_t *fwd = &filter_coeff[      phase * 8];
            const int32_t *rev = &filter_coeff[255 - phase * 8];
            int64_t lo = 0;
            int64_t hi = 0;

            for (tap = 0; tap < 8; tap++) {
                const int32_t sample = lfe_samples[-tap];

                lo += (int64_t)fwd[ tap] * sample;
                hi += (int64_t)rev[-tap] * sample;
            }

            pcm_samples[     phase] = clip23(norm23(lo));
            pcm_samples[32 + phase] = clip23(norm23(hi));
        }

        lfe_samples++;
        pcm_samples += 64;
    }
}

/*
 * Double the sample count of src by interpolation (fixed point).
 *
 * Every input sample yields two outputs, each a weighted sum of the current
 * and the previous input sample using the constants 2097471 and 6291137,
 * reduced with norm23() and clip23().  *hist supplies the sample preceding
 * src[0] and receives the last input sample on return.
 */
static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                            int32_t *hist, ptrdiff_t len)
{
    int32_t last = *hist;
    int n;

    for (n = 0; n < len; n++) {
        const int32_t cur    = src[n];
        const int64_t first  = INT64_C(2097471) * cur + INT64_C(6291137) * last;
        const int64_t second = INT64_C(6291137) * cur + INT64_C(2097471) * last;

        dst[0] = clip23(norm23(first));
        dst[1] = clip23(norm23(second));
        dst   += 2;
        last   = cur;
    }

    *hist = last;
}

/*
 * Feed npcmblocks blocks of 32 subband samples to the fixed-point synthesis
 * filter.
 *
 * For each block the samples subband_samples_lo[0..31][block] are copied
 * into an aligned scratch buffer and passed to synth->synth_filter_fixed();
 * pcm_samples advances by 32 per block.  subband_samples_hi is not used by
 * this variant.
 */
static void sub_qmf32_fixed_c(SynthFilterContext *synth,
                              DCADCTContext *imdct,
                              int32_t *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              int32_t *hist1, int *offset, int32_t *hist2,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    LOCAL_ALIGNED_32(int32_t, input, [32]);
    int sb, blk;

    for (blk = 0; blk < npcmblocks; blk++) {
        // Gather one sample per subband
        for (sb = 0; sb < 32; sb++) {
            input[sb] = subband_samples_lo[sb][blk];
        }

        // One subband sample generates 32 interpolated ones
        synth->synth_filter_fixed(imdct, hist1, offset,
                                  hist2, filter_coeff,
                                  pcm_samples, input);
        pcm_samples += 32;
    }
}

/*
 * Feed npcmblocks blocks of 64 subband samples to the fixed-point synthesis
 * filter.
 *
 * With subband_samples_hi present, the first 32 inputs are the sum of the
 * lo and hi samples and the upper 32 come from hi alone.  Without it, only
 * the first 32 inputs are loaded from lo and the upper 32 stay zero.  Each
 * block is passed to synth->synth_filter_fixed_64(); pcm_samples advances
 * by 64 per block.
 */
static void sub_qmf64_fixed_c(SynthFilterContext *synth,
                              DCADCTContext *imdct,
                              int32_t *pcm_samples,
                              int32_t **subband_samples_lo,
                              int32_t **subband_samples_hi,
                              int32_t *hist1, int *offset, int32_t *hist2,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
{
    LOCAL_ALIGNED_32(int32_t, input, [64]);
    int sb, blk;

    // The upper half is never written below when there is no hi data
    if (!subband_samples_hi)
        memset(&input[32], 0, sizeof(input[0]) * 32);

    for (blk = 0; blk < npcmblocks; blk++) {
        if (subband_samples_hi) {
            // Full 64 subbands, first 32 are residual coded
            for (sb = 0; sb < 32; sb++) {
                input[sb] = subband_samples_lo[sb][blk] + subband_samples_hi[sb][blk];
            }
            for (sb = 32; sb < 64; sb++) {
                input[sb] = subband_samples_hi[sb][blk];
            }
        } else {
            // Only first 32 subbands
            for (sb = 0; sb < 32; sb++) {
                input[sb] = subband_samples_lo[sb][blk];
            }
        }

        // One subband sample generates 64 interpolated ones
        synth->synth_filter_fixed_64(imdct, hist1, offset,
                                     hist2, filter_coeff,
                                     pcm_samples, input);
        pcm_samples += 64;
    }
}

static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
}

/*
 * Subtract a scaled copy of src from both dst1 and dst2, in place.
 *
 * Each src sample is multiplied by 5931520 (M_SQRT1_2 * (1 << 23)) through
 * mul23() and the result is removed from the matching sample of both
 * destination buffers.  The subtraction is carried out in unsigned
 * arithmetic, as dmix_sub_c() and dmix_add_c() do, so that out-of-range
 * input wraps around instead of causing signed-overflow undefined behaviour.
 */
static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                           const int32_t *src, ptrdiff_t len)
{
    int i;

    for (i = 0; i < len; i++) {
        int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
        dst1[i] -= (unsigned)cs;
        dst2[i] -= (unsigned)cs;
    }
}

/*
 * Subtract src scaled by coeff (through mul15()) from dst, in place.
 * The subtraction is done on an unsigned operand so that it wraps around.
 */
static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const unsigned delta = mul15(src[n], coeff);

        dst[n] -= delta;
    }
}

/*
 * Add src scaled by coeff (through mul15()) to dst, in place.
 * The addition is done on an unsigned operand so that it wraps around.
 */
static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        const unsigned delta = mul15(src[n], coeff);

        dst[n] += delta;
    }
}

/*
 * Replace each of the len samples in dst by mul15(sample, scale).
 */
static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] = mul15(dst[n], scale);
        n++;
    }
}

/*
 * Replace each of the len samples in dst by mul16(sample, scale_inv).
 */
static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
{
    int n = 0;

    while (n < len) {
        dst[n] = mul16(dst[n], scale_inv);
        n++;
    }
}

/*
 * In-place update dst[n] -= mul22(src[n], coeff) for n in [0, len).
 * dst is accessed through SUINT32.
 */
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        dst[n] = dst[n] - mul22(src[n], coeff);
    }
}

/*
 * In-place update dst[n] -= mul23(src[n], coeff) for n in [0, len).
 * dst is accessed through SUINT32.
 */
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
{
    int n;

    for (n = 0; n < len; n++) {
        dst[n] = dst[n] - mul23(src[n], coeff);
    }
}

/*
 * Combine two bands into one interleaved output of 2 * len samples.
 *
 * src0 and src1 are modified in place by a fixed sequence of filter0() and
 * filter1() passes driven by coeff[0..19]; every pass subtracts a scaled copy
 * of one buffer from the other, so the order of the calls matters.
 * dst then receives src1 and src0 samples alternately.
 *
 * The filter1() passes touch up to 7 elements in front of the src0 pointer
 * passed in, and the final copy starts 7 elements in front of it, so that
 * storage must be valid.
 * NOTE(review): presumably this is history kept by the caller - confirm.
 */
static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                  const int32_t *coeff, ptrdiff_t len)
{
    int i;

    // Four alternating passes with coeff[0..3]
    filter0(src0, src1, coeff[0], len);
    filter0(src1, src0, coeff[1], len);
    filter0(src0, src1, coeff[2], len);
    filter0(src1, src0, coeff[3], len);

    // Eight rounds; src0 moves back by one element after each round.
    // coeff[i + 4] is applied to src0 both before and after the src1 update.
    for (i = 0; i < 8; i++, src0--) {
        filter1(src0, src1, coeff[i +  4], len);
        filter1(src1, src0, coeff[i + 12], len);
        filter1(src0, src1, coeff[i +  4], len);
    }

    // src0 is now 8 elements before its entry value; the pre-increment makes
    // the first src0 sample copied the one 7 elements before it.
    for (i = 0; i < len; i++) {
        *dst++ = *src1++;
        *dst++ = *++src0;
    }
}

/*
 * Transform 8 input samples per subband into 4 output coefficients.
 *
 * For each of the len subbands, the samples input[sb][ofs - 4 .. ofs + 3]
 * are combined with coeff[0..3] and then with coeff[4..7] to produce
 * output[sb][0..3].  Afterwards neighbouring subbands from index 12 upwards
 * are cross-mixed using coeff[8] and coeff[9].
 */
static void lbr_bank_c(float output[32][4], float **input,
                       const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
{
    const float w0  = coeff[0];
    const float w1  = coeff[1];
    const float w2  = coeff[2];
    const float w3  = coeff[3];

    const float c1  = coeff[4];
    const float c2  = coeff[5];
    const float c3  = coeff[6];
    const float c4  = coeff[7];

    const float al1 = coeff[8];
    const float al2 = coeff[9];

    int sb;

    // Short window and 8 point forward MDCT
    for (sb = 0; sb < len; sb++) {
        const float *in = input[sb] + ofs;
        float *out      = output[sb];

        const float p = in[-4] * w0 - in[-1] * w3;
        const float q = in[-3] * w1 - in[-2] * w2;
        const float r = in[ 2] * w1 + in[ 1] * w2;
        const float s = in[ 3] * w0 + in[ 0] * w3;

        out[0] = c1 * q - c2 * r + c4 * p - c3 * s;
        out[1] = c1 * s - c2 * p - c4 * q - c3 * r;
        out[2] = c3 * q + c2 * s - c4 * r + c1 * p;
        out[3] = c3 * p - c2 * q + c4 * s - c1 * r;
    }

    // Aliasing cancellation for high frequencies
    for (sb = 12; sb < len - 1; sb++) {
        float *cur  = output[sb];
        float *next = output[sb + 1];
        float x, y;

        x = cur[3]  * al1;
        y = next[0] * al1;
        cur[3]  += y - x;
        next[0] -= y + x;

        x = cur[2]  * al2;
        y = next[1] * al2;
        cur[2]  += y - x;
        next[1] -= y + x;
    }
}

/*
 * Interpolate 64 input samples by `factor` through five cascaded
 * second-order sections.
 *
 * Each input sample is followed by factor - 1 zero-valued inputs, and every
 * one of them is run through the five sections in order, giving 64 * factor
 * output samples.  iir[k] holds the four coefficients of section k and
 * hist[k] its two state values, which are updated in place.
 */
static void lfe_iir_c(float *output, const float *input,
                      const float iir[5][4], float hist[5][2],
                      ptrdiff_t factor)
{
    int n, rep, stage;

    for (n = 0; n < 64; n++) {
        // Only the first of the `factor` steps is driven by the input sample
        float sample = input[n];

        for (rep = 0; rep < factor; rep++) {
            for (stage = 0; stage < 5; stage++) {
                const float *c  = iir[stage];
                float *state    = hist[stage];
                const float mid = state[0] * c[0] + state[1] * c[1] + sample;

                sample   = state[0] * c[2] + state[1] * c[3] + mid;
                state[0] = state[1];
                state[1] = mid;
            }

            *output++ = sample;
            sample = 0;
        }
    }
}

/*
 * Populate a DCADSPContext with the C implementations defined in this file.
 * When ARCH_X86 is set, ff_dcadsp_init_x86() runs afterwards on the same
 * context.
 */
av_cold void ff_dcadsp_init(DCADSPContext *s)
{
    // Subband decoding helpers
    s->decode_hf           = decode_hf_c;
    s->decode_joint        = decode_joint_c;
    s->assemble_freq_bands = assemble_freq_bands_c;
    s->decor               = decor_c;

    // Floating-point LFE interpolation and synthesis
    s->lfe_fir_float[0]    = lfe_fir0_float_c;
    s->lfe_fir_float[1]    = lfe_fir1_float_c;
    s->lfe_x96_float       = lfe_x96_float_c;
    s->sub_qmf_float[0]    = sub_qmf32_float_c;
    s->sub_qmf_float[1]    = sub_qmf64_float_c;

    // Fixed-point LFE interpolation and synthesis
    s->lfe_fir_fixed       = lfe_fir_fixed_c;
    s->lfe_x96_fixed       = lfe_x96_fixed_c;
    s->sub_qmf_fixed[0]    = sub_qmf32_fixed_c;
    s->sub_qmf_fixed[1]    = sub_qmf64_fixed_c;

    // Downmix helpers
    s->dmix_sub_xch        = dmix_sub_xch_c;
    s->dmix_sub            = dmix_sub_c;
    s->dmix_add            = dmix_add_c;
    s->dmix_scale          = dmix_scale_c;
    s->dmix_scale_inv      = dmix_scale_inv_c;

    // Float filter bank and IIR helpers
    s->lbr_bank            = lbr_bank_c;
    s->lfe_iir             = lfe_iir_c;

    // Must stay last: it is handed the fully populated context
#if ARCH_X86
    ff_dcadsp_init_x86(s);
#endif
}

Coverage Report

Created: 2025-08-28 07:12

Line	Count	Source (jump to first uncovered line)
1		/*
2		* Copyright (C) 2016 foo86
3		*
4		* This file is part of FFmpeg.
5		*
6		* FFmpeg is free software; you can redistribute it and/or
7		* modify it under the terms of the GNU Lesser General Public
8		* License as published by the Free Software Foundation; either
9		* version 2.1 of the License, or (at your option) any later version.
10		*
11		* FFmpeg is distributed in the hope that it will be useful,
12		* but WITHOUT ANY WARRANTY; without even the implied warranty of
13		* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14		* Lesser General Public License for more details.
15		*
16		* You should have received a copy of the GNU Lesser General Public
17		* License along with FFmpeg; if not, write to the Free Software
18		* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19		*/
20
21		#include "libavutil/mem_internal.h"
22
23		#include "dcadsp.h"
24		#include "dcamath.h"
25
26		static void decode_hf_c(int32_t **dst,
27		const int32_t *vq_index,
28		const int8_t hf_vq[1024][32],
29		int32_t scale_factors[32][2],
30		ptrdiff_t sb_start, ptrdiff_t sb_end,
31		ptrdiff_t ofs, ptrdiff_t len)
32	28.2k	{
33	28.2k	int i, j;
34
35	142k	for (i = sb_start; i < sb_end; i++) {
36	114k	const int8_t *coeff = hf_vq[vq_index[i]];
37	114k	int32_t scale = scale_factors[i][0];
38	1.98M	for (j = 0; j < len; j++)
39	1.87M	dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
40	114k	}
41	28.2k	}
42
43		static void decode_joint_c(int32_t **dst, int32_t **src,
44		const int32_t *scale_factors,
45		ptrdiff_t sb_start, ptrdiff_t sb_end,
46		ptrdiff_t ofs, ptrdiff_t len)
47	13.2k	{
48	13.2k	int i, j;
49
50	47.0k	for (i = sb_start; i < sb_end; i++) {
51	33.7k	int32_t scale = scale_factors[i];
52	453k	for (j = 0; j < len; j++)
53	419k	dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
54	33.7k	}
55	13.2k	}
56
57		static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
58		const float *filter_coeff, ptrdiff_t npcmblocks,
59		int dec_select)
60	19.2k	{
61		// Select decimation factor
62	19.2k	int factor = 64 << dec_select;
63	19.2k	int ncoeffs = 8 >> dec_select;
64	19.2k	int nlfesamples = npcmblocks >> (dec_select + 1);
65	19.2k	int i, j, k;
66
67	1.06M	for (i = 0; i < nlfesamples; i++) {
68		// One decimated sample generates 64 or 128 interpolated ones
69	35.0M	for (j = 0; j < factor / 2; j++) {
70	34.0M	float a = 0;
71	34.0M	float b = 0;
72
73	302M	for (k = 0; k < ncoeffs; k++) {
74	268M	a += filter_coeff[ j * ncoeffs + k] * lfe_samples[-k];
75	268M	b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
76	268M	}
77
78	34.0M	pcm_samples[ j] = a;
79	34.0M	pcm_samples[factor / 2 + j] = b;
80	34.0M	}
81
82	1.04M	lfe_samples++;
83	1.04M	pcm_samples += factor;
84	1.04M	}
85	19.2k	}
86
87		static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
88		const float *filter_coeff, ptrdiff_t npcmblocks)
89	16.7k	{
90	16.7k	lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
91	16.7k	}
92
93		static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
94		const float *filter_coeff, ptrdiff_t npcmblocks)
95	2.47k	{
96	2.47k	lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
97	2.47k	}
98
99		static void lfe_x96_float_c(float *dst, const float *src,
100		float *hist, ptrdiff_t len)
101	0	{
102	0	float prev = *hist;
103	0	int i;
104
105	0	for (i = 0; i < len; i++) {
106	0	float a = 0.25f * src[i] + 0.75f * prev;
107	0	float b = 0.75f * src[i] + 0.25f * prev;
108	0	prev = src[i];
109	0	*dst++ = a;
110	0	*dst++ = b;
111	0	}
112
113	0	*hist = prev;
114	0	}
115
116		static void sub_qmf32_float_c(SynthFilterContext *synth,
117		AVTXContext *imdct,
118		av_tx_fn imdct_fn,
119		float *pcm_samples,
120		int32_t **subband_samples_lo,
121		int32_t **subband_samples_hi,
122		float *hist1, int *offset, float *hist2,
123		const float *filter_coeff, ptrdiff_t npcmblocks,
124		float scale)
125	36.6k	{
126	36.6k	LOCAL_ALIGNED_32(float, input, [32]);
127	36.6k	int i, j;
128
129	3.53M	for (j = 0; j < npcmblocks; j++) {
130		// Load in one sample from each subband
131	115M	for (i = 0; i < 32; i++) {
132	112M	if ((i - 1) & 2)
133	56.0M	input[i] = -subband_samples_lo[i][j];
134	56.0M	else
135	56.0M	input[i] = subband_samples_lo[i][j];
136	112M	}
137
138		// One subband sample generates 32 interpolated ones
139	3.50M	synth->synth_filter_float(imdct, hist1, offset,
140	3.50M	hist2, filter_coeff,
141	3.50M	pcm_samples, input, scale, imdct_fn);
142	3.50M	pcm_samples += 32;
143	3.50M	}
144	36.6k	}
145
146		static void sub_qmf64_float_c(SynthFilterContext *synth,
147		AVTXContext *imdct,
148		av_tx_fn imdct_fn,
149		float *pcm_samples,
150		int32_t **subband_samples_lo,
151		int32_t **subband_samples_hi,
152		float *hist1, int *offset, float *hist2,
153		const float *filter_coeff, ptrdiff_t npcmblocks,
154		float scale)
155	0	{
156	0	LOCAL_ALIGNED_32(float, input, [64]);
157	0	int i, j;
158
159	0	if (!subband_samples_hi)
160	0	memset(&input[32], 0, sizeof(input[0]) * 32);
161
162	0	for (j = 0; j < npcmblocks; j++) {
163		// Load in one sample from each subband
164	0	if (subband_samples_hi) {
165		// Full 64 subbands, first 32 are residual coded
166	0	for (i = 0; i < 32; i++) {
167	0	if ((i - 1) & 2)
168	0	input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
169	0	else
170	0	input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
171	0	}
172	0	for (i = 32; i < 64; i++) {
173	0	if ((i - 1) & 2)
174	0	input[i] = -subband_samples_hi[i][j];
175	0	else
176	0	input[i] = subband_samples_hi[i][j];
177	0	}
178	0	} else {
179		// Only first 32 subbands
180	0	for (i = 0; i < 32; i++) {
181	0	if ((i - 1) & 2)
182	0	input[i] = -subband_samples_lo[i][j];
183	0	else
184	0	input[i] = subband_samples_lo[i][j];
185	0	}
186	0	}
187
188		// One subband sample generates 64 interpolated ones
189	0	synth->synth_filter_float_64(imdct, hist1, offset,
190	0	hist2, filter_coeff,
191	0	pcm_samples, input, scale, imdct_fn);
192	0	pcm_samples += 64;
193	0	}
194	0	}
195
196		static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
197		const int32_t *filter_coeff, ptrdiff_t npcmblocks)
198	497	{
199		// Select decimation factor
200	497	int nlfesamples = npcmblocks >> 1;
201	497	int i, j, k;
202
203	18.7k	for (i = 0; i < nlfesamples; i++) {
204		// One decimated sample generates 64 interpolated ones
205	602k	for (j = 0; j < 32; j++) {
206	583k	int64_t a = 0;
207	583k	int64_t b = 0;
208
209	5.25M	for (k = 0; k < 8; k++) {
210	4.67M	a += (int64_t)filter_coeff[ j * 8 + k] * lfe_samples[-k];
211	4.67M	b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
212	4.67M	}
213
214	583k	pcm_samples[ j] = clip23(norm23(a));
215	583k	pcm_samples[32 + j] = clip23(norm23(b));
216	583k	}
217
218	18.2k	lfe_samples++;
219	18.2k	pcm_samples += 64;
220	18.2k	}
221	497	}
222
223		static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
224		int32_t *hist, ptrdiff_t len)
225	0	{
226	0	int32_t prev = *hist;
227	0	int i;
228
229	0	for (i = 0; i < len; i++) {
230	0	int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
231	0	int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
232	0	prev = src[i];
233	0	*dst++ = clip23(norm23(a));
234	0	*dst++ = clip23(norm23(b));
235	0	}
236
237	0	*hist = prev;
238	0	}
239
240		static void sub_qmf32_fixed_c(SynthFilterContext *synth,
241		DCADCTContext *imdct,
242		int32_t *pcm_samples,
243		int32_t **subband_samples_lo,
244		int32_t **subband_samples_hi,
245		int32_t *hist1, int *offset, int32_t *hist2,
246		const int32_t *filter_coeff, ptrdiff_t npcmblocks)
247	4.89k	{
248	4.89k	LOCAL_ALIGNED_32(int32_t, input, [32]);
249	4.89k	int i, j;
250
251	462k	for (j = 0; j < npcmblocks; j++) {
252		// Load in one sample from each subband
253	15.0M	for (i = 0; i < 32; i++)
254	14.6M	input[i] = subband_samples_lo[i][j];
255
256		// One subband sample generates 32 interpolated ones
257	457k	synth->synth_filter_fixed(imdct, hist1, offset,
258	457k	hist2, filter_coeff,
259	457k	pcm_samples, input);
260	457k	pcm_samples += 32;
261	457k	}
262	4.89k	}
263
264		static void sub_qmf64_fixed_c(SynthFilterContext *synth,
265		DCADCTContext *imdct,
266		int32_t *pcm_samples,
267		int32_t **subband_samples_lo,
268		int32_t **subband_samples_hi,
269		int32_t *hist1, int *offset, int32_t *hist2,
270		const int32_t *filter_coeff, ptrdiff_t npcmblocks)
271	0	{
272	0	LOCAL_ALIGNED_32(int32_t, input, [64]);
273	0	int i, j;
274
275	0	if (!subband_samples_hi)
276	0	memset(&input[32], 0, sizeof(input[0]) * 32);
277
278	0	for (j = 0; j < npcmblocks; j++) {
279		// Load in one sample from each subband
280	0	if (subband_samples_hi) {
281		// Full 64 subbands, first 32 are residual coded
282	0	for (i = 0; i < 32; i++)
283	0	input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
284	0	for (i = 32; i < 64; i++)
285	0	input[i] = subband_samples_hi[i][j];
286	0	} else {
287		// Only first 32 subbands
288	0	for (i = 0; i < 32; i++)
289	0	input[i] = subband_samples_lo[i][j];
290	0	}
291
292		// One subband sample generates 64 interpolated ones
293	0	synth->synth_filter_fixed_64(imdct, hist1, offset,
294	0	hist2, filter_coeff,
295	0	pcm_samples, input);
296	0	pcm_samples += 64;
297	0	}
298	0	}
299
300		static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
301	954	{
302	954	int i;
303
304	50.7k	for (i = 0; i < len; i++)
305	49.8k	dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
306	954	}
307
308		static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
309		const int32_t *src, ptrdiff_t len)
310	0	{
311	0	int i;
312
313	0	for (i = 0; i < len; i++) {
314	0	int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
315	0	dst1[i] -= cs;
316	0	dst2[i] -= cs;
317	0	}
318	0	}
319
320		static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
321	0	{
322	0	int i;
323
324	0	for (i = 0; i < len; i++)
325	0	dst[i] -= (unsigned)mul15(src[i], coeff);
326	0	}
327
328		static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
329	0	{
330	0	int i;
331
332	0	for (i = 0; i < len; i++)
333	0	dst[i] += (unsigned)mul15(src[i], coeff);
334	0	}
335
336		static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
337	48	{
338	48	int i;
339
340	24.6k	for (i = 0; i < len; i++)
341	24.5k	dst[i] = mul15(dst[i], scale);
342	48	}
343
344		static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
345	0	{
346	0	int i;
347
348	0	for (i = 0; i < len; i++)
349	0	dst[i] = mul16(dst[i], scale_inv);
350	0	}
351
352		static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
353	42.7k	{
354	42.7k	int i;
355
356	2.21M	for (i = 0; i < len; i++)
357	2.17M	dst[i] -= mul22(src[i], coeff);
358	42.7k	}
359
360		static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
361	256k	{
362	256k	int i;
363
364	13.3M	for (i = 0; i < len; i++)
365	13.0M	dst[i] -= mul23(src[i], coeff);
366	256k	}
367
368		static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
369		const int32_t *coeff, ptrdiff_t len)
370	10.6k	{
371	10.6k	int i;
372
373	10.6k	filter0(src0, src1, coeff[0], len);
374	10.6k	filter0(src1, src0, coeff[1], len);
375	10.6k	filter0(src0, src1, coeff[2], len);
376	10.6k	filter0(src1, src0, coeff[3], len);
377
378	96.2k	for (i = 0; i < 8; i++, src0--) {
379	85.5k	filter1(src0, src1, coeff[i + 4], len);
380	85.5k	filter1(src1, src0, coeff[i + 12], len);
381	85.5k	filter1(src0, src1, coeff[i + 4], len);
382	85.5k	}
383
384	554k	for (i = 0; i < len; i++) {
385	544k	*dst++ = *src1++;
386	544k	*dst++ = *++src0;
387	544k	}
388	10.6k	}
389
390		static void lbr_bank_c(float output[32][4], float **input,
391		const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
392	2.86M	{
393	2.86M	float SW0 = coeff[0];
394	2.86M	float SW1 = coeff[1];
395	2.86M	float SW2 = coeff[2];
396	2.86M	float SW3 = coeff[3];
397
398	2.86M	float C1 = coeff[4];
399	2.86M	float C2 = coeff[5];
400	2.86M	float C3 = coeff[6];
401	2.86M	float C4 = coeff[7];
402
403	2.86M	float AL1 = coeff[8];
404	2.86M	float AL2 = coeff[9];
405
406	2.86M	int i;
407
408		// Short window and 8 point forward MDCT
409	51.0M	for (i = 0; i < len; i++) {
410	48.1M	float *src = input[i] + ofs;
411
412	48.1M	float a = src[-4] * SW0 - src[-1] * SW3;
413	48.1M	float b = src[-3] * SW1 - src[-2] * SW2;
414	48.1M	float c = src[ 2] * SW1 + src[ 1] * SW2;
415	48.1M	float d = src[ 3] * SW0 + src[ 0] * SW3;
416
417	48.1M	output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
418	48.1M	output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
419	48.1M	output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
420	48.1M	output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
421	48.1M	}
422
423		// Aliasing cancellation for high frequencies
424	18.6M	for (i = 12; i < len - 1; i++) {
425	15.7M	float a = output[i ][3] * AL1;
426	15.7M	float b = output[i+1][0] * AL1;
427	15.7M	output[i ][3] += b - a;
428	15.7M	output[i+1][0] -= b + a;
429	15.7M	a = output[i ][2] * AL2;
430	15.7M	b = output[i+1][1] * AL2;
431	15.7M	output[i ][2] += b - a;
432	15.7M	output[i+1][1] -= b + a;
433	15.7M	}
434	2.86M	}
435
436		static void lfe_iir_c(float *output, const float *input,
437		const float iir[5][4], float hist[5][2],
438		ptrdiff_t factor)
439	2.35k	{
440	2.35k	float res, tmp;
441	2.35k	int i, j, k;
442
443	153k	for (i = 0; i < 64; i++) {
444	150k	res = *input++;
445
446	9.79M	for (j = 0; j < factor; j++) {
447	57.8M	for (k = 0; k < 5; k++) {
448	48.2M	tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
449	48.2M	res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
450
451	48.2M	hist[k][0] = hist[k][1];
452	48.2M	hist[k][1] = tmp;
453	48.2M	}
454
455	9.64M	*output++ = res;
456	9.64M	res = 0;
457	9.64M	}
458	150k	}
459	2.35k	}
460
461		av_cold void ff_dcadsp_init(DCADSPContext *s)
462	10.3k	{
463	10.3k	s->decode_hf = decode_hf_c;
464	10.3k	s->decode_joint = decode_joint_c;
465
466	10.3k	s->lfe_fir_float[0] = lfe_fir0_float_c;
467	10.3k	s->lfe_fir_float[1] = lfe_fir1_float_c;
468	10.3k	s->lfe_x96_float = lfe_x96_float_c;
469	10.3k	s->sub_qmf_float[0] = sub_qmf32_float_c;
470	10.3k	s->sub_qmf_float[1] = sub_qmf64_float_c;
471
472	10.3k	s->lfe_fir_fixed = lfe_fir_fixed_c;
473	10.3k	s->lfe_x96_fixed = lfe_x96_fixed_c;
474	10.3k	s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
475	10.3k	s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
476
477	10.3k	s->decor = decor_c;
478
479	10.3k	s->dmix_sub_xch = dmix_sub_xch_c;
480	10.3k	s->dmix_sub = dmix_sub_c;
481	10.3k	s->dmix_add = dmix_add_c;
482	10.3k	s->dmix_scale = dmix_scale_c;
483	10.3k	s->dmix_scale_inv = dmix_scale_inv_c;
484
485	10.3k	s->assemble_freq_bands = assemble_freq_bands_c;
486
487	10.3k	s->lbr_bank = lbr_bank_c;
488	10.3k	s->lfe_iir = lfe_iir_c;
489
490	10.3k	#if ARCH_X86
491	10.3k	ff_dcadsp_init_x86(s);
492	10.3k	#endif
493	10.3k	}