Coverage Report

Created: 2025-07-23 07:59

/src/opus/celt/celt_lpc.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2009-2010 Xiph.Org Foundation
2
   Written by Jean-Marc Valin */
3
/*
4
   Redistribution and use in source and binary forms, with or without
5
   modification, are permitted provided that the following conditions
6
   are met:
7
8
   - Redistributions of source code must retain the above copyright
9
   notice, this list of conditions and the following disclaimer.
10
11
   - Redistributions in binary form must reproduce the above copyright
12
   notice, this list of conditions and the following disclaimer in the
13
   documentation and/or other materials provided with the distribution.
14
15
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
32
#include "celt_lpc.h"
33
#include "stack_alloc.h"
34
#include "mathops.h"
35
#include "pitch.h"
36
37
void _celt_lpc(
38
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39
const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40
int          p
41
)
42
331k
{
43
331k
   int i, j;
44
331k
   opus_val32 r;
45
331k
   opus_val32 error = ac[0];
46
#ifdef FIXED_POINT
47
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
   float *lpc = _lpc;
50
#endif
51
52
331k
   OPUS_CLEAR(lpc, p);
53
#ifdef FIXED_POINT
54
200k
   if (ac[0] != 0)
55
#else
56
130k
   if (ac[0] > 1e-10f)
57
126k
#endif
58
327k
   {
59
3.02M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
2.75M
         opus_val32 rr = 0;
62
24.6M
         for (j = 0; j < i; j++)
63
21.8M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
2.75M
         rr += SHR32(ac[i + 1],6);
65
2.75M
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
2.75M
         lpc[i] = SHR32(r,6);
68
14.3M
         for (j = 0; j < (i+1)>>1; j++)
69
11.6M
         {
70
11.6M
            opus_val32 tmp1, tmp2;
71
11.6M
            tmp1 = lpc[j];
72
11.6M
            tmp2 = lpc[i-1-j];
73
11.6M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
11.6M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
11.6M
         }
76
77
2.75M
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
#ifdef FIXED_POINT
80
1.83M
         if (error<=SHR32(ac[0],10))
81
24.4k
            break;
82
#else
83
912k
         if (error<=.001f*ac[0])
84
24.3k
            break;
85
#endif
86
2.75M
      }
87
327k
   }
88
#ifdef FIXED_POINT
89
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
      int iter, idx = 0;
94
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
200k
      for (iter = 0; iter < 10; iter++) {
97
200k
         maxabs = 0;
98
2.49M
         for (i = 0; i < p; i++) {
99
2.29M
            absval = ABS32(lpc[i]);
100
2.29M
            if (absval > maxabs) {
101
214k
               maxabs = absval;
102
214k
               idx = i;
103
214k
            }
104
2.29M
         }
105
200k
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
200k
         if (maxabs > 32767) {
108
0
            maxabs = MIN32(maxabs, 163838);
109
0
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
0
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
0
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
0
            for (i = 0; i < p - 1; i++) {
115
0
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
0
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
0
            }
118
0
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
200k
         } else {
120
200k
            break;
121
200k
         }
122
200k
      }
123
124
200k
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
0
         OPUS_CLEAR(lpc, p);
128
0
         _lpc[0] = 4096;  /* Q12 */
129
200k
      } else {
130
2.49M
         for (i = 0; i < p; i++) {
131
2.29M
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
2.29M
         }
133
200k
      }
134
   }
135
#endif
136
331k
}
_celt_lpc
Line
Count
Source
42
200k
{
43
200k
   int i, j;
44
200k
   opus_val32 r;
45
200k
   opus_val32 error = ac[0];
46
200k
#ifdef FIXED_POINT
47
200k
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
   float *lpc = _lpc;
50
#endif
51
52
200k
   OPUS_CLEAR(lpc, p);
53
200k
#ifdef FIXED_POINT
54
200k
   if (ac[0] != 0)
55
#else
56
   if (ac[0] > 1e-10f)
57
#endif
58
200k
   {
59
2.01M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
1.83M
         opus_val32 rr = 0;
62
17.2M
         for (j = 0; j < i; j++)
63
15.4M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
1.83M
         rr += SHR32(ac[i + 1],6);
65
1.83M
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
1.83M
         lpc[i] = SHR32(r,6);
68
10.0M
         for (j = 0; j < (i+1)>>1; j++)
69
8.16M
         {
70
8.16M
            opus_val32 tmp1, tmp2;
71
8.16M
            tmp1 = lpc[j];
72
8.16M
            tmp2 = lpc[i-1-j];
73
8.16M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
8.16M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
8.16M
         }
76
77
1.83M
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
1.83M
#ifdef FIXED_POINT
80
1.83M
         if (error<=SHR32(ac[0],10))
81
24.4k
            break;
82
#else
83
         if (error<=.001f*ac[0])
84
            break;
85
#endif
86
1.83M
      }
87
200k
   }
88
200k
#ifdef FIXED_POINT
89
200k
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
200k
      int iter, idx = 0;
94
200k
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
200k
      for (iter = 0; iter < 10; iter++) {
97
200k
         maxabs = 0;
98
2.49M
         for (i = 0; i < p; i++) {
99
2.29M
            absval = ABS32(lpc[i]);
100
2.29M
            if (absval > maxabs) {
101
214k
               maxabs = absval;
102
214k
               idx = i;
103
214k
            }
104
2.29M
         }
105
200k
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
200k
         if (maxabs > 32767) {
108
0
            maxabs = MIN32(maxabs, 163838);
109
0
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
0
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
0
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
0
            for (i = 0; i < p - 1; i++) {
115
0
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
0
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
0
            }
118
0
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
200k
         } else {
120
200k
            break;
121
200k
         }
122
200k
      }
123
124
200k
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
0
         OPUS_CLEAR(lpc, p);
128
0
         _lpc[0] = 4096;  /* Q12 */
129
200k
      } else {
130
2.49M
         for (i = 0; i < p; i++) {
131
2.29M
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
2.29M
         }
133
200k
      }
134
200k
   }
135
200k
#endif
136
200k
}
_celt_lpc
Line
Count
Source
42
130k
{
43
130k
   int i, j;
44
130k
   opus_val32 r;
45
130k
   opus_val32 error = ac[0];
46
#ifdef FIXED_POINT
47
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
130k
   float *lpc = _lpc;
50
130k
#endif
51
52
130k
   OPUS_CLEAR(lpc, p);
53
#ifdef FIXED_POINT
54
   if (ac[0] != 0)
55
#else
56
130k
   if (ac[0] > 1e-10f)
57
126k
#endif
58
126k
   {
59
1.01M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
912k
         opus_val32 rr = 0;
62
7.38M
         for (j = 0; j < i; j++)
63
6.47M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
912k
         rr += SHR32(ac[i + 1],6);
65
912k
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
912k
         lpc[i] = SHR32(r,6);
68
4.37M
         for (j = 0; j < (i+1)>>1; j++)
69
3.46M
         {
70
3.46M
            opus_val32 tmp1, tmp2;
71
3.46M
            tmp1 = lpc[j];
72
3.46M
            tmp2 = lpc[i-1-j];
73
3.46M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
3.46M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
3.46M
         }
76
77
912k
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
#ifdef FIXED_POINT
80
         if (error<=SHR32(ac[0],10))
81
            break;
82
#else
83
912k
         if (error<=.001f*ac[0])
84
24.3k
            break;
85
912k
#endif
86
912k
      }
87
126k
   }
88
#ifdef FIXED_POINT
89
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
      int iter, idx = 0;
94
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
      for (iter = 0; iter < 10; iter++) {
97
         maxabs = 0;
98
         for (i = 0; i < p; i++) {
99
            absval = ABS32(lpc[i]);
100
            if (absval > maxabs) {
101
               maxabs = absval;
102
               idx = i;
103
            }
104
         }
105
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
         if (maxabs > 32767) {
108
            maxabs = MIN32(maxabs, 163838);
109
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
            for (i = 0; i < p - 1; i++) {
115
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
            }
118
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
         } else {
120
            break;
121
         }
122
      }
123
124
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
         OPUS_CLEAR(lpc, p);
128
         _lpc[0] = 4096;  /* Q12 */
129
      } else {
130
         for (i = 0; i < p; i++) {
131
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
         }
133
      }
134
   }
135
#endif
136
130k
}
137
138
139
void celt_fir_c(
140
         const opus_val16 *x,
141
         const opus_val16 *num,
142
         opus_val16 *y,
143
         int N,
144
         int ord,
145
         int arch)
146
78.2k
{
147
78.2k
   int i,j;
148
78.2k
   VARDECL(opus_val16, rnum);
149
78.2k
   SAVE_STACK;
150
78.2k
   celt_assert(x != y);
151
78.2k
   ALLOC(rnum, ord, opus_val16);
152
1.95M
   for(i=0;i<ord;i++)
153
1.87M
      rnum[i] = num[ord-i-1];
154
10.5M
   for (i=0;i<N-3;i+=4)
155
10.4M
   {
156
10.4M
      opus_val32 sum[4];
157
10.4M
      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158
10.4M
      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159
10.4M
      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160
10.4M
      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162
      {
163
         opus_val32 sum_c[4];
164
         memcpy(sum_c, sum, sizeof(sum_c));
165
         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166
#endif
167
10.4M
         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169
0
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170
0
      }
171
0
#endif
172
10.4M
      y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173
10.4M
      y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174
10.4M
      y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175
10.4M
      y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176
0
   }
177
118k
   for (;i<N;i++)
178
39.7k
   {
179
39.7k
      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180
994k
      for (j=0;j<ord;j++)
181
954k
         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182
39.7k
      y[i] = SROUND16(sum, SIG_SHIFT);
183
39.7k
   }
184
0
   RESTORE_STACK;
185
0
}
Unexecuted instantiation: celt_fir_c
celt_fir_c
Line
Count
Source
146
78.2k
{
147
78.2k
   int i,j;
148
78.2k
   VARDECL(opus_val16, rnum);
149
78.2k
   SAVE_STACK;
150
78.2k
   celt_assert(x != y);
151
78.2k
   ALLOC(rnum, ord, opus_val16);
152
1.95M
   for(i=0;i<ord;i++)
153
1.87M
      rnum[i] = num[ord-i-1];
154
10.5M
   for (i=0;i<N-3;i+=4)
155
10.4M
   {
156
10.4M
      opus_val32 sum[4];
157
10.4M
      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158
10.4M
      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159
10.4M
      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160
10.4M
      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162
      {
163
         opus_val32 sum_c[4];
164
         memcpy(sum_c, sum, sizeof(sum_c));
165
         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166
#endif
167
10.4M
         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170
      }
171
#endif
172
10.4M
      y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173
10.4M
      y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174
10.4M
      y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175
10.4M
      y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176
10.4M
   }
177
118k
   for (;i<N;i++)
178
39.7k
   {
179
39.7k
      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180
994k
      for (j=0;j<ord;j++)
181
954k
         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182
39.7k
      y[i] = SROUND16(sum, SIG_SHIFT);
183
39.7k
   }
184
78.2k
   RESTORE_STACK;
185
78.2k
}
186
187
void celt_iir(const opus_val32 *_x,
188
         const opus_val16 *den,
189
         opus_val32 *_y,
190
         int N,
191
         int ord,
192
         opus_val16 *mem,
193
         int arch)
194
216k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
216k
   int i,j;
214
216k
   VARDECL(opus_val16, rden);
215
216k
   VARDECL(opus_val16, y);
216
216k
   SAVE_STACK;
217
218
216k
   celt_assert((ord&3)==0);
219
216k
   ALLOC(rden, ord, opus_val16);
220
216k
   ALLOC(y, N+ord, opus_val16);
221
5.40M
   for(i=0;i<ord;i++)
222
5.18M
      rden[i] = den[ord-i-1];
223
5.40M
   for(i=0;i<ord;i++)
224
5.18M
      y[i] = -mem[ord-i-1];
225
80.0M
   for(;i<N+ord;i++)
226
79.8M
      y[i]=0;
227
20.1M
   for (i=0;i<N-3;i+=4)
228
19.9M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
19.9M
      opus_val32 sum[4];
231
19.9M
      sum[0]=_x[i];
232
19.9M
      sum[1]=_x[i+1];
233
19.9M
      sum[2]=_x[i+2];
234
19.9M
      sum[3]=_x[i+3];
235
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
      {
237
         opus_val32 sum_c[4];
238
         memcpy(sum_c, sum, sizeof(sum_c));
239
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
#endif
241
19.9M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
12.0M
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
12.0M
      }
245
0
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
19.9M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
12.0M
      _y[i  ] = sum[0];
249
19.9M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
19.9M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
12.0M
      _y[i+1] = sum[1];
252
19.9M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
19.9M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
19.9M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
12.0M
      _y[i+2] = sum[2];
256
257
19.9M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
19.9M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
19.9M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
19.9M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
12.0M
      _y[i+3] = sum[3];
262
12.0M
   }
263
216k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
5.40M
   for(i=0;i<ord;i++)
272
5.18M
      mem[i] = _y[N-i-1];
273
137k
   RESTORE_STACK;
274
137k
#endif
275
137k
}
celt_iir
Line
Count
Source
194
137k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
137k
   int i,j;
214
137k
   VARDECL(opus_val16, rden);
215
137k
   VARDECL(opus_val16, y);
216
137k
   SAVE_STACK;
217
218
137k
   celt_assert((ord&3)==0);
219
137k
   ALLOC(rden, ord, opus_val16);
220
137k
   ALLOC(y, N+ord, opus_val16);
221
3.44M
   for(i=0;i<ord;i++)
222
3.30M
      rden[i] = den[ord-i-1];
223
3.44M
   for(i=0;i<ord;i++)
224
3.30M
      y[i] = -mem[ord-i-1];
225
48.2M
   for(;i<N+ord;i++)
226
48.0M
      y[i]=0;
227
12.1M
   for (i=0;i<N-3;i+=4)
228
12.0M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
12.0M
      opus_val32 sum[4];
231
12.0M
      sum[0]=_x[i];
232
12.0M
      sum[1]=_x[i+1];
233
12.0M
      sum[2]=_x[i+2];
234
12.0M
      sum[3]=_x[i+3];
235
12.0M
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
12.0M
      {
237
12.0M
         opus_val32 sum_c[4];
238
12.0M
         memcpy(sum_c, sum, sizeof(sum_c));
239
12.0M
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
12.0M
#endif
241
12.0M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
12.0M
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
12.0M
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
12.0M
      }
245
0
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
12.0M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
12.0M
      _y[i  ] = sum[0];
249
12.0M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
12.0M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
12.0M
      _y[i+1] = sum[1];
252
12.0M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
12.0M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
12.0M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
12.0M
      _y[i+2] = sum[2];
256
257
12.0M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
12.0M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
12.0M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
12.0M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
12.0M
      _y[i+3] = sum[3];
262
12.0M
   }
263
137k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
3.44M
   for(i=0;i<ord;i++)
272
3.30M
      mem[i] = _y[N-i-1];
273
137k
   RESTORE_STACK;
274
137k
#endif
275
137k
}
celt_iir
Line
Count
Source
194
78.2k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
78.2k
   int i,j;
214
78.2k
   VARDECL(opus_val16, rden);
215
78.2k
   VARDECL(opus_val16, y);
216
78.2k
   SAVE_STACK;
217
218
78.2k
   celt_assert((ord&3)==0);
219
78.2k
   ALLOC(rden, ord, opus_val16);
220
78.2k
   ALLOC(y, N+ord, opus_val16);
221
1.95M
   for(i=0;i<ord;i++)
222
1.87M
      rden[i] = den[ord-i-1];
223
1.95M
   for(i=0;i<ord;i++)
224
1.87M
      y[i] = -mem[ord-i-1];
225
31.8M
   for(;i<N+ord;i++)
226
31.7M
      y[i]=0;
227
8.01M
   for (i=0;i<N-3;i+=4)
228
7.94M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
7.94M
      opus_val32 sum[4];
231
7.94M
      sum[0]=_x[i];
232
7.94M
      sum[1]=_x[i+1];
233
7.94M
      sum[2]=_x[i+2];
234
7.94M
      sum[3]=_x[i+3];
235
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
      {
237
         opus_val32 sum_c[4];
238
         memcpy(sum_c, sum, sizeof(sum_c));
239
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
#endif
241
7.94M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
      }
245
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
7.94M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
7.94M
      _y[i  ] = sum[0];
249
7.94M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
7.94M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
7.94M
      _y[i+1] = sum[1];
252
7.94M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
7.94M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
7.94M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
7.94M
      _y[i+2] = sum[2];
256
257
7.94M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
7.94M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
7.94M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
7.94M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
7.94M
      _y[i+3] = sum[3];
262
7.94M
   }
263
78.2k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
1.95M
   for(i=0;i<ord;i++)
272
1.87M
      mem[i] = _y[N-i-1];
273
78.2k
   RESTORE_STACK;
274
78.2k
#endif
275
78.2k
}
276
277
int _celt_autocorr(
278
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
279
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
280
                   const celt_coef  *window,
281
                   int          overlap,
282
                   int          lag,
283
                   int          n,
284
                   int          arch
285
                  )
286
1.50M
{
287
1.50M
   opus_val32 d;
288
1.50M
   int i, k;
289
1.50M
   int fastN=n-lag;
290
1.50M
   int shift;
291
1.50M
   const opus_val16 *xptr;
292
1.50M
   VARDECL(opus_val16, xx);
293
1.50M
   SAVE_STACK;
294
1.50M
   ALLOC(xx, n, opus_val16);
295
1.50M
   celt_assert(n>0);
296
1.50M
   celt_assert(overlap>=0);
297
1.50M
   if (overlap == 0)
298
1.30M
   {
299
1.30M
      xptr = x;
300
1.30M
   } else {
301
218M
      for (i=0;i<n;i++)
302
217M
         xx[i] = x[i];
303
25.7M
      for (i=0;i<overlap;i++)
304
25.5M
      {
305
25.5M
         opus_val16 w = COEF2VAL16(window[i]);
306
25.5M
         xx[i] = MULT16_16_Q15(x[i],w);
307
25.5M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
25.5M
      }
309
194k
      xptr = xx;
310
194k
   }
311
1.50M
   shift=0;
312
#ifdef FIXED_POINT
313
   {
314
      opus_val32 ac0;
315
      int ac0_shift = celt_ilog2(n + (n>>4));
316
      ac0 = 1+(n<<7);
317
1.37M
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
277M
      for(i=(n&1);i<n;i+=2)
319
276M
      {
320
276M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
276M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
276M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
1.37M
      ac0 += SHR32(ac0,7);
325
326
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
      shift = (shift)/2;
328
1.37M
      if (shift>0)
329
327k
      {
330
72.7M
         for(i=0;i<n;i++)
331
72.4M
            xx[i] = PSHR32(xptr[i], shift);
332
327k
         xptr = xx;
333
327k
      } else
334
1.04M
         shift = 0;
335
   }
336
#endif
337
1.50M
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
19.8M
   for (k=0;k<=lag;k++)
339
18.3M
   {
340
149M
      for (i = k+fastN, d = 0; i < n; i++)
341
131M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
18.3M
      ac[k] += d;
343
18.3M
   }
344
#ifdef FIXED_POINT
345
   shift = 2*shift;
346
1.37M
   if (shift<=0)
347
1.04M
      ac[0] += SHL32((opus_int32)1, -shift);
348
1.37M
   if (ac[0] < 268435456)
349
872k
   {
350
872k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
11.5M
      for (i=0;i<=lag;i++)
352
10.6M
         ac[i] = SHL32(ac[i], shift2);
353
872k
      shift -= shift2;
354
872k
   } else if (ac[0] >= 536870912)
355
444k
   {
356
444k
      int shift2=1;
357
444k
      if (ac[0] >= 1073741824)
358
210k
         shift2++;
359
5.98M
      for (i=0;i<=lag;i++)
360
5.53M
         ac[i] = SHR32(ac[i], shift2);
361
444k
      shift += shift2;
362
444k
   }
363
#endif
364
365
1.50M
   RESTORE_STACK;
366
1.50M
   return shift;
367
1.50M
}
_celt_autocorr
Line
Count
Source
286
685k
{
287
685k
   opus_val32 d;
288
685k
   int i, k;
289
685k
   int fastN=n-lag;
290
685k
   int shift;
291
685k
   const opus_val16 *xptr;
292
685k
   VARDECL(opus_val16, xx);
293
685k
   SAVE_STACK;
294
685k
   ALLOC(xx, n, opus_val16);
295
685k
   celt_assert(n>0);
296
685k
   celt_assert(overlap>=0);
297
685k
   if (overlap == 0)
298
611k
   {
299
611k
      xptr = x;
300
611k
   } else {
301
83.4M
      for (i=0;i<n;i++)
302
83.3M
         xx[i] = x[i];
303
9.84M
      for (i=0;i<overlap;i++)
304
9.76M
      {
305
9.76M
         opus_val16 w = COEF2VAL16(window[i]);
306
9.76M
         xx[i] = MULT16_16_Q15(x[i],w);
307
9.76M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
9.76M
      }
309
74.3k
      xptr = xx;
310
74.3k
   }
311
685k
   shift=0;
312
685k
#ifdef FIXED_POINT
313
685k
   {
314
685k
      opus_val32 ac0;
315
685k
      int ac0_shift = celt_ilog2(n + (n>>4));
316
685k
      ac0 = 1+(n<<7);
317
685k
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
138M
      for(i=(n&1);i<n;i+=2)
319
138M
      {
320
138M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
138M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
138M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
685k
      ac0 += SHR32(ac0,7);
325
326
685k
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
685k
      shift = (shift)/2;
328
685k
      if (shift>0)
329
163k
      {
330
36.3M
         for(i=0;i<n;i++)
331
36.2M
            xx[i] = PSHR32(xptr[i], shift);
332
163k
         xptr = xx;
333
163k
      } else
334
521k
         shift = 0;
335
685k
   }
336
685k
#endif
337
685k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
9.09M
   for (k=0;k<=lag;k++)
339
8.41M
   {
340
66.8M
      for (i = k+fastN, d = 0; i < n; i++)
341
58.4M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
8.41M
      ac[k] += d;
343
8.41M
   }
344
685k
#ifdef FIXED_POINT
345
685k
   shift = 2*shift;
346
685k
   if (shift<=0)
347
521k
      ac[0] += SHL32((opus_int32)1, -shift);
348
685k
   if (ac[0] < 268435456)
349
436k
   {
350
436k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
5.75M
      for (i=0;i<=lag;i++)
352
5.32M
         ac[i] = SHL32(ac[i], shift2);
353
436k
      shift -= shift2;
354
436k
   } else if (ac[0] >= 536870912)
355
222k
   {
356
222k
      int shift2=1;
357
222k
      if (ac[0] >= 1073741824)
358
105k
         shift2++;
359
2.99M
      for (i=0;i<=lag;i++)
360
2.76M
         ac[i] = SHR32(ac[i], shift2);
361
222k
      shift += shift2;
362
222k
   }
363
685k
#endif
364
365
685k
   RESTORE_STACK;
366
685k
   return shift;
367
685k
}
_celt_autocorr
Line
Count
Source
286
685k
{
287
685k
   opus_val32 d;
288
685k
   int i, k;
289
685k
   int fastN=n-lag;
290
685k
   int shift;
291
685k
   const opus_val16 *xptr;
292
685k
   VARDECL(opus_val16, xx);
293
685k
   SAVE_STACK;
294
685k
   ALLOC(xx, n, opus_val16);
295
685k
   celt_assert(n>0);
296
685k
   celt_assert(overlap>=0);
297
685k
   if (overlap == 0)
298
611k
   {
299
611k
      xptr = x;
300
611k
   } else {
301
83.4M
      for (i=0;i<n;i++)
302
83.3M
         xx[i] = x[i];
303
9.84M
      for (i=0;i<overlap;i++)
304
9.76M
      {
305
9.76M
         opus_val16 w = COEF2VAL16(window[i]);
306
9.76M
         xx[i] = MULT16_16_Q15(x[i],w);
307
9.76M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
9.76M
      }
309
74.3k
      xptr = xx;
310
74.3k
   }
311
685k
   shift=0;
312
685k
#ifdef FIXED_POINT
313
685k
   {
314
685k
      opus_val32 ac0;
315
685k
      int ac0_shift = celt_ilog2(n + (n>>4));
316
685k
      ac0 = 1+(n<<7);
317
685k
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
138M
      for(i=(n&1);i<n;i+=2)
319
138M
      {
320
138M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
138M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
138M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
685k
      ac0 += SHR32(ac0,7);
325
326
685k
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
685k
      shift = (shift)/2;
328
685k
      if (shift>0)
329
163k
      {
330
36.3M
         for(i=0;i<n;i++)
331
36.2M
            xx[i] = PSHR32(xptr[i], shift);
332
163k
         xptr = xx;
333
163k
      } else
334
521k
         shift = 0;
335
685k
   }
336
685k
#endif
337
685k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
9.09M
   for (k=0;k<=lag;k++)
339
8.41M
   {
340
66.8M
      for (i = k+fastN, d = 0; i < n; i++)
341
58.4M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
8.41M
      ac[k] += d;
343
8.41M
   }
344
685k
#ifdef FIXED_POINT
345
685k
   shift = 2*shift;
346
685k
   if (shift<=0)
347
521k
      ac[0] += SHL32((opus_int32)1, -shift);
348
685k
   if (ac[0] < 268435456)
349
436k
   {
350
436k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
5.75M
      for (i=0;i<=lag;i++)
352
5.32M
         ac[i] = SHL32(ac[i], shift2);
353
436k
      shift -= shift2;
354
436k
   } else if (ac[0] >= 536870912)
355
222k
   {
356
222k
      int shift2=1;
357
222k
      if (ac[0] >= 1073741824)
358
105k
         shift2++;
359
2.99M
      for (i=0;i<=lag;i++)
360
2.76M
         ac[i] = SHR32(ac[i], shift2);
361
222k
      shift += shift2;
362
222k
   }
363
685k
#endif
364
365
685k
   RESTORE_STACK;
366
685k
   return shift;
367
685k
}
_celt_autocorr
Line
Count
Source
286
130k
{
287
130k
   opus_val32 d;
288
130k
   int i, k;
289
130k
   int fastN=n-lag;
290
130k
   int shift;
291
130k
   const opus_val16 *xptr;
292
130k
   VARDECL(opus_val16, xx);
293
130k
   SAVE_STACK;
294
130k
   ALLOC(xx, n, opus_val16);
295
130k
   celt_assert(n>0);
296
130k
   celt_assert(overlap>=0);
297
130k
   if (overlap == 0)
298
84.9k
   {
299
84.9k
      xptr = x;
300
84.9k
   } else {
301
51.1M
      for (i=0;i<n;i++)
302
51.1M
         xx[i] = x[i];
303
6.03M
      for (i=0;i<overlap;i++)
304
5.99M
      {
305
5.99M
         opus_val16 w = COEF2VAL16(window[i]);
306
5.99M
         xx[i] = MULT16_16_Q15(x[i],w);
307
5.99M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
5.99M
      }
309
45.3k
      xptr = xx;
310
45.3k
   }
311
130k
   shift=0;
312
#ifdef FIXED_POINT
313
   {
314
      opus_val32 ac0;
315
      int ac0_shift = celt_ilog2(n + (n>>4));
316
      ac0 = 1+(n<<7);
317
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
      for(i=(n&1);i<n;i+=2)
319
      {
320
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
      ac0 += SHR32(ac0,7);
325
326
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
      shift = (shift)/2;
328
      if (shift>0)
329
      {
330
         for(i=0;i<n;i++)
331
            xx[i] = PSHR32(xptr[i], shift);
332
         xptr = xx;
333
      } else
334
         shift = 0;
335
   }
336
#endif
337
130k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
1.68M
   for (k=0;k<=lag;k++)
339
1.55M
   {
340
16.0M
      for (i = k+fastN, d = 0; i < n; i++)
341
14.4M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
1.55M
      ac[k] += d;
343
1.55M
   }
344
#ifdef FIXED_POINT
345
   shift = 2*shift;
346
   if (shift<=0)
347
      ac[0] += SHL32((opus_int32)1, -shift);
348
   if (ac[0] < 268435456)
349
   {
350
      int shift2 = 29 - EC_ILOG(ac[0]);
351
      for (i=0;i<=lag;i++)
352
         ac[i] = SHL32(ac[i], shift2);
353
      shift -= shift2;
354
   } else if (ac[0] >= 536870912)
355
   {
356
      int shift2=1;
357
      if (ac[0] >= 1073741824)
358
         shift2++;
359
      for (i=0;i<=lag;i++)
360
         ac[i] = SHR32(ac[i], shift2);
361
      shift += shift2;
362
   }
363
#endif
364
365
130k
   RESTORE_STACK;
366
130k
   return shift;
367
130k
}