Coverage Report

Created: 2025-07-11 07:51

/src/opus/celt/celt_lpc.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2009-2010 Xiph.Org Foundation
2
   Written by Jean-Marc Valin */
3
/*
4
   Redistribution and use in source and binary forms, with or without
5
   modification, are permitted provided that the following conditions
6
   are met:
7
8
   - Redistributions of source code must retain the above copyright
9
   notice, this list of conditions and the following disclaimer.
10
11
   - Redistributions in binary form must reproduce the above copyright
12
   notice, this list of conditions and the following disclaimer in the
13
   documentation and/or other materials provided with the distribution.
14
15
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
32
#include "celt_lpc.h"
33
#include "stack_alloc.h"
34
#include "mathops.h"
35
#include "pitch.h"
36
37
void _celt_lpc(
38
      opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
39
const opus_val32 *ac,  /* in:  [0...p] autocorrelation values  */
40
int          p
41
)
42
1.00M
{
43
1.00M
   int i, j;
44
1.00M
   opus_val32 r;
45
1.00M
   opus_val32 error = ac[0];
46
#ifdef FIXED_POINT
47
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
   float *lpc = _lpc;
50
#endif
51
52
1.00M
   OPUS_CLEAR(lpc, p);
53
#ifdef FIXED_POINT
54
193k
   if (ac[0] != 0)
55
#else
56
812k
   if (ac[0] > 1e-10f)
57
808k
#endif
58
1.00M
   {
59
6.34M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
5.38M
         opus_val32 rr = 0;
62
30.5M
         for (j = 0; j < i; j++)
63
25.1M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
5.38M
         rr += SHR32(ac[i + 1],6);
65
5.38M
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
5.38M
         lpc[i] = SHR32(r,6);
68
19.3M
         for (j = 0; j < (i+1)>>1; j++)
69
13.9M
         {
70
13.9M
            opus_val32 tmp1, tmp2;
71
13.9M
            tmp1 = lpc[j];
72
13.9M
            tmp2 = lpc[i-1-j];
73
13.9M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
13.9M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
13.9M
         }
76
77
5.38M
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
#ifdef FIXED_POINT
80
1.74M
         if (error<=SHR32(ac[0],10))
81
23.8k
            break;
82
#else
83
3.63M
         if (error<=.001f*ac[0])
84
24.3k
            break;
85
#endif
86
5.38M
      }
87
1.00M
   }
88
#ifdef FIXED_POINT
89
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
      int iter, idx = 0;
94
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
193k
      for (iter = 0; iter < 10; iter++) {
97
193k
         maxabs = 0;
98
2.38M
         for (i = 0; i < p; i++) {
99
2.19M
            absval = ABS32(lpc[i]);
100
2.19M
            if (absval > maxabs) {
101
206k
               maxabs = absval;
102
206k
               idx = i;
103
206k
            }
104
2.19M
         }
105
193k
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
193k
         if (maxabs > 32767) {
108
0
            maxabs = MIN32(maxabs, 163838);
109
0
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
0
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
0
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
0
            for (i = 0; i < p - 1; i++) {
115
0
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
0
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
0
            }
118
0
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
193k
         } else {
120
193k
            break;
121
193k
         }
122
193k
      }
123
124
193k
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
0
         OPUS_CLEAR(lpc, p);
128
0
         _lpc[0] = 4096;  /* Q12 */
129
193k
      } else {
130
2.38M
         for (i = 0; i < p; i++) {
131
2.19M
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
2.19M
         }
133
193k
      }
134
   }
135
#endif
136
1.00M
}
_celt_lpc
Line
Count
Source
42
193k
{
43
193k
   int i, j;
44
193k
   opus_val32 r;
45
193k
   opus_val32 error = ac[0];
46
193k
#ifdef FIXED_POINT
47
193k
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
   float *lpc = _lpc;
50
#endif
51
52
193k
   OPUS_CLEAR(lpc, p);
53
193k
#ifdef FIXED_POINT
54
193k
   if (ac[0] != 0)
55
#else
56
   if (ac[0] > 1e-10f)
57
#endif
58
193k
   {
59
1.91M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
1.74M
         opus_val32 rr = 0;
62
16.3M
         for (j = 0; j < i; j++)
63
14.5M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
1.74M
         rr += SHR32(ac[i + 1],6);
65
1.74M
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
1.74M
         lpc[i] = SHR32(r,6);
68
9.48M
         for (j = 0; j < (i+1)>>1; j++)
69
7.73M
         {
70
7.73M
            opus_val32 tmp1, tmp2;
71
7.73M
            tmp1 = lpc[j];
72
7.73M
            tmp2 = lpc[i-1-j];
73
7.73M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
7.73M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
7.73M
         }
76
77
1.74M
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
1.74M
#ifdef FIXED_POINT
80
1.74M
         if (error<=SHR32(ac[0],10))
81
23.8k
            break;
82
#else
83
         if (error<=.001f*ac[0])
84
            break;
85
#endif
86
1.74M
      }
87
193k
   }
88
193k
#ifdef FIXED_POINT
89
193k
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
193k
      int iter, idx = 0;
94
193k
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
193k
      for (iter = 0; iter < 10; iter++) {
97
193k
         maxabs = 0;
98
2.38M
         for (i = 0; i < p; i++) {
99
2.19M
            absval = ABS32(lpc[i]);
100
2.19M
            if (absval > maxabs) {
101
206k
               maxabs = absval;
102
206k
               idx = i;
103
206k
            }
104
2.19M
         }
105
193k
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
193k
         if (maxabs > 32767) {
108
0
            maxabs = MIN32(maxabs, 163838);
109
0
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
0
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
0
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
0
            for (i = 0; i < p - 1; i++) {
115
0
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
0
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
0
            }
118
0
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
193k
         } else {
120
193k
            break;
121
193k
         }
122
193k
      }
123
124
193k
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
0
         OPUS_CLEAR(lpc, p);
128
0
         _lpc[0] = 4096;  /* Q12 */
129
193k
      } else {
130
2.38M
         for (i = 0; i < p; i++) {
131
2.19M
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
2.19M
         }
133
193k
      }
134
193k
   }
135
193k
#endif
136
193k
}
_celt_lpc
Line
Count
Source
42
812k
{
43
812k
   int i, j;
44
812k
   opus_val32 r;
45
812k
   opus_val32 error = ac[0];
46
#ifdef FIXED_POINT
47
   opus_val32 lpc[CELT_LPC_ORDER];
48
#else
49
812k
   float *lpc = _lpc;
50
812k
#endif
51
52
812k
   OPUS_CLEAR(lpc, p);
53
#ifdef FIXED_POINT
54
   if (ac[0] != 0)
55
#else
56
812k
   if (ac[0] > 1e-10f)
57
808k
#endif
58
808k
   {
59
4.42M
      for (i = 0; i < p; i++) {
60
         /* Sum up this iteration's reflection coefficient */
61
3.63M
         opus_val32 rr = 0;
62
14.1M
         for (j = 0; j < i; j++)
63
10.5M
            rr += MULT32_32_Q31(lpc[j],ac[i - j]);
64
3.63M
         rr += SHR32(ac[i + 1],6);
65
3.63M
         r = -frac_div32(SHL32(rr,6), error);
66
         /*  Update LPC coefficients and total error */
67
3.63M
         lpc[i] = SHR32(r,6);
68
9.82M
         for (j = 0; j < (i+1)>>1; j++)
69
6.18M
         {
70
6.18M
            opus_val32 tmp1, tmp2;
71
6.18M
            tmp1 = lpc[j];
72
6.18M
            tmp2 = lpc[i-1-j];
73
6.18M
            lpc[j]     = tmp1 + MULT32_32_Q31(r,tmp2);
74
6.18M
            lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
75
6.18M
         }
76
77
3.63M
         error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
78
         /* Bail out once we get 30 dB gain */
79
#ifdef FIXED_POINT
80
         if (error<=SHR32(ac[0],10))
81
            break;
82
#else
83
3.63M
         if (error<=.001f*ac[0])
84
24.3k
            break;
85
3.63M
#endif
86
3.63M
      }
87
808k
   }
88
#ifdef FIXED_POINT
89
   {
90
      /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds.
91
         This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug
92
         fixes should also be applied there. */
93
      int iter, idx = 0;
94
      opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16;
95
96
      for (iter = 0; iter < 10; iter++) {
97
         maxabs = 0;
98
         for (i = 0; i < p; i++) {
99
            absval = ABS32(lpc[i]);
100
            if (absval > maxabs) {
101
               maxabs = absval;
102
               idx = i;
103
            }
104
         }
105
         maxabs = PSHR32(maxabs, 13);  /* Q25->Q12 */
106
107
         if (maxabs > 32767) {
108
            maxabs = MIN32(maxabs, 163838);
109
            chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14),
110
                                                    SHR32(MULT32_32_32(maxabs, idx + 1), 2));
111
            chirp_minus_one_Q16 = chirp_Q16 - 65536;
112
113
            /* Apply bandwidth expansion. */
114
            for (i = 0; i < p - 1; i++) {
115
               lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]);
116
               chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16);
117
            }
118
            lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]);
119
         } else {
120
            break;
121
         }
122
      }
123
124
      if (iter == 10) {
125
         /* If the coeffs still do not fit into the 16 bit range after 10 iterations,
126
            fall back to the A(z)=1 filter. */
127
         OPUS_CLEAR(lpc, p);
128
         _lpc[0] = 4096;  /* Q12 */
129
      } else {
130
         for (i = 0; i < p; i++) {
131
            _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13));  /* Q25->Q12 */
132
         }
133
      }
134
   }
135
#endif
136
812k
}
137
138
139
void celt_fir_c(
140
         const opus_val16 *x,
141
         const opus_val16 *num,
142
         opus_val16 *y,
143
         int N,
144
         int ord,
145
         int arch)
146
77.9k
{
147
77.9k
   int i,j;
148
77.9k
   VARDECL(opus_val16, rnum);
149
77.9k
   SAVE_STACK;
150
77.9k
   celt_assert(x != y);
151
77.9k
   ALLOC(rnum, ord, opus_val16);
152
1.94M
   for(i=0;i<ord;i++)
153
1.87M
      rnum[i] = num[ord-i-1];
154
10.6M
   for (i=0;i<N-3;i+=4)
155
10.5M
   {
156
10.5M
      opus_val32 sum[4];
157
10.5M
      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158
10.5M
      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159
10.5M
      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160
10.5M
      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162
      {
163
         opus_val32 sum_c[4];
164
         memcpy(sum_c, sum, sizeof(sum_c));
165
         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166
#endif
167
10.5M
         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169
0
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170
0
      }
171
0
#endif
172
10.5M
      y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173
10.5M
      y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174
10.5M
      y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175
10.5M
      y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176
0
   }
177
117k
   for (;i<N;i++)
178
39.6k
   {
179
39.6k
      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180
990k
      for (j=0;j<ord;j++)
181
950k
         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182
39.6k
      y[i] = SROUND16(sum, SIG_SHIFT);
183
39.6k
   }
184
0
   RESTORE_STACK;
185
0
}
Unexecuted instantiation: celt_fir_c
celt_fir_c
Line
Count
Source
146
77.9k
{
147
77.9k
   int i,j;
148
77.9k
   VARDECL(opus_val16, rnum);
149
77.9k
   SAVE_STACK;
150
77.9k
   celt_assert(x != y);
151
77.9k
   ALLOC(rnum, ord, opus_val16);
152
1.94M
   for(i=0;i<ord;i++)
153
1.87M
      rnum[i] = num[ord-i-1];
154
10.6M
   for (i=0;i<N-3;i+=4)
155
10.5M
   {
156
10.5M
      opus_val32 sum[4];
157
10.5M
      sum[0] = SHL32(EXTEND32(x[i  ]), SIG_SHIFT);
158
10.5M
      sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
159
10.5M
      sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
160
10.5M
      sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
161
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
162
      {
163
         opus_val32 sum_c[4];
164
         memcpy(sum_c, sum, sizeof(sum_c));
165
         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
166
#endif
167
10.5M
         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
168
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
169
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
170
      }
171
#endif
172
10.5M
      y[i  ] = SROUND16(sum[0], SIG_SHIFT);
173
10.5M
      y[i+1] = SROUND16(sum[1], SIG_SHIFT);
174
10.5M
      y[i+2] = SROUND16(sum[2], SIG_SHIFT);
175
10.5M
      y[i+3] = SROUND16(sum[3], SIG_SHIFT);
176
10.5M
   }
177
117k
   for (;i<N;i++)
178
39.6k
   {
179
39.6k
      opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
180
990k
      for (j=0;j<ord;j++)
181
950k
         sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
182
39.6k
      y[i] = SROUND16(sum, SIG_SHIFT);
183
39.6k
   }
184
77.9k
   RESTORE_STACK;
185
77.9k
}
186
187
void celt_iir(const opus_val32 *_x,
188
         const opus_val16 *den,
189
         opus_val32 *_y,
190
         int N,
191
         int ord,
192
         opus_val16 *mem,
193
         int arch)
194
211k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
211k
   int i,j;
214
211k
   VARDECL(opus_val16, rden);
215
211k
   VARDECL(opus_val16, y);
216
211k
   SAVE_STACK;
217
218
211k
   celt_assert((ord&3)==0);
219
211k
   ALLOC(rden, ord, opus_val16);
220
211k
   ALLOC(y, N+ord, opus_val16);
221
5.28M
   for(i=0;i<ord;i++)
222
5.06M
      rden[i] = den[ord-i-1];
223
5.28M
   for(i=0;i<ord;i++)
224
5.06M
      y[i] = -mem[ord-i-1];
225
78.5M
   for(;i<N+ord;i++)
226
78.3M
      y[i]=0;
227
19.8M
   for (i=0;i<N-3;i+=4)
228
19.5M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
19.5M
      opus_val32 sum[4];
231
19.5M
      sum[0]=_x[i];
232
19.5M
      sum[1]=_x[i+1];
233
19.5M
      sum[2]=_x[i+2];
234
19.5M
      sum[3]=_x[i+3];
235
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
      {
237
         opus_val32 sum_c[4];
238
         memcpy(sum_c, sum, sizeof(sum_c));
239
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
#endif
241
19.5M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
11.6M
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
11.6M
      }
245
0
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
19.5M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
11.6M
      _y[i  ] = sum[0];
249
19.5M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
19.5M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
11.6M
      _y[i+1] = sum[1];
252
19.5M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
19.5M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
19.5M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
11.6M
      _y[i+2] = sum[2];
256
257
19.5M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
19.5M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
19.5M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
19.5M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
11.6M
      _y[i+3] = sum[3];
262
11.6M
   }
263
211k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
5.28M
   for(i=0;i<ord;i++)
272
5.06M
      mem[i] = _y[N-i-1];
273
133k
   RESTORE_STACK;
274
133k
#endif
275
133k
}
celt_iir
Line
Count
Source
194
133k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
133k
   int i,j;
214
133k
   VARDECL(opus_val16, rden);
215
133k
   VARDECL(opus_val16, y);
216
133k
   SAVE_STACK;
217
218
133k
   celt_assert((ord&3)==0);
219
133k
   ALLOC(rden, ord, opus_val16);
220
133k
   ALLOC(y, N+ord, opus_val16);
221
3.33M
   for(i=0;i<ord;i++)
222
3.19M
      rden[i] = den[ord-i-1];
223
3.33M
   for(i=0;i<ord;i++)
224
3.19M
      y[i] = -mem[ord-i-1];
225
46.8M
   for(;i<N+ord;i++)
226
46.7M
      y[i]=0;
227
11.8M
   for (i=0;i<N-3;i+=4)
228
11.6M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
11.6M
      opus_val32 sum[4];
231
11.6M
      sum[0]=_x[i];
232
11.6M
      sum[1]=_x[i+1];
233
11.6M
      sum[2]=_x[i+2];
234
11.6M
      sum[3]=_x[i+3];
235
11.6M
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
11.6M
      {
237
11.6M
         opus_val32 sum_c[4];
238
11.6M
         memcpy(sum_c, sum, sizeof(sum_c));
239
11.6M
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
11.6M
#endif
241
11.6M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
11.6M
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
11.6M
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
11.6M
      }
245
0
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
11.6M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
11.6M
      _y[i  ] = sum[0];
249
11.6M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
11.6M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
11.6M
      _y[i+1] = sum[1];
252
11.6M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
11.6M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
11.6M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
11.6M
      _y[i+2] = sum[2];
256
257
11.6M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
11.6M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
11.6M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
11.6M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
11.6M
      _y[i+3] = sum[3];
262
11.6M
   }
263
133k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
3.33M
   for(i=0;i<ord;i++)
272
3.19M
      mem[i] = _y[N-i-1];
273
133k
   RESTORE_STACK;
274
133k
#endif
275
133k
}
celt_iir
Line
Count
Source
194
77.9k
{
195
#ifdef SMALL_FOOTPRINT
196
   int i,j;
197
   (void)arch;
198
   for (i=0;i<N;i++)
199
   {
200
      opus_val32 sum = _x[i];
201
      for (j=0;j<ord;j++)
202
      {
203
         sum -= MULT16_16(den[j],mem[j]);
204
      }
205
      for (j=ord-1;j>=1;j--)
206
      {
207
         mem[j]=mem[j-1];
208
      }
209
      mem[0] = SROUND16(sum, SIG_SHIFT);
210
      _y[i] = sum;
211
   }
212
#else
213
77.9k
   int i,j;
214
77.9k
   VARDECL(opus_val16, rden);
215
77.9k
   VARDECL(opus_val16, y);
216
77.9k
   SAVE_STACK;
217
218
77.9k
   celt_assert((ord&3)==0);
219
77.9k
   ALLOC(rden, ord, opus_val16);
220
77.9k
   ALLOC(y, N+ord, opus_val16);
221
1.94M
   for(i=0;i<ord;i++)
222
1.87M
      rden[i] = den[ord-i-1];
223
1.94M
   for(i=0;i<ord;i++)
224
1.87M
      y[i] = -mem[ord-i-1];
225
31.7M
   for(;i<N+ord;i++)
226
31.6M
      y[i]=0;
227
7.99M
   for (i=0;i<N-3;i+=4)
228
7.91M
   {
229
      /* Unroll by 4 as if it were an FIR filter */
230
7.91M
      opus_val32 sum[4];
231
7.91M
      sum[0]=_x[i];
232
7.91M
      sum[1]=_x[i+1];
233
7.91M
      sum[2]=_x[i+2];
234
7.91M
      sum[3]=_x[i+3];
235
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
236
      {
237
         opus_val32 sum_c[4];
238
         memcpy(sum_c, sum, sizeof(sum_c));
239
         xcorr_kernel_c(rden, y+i, sum_c, ord);
240
#endif
241
7.91M
         xcorr_kernel(rden, y+i, sum, ord, arch);
242
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
243
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
244
      }
245
#endif
246
      /* Patch up the result to compensate for the fact that this is an IIR */
247
7.91M
      y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
248
7.91M
      _y[i  ] = sum[0];
249
7.91M
      sum[1] = MAC16_16(sum[1], y[i+ord  ], den[0]);
250
7.91M
      y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT);
251
7.91M
      _y[i+1] = sum[1];
252
7.91M
      sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]);
253
7.91M
      sum[2] = MAC16_16(sum[2], y[i+ord  ], den[1]);
254
7.91M
      y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT);
255
7.91M
      _y[i+2] = sum[2];
256
257
7.91M
      sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]);
258
7.91M
      sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]);
259
7.91M
      sum[3] = MAC16_16(sum[3], y[i+ord  ], den[2]);
260
7.91M
      y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT);
261
7.91M
      _y[i+3] = sum[3];
262
7.91M
   }
263
77.9k
   for (;i<N;i++)
264
0
   {
265
0
      opus_val32 sum = _x[i];
266
0
      for (j=0;j<ord;j++)
267
0
         sum -= MULT16_16(rden[j],y[i+j]);
268
0
      y[i+ord] = SROUND16(sum,SIG_SHIFT);
269
0
      _y[i] = sum;
270
0
   }
271
1.94M
   for(i=0;i<ord;i++)
272
1.87M
      mem[i] = _y[N-i-1];
273
77.9k
   RESTORE_STACK;
274
77.9k
#endif
275
77.9k
}
276
277
int _celt_autocorr(
278
                   const opus_val16 *x,   /*  in: [0...n-1] samples x   */
279
                   opus_val32       *ac,  /* out: [0...lag-1] ac values */
280
                   const celt_coef  *window,
281
                   int          overlap,
282
                   int          lag,
283
                   int          n,
284
                   int          arch
285
                  )
286
2.17M
{
287
2.17M
   opus_val32 d;
288
2.17M
   int i, k;
289
2.17M
   int fastN=n-lag;
290
2.17M
   int shift;
291
2.17M
   const opus_val16 *xptr;
292
2.17M
   VARDECL(opus_val16, xx);
293
2.17M
   SAVE_STACK;
294
2.17M
   ALLOC(xx, n, opus_val16);
295
2.17M
   celt_assert(n>0);
296
2.17M
   celt_assert(overlap>=0);
297
2.17M
   if (overlap == 0)
298
1.98M
   {
299
1.98M
      xptr = x;
300
1.98M
   } else {
301
209M
      for (i=0;i<n;i++)
302
208M
         xx[i] = x[i];
303
24.6M
      for (i=0;i<overlap;i++)
304
24.4M
      {
305
24.4M
         opus_val16 w = COEF2VAL16(window[i]);
306
24.4M
         xx[i] = MULT16_16_Q15(x[i],w);
307
24.4M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
24.4M
      }
309
187k
      xptr = xx;
310
187k
   }
311
2.17M
   shift=0;
312
#ifdef FIXED_POINT
313
   {
314
      opus_val32 ac0;
315
      int ac0_shift = celt_ilog2(n + (n>>4));
316
      ac0 = 1+(n<<7);
317
1.36M
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
268M
      for(i=(n&1);i<n;i+=2)
319
267M
      {
320
267M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
267M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
267M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
1.36M
      ac0 += SHR32(ac0,7);
325
326
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
      shift = (shift)/2;
328
1.36M
      if (shift>0)
329
333k
      {
330
73.9M
         for(i=0;i<n;i++)
331
73.6M
            xx[i] = PSHR32(xptr[i], shift);
332
333k
         xptr = xx;
333
333k
      } else
334
1.03M
         shift = 0;
335
   }
336
#endif
337
2.17M
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
23.8M
   for (k=0;k<=lag;k++)
339
21.6M
   {
340
158M
      for (i = k+fastN, d = 0; i < n; i++)
341
136M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
21.6M
      ac[k] += d;
343
21.6M
   }
344
#ifdef FIXED_POINT
345
   shift = 2*shift;
346
1.36M
   if (shift<=0)
347
1.03M
      ac[0] += SHL32((opus_int32)1, -shift);
348
1.36M
   if (ac[0] < 268435456)
349
858k
   {
350
858k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
11.3M
      for (i=0;i<=lag;i++)
352
10.4M
         ac[i] = SHL32(ac[i], shift2);
353
858k
      shift -= shift2;
354
858k
   } else if (ac[0] >= 536870912)
355
451k
   {
356
451k
      int shift2=1;
357
451k
      if (ac[0] >= 1073741824)
358
213k
         shift2++;
359
6.07M
      for (i=0;i<=lag;i++)
360
5.62M
         ac[i] = SHR32(ac[i], shift2);
361
451k
      shift += shift2;
362
451k
   }
363
#endif
364
365
2.17M
   RESTORE_STACK;
366
2.17M
   return shift;
367
2.17M
}
_celt_autocorr
Line
Count
Source
286
682k
{
287
682k
   opus_val32 d;
288
682k
   int i, k;
289
682k
   int fastN=n-lag;
290
682k
   int shift;
291
682k
   const opus_val16 *xptr;
292
682k
   VARDECL(opus_val16, xx);
293
682k
   SAVE_STACK;
294
682k
   ALLOC(xx, n, opus_val16);
295
682k
   celt_assert(n>0);
296
682k
   celt_assert(overlap>=0);
297
682k
   if (overlap == 0)
298
611k
   {
299
611k
      xptr = x;
300
611k
   } else {
301
79.0M
      for (i=0;i<n;i++)
302
78.9M
         xx[i] = x[i];
303
9.32M
      for (i=0;i<overlap;i++)
304
9.25M
      {
305
9.25M
         opus_val16 w = COEF2VAL16(window[i]);
306
9.25M
         xx[i] = MULT16_16_Q15(x[i],w);
307
9.25M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
9.25M
      }
309
71.0k
      xptr = xx;
310
71.0k
   }
311
682k
   shift=0;
312
682k
#ifdef FIXED_POINT
313
682k
   {
314
682k
      opus_val32 ac0;
315
682k
      int ac0_shift = celt_ilog2(n + (n>>4));
316
682k
      ac0 = 1+(n<<7);
317
682k
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
134M
      for(i=(n&1);i<n;i+=2)
319
133M
      {
320
133M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
133M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
133M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
682k
      ac0 += SHR32(ac0,7);
325
326
682k
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
682k
      shift = (shift)/2;
328
682k
      if (shift>0)
329
166k
      {
330
36.9M
         for(i=0;i<n;i++)
331
36.8M
            xx[i] = PSHR32(xptr[i], shift);
332
166k
         xptr = xx;
333
166k
      } else
334
515k
         shift = 0;
335
682k
   }
336
682k
#endif
337
682k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
9.04M
   for (k=0;k<=lag;k++)
339
8.36M
   {
340
66.0M
      for (i = k+fastN, d = 0; i < n; i++)
341
57.6M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
8.36M
      ac[k] += d;
343
8.36M
   }
344
682k
#ifdef FIXED_POINT
345
682k
   shift = 2*shift;
346
682k
   if (shift<=0)
347
515k
      ac[0] += SHL32((opus_int32)1, -shift);
348
682k
   if (ac[0] < 268435456)
349
429k
   {
350
429k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
5.65M
      for (i=0;i<=lag;i++)
352
5.22M
         ac[i] = SHL32(ac[i], shift2);
353
429k
      shift -= shift2;
354
429k
   } else if (ac[0] >= 536870912)
355
225k
   {
356
225k
      int shift2=1;
357
225k
      if (ac[0] >= 1073741824)
358
106k
         shift2++;
359
3.03M
      for (i=0;i<=lag;i++)
360
2.81M
         ac[i] = SHR32(ac[i], shift2);
361
225k
      shift += shift2;
362
225k
   }
363
682k
#endif
364
365
682k
   RESTORE_STACK;
366
682k
   return shift;
367
682k
}
_celt_autocorr
Line
Count
Source
286
682k
{
287
682k
   opus_val32 d;
288
682k
   int i, k;
289
682k
   int fastN=n-lag;
290
682k
   int shift;
291
682k
   const opus_val16 *xptr;
292
682k
   VARDECL(opus_val16, xx);
293
682k
   SAVE_STACK;
294
682k
   ALLOC(xx, n, opus_val16);
295
682k
   celt_assert(n>0);
296
682k
   celt_assert(overlap>=0);
297
682k
   if (overlap == 0)
298
611k
   {
299
611k
      xptr = x;
300
611k
   } else {
301
79.0M
      for (i=0;i<n;i++)
302
78.9M
         xx[i] = x[i];
303
9.32M
      for (i=0;i<overlap;i++)
304
9.25M
      {
305
9.25M
         opus_val16 w = COEF2VAL16(window[i]);
306
9.25M
         xx[i] = MULT16_16_Q15(x[i],w);
307
9.25M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
9.25M
      }
309
71.0k
      xptr = xx;
310
71.0k
   }
311
682k
   shift=0;
312
682k
#ifdef FIXED_POINT
313
682k
   {
314
682k
      opus_val32 ac0;
315
682k
      int ac0_shift = celt_ilog2(n + (n>>4));
316
682k
      ac0 = 1+(n<<7);
317
682k
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
134M
      for(i=(n&1);i<n;i+=2)
319
133M
      {
320
133M
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
133M
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
133M
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
682k
      ac0 += SHR32(ac0,7);
325
326
682k
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
682k
      shift = (shift)/2;
328
682k
      if (shift>0)
329
166k
      {
330
36.9M
         for(i=0;i<n;i++)
331
36.8M
            xx[i] = PSHR32(xptr[i], shift);
332
166k
         xptr = xx;
333
166k
      } else
334
515k
         shift = 0;
335
682k
   }
336
682k
#endif
337
682k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
9.04M
   for (k=0;k<=lag;k++)
339
8.36M
   {
340
66.0M
      for (i = k+fastN, d = 0; i < n; i++)
341
57.6M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
8.36M
      ac[k] += d;
343
8.36M
   }
344
682k
#ifdef FIXED_POINT
345
682k
   shift = 2*shift;
346
682k
   if (shift<=0)
347
515k
      ac[0] += SHL32((opus_int32)1, -shift);
348
682k
   if (ac[0] < 268435456)
349
429k
   {
350
429k
      int shift2 = 29 - EC_ILOG(ac[0]);
351
5.65M
      for (i=0;i<=lag;i++)
352
5.22M
         ac[i] = SHL32(ac[i], shift2);
353
429k
      shift -= shift2;
354
429k
   } else if (ac[0] >= 536870912)
355
225k
   {
356
225k
      int shift2=1;
357
225k
      if (ac[0] >= 1073741824)
358
106k
         shift2++;
359
3.03M
      for (i=0;i<=lag;i++)
360
2.81M
         ac[i] = SHR32(ac[i], shift2);
361
225k
      shift += shift2;
362
225k
   }
363
682k
#endif
364
365
682k
   RESTORE_STACK;
366
682k
   return shift;
367
682k
}
_celt_autocorr
Line
Count
Source
286
812k
{
287
812k
   opus_val32 d;
288
812k
   int i, k;
289
812k
   int fastN=n-lag;
290
812k
   int shift;
291
812k
   const opus_val16 *xptr;
292
812k
   VARDECL(opus_val16, xx);
293
812k
   SAVE_STACK;
294
812k
   ALLOC(xx, n, opus_val16);
295
812k
   celt_assert(n>0);
296
812k
   celt_assert(overlap>=0);
297
812k
   if (overlap == 0)
298
767k
   {
299
767k
      xptr = x;
300
767k
   } else {
301
50.9M
      for (i=0;i<n;i++)
302
50.9M
         xx[i] = x[i];
303
6.01M
      for (i=0;i<overlap;i++)
304
5.96M
      {
305
5.96M
         opus_val16 w = COEF2VAL16(window[i]);
306
5.96M
         xx[i] = MULT16_16_Q15(x[i],w);
307
5.96M
         xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
308
5.96M
      }
309
45.1k
      xptr = xx;
310
45.1k
   }
311
812k
   shift=0;
312
#ifdef FIXED_POINT
313
   {
314
      opus_val32 ac0;
315
      int ac0_shift = celt_ilog2(n + (n>>4));
316
      ac0 = 1+(n<<7);
317
      if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift);
318
      for(i=(n&1);i<n;i+=2)
319
      {
320
         ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift);
321
         ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift);
322
      }
323
      /* Consider the effect of rounding-to-nearest when scaling the signal. */
324
      ac0 += SHR32(ac0,7);
325
326
      shift = celt_ilog2(ac0)-30+ac0_shift+1;
327
      shift = (shift)/2;
328
      if (shift>0)
329
      {
330
         for(i=0;i<n;i++)
331
            xx[i] = PSHR32(xptr[i], shift);
332
         xptr = xx;
333
      } else
334
         shift = 0;
335
   }
336
#endif
337
812k
   celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch);
338
5.77M
   for (k=0;k<=lag;k++)
339
4.96M
   {
340
26.1M
      for (i = k+fastN, d = 0; i < n; i++)
341
21.2M
         d = MAC16_16(d, xptr[i], xptr[i-k]);
342
4.96M
      ac[k] += d;
343
4.96M
   }
344
#ifdef FIXED_POINT
345
   shift = 2*shift;
346
   if (shift<=0)
347
      ac[0] += SHL32((opus_int32)1, -shift);
348
   if (ac[0] < 268435456)
349
   {
350
      int shift2 = 29 - EC_ILOG(ac[0]);
351
      for (i=0;i<=lag;i++)
352
         ac[i] = SHL32(ac[i], shift2);
353
      shift -= shift2;
354
   } else if (ac[0] >= 536870912)
355
   {
356
      int shift2=1;
357
      if (ac[0] >= 1073741824)
358
         shift2++;
359
      for (i=0;i<=lag;i++)
360
         ac[i] = SHR32(ac[i], shift2);
361
      shift += shift2;
362
   }
363
#endif
364
365
812k
   RESTORE_STACK;
366
812k
   return shift;
367
812k
}