/src/opus/celt/celt_lpc.c
Line | Count | Source |
1 | | /* Copyright (c) 2009-2010 Xiph.Org Foundation |
2 | | Written by Jean-Marc Valin */ |
3 | | /* |
4 | | Redistribution and use in source and binary forms, with or without |
5 | | modification, are permitted provided that the following conditions |
6 | | are met: |
7 | | |
8 | | - Redistributions of source code must retain the above copyright |
9 | | notice, this list of conditions and the following disclaimer. |
10 | | |
11 | | - Redistributions in binary form must reproduce the above copyright |
12 | | notice, this list of conditions and the following disclaimer in the |
13 | | documentation and/or other materials provided with the distribution. |
14 | | |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
16 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
17 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
18 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
19 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
23 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
24 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | |
32 | | #include "celt_lpc.h" |
33 | | #include "stack_alloc.h" |
34 | | #include "mathops.h" |
35 | | #include "pitch.h" |
36 | | |
37 | | void _celt_lpc( |
38 | | opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ |
39 | | const opus_val32 *ac, /* in: [0...p] autocorrelation values */ |
40 | | int p |
41 | | ) |
42 | 4.98M | { |
43 | 4.98M | int i, j; |
44 | 4.98M | opus_val32 r; |
45 | 4.98M | opus_val32 error = ac[0]; |
46 | | #ifdef FIXED_POINT |
47 | | opus_val32 lpc[CELT_LPC_ORDER]; |
48 | | #else |
49 | | float *lpc = _lpc; |
50 | | #endif |
51 | | |
52 | 4.98M | OPUS_CLEAR(lpc, p); |
53 | | #ifdef FIXED_POINT |
54 | 3.74M | if (ac[0] != 0) |
55 | | #else |
56 | 1.24M | if (ac[0] > 1e-10f) |
57 | 1.23M | #endif |
58 | 4.97M | { |
59 | 26.6M | for (i = 0; i < p; i++) { |
60 | | /* Sum up this iteration's reflection coefficient */ |
61 | 21.7M | opus_val32 rr = 0; |
62 | | #if defined (FIXED_POINT) && OPUS_FAST_INT64 |
63 | | opus_int64 acc = 0; |
64 | 58.4M | for (j = 0; j < i; j++) |
65 | 42.0M | acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); |
66 | 16.3M | rr = (opus_val32)SHR64(acc, 31); |
67 | | #else |
68 | 19.3M | for (j = 0; j < i; j++) |
69 | 13.9M | rr += MULT32_32_Q31(lpc[j],ac[i - j]); |
70 | | #endif |
71 | 21.7M | rr += SHR32(ac[i + 1],6); |
72 | 21.7M | r = -frac_div32(SHL32(rr,6), error); |
73 | | /* Update LPC coefficients and total error */ |
74 | 21.7M | lpc[i] = SHR32(r,6); |
75 | 55.2M | for (j = 0; j < (i+1)>>1; j++) |
76 | 33.4M | { |
77 | 33.4M | opus_val32 tmp1, tmp2; |
78 | 33.4M | tmp1 = lpc[j]; |
79 | 33.4M | tmp2 = lpc[i-1-j]; |
80 | 33.4M | lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); |
81 | 33.4M | lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); |
82 | 33.4M | } |
83 | | |
84 | 21.7M | error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); |
85 | | /* Bail out once we get 30 dB gain */ |
86 | | #ifdef FIXED_POINT |
87 | 16.3M | if (error<=SHR32(ac[0],10)) |
88 | 27.5k | break; |
89 | | #else |
90 | 5.39M | if (error<=.001f*ac[0]) |
91 | 44.1k | break; |
92 | | #endif |
93 | 21.7M | } |
94 | 4.97M | } |
95 | | #ifdef FIXED_POINT |
96 | | { |
97 | | /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds. |
98 | | This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug |
99 | | fixes should also be applied there. */ |
100 | | int iter, idx = 0; |
101 | | opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16; |
102 | | |
103 | 3.74M | for (iter = 0; iter < 10; iter++) { |
104 | 3.74M | maxabs = 0; |
105 | 20.6M | for (i = 0; i < p; i++) { |
106 | 16.9M | absval = ABS32(lpc[i]); |
107 | 16.9M | if (absval > maxabs) { |
108 | 5.60M | maxabs = absval; |
109 | 5.60M | idx = i; |
110 | 5.60M | } |
111 | 16.9M | } |
112 | 3.74M | maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */ |
113 | | |
114 | 3.74M | if (maxabs > 32767) { |
115 | 0 | maxabs = MIN32(maxabs, 163838); |
116 | 0 | chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14), |
117 | 0 | SHR32(MULT32_32_32(maxabs, idx + 1), 2)); |
118 | 0 | chirp_minus_one_Q16 = chirp_Q16 - 65536; |
119 | | |
120 | | /* Apply bandwidth expansion. */ |
121 | 0 | for (i = 0; i < p - 1; i++) { |
122 | 0 | lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]); |
123 | 0 | chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16); |
124 | 0 | } |
125 | 0 | lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]); |
126 | 3.74M | } else { |
127 | 3.74M | break; |
128 | 3.74M | } |
129 | 3.74M | } |
130 | | |
131 | 3.74M | if (iter == 10) { |
132 | | /* If the coeffs still do not fit into the 16 bit range after 10 iterations, |
133 | | fall back to the A(z)=1 filter. */ |
134 | 0 | OPUS_CLEAR(lpc, p); |
135 | 0 | _lpc[0] = 4096; /* Q12 */ |
136 | 3.74M | } else { |
137 | 20.6M | for (i = 0; i < p; i++) { |
138 | 16.9M | _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */ |
139 | 16.9M | } |
140 | 3.74M | } |
141 | | } |
142 | | #endif |
143 | 4.98M | } Line | Count | Source | 42 | 3.74M | { | 43 | 3.74M | int i, j; | 44 | 3.74M | opus_val32 r; | 45 | 3.74M | opus_val32 error = ac[0]; | 46 | 3.74M | #ifdef FIXED_POINT | 47 | 3.74M | opus_val32 lpc[CELT_LPC_ORDER]; | 48 | | #else | 49 | | float *lpc = _lpc; | 50 | | #endif | 51 | | | 52 | 3.74M | OPUS_CLEAR(lpc, p); | 53 | 3.74M | #ifdef FIXED_POINT | 54 | 3.74M | if (ac[0] != 0) | 55 | | #else | 56 | | if (ac[0] > 1e-10f) | 57 | | #endif | 58 | 3.74M | { | 59 | 20.1M | for (i = 0; i < p; i++) { | 60 | | /* Sum up this iteration's reflection coefficient */ | 61 | 16.3M | opus_val32 rr = 0; | 62 | 16.3M | #if defined (FIXED_POINT) && OPUS_FAST_INT64 | 63 | 16.3M | opus_int64 acc = 0; | 64 | 58.4M | for (j = 0; j < i; j++) | 65 | 42.0M | acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); | 66 | 16.3M | rr = (opus_val32)SHR64(acc, 31); | 67 | | #else | 68 | | for (j = 0; j < i; j++) | 69 | | rr += MULT32_32_Q31(lpc[j],ac[i - j]); | 70 | | #endif | 71 | 16.3M | rr += SHR32(ac[i + 1],6); | 72 | 16.3M | r = -frac_div32(SHL32(rr,6), error); | 73 | | /* Update LPC coefficients and total error */ | 74 | 16.3M | lpc[i] = SHR32(r,6); | 75 | 41.5M | for (j = 0; j < (i+1)>>1; j++) | 76 | 25.1M | { | 77 | 25.1M | opus_val32 tmp1, tmp2; | 78 | 25.1M | tmp1 = lpc[j]; | 79 | 25.1M | tmp2 = lpc[i-1-j]; | 80 | 25.1M | lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); | 81 | 25.1M | lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); | 82 | 25.1M | } | 83 | | | 84 | 16.3M | error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); | 85 | | /* Bail out once we get 30 dB gain */ | 86 | 16.3M | #ifdef FIXED_POINT | 87 | 16.3M | if (error<=SHR32(ac[0],10)) | 88 | 27.5k | break; | 89 | | #else | 90 | | if (error<=.001f*ac[0]) | 91 | | break; | 92 | | #endif | 93 | 16.3M | } | 94 | 3.74M | } | 95 | 3.74M | #ifdef FIXED_POINT | 96 | 3.74M | { | 97 | | /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds. | 98 | | This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug | 99 | | fixes should also be applied there. */ | 100 | 3.74M | int iter, idx = 0; | 101 | 3.74M | opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16; | 102 | | | 103 | 3.74M | for (iter = 0; iter < 10; iter++) { | 104 | 3.74M | maxabs = 0; | 105 | 20.6M | for (i = 0; i < p; i++) { | 106 | 16.9M | absval = ABS32(lpc[i]); | 107 | 16.9M | if (absval > maxabs) { | 108 | 5.60M | maxabs = absval; | 109 | 5.60M | idx = i; | 110 | 5.60M | } | 111 | 16.9M | } | 112 | 3.74M | maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */ | 113 | | | 114 | 3.74M | if (maxabs > 32767) { | 115 | 0 | maxabs = MIN32(maxabs, 163838); | 116 | 0 | chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14), | 117 | 0 | SHR32(MULT32_32_32(maxabs, idx + 1), 2)); | 118 | 0 | chirp_minus_one_Q16 = chirp_Q16 - 65536; | 119 | | | 120 | | /* Apply bandwidth expansion. */ | 121 | 0 | for (i = 0; i < p - 1; i++) { | 122 | 0 | lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]); | 123 | 0 | chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16); | 124 | 0 | } | 125 | 0 | lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]); | 126 | 3.74M | } else { | 127 | 3.74M | break; | 128 | 3.74M | } | 129 | 3.74M | } | 130 | | | 131 | 3.74M | if (iter == 10) { | 132 | | /* If the coeffs still do not fit into the 16 bit range after 10 iterations, | 133 | | fall back to the A(z)=1 filter. */ | 134 | 0 | OPUS_CLEAR(lpc, p); | 135 | 0 | _lpc[0] = 4096; /* Q12 */ | 136 | 3.74M | } else { | 137 | 20.6M | for (i = 0; i < p; i++) { | 138 | 16.9M | _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */ | 139 | 16.9M | } | 140 | 3.74M | } | 141 | 3.74M | } | 142 | 3.74M | #endif | 143 | 3.74M | } |
Line | Count | Source | 42 | 1.24M | { | 43 | 1.24M | int i, j; | 44 | 1.24M | opus_val32 r; | 45 | 1.24M | opus_val32 error = ac[0]; | 46 | | #ifdef FIXED_POINT | 47 | | opus_val32 lpc[CELT_LPC_ORDER]; | 48 | | #else | 49 | 1.24M | float *lpc = _lpc; | 50 | 1.24M | #endif | 51 | | | 52 | 1.24M | OPUS_CLEAR(lpc, p); | 53 | | #ifdef FIXED_POINT | 54 | | if (ac[0] != 0) | 55 | | #else | 56 | 1.24M | if (ac[0] > 1e-10f) | 57 | 1.23M | #endif | 58 | 1.23M | { | 59 | 6.58M | for (i = 0; i < p; i++) { | 60 | | /* Sum up this iteration's reflection coefficient */ | 61 | 5.39M | opus_val32 rr = 0; | 62 | | #if defined (FIXED_POINT) && OPUS_FAST_INT64 | 63 | | opus_int64 acc = 0; | 64 | | for (j = 0; j < i; j++) | 65 | | acc += (opus_int64)(lpc[j]) * (opus_int64)(ac[i - j]); | 66 | | rr = (opus_val32)SHR64(acc, 31); | 67 | | #else | 68 | 19.3M | for (j = 0; j < i; j++) | 69 | 13.9M | rr += MULT32_32_Q31(lpc[j],ac[i - j]); | 70 | 5.39M | #endif | 71 | 5.39M | rr += SHR32(ac[i + 1],6); | 72 | 5.39M | r = -frac_div32(SHL32(rr,6), error); | 73 | | /* Update LPC coefficients and total error */ | 74 | 5.39M | lpc[i] = SHR32(r,6); | 75 | 13.7M | for (j = 0; j < (i+1)>>1; j++) | 76 | 8.31M | { | 77 | 8.31M | opus_val32 tmp1, tmp2; | 78 | 8.31M | tmp1 = lpc[j]; | 79 | 8.31M | tmp2 = lpc[i-1-j]; | 80 | 8.31M | lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2); | 81 | 8.31M | lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1); | 82 | 8.31M | } | 83 | | | 84 | 5.39M | error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error); | 85 | | /* Bail out once we get 30 dB gain */ | 86 | | #ifdef FIXED_POINT | 87 | | if (error<=SHR32(ac[0],10)) | 88 | | break; | 89 | | #else | 90 | 5.39M | if (error<=.001f*ac[0]) | 91 | 44.1k | break; | 92 | 5.39M | #endif | 93 | 5.39M | } | 94 | 1.23M | } | 95 | | #ifdef FIXED_POINT | 96 | | { | 97 | | /* Convert the int32 lpcs to int16 and ensure there are no wrap-arounds. | 98 | | This reuses the logic in silk_LPC_fit() and silk_bwexpander_32(). Any bug | 99 | | fixes should also be applied there. */ | 100 | | int iter, idx = 0; | 101 | | opus_val32 maxabs, absval, chirp_Q16, chirp_minus_one_Q16; | 102 | | | 103 | | for (iter = 0; iter < 10; iter++) { | 104 | | maxabs = 0; | 105 | | for (i = 0; i < p; i++) { | 106 | | absval = ABS32(lpc[i]); | 107 | | if (absval > maxabs) { | 108 | | maxabs = absval; | 109 | | idx = i; | 110 | | } | 111 | | } | 112 | | maxabs = PSHR32(maxabs, 13); /* Q25->Q12 */ | 113 | | | 114 | | if (maxabs > 32767) { | 115 | | maxabs = MIN32(maxabs, 163838); | 116 | | chirp_Q16 = QCONST32(0.999, 16) - DIV32(SHL32(maxabs - 32767, 14), | 117 | | SHR32(MULT32_32_32(maxabs, idx + 1), 2)); | 118 | | chirp_minus_one_Q16 = chirp_Q16 - 65536; | 119 | | | 120 | | /* Apply bandwidth expansion. */ | 121 | | for (i = 0; i < p - 1; i++) { | 122 | | lpc[i] = MULT32_32_Q16(chirp_Q16, lpc[i]); | 123 | | chirp_Q16 += PSHR32(MULT32_32_32(chirp_Q16, chirp_minus_one_Q16), 16); | 124 | | } | 125 | | lpc[p - 1] = MULT32_32_Q16(chirp_Q16, lpc[p - 1]); | 126 | | } else { | 127 | | break; | 128 | | } | 129 | | } | 130 | | | 131 | | if (iter == 10) { | 132 | | /* If the coeffs still do not fit into the 16 bit range after 10 iterations, | 133 | | fall back to the A(z)=1 filter. */ | 134 | | OPUS_CLEAR(lpc, p); | 135 | | _lpc[0] = 4096; /* Q12 */ | 136 | | } else { | 137 | | for (i = 0; i < p; i++) { | 138 | | _lpc[i] = EXTRACT16(PSHR32(lpc[i], 13)); /* Q25->Q12 */ | 139 | | } | 140 | | } | 141 | | } | 142 | | #endif | 143 | 1.24M | } |
|
144 | | |
145 | | |
146 | | void celt_fir_c( |
147 | | const opus_val16 *x, |
148 | | const opus_val16 *num, |
149 | | opus_val16 *y, |
150 | | int N, |
151 | | int ord, |
152 | | int arch) |
153 | 123k | { |
154 | 123k | int i,j; |
155 | 123k | VARDECL(opus_val16, rnum); |
156 | 123k | SAVE_STACK; |
157 | 123k | celt_assert(x != y); |
158 | 123k | ALLOC(rnum, ord, opus_val16); |
159 | 3.08M | for(i=0;i<ord;i++) |
160 | 2.96M | rnum[i] = num[ord-i-1]; |
161 | 17.0M | for (i=0;i<N-3;i+=4) |
162 | 16.9M | { |
163 | 16.9M | opus_val32 sum[4]; |
164 | 16.9M | sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT); |
165 | 16.9M | sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT); |
166 | 16.9M | sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT); |
167 | 16.9M | sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT); |
168 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) |
169 | | { |
170 | | opus_val32 sum_c[4]; |
171 | | memcpy(sum_c, sum, sizeof(sum_c)); |
172 | | xcorr_kernel_c(rnum, x+i-ord, sum_c, ord); |
173 | | #endif |
174 | 16.9M | xcorr_kernel(rnum, x+i-ord, sum, ord, arch); |
175 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) |
176 | 0 | celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); |
177 | 0 | } |
178 | 0 | #endif |
179 | 16.9M | y[i ] = SROUND16(sum[0], SIG_SHIFT); |
180 | 16.9M | y[i+1] = SROUND16(sum[1], SIG_SHIFT); |
181 | 16.9M | y[i+2] = SROUND16(sum[2], SIG_SHIFT); |
182 | 16.9M | y[i+3] = SROUND16(sum[3], SIG_SHIFT); |
183 | 0 | } |
184 | 179k | for (;i<N;i++) |
185 | 55.5k | { |
186 | 55.5k | opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); |
187 | 1.38M | for (j=0;j<ord;j++) |
188 | 1.33M | sum = MAC16_16(sum,rnum[j],x[i+j-ord]); |
189 | 55.5k | y[i] = SROUND16(sum, SIG_SHIFT); |
190 | 55.5k | } |
191 | 0 | RESTORE_STACK; |
192 | 0 | } Unexecuted instantiation: celt_fir_c Line | Count | Source | 153 | 123k | { | 154 | 123k | int i,j; | 155 | 123k | VARDECL(opus_val16, rnum); | 156 | 123k | SAVE_STACK; | 157 | 123k | celt_assert(x != y); | 158 | 123k | ALLOC(rnum, ord, opus_val16); | 159 | 3.08M | for(i=0;i<ord;i++) | 160 | 2.96M | rnum[i] = num[ord-i-1]; | 161 | 17.0M | for (i=0;i<N-3;i+=4) | 162 | 16.9M | { | 163 | 16.9M | opus_val32 sum[4]; | 164 | 16.9M | sum[0] = SHL32(EXTEND32(x[i ]), SIG_SHIFT); | 165 | 16.9M | sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT); | 166 | 16.9M | sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT); | 167 | 16.9M | sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT); | 168 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 169 | | { | 170 | | opus_val32 sum_c[4]; | 171 | | memcpy(sum_c, sum, sizeof(sum_c)); | 172 | | xcorr_kernel_c(rnum, x+i-ord, sum_c, ord); | 173 | | #endif | 174 | 16.9M | xcorr_kernel(rnum, x+i-ord, sum, ord, arch); | 175 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 176 | | celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); | 177 | | } | 178 | | #endif | 179 | 16.9M | y[i ] = SROUND16(sum[0], SIG_SHIFT); | 180 | 16.9M | y[i+1] = SROUND16(sum[1], SIG_SHIFT); | 181 | 16.9M | y[i+2] = SROUND16(sum[2], SIG_SHIFT); | 182 | 16.9M | y[i+3] = SROUND16(sum[3], SIG_SHIFT); | 183 | 16.9M | } | 184 | 179k | for (;i<N;i++) | 185 | 55.5k | { | 186 | 55.5k | opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); | 187 | 1.38M | for (j=0;j<ord;j++) | 188 | 1.33M | sum = MAC16_16(sum,rnum[j],x[i+j-ord]); | 189 | 55.5k | y[i] = SROUND16(sum, SIG_SHIFT); | 190 | 55.5k | } | 191 | 123k | RESTORE_STACK; | 192 | 123k | } |
|
193 | | |
194 | | void celt_iir(const opus_val32 *_x, |
195 | | const opus_val16 *den, |
196 | | opus_val32 *_y, |
197 | | int N, |
198 | | int ord, |
199 | | opus_val16 *mem, |
200 | | int arch) |
201 | 289k | { |
202 | | #ifdef SMALL_FOOTPRINT |
203 | | int i,j; |
204 | | (void)arch; |
205 | | for (i=0;i<N;i++) |
206 | | { |
207 | | opus_val32 sum = _x[i]; |
208 | | for (j=0;j<ord;j++) |
209 | | { |
210 | | sum -= MULT16_16(den[j],mem[j]); |
211 | | } |
212 | | for (j=ord-1;j>=1;j--) |
213 | | { |
214 | | mem[j]=mem[j-1]; |
215 | | } |
216 | | mem[0] = SROUND16(sum, SIG_SHIFT); |
217 | | _y[i] = sum; |
218 | | } |
219 | | #else |
220 | 289k | int i,j; |
221 | 289k | VARDECL(opus_val16, rden); |
222 | 289k | VARDECL(opus_val16, y); |
223 | 289k | SAVE_STACK; |
224 | | |
225 | 289k | celt_assert((ord&3)==0); |
226 | 289k | ALLOC(rden, ord, opus_val16); |
227 | 289k | ALLOC(y, N+ord, opus_val16); |
228 | 7.24M | for(i=0;i<ord;i++) |
229 | 6.95M | rden[i] = den[ord-i-1]; |
230 | 7.24M | for(i=0;i<ord;i++) |
231 | 6.95M | y[i] = -mem[ord-i-1]; |
232 | 105M | for(;i<N+ord;i++) |
233 | 104M | y[i]=0; |
234 | 26.5M | for (i=0;i<N-3;i+=4) |
235 | 26.2M | { |
236 | | /* Unroll by 4 as if it were an FIR filter */ |
237 | 26.2M | opus_val32 sum[4]; |
238 | 26.2M | sum[0]=_x[i]; |
239 | 26.2M | sum[1]=_x[i+1]; |
240 | 26.2M | sum[2]=_x[i+2]; |
241 | 26.2M | sum[3]=_x[i+3]; |
242 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) |
243 | | { |
244 | | opus_val32 sum_c[4]; |
245 | | memcpy(sum_c, sum, sizeof(sum_c)); |
246 | | xcorr_kernel_c(rden, y+i, sum_c, ord); |
247 | | #endif |
248 | 26.2M | xcorr_kernel(rden, y+i, sum, ord, arch); |
249 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) |
250 | 14.0M | celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); |
251 | 14.0M | } |
252 | 0 | #endif |
253 | | /* Patch up the result to compensate for the fact that this is an IIR */ |
254 | 26.2M | y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT); |
255 | 14.0M | _y[i ] = sum[0]; |
256 | 26.2M | sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); |
257 | 26.2M | y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT); |
258 | 14.0M | _y[i+1] = sum[1]; |
259 | 26.2M | sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); |
260 | 26.2M | sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); |
261 | 26.2M | y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT); |
262 | 14.0M | _y[i+2] = sum[2]; |
263 | | |
264 | 26.2M | sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); |
265 | 26.2M | sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); |
266 | 26.2M | sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); |
267 | 26.2M | y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT); |
268 | 14.0M | _y[i+3] = sum[3]; |
269 | 14.0M | } |
270 | 289k | for (;i<N;i++) |
271 | 0 | { |
272 | 0 | opus_val32 sum = _x[i]; |
273 | 0 | for (j=0;j<ord;j++) |
274 | 0 | sum -= MULT16_16(rden[j],y[i+j]); |
275 | 0 | y[i+ord] = SROUND16(sum,SIG_SHIFT); |
276 | 0 | _y[i] = sum; |
277 | 0 | } |
278 | 7.24M | for(i=0;i<ord;i++) |
279 | 6.95M | mem[i] = _y[N-i-1]; |
280 | 166k | RESTORE_STACK; |
281 | 166k | #endif |
282 | 166k | } Line | Count | Source | 201 | 166k | { | 202 | | #ifdef SMALL_FOOTPRINT | 203 | | int i,j; | 204 | | (void)arch; | 205 | | for (i=0;i<N;i++) | 206 | | { | 207 | | opus_val32 sum = _x[i]; | 208 | | for (j=0;j<ord;j++) | 209 | | { | 210 | | sum -= MULT16_16(den[j],mem[j]); | 211 | | } | 212 | | for (j=ord-1;j>=1;j--) | 213 | | { | 214 | | mem[j]=mem[j-1]; | 215 | | } | 216 | | mem[0] = SROUND16(sum, SIG_SHIFT); | 217 | | _y[i] = sum; | 218 | | } | 219 | | #else | 220 | 166k | int i,j; | 221 | 166k | VARDECL(opus_val16, rden); | 222 | 166k | VARDECL(opus_val16, y); | 223 | 166k | SAVE_STACK; | 224 | | | 225 | 166k | celt_assert((ord&3)==0); | 226 | 166k | ALLOC(rden, ord, opus_val16); | 227 | 166k | ALLOC(y, N+ord, opus_val16); | 228 | 4.15M | for(i=0;i<ord;i++) | 229 | 3.99M | rden[i] = den[ord-i-1]; | 230 | 4.15M | for(i=0;i<ord;i++) | 231 | 3.99M | y[i] = -mem[ord-i-1]; | 232 | 56.5M | for(;i<N+ord;i++) | 233 | 56.3M | y[i]=0; | 234 | 14.2M | for (i=0;i<N-3;i+=4) | 235 | 14.0M | { | 236 | | /* Unroll by 4 as if it were an FIR filter */ | 237 | 14.0M | opus_val32 sum[4]; | 238 | 14.0M | sum[0]=_x[i]; | 239 | 14.0M | sum[1]=_x[i+1]; | 240 | 14.0M | sum[2]=_x[i+2]; | 241 | 14.0M | sum[3]=_x[i+3]; | 242 | 14.0M | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 243 | 14.0M | { | 244 | 14.0M | opus_val32 sum_c[4]; | 245 | 14.0M | memcpy(sum_c, sum, sizeof(sum_c)); | 246 | 14.0M | xcorr_kernel_c(rden, y+i, sum_c, ord); | 247 | 14.0M | #endif | 248 | 14.0M | xcorr_kernel(rden, y+i, sum, ord, arch); | 249 | 14.0M | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 250 | 14.0M | celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); | 251 | 14.0M | } | 252 | 0 | #endif | 253 | | /* Patch up the result to compensate for the fact that this is an IIR */ | 254 | 14.0M | y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT); | 255 | 14.0M | _y[i ] = sum[0]; | 256 | 14.0M | sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); | 257 | 14.0M | y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT); | 258 | 14.0M | _y[i+1] = sum[1]; | 259 | 14.0M | sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); | 260 | 14.0M | sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); | 261 | 14.0M | y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT); | 262 | 14.0M | _y[i+2] = sum[2]; | 263 | | | 264 | 14.0M | sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); | 265 | 14.0M | sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); | 266 | 14.0M | sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); | 267 | 14.0M | y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT); | 268 | 14.0M | _y[i+3] = sum[3]; | 269 | 14.0M | } | 270 | 166k | for (;i<N;i++) | 271 | 0 | { | 272 | 0 | opus_val32 sum = _x[i]; | 273 | 0 | for (j=0;j<ord;j++) | 274 | 0 | sum -= MULT16_16(rden[j],y[i+j]); | 275 | 0 | y[i+ord] = SROUND16(sum,SIG_SHIFT); | 276 | 0 | _y[i] = sum; | 277 | 0 | } | 278 | 4.15M | for(i=0;i<ord;i++) | 279 | 3.99M | mem[i] = _y[N-i-1]; | 280 | 166k | RESTORE_STACK; | 281 | 166k | #endif | 282 | 166k | } |
Line | Count | Source | 201 | 123k | { | 202 | | #ifdef SMALL_FOOTPRINT | 203 | | int i,j; | 204 | | (void)arch; | 205 | | for (i=0;i<N;i++) | 206 | | { | 207 | | opus_val32 sum = _x[i]; | 208 | | for (j=0;j<ord;j++) | 209 | | { | 210 | | sum -= MULT16_16(den[j],mem[j]); | 211 | | } | 212 | | for (j=ord-1;j>=1;j--) | 213 | | { | 214 | | mem[j]=mem[j-1]; | 215 | | } | 216 | | mem[0] = SROUND16(sum, SIG_SHIFT); | 217 | | _y[i] = sum; | 218 | | } | 219 | | #else | 220 | 123k | int i,j; | 221 | 123k | VARDECL(opus_val16, rden); | 222 | 123k | VARDECL(opus_val16, y); | 223 | 123k | SAVE_STACK; | 224 | | | 225 | 123k | celt_assert((ord&3)==0); | 226 | 123k | ALLOC(rden, ord, opus_val16); | 227 | 123k | ALLOC(y, N+ord, opus_val16); | 228 | 3.08M | for(i=0;i<ord;i++) | 229 | 2.96M | rden[i] = den[ord-i-1]; | 230 | 3.08M | for(i=0;i<ord;i++) | 231 | 2.96M | y[i] = -mem[ord-i-1]; | 232 | 48.7M | for(;i<N+ord;i++) | 233 | 48.5M | y[i]=0; | 234 | 12.2M | for (i=0;i<N-3;i+=4) | 235 | 12.1M | { | 236 | | /* Unroll by 4 as if it were an FIR filter */ | 237 | 12.1M | opus_val32 sum[4]; | 238 | 12.1M | sum[0]=_x[i]; | 239 | 12.1M | sum[1]=_x[i+1]; | 240 | 12.1M | sum[2]=_x[i+2]; | 241 | 12.1M | sum[3]=_x[i+3]; | 242 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 243 | | { | 244 | | opus_val32 sum_c[4]; | 245 | | memcpy(sum_c, sum, sizeof(sum_c)); | 246 | | xcorr_kernel_c(rden, y+i, sum_c, ord); | 247 | | #endif | 248 | 12.1M | xcorr_kernel(rden, y+i, sum, ord, arch); | 249 | | #if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT) | 250 | | celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0); | 251 | | } | 252 | | #endif | 253 | | /* Patch up the result to compensate for the fact that this is an IIR */ | 254 | 12.1M | y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT); | 255 | 12.1M | _y[i ] = sum[0]; | 256 | 12.1M | sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); | 257 | 12.1M | y[i+ord+1] = -SROUND16(sum[1],SIG_SHIFT); | 258 | 12.1M | _y[i+1] = sum[1]; | 259 | 12.1M | sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); | 260 | 12.1M | sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); | 261 | 12.1M | y[i+ord+2] = -SROUND16(sum[2],SIG_SHIFT); | 262 | 12.1M | _y[i+2] = sum[2]; | 263 | | | 264 | 12.1M | sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); | 265 | 12.1M | sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); | 266 | 12.1M | sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); | 267 | 12.1M | y[i+ord+3] = -SROUND16(sum[3],SIG_SHIFT); | 268 | 12.1M | _y[i+3] = sum[3]; | 269 | 12.1M | } | 270 | 123k | for (;i<N;i++) | 271 | 0 | { | 272 | 0 | opus_val32 sum = _x[i]; | 273 | 0 | for (j=0;j<ord;j++) | 274 | 0 | sum -= MULT16_16(rden[j],y[i+j]); | 275 | 0 | y[i+ord] = SROUND16(sum,SIG_SHIFT); | 276 | 0 | _y[i] = sum; | 277 | 0 | } | 278 | 3.08M | for(i=0;i<ord;i++) | 279 | 2.96M | mem[i] = _y[N-i-1]; | 280 | 123k | RESTORE_STACK; | 281 | 123k | #endif | 282 | 123k | } |
|
283 | | |
284 | | int _celt_autocorr( |
285 | | const opus_val16 *x, /* in: [0...n-1] samples x */ |
286 | | opus_val32 *ac, /* out: [0...lag-1] ac values */ |
287 | | const celt_coef *window, |
288 | | int overlap, |
289 | | int lag, |
290 | | int n, |
291 | | int arch |
292 | | ) |
293 | 313M | { |
294 | 313M | opus_val32 d; |
295 | 313M | int i, k; |
296 | 313M | int fastN=n-lag; |
297 | 313M | int shift; |
298 | 313M | const opus_val16 *xptr; |
299 | 313M | VARDECL(opus_val16, xx); |
300 | 313M | SAVE_STACK; |
301 | 313M | ALLOC(xx, n, opus_val16); |
302 | 313M | celt_assert(n>0); |
303 | 313M | celt_assert(overlap>=0); |
304 | 313M | if (overlap == 0) |
305 | 313M | { |
306 | 313M | xptr = x; |
307 | 313M | } else { |
308 | 297M | for (i=0;i<n;i++) |
309 | 296M | xx[i] = x[i]; |
310 | 35.0M | for (i=0;i<overlap;i++) |
311 | 34.7M | { |
312 | 34.7M | opus_val16 w = COEF2VAL16(window[i]); |
313 | 34.7M | xx[i] = MULT16_16_Q15(x[i],w); |
314 | 34.7M | xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w); |
315 | 34.7M | } |
316 | 262k | xptr = xx; |
317 | 262k | } |
318 | 313M | shift=0; |
319 | | #ifdef FIXED_POINT |
320 | | { |
321 | | opus_val32 ac0; |
322 | | int ac0_shift = celt_ilog2(n + (n>>4)); |
323 | | ac0 = 1+(n<<7); |
324 | 312M | if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift); |
325 | 24.6G | for(i=(n&1);i<n;i+=2) |
326 | 24.3G | { |
327 | 24.3G | ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift); |
328 | 24.3G | ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift); |
329 | 24.3G | } |
330 | | /* Consider the effect of rounding-to-nearest when scaling the signal. */ |
331 | 312M | ac0 += SHR32(ac0,7); |
332 | | |
333 | | shift = celt_ilog2(ac0)-30+ac0_shift+1; |
334 | | shift = (shift)/2; |
335 | 312M | if (shift>0) |
336 | 4.08M | { |
337 | 694M | for(i=0;i<n;i++) |
338 | 689M | xx[i] = PSHR32(xptr[i], shift); |
339 | 4.08M | xptr = xx; |
340 | 4.08M | } else |
341 | 308M | shift = 0; |
342 | | } |
343 | | #endif |
344 | 313M | celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); |
345 | 4.04G | for (k=0;k<=lag;k++) |
346 | 3.72G | { |
347 | 25.2G | for (i = k+fastN, d = 0; i < n; i++) |
348 | 21.5G | d = MAC16_16(d, xptr[i], xptr[i-k]); |
349 | 3.72G | ac[k] += d; |
350 | 3.72G | } |
351 | | #ifdef FIXED_POINT |
352 | | shift = 2*shift; |
353 | 312M | if (shift<=0) |
354 | 308M | ac[0] += SHL32((opus_int32)1, -shift); |
355 | 312M | if (ac[0] < 268435456) |
356 | 307M | { |
357 | 307M | int shift2 = 29 - EC_ILOG(ac[0]); |
358 | 3.96G | for (i=0;i<=lag;i++) |
359 | 3.65G | ac[i] = SHL32(ac[i], shift2); |
360 | 307M | shift -= shift2; |
361 | 307M | } else if (ac[0] >= 536870912) |
362 | 5.18M | { |
363 | 5.18M | int shift2=1; |
364 | 5.18M | if (ac[0] >= 1073741824) |
365 | 2.59M | shift2++; |
366 | 66.4M | for (i=0;i<=lag;i++) |
367 | 61.2M | ac[i] = SHR32(ac[i], shift2); |
368 | 5.18M | shift += shift2; |
369 | 5.18M | } |
370 | | #endif |
371 | | |
372 | 313M | RESTORE_STACK; |
373 | 313M | return shift; |
374 | 313M | } Line | Count | Source | 293 | 156M | { | 294 | 156M | opus_val32 d; | 295 | 156M | int i, k; | 296 | 156M | int fastN=n-lag; | 297 | 156M | int shift; | 298 | 156M | const opus_val16 *xptr; | 299 | 156M | VARDECL(opus_val16, xx); | 300 | 156M | SAVE_STACK; | 301 | 156M | ALLOC(xx, n, opus_val16); | 302 | 156M | celt_assert(n>0); | 303 | 156M | celt_assert(overlap>=0); | 304 | 156M | if (overlap == 0) | 305 | 156M | { | 306 | 156M | xptr = x; | 307 | 156M | } else { | 308 | 109M | for (i=0;i<n;i++) | 309 | 109M | xx[i] = x[i]; | 310 | 12.8M | for (i=0;i<overlap;i++) | 311 | 12.7M | { | 312 | 12.7M | opus_val16 w = COEF2VAL16(window[i]); | 313 | 12.7M | xx[i] = MULT16_16_Q15(x[i],w); | 314 | 12.7M | xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w); | 315 | 12.7M | } | 316 | 97.1k | xptr = xx; | 317 | 97.1k | } | 318 | 156M | shift=0; | 319 | 156M | #ifdef FIXED_POINT | 320 | 156M | { | 321 | 156M | opus_val32 ac0; | 322 | 156M | int ac0_shift = celt_ilog2(n + (n>>4)); | 323 | 156M | ac0 = 1+(n<<7); | 324 | 156M | if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift); | 325 | 12.3G | for(i=(n&1);i<n;i+=2) | 326 | 12.1G | { | 327 | 12.1G | ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift); | 328 | 12.1G | ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift); | 329 | 12.1G | } | 330 | | /* Consider the effect of rounding-to-nearest when scaling the signal. */ | 331 | 156M | ac0 += SHR32(ac0,7); | 332 | | | 333 | 156M | shift = celt_ilog2(ac0)-30+ac0_shift+1; | 334 | 156M | shift = (shift)/2; | 335 | 156M | if (shift>0) | 336 | 2.04M | { | 337 | 347M | for(i=0;i<n;i++) | 338 | 344M | xx[i] = PSHR32(xptr[i], shift); | 339 | 2.04M | xptr = xx; | 340 | 2.04M | } else | 341 | 154M | shift = 0; | 342 | 156M | } | 343 | 156M | #endif | 344 | 156M | celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); | 345 | 2.01G | for (k=0;k<=lag;k++) | 346 | 1.86G | { | 347 | 12.6G | for (i = k+fastN, d = 0; i < n; i++) | 348 | 10.7G | d = MAC16_16(d, xptr[i], xptr[i-k]); | 349 | 1.86G | ac[k] += d; | 350 | 1.86G | } | 351 | 156M | #ifdef FIXED_POINT | 352 | 156M | shift = 2*shift; | 353 | 156M | if (shift<=0) | 354 | 154M | ac[0] += SHL32((opus_int32)1, -shift); | 355 | 156M | if (ac[0] < 268435456) | 356 | 153M | { | 357 | 153M | int shift2 = 29 - EC_ILOG(ac[0]); | 358 | 1.98G | for (i=0;i<=lag;i++) | 359 | 1.82G | ac[i] = SHL32(ac[i], shift2); | 360 | 153M | shift -= shift2; | 361 | 153M | } else if (ac[0] >= 536870912) | 362 | 2.59M | { | 363 | 2.59M | int shift2=1; | 364 | 2.59M | if (ac[0] >= 1073741824) | 365 | 1.29M | shift2++; | 366 | 33.2M | for (i=0;i<=lag;i++) | 367 | 30.6M | ac[i] = SHR32(ac[i], shift2); | 368 | 2.59M | shift += shift2; | 369 | 2.59M | } | 370 | 156M | #endif | 371 | | | 372 | 156M | RESTORE_STACK; | 373 | 156M | return shift; | 374 | 156M | } |
Line | Count | Source | 293 | 156M | { | 294 | 156M | opus_val32 d; | 295 | 156M | int i, k; | 296 | 156M | int fastN=n-lag; | 297 | 156M | int shift; | 298 | 156M | const opus_val16 *xptr; | 299 | 156M | VARDECL(opus_val16, xx); | 300 | 156M | SAVE_STACK; | 301 | 156M | ALLOC(xx, n, opus_val16); | 302 | 156M | celt_assert(n>0); | 303 | 156M | celt_assert(overlap>=0); | 304 | 156M | if (overlap == 0) | 305 | 156M | { | 306 | 156M | xptr = x; | 307 | 156M | } else { | 308 | 109M | for (i=0;i<n;i++) | 309 | 109M | xx[i] = x[i]; | 310 | 12.8M | for (i=0;i<overlap;i++) | 311 | 12.7M | { | 312 | 12.7M | opus_val16 w = COEF2VAL16(window[i]); | 313 | 12.7M | xx[i] = MULT16_16_Q15(x[i],w); | 314 | 12.7M | xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w); | 315 | 12.7M | } | 316 | 97.1k | xptr = xx; | 317 | 97.1k | } | 318 | 156M | shift=0; | 319 | 156M | #ifdef FIXED_POINT | 320 | 156M | { | 321 | 156M | opus_val32 ac0; | 322 | 156M | int ac0_shift = celt_ilog2(n + (n>>4)); | 323 | 156M | ac0 = 1+(n<<7); | 324 | 156M | if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift); | 325 | 12.3G | for(i=(n&1);i<n;i+=2) | 326 | 12.1G | { | 327 | 12.1G | ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift); | 328 | 12.1G | ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift); | 329 | 12.1G | } | 330 | | /* Consider the effect of rounding-to-nearest when scaling the signal. */ | 331 | 156M | ac0 += SHR32(ac0,7); | 332 | | | 333 | 156M | shift = celt_ilog2(ac0)-30+ac0_shift+1; | 334 | 156M | shift = (shift)/2; | 335 | 156M | if (shift>0) | 336 | 2.04M | { | 337 | 347M | for(i=0;i<n;i++) | 338 | 344M | xx[i] = PSHR32(xptr[i], shift); | 339 | 2.04M | xptr = xx; | 340 | 2.04M | } else | 341 | 154M | shift = 0; | 342 | 156M | } | 343 | 156M | #endif | 344 | 156M | celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); | 345 | 2.01G | for (k=0;k<=lag;k++) | 346 | 1.86G | { | 347 | 12.6G | for (i = k+fastN, d = 0; i < n; i++) | 348 | 10.7G | d = MAC16_16(d, xptr[i], xptr[i-k]); | 349 | 1.86G | ac[k] += d; | 350 | 1.86G | } | 351 | 156M | #ifdef FIXED_POINT | 352 | 156M | shift = 2*shift; | 353 | 156M | if (shift<=0) | 354 | 154M | ac[0] += SHL32((opus_int32)1, -shift); | 355 | 156M | if (ac[0] < 268435456) | 356 | 153M | { | 357 | 153M | int shift2 = 29 - EC_ILOG(ac[0]); | 358 | 1.98G | for (i=0;i<=lag;i++) | 359 | 1.82G | ac[i] = SHL32(ac[i], shift2); | 360 | 153M | shift -= shift2; | 361 | 153M | } else if (ac[0] >= 536870912) | 362 | 2.59M | { | 363 | 2.59M | int shift2=1; | 364 | 2.59M | if (ac[0] >= 1073741824) | 365 | 1.29M | shift2++; | 366 | 33.2M | for (i=0;i<=lag;i++) | 367 | 30.6M | ac[i] = SHR32(ac[i], shift2); | 368 | 2.59M | shift += shift2; | 369 | 2.59M | } | 370 | 156M | #endif | 371 | | | 372 | 156M | RESTORE_STACK; | 373 | 156M | return shift; | 374 | 156M | } |
Line | Count | Source | 293 | 1.24M | { | 294 | 1.24M | opus_val32 d; | 295 | 1.24M | int i, k; | 296 | 1.24M | int fastN=n-lag; | 297 | 1.24M | int shift; | 298 | 1.24M | const opus_val16 *xptr; | 299 | 1.24M | VARDECL(opus_val16, xx); | 300 | 1.24M | SAVE_STACK; | 301 | 1.24M | ALLOC(xx, n, opus_val16); | 302 | 1.24M | celt_assert(n>0); | 303 | 1.24M | celt_assert(overlap>=0); | 304 | 1.24M | if (overlap == 0) | 305 | 1.17M | { | 306 | 1.17M | xptr = x; | 307 | 1.17M | } else { | 308 | 78.7M | for (i=0;i<n;i++) | 309 | 78.6M | xx[i] = x[i]; | 310 | 9.28M | for (i=0;i<overlap;i++) | 311 | 9.21M | { | 312 | 9.21M | opus_val16 w = COEF2VAL16(window[i]); | 313 | 9.21M | xx[i] = MULT16_16_Q15(x[i],w); | 314 | 9.21M | xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w); | 315 | 9.21M | } | 316 | 68.6k | xptr = xx; | 317 | 68.6k | } | 318 | 1.24M | shift=0; | 319 | | #ifdef FIXED_POINT | 320 | | { | 321 | | opus_val32 ac0; | 322 | | int ac0_shift = celt_ilog2(n + (n>>4)); | 323 | | ac0 = 1+(n<<7); | 324 | | if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),ac0_shift); | 325 | | for(i=(n&1);i<n;i+=2) | 326 | | { | 327 | | ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),ac0_shift); | 328 | | ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),ac0_shift); | 329 | | } | 330 | | /* Consider the effect of rounding-to-nearest when scaling the signal. */ | 331 | | ac0 += SHR32(ac0,7); | 332 | | | 333 | | shift = celt_ilog2(ac0)-30+ac0_shift+1; | 334 | | shift = (shift)/2; | 335 | | if (shift>0) | 336 | | { | 337 | | for(i=0;i<n;i++) | 338 | | xx[i] = PSHR32(xptr[i], shift); | 339 | | xptr = xx; | 340 | | } else | 341 | | shift = 0; | 342 | | } | 343 | | #endif | 344 | 1.24M | celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1, arch); | 345 | 8.83M | for (k=0;k<=lag;k++) | 346 | 7.58M | { | 347 | 39.9M | for (i = k+fastN, d = 0; i < n; i++) | 348 | 32.3M | d = MAC16_16(d, xptr[i], xptr[i-k]); | 349 | 7.58M | ac[k] += d; | 350 | 7.58M | } | 351 | | #ifdef FIXED_POINT | 352 | | shift = 2*shift; | 353 | | if (shift<=0) | 354 | | ac[0] += SHL32((opus_int32)1, -shift); | 355 | | if (ac[0] < 268435456) | 356 | | { | 357 | | int shift2 = 29 - EC_ILOG(ac[0]); | 358 | | for (i=0;i<=lag;i++) | 359 | | ac[i] = SHL32(ac[i], shift2); | 360 | | shift -= shift2; | 361 | | } else if (ac[0] >= 536870912) | 362 | | { | 363 | | int shift2=1; | 364 | | if (ac[0] >= 1073741824) | 365 | | shift2++; | 366 | | for (i=0;i<=lag;i++) | 367 | | ac[i] = SHR32(ac[i], shift2); | 368 | | shift += shift2; | 369 | | } | 370 | | #endif | 371 | | | 372 | 1.24M | RESTORE_STACK; | 373 | 1.24M | return shift; | 374 | 1.24M | } |
|