/src/opus/silk/fixed/burg_modified_FIX.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*********************************************************************** |
2 | | Copyright (c) 2006-2011, Skype Limited. All rights reserved. |
3 | | Redistribution and use in source and binary forms, with or without |
4 | | modification, are permitted provided that the following conditions |
5 | | are met: |
6 | | - Redistributions of source code must retain the above copyright notice, |
7 | | this list of conditions and the following disclaimer. |
8 | | - Redistributions in binary form must reproduce the above copyright |
9 | | notice, this list of conditions and the following disclaimer in the |
10 | | documentation and/or other materials provided with the distribution. |
11 | | - Neither the name of Internet Society, IETF or IETF Trust, nor the |
12 | | names of specific contributors, may be used to endorse or promote |
13 | | products derived from this software without specific prior written |
14 | | permission. |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
16 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
19 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 | | POSSIBILITY OF SUCH DAMAGE. |
26 | | ***********************************************************************/ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | |
32 | | #include "SigProc_FIX.h" |
33 | | #include "define.h" |
34 | | #include "tuning_parameters.h" |
35 | | #include "pitch.h" |
36 | | |
37 | | #define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */ |
38 | | |
39 | 318k | #define QA 25 |
40 | 318k | #define N_BITS_HEAD_ROOM 3 |
41 | 419k | #define MIN_RSHIFTS -16 |
42 | 318k | #define MAX_RSHIFTS (32 - QA) |
43 | | |
44 | | /* Compute reflection coefficients from input signal */ |
45 | | void silk_burg_modified_c( |
46 | | opus_int32 *res_nrg, /* O Residual energy */ |
47 | | opus_int *res_nrg_Q, /* O Residual energy Q value */ |
48 | | opus_int32 A_Q16[], /* O Prediction coefficients (length order) */ |
49 | | const opus_int16 x[], /* I Input signal, length: nb_subfr * ( D + subfr_length ) */ |
50 | | const opus_int32 minInvGain_Q30, /* I Inverse of max prediction gain */ |
51 | | const opus_int subfr_length, /* I Input signal subframe length (incl. D preceding samples) */ |
52 | | const opus_int nb_subfr, /* I Number of subframes stacked in x */ |
53 | | const opus_int D, /* I Order */ |
54 | | int arch /* I Run-time architecture */ |
55 | | ) |
56 | 318k | { |
57 | 318k | opus_int k, n, s, lz, rshifts, reached_max_gain; |
58 | 318k | opus_int32 C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2; |
59 | 318k | const opus_int16 *x_ptr; |
60 | 318k | opus_int32 C_first_row[ SILK_MAX_ORDER_LPC ]; |
61 | 318k | opus_int32 C_last_row[ SILK_MAX_ORDER_LPC ]; |
62 | 318k | opus_int32 Af_QA[ SILK_MAX_ORDER_LPC ]; |
63 | 318k | opus_int32 CAf[ SILK_MAX_ORDER_LPC + 1 ]; |
64 | 318k | opus_int32 CAb[ SILK_MAX_ORDER_LPC + 1 ]; |
65 | 318k | opus_int32 xcorr[ SILK_MAX_ORDER_LPC ]; |
66 | 318k | opus_int64 C0_64; |
67 | | |
68 | 318k | celt_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE ); |
69 | | |
70 | | /* Compute autocorrelations, added over subframes */ |
71 | 318k | C0_64 = silk_inner_prod16( x, x, subfr_length*nb_subfr, arch ); |
72 | 318k | lz = silk_CLZ64(C0_64); |
73 | 318k | rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz; |
74 | 318k | if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS; |
75 | 318k | if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS; |
76 | | |
77 | 318k | if (rshifts > 0) { |
78 | 14.1k | C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts ); |
79 | 304k | } else { |
80 | 304k | C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts ); |
81 | 304k | } |
82 | | |
83 | 318k | CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ |
84 | 318k | silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); |
85 | 318k | if( rshifts > 0 ) { |
86 | 63.6k | for( s = 0; s < nb_subfr; s++ ) { |
87 | 49.5k | x_ptr = x + s * subfr_length; |
88 | 615k | for( n = 1; n < D + 1; n++ ) { |
89 | 566k | C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64( |
90 | 566k | silk_inner_prod16( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts ); |
91 | 566k | } |
92 | 49.5k | } |
93 | 304k | } else { |
94 | 1.24M | for( s = 0; s < nb_subfr; s++ ) { |
95 | 945k | int i; |
96 | 945k | opus_int32 d; |
97 | 945k | x_ptr = x + s * subfr_length; |
98 | 945k | celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch ); |
99 | 11.4M | for( n = 1; n < D + 1; n++ ) { |
100 | 66.5M | for ( i = n + subfr_length - D, d = 0; i < subfr_length; i++ ) |
101 | 56.0M | d = MAC16_16( d, x_ptr[ i ], x_ptr[ i - n ] ); |
102 | 10.5M | xcorr[ n - 1 ] += d; |
103 | 10.5M | } |
104 | 11.4M | for( n = 1; n < D + 1; n++ ) { |
105 | 10.5M | C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts ); |
106 | 10.5M | } |
107 | 945k | } |
108 | 304k | } |
109 | 318k | silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) ); |
110 | | |
111 | | /* Initialize */ |
112 | 318k | CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1; /* Q(-rshifts) */ |
113 | | |
114 | 318k | invGain_Q30 = (opus_int32)1 << 30; |
115 | 318k | reached_max_gain = 0; |
116 | 3.71M | for( n = 0; n < D; n++ ) { |
117 | | /* Update first row of correlation matrix (without first element) */ |
118 | | /* Update last row of correlation matrix (without last element, stored in reversed order) */ |
119 | | /* Update C * Af */ |
120 | | /* Update C * flipud(Af) (stored in reversed order) */ |
121 | 3.41M | if( rshifts > -2 ) { |
122 | 1.92M | for( s = 0; s < nb_subfr; s++ ) { |
123 | 1.49M | x_ptr = x + s * subfr_length; |
124 | 1.49M | x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts ); /* Q(16-rshifts) */ |
125 | 1.49M | x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 16 - rshifts ); /* Q(16-rshifts) */ |
126 | 1.49M | tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 ); /* Q(QA-16) */ |
127 | 1.49M | tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 ); /* Q(QA-16) */ |
128 | 9.68M | for( k = 0; k < n; k++ ) { |
129 | 8.19M | C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ |
130 | 8.19M | C_last_row[ k ] = silk_SMLAWB( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ |
131 | 8.19M | Atmp_QA = Af_QA[ k ]; |
132 | 8.19M | tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] ); /* Q(QA-16) */ |
133 | 8.19M | tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] ); /* Q(QA-16) */ |
134 | 8.19M | } |
135 | 1.49M | tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts ); /* Q(16-rshifts) */ |
136 | 1.49M | tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts ); /* Q(16-rshifts) */ |
137 | 11.1M | for( k = 0; k <= n; k++ ) { |
138 | 9.68M | CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] ); /* Q( -rshift ) */ |
139 | 9.68M | CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] ); /* Q( -rshift ) */ |
140 | 9.68M | } |
141 | 1.49M | } |
142 | 2.98M | } else { |
143 | 12.1M | for( s = 0; s < nb_subfr; s++ ) { |
144 | 9.20M | x_ptr = x + s * subfr_length; |
145 | 9.20M | x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts ); /* Q( -rshifts ) */ |
146 | 9.20M | x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts ); /* Q( -rshifts ) */ |
147 | 9.20M | tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 ); /* Q17 */ |
148 | 9.20M | tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 ); /* Q17 */ |
149 | 57.5M | for( k = 0; k < n; k++ ) { |
150 | 48.3M | C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ |
151 | 48.3M | C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ |
152 | 48.3M | Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ |
153 | | /* We sometimes get overflows in the multiplications (even beyond +/- 2^32), |
154 | | but they cancel each other and the real result seems to always fit in a 32-bit |
155 | | signed integer. This was determined experimentally, not theoretically (unfortunately). */ |
156 | 48.3M | tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ |
157 | 48.3M | tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ |
158 | 48.3M | } |
159 | 9.20M | tmp1 = -tmp1; /* Q17 */ |
160 | 9.20M | tmp2 = -tmp2; /* Q17 */ |
161 | 66.7M | for( k = 0; k <= n; k++ ) { |
162 | 57.5M | CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1, |
163 | 57.5M | silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ); /* Q( -rshift ) */ |
164 | 57.5M | CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2, |
165 | 57.5M | silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */ |
166 | 57.5M | } |
167 | 9.20M | } |
168 | 2.98M | } |
169 | | |
170 | | /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */ |
171 | 3.41M | tmp1 = C_first_row[ n ]; /* Q( -rshifts ) */ |
172 | 3.41M | tmp2 = C_last_row[ n ]; /* Q( -rshifts ) */ |
173 | 3.41M | num = 0; /* Q( -rshifts ) */ |
174 | 3.41M | nrg = silk_ADD32( CAb[ 0 ], CAf[ 0 ] ); /* Q( 1-rshifts ) */ |
175 | 21.4M | for( k = 0; k < n; k++ ) { |
176 | 18.0M | Atmp_QA = Af_QA[ k ]; |
177 | 18.0M | lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1; |
178 | 18.0M | lz = silk_min( 32 - QA, lz ); |
179 | 18.0M | Atmp1 = silk_LSHIFT32( Atmp_QA, lz ); /* Q( QA + lz ) */ |
180 | | |
181 | 18.0M | tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
182 | 18.0M | tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
183 | 18.0M | num = silk_ADD_LSHIFT32( num, silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */ |
184 | 18.0M | nrg = silk_ADD_LSHIFT32( nrg, silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ), |
185 | 18.0M | Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */ |
186 | 18.0M | } |
187 | 3.41M | CAf[ n + 1 ] = tmp1; /* Q( -rshifts ) */ |
188 | 3.41M | CAb[ n + 1 ] = tmp2; /* Q( -rshifts ) */ |
189 | 3.41M | num = silk_ADD32( num, tmp2 ); /* Q( -rshifts ) */ |
190 | 3.41M | num = silk_LSHIFT32( -num, 1 ); /* Q( 1-rshifts ) */ |
191 | | |
192 | | /* Calculate the next order reflection (parcor) coefficient */ |
193 | 3.41M | if( silk_abs( num ) < nrg ) { |
194 | 3.41M | rc_Q31 = silk_DIV32_varQ( num, nrg, 31 ); |
195 | 3.41M | } else { |
196 | 1 | rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN; |
197 | 1 | } |
198 | | |
199 | | /* Update inverse prediction gain */ |
200 | 3.41M | tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 ); |
201 | 3.41M | tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 ); |
202 | 3.41M | if( tmp1 <= minInvGain_Q30 ) { |
203 | | /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ |
204 | 16.6k | tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ |
205 | 16.6k | rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ |
206 | 16.6k | if( rc_Q31 > 0 ) { |
207 | | /* Newton-Raphson iteration */ |
208 | 16.6k | rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ |
209 | 16.6k | rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ |
210 | 16.6k | if( num < 0 ) { |
211 | | /* Ensure adjusted reflection coefficients has the original sign */ |
212 | 13.6k | rc_Q31 = -rc_Q31; |
213 | 13.6k | } |
214 | 16.6k | } |
215 | 16.6k | invGain_Q30 = minInvGain_Q30; |
216 | 16.6k | reached_max_gain = 1; |
217 | 3.40M | } else { |
218 | 3.40M | invGain_Q30 = tmp1; |
219 | 3.40M | } |
220 | | |
221 | | /* Update the AR coefficients */ |
222 | 13.2M | for( k = 0; k < (n + 1) >> 1; k++ ) { |
223 | 9.88M | tmp1 = Af_QA[ k ]; /* QA */ |
224 | 9.88M | tmp2 = Af_QA[ n - k - 1 ]; /* QA */ |
225 | 9.88M | Af_QA[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* QA */ |
226 | 9.88M | Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* QA */ |
227 | 9.88M | } |
228 | 3.41M | Af_QA[ n ] = silk_RSHIFT32( rc_Q31, 31 - QA ); /* QA */ |
229 | | |
230 | 3.41M | if( reached_max_gain ) { |
231 | | /* Reached max prediction gain; set remaining coefficients to zero and exit loop */ |
232 | 147k | for( k = n + 1; k < D; k++ ) { |
233 | 130k | Af_QA[ k ] = 0; |
234 | 130k | } |
235 | 16.6k | break; |
236 | 16.6k | } |
237 | | |
238 | | /* Update C * Af and C * Ab */ |
239 | 28.2M | for( k = 0; k <= n + 1; k++ ) { |
240 | 24.8M | tmp1 = CAf[ k ]; /* Q( -rshifts ) */ |
241 | 24.8M | tmp2 = CAb[ n - k + 1 ]; /* Q( -rshifts ) */ |
242 | 24.8M | CAf[ k ] = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 ); /* Q( -rshifts ) */ |
243 | 24.8M | CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 ); /* Q( -rshifts ) */ |
244 | 24.8M | } |
245 | 3.40M | } |
246 | | |
247 | 318k | if( reached_max_gain ) { |
248 | 205k | for( k = 0; k < D; k++ ) { |
249 | | /* Scale coefficients */ |
250 | 188k | A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); |
251 | 188k | } |
252 | | /* Subtract energy of preceding samples from C0 */ |
253 | 16.6k | if( rshifts > 0 ) { |
254 | 13.2k | for( s = 0; s < nb_subfr; s++ ) { |
255 | 10.1k | x_ptr = x + s * subfr_length; |
256 | 10.1k | C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16( x_ptr, x_ptr, D, arch ), rshifts ); |
257 | 10.1k | } |
258 | 13.5k | } else { |
259 | 54.5k | for( s = 0; s < nb_subfr; s++ ) { |
260 | 41.0k | x_ptr = x + s * subfr_length; |
261 | 41.0k | C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts); |
262 | 41.0k | } |
263 | 13.5k | } |
264 | | /* Approximate residual energy */ |
265 | 16.6k | *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 ); |
266 | 16.6k | *res_nrg_Q = -rshifts; |
267 | 301k | } else { |
268 | | /* Return residual energy */ |
269 | 301k | nrg = CAf[ 0 ]; /* Q( -rshifts ) */ |
270 | 301k | tmp1 = (opus_int32)1 << 16; /* Q16 */ |
271 | 3.66M | for( k = 0; k < D; k++ ) { |
272 | 3.35M | Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 ); /* Q16 */ |
273 | 3.35M | nrg = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 ); /* Q( -rshifts ) */ |
274 | 3.35M | tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 ); /* Q16 */ |
275 | 3.35M | A_Q16[ k ] = -Atmp1; |
276 | 3.35M | } |
277 | 301k | *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */ |
278 | 301k | *res_nrg_Q = -rshifts; |
279 | 301k | } |
280 | 318k | } |