Line | Count | Source (jump to first uncovered line) |
1 | | /*********************************************************************** |
2 | | Copyright (c) 2006-2011, Skype Limited. All rights reserved. |
3 | | Redistribution and use in source and binary forms, with or without |
4 | | modification, are permitted provided that the following conditions |
5 | | are met: |
6 | | - Redistributions of source code must retain the above copyright notice, |
7 | | this list of conditions and the following disclaimer. |
8 | | - Redistributions in binary form must reproduce the above copyright |
9 | | notice, this list of conditions and the following disclaimer in the |
10 | | documentation and/or other materials provided with the distribution. |
11 | | - Neither the name of Internet Society, IETF or IETF Trust, nor the |
12 | | names of specific contributors, may be used to endorse or promote |
13 | | products derived from this software without specific prior written |
14 | | permission. |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
16 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
17 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
19 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
20 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
21 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
22 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
23 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
24 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 | | POSSIBILITY OF SUCH DAMAGE. |
26 | | ***********************************************************************/ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | #include "API.h" |
32 | | #include "main.h" |
33 | | #include "stack_alloc.h" |
34 | | #include "os_support.h" |
35 | | |
36 | | #ifdef ENABLE_OSCE |
37 | | #include "osce.h" |
38 | | #include "osce_structs.h" |
39 | | #endif |
40 | | |
41 | | /************************/ |
42 | | /* Decoder Super Struct */ |
43 | | /************************/ |
44 | | typedef struct { /* Top-level SILK decoder state; the opaque void *decState pointers in this file are cast to this type */ |
45 | | silk_decoder_state channel_state[ DECODER_NUM_CHANNELS ]; /* One decoder state per internal channel; silk_Decode uses index 0 always and index 1 when nChannelsInternal == 2 */ |
46 | | stereo_dec_state sStereo; /* Stereo state (pred_prev_Q13, sMid, sSide) carried between silk_Decode calls; zeroed by init/reset */ |
47 | | opus_int nChannelsAPI; /* decControl->nChannelsAPI as seen by the previous silk_Decode call; used to detect channel-count transitions */ |
48 | | opus_int nChannelsInternal; /* decControl->nChannelsInternal as seen by the previous silk_Decode call; used to detect mono <-> stereo transitions */ |
49 | | opus_int prev_decode_only_middle; /* decode_only_middle of the last frame that was not a packet loss; cleared by init/reset */ |
50 | | #ifdef ENABLE_OSCE |
51 | | OSCEModel osce_model; /* OSCE model filled by silk_LoadOSCEModels; its 'loaded' flag records whether loading succeeded */ |
52 | | #endif |
53 | | } silk_decoder; |
54 | | |
55 | | /*********************/ |
56 | | /* Decoder functions */ |
57 | | /*********************/ |
58 | | |
59 | | |
60 | | |
61 | | opus_int silk_LoadOSCEModels(void *decState, const unsigned char *data, int len) |
62 | 393k | { |
63 | | #ifdef ENABLE_OSCE |
64 | | opus_int ret = SILK_NO_ERROR; |
65 | | |
66 | | ret = osce_load_models(&((silk_decoder *)decState)->osce_model, data, len); |
67 | | ((silk_decoder *)decState)->osce_model.loaded = (ret == 0); |
68 | | return ret; |
69 | | #else |
70 | 393k | (void) decState; |
71 | 393k | (void) data; |
72 | 393k | (void) len; |
73 | 393k | return SILK_NO_ERROR; |
74 | 393k | #endif |
75 | 393k | } |
76 | | |
77 | | opus_int silk_Get_Decoder_Size( /* O Returns error code */ |
78 | | opus_int *decSizeBytes /* O Number of bytes in SILK decoder state */ |
79 | | ) |
80 | 1.20M | { |
81 | 1.20M | opus_int ret = SILK_NO_ERROR; |
82 | | |
83 | 1.20M | *decSizeBytes = sizeof( silk_decoder ); |
84 | | |
85 | 1.20M | return ret; |
86 | 1.20M | } |
87 | | |
88 | | /* Reset decoder state */ |
89 | | opus_int silk_ResetDecoder( /* O Returns error code */ |
90 | | void *decState /* I/O State */ |
91 | | ) |
92 | 386 | { |
93 | 386 | opus_int n, ret = SILK_NO_ERROR; |
94 | 386 | silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; |
95 | | |
96 | 1.15k | for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { |
97 | 772 | ret = silk_reset_decoder( &channel_state[ n ] ); |
98 | 772 | } |
99 | 386 | silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); |
100 | | /* Not strictly needed, but it's cleaner that way */ |
101 | 386 | ((silk_decoder *)decState)->prev_decode_only_middle = 0; |
102 | | |
103 | 386 | return ret; |
104 | 386 | } |
105 | | |
106 | | |
107 | | opus_int silk_InitDecoder( /* O Returns error code */ |
108 | | void *decState /* I/O State */ |
109 | | ) |
110 | 393k | { |
111 | 393k | opus_int n, ret = SILK_NO_ERROR; |
112 | 393k | silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state; |
113 | | #ifdef ENABLE_OSCE |
114 | | ((silk_decoder *)decState)->osce_model.loaded = 0; |
115 | | #endif |
116 | 393k | #ifndef USE_WEIGHTS_FILE |
117 | | /* load osce models */ |
118 | 393k | silk_LoadOSCEModels(decState, NULL, 0); |
119 | 393k | #endif |
120 | | |
121 | 1.17M | for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) { |
122 | 786k | ret = silk_init_decoder( &channel_state[ n ] ); |
123 | 786k | } |
124 | 393k | silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo)); |
125 | | /* Not strictly needed, but it's cleaner that way */ |
126 | 393k | ((silk_decoder *)decState)->prev_decode_only_middle = 0; |
127 | | |
128 | 393k | return ret; |
129 | 393k | } |
130 | | |
131 | | /* Decode a frame */ |
132 | | opus_int silk_Decode( /* O Returns error code */ |
133 | | void* decState, /* I/O State */ |
134 | | silk_DecControlStruct* decControl, /* I/O Control Structure */ |
135 | | opus_int lostFlag, /* I 0: no loss, 1 loss, 2 decode fec */ |
136 | | opus_int newPacketFlag, /* I Indicates first decoder call for this packet */ |
137 | | ec_dec *psRangeDec, /* I/O Compressor data structure */ |
138 | | opus_res *samplesOut, /* O Decoded output speech vector */ |
139 | | opus_int32 *nSamplesOut, /* O Number of samples decoded */ |
140 | | #ifdef ENABLE_DEEP_PLC |
141 | | LPCNetPLCState *lpcnet, |
142 | | #endif |
143 | | int arch /* I Run-time architecture */ |
144 | | ) |
145 | 1.17M | { |
146 | 1.17M | opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; |
147 | 1.17M | opus_int32 nSamplesOutDec, LBRR_symbol; |
148 | 1.17M | opus_int16 *samplesOut1_tmp[ 2 ]; |
149 | 1.17M | VARDECL( opus_int16, samplesOut1_tmp_storage1 ); |
150 | 1.17M | VARDECL( opus_int16, samplesOut2_tmp ); |
151 | 1.17M | opus_int32 MS_pred_Q13[ 2 ] = { 0 }; |
152 | 1.17M | opus_int16 *resample_out_ptr; |
153 | 1.17M | silk_decoder *psDec = ( silk_decoder * )decState; |
154 | 1.17M | silk_decoder_state *channel_state = psDec->channel_state; |
155 | 1.17M | opus_int has_side; |
156 | 1.17M | opus_int stereo_to_mono; |
157 | 1.17M | SAVE_STACK; |
158 | | |
159 | 1.17M | celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); |
160 | | |
161 | | /**********************************/ |
162 | | /* Test if first frame in payload */ |
163 | | /**********************************/ |
164 | 1.17M | if( newPacketFlag ) { |
165 | 2.06M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
166 | 1.28M | channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ |
167 | 1.28M | } |
168 | 785k | } |
169 | | |
170 | | /* If Mono -> Stereo transition in bitstream: init state of second channel */ |
171 | 1.17M | if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { |
172 | 417k | ret += silk_init_decoder( &channel_state[ 1 ] ); |
173 | 417k | } |
174 | | |
175 | 1.17M | stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && |
176 | 1.17M | ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); |
177 | | |
178 | 1.17M | if( channel_state[ 0 ].nFramesDecoded == 0 ) { |
179 | 2.06M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
180 | 1.28M | opus_int fs_kHz_dec; |
181 | 1.28M | if( decControl->payloadSize_ms == 0 ) { |
182 | | /* Assuming packet loss, use 10 ms */ |
183 | 0 | channel_state[ n ].nFramesPerPacket = 1; |
184 | 0 | channel_state[ n ].nb_subfr = 2; |
185 | 1.28M | } else if( decControl->payloadSize_ms == 10 ) { |
186 | 672k | channel_state[ n ].nFramesPerPacket = 1; |
187 | 672k | channel_state[ n ].nb_subfr = 2; |
188 | 672k | } else if( decControl->payloadSize_ms == 20 ) { |
189 | 248k | channel_state[ n ].nFramesPerPacket = 1; |
190 | 248k | channel_state[ n ].nb_subfr = 4; |
191 | 362k | } else if( decControl->payloadSize_ms == 40 ) { |
192 | 45.1k | channel_state[ n ].nFramesPerPacket = 2; |
193 | 45.1k | channel_state[ n ].nb_subfr = 4; |
194 | 317k | } else if( decControl->payloadSize_ms == 60 ) { |
195 | 317k | channel_state[ n ].nFramesPerPacket = 3; |
196 | 317k | channel_state[ n ].nb_subfr = 4; |
197 | 317k | } else { |
198 | 0 | celt_assert( 0 ); |
199 | 0 | RESTORE_STACK; |
200 | 0 | return SILK_DEC_INVALID_FRAME_SIZE; |
201 | 0 | } |
202 | 1.28M | fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; |
203 | 1.28M | if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { |
204 | 0 | celt_assert( 0 ); |
205 | 0 | RESTORE_STACK; |
206 | 0 | return SILK_DEC_INVALID_SAMPLING_FREQUENCY; |
207 | 0 | } |
208 | 1.28M | ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); |
209 | 1.28M | } |
210 | 785k | } |
211 | | |
212 | 1.17M | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { |
213 | 0 | silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); |
214 | 0 | silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); |
215 | 0 | silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); |
216 | 0 | } |
217 | 1.17M | psDec->nChannelsAPI = decControl->nChannelsAPI; |
218 | 1.17M | psDec->nChannelsInternal = decControl->nChannelsInternal; |
219 | | |
220 | 1.17M | if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) { |
221 | 0 | ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; |
222 | 0 | RESTORE_STACK; |
223 | 0 | return( ret ); |
224 | 0 | } |
225 | | |
226 | 1.17M | if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { |
227 | | /* First decoder call for this payload */ |
228 | | /* Decode VAD flags and LBRR flag */ |
229 | 1.51M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
230 | 2.52M | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { |
231 | 1.60M | channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); |
232 | 1.60M | } |
233 | 924k | channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); |
234 | 924k | } |
235 | | /* Decode LBRR flags */ |
236 | 1.51M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
237 | 924k | silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); |
238 | 924k | if( channel_state[ n ].LBRR_flag ) { |
239 | 384k | if( channel_state[ n ].nFramesPerPacket == 1 ) { |
240 | 156k | channel_state[ n ].LBRR_flags[ 0 ] = 1; |
241 | 227k | } else { |
242 | 227k | LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; |
243 | 888k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { |
244 | 660k | channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; |
245 | 660k | } |
246 | 227k | } |
247 | 384k | } |
248 | 924k | } |
249 | | |
250 | 587k | if( lostFlag == FLAG_DECODE_NORMAL ) { |
251 | | /* Regular decoding: skip all LBRR data */ |
252 | 1.24M | for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { |
253 | 2.00M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
254 | 1.24M | if( channel_state[ n ].LBRR_flags[ i ] ) { |
255 | 425k | opus_int16 pulses[ MAX_FRAME_LENGTH ]; |
256 | 425k | opus_int condCoding; |
257 | | |
258 | 425k | if( decControl->nChannelsInternal == 2 && n == 0 ) { |
259 | 184k | silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); |
260 | 184k | if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { |
261 | 92.6k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); |
262 | 92.6k | } |
263 | 184k | } |
264 | | /* Use conditional coding if previous frame available */ |
265 | 425k | if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { |
266 | 131k | condCoding = CODE_CONDITIONALLY; |
267 | 293k | } else { |
268 | 293k | condCoding = CODE_INDEPENDENTLY; |
269 | 293k | } |
270 | 425k | silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); |
271 | 425k | silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, |
272 | 425k | channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); |
273 | 425k | } |
274 | 1.24M | } |
275 | 758k | } |
276 | 484k | } |
277 | 587k | } |
278 | | |
279 | | /* Get MS predictor index */ |
280 | 1.17M | if( decControl->nChannelsInternal == 2 ) { |
281 | 790k | if( lostFlag == FLAG_DECODE_NORMAL || |
282 | 790k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) |
283 | 545k | { |
284 | 545k | silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); |
285 | | /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ |
286 | 545k | if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || |
287 | 545k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) |
288 | 276k | { |
289 | 276k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); |
290 | 276k | } else { |
291 | 269k | decode_only_middle = 0; |
292 | 269k | } |
293 | 545k | } else { |
294 | 734k | for( n = 0; n < 2; n++ ) { |
295 | 489k | MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; |
296 | 489k | } |
297 | 244k | } |
298 | 790k | } |
299 | | |
300 | | /* Reset side channel decoder prediction memory for first frame with side coding */ |
301 | 1.17M | if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { |
302 | 54.3k | silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); |
303 | 54.3k | silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); |
304 | 54.3k | psDec->channel_state[ 1 ].lagPrev = 100; |
305 | 54.3k | psDec->channel_state[ 1 ].LastGainIndex = 10; |
306 | 54.3k | psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; |
307 | 54.3k | psDec->channel_state[ 1 ].first_frame_after_reset = 1; |
308 | 54.3k | } |
309 | | |
310 | | /* Check if the temp buffer fits into the output PCM buffer. If it fits, |
311 | | we can delay allocating the temp buffer until after the SILK peak stack |
312 | | usage. We need to use a < and not a <= because of the two extra samples. */ |
313 | 1.17M | ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), |
314 | 1.17M | opus_int16 ); |
315 | 1.17M | samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; |
316 | 1.17M | samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; |
317 | | |
318 | 1.17M | if( lostFlag == FLAG_DECODE_NORMAL ) { |
319 | 758k | has_side = !decode_only_middle; |
320 | 758k | } else { |
321 | 415k | has_side = !psDec->prev_decode_only_middle |
322 | 415k | || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); |
323 | 415k | } |
324 | 1.17M | channel_state[ 0 ].sPLC.enable_deep_plc = decControl->enable_deep_plc; |
325 | | /* Call decoder for one frame */ |
326 | 3.13M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { |
327 | 1.96M | if( n == 0 || has_side ) { |
328 | 1.89M | opus_int FrameIndex; |
329 | 1.89M | opus_int condCoding; |
330 | | |
331 | 1.89M | FrameIndex = channel_state[ 0 ].nFramesDecoded - n; |
332 | | /* Use independent coding if no previous frame available */ |
333 | 1.89M | if( FrameIndex <= 0 ) { |
334 | 1.24M | condCoding = CODE_INDEPENDENTLY; |
335 | 1.24M | } else if( lostFlag == FLAG_DECODE_LBRR ) { |
336 | 174k | condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; |
337 | 473k | } else if( n > 0 && psDec->prev_decode_only_middle ) { |
338 | | /* If we skipped a side frame in this packet, we don't |
339 | | need LTP scaling; the LTP state is well-defined. */ |
340 | 14.9k | condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; |
341 | 458k | } else { |
342 | 458k | condCoding = CODE_CONDITIONALLY; |
343 | 458k | } |
344 | | #ifdef ENABLE_OSCE |
345 | | if ( channel_state[n].osce.method != decControl->osce_method ) { |
346 | | osce_reset( &channel_state[n].osce, decControl->osce_method ); |
347 | | } |
348 | | #endif |
349 | 1.89M | ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, |
350 | | #ifdef ENABLE_DEEP_PLC |
351 | | n == 0 ? lpcnet : NULL, |
352 | | #endif |
353 | | #ifdef ENABLE_OSCE |
354 | | &psDec->osce_model, |
355 | | #endif |
356 | 1.89M | arch); |
357 | 1.89M | } else { |
358 | 70.8k | silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); |
359 | 70.8k | } |
360 | 1.96M | channel_state[ n ].nFramesDecoded++; |
361 | 1.96M | } |
362 | | |
363 | 1.17M | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { |
364 | | /* Convert Mid/Side to Left/Right */ |
365 | 305k | silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); |
366 | 868k | } else { |
367 | | /* Buffering */ |
368 | 868k | silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); |
369 | 868k | silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); |
370 | 868k | } |
371 | | |
372 | | /* Number of output samples */ |
373 | 1.17M | *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); |
374 | | |
375 | | /* Set up pointers to temp buffers */ |
376 | 1.17M | ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 ); |
377 | 1.17M | resample_out_ptr = samplesOut2_tmp; |
378 | | |
379 | 2.65M | for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { |
380 | | |
381 | | /* Resample decoded signal to API_sampleRate */ |
382 | 1.47M | ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); |
383 | | |
384 | | /* Interleave if stereo output and stereo stream */ |
385 | 1.47M | if( decControl->nChannelsAPI == 2 ) { |
386 | 316M | for( i = 0; i < *nSamplesOut; i++ ) { |
387 | 315M | samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); |
388 | 315M | } |
389 | 790k | } else { |
390 | 271M | for( i = 0; i < *nSamplesOut; i++ ) { |
391 | 270M | samplesOut[ i ] = INT16TORES(resample_out_ptr[ i ]); |
392 | 270M | } |
393 | 688k | } |
394 | 1.47M | } |
395 | | |
396 | | /* Create two channel output from mono stream */ |
397 | 1.17M | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { |
398 | 180k | if ( stereo_to_mono ){ |
399 | | /* Resample right channel for newly collapsed stereo just in case |
400 | | we weren't doing collapsing when switching to mono */ |
401 | 0 | ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); |
402 | |
|
403 | 0 | for( i = 0; i < *nSamplesOut; i++ ) { |
404 | 0 | samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); |
405 | 0 | } |
406 | 180k | } else { |
407 | 68.3M | for( i = 0; i < *nSamplesOut; i++ ) { |
408 | 68.1M | samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; |
409 | 68.1M | } |
410 | 180k | } |
411 | 180k | } |
412 | | |
413 | | /* Export pitch lag, measured at 48 kHz sampling rate */ |
414 | 1.17M | if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { |
415 | 249k | int mult_tab[ 3 ] = { 6, 4, 3 }; |
416 | 249k | decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; |
417 | 924k | } else { |
418 | 924k | decControl->prevPitchLag = 0; |
419 | 924k | } |
420 | | |
421 | 1.17M | if( lostFlag == FLAG_PACKET_LOST ) { |
422 | | /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" |
423 | | if we lose packets when the energy is going down */ |
424 | 557k | for ( i = 0; i < psDec->nChannelsInternal; i++ ) |
425 | 359k | psDec->channel_state[ i ].LastGainIndex = 10; |
426 | 975k | } else { |
427 | 975k | psDec->prev_decode_only_middle = decode_only_middle; |
428 | 975k | } |
429 | 1.17M | RESTORE_STACK; |
430 | 1.17M | return ret; |
431 | 1.17M | } Line | Count | Source | 145 | 391k | { | 146 | 391k | opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; | 147 | 391k | opus_int32 nSamplesOutDec, LBRR_symbol; | 148 | 391k | opus_int16 *samplesOut1_tmp[ 2 ]; | 149 | 391k | VARDECL( opus_int16, samplesOut1_tmp_storage1 ); | 150 | 391k | VARDECL( opus_int16, samplesOut2_tmp ); | 151 | 391k | opus_int32 MS_pred_Q13[ 2 ] = { 0 }; | 152 | 391k | opus_int16 *resample_out_ptr; | 153 | 391k | silk_decoder *psDec = ( silk_decoder * )decState; | 154 | 391k | silk_decoder_state *channel_state = psDec->channel_state; | 155 | 391k | opus_int has_side; | 156 | 391k | opus_int stereo_to_mono; | 157 | 391k | SAVE_STACK; | 158 | | | 159 | 391k | celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); | 160 | | | 161 | | /**********************************/ | 162 | | /* Test if first frame in payload */ | 163 | | /**********************************/ | 164 | 391k | if( newPacketFlag ) { | 165 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 166 | 427k | channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ | 167 | 427k | } | 168 | 261k | } | 169 | | | 170 | | /* If Mono -> Stereo transition in bitstream: init state of second channel */ | 171 | 391k | if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { | 172 | 139k | ret += silk_init_decoder( &channel_state[ 1 ] ); | 173 | 139k | } | 174 | | | 175 | 391k | stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && | 176 | 391k | ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); | 177 | | | 178 | 391k | if( channel_state[ 0 ].nFramesDecoded == 0 ) { | 179 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 180 | 427k | opus_int fs_kHz_dec; | 181 | 427k | if( decControl->payloadSize_ms == 0 ) { | 182 | | /* Assuming packet loss, use 10 ms */ | 183 | 0 | channel_state[ n ].nFramesPerPacket = 1; | 184 | 0 | 
channel_state[ n ].nb_subfr = 2; | 185 | 427k | } else if( decControl->payloadSize_ms == 10 ) { | 186 | 224k | channel_state[ n ].nFramesPerPacket = 1; | 187 | 224k | channel_state[ n ].nb_subfr = 2; | 188 | 224k | } else if( decControl->payloadSize_ms == 20 ) { | 189 | 82.8k | channel_state[ n ].nFramesPerPacket = 1; | 190 | 82.8k | channel_state[ n ].nb_subfr = 4; | 191 | 120k | } else if( decControl->payloadSize_ms == 40 ) { | 192 | 15.0k | channel_state[ n ].nFramesPerPacket = 2; | 193 | 15.0k | channel_state[ n ].nb_subfr = 4; | 194 | 105k | } else if( decControl->payloadSize_ms == 60 ) { | 195 | 105k | channel_state[ n ].nFramesPerPacket = 3; | 196 | 105k | channel_state[ n ].nb_subfr = 4; | 197 | 105k | } else { | 198 | 0 | celt_assert( 0 ); | 199 | 0 | RESTORE_STACK; | 200 | 0 | return SILK_DEC_INVALID_FRAME_SIZE; | 201 | 0 | } | 202 | 427k | fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; | 203 | 427k | if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { | 204 | 0 | celt_assert( 0 ); | 205 | 0 | RESTORE_STACK; | 206 | 0 | return SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 207 | 0 | } | 208 | 427k | ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); | 209 | 427k | } | 210 | 261k | } | 211 | | | 212 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { | 213 | 0 | silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); | 214 | 0 | silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); | 215 | 0 | silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); | 216 | 0 | } | 217 | 391k | psDec->nChannelsAPI = decControl->nChannelsAPI; | 218 | 391k | psDec->nChannelsInternal = decControl->nChannelsInternal; | 219 | | | 220 | 391k | if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 
|| decControl->API_sampleRate < 8000 ) { | 221 | 0 | ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 222 | 0 | RESTORE_STACK; | 223 | 0 | return( ret ); | 224 | 0 | } | 225 | | | 226 | 391k | if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { | 227 | | /* First decoder call for this payload */ | 228 | | /* Decode VAD flags and LBRR flag */ | 229 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 230 | 843k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 231 | 534k | channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); | 232 | 534k | } | 233 | 308k | channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); | 234 | 308k | } | 235 | | /* Decode LBRR flags */ | 236 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 237 | 308k | silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); | 238 | 308k | if( channel_state[ n ].LBRR_flag ) { | 239 | 128k | if( channel_state[ n ].nFramesPerPacket == 1 ) { | 240 | 52.0k | channel_state[ n ].LBRR_flags[ 0 ] = 1; | 241 | 75.9k | } else { | 242 | 75.9k | LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; | 243 | 296k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 244 | 220k | channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; | 245 | 220k | } | 246 | 75.9k | } | 247 | 128k | } | 248 | 308k | } | 249 | | | 250 | 195k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 251 | | /* Regular decoding: skip all LBRR data */ | 252 | 414k | for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { | 253 | 667k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 254 | 414k | if( channel_state[ n ].LBRR_flags[ i ] ) { | 255 | 141k | opus_int16 pulses[ MAX_FRAME_LENGTH ]; | 256 | 141k | opus_int condCoding; | 257 | | | 258 | 141k | if( decControl->nChannelsInternal == 2 && n == 0 ) { | 259 | 61.3k | 
silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 260 | 61.3k | if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { | 261 | 30.8k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 262 | 30.8k | } | 263 | 61.3k | } | 264 | | /* Use conditional coding if previous frame available */ | 265 | 141k | if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { | 266 | 43.7k | condCoding = CODE_CONDITIONALLY; | 267 | 97.9k | } else { | 268 | 97.9k | condCoding = CODE_INDEPENDENTLY; | 269 | 97.9k | } | 270 | 141k | silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); | 271 | 141k | silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, | 272 | 141k | channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); | 273 | 141k | } | 274 | 414k | } | 275 | 252k | } | 276 | 161k | } | 277 | 195k | } | 278 | | | 279 | | /* Get MS predictor index */ | 280 | 391k | if( decControl->nChannelsInternal == 2 ) { | 281 | 263k | if( lostFlag == FLAG_DECODE_NORMAL || | 282 | 263k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) | 283 | 181k | { | 284 | 181k | silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 285 | | /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ | 286 | 181k | if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || | 287 | 181k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) | 288 | 92.0k | { | 289 | 92.0k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 290 | 92.0k | } else { | 291 | 89.6k | decode_only_middle = 0; | 292 | 89.6k | } | 293 | 181k | } else { | 294 | 244k | for( n = 0; n < 2; n++ ) { | 295 | 163k | MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; | 296 | 163k | } | 297 | 81.6k | } | 298 | 263k | } | 299 | | | 300 | | /* Reset side 
channel decoder prediction memory for first frame with side coding */ | 301 | 391k | if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { | 302 | 18.1k | silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); | 303 | 18.1k | silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); | 304 | 18.1k | psDec->channel_state[ 1 ].lagPrev = 100; | 305 | 18.1k | psDec->channel_state[ 1 ].LastGainIndex = 10; | 306 | 18.1k | psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; | 307 | 18.1k | psDec->channel_state[ 1 ].first_frame_after_reset = 1; | 308 | 18.1k | } | 309 | | | 310 | | /* Check if the temp buffer fits into the output PCM buffer. If it fits, | 311 | | we can delay allocating the temp buffer until after the SILK peak stack | 312 | | usage. We need to use a < and not a <= because of the two extra samples. */ | 313 | 391k | ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), | 314 | 391k | opus_int16 ); | 315 | 391k | samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; | 316 | 391k | samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; | 317 | | | 318 | 391k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 319 | 252k | has_side = !decode_only_middle; | 320 | 252k | } else { | 321 | 138k | has_side = !psDec->prev_decode_only_middle | 322 | 138k | || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); | 323 | 138k | } | 324 | 391k | channel_state[ 0 ].sPLC.enable_deep_plc = decControl->enable_deep_plc; | 325 | | /* Call decoder for one frame */ | 326 | 1.04M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 327 | 654k | if( n == 0 || has_side ) { | 328 | 631k | opus_int FrameIndex; | 329 | 631k | opus_int condCoding; | 330 | | | 331 | 631k | 
FrameIndex = channel_state[ 0 ].nFramesDecoded - n; | 332 | | /* Use independent coding if no previous frame available */ | 333 | 631k | if( FrameIndex <= 0 ) { | 334 | 415k | condCoding = CODE_INDEPENDENTLY; | 335 | 415k | } else if( lostFlag == FLAG_DECODE_LBRR ) { | 336 | 58.1k | condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; | 337 | 157k | } else if( n > 0 && psDec->prev_decode_only_middle ) { | 338 | | /* If we skipped a side frame in this packet, we don't | 339 | | need LTP scaling; the LTP state is well-defined. */ | 340 | 4.96k | condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; | 341 | 152k | } else { | 342 | 152k | condCoding = CODE_CONDITIONALLY; | 343 | 152k | } | 344 | | #ifdef ENABLE_OSCE | 345 | | if ( channel_state[n].osce.method != decControl->osce_method ) { | 346 | | osce_reset( &channel_state[n].osce, decControl->osce_method ); | 347 | | } | 348 | | #endif | 349 | 631k | ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, | 350 | | #ifdef ENABLE_DEEP_PLC | 351 | | n == 0 ? 
lpcnet : NULL, | 352 | | #endif | 353 | | #ifdef ENABLE_OSCE | 354 | | &psDec->osce_model, | 355 | | #endif | 356 | 631k | arch); | 357 | 631k | } else { | 358 | 23.6k | silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); | 359 | 23.6k | } | 360 | 654k | channel_state[ n ].nFramesDecoded++; | 361 | 654k | } | 362 | | | 363 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { | 364 | | /* Convert Mid/Side to Left/Right */ | 365 | 101k | silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); | 366 | 289k | } else { | 367 | | /* Buffering */ | 368 | 289k | silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); | 369 | 289k | silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); | 370 | 289k | } | 371 | | | 372 | | /* Number of output samples */ | 373 | 391k | *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); | 374 | | | 375 | | /* Set up pointers to temp buffers */ | 376 | 391k | ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 ); | 377 | 391k | resample_out_ptr = samplesOut2_tmp; | 378 | | | 379 | 884k | for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { | 380 | | | 381 | | /* Resample decoded signal to API_sampleRate */ | 382 | 493k | ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); | 383 | | | 384 | | /* Interleave if stereo output and stereo stream */ | 385 | 493k | if( decControl->nChannelsAPI == 2 ) { | 386 | 105M | for( i = 0; i < *nSamplesOut; i++ ) { | 387 | 105M | samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 388 | 105M | } | 389 | 263k | } else { | 390 | 90.3M | for( i = 0; i < *nSamplesOut; i++ ) { | 391 | 90.1M | samplesOut[ i ] 
= INT16TORES(resample_out_ptr[ i ]); | 392 | 90.1M | } | 393 | 229k | } | 394 | 493k | } | 395 | | | 396 | | /* Create two channel output from mono stream */ | 397 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { | 398 | 60.0k | if ( stereo_to_mono ){ | 399 | | /* Resample right channel for newly collapsed stereo just in case | 400 | | we weren't doing collapsing when switching to mono */ | 401 | 0 | ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); | 402 | |
| 403 | 0 | for( i = 0; i < *nSamplesOut; i++ ) { | 404 | 0 | samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 405 | 0 | } | 406 | 60.0k | } else { | 407 | 22.7M | for( i = 0; i < *nSamplesOut; i++ ) { | 408 | 22.7M | samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; | 409 | 22.7M | } | 410 | 60.0k | } | 411 | 60.0k | } | 412 | | | 413 | | /* Export pitch lag, measured at 48 kHz sampling rate */ | 414 | 391k | if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { | 415 | 83.0k | int mult_tab[ 3 ] = { 6, 4, 3 }; | 416 | 83.0k | decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; | 417 | 308k | } else { | 418 | 308k | decControl->prevPitchLag = 0; | 419 | 308k | } | 420 | | | 421 | 391k | if( lostFlag == FLAG_PACKET_LOST ) { | 422 | | /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" | 423 | | if we lose packets when the energy is going down */ | 424 | 185k | for ( i = 0; i < psDec->nChannelsInternal; i++ ) | 425 | 119k | psDec->channel_state[ i ].LastGainIndex = 10; | 426 | 325k | } else { | 427 | 325k | psDec->prev_decode_only_middle = decode_only_middle; | 428 | 325k | } | 429 | 391k | RESTORE_STACK; | 430 | 391k | return ret; | 431 | 391k | } |
Line | Count | Source | 145 | 391k | { | 146 | 391k | opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; | 147 | 391k | opus_int32 nSamplesOutDec, LBRR_symbol; | 148 | 391k | opus_int16 *samplesOut1_tmp[ 2 ]; | 149 | 391k | VARDECL( opus_int16, samplesOut1_tmp_storage1 ); | 150 | 391k | VARDECL( opus_int16, samplesOut2_tmp ); | 151 | 391k | opus_int32 MS_pred_Q13[ 2 ] = { 0 }; | 152 | 391k | opus_int16 *resample_out_ptr; | 153 | 391k | silk_decoder *psDec = ( silk_decoder * )decState; | 154 | 391k | silk_decoder_state *channel_state = psDec->channel_state; | 155 | 391k | opus_int has_side; | 156 | 391k | opus_int stereo_to_mono; | 157 | 391k | SAVE_STACK; | 158 | | | 159 | 391k | celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); | 160 | | | 161 | | /**********************************/ | 162 | | /* Test if first frame in payload */ | 163 | | /**********************************/ | 164 | 391k | if( newPacketFlag ) { | 165 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 166 | 427k | channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ | 167 | 427k | } | 168 | 261k | } | 169 | | | 170 | | /* If Mono -> Stereo transition in bitstream: init state of second channel */ | 171 | 391k | if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { | 172 | 139k | ret += silk_init_decoder( &channel_state[ 1 ] ); | 173 | 139k | } | 174 | | | 175 | 391k | stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && | 176 | 391k | ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); | 177 | | | 178 | 391k | if( channel_state[ 0 ].nFramesDecoded == 0 ) { | 179 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 180 | 427k | opus_int fs_kHz_dec; | 181 | 427k | if( decControl->payloadSize_ms == 0 ) { | 182 | | /* Assuming packet loss, use 10 ms */ | 183 | 0 | channel_state[ n ].nFramesPerPacket = 1; | 184 | 0 | channel_state[ n 
].nb_subfr = 2; | 185 | 427k | } else if( decControl->payloadSize_ms == 10 ) { | 186 | 224k | channel_state[ n ].nFramesPerPacket = 1; | 187 | 224k | channel_state[ n ].nb_subfr = 2; | 188 | 224k | } else if( decControl->payloadSize_ms == 20 ) { | 189 | 82.8k | channel_state[ n ].nFramesPerPacket = 1; | 190 | 82.8k | channel_state[ n ].nb_subfr = 4; | 191 | 120k | } else if( decControl->payloadSize_ms == 40 ) { | 192 | 15.0k | channel_state[ n ].nFramesPerPacket = 2; | 193 | 15.0k | channel_state[ n ].nb_subfr = 4; | 194 | 105k | } else if( decControl->payloadSize_ms == 60 ) { | 195 | 105k | channel_state[ n ].nFramesPerPacket = 3; | 196 | 105k | channel_state[ n ].nb_subfr = 4; | 197 | 105k | } else { | 198 | 0 | celt_assert( 0 ); | 199 | 0 | RESTORE_STACK; | 200 | 0 | return SILK_DEC_INVALID_FRAME_SIZE; | 201 | 0 | } | 202 | 427k | fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; | 203 | 427k | if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { | 204 | 0 | celt_assert( 0 ); | 205 | 0 | RESTORE_STACK; | 206 | 0 | return SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 207 | 0 | } | 208 | 427k | ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); | 209 | 427k | } | 210 | 261k | } | 211 | | | 212 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { | 213 | 0 | silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); | 214 | 0 | silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); | 215 | 0 | silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); | 216 | 0 | } | 217 | 391k | psDec->nChannelsAPI = decControl->nChannelsAPI; | 218 | 391k | psDec->nChannelsInternal = decControl->nChannelsInternal; | 219 | | | 220 | 391k | if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || 
decControl->API_sampleRate < 8000 ) { | 221 | 0 | ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 222 | 0 | RESTORE_STACK; | 223 | 0 | return( ret ); | 224 | 0 | } | 225 | | | 226 | 391k | if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { | 227 | | /* First decoder call for this payload */ | 228 | | /* Decode VAD flags and LBRR flag */ | 229 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 230 | 843k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 231 | 534k | channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); | 232 | 534k | } | 233 | 308k | channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); | 234 | 308k | } | 235 | | /* Decode LBRR flags */ | 236 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 237 | 308k | silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); | 238 | 308k | if( channel_state[ n ].LBRR_flag ) { | 239 | 128k | if( channel_state[ n ].nFramesPerPacket == 1 ) { | 240 | 52.0k | channel_state[ n ].LBRR_flags[ 0 ] = 1; | 241 | 75.9k | } else { | 242 | 75.9k | LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; | 243 | 296k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 244 | 220k | channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; | 245 | 220k | } | 246 | 75.9k | } | 247 | 128k | } | 248 | 308k | } | 249 | | | 250 | 195k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 251 | | /* Regular decoding: skip all LBRR data */ | 252 | 414k | for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { | 253 | 667k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 254 | 414k | if( channel_state[ n ].LBRR_flags[ i ] ) { | 255 | 141k | opus_int16 pulses[ MAX_FRAME_LENGTH ]; | 256 | 141k | opus_int condCoding; | 257 | | | 258 | 141k | if( decControl->nChannelsInternal == 2 && n == 0 ) { | 259 | 61.3k | 
silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 260 | 61.3k | if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { | 261 | 30.8k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 262 | 30.8k | } | 263 | 61.3k | } | 264 | | /* Use conditional coding if previous frame available */ | 265 | 141k | if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { | 266 | 43.7k | condCoding = CODE_CONDITIONALLY; | 267 | 97.9k | } else { | 268 | 97.9k | condCoding = CODE_INDEPENDENTLY; | 269 | 97.9k | } | 270 | 141k | silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); | 271 | 141k | silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, | 272 | 141k | channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); | 273 | 141k | } | 274 | 414k | } | 275 | 252k | } | 276 | 161k | } | 277 | 195k | } | 278 | | | 279 | | /* Get MS predictor index */ | 280 | 391k | if( decControl->nChannelsInternal == 2 ) { | 281 | 263k | if( lostFlag == FLAG_DECODE_NORMAL || | 282 | 263k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) | 283 | 181k | { | 284 | 181k | silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 285 | | /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ | 286 | 181k | if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || | 287 | 181k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) | 288 | 92.0k | { | 289 | 92.0k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 290 | 92.0k | } else { | 291 | 89.6k | decode_only_middle = 0; | 292 | 89.6k | } | 293 | 181k | } else { | 294 | 244k | for( n = 0; n < 2; n++ ) { | 295 | 163k | MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; | 296 | 163k | } | 297 | 81.6k | } | 298 | 263k | } | 299 | | | 300 | | /* Reset side 
channel decoder prediction memory for first frame with side coding */ | 301 | 391k | if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { | 302 | 18.1k | silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); | 303 | 18.1k | silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); | 304 | 18.1k | psDec->channel_state[ 1 ].lagPrev = 100; | 305 | 18.1k | psDec->channel_state[ 1 ].LastGainIndex = 10; | 306 | 18.1k | psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; | 307 | 18.1k | psDec->channel_state[ 1 ].first_frame_after_reset = 1; | 308 | 18.1k | } | 309 | | | 310 | | /* Check if the temp buffer fits into the output PCM buffer. If it fits, | 311 | | we can delay allocating the temp buffer until after the SILK peak stack | 312 | | usage. We need to use a < and not a <= because of the two extra samples. */ | 313 | 391k | ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), | 314 | 391k | opus_int16 ); | 315 | 391k | samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; | 316 | 391k | samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; | 317 | | | 318 | 391k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 319 | 252k | has_side = !decode_only_middle; | 320 | 252k | } else { | 321 | 138k | has_side = !psDec->prev_decode_only_middle | 322 | 138k | || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); | 323 | 138k | } | 324 | 391k | channel_state[ 0 ].sPLC.enable_deep_plc = decControl->enable_deep_plc; | 325 | | /* Call decoder for one frame */ | 326 | 1.04M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 327 | 654k | if( n == 0 || has_side ) { | 328 | 631k | opus_int FrameIndex; | 329 | 631k | opus_int condCoding; | 330 | | | 331 | 631k | 
FrameIndex = channel_state[ 0 ].nFramesDecoded - n; | 332 | | /* Use independent coding if no previous frame available */ | 333 | 631k | if( FrameIndex <= 0 ) { | 334 | 415k | condCoding = CODE_INDEPENDENTLY; | 335 | 415k | } else if( lostFlag == FLAG_DECODE_LBRR ) { | 336 | 58.1k | condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; | 337 | 157k | } else if( n > 0 && psDec->prev_decode_only_middle ) { | 338 | | /* If we skipped a side frame in this packet, we don't | 339 | | need LTP scaling; the LTP state is well-defined. */ | 340 | 4.96k | condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; | 341 | 152k | } else { | 342 | 152k | condCoding = CODE_CONDITIONALLY; | 343 | 152k | } | 344 | | #ifdef ENABLE_OSCE | 345 | | if ( channel_state[n].osce.method != decControl->osce_method ) { | 346 | | osce_reset( &channel_state[n].osce, decControl->osce_method ); | 347 | | } | 348 | | #endif | 349 | 631k | ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, | 350 | | #ifdef ENABLE_DEEP_PLC | 351 | | n == 0 ? 
lpcnet : NULL, | 352 | | #endif | 353 | | #ifdef ENABLE_OSCE | 354 | | &psDec->osce_model, | 355 | | #endif | 356 | 631k | arch); | 357 | 631k | } else { | 358 | 23.6k | silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); | 359 | 23.6k | } | 360 | 654k | channel_state[ n ].nFramesDecoded++; | 361 | 654k | } | 362 | | | 363 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { | 364 | | /* Convert Mid/Side to Left/Right */ | 365 | 101k | silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); | 366 | 289k | } else { | 367 | | /* Buffering */ | 368 | 289k | silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); | 369 | 289k | silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); | 370 | 289k | } | 371 | | | 372 | | /* Number of output samples */ | 373 | 391k | *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); | 374 | | | 375 | | /* Set up pointers to temp buffers */ | 376 | 391k | ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 ); | 377 | 391k | resample_out_ptr = samplesOut2_tmp; | 378 | | | 379 | 884k | for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { | 380 | | | 381 | | /* Resample decoded signal to API_sampleRate */ | 382 | 493k | ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); | 383 | | | 384 | | /* Interleave if stereo output and stereo stream */ | 385 | 493k | if( decControl->nChannelsAPI == 2 ) { | 386 | 105M | for( i = 0; i < *nSamplesOut; i++ ) { | 387 | 105M | samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 388 | 105M | } | 389 | 263k | } else { | 390 | 90.3M | for( i = 0; i < *nSamplesOut; i++ ) { | 391 | 90.1M | samplesOut[ i ] 
= INT16TORES(resample_out_ptr[ i ]); | 392 | 90.1M | } | 393 | 229k | } | 394 | 493k | } | 395 | | | 396 | | /* Create two channel output from mono stream */ | 397 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { | 398 | 60.0k | if ( stereo_to_mono ){ | 399 | | /* Resample right channel for newly collapsed stereo just in case | 400 | | we weren't doing collapsing when switching to mono */ | 401 | 0 | ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); | 402 | |
| 403 | 0 | for( i = 0; i < *nSamplesOut; i++ ) { | 404 | 0 | samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 405 | 0 | } | 406 | 60.0k | } else { | 407 | 22.7M | for( i = 0; i < *nSamplesOut; i++ ) { | 408 | 22.7M | samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; | 409 | 22.7M | } | 410 | 60.0k | } | 411 | 60.0k | } | 412 | | | 413 | | /* Export pitch lag, measured at 48 kHz sampling rate */ | 414 | 391k | if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { | 415 | 83.0k | int mult_tab[ 3 ] = { 6, 4, 3 }; | 416 | 83.0k | decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; | 417 | 308k | } else { | 418 | 308k | decControl->prevPitchLag = 0; | 419 | 308k | } | 420 | | | 421 | 391k | if( lostFlag == FLAG_PACKET_LOST ) { | 422 | | /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" | 423 | | if we lose packets when the energy is going down */ | 424 | 185k | for ( i = 0; i < psDec->nChannelsInternal; i++ ) | 425 | 119k | psDec->channel_state[ i ].LastGainIndex = 10; | 426 | 325k | } else { | 427 | 325k | psDec->prev_decode_only_middle = decode_only_middle; | 428 | 325k | } | 429 | 391k | RESTORE_STACK; | 430 | 391k | return ret; | 431 | 391k | } |
Line | Count | Source | 145 | 391k | { | 146 | 391k | opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR; | 147 | 391k | opus_int32 nSamplesOutDec, LBRR_symbol; | 148 | 391k | opus_int16 *samplesOut1_tmp[ 2 ]; | 149 | 391k | VARDECL( opus_int16, samplesOut1_tmp_storage1 ); | 150 | 391k | VARDECL( opus_int16, samplesOut2_tmp ); | 151 | 391k | opus_int32 MS_pred_Q13[ 2 ] = { 0 }; | 152 | 391k | opus_int16 *resample_out_ptr; | 153 | 391k | silk_decoder *psDec = ( silk_decoder * )decState; | 154 | 391k | silk_decoder_state *channel_state = psDec->channel_state; | 155 | 391k | opus_int has_side; | 156 | 391k | opus_int stereo_to_mono; | 157 | 391k | SAVE_STACK; | 158 | | | 159 | 391k | celt_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 ); | 160 | | | 161 | | /**********************************/ | 162 | | /* Test if first frame in payload */ | 163 | | /**********************************/ | 164 | 391k | if( newPacketFlag ) { | 165 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 166 | 427k | channel_state[ n ].nFramesDecoded = 0; /* Used to count frames in packet */ | 167 | 427k | } | 168 | 261k | } | 169 | | | 170 | | /* If Mono -> Stereo transition in bitstream: init state of second channel */ | 171 | 391k | if( decControl->nChannelsInternal > psDec->nChannelsInternal ) { | 172 | 139k | ret += silk_init_decoder( &channel_state[ 1 ] ); | 173 | 139k | } | 174 | | | 175 | 391k | stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 && | 176 | 391k | ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz ); | 177 | | | 178 | 391k | if( channel_state[ 0 ].nFramesDecoded == 0 ) { | 179 | 689k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 180 | 427k | opus_int fs_kHz_dec; | 181 | 427k | if( decControl->payloadSize_ms == 0 ) { | 182 | | /* Assuming packet loss, use 10 ms */ | 183 | 0 | channel_state[ n ].nFramesPerPacket = 1; | 184 | 0 | channel_state[ n 
].nb_subfr = 2; | 185 | 427k | } else if( decControl->payloadSize_ms == 10 ) { | 186 | 224k | channel_state[ n ].nFramesPerPacket = 1; | 187 | 224k | channel_state[ n ].nb_subfr = 2; | 188 | 224k | } else if( decControl->payloadSize_ms == 20 ) { | 189 | 82.8k | channel_state[ n ].nFramesPerPacket = 1; | 190 | 82.8k | channel_state[ n ].nb_subfr = 4; | 191 | 120k | } else if( decControl->payloadSize_ms == 40 ) { | 192 | 15.0k | channel_state[ n ].nFramesPerPacket = 2; | 193 | 15.0k | channel_state[ n ].nb_subfr = 4; | 194 | 105k | } else if( decControl->payloadSize_ms == 60 ) { | 195 | 105k | channel_state[ n ].nFramesPerPacket = 3; | 196 | 105k | channel_state[ n ].nb_subfr = 4; | 197 | 105k | } else { | 198 | 0 | celt_assert( 0 ); | 199 | 0 | RESTORE_STACK; | 200 | 0 | return SILK_DEC_INVALID_FRAME_SIZE; | 201 | 0 | } | 202 | 427k | fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1; | 203 | 427k | if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) { | 204 | 0 | celt_assert( 0 ); | 205 | 0 | RESTORE_STACK; | 206 | 0 | return SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 207 | 0 | } | 208 | 427k | ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate ); | 209 | 427k | } | 210 | 261k | } | 211 | | | 212 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) { | 213 | 0 | silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) ); | 214 | 0 | silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) ); | 215 | 0 | silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); | 216 | 0 | } | 217 | 391k | psDec->nChannelsAPI = decControl->nChannelsAPI; | 218 | 391k | psDec->nChannelsInternal = decControl->nChannelsInternal; | 219 | | | 220 | 391k | if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || 
decControl->API_sampleRate < 8000 ) { | 221 | 0 | ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY; | 222 | 0 | RESTORE_STACK; | 223 | 0 | return( ret ); | 224 | 0 | } | 225 | | | 226 | 391k | if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) { | 227 | | /* First decoder call for this payload */ | 228 | | /* Decode VAD flags and LBRR flag */ | 229 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 230 | 843k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 231 | 534k | channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1); | 232 | 534k | } | 233 | 308k | channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1); | 234 | 308k | } | 235 | | /* Decode LBRR flags */ | 236 | 504k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 237 | 308k | silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) ); | 238 | 308k | if( channel_state[ n ].LBRR_flag ) { | 239 | 128k | if( channel_state[ n ].nFramesPerPacket == 1 ) { | 240 | 52.0k | channel_state[ n ].LBRR_flags[ 0 ] = 1; | 241 | 75.9k | } else { | 242 | 75.9k | LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1; | 243 | 296k | for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) { | 244 | 220k | channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1; | 245 | 220k | } | 246 | 75.9k | } | 247 | 128k | } | 248 | 308k | } | 249 | | | 250 | 195k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 251 | | /* Regular decoding: skip all LBRR data */ | 252 | 414k | for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) { | 253 | 667k | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 254 | 414k | if( channel_state[ n ].LBRR_flags[ i ] ) { | 255 | 141k | opus_int16 pulses[ MAX_FRAME_LENGTH ]; | 256 | 141k | opus_int condCoding; | 257 | | | 258 | 141k | if( decControl->nChannelsInternal == 2 && n == 0 ) { | 259 | 61.3k | 
silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 260 | 61.3k | if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) { | 261 | 30.8k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 262 | 30.8k | } | 263 | 61.3k | } | 264 | | /* Use conditional coding if previous frame available */ | 265 | 141k | if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) { | 266 | 43.7k | condCoding = CODE_CONDITIONALLY; | 267 | 97.9k | } else { | 268 | 97.9k | condCoding = CODE_INDEPENDENTLY; | 269 | 97.9k | } | 270 | 141k | silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding ); | 271 | 141k | silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType, | 272 | 141k | channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length ); | 273 | 141k | } | 274 | 414k | } | 275 | 252k | } | 276 | 161k | } | 277 | 195k | } | 278 | | | 279 | | /* Get MS predictor index */ | 280 | 391k | if( decControl->nChannelsInternal == 2 ) { | 281 | 263k | if( lostFlag == FLAG_DECODE_NORMAL || | 282 | 263k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) ) | 283 | 181k | { | 284 | 181k | silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 ); | 285 | | /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */ | 286 | 181k | if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) || | 287 | 181k | ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ) | 288 | 92.0k | { | 289 | 92.0k | silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle ); | 290 | 92.0k | } else { | 291 | 89.6k | decode_only_middle = 0; | 292 | 89.6k | } | 293 | 181k | } else { | 294 | 244k | for( n = 0; n < 2; n++ ) { | 295 | 163k | MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ]; | 296 | 163k | } | 297 | 81.6k | } | 298 | 263k | } | 299 | | | 300 | | /* Reset side 
channel decoder prediction memory for first frame with side coding */ | 301 | 391k | if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) { | 302 | 18.1k | silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) ); | 303 | 18.1k | silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) ); | 304 | 18.1k | psDec->channel_state[ 1 ].lagPrev = 100; | 305 | 18.1k | psDec->channel_state[ 1 ].LastGainIndex = 10; | 306 | 18.1k | psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY; | 307 | 18.1k | psDec->channel_state[ 1 ].first_frame_after_reset = 1; | 308 | 18.1k | } | 309 | | | 310 | | /* Check if the temp buffer fits into the output PCM buffer. If it fits, | 311 | | we can delay allocating the temp buffer until after the SILK peak stack | 312 | | usage. We need to use a < and not a <= because of the two extra samples. */ | 313 | 391k | ALLOC( samplesOut1_tmp_storage1, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ), | 314 | 391k | opus_int16 ); | 315 | 391k | samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1; | 316 | 391k | samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2; | 317 | | | 318 | 391k | if( lostFlag == FLAG_DECODE_NORMAL ) { | 319 | 252k | has_side = !decode_only_middle; | 320 | 252k | } else { | 321 | 138k | has_side = !psDec->prev_decode_only_middle | 322 | 138k | || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 ); | 323 | 138k | } | 324 | 391k | channel_state[ 0 ].sPLC.enable_deep_plc = decControl->enable_deep_plc; | 325 | | /* Call decoder for one frame */ | 326 | 1.04M | for( n = 0; n < decControl->nChannelsInternal; n++ ) { | 327 | 654k | if( n == 0 || has_side ) { | 328 | 631k | opus_int FrameIndex; | 329 | 631k | opus_int condCoding; | 330 | | | 331 | 631k | 
FrameIndex = channel_state[ 0 ].nFramesDecoded - n; | 332 | | /* Use independent coding if no previous frame available */ | 333 | 631k | if( FrameIndex <= 0 ) { | 334 | 415k | condCoding = CODE_INDEPENDENTLY; | 335 | 415k | } else if( lostFlag == FLAG_DECODE_LBRR ) { | 336 | 58.1k | condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY; | 337 | 157k | } else if( n > 0 && psDec->prev_decode_only_middle ) { | 338 | | /* If we skipped a side frame in this packet, we don't | 339 | | need LTP scaling; the LTP state is well-defined. */ | 340 | 4.96k | condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING; | 341 | 152k | } else { | 342 | 152k | condCoding = CODE_CONDITIONALLY; | 343 | 152k | } | 344 | | #ifdef ENABLE_OSCE | 345 | | if ( channel_state[n].osce.method != decControl->osce_method ) { | 346 | | osce_reset( &channel_state[n].osce, decControl->osce_method ); | 347 | | } | 348 | | #endif | 349 | 631k | ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding, | 350 | | #ifdef ENABLE_DEEP_PLC | 351 | | n == 0 ? 
lpcnet : NULL, | 352 | | #endif | 353 | | #ifdef ENABLE_OSCE | 354 | | &psDec->osce_model, | 355 | | #endif | 356 | 631k | arch); | 357 | 631k | } else { | 358 | 23.6k | silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); | 359 | 23.6k | } | 360 | 654k | channel_state[ n ].nFramesDecoded++; | 361 | 654k | } | 362 | | | 363 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { | 364 | | /* Convert Mid/Side to Left/Right */ | 365 | 101k | silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); | 366 | 289k | } else { | 367 | | /* Buffering */ | 368 | 289k | silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); | 369 | 289k | silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); | 370 | 289k | } | 371 | | | 372 | | /* Number of output samples */ | 373 | 391k | *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) ); | 374 | | | 375 | | /* Set up pointers to temp buffers */ | 376 | 391k | ALLOC( samplesOut2_tmp, *nSamplesOut, opus_int16 ); | 377 | 391k | resample_out_ptr = samplesOut2_tmp; | 378 | | | 379 | 884k | for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { | 380 | | | 381 | | /* Resample decoded signal to API_sampleRate */ | 382 | 493k | ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); | 383 | | | 384 | | /* Interleave if stereo output and stereo stream */ | 385 | 493k | if( decControl->nChannelsAPI == 2 ) { | 386 | 105M | for( i = 0; i < *nSamplesOut; i++ ) { | 387 | 105M | samplesOut[ n + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 388 | 105M | } | 389 | 263k | } else { | 390 | 90.3M | for( i = 0; i < *nSamplesOut; i++ ) { | 391 | 90.1M | samplesOut[ i ] 
= INT16TORES(resample_out_ptr[ i ]); | 392 | 90.1M | } | 393 | 229k | } | 394 | 493k | } | 395 | | | 396 | | /* Create two channel output from mono stream */ | 397 | 391k | if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) { | 398 | 60.0k | if ( stereo_to_mono ){ | 399 | | /* Resample right channel for newly collapsed stereo just in case | 400 | | we weren't doing collapsing when switching to mono */ | 401 | 0 | ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec ); | 402 | |
| 403 | 0 | for( i = 0; i < *nSamplesOut; i++ ) { | 404 | 0 | samplesOut[ 1 + 2 * i ] = INT16TORES(resample_out_ptr[ i ]); | 405 | 0 | } | 406 | 60.0k | } else { | 407 | 22.7M | for( i = 0; i < *nSamplesOut; i++ ) { | 408 | 22.7M | samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ]; | 409 | 22.7M | } | 410 | 60.0k | } | 411 | 60.0k | } | 412 | | | 413 | | /* Export pitch lag, measured at 48 kHz sampling rate */ | 414 | 391k | if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) { | 415 | 83.0k | int mult_tab[ 3 ] = { 6, 4, 3 }; | 416 | 83.0k | decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ]; | 417 | 308k | } else { | 418 | 308k | decControl->prevPitchLag = 0; | 419 | 308k | } | 420 | | | 421 | 391k | if( lostFlag == FLAG_PACKET_LOST ) { | 422 | | /* On packet loss, remove the gain clamping to prevent having the energy "bounce back" | 423 | | if we lose packets when the energy is going down */ | 424 | 185k | for ( i = 0; i < psDec->nChannelsInternal; i++ ) | 425 | 119k | psDec->channel_state[ i ].LastGainIndex = 10; | 426 | 325k | } else { | 427 | 325k | psDec->prev_decode_only_middle = decode_only_middle; | 428 | 325k | } | 429 | 391k | RESTORE_STACK; | 430 | 391k | return ret; | 431 | 391k | } |
|
432 | | |
#if 0
/* Getting table of contents for a packet.
 *
 * Only the first payload byte is inspected. Its top ( nFramesPerPayload + 1 )
 * bits are interpreted, from most to least significant, as one VAD flag per
 * frame (frame 0 first) followed by a single in-band FEC flag.
 *
 * Returns SILK_NO_ERROR on success, or -1 when an argument is rejected
 * (NULL payload or Silk_TOC, nBytesIn < 1, nFramesPerPayload outside 0..3).
 * Silk_TOC is left untouched whenever -1 is returned.
 */
opus_int silk_get_TOC(
    const opus_uint8                *payload,           /* I    Payload data                                */
    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
)
{
    opus_int i, flags;

    /* Reject missing buffers before anything is dereferenced */
    if( payload == NULL || Silk_TOC == NULL ) {
        return -1;
    }
    /* At least the header byte must be present */
    if( nBytesIn < 1 ) {
        return -1;
    }
    /* Keeps both shift amounts below in the range 0..7 and bounds the VADFlags index */
    if( nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
        return -1;
    }

    /* Start from an all-zero result so that VADFlag can be accumulated with OR */
    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );

    /* For stereo, extract the flags for the mid channel */
    flags = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );

    /* The least significant extracted bit is the in-band FEC flag */
    Silk_TOC->inbandFECFlag = flags & 1;

    /* Remaining bits are per-frame VAD flags; walk them from the last frame to the first */
    for( i = nFramesPerPayload - 1; i >= 0 ; i-- ) {
        flags = silk_RSHIFT( flags, 1 );
        Silk_TOC->VADFlags[ i ] = flags & 1;
        /* Packet-level VAD flag is set if any frame is flagged */
        Silk_TOC->VADFlag |= flags & 1;
    }

    return SILK_NO_ERROR;
}
#endif