Coverage Report

Created: 2026-06-07 08:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/silk/x86/VAD_sse4_1.c
Line
Count
Source
1
/* Copyright (c) 2014-2020, Cisco Systems, INC
2
   Written by XiangMingZhu WeiZhou MinPeng YanWang FrancisQuiers
3
4
   Redistribution and use in source and binary forms, with or without
5
   modification, are permitted provided that the following conditions
6
   are met:
7
8
   - Redistributions of source code must retain the above copyright
9
   notice, this list of conditions and the following disclaimer.
10
11
   - Redistributions in binary form must reproduce the above copyright
12
   notice, this list of conditions and the following disclaimer in the
13
   documentation and/or other materials provided with the distribution.
14
15
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
32
#include <xmmintrin.h>
33
#include <emmintrin.h>
34
#include <smmintrin.h>
35
36
#include "main.h"
37
#include "stack_alloc.h"
38
39
/* Weighting factors for tilt measure */
40
static const opus_int32 tiltWeights[ VAD_N_BANDS ] = { 30000, 6000, -12000, -12000 };
41
42
/***************************************/
43
/* Get the speech activity level in Q8 */
44
/***************************************/
45
opus_int silk_VAD_GetSA_Q8_sse4_1(                  /* O    Return value, 0 if success                  */
46
    silk_encoder_state          *psEncC,            /* I/O  Encoder state                               */
47
    const opus_int16            pIn[]               /* I    PCM input                                   */
48
)
49
466k
{
50
466k
    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
51
466k
    opus_int   decimated_framelength1, decimated_framelength2;
52
466k
    opus_int   decimated_framelength;
53
466k
    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
54
466k
    opus_int32 sumSquared, smooth_coef_Q16;
55
466k
    opus_int16 HPstateTmp;
56
466k
    VARDECL( opus_int16, X );
57
466k
    opus_int32 Xnrg[ VAD_N_BANDS ];
58
466k
    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
59
466k
    opus_int32 speech_nrg, x_tmp;
60
466k
    opus_int   X_offset[ VAD_N_BANDS ];
61
466k
    opus_int   ret = 0;
62
466k
    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
63
64
466k
    SAVE_STACK;
65
66
#ifdef OPUS_CHECK_ASM
67
    silk_encoder_state psEncC_c;
68
233k
    opus_int ret_c;
69
70
233k
    silk_memcpy( &psEncC_c, psEncC, sizeof( psEncC_c ) );
71
    ret_c = silk_VAD_GetSA_Q8_c( &psEncC_c, pIn );
72
#endif
73
74
    /* Safety checks */
75
466k
    silk_assert( VAD_N_BANDS == 4 );
76
466k
    celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
77
466k
    celt_assert( psEncC->frame_length <= 512 );
78
466k
    celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
79
80
    /***********************/
81
    /* Filter and Decimate */
82
    /***********************/
83
466k
    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
84
466k
    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
85
466k
    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
86
    /* Decimate into 4 bands:
87
       0       L      3L       L              3L                             5L
88
               -      --       -              --                             --
89
               8       8       2               4                              4
90
91
       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
92
93
       They're arranged to allow the minimal ( frame_length / 4 ) extra
94
       scratch space during the downsampling process */
95
466k
    X_offset[ 0 ] = 0;
96
466k
    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
97
466k
    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
98
466k
    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
99
466k
    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
100
101
    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
102
466k
    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
103
466k
        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
104
105
    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
106
466k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
107
466k
        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
108
109
    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
110
466k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
111
466k
        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
112
113
    /*********************************************/
114
    /* HP filter on lowest band (differentiator) */
115
    /*********************************************/
116
466k
    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
117
466k
    HPstateTmp = X[ decimated_framelength - 1 ];
118
10.0M
    for( i = decimated_framelength - 1; i > 0; i-- ) {
119
9.57M
        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
120
9.57M
        X[ i ]     -= X[ i - 1 ];
121
9.57M
    }
122
466k
    X[ 0 ] -= psSilk_VAD->HPstate;
123
466k
    psSilk_VAD->HPstate = HPstateTmp;
124
125
    /*************************************/
126
    /* Calculate the energy in each band */
127
    /*************************************/
128
2.33M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
129
        /* Find the decimated framelength in the non-uniformly divided bands */
130
1.86M
        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
131
132
        /* Split length into subframe lengths */
133
1.86M
        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
134
1.86M
        dec_subframe_offset = 0;
135
136
        /* Compute energy per sub-frame */
137
        /* initialize with summed energy of last subframe */
138
1.86M
        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
139
9.33M
        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
140
7.46M
            __m128i xmm_X, xmm_acc;
141
7.46M
            sumSquared = 0;
142
143
7.46M
            xmm_acc = _mm_setzero_si128();
144
145
13.9M
            for( i = 0; i < dec_subframe_length - 7; i += 8 )
146
6.52M
            {
147
6.52M
                xmm_X   = _mm_loadu_si128( (__m128i *)(void*)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
148
6.52M
                xmm_X   = _mm_srai_epi16( xmm_X, 3 );
149
6.52M
                xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );
150
6.52M
                xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
151
6.52M
            }
152
153
7.46M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) );
154
7.46M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) );
155
156
7.46M
            sumSquared += _mm_cvtsi128_si32( xmm_acc );
157
158
34.9M
            for( ; i < dec_subframe_length; i++ ) {
159
                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
160
                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
161
27.5M
                x_tmp = silk_RSHIFT(
162
27.5M
                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
163
27.5M
                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
164
165
                /* Safety check */
166
27.5M
                silk_assert( sumSquared >= 0 );
167
27.5M
            }
168
169
            /* Add/saturate summed energy of current subframe */
170
7.46M
            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
171
5.60M
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
172
5.60M
            } else {
173
                /* Look-ahead subframe */
174
1.86M
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
175
1.86M
            }
176
177
7.46M
            dec_subframe_offset += dec_subframe_length;
178
7.46M
        }
179
1.86M
        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
180
1.86M
    }
181
182
    /********************/
183
    /* Noise estimation */
184
    /********************/
185
466k
    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
186
187
    /***********************************************/
188
    /* Signal-plus-noise to noise ratio estimation */
189
    /***********************************************/
190
232k
    sumSquared = 0;
191
232k
    input_tilt = 0;
192
2.33M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
193
1.86M
        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
194
1.86M
        if( speech_nrg > 0 ) {
195
            /* Divide, with sufficient resolution */
196
1.35M
            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
197
1.05M
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
198
1.05M
            } else {
199
296k
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
200
296k
            }
201
202
            /* Convert to log domain */
203
1.35M
            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
204
205
            /* Sum-of-squares */
206
1.35M
            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
207
208
            /* Tilt measure */
209
1.35M
            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
210
                /* Scale down SNR value for small subband speech energies */
211
667k
                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
212
667k
            }
213
1.35M
            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
214
1.35M
        } else {
215
512k
            NrgToNoiseRatio_Q8[ b ] = 256;
216
512k
        }
217
1.86M
    }
218
219
    /* Mean-of-squares */
220
466k
    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
221
222
    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
223
232k
    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
224
225
    /*********************************/
226
    /* Speech Probability Estimation */
227
    /*********************************/
228
466k
    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
229
230
    /**************************/
231
    /* Frequency Tilt Measure */
232
    /**************************/
233
466k
    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
234
235
    /**************************************************/
236
    /* Scale the sigmoid output based on power levels */
237
    /**************************************************/
238
232k
    speech_nrg = 0;
239
2.33M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
240
        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
241
1.86M
        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
242
1.86M
    }
243
244
466k
    if( psEncC->frame_length == 20 * psEncC->fs_kHz ) {
245
350k
        speech_nrg = silk_RSHIFT32( speech_nrg, 1 );
246
350k
    }
247
    /* Power scaling */
248
466k
    if( speech_nrg <= 0 ) {
249
96.3k
        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
250
370k
    } else if( speech_nrg < 16384 ) {
251
46.3k
        speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
252
253
        /* square-root */
254
46.3k
        speech_nrg = silk_SQRT_APPROX( speech_nrg );
255
46.3k
        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
256
46.3k
    }
257
258
    /* Copy the resulting speech activity in Q8 */
259
466k
    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
260
261
    /***********************************/
262
    /* Energy Level and SNR estimation */
263
    /***********************************/
264
    /* Smoothing coefficient */
265
466k
    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
266
267
466k
    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
268
116k
        smooth_coef_Q16 >>= 1;
269
116k
    }
270
271
2.33M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
272
        /* compute smoothed energy-to-noise ratio per band */
273
1.86M
        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
274
1.86M
            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
275
276
        /* signal to noise ratio in dB per band */
277
1.86M
        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
278
        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
279
1.86M
        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
280
1.86M
    }
281
282
#ifdef OPUS_CHECK_ASM
283
233k
    silk_assert( ret == ret_c );
284
233k
    silk_assert( !memcmp( &psEncC_c, psEncC, sizeof( psEncC_c ) ) );
285
233k
#endif
286
287
233k
    RESTORE_STACK;
288
233k
    return( ret );
289
233k
}
silk_VAD_GetSA_Q8_sse4_1
Line
Count
Source
49
233k
{
50
233k
    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
51
233k
    opus_int   decimated_framelength1, decimated_framelength2;
52
233k
    opus_int   decimated_framelength;
53
233k
    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
54
233k
    opus_int32 sumSquared, smooth_coef_Q16;
55
233k
    opus_int16 HPstateTmp;
56
233k
    VARDECL( opus_int16, X );
57
233k
    opus_int32 Xnrg[ VAD_N_BANDS ];
58
233k
    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
59
233k
    opus_int32 speech_nrg, x_tmp;
60
233k
    opus_int   X_offset[ VAD_N_BANDS ];
61
233k
    opus_int   ret = 0;
62
233k
    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
63
64
233k
    SAVE_STACK;
65
66
233k
#ifdef OPUS_CHECK_ASM
67
233k
    silk_encoder_state psEncC_c;
68
233k
    opus_int ret_c;
69
70
233k
    silk_memcpy( &psEncC_c, psEncC, sizeof( psEncC_c ) );
71
233k
    ret_c = silk_VAD_GetSA_Q8_c( &psEncC_c, pIn );
72
233k
#endif
73
74
    /* Safety checks */
75
233k
    silk_assert( VAD_N_BANDS == 4 );
76
233k
    celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
77
233k
    celt_assert( psEncC->frame_length <= 512 );
78
233k
    celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
79
80
    /***********************/
81
    /* Filter and Decimate */
82
    /***********************/
83
233k
    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
84
233k
    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
85
233k
    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
86
    /* Decimate into 4 bands:
87
       0       L      3L       L              3L                             5L
88
               -      --       -              --                             --
89
               8       8       2               4                              4
90
91
       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
92
93
       They're arranged to allow the minimal ( frame_length / 4 ) extra
94
       scratch space during the downsampling process */
95
233k
    X_offset[ 0 ] = 0;
96
233k
    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
97
233k
    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
98
233k
    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
99
233k
    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
100
101
    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
102
233k
    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
103
233k
        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
104
105
    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
106
233k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
107
233k
        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
108
109
    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
110
233k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
111
233k
        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
112
113
    /*********************************************/
114
    /* HP filter on lowest band (differentiator) */
115
    /*********************************************/
116
233k
    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
117
233k
    HPstateTmp = X[ decimated_framelength - 1 ];
118
4.93M
    for( i = decimated_framelength - 1; i > 0; i-- ) {
119
4.70M
        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
120
4.70M
        X[ i ]     -= X[ i - 1 ];
121
4.70M
    }
122
233k
    X[ 0 ] -= psSilk_VAD->HPstate;
123
233k
    psSilk_VAD->HPstate = HPstateTmp;
124
125
    /*************************************/
126
    /* Calculate the energy in each band */
127
    /*************************************/
128
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
129
        /* Find the decimated framelength in the non-uniformly divided bands */
130
935k
        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
131
132
        /* Split length into subframe lengths */
133
935k
        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
134
935k
        dec_subframe_offset = 0;
135
136
        /* Compute energy per sub-frame */
137
        /* initialize with summed energy of last subframe */
138
935k
        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
139
4.67M
        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
140
3.74M
            __m128i xmm_X, xmm_acc;
141
3.74M
            sumSquared = 0;
142
143
3.74M
            xmm_acc = _mm_setzero_si128();
144
145
6.92M
            for( i = 0; i < dec_subframe_length - 7; i += 8 )
146
3.18M
            {
147
3.18M
                xmm_X   = _mm_loadu_si128( (__m128i *)(void*)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
148
3.18M
                xmm_X   = _mm_srai_epi16( xmm_X, 3 );
149
3.18M
                xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );
150
3.18M
                xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
151
3.18M
            }
152
153
3.74M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) );
154
3.74M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) );
155
156
3.74M
            sumSquared += _mm_cvtsi128_si32( xmm_acc );
157
158
17.4M
            for( ; i < dec_subframe_length; i++ ) {
159
                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
160
                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
161
13.7M
                x_tmp = silk_RSHIFT(
162
13.7M
                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
163
13.7M
                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
164
165
                /* Safety check */
166
13.7M
                silk_assert( sumSquared >= 0 );
167
13.7M
            }
168
169
            /* Add/saturate summed energy of current subframe */
170
3.74M
            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
171
2.80M
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
172
2.80M
            } else {
173
                /* Look-ahead subframe */
174
935k
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
175
935k
            }
176
177
3.74M
            dec_subframe_offset += dec_subframe_length;
178
3.74M
        }
179
935k
        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
180
935k
    }
181
182
    /********************/
183
    /* Noise estimation */
184
    /********************/
185
233k
    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
186
187
    /***********************************************/
188
    /* Signal-plus-noise to noise ratio estimation */
189
    /***********************************************/
190
233k
    sumSquared = 0;
191
233k
    input_tilt = 0;
192
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
193
935k
        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
194
935k
        if( speech_nrg > 0 ) {
195
            /* Divide, with sufficient resolution */
196
664k
            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
197
518k
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
198
518k
            } else {
199
145k
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
200
145k
            }
201
202
            /* Convert to log domain */
203
664k
            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
204
205
            /* Sum-of-squares */
206
664k
            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
207
208
            /* Tilt measure */
209
664k
            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
210
                /* Scale down SNR value for small subband speech energies */
211
326k
                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
212
326k
            }
213
664k
            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
214
664k
        } else {
215
270k
            NrgToNoiseRatio_Q8[ b ] = 256;
216
270k
        }
217
935k
    }
218
219
    /* Mean-of-squares */
220
233k
    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
221
222
    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
223
233k
    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
224
225
    /*********************************/
226
    /* Speech Probability Estimation */
227
    /*********************************/
228
233k
    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
229
230
    /**************************/
231
    /* Frequency Tilt Measure */
232
    /**************************/
233
233k
    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
234
235
    /**************************************************/
236
    /* Scale the sigmoid output based on power levels */
237
    /**************************************************/
238
233k
    speech_nrg = 0;
239
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
240
        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
241
935k
        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
242
935k
    }
243
244
233k
    if( psEncC->frame_length == 20 * psEncC->fs_kHz ) {
245
173k
        speech_nrg = silk_RSHIFT32( speech_nrg, 1 );
246
173k
    }
247
    /* Power scaling */
248
233k
    if( speech_nrg <= 0 ) {
249
51.8k
        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
250
181k
    } else if( speech_nrg < 16384 ) {
251
22.4k
        speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
252
253
        /* square-root */
254
22.4k
        speech_nrg = silk_SQRT_APPROX( speech_nrg );
255
22.4k
        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
256
22.4k
    }
257
258
    /* Copy the resulting speech activity in Q8 */
259
233k
    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
260
261
    /***********************************/
262
    /* Energy Level and SNR estimation */
263
    /***********************************/
264
    /* Smoothing coefficient */
265
233k
    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
266
267
233k
    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
268
59.9k
        smooth_coef_Q16 >>= 1;
269
59.9k
    }
270
271
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
272
        /* compute smoothed energy-to-noise ratio per band */
273
935k
        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
274
935k
            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
275
276
        /* signal to noise ratio in dB per band */
277
935k
        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
278
        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
279
935k
        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
280
935k
    }
281
282
233k
#ifdef OPUS_CHECK_ASM
283
233k
    silk_assert( ret == ret_c );
284
233k
    silk_assert( !memcmp( &psEncC_c, psEncC, sizeof( psEncC_c ) ) );
285
233k
#endif
286
287
233k
    RESTORE_STACK;
288
233k
    return( ret );
289
233k
}
silk_VAD_GetSA_Q8_sse4_1
Line
Count
Source
49
232k
{
50
232k
    opus_int   SA_Q15, pSNR_dB_Q7, input_tilt;
51
232k
    opus_int   decimated_framelength1, decimated_framelength2;
52
232k
    opus_int   decimated_framelength;
53
232k
    opus_int   dec_subframe_length, dec_subframe_offset, SNR_Q7, i, b, s;
54
232k
    opus_int32 sumSquared, smooth_coef_Q16;
55
232k
    opus_int16 HPstateTmp;
56
232k
    VARDECL( opus_int16, X );
57
232k
    opus_int32 Xnrg[ VAD_N_BANDS ];
58
232k
    opus_int32 NrgToNoiseRatio_Q8[ VAD_N_BANDS ];
59
232k
    opus_int32 speech_nrg, x_tmp;
60
232k
    opus_int   X_offset[ VAD_N_BANDS ];
61
232k
    opus_int   ret = 0;
62
232k
    silk_VAD_state *psSilk_VAD = &psEncC->sVAD;
63
64
232k
    SAVE_STACK;
65
66
#ifdef OPUS_CHECK_ASM
67
    silk_encoder_state psEncC_c;
68
    opus_int ret_c;
69
70
    silk_memcpy( &psEncC_c, psEncC, sizeof( psEncC_c ) );
71
    ret_c = silk_VAD_GetSA_Q8_c( &psEncC_c, pIn );
72
#endif
73
74
    /* Safety checks */
75
232k
    silk_assert( VAD_N_BANDS == 4 );
76
232k
    celt_assert( MAX_FRAME_LENGTH >= psEncC->frame_length );
77
232k
    celt_assert( psEncC->frame_length <= 512 );
78
232k
    celt_assert( psEncC->frame_length == 8 * silk_RSHIFT( psEncC->frame_length, 3 ) );
79
80
    /***********************/
81
    /* Filter and Decimate */
82
    /***********************/
83
232k
    decimated_framelength1 = silk_RSHIFT( psEncC->frame_length, 1 );
84
232k
    decimated_framelength2 = silk_RSHIFT( psEncC->frame_length, 2 );
85
232k
    decimated_framelength = silk_RSHIFT( psEncC->frame_length, 3 );
86
    /* Decimate into 4 bands:
87
       0       L      3L       L              3L                             5L
88
               -      --       -              --                             --
89
               8       8       2               4                              4
90
91
       [0-1 kHz| temp. |1-2 kHz|    2-4 kHz    |            4-8 kHz           |
92
93
       They're arranged to allow the minimal ( frame_length / 4 ) extra
94
       scratch space during the downsampling process */
95
232k
    X_offset[ 0 ] = 0;
96
232k
    X_offset[ 1 ] = decimated_framelength + decimated_framelength2;
97
232k
    X_offset[ 2 ] = X_offset[ 1 ] + decimated_framelength;
98
232k
    X_offset[ 3 ] = X_offset[ 2 ] + decimated_framelength2;
99
232k
    ALLOC( X, X_offset[ 3 ] + decimated_framelength1, opus_int16 );
100
101
    /* 0-8 kHz to 0-4 kHz and 4-8 kHz */
102
232k
    silk_ana_filt_bank_1( pIn, &psSilk_VAD->AnaState[  0 ],
103
232k
        X, &X[ X_offset[ 3 ] ], psEncC->frame_length );
104
105
    /* 0-4 kHz to 0-2 kHz and 2-4 kHz */
106
232k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState1[ 0 ],
107
232k
        X, &X[ X_offset[ 2 ] ], decimated_framelength1 );
108
109
    /* 0-2 kHz to 0-1 kHz and 1-2 kHz */
110
232k
    silk_ana_filt_bank_1( X, &psSilk_VAD->AnaState2[ 0 ],
111
232k
        X, &X[ X_offset[ 1 ] ], decimated_framelength2 );
112
113
    /*********************************************/
114
    /* HP filter on lowest band (differentiator) */
115
    /*********************************************/
116
232k
    X[ decimated_framelength - 1 ] = silk_RSHIFT( X[ decimated_framelength - 1 ], 1 );
117
232k
    HPstateTmp = X[ decimated_framelength - 1 ];
118
5.09M
    for( i = decimated_framelength - 1; i > 0; i-- ) {
119
4.86M
        X[ i - 1 ]  = silk_RSHIFT( X[ i - 1 ], 1 );
120
4.86M
        X[ i ]     -= X[ i - 1 ];
121
4.86M
    }
122
232k
    X[ 0 ] -= psSilk_VAD->HPstate;
123
232k
    psSilk_VAD->HPstate = HPstateTmp;
124
125
    /*************************************/
126
    /* Calculate the energy in each band */
127
    /*************************************/
128
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
129
        /* Find the decimated framelength in the non-uniformly divided bands */
130
931k
        decimated_framelength = silk_RSHIFT( psEncC->frame_length, silk_min_int( VAD_N_BANDS - b, VAD_N_BANDS - 1 ) );
131
132
        /* Split length into subframe lengths */
133
931k
        dec_subframe_length = silk_RSHIFT( decimated_framelength, VAD_INTERNAL_SUBFRAMES_LOG2 );
134
931k
        dec_subframe_offset = 0;
135
136
        /* Compute energy per sub-frame */
137
        /* initialize with summed energy of last subframe */
138
931k
        Xnrg[ b ] = psSilk_VAD->XnrgSubfr[ b ];
139
4.65M
        for( s = 0; s < VAD_INTERNAL_SUBFRAMES; s++ ) {
140
3.72M
            __m128i xmm_X, xmm_acc;
141
3.72M
            sumSquared = 0;
142
143
3.72M
            xmm_acc = _mm_setzero_si128();
144
145
7.06M
            for( i = 0; i < dec_subframe_length - 7; i += 8 )
146
3.33M
            {
147
3.33M
                xmm_X   = _mm_loadu_si128( (__m128i *)(void*)&(X[ X_offset[ b ] + i + dec_subframe_offset ] ) );
148
3.33M
                xmm_X   = _mm_srai_epi16( xmm_X, 3 );
149
3.33M
                xmm_X   = _mm_madd_epi16( xmm_X, xmm_X );
150
3.33M
                xmm_acc = _mm_add_epi32( xmm_acc, xmm_X );
151
3.33M
            }
152
153
3.72M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_unpackhi_epi64( xmm_acc, xmm_acc ) );
154
3.72M
            xmm_acc = _mm_add_epi32( xmm_acc, _mm_shufflelo_epi16( xmm_acc, 0x0E ) );
155
156
3.72M
            sumSquared += _mm_cvtsi128_si32( xmm_acc );
157
158
17.5M
            for( ; i < dec_subframe_length; i++ ) {
159
                /* The energy will be less than dec_subframe_length * ( silk_int16_MIN / 8 ) ^ 2.            */
160
                /* Therefore we can accumulate with no risk of overflow (unless dec_subframe_length > 128)  */
161
13.8M
                x_tmp = silk_RSHIFT(
162
13.8M
                    X[ X_offset[ b ] + i + dec_subframe_offset ], 3 );
163
13.8M
                sumSquared = silk_SMLABB( sumSquared, x_tmp, x_tmp );
164
165
                /* Safety check */
166
13.8M
                silk_assert( sumSquared >= 0 );
167
13.8M
            }
168
169
            /* Add/saturate summed energy of current subframe */
170
3.72M
            if( s < VAD_INTERNAL_SUBFRAMES - 1 ) {
171
2.79M
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], sumSquared );
172
2.79M
            } else {
173
                /* Look-ahead subframe */
174
931k
                Xnrg[ b ] = silk_ADD_POS_SAT32( Xnrg[ b ], silk_RSHIFT( sumSquared, 1 ) );
175
931k
            }
176
177
3.72M
            dec_subframe_offset += dec_subframe_length;
178
3.72M
        }
179
931k
        psSilk_VAD->XnrgSubfr[ b ] = sumSquared;
180
931k
    }
181
182
    /********************/
183
    /* Noise estimation */
184
    /********************/
185
232k
    silk_VAD_GetNoiseLevels( &Xnrg[ 0 ], psSilk_VAD );
186
187
    /***********************************************/
188
    /* Signal-plus-noise to noise ratio estimation */
189
    /***********************************************/
190
232k
    sumSquared = 0;
191
232k
    input_tilt = 0;
192
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
193
931k
        speech_nrg = Xnrg[ b ] - psSilk_VAD->NL[ b ];
194
931k
        if( speech_nrg > 0 ) {
195
            /* Divide, with sufficient resolution */
196
690k
            if( ( Xnrg[ b ] & 0xFF800000 ) == 0 ) {
197
538k
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( silk_LSHIFT( Xnrg[ b ], 8 ), psSilk_VAD->NL[ b ] + 1 );
198
538k
            } else {
199
151k
                NrgToNoiseRatio_Q8[ b ] = silk_DIV32( Xnrg[ b ], silk_RSHIFT( psSilk_VAD->NL[ b ], 8 ) + 1 );
200
151k
            }
201
202
            /* Convert to log domain */
203
690k
            SNR_Q7 = silk_lin2log( NrgToNoiseRatio_Q8[ b ] ) - 8 * 128;
204
205
            /* Sum-of-squares */
206
690k
            sumSquared = silk_SMLABB( sumSquared, SNR_Q7, SNR_Q7 );          /* Q14 */
207
208
            /* Tilt measure */
209
690k
            if( speech_nrg < ( (opus_int32)1 << 20 ) ) {
210
                /* Scale down SNR value for small subband speech energies */
211
341k
                SNR_Q7 = silk_SMULWB( silk_LSHIFT( silk_SQRT_APPROX( speech_nrg ), 6 ), SNR_Q7 );
212
341k
            }
213
690k
            input_tilt = silk_SMLAWB( input_tilt, tiltWeights[ b ], SNR_Q7 );
214
690k
        } else {
215
241k
            NrgToNoiseRatio_Q8[ b ] = 256;
216
241k
        }
217
931k
    }
218
219
    /* Mean-of-squares */
220
232k
    sumSquared = silk_DIV32_16( sumSquared, VAD_N_BANDS ); /* Q14 */
221
222
    /* Root-mean-square approximation, scale to dBs, and write to output pointer */
223
232k
    pSNR_dB_Q7 = (opus_int16)( 3 * silk_SQRT_APPROX( sumSquared ) ); /* Q7 */
224
225
    /*********************************/
226
    /* Speech Probability Estimation */
227
    /*********************************/
228
232k
    SA_Q15 = silk_sigm_Q15( silk_SMULWB( VAD_SNR_FACTOR_Q16, pSNR_dB_Q7 ) - VAD_NEGATIVE_OFFSET_Q5 );
229
230
    /**************************/
231
    /* Frequency Tilt Measure */
232
    /**************************/
233
232k
    psEncC->input_tilt_Q15 = silk_LSHIFT( silk_sigm_Q15( input_tilt ) - 16384, 1 );
234
235
    /**************************************************/
236
    /* Scale the sigmoid output based on power levels */
237
    /**************************************************/
238
232k
    speech_nrg = 0;
239
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
240
        /* Accumulate signal-without-noise energies, higher frequency bands have more weight */
241
931k
        speech_nrg += ( b + 1 ) * silk_RSHIFT( Xnrg[ b ] - psSilk_VAD->NL[ b ], 4 );
242
931k
    }
243
244
232k
    if( psEncC->frame_length == 20 * psEncC->fs_kHz ) {
245
176k
        speech_nrg = silk_RSHIFT32( speech_nrg, 1 );
246
176k
    }
247
    /* Power scaling */
248
232k
    if( speech_nrg <= 0 ) {
249
44.4k
        SA_Q15 = silk_RSHIFT( SA_Q15, 1 );
250
188k
    } else if( speech_nrg < 16384 ) {
251
23.9k
        speech_nrg = silk_LSHIFT32( speech_nrg, 16 );
252
253
        /* square-root */
254
23.9k
        speech_nrg = silk_SQRT_APPROX( speech_nrg );
255
23.9k
        SA_Q15 = silk_SMULWB( 32768 + speech_nrg, SA_Q15 );
256
23.9k
    }
257
258
    /* Copy the resulting speech activity in Q8 */
259
232k
    psEncC->speech_activity_Q8 = silk_min_int( silk_RSHIFT( SA_Q15, 7 ), silk_uint8_MAX );
260
261
    /***********************************/
262
    /* Energy Level and SNR estimation */
263
    /***********************************/
264
    /* Smoothing coefficient */
265
232k
    smooth_coef_Q16 = silk_SMULWB( VAD_SNR_SMOOTH_COEF_Q18, silk_SMULWB( (opus_int32)SA_Q15, SA_Q15 ) );
266
267
232k
    if( psEncC->frame_length == 10 * psEncC->fs_kHz ) {
268
56.3k
        smooth_coef_Q16 >>= 1;
269
56.3k
    }
270
271
1.16M
    for( b = 0; b < VAD_N_BANDS; b++ ) {
272
        /* compute smoothed energy-to-noise ratio per band */
273
931k
        psSilk_VAD->NrgRatioSmth_Q8[ b ] = silk_SMLAWB( psSilk_VAD->NrgRatioSmth_Q8[ b ],
274
931k
            NrgToNoiseRatio_Q8[ b ] - psSilk_VAD->NrgRatioSmth_Q8[ b ], smooth_coef_Q16 );
275
276
        /* signal to noise ratio in dB per band */
277
931k
        SNR_Q7 = 3 * ( silk_lin2log( psSilk_VAD->NrgRatioSmth_Q8[b] ) - 8 * 128 );
278
        /* quality = sigmoid( 0.25 * ( SNR_dB - 16 ) ); */
279
931k
        psEncC->input_quality_bands_Q15[ b ] = silk_sigm_Q15( silk_RSHIFT( SNR_Q7 - 16 * 128, 4 ) );
280
931k
    }
281
282
#ifdef OPUS_CHECK_ASM
283
    silk_assert( ret == ret_c );
284
    silk_assert( !memcmp( &psEncC_c, psEncC, sizeof( psEncC_c ) ) );
285
#endif
286
287
232k
    RESTORE_STACK;
288
232k
    return( ret );
289
232k
}