/src/speex/libspeex/cb_search_sse.h
Line | Count | Source |
1 | | /* Copyright (C) 2004 Jean-Marc Valin */ |
2 | | /** |
3 | | @file cb_search_sse.h |
4 | | @brief Fixed codebook functions (SSE version) |
5 | | */ |
6 | | /* |
7 | | Redistribution and use in source and binary forms, with or without |
8 | | modification, are permitted provided that the following conditions |
9 | | are met: |
10 | | |
11 | | - Redistributions of source code must retain the above copyright |
12 | | notice, this list of conditions and the following disclaimer. |
13 | | |
14 | | - Redistributions in binary form must reproduce the above copyright |
15 | | notice, this list of conditions and the following disclaimer in the |
16 | | documentation and/or other materials provided with the distribution. |
17 | | |
18 | | - Neither the name of the Xiph.org Foundation nor the names of its |
19 | | contributors may be used to endorse or promote products derived from |
20 | | this software without specific prior written permission. |
21 | | |
22 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
24 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
25 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
26 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
27 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
28 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
29 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
30 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
31 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
32 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | |
35 | | #include <xmmintrin.h> |
36 | | |
/* Unpack the four single-precision lanes of U into four separate floats:
   lane 0 goes to *__Z, lane 1 to *__Y, lane 2 to *__X and lane 3 to *__W. */
static inline void _spx_mm_getr_ps (__m128 U, float *__Z, float *__Y, float *__X, float *__W)
{
   float lanes[4];

   /* Unaligned store: the local array carries no 16-byte alignment guarantee. */
   _mm_storeu_ps(lanes, U);

   *__Z = lanes[0];
   *__Y = lanes[1];
   *__X = lanes[2];
   *__W = lanes[3];
}
52 | | |
53 | | #define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK |
54 | | static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack) |
55 | 18.0k | { |
56 | 18.0k | int i, j, k; |
57 | 18.0k | __m128 resj, EE; |
58 | 18.0k | VARDECL(__m128 *r); |
59 | 18.0k | VARDECL(__m128 *shape); |
60 | 18.0k | ALLOC(r, subvect_size, __m128); |
61 | 18.0k | ALLOC(shape, subvect_size, __m128); |
62 | 188k | for(j=0;j<subvect_size;j++) |
63 | 170k | r[j] = _mm_load_ps1(_r+j); |
64 | 408k | for (i=0;i<shape_cb_size;i+=4) |
65 | 390k | { |
66 | 390k | float *_res = resp+i*subvect_size; |
67 | 390k | const signed char *_shape = shape_cb+i*subvect_size; |
68 | 390k | EE = _mm_setzero_ps(); |
69 | 3.25M | for(j=0;j<subvect_size;j++) |
70 | 2.86M | { |
71 | 2.86M | shape[j] = _mm_setr_ps(0.03125*_shape[j], 0.03125*_shape[subvect_size+j], 0.03125*_shape[2*subvect_size+j], 0.03125*_shape[3*subvect_size+j]); |
72 | 2.86M | } |
73 | 3.25M | for(j=0;j<subvect_size;j++) |
74 | 2.86M | { |
75 | 2.86M | resj = _mm_setzero_ps(); |
76 | 17.5M | for (k=0;k<=j;k++) |
77 | 14.6M | resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k])); |
78 | 2.86M | _spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j); |
79 | 2.86M | *resp2++ = resj; |
80 | 2.86M | EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj)); |
81 | 2.86M | } |
82 | 390k | E[i>>2] = EE; |
83 | 390k | } |
84 | 18.0k | } |