/src/opus/celt/x86/pitch_sse2.c
Line | Count | Source |
1 | | /* Copyright (c) 2014, Cisco Systems, INC |
2 | | Written by XiangMingZhu WeiZhou MinPeng YanWang |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without |
5 | | modification, are permitted provided that the following conditions |
6 | | are met: |
7 | | |
8 | | - Redistributions of source code must retain the above copyright |
9 | | notice, this list of conditions and the following disclaimer. |
10 | | |
11 | | - Redistributions in binary form must reproduce the above copyright |
12 | | notice, this list of conditions and the following disclaimer in the |
13 | | documentation and/or other materials provided with the distribution. |
14 | | |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
16 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
17 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
18 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
19 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
23 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
24 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | |
32 | | #include <xmmintrin.h> |
33 | | #include <emmintrin.h> |
34 | | |
35 | | #include "macros.h" |
36 | | #include "celt_lpc.h" |
37 | | #include "stack_alloc.h" |
38 | | #include "mathops.h" |
39 | | #include "pitch.h" |
40 | | |
41 | | #if defined(OPUS_X86_MAY_HAVE_SSE2) && defined(FIXED_POINT) |
42 | | opus_val32 celt_inner_prod_sse2(const opus_val16 *x, const opus_val16 *y, |
43 | | int N) |
44 | 0 | { |
45 | 0 | opus_int i, dataSize16; |
46 | 0 | opus_int32 sum; |
47 | |
|
48 | 0 | __m128i inVec1_76543210, inVec1_FEDCBA98, acc1; |
49 | 0 | __m128i inVec2_76543210, inVec2_FEDCBA98, acc2; |
50 | |
|
51 | 0 | sum = 0; |
52 | 0 | dataSize16 = N & ~15; |
53 | |
|
54 | 0 | acc1 = _mm_setzero_si128(); |
55 | 0 | acc2 = _mm_setzero_si128(); |
56 | |
|
57 | 0 | for (i=0;i<dataSize16;i+=16) |
58 | 0 | { |
59 | 0 | inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); |
60 | 0 | inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); |
61 | |
|
62 | 0 | inVec1_FEDCBA98 = _mm_loadu_si128((__m128i *)(&x[i + 8])); |
63 | 0 | inVec2_FEDCBA98 = _mm_loadu_si128((__m128i *)(&y[i + 8])); |
64 | |
|
65 | 0 | inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); |
66 | 0 | inVec1_FEDCBA98 = _mm_madd_epi16(inVec1_FEDCBA98, inVec2_FEDCBA98); |
67 | |
|
68 | 0 | acc1 = _mm_add_epi32(acc1, inVec1_76543210); |
69 | 0 | acc2 = _mm_add_epi32(acc2, inVec1_FEDCBA98); |
70 | 0 | } |
71 | |
|
72 | 0 | acc1 = _mm_add_epi32( acc1, acc2 ); |
73 | |
|
74 | 0 | if (N - i >= 8) |
75 | 0 | { |
76 | 0 | inVec1_76543210 = _mm_loadu_si128((__m128i *)(&x[i + 0])); |
77 | 0 | inVec2_76543210 = _mm_loadu_si128((__m128i *)(&y[i + 0])); |
78 | |
|
79 | 0 | inVec1_76543210 = _mm_madd_epi16(inVec1_76543210, inVec2_76543210); |
80 | |
|
81 | 0 | acc1 = _mm_add_epi32(acc1, inVec1_76543210); |
82 | 0 | i += 8; |
83 | 0 | } |
84 | |
|
85 | 0 | acc1 = _mm_add_epi32(acc1, _mm_unpackhi_epi64( acc1, acc1)); |
86 | 0 | acc1 = _mm_add_epi32(acc1, _mm_shufflelo_epi16( acc1, 0x0E)); |
87 | 0 | sum += _mm_cvtsi128_si32(acc1); |
88 | |
|
89 | 0 | for (;i<N;i++) { |
90 | 0 | sum = silk_SMLABB(sum, x[i], y[i]); |
91 | 0 | } |
92 | |
|
93 | 0 | return sum; |
94 | 0 | } |
95 | | #endif |