/src/opus/celt/x86/celt_lpc_sse4_1.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2014, Cisco Systems, INC |
2 | | Written by XiangMingZhu WeiZhou MinPeng YanWang |
3 | | |
4 | | Redistribution and use in source and binary forms, with or without |
5 | | modification, are permitted provided that the following conditions |
6 | | are met: |
7 | | |
8 | | - Redistributions of source code must retain the above copyright |
9 | | notice, this list of conditions and the following disclaimer. |
10 | | |
11 | | - Redistributions in binary form must reproduce the above copyright |
12 | | notice, this list of conditions and the following disclaimer in the |
13 | | documentation and/or other materials provided with the distribution. |
14 | | |
15 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
16 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
17 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
18 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
19 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
20 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
21 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
22 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
23 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
24 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #ifdef HAVE_CONFIG_H |
29 | | #include "config.h" |
30 | | #endif |
31 | | |
32 | | #include <xmmintrin.h> |
33 | | #include <emmintrin.h> |
34 | | #include <smmintrin.h> |
35 | | #include "celt_lpc.h" |
36 | | #include "stack_alloc.h" |
37 | | #include "mathops.h" |
38 | | #include "pitch.h" |
39 | | #include "x86cpu.h" |
40 | | |
41 | | #if defined(FIXED_POINT) |
42 | | |
43 | | void celt_fir_sse4_1(const opus_val16 *x, |
44 | | const opus_val16 *num, |
45 | | opus_val16 *y, |
46 | | int N, |
47 | | int ord, |
48 | | int arch) |
49 | 138k | { |
50 | 138k | int i,j; |
51 | 138k | VARDECL(opus_val16, rnum); |
52 | | |
53 | 138k | __m128i vecNoA; |
54 | 138k | opus_int32 noA ; |
55 | 138k | SAVE_STACK; |
56 | | |
57 | 138k | ALLOC(rnum, ord, opus_val16); |
58 | 3.46M | for(i=0;i<ord;i++) |
59 | 3.32M | rnum[i] = num[ord-i-1]; |
60 | 138k | noA = EXTEND32(1) << SIG_SHIFT >> 1; |
61 | 138k | vecNoA = _mm_set_epi32(noA, noA, noA, noA); |
62 | | |
63 | 19.3M | for (i=0;i<N-3;i+=4) |
64 | 19.2M | { |
65 | 19.2M | opus_val32 sums[4] = {0}; |
66 | 19.2M | __m128i vecSum, vecX; |
67 | 19.2M | #if defined(OPUS_CHECK_ASM) |
68 | 19.2M | { |
69 | 19.2M | opus_val32 sums_c[4] = {0}; |
70 | 19.2M | xcorr_kernel_c(rnum, x+i-ord, sums_c, ord); |
71 | 19.2M | #endif |
72 | 19.2M | xcorr_kernel(rnum, x+i-ord, sums, ord, arch); |
73 | 19.2M | #if defined(OPUS_CHECK_ASM) |
74 | 19.2M | celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0); |
75 | 19.2M | } |
76 | 0 | #endif |
77 | 0 | vecSum = _mm_loadu_si128((__m128i *)sums); |
78 | 19.2M | vecSum = _mm_add_epi32(vecSum, vecNoA); |
79 | 19.2M | vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT); |
80 | 19.2M | vecX = OP_CVTEPI16_EPI32_M64(x + i); |
81 | 19.2M | vecSum = _mm_add_epi32(vecSum, vecX); |
82 | 19.2M | vecSum = _mm_packs_epi32(vecSum, vecSum); |
83 | 19.2M | _mm_storel_epi64((__m128i *)(y + i), vecSum); |
84 | 19.2M | } |
85 | 217k | for (;i<N;i++) |
86 | 79.3k | { |
87 | 79.3k | opus_val32 sum = 0; |
88 | 1.98M | for (j=0;j<ord;j++) |
89 | 1.90M | sum = MAC16_16(sum, rnum[j], x[i+j-ord]); |
90 | 79.3k | y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT))); |
91 | 79.3k | } |
92 | | |
93 | 138k | RESTORE_STACK; |
94 | 138k | } |
95 | | |
96 | | #endif |