Line | Count | Source |
1 | | /* Copyright (c) 2007-2008 CSIRO |
2 | | Copyright (c) 2007-2009 Xiph.Org Foundation |
3 | | Written by Jean-Marc Valin */ |
4 | | /** |
5 | | @file pitch.h |
6 | | @brief Pitch analysis |
7 | | */ |
8 | | |
9 | | /* |
10 | | Redistribution and use in source and binary forms, with or without |
11 | | modification, are permitted provided that the following conditions |
12 | | are met: |
13 | | |
14 | | - Redistributions of source code must retain the above copyright |
15 | | notice, this list of conditions and the following disclaimer. |
16 | | |
17 | | - Redistributions in binary form must reproduce the above copyright |
18 | | notice, this list of conditions and the following disclaimer in the |
19 | | documentation and/or other materials provided with the distribution. |
20 | | |
21 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
22 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
23 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
24 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
25 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
26 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
27 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
28 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
29 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
30 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
31 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #ifndef PITCH_H |
35 | | #define PITCH_H |
36 | | |
37 | | #include "modes.h" |
38 | | #include "cpu_support.h" |
39 | | |
40 | | #if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) \ |
41 | | || ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) |
42 | | #include "x86/pitch_sse.h" |
43 | | #endif |
44 | | |
45 | | #if defined(FIXED_POINT) && defined(__mips) |
46 | | #include "mips/pitch_mipsr1.h" |
47 | | #endif |
48 | | |
49 | | #if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)) |
50 | | # include "arm/pitch_arm.h" |
51 | | #endif |
52 | | |
/** Prototype only: the definition is not part of this header.
    NOTE(review): judging by the names, this presumably builds the reduced-rate
    analysis signal x_lp from the C channel(s) in x, decimating by `factor`;
    confirm against the definition before relying on it.
    x and x_lp are declared OPUS_RESTRICT, so callers must not alias them. */
53 | | void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, |
54 | | int len, int C, int factor, int arch); |
55 | | |
/** Prototype only: the definition is not part of this header.
    NOTE(review): presumably searches lags up to max_pitch for the best match
    between x_lp and y and stores the result through `pitch`; confirm against
    the definition. x_lp is read-only (const) and both buffers are declared
    OPUS_RESTRICT, so callers must not alias them. */
56 | | void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, |
57 | | int len, int max_pitch, int *pitch, int arch); |
58 | | |
/** Prototype only: the definition is not part of this header.
    Returns an opus_val16 and takes the period through the pointer T0.
    NOTE(review): presumably T0 is updated in place with a corrected pitch
    period (guarding against period-doubling errors) and the return value is
    the associated gain; confirm against the definition. */
59 | | opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
60 | | int N, int *T0, int prev_period, opus_val16 prev_gain, int arch); |
61 | | |
62 | | |
63 | | /* OPT: This is the kernel you really want to optimize. It gets used a lot |
64 | | by the prefilter and by the PLC. */ |
/** Accumulate four consecutive cross-correlation lags in a single pass.
    For every j in [0, len) and every k in 0..3, the pair (x[j], y[j+k]) is
    folded into sum[k] through MAC16_16 (defined outside this header;
    presumably acc + a*b, to be confirmed). sum[] is not cleared here, so the
    products are added on top of whatever the caller passed in.
    The four most recent y samples are kept in the locals y_0..y_3 and rotated,
    so each y sample is loaded exactly once.
    @param x    input samples; x[0] .. x[len-1] are read
    @param y    second signal; y[0] .. y[len+2] are read (three priming loads
                plus one load per x sample)
    @param sum  four accumulators, updated in place
    @param len  number of x samples; asserted to be at least 3 */
65 | | static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) |
66 | 2.03G | { |
67 | 2.03G | int j; |
68 | 2.03G | opus_val16 y_0, y_1, y_2, y_3; |
69 | 2.03G | celt_assert(len>=3); |
70 | 2.03G | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ |
71 | 2.03G | y_0=*y++; /* prime the window with y[0], y[1], y[2] */ |
72 | 2.03G | y_1=*y++; |
73 | 2.03G | y_2=*y++; |
74 | 59.6G | for (j=0;j<len-3;j+=4) /* main loop: four x samples per pass */ |
75 | 57.6G | { |
76 | 57.6G | opus_val16 tmp; |
77 | 57.6G | tmp = *x++; |
78 | 57.6G | y_3=*y++; /* window is now y_0,y_1,y_2,y_3 */ |
79 | 57.6G | sum[0] = MAC16_16(sum[0],tmp,y_0); |
80 | 57.6G | sum[1] = MAC16_16(sum[1],tmp,y_1); |
81 | 57.6G | sum[2] = MAC16_16(sum[2],tmp,y_2); |
82 | 57.6G | sum[3] = MAC16_16(sum[3],tmp,y_3); |
83 | 57.6G | tmp=*x++; |
84 | 57.6G | y_0=*y++; /* oldest slot is reused: window is y_1,y_2,y_3,y_0 */ |
85 | 57.6G | sum[0] = MAC16_16(sum[0],tmp,y_1); |
86 | 57.6G | sum[1] = MAC16_16(sum[1],tmp,y_2); |
87 | 57.6G | sum[2] = MAC16_16(sum[2],tmp,y_3); |
88 | 57.6G | sum[3] = MAC16_16(sum[3],tmp,y_0); |
89 | 57.6G | tmp=*x++; |
90 | 57.6G | y_1=*y++; /* window is y_2,y_3,y_0,y_1 */ |
91 | 57.6G | sum[0] = MAC16_16(sum[0],tmp,y_2); |
92 | 57.6G | sum[1] = MAC16_16(sum[1],tmp,y_3); |
93 | 57.6G | sum[2] = MAC16_16(sum[2],tmp,y_0); |
94 | 57.6G | sum[3] = MAC16_16(sum[3],tmp,y_1); |
95 | 57.6G | tmp=*x++; |
96 | 57.6G | y_2=*y++; /* window is y_3,y_0,y_1,y_2; next pass starts back at y_0 */ |
97 | 57.6G | sum[0] = MAC16_16(sum[0],tmp,y_3); |
98 | 57.6G | sum[1] = MAC16_16(sum[1],tmp,y_0); |
99 | 57.6G | sum[2] = MAC16_16(sum[2],tmp,y_1); |
100 | 57.6G | sum[3] = MAC16_16(sum[3],tmp,y_2); |
101 | 57.6G | } |
102 | 2.03G | if (j++<len) /* tail: first leftover x sample, if any */ |
103 | 164M | { |
104 | 164M | opus_val16 tmp = *x++; |
105 | 164M | y_3=*y++; |
106 | 164M | sum[0] = MAC16_16(sum[0],tmp,y_0); |
107 | 164M | sum[1] = MAC16_16(sum[1],tmp,y_1); |
108 | 164M | sum[2] = MAC16_16(sum[2],tmp,y_2); |
109 | 164M | sum[3] = MAC16_16(sum[3],tmp,y_3); |
110 | 164M | } |
111 | 2.03G | if (j++<len) /* tail: second leftover x sample, if any */ |
112 | 164M | { |
113 | 164M | opus_val16 tmp=*x++; |
114 | 164M | y_0=*y++; |
115 | 164M | sum[0] = MAC16_16(sum[0],tmp,y_1); |
116 | 164M | sum[1] = MAC16_16(sum[1],tmp,y_2); |
117 | 164M | sum[2] = MAC16_16(sum[2],tmp,y_3); |
118 | 164M | sum[3] = MAC16_16(sum[3],tmp,y_0); |
119 | 164M | } |
120 | 2.03G | if (j<len) /* tail: third leftover x sample, if any */ |
121 | 0 | { |
122 | 0 | opus_val16 tmp=*x++; |
123 | 0 | y_1=*y++; |
124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); |
125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); |
126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); |
127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); |
128 | 0 | } |
129 | 2.03G | } Unexecuted instantiation: celt.c:xcorr_kernel_c Unexecuted instantiation: celt_decoder.c:xcorr_kernel_c Line | Count | Source | 66 | 981M | { | 67 | 981M | int j; | 68 | 981M | opus_val16 y_0, y_1, y_2, y_3; | 69 | 981M | celt_assert(len>=3); | 70 | 981M | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ | 71 | 981M | y_0=*y++; | 72 | 981M | y_1=*y++; | 73 | 981M | y_2=*y++; | 74 | 29.5G | for (j=0;j<len-3;j+=4) | 75 | 28.6G | { | 76 | 28.6G | opus_val16 tmp; | 77 | 28.6G | tmp = *x++; | 78 | 28.6G | y_3=*y++; | 79 | 28.6G | sum[0] = MAC16_16(sum[0],tmp,y_0); | 80 | 28.6G | sum[1] = MAC16_16(sum[1],tmp,y_1); | 81 | 28.6G | sum[2] = MAC16_16(sum[2],tmp,y_2); | 82 | 28.6G | sum[3] = MAC16_16(sum[3],tmp,y_3); | 83 | 28.6G | tmp=*x++; | 84 | 28.6G | y_0=*y++; | 85 | 28.6G | sum[0] = MAC16_16(sum[0],tmp,y_1); | 86 | 28.6G | sum[1] = MAC16_16(sum[1],tmp,y_2); | 87 | 28.6G | sum[2] = MAC16_16(sum[2],tmp,y_3); | 88 | 28.6G | sum[3] = MAC16_16(sum[3],tmp,y_0); | 89 | 28.6G | tmp=*x++; | 90 | 28.6G | y_1=*y++; | 91 | 28.6G | sum[0] = MAC16_16(sum[0],tmp,y_2); | 92 | 28.6G | sum[1] = MAC16_16(sum[1],tmp,y_3); | 93 | 28.6G | sum[2] = MAC16_16(sum[2],tmp,y_0); | 94 | 28.6G | sum[3] = MAC16_16(sum[3],tmp,y_1); | 95 | 28.6G | tmp=*x++; | 96 | 28.6G | y_2=*y++; | 97 | 28.6G | sum[0] = MAC16_16(sum[0],tmp,y_3); | 98 | 28.6G | sum[1] = MAC16_16(sum[1],tmp,y_0); | 99 | 28.6G | sum[2] = MAC16_16(sum[2],tmp,y_1); | 100 | 28.6G | sum[3] = MAC16_16(sum[3],tmp,y_2); | 101 | 28.6G | } | 102 | 981M | if (j++<len) | 103 | 82.4M | { | 104 | 82.4M | opus_val16 tmp = *x++; | 105 | 82.4M | y_3=*y++; | 106 | 82.4M | sum[0] = MAC16_16(sum[0],tmp,y_0); | 107 | 82.4M | sum[1] = MAC16_16(sum[1],tmp,y_1); | 108 | 82.4M | sum[2] = MAC16_16(sum[2],tmp,y_2); | 109 | 82.4M | sum[3] = MAC16_16(sum[3],tmp,y_3); | 110 | 82.4M | } | 111 | 981M | if (j++<len) | 112 | 82.4M | { | 113 | 82.4M | opus_val16 tmp=*x++; | 114 | 82.4M | y_0=*y++; | 115 | 82.4M | sum[0] = 
MAC16_16(sum[0],tmp,y_1); | 116 | 82.4M | sum[1] = MAC16_16(sum[1],tmp,y_2); | 117 | 82.4M | sum[2] = MAC16_16(sum[2],tmp,y_3); | 118 | 82.4M | sum[3] = MAC16_16(sum[3],tmp,y_0); | 119 | 82.4M | } | 120 | 981M | if (j<len) | 121 | 0 | { | 122 | 0 | opus_val16 tmp=*x++; | 123 | 0 | y_1=*y++; | 124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); | 125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); | 126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); | 127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); | 128 | 0 | } | 129 | 981M | } |
celt_lpc.c:xcorr_kernel_c Line | Count | Source | 66 | 13.6M | { | 67 | 13.6M | int j; | 68 | 13.6M | opus_val16 y_0, y_1, y_2, y_3; | 69 | 13.6M | celt_assert(len>=3); | 70 | 13.6M | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ | 71 | 13.6M | y_0=*y++; | 72 | 13.6M | y_1=*y++; | 73 | 13.6M | y_2=*y++; | 74 | 95.5M | for (j=0;j<len-3;j+=4) | 75 | 81.9M | { | 76 | 81.9M | opus_val16 tmp; | 77 | 81.9M | tmp = *x++; | 78 | 81.9M | y_3=*y++; | 79 | 81.9M | sum[0] = MAC16_16(sum[0],tmp,y_0); | 80 | 81.9M | sum[1] = MAC16_16(sum[1],tmp,y_1); | 81 | 81.9M | sum[2] = MAC16_16(sum[2],tmp,y_2); | 82 | 81.9M | sum[3] = MAC16_16(sum[3],tmp,y_3); | 83 | 81.9M | tmp=*x++; | 84 | 81.9M | y_0=*y++; | 85 | 81.9M | sum[0] = MAC16_16(sum[0],tmp,y_1); | 86 | 81.9M | sum[1] = MAC16_16(sum[1],tmp,y_2); | 87 | 81.9M | sum[2] = MAC16_16(sum[2],tmp,y_3); | 88 | 81.9M | sum[3] = MAC16_16(sum[3],tmp,y_0); | 89 | 81.9M | tmp=*x++; | 90 | 81.9M | y_1=*y++; | 91 | 81.9M | sum[0] = MAC16_16(sum[0],tmp,y_2); | 92 | 81.9M | sum[1] = MAC16_16(sum[1],tmp,y_3); | 93 | 81.9M | sum[2] = MAC16_16(sum[2],tmp,y_0); | 94 | 81.9M | sum[3] = MAC16_16(sum[3],tmp,y_1); | 95 | 81.9M | tmp=*x++; | 96 | 81.9M | y_2=*y++; | 97 | 81.9M | sum[0] = MAC16_16(sum[0],tmp,y_3); | 98 | 81.9M | sum[1] = MAC16_16(sum[1],tmp,y_0); | 99 | 81.9M | sum[2] = MAC16_16(sum[2],tmp,y_1); | 100 | 81.9M | sum[3] = MAC16_16(sum[3],tmp,y_2); | 101 | 81.9M | } | 102 | 13.6M | if (j++<len) | 103 | 0 | { | 104 | 0 | opus_val16 tmp = *x++; | 105 | 0 | y_3=*y++; | 106 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_0); | 107 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_1); | 108 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_2); | 109 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_3); | 110 | 0 | } | 111 | 13.6M | if (j++<len) | 112 | 0 | { | 113 | 0 | opus_val16 tmp=*x++; | 114 | 0 | y_0=*y++; | 115 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_1); | 116 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_2); | 117 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_3); | 118 | 0 | sum[3] = 
MAC16_16(sum[3],tmp,y_0); | 119 | 0 | } | 120 | 13.6M | if (j<len) | 121 | 0 | { | 122 | 0 | opus_val16 tmp=*x++; | 123 | 0 | y_1=*y++; | 124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); | 125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); | 126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); | 127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); | 128 | 0 | } | 129 | 13.6M | } |
Unexecuted instantiation: vq.c:xcorr_kernel_c Unexecuted instantiation: x86cpu.c:xcorr_kernel_c x86_celt_map.c:xcorr_kernel_c Line | Count | Source | 66 | 226M | { | 67 | 226M | int j; | 68 | 226M | opus_val16 y_0, y_1, y_2, y_3; | 69 | 226M | celt_assert(len>=3); | 70 | 226M | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ | 71 | 226M | y_0=*y++; | 72 | 226M | y_1=*y++; | 73 | 226M | y_2=*y++; | 74 | 3.42G | for (j=0;j<len-3;j+=4) | 75 | 3.19G | { | 76 | 3.19G | opus_val16 tmp; | 77 | 3.19G | tmp = *x++; | 78 | 3.19G | y_3=*y++; | 79 | 3.19G | sum[0] = MAC16_16(sum[0],tmp,y_0); | 80 | 3.19G | sum[1] = MAC16_16(sum[1],tmp,y_1); | 81 | 3.19G | sum[2] = MAC16_16(sum[2],tmp,y_2); | 82 | 3.19G | sum[3] = MAC16_16(sum[3],tmp,y_3); | 83 | 3.19G | tmp=*x++; | 84 | 3.19G | y_0=*y++; | 85 | 3.19G | sum[0] = MAC16_16(sum[0],tmp,y_1); | 86 | 3.19G | sum[1] = MAC16_16(sum[1],tmp,y_2); | 87 | 3.19G | sum[2] = MAC16_16(sum[2],tmp,y_3); | 88 | 3.19G | sum[3] = MAC16_16(sum[3],tmp,y_0); | 89 | 3.19G | tmp=*x++; | 90 | 3.19G | y_1=*y++; | 91 | 3.19G | sum[0] = MAC16_16(sum[0],tmp,y_2); | 92 | 3.19G | sum[1] = MAC16_16(sum[1],tmp,y_3); | 93 | 3.19G | sum[2] = MAC16_16(sum[2],tmp,y_0); | 94 | 3.19G | sum[3] = MAC16_16(sum[3],tmp,y_1); | 95 | 3.19G | tmp=*x++; | 96 | 3.19G | y_2=*y++; | 97 | 3.19G | sum[0] = MAC16_16(sum[0],tmp,y_3); | 98 | 3.19G | sum[1] = MAC16_16(sum[1],tmp,y_0); | 99 | 3.19G | sum[2] = MAC16_16(sum[2],tmp,y_1); | 100 | 3.19G | sum[3] = MAC16_16(sum[3],tmp,y_2); | 101 | 3.19G | } | 102 | 226M | if (j++<len) | 103 | 0 | { | 104 | 0 | opus_val16 tmp = *x++; | 105 | 0 | y_3=*y++; | 106 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_0); | 107 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_1); | 108 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_2); | 109 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_3); | 110 | 0 | } | 111 | 226M | if (j++<len) | 112 | 0 | { | 113 | 0 | opus_val16 tmp=*x++; | 114 | 0 | y_0=*y++; | 115 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_1); | 116 | 0 | sum[1] = 
MAC16_16(sum[1],tmp,y_2); | 117 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_3); | 118 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_0); | 119 | 0 | } | 120 | 226M | if (j<len) | 121 | 0 | { | 122 | 0 | opus_val16 tmp=*x++; | 123 | 0 | y_1=*y++; | 124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); | 125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); | 126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); | 127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); | 128 | 0 | } | 129 | 226M | } |
Unexecuted instantiation: pitch_sse2.c:xcorr_kernel_c celt_lpc_sse4_1.c:xcorr_kernel_c Line | Count | Source | 66 | 21.5M | { | 67 | 21.5M | int j; | 68 | 21.5M | opus_val16 y_0, y_1, y_2, y_3; | 69 | 21.5M | celt_assert(len>=3); | 70 | 21.5M | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ | 71 | 21.5M | y_0=*y++; | 72 | 21.5M | y_1=*y++; | 73 | 21.5M | y_2=*y++; | 74 | 150M | for (j=0;j<len-3;j+=4) | 75 | 129M | { | 76 | 129M | opus_val16 tmp; | 77 | 129M | tmp = *x++; | 78 | 129M | y_3=*y++; | 79 | 129M | sum[0] = MAC16_16(sum[0],tmp,y_0); | 80 | 129M | sum[1] = MAC16_16(sum[1],tmp,y_1); | 81 | 129M | sum[2] = MAC16_16(sum[2],tmp,y_2); | 82 | 129M | sum[3] = MAC16_16(sum[3],tmp,y_3); | 83 | 129M | tmp=*x++; | 84 | 129M | y_0=*y++; | 85 | 129M | sum[0] = MAC16_16(sum[0],tmp,y_1); | 86 | 129M | sum[1] = MAC16_16(sum[1],tmp,y_2); | 87 | 129M | sum[2] = MAC16_16(sum[2],tmp,y_3); | 88 | 129M | sum[3] = MAC16_16(sum[3],tmp,y_0); | 89 | 129M | tmp=*x++; | 90 | 129M | y_1=*y++; | 91 | 129M | sum[0] = MAC16_16(sum[0],tmp,y_2); | 92 | 129M | sum[1] = MAC16_16(sum[1],tmp,y_3); | 93 | 129M | sum[2] = MAC16_16(sum[2],tmp,y_0); | 94 | 129M | sum[3] = MAC16_16(sum[3],tmp,y_1); | 95 | 129M | tmp=*x++; | 96 | 129M | y_2=*y++; | 97 | 129M | sum[0] = MAC16_16(sum[0],tmp,y_3); | 98 | 129M | sum[1] = MAC16_16(sum[1],tmp,y_0); | 99 | 129M | sum[2] = MAC16_16(sum[2],tmp,y_1); | 100 | 129M | sum[3] = MAC16_16(sum[3],tmp,y_2); | 101 | 129M | } | 102 | 21.5M | if (j++<len) | 103 | 0 | { | 104 | 0 | opus_val16 tmp = *x++; | 105 | 0 | y_3=*y++; | 106 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_0); | 107 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_1); | 108 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_2); | 109 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_3); | 110 | 0 | } | 111 | 21.5M | if (j++<len) | 112 | 0 | { | 113 | 0 | opus_val16 tmp=*x++; | 114 | 0 | y_0=*y++; | 115 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_1); | 116 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_2); | 117 | 0 | sum[2] = 
MAC16_16(sum[2],tmp,y_3); | 118 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_0); | 119 | 0 | } | 120 | 21.5M | if (j<len) | 121 | 0 | { | 122 | 0 | opus_val16 tmp=*x++; | 123 | 0 | y_1=*y++; | 124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); | 125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); | 126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); | 127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); | 128 | 0 | } | 129 | 21.5M | } |
pitch_sse4_1.c:xcorr_kernel_c Line | Count | Source | 66 | 789M | { | 67 | 789M | int j; | 68 | 789M | opus_val16 y_0, y_1, y_2, y_3; | 69 | 789M | celt_assert(len>=3); | 70 | 789M | y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */ | 71 | 789M | y_0=*y++; | 72 | 789M | y_1=*y++; | 73 | 789M | y_2=*y++; | 74 | 26.4G | for (j=0;j<len-3;j+=4) | 75 | 25.6G | { | 76 | 25.6G | opus_val16 tmp; | 77 | 25.6G | tmp = *x++; | 78 | 25.6G | y_3=*y++; | 79 | 25.6G | sum[0] = MAC16_16(sum[0],tmp,y_0); | 80 | 25.6G | sum[1] = MAC16_16(sum[1],tmp,y_1); | 81 | 25.6G | sum[2] = MAC16_16(sum[2],tmp,y_2); | 82 | 25.6G | sum[3] = MAC16_16(sum[3],tmp,y_3); | 83 | 25.6G | tmp=*x++; | 84 | 25.6G | y_0=*y++; | 85 | 25.6G | sum[0] = MAC16_16(sum[0],tmp,y_1); | 86 | 25.6G | sum[1] = MAC16_16(sum[1],tmp,y_2); | 87 | 25.6G | sum[2] = MAC16_16(sum[2],tmp,y_3); | 88 | 25.6G | sum[3] = MAC16_16(sum[3],tmp,y_0); | 89 | 25.6G | tmp=*x++; | 90 | 25.6G | y_1=*y++; | 91 | 25.6G | sum[0] = MAC16_16(sum[0],tmp,y_2); | 92 | 25.6G | sum[1] = MAC16_16(sum[1],tmp,y_3); | 93 | 25.6G | sum[2] = MAC16_16(sum[2],tmp,y_0); | 94 | 25.6G | sum[3] = MAC16_16(sum[3],tmp,y_1); | 95 | 25.6G | tmp=*x++; | 96 | 25.6G | y_2=*y++; | 97 | 25.6G | sum[0] = MAC16_16(sum[0],tmp,y_3); | 98 | 25.6G | sum[1] = MAC16_16(sum[1],tmp,y_0); | 99 | 25.6G | sum[2] = MAC16_16(sum[2],tmp,y_1); | 100 | 25.6G | sum[3] = MAC16_16(sum[3],tmp,y_2); | 101 | 25.6G | } | 102 | 789M | if (j++<len) | 103 | 82.4M | { | 104 | 82.4M | opus_val16 tmp = *x++; | 105 | 82.4M | y_3=*y++; | 106 | 82.4M | sum[0] = MAC16_16(sum[0],tmp,y_0); | 107 | 82.4M | sum[1] = MAC16_16(sum[1],tmp,y_1); | 108 | 82.4M | sum[2] = MAC16_16(sum[2],tmp,y_2); | 109 | 82.4M | sum[3] = MAC16_16(sum[3],tmp,y_3); | 110 | 82.4M | } | 111 | 789M | if (j++<len) | 112 | 82.4M | { | 113 | 82.4M | opus_val16 tmp=*x++; | 114 | 82.4M | y_0=*y++; | 115 | 82.4M | sum[0] = MAC16_16(sum[0],tmp,y_1); | 116 | 82.4M | sum[1] = MAC16_16(sum[1],tmp,y_2); | 117 | 82.4M | sum[2] 
= MAC16_16(sum[2],tmp,y_3); | 118 | 82.4M | sum[3] = MAC16_16(sum[3],tmp,y_0); | 119 | 82.4M | } | 120 | 789M | if (j<len) | 121 | 0 | { | 122 | 0 | opus_val16 tmp=*x++; | 123 | 0 | y_1=*y++; | 124 | 0 | sum[0] = MAC16_16(sum[0],tmp,y_2); | 125 | 0 | sum[1] = MAC16_16(sum[1],tmp,y_3); | 126 | 0 | sum[2] = MAC16_16(sum[2],tmp,y_0); | 127 | 0 | sum[3] = MAC16_16(sum[3],tmp,y_1); | 128 | 0 | } | 129 | 789M | } |
Unexecuted instantiation: bands.c:xcorr_kernel_c Unexecuted instantiation: opus_encoder.c:xcorr_kernel_c Unexecuted instantiation: opus_multistream_encoder.c:xcorr_kernel_c Unexecuted instantiation: celt_encoder.c:xcorr_kernel_c Unexecuted instantiation: pitch_analysis_core_FIX.c:xcorr_kernel_c Unexecuted instantiation: vector_ops_FIX.c:xcorr_kernel_c Unexecuted instantiation: x86_silk_map.c:xcorr_kernel_c Unexecuted instantiation: burg_modified_FIX.c:xcorr_kernel_c Unexecuted instantiation: vector_ops_FIX_sse4_1.c:xcorr_kernel_c Unexecuted instantiation: burg_modified_FIX_sse4_1.c:xcorr_kernel_c Unexecuted instantiation: pitch_sse.c:xcorr_kernel_c Unexecuted instantiation: pitch_avx.c:xcorr_kernel_c Unexecuted instantiation: pitch_analysis_core_FLP.c:xcorr_kernel_c |
130 | | |
/* Default dispatch for xcorr_kernel(): unless OVERRIDE_XCORR_KERNEL is already
   defined, the macro evaluates and discards `arch` and calls the portable
   xcorr_kernel_c(). NOTE(review): the override is presumably supplied by one of
   the platform headers conditionally included above; confirm there. */
131 | | #ifndef OVERRIDE_XCORR_KERNEL |
132 | | #define xcorr_kernel(x, y, sum, len, arch) \ |
133 | | ((void)(arch),xcorr_kernel_c(x, y, sum, len)) |
134 | | #endif /* OVERRIDE_XCORR_KERNEL */ |
135 | | |
136 | | |
/** Compute two inner products that share the same left operand in one loop.
    Accumulates x[i] with y01[i] and x[i] with y02[i] for i in [0, N) through
    MAC16_16 (defined outside this header; presumably acc + a*b, to be
    confirmed), each starting from zero.
    @param x    common operand, N elements read
    @param y01  first right-hand operand, N elements read
    @param y02  second right-hand operand, N elements read
    @param N    element count; when N <= 0 the loop does not run and both
                outputs are written as 0
    @param xy1  receives the x/y01 accumulation
    @param xy2  receives the x/y02 accumulation */
137 | | static OPUS_INLINE void dual_inner_prod_c(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02, |
138 | | int N, opus_val32 *xy1, opus_val32 *xy2) |
139 | 73.9M | { |
140 | 73.9M | int i; |
141 | 73.9M | opus_val32 xy01=0; |
142 | 73.9M | opus_val32 xy02=0; |
143 | 26.9G | for (i=0;i<N;i++) |
144 | 26.8G | { |
145 | 26.8G | xy01 = MAC16_16(xy01, x[i], y01[i]); |
146 | 26.8G | xy02 = MAC16_16(xy02, x[i], y02[i]); |
147 | 26.8G | } |
148 | 73.9M | *xy1 = xy01; /* outputs are stored once, after the loop */ |
149 | 73.9M | *xy2 = xy02; |
150 | 73.9M | } Unexecuted instantiation: celt.c:dual_inner_prod_c Unexecuted instantiation: celt_decoder.c:dual_inner_prod_c pitch.c:dual_inner_prod_c Line | Count | Source | 139 | 73.9M | { | 140 | 73.9M | int i; | 141 | 73.9M | opus_val32 xy01=0; | 142 | 73.9M | opus_val32 xy02=0; | 143 | 26.9G | for (i=0;i<N;i++) | 144 | 26.8G | { | 145 | 26.8G | xy01 = MAC16_16(xy01, x[i], y01[i]); | 146 | 26.8G | xy02 = MAC16_16(xy02, x[i], y02[i]); | 147 | 26.8G | } | 148 | 73.9M | *xy1 = xy01; | 149 | 73.9M | *xy2 = xy02; | 150 | 73.9M | } |
Unexecuted instantiation: celt_lpc.c:dual_inner_prod_c Unexecuted instantiation: vq.c:dual_inner_prod_c Unexecuted instantiation: x86cpu.c:dual_inner_prod_c Unexecuted instantiation: x86_celt_map.c:dual_inner_prod_c Unexecuted instantiation: pitch_sse2.c:dual_inner_prod_c Unexecuted instantiation: celt_lpc_sse4_1.c:dual_inner_prod_c Unexecuted instantiation: pitch_sse4_1.c:dual_inner_prod_c Unexecuted instantiation: bands.c:dual_inner_prod_c Unexecuted instantiation: opus_encoder.c:dual_inner_prod_c Unexecuted instantiation: opus_multistream_encoder.c:dual_inner_prod_c Unexecuted instantiation: celt_encoder.c:dual_inner_prod_c Unexecuted instantiation: pitch_analysis_core_FIX.c:dual_inner_prod_c Unexecuted instantiation: vector_ops_FIX.c:dual_inner_prod_c Unexecuted instantiation: x86_silk_map.c:dual_inner_prod_c Unexecuted instantiation: burg_modified_FIX.c:dual_inner_prod_c Unexecuted instantiation: vector_ops_FIX_sse4_1.c:dual_inner_prod_c Unexecuted instantiation: burg_modified_FIX_sse4_1.c:dual_inner_prod_c Unexecuted instantiation: pitch_sse.c:dual_inner_prod_c Unexecuted instantiation: pitch_avx.c:dual_inner_prod_c Unexecuted instantiation: pitch_analysis_core_FLP.c:dual_inner_prod_c |
151 | | |
/* Default dispatch for dual_inner_prod(): unless OVERRIDE_DUAL_INNER_PROD is
   already defined, the macro evaluates and discards `arch` and calls the
   portable dual_inner_prod_c(). */
152 | | #ifndef OVERRIDE_DUAL_INNER_PROD |
153 | | # define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ |
154 | 84.6M | ((void)(arch),dual_inner_prod_c(x, y01, y02, N, xy1, xy2)) |
155 | | #endif |
156 | | |
157 | | /*We make sure a C version is always available for cases where the overhead of |
158 | | vectorization and passing around an arch flag aren't worth it.*/ |
/** Inner product of two N-element vectors.
    Starts from 0 and folds each pair (x[i], y[i]) for i in [0, N) into the
    accumulator through MAC16_16 (defined outside this header; presumably
    acc + a*b, to be confirmed).
    @param x  first operand, N elements read
    @param y  second operand, N elements read
    @param N  element count; when N <= 0 the loop does not run
    @return   the accumulated value (0 when the loop does not run) */
159 | | static OPUS_INLINE opus_val32 celt_inner_prod_c(const opus_val16 *x, |
160 | | const opus_val16 *y, int N) |
161 | 159M | { |
162 | 159M | int i; |
163 | 159M | opus_val32 xy=0; |
164 | 7.17G | for (i=0;i<N;i++) |
165 | 7.01G | xy = MAC16_16(xy, x[i], y[i]); |
166 | 159M | return xy; |
167 | 159M | } Unexecuted instantiation: celt.c:celt_inner_prod_c Unexecuted instantiation: celt_decoder.c:celt_inner_prod_c Unexecuted instantiation: pitch.c:celt_inner_prod_c Unexecuted instantiation: celt_lpc.c:celt_inner_prod_c Unexecuted instantiation: vq.c:celt_inner_prod_c Unexecuted instantiation: x86cpu.c:celt_inner_prod_c x86_celt_map.c:celt_inner_prod_c Line | Count | Source | 161 | 159M | { | 162 | 159M | int i; | 163 | 159M | opus_val32 xy=0; | 164 | 7.17G | for (i=0;i<N;i++) | 165 | 7.01G | xy = MAC16_16(xy, x[i], y[i]); | 166 | 159M | return xy; | 167 | 159M | } |
Unexecuted instantiation: pitch_sse2.c:celt_inner_prod_c Unexecuted instantiation: celt_lpc_sse4_1.c:celt_inner_prod_c Unexecuted instantiation: pitch_sse4_1.c:celt_inner_prod_c Unexecuted instantiation: bands.c:celt_inner_prod_c Unexecuted instantiation: opus_encoder.c:celt_inner_prod_c Unexecuted instantiation: opus_multistream_encoder.c:celt_inner_prod_c Unexecuted instantiation: celt_encoder.c:celt_inner_prod_c Unexecuted instantiation: pitch_analysis_core_FIX.c:celt_inner_prod_c Unexecuted instantiation: vector_ops_FIX.c:celt_inner_prod_c Unexecuted instantiation: x86_silk_map.c:celt_inner_prod_c Unexecuted instantiation: burg_modified_FIX.c:celt_inner_prod_c Unexecuted instantiation: vector_ops_FIX_sse4_1.c:celt_inner_prod_c Unexecuted instantiation: burg_modified_FIX_sse4_1.c:celt_inner_prod_c Unexecuted instantiation: pitch_sse.c:celt_inner_prod_c Unexecuted instantiation: pitch_avx.c:celt_inner_prod_c Unexecuted instantiation: pitch_analysis_core_FLP.c:celt_inner_prod_c |
168 | | |
/* Default dispatch for celt_inner_prod(): unless OVERRIDE_CELT_INNER_PROD is
   already defined, the macro evaluates and discards `arch` and calls the
   portable celt_inner_prod_c(). */
169 | | #if !defined(OVERRIDE_CELT_INNER_PROD) |
170 | | # define celt_inner_prod(x, y, N, arch) \ |
171 | | ((void)(arch),celt_inner_prod_c(x, y, N)) |
172 | | #endif |
173 | | |
/* External declaration of comb_filter_const_c(), emitted only when
   NON_STATIC_COMB_FILTER_CONST_C is defined. The definition is not in this
   header. NOTE(review): the macro name suggests the function is otherwise
   `static` in its own translation unit; confirm where it is defined.
   The same guarded declaration appears a second time further down, just
   before the comb_filter_const() macro. */
174 | | #ifdef NON_STATIC_COMB_FILTER_CONST_C |
175 | | void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, |
176 | | opus_val16 g10, opus_val16 g11, opus_val16 g12); |
177 | | #endif |
178 | | |
179 | | |
/** Prototype of the portable pitch cross-correlation routine; the definition
    is not part of this header.
    The return type depends on the build: opus_val32 when FIXED_POINT is
    defined, void otherwise.
    NOTE(review): presumably fills xcorr[] with one correlation of _x against
    _y per lag up to max_pitch over len samples; the meaning of the fixed-point
    return value is not visible here. Confirm both against the definition. */
180 | | #ifdef FIXED_POINT |
181 | | opus_val32 |
182 | | #else |
183 | | void |
184 | | #endif |
185 | | celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, |
186 | | opus_val32 *xcorr, int len, int max_pitch, int arch); |
187 | | |
/* Default dispatch for celt_pitch_xcorr(): unless OVERRIDE_PITCH_XCORR is
   already defined, the name is a plain alias for celt_pitch_xcorr_c, so every
   argument, including `arch`, is passed through unchanged. */
188 | | #ifndef OVERRIDE_PITCH_XCORR |
189 | 362M | # define celt_pitch_xcorr celt_pitch_xcorr_c |
190 | | #endif |
191 | | |
/* NOTE(review): this guarded declaration of comb_filter_const_c() is a
   token-for-token repeat of the one that precedes the celt_pitch_xcorr_c
   prototype earlier in this header. Repeating a compatible declaration is
   harmless in C, but one of the two copies looks redundant. */
192 | | #ifdef NON_STATIC_COMB_FILTER_CONST_C |
193 | | void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N, |
194 | | opus_val16 g10, opus_val16 g11, opus_val16 g12); |
195 | | #endif |
196 | | |
/* Default dispatch for comb_filter_const(): unless OVERRIDE_COMB_FILTER_CONST
   is already defined, the macro evaluates and discards `arch` and calls
   comb_filter_const_c(). This header declares that function only when
   NON_STATIC_COMB_FILTER_CONST_C is defined, so in other builds a declaration
   must already be visible wherever the macro is expanded. */
197 | | #ifndef OVERRIDE_COMB_FILTER_CONST |
198 | | # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ |
199 | 1.82M | ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) |
200 | | #endif |
201 | | |
202 | | |
203 | | #endif |