Line | Count | Source |
1 | | /* Copyright (c) 2007-2008 CSIRO |
2 | | Copyright (c) 2007-2009 Xiph.Org Foundation |
3 | | Written by Jean-Marc Valin */ |
4 | | /* |
5 | | Redistribution and use in source and binary forms, with or without |
6 | | modification, are permitted provided that the following conditions |
7 | | are met: |
8 | | |
9 | | - Redistributions of source code must retain the above copyright |
10 | | notice, this list of conditions and the following disclaimer. |
11 | | |
12 | | - Redistributions in binary form must reproduce the above copyright |
13 | | notice, this list of conditions and the following disclaimer in the |
14 | | documentation and/or other materials provided with the distribution. |
15 | | |
16 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
17 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
19 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
20 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
21 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
22 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
23 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
24 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
25 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
26 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | | */ |
28 | | |
29 | | #ifdef HAVE_CONFIG_H |
30 | | #include "config.h" |
31 | | #endif |
32 | | |
33 | | #include "mathops.h" |
34 | | #include "cwrs.h" |
35 | | #include "vq.h" |
36 | | #include "arch.h" |
37 | | #include "os_support.h" |
38 | | #include "bands.h" |
39 | | #include "rate.h" |
40 | | #include "pitch.h" |
41 | | #include "SigProc_FIX.h" |
42 | | |
43 | | #if defined(FIXED_POINT) |
44 | 108M | void norm_scaleup(celt_norm *X, int N, int shift) { |
45 | 108M | int i; |
46 | 108M | celt_assert(shift >= 0); |
47 | 108M | if (shift <= 0) return; |
48 | 1.30G | for (i=0;i<N;i++) X[i] = SHL32(X[i], shift); |
49 | 108M | } |
50 | | |
51 | 578M | void norm_scaledown(celt_norm *X, int N, int shift) { |
52 | 578M | int i; |
53 | 578M | celt_assert(shift >= 0); |
54 | 578M | if (shift <= 0) return; |
55 | 2.34G | for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift); |
56 | 274M | } |
57 | | |
58 | 55.2M | opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) { |
59 | 55.2M | int i; |
60 | 55.2M | opus_val32 sum = 0; |
61 | 55.2M | (void)arch; |
62 | 682M | for (i=0;i<len;i++) sum += x[i]*y[i]; |
63 | 55.2M | return sum; |
64 | 55.2M | } |
65 | 1.19G | opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) { |
66 | 1.19G | int i; |
67 | 1.19G | opus_val64 sum = 0; |
68 | 1.19G | (void)arch; |
69 | 10.4G | for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i]; |
70 | 1.19G | return sum>>2*(NORM_SHIFT-14); |
71 | 1.19G | } |
72 | | #endif |
73 | | |
74 | | #ifndef OVERRIDE_vq_exp_rotation1 |
75 | | static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) |
76 | 111M | { |
77 | 111M | int i; |
78 | 111M | opus_val16 ms; |
79 | 111M | celt_norm *Xptr; |
80 | 111M | Xptr = X; |
81 | 111M | ms = NEG16(s); |
82 | 111M | norm_scaledown(X, len, NORM_SHIFT-14); |
83 | 1.06G | for (i=0;i<len-stride;i++) |
84 | 949M | { |
85 | 949M | celt_norm x1, x2; |
86 | 949M | x1 = Xptr[0]; |
87 | 949M | x2 = Xptr[stride]; |
88 | 949M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
89 | 949M | *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
90 | 949M | } |
91 | 111M | Xptr = &X[len-2*stride-1]; |
92 | 869M | for (i=len-2*stride-1;i>=0;i--) |
93 | 757M | { |
94 | 757M | celt_norm x1, x2; |
95 | 757M | x1 = Xptr[0]; |
96 | 757M | x2 = Xptr[stride]; |
97 | 757M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
98 | 757M | *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
99 | 757M | } |
100 | 111M | norm_scaleup(X, len, NORM_SHIFT-14); |
101 | 111M | } Line | Count | Source | 76 | 55.9M | { | 77 | 55.9M | int i; | 78 | 55.9M | opus_val16 ms; | 79 | 55.9M | celt_norm *Xptr; | 80 | 55.9M | Xptr = X; | 81 | 55.9M | ms = NEG16(s); | 82 | 55.9M | norm_scaledown(X, len, NORM_SHIFT-14); | 83 | 530M | for (i=0;i<len-stride;i++) | 84 | 474M | { | 85 | 474M | celt_norm x1, x2; | 86 | 474M | x1 = Xptr[0]; | 87 | 474M | x2 = Xptr[stride]; | 88 | 474M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); | 89 | 474M | *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); | 90 | 474M | } | 91 | 55.9M | Xptr = &X[len-2*stride-1]; | 92 | 434M | for (i=len-2*stride-1;i>=0;i--) | 93 | 378M | { | 94 | 378M | celt_norm x1, x2; | 95 | 378M | x1 = Xptr[0]; | 96 | 378M | x2 = Xptr[stride]; | 97 | 378M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); | 98 | 378M | *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); | 99 | 378M | } | 100 | 55.9M | norm_scaleup(X, len, NORM_SHIFT-14); | 101 | 55.9M | } |
Line | Count | Source | 76 | 55.9M | { | 77 | 55.9M | int i; | 78 | 55.9M | opus_val16 ms; | 79 | 55.9M | celt_norm *Xptr; | 80 | 55.9M | Xptr = X; | 81 | 55.9M | ms = NEG16(s); | 82 | 55.9M | norm_scaledown(X, len, NORM_SHIFT-14); | 83 | 530M | for (i=0;i<len-stride;i++) | 84 | 474M | { | 85 | 474M | celt_norm x1, x2; | 86 | 474M | x1 = Xptr[0]; | 87 | 474M | x2 = Xptr[stride]; | 88 | 474M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); | 89 | 474M | *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); | 90 | 474M | } | 91 | 55.9M | Xptr = &X[len-2*stride-1]; | 92 | 434M | for (i=len-2*stride-1;i>=0;i--) | 93 | 378M | { | 94 | 378M | celt_norm x1, x2; | 95 | 378M | x1 = Xptr[0]; | 96 | 378M | x2 = Xptr[stride]; | 97 | 378M | Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); | 98 | 378M | *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); | 99 | 378M | } | 100 | 55.9M | norm_scaleup(X, len, NORM_SHIFT-14); | 101 | 55.9M | } |
|
102 | | #endif /* OVERRIDE_vq_exp_rotation1 */ |
103 | | |
104 | | void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) |
105 | 1.05G | { |
106 | 1.05G | static const int SPREAD_FACTOR[3]={15,10,5}; |
107 | 1.05G | int i; |
108 | 1.05G | opus_val16 c, s; |
109 | 1.05G | opus_val16 gain, theta; |
110 | 1.05G | int stride2=0; |
111 | 1.05G | int factor; |
112 | | |
113 | 1.05G | if (2*K>=len || spread==SPREAD_NONE) |
114 | 1.00G | return; |
115 | 42.6M | factor = SPREAD_FACTOR[spread-1]; |
116 | | |
117 | 42.6M | gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); |
118 | 42.6M | theta = HALF16(MULT16_16_Q15(gain,gain)); |
119 | | |
120 | 42.6M | c = celt_cos_norm(EXTEND32(theta)); |
121 | 42.6M | s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ |
122 | | |
123 | 42.6M | if (len>=8*stride) |
124 | 30.6M | { |
125 | 30.6M | stride2 = 1; |
126 | | /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. |
127 | | It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ |
128 | 120M | while ((stride2*stride2+stride2)*stride + (stride>>2) < len) |
129 | 89.4M | stride2++; |
130 | 30.6M | } |
131 | | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
132 | | extract_collapse_mask().*/ |
133 | 42.6M | len = celt_udiv(len, stride); |
134 | 121M | for (i=0;i<stride;i++) |
135 | 78.7M | { |
136 | 78.7M | if (dir < 0) |
137 | 7.70M | { |
138 | 7.70M | if (stride2) |
139 | 1.11M | exp_rotation1(X+i*len, len, stride2, s, c); |
140 | 7.70M | exp_rotation1(X+i*len, len, 1, c, s); |
141 | 71.0M | } else { |
142 | 71.0M | exp_rotation1(X+i*len, len, 1, c, -s); |
143 | 71.0M | if (stride2) |
144 | 32.1M | exp_rotation1(X+i*len, len, stride2, s, -c); |
145 | 71.0M | } |
146 | 78.7M | } |
147 | 42.6M | } Line | Count | Source | 105 | 525M | { | 106 | 525M | static const int SPREAD_FACTOR[3]={15,10,5}; | 107 | 525M | int i; | 108 | 525M | opus_val16 c, s; | 109 | 525M | opus_val16 gain, theta; | 110 | 525M | int stride2=0; | 111 | 525M | int factor; | 112 | | | 113 | 525M | if (2*K>=len || spread==SPREAD_NONE) | 114 | 504M | return; | 115 | 21.3M | factor = SPREAD_FACTOR[spread-1]; | 116 | | | 117 | 21.3M | gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); | 118 | 21.3M | theta = HALF16(MULT16_16_Q15(gain,gain)); | 119 | | | 120 | 21.3M | c = celt_cos_norm(EXTEND32(theta)); | 121 | 21.3M | s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ | 122 | | | 123 | 21.3M | if (len>=8*stride) | 124 | 15.3M | { | 125 | 15.3M | stride2 = 1; | 126 | | /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. | 127 | | It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ | 128 | 60.0M | while ((stride2*stride2+stride2)*stride + (stride>>2) < len) | 129 | 44.7M | stride2++; | 130 | 15.3M | } | 131 | | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 132 | | extract_collapse_mask().*/ | 133 | 21.3M | len = celt_udiv(len, stride); | 134 | 60.6M | for (i=0;i<stride;i++) | 135 | 39.3M | { | 136 | 39.3M | if (dir < 0) | 137 | 3.85M | { | 138 | 3.85M | if (stride2) | 139 | 556k | exp_rotation1(X+i*len, len, stride2, s, c); | 140 | 3.85M | exp_rotation1(X+i*len, len, 1, c, s); | 141 | 35.5M | } else { | 142 | 35.5M | exp_rotation1(X+i*len, len, 1, c, -s); | 143 | 35.5M | if (stride2) | 144 | 16.0M | exp_rotation1(X+i*len, len, stride2, s, -c); | 145 | 35.5M | } | 146 | 39.3M | } | 147 | 21.3M | } |
Line | Count | Source | 105 | 525M | { | 106 | 525M | static const int SPREAD_FACTOR[3]={15,10,5}; | 107 | 525M | int i; | 108 | 525M | opus_val16 c, s; | 109 | 525M | opus_val16 gain, theta; | 110 | 525M | int stride2=0; | 111 | 525M | int factor; | 112 | | | 113 | 525M | if (2*K>=len || spread==SPREAD_NONE) | 114 | 504M | return; | 115 | 21.3M | factor = SPREAD_FACTOR[spread-1]; | 116 | | | 117 | 21.3M | gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); | 118 | 21.3M | theta = HALF16(MULT16_16_Q15(gain,gain)); | 119 | | | 120 | 21.3M | c = celt_cos_norm(EXTEND32(theta)); | 121 | 21.3M | s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ | 122 | | | 123 | 21.3M | if (len>=8*stride) | 124 | 15.3M | { | 125 | 15.3M | stride2 = 1; | 126 | | /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. | 127 | | It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ | 128 | 60.0M | while ((stride2*stride2+stride2)*stride + (stride>>2) < len) | 129 | 44.7M | stride2++; | 130 | 15.3M | } | 131 | | /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 132 | | extract_collapse_mask().*/ | 133 | 21.3M | len = celt_udiv(len, stride); | 134 | 60.6M | for (i=0;i<stride;i++) | 135 | 39.3M | { | 136 | 39.3M | if (dir < 0) | 137 | 3.85M | { | 138 | 3.85M | if (stride2) | 139 | 556k | exp_rotation1(X+i*len, len, stride2, s, c); | 140 | 3.85M | exp_rotation1(X+i*len, len, 1, c, s); | 141 | 35.5M | } else { | 142 | 35.5M | exp_rotation1(X+i*len, len, 1, c, -s); | 143 | 35.5M | if (stride2) | 144 | 16.0M | exp_rotation1(X+i*len, len, stride2, s, -c); | 145 | 35.5M | } | 146 | 39.3M | } | 147 | 21.3M | } |
|
148 | | |
149 | | /** Normalizes the decoded integer pvq codeword to unit norm. */ |
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val32 g;

#ifdef FIXED_POINT
   /* k is roughly log2(sqrt(Ryy)); it brings the rsqrt operand into range
      and is compensated for in the final shifts below. */
   k = celt_ilog2(Ryy)>>1;
#endif
   /* NOTE(review): 'k' is only declared under FIXED_POINT; this compiles in
      the float build because VSHR32 there appears to discard its shift
      argument entirely -- confirm against arch.h. */
   t = VSHR32(Ryy, 2*(k-7)-15);
   /* g = gain/sqrt(Ryy): the per-pulse scale that makes the output unit-norm
      (times 'gain'). */
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   i=0;
   (void)shift;
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   /* Extended-precision path: iy carries 'shift' extra fractional bits. */
   if (shift>0) {
      int tot_shift = NORM_SHIFT+1-k-shift;
      if (tot_shift >= 0) {
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
         while (++i < N);
      } else {
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
         while (++i < N);
      }
   } else
#endif
   /* Default path; the do/while assumes N >= 1 (guaranteed by callers). */
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   while (++i < N);
}
|
182 | | |
/** Builds a bitmask with bit b set iff sub-band b of iy (N samples split
    into B equal blocks) contains at least one non-zero pulse. */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned mask;
   int block_len;
   int b;
   /* A single block trivially has energy (mask of one bit). */
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
     exp_rotation().*/
   block_len = celt_udiv(N, B);
   mask = 0;
   b=0; do {
      int j;
      unsigned any=0;
      j=0; do {
         any |= iy[b*block_len+j];
      } while (++j<block_len);
      mask |= (any!=0)<<b;
   } while (++b<B);
   return mask;
}
204 | | |
/** Core PVQ search: places K unit pulses on N samples so that the integer
    vector iy best matches the direction of X (maximising normalised
    correlation).  X is modified in place (sign-stripped and, in fixed point,
    rescaled for headroom).  Returns yy, the accumulated codeword energy term
    used by the caller for normalisation. */
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
   VARDECL(celt_norm, y);
   VARDECL(int, signx);
   int i, j;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   SAVE_STACK;

   (void)arch;
   ALLOC(y, N, celt_norm);
   ALLOC(signx, N, int);
#ifdef FIXED_POINT
   {
      /* Scale X down so the 16x16 MACs in the search below cannot overflow. */
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
      norm_scaledown(X, N, shift);
   }
#endif
   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      signx[j] = X[j]<0;
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
      X[j] = ABS16(X[j]);
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      } while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
#ifdef FIXED_POINT
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y is kept at twice the pulse count so the per-pulse update in the
            greedy loop below avoids a multiply. */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      } while (++j<N);
   }
   celt_sig_assert(pulsesLeft>=0);

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_sig_assert(pulsesLeft<=N+3);
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy refinement: add the remaining pulses one at a time at the
      position that maximises the (squared) normalised correlation. */
   for (i=0;i<pulsesLeft;i++)
   {
      opus_val16 Rxy, Ryy;
      int best_id;
      opus_val32 best_num;
      opus_val16 best_den;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);

      /* Calculations for position 0 are out of the loop, in part to reduce
         mispredicted branches (since the if condition is usually false)
         in the loop. */
      /* Temporary sums of the new pulse(s) */
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
      /* We're multiplying y[j] by two so we don't have to do it here */
      Ryy = ADD16(yy, y[0]);

      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
         Rxy is positive because the sign is pre-computed) */
      Rxy = MULT16_16_Q15(Rxy,Rxy);
      best_den = Ryy;
      best_num = Rxy;
      j=1;
      do {
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: It's not clear whether a cmov is faster than a branch here
            since the condition is more often false than true and using
            a cmov introduces data dependencies across iterations. The optimal
            choice may be architecture-dependent. */
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
      /* OPT: The is more likely to be compiled without a branch than the code above
         but has the same performance otherwise. */
      iy[j] = (iy[j]^-signx[j]) + signx[j];
   } while (++j<N);
   RESTORE_STACK;
   return yy;
}
375 | | |
376 | | #ifdef ENABLE_QEXT |
377 | | #include "macros.h" |
378 | | |
/** Closed-form PVQ search for the special case N == 2: splits K pulses (and
    up*K pulses for the extended-precision codeword up_iy) between the two
    samples proportionally to |X[0]|/(|X[0]|+|X[1]|).  *refine receives the
    offset of up_iy[0] from up*iy[0] so the caller can code it separately.
    Returns the energy of the up-sampled codeword, scaled down by 2*shift
    bits in fixed point. */
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   opus_val32 sum;
   opus_val32 rcp_sum;
   int offset;
   sum = ABS32(X[0]) + ABS32(X[1]);
   /* Degenerate (near-silent) input: put all pulses on the first sample. */
   if (sum < EPSILON) {
      iy[0] = K;
      up_iy[0] = up*K;
      iy[1]=up_iy[1]=0;
      *refine=0;
#ifdef FIXED_POINT
      return (opus_val64)K*K*up*up>>2*shift;
#else
      (void)shift;
      return K*(float)K*up*up;
#endif
   }
#ifdef FIXED_POINT
   int sum_shift;
   opus_val32 X0;
   /* Normalise 'sum' to Q30 so celt_rcp_norm32 gets a full-range operand. */
   sum_shift = 30-celt_ilog2(sum);
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
#else
   rcp_sum = 1.f/sum;
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
#endif
   /* Keep the fine codeword within (up-1)/2 steps of the coarse one so the
      refinement offset stays codable. */
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   offset = up_iy[0] - up*iy[0];
   /* The second sample takes whatever pulse budget is left. */
   iy[1] = K-abs(iy[0]);
   up_iy[1] = up*K-abs(up_iy[0]);
   if (X[1] < 0) {
      iy[1] = -iy[1];
      up_iy[1] = -up_iy[1];
      offset = -offset;
   }
   *refine = offset;
#ifdef FIXED_POINT
   /* Rounded energy of the fine codeword, scaled back down by 2*shift bits. */
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
#else
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
#endif
}
425 | | |
/* Round the normalised magnitude vector Xn (entries >= 0, expected to sum to
   roughly one) to an integer pulse vector iy whose entries sum to exactly K.
   When iy != iy0, the result is additionally constrained to stay within
   (up-1) of up*iy0[i] per entry, i.e. it refines a coarser solution iy0 at
   oversampling factor "up".  Returns 0 on success, 1 when the initial
   rounding is too far from K (more than 32 pulses off) to repair greedily. */
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
   int i;
   int dir;
   VARDECL(opus_val32, rounding);
   int iysum = 0;
   SAVE_STACK;
   ALLOC(rounding, N, opus_val32);
   /* Initial per-entry rounding of K*Xn[i] to the nearest integer, keeping
      the signed rounding residual for the greedy repair pass below. */
   for (i=0;i<N;i++) {
      opus_val32 tmp;
      /* tmp = K*Xn[i] in a scaled representation (fixed point: Xn is a
         Q-format fraction from the caller — exact format per arch.h). */
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
#ifdef FIXED_POINT
      /* Round to nearest in the 7-bit fractional scaling used here. */
      iy[i] = (tmp+64) >> 7;
#else
      iy[i] = (int)floor(.5+tmp);
#endif
      rounding[i] = tmp - SHL32(iy[i], 7);
   }
   /* Refinement mode: clamp each entry to within up-1 of the coarse
      solution scaled by up. */
   if (iy != iy0) {
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
   }
   for (i=0;i<N;i++) iysum += iy[i];
   /* Bail out if rounding landed too far from the target pulse count. */
   if (abs(iysum - K) > 32) {
      RESTORE_STACK;
      return 1;
   }
   /* Greedily add/remove one pulse at a time until the L1 norm is exactly K,
      each time picking the entry with the largest rounding residual in the
      needed direction, while respecting the refinement margin and never
      driving an entry negative. */
   dir = iysum < K ? 1 : -1;
   while (iysum != K) {
      opus_val32 roundval=-1000000*dir;   /* sentinel: any residual beats it */
      int roundpos=0;
      for (i=0;i<N;i++) {
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
            roundval = rounding[i];
            roundpos = i;
         }
      }
      iy[roundpos] += dir;
      /* Push the chosen entry's residual far the other way so the same
         position is not selected again and again. */
      rounding[roundpos] -= SHL32(dir, 15);
      iysum+=dir;
   }
   RESTORE_STACK;
   return 0;
}
468 | | |
/* PVQ search with extra-resolution refinement (QEXT): computes both a base
   pulse vector iy (L1 norm K) and an oversampled vector up_iy (L1 norm up*K,
   each entry within up-1 of up*iy[i]).  The per-entry differences
   refine[i] = up_iy[i]-up*iy[i] are what the encoder transmits as extra bits.
   Returns the (rounded, shift-scaled in fixed point) squared norm of up_iy. */
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
   opus_val32 rcp_sum;
   opus_val32 sum=0;
   int i;
   int failed=0;
   opus_val64 yy=0;
   VARDECL(opus_val32, Xn);
   SAVE_STACK;
   for (i=0;i<N;i++) sum += ABS32(X[i]);
   ALLOC(Xn, N, opus_val32);
   /* Degenerate input (all-zero band): flag failure; note Xn stays
      uninitialised but is never read thanks to the short-circuit below. */
   if (sum < EPSILON)
      failed = 1;
   else {
      /* Normalise magnitudes so Xn sums to ~1. */
#ifdef FIXED_POINT
      int sum_shift = 30-celt_ilog2(sum);
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
      for (i=0;i<N;i++) {
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
      }
#else
      rcp_sum = celt_rcp(sum);
      for (i=0;i<N;i++) {
         Xn[i] = ABS32(X[i])*rcp_sum;
      }
#endif
   }
   /* Base search (up=1, margin K+1), then refine at factor "up"; both are
      skipped once "failed" is set because of the short-circuit ||. */
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
   if (failed) {
      /* Fallback: put all pulses in the first dimension. */
      iy[0] = K;
      for (i=1;i<N;i++) iy[i] = 0;
      up_iy[0] = up*K;
      for (i=1;i<N;i++) up_iy[i] = 0;
   }
   /* Accumulate energy, reapply the input signs (search was on magnitudes),
      and record the per-entry refinement offsets. */
   for (i=0;i<N;i++) {
      yy += up_iy[i]*(opus_val64)up_iy[i];
      if (X[i] < 0) {
         iy[i] = -iy[i];
         up_iy[i] = -up_iy[i];
      }
      refine[i] = up_iy[i]-up*iy[i];
   }
   RESTORE_STACK;
#ifdef FIXED_POINT
   /* Round-to-nearest downshift by 2*shift (no-op when shift==0). */
   return (yy + (1<<2*shift>>1))>>2*shift;
#else
   (void)shift;
   return yy;
#endif
}
519 | | #endif |
520 | | |
521 | | #ifdef ENABLE_QEXT |
522 | | /* Take advantage of the fact that "large" refine values are much less likely |
523 | | than smaller ones. */ |
524 | 3.77M | static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) { |
525 | 3.77M | int large; |
526 | 3.77M | large = abs(refine)>up/2; |
527 | 3.77M | ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1); |
528 | 3.77M | if (large) { |
529 | 344k | ec_enc_bits(enc, refine < 0, 1); |
530 | 344k | ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1); |
531 | 3.43M | } else { |
532 | 3.43M | ec_enc_bits(enc, refine+up/2, extra_bits); |
533 | 3.43M | } |
534 | 3.77M | } |
535 | | |
536 | 136k | static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) { |
537 | 136k | int large, refine; |
538 | 136k | large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1); |
539 | 136k | if (large) { |
540 | 21.7k | int sign = ec_dec_bits(dec, 1); |
541 | 21.7k | refine = ec_dec_bits(dec, extra_bits-1) + up/2+1; |
542 | 21.7k | if (sign) refine = -refine; |
543 | 114k | } else { |
544 | 114k | refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2; |
545 | 114k | } |
546 | 136k | return refine; |
547 | 136k | } |
548 | | #endif |
549 | | |
/** Quantise the normalised band X with K pulses and write the pulse vector
 *  to the range coder.
 *  X is spread-rotated in place before the search; when resynth is set it
 *  is replaced by the decoder-side reconstruction and rotated back.
 *  @param X          normalised band, modified in place (length N)
 *  @param N          band size; asserted > 1
 *  @param K          pulse count; asserted > 0
 *  @param spread     spreading mode forwarded to exp_rotation()
 *  @param B          number of interleaved blocks (collapse-mask granularity)
 *  @param enc        range encoder for the base pulse vector
 *  @param gain       gain used when resynthesising
 *  @param resynth    non-zero to produce the decoded vector in X
 *  @param ext_enc    (QEXT only) side encoder for extra-resolution bits
 *  @param extra_bits (QEXT only) extra resolution; refinement kicks in at >= 2
 *  @param arch       architecture id for the vectorised search
 *  @return per-block collapse mask derived from the pulse vector
 */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
      opus_val32 gain, int resynth
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
{
   VARDECL(int, iy);
   opus_val32 yy;
   unsigned collapse_mask;
#ifdef ENABLE_QEXT
   int yy_shift = 0;
#endif
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   /* Covers vectorization by up to 4. */
   ALLOC(iy, N+3, int);

   exp_rotation(X, N, 1, B, K, spread);

#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* Specialised N==2 refinement: a single scalar "refine" offset is
         coded uniformly with ec_enc_uint(). */
      int refine;
      int up_iy[2];
      int up;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;   /* odd oversampling factor */
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* refine is in [-(up-1)/2, (up-1)/2]; bias it into [0, up-1]. */
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else if (extra_bits >= 2) {
      /* General refinement: N-1 per-entry offsets are coded (the last one
         is implied by the L1 norm up*K), plus a sign bit when the base
         entry iy[N-1] is zero and its sign is otherwise unknown. */
      int i;
      VARDECL(int, up_iy);
      VARDECL(int, refine);
      int up, use_entropy;
      ALLOC(up_iy, N, int);
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* Entropy-code the escape flags only if enough buffer space remains
         for the worst case. */
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else
#endif
   {
      /* Standard CELT path: plain PVQ search and pulse encoding. */
      yy = op_pvq_search(X, iy, K, N, arch);
      collapse_mask = extract_collapse_mask(iy, N, B);
      encode_pulses(iy, N, K, enc);
      if (resynth)
         normalise_residual(iy, X, N, yy, gain, 0);
   }

   if (resynth)
      exp_rotation(X, N, -1, B, K, spread);

   RESTORE_STACK;
   return collapse_mask;
}
616 | | |
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.
    Mirrors alg_quant(): decodes the base pulse vector, applies QEXT
    refinement when extra_bits >= 2, then normalises into X and undoes the
    spreading rotation.  Returns the per-block collapse mask. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val32 gain
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   int yy_shift=0;
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* N==2: a single uniform offset refines the scaled-up base vector
         while keeping |iy[0]|+|iy[1]| == up*K; the sign juggling mirrors
         the encoder-side op_pvq_search_N2 conventions. */
      int up;
      int refine;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Undo the [0, up-1] bias applied by the encoder. */
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
      iy[0] *= up;
      iy[1] *= up;
      if (iy[1] == 0) {
         iy[1] = (iy[0] > 0) ? -refine : refine;
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
      } else if (iy[1] > 0) {
         iy[0] += refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      } else {
         iy[0] -= refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      }
#ifdef FIXED_POINT
      /* Recompute energy with round-to-nearest downshift by 2*yy_shift. */
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
#endif
   } else if (extra_bits >= 2) {
      /* General case: decode N-1 refinement offsets; the last entry is
         reconstructed from the L1-norm constraint |iy| summing to up*K,
         with an explicit sign bit when the base entry was zero. */
      int i;
      opus_val64 yy64;
      VARDECL(int, refine);
      int up, use_entropy;
      int sign=0;
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Must match the encoder's buffer-space decision exactly. */
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
      else sign = iy[N-1] < 0;
      for (i=0;i<N-1;i++) {
         iy[i] = iy[i]*up + refine[i];
      }
      iy[N-1] = up*K;
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
      if (sign) iy[N-1] = -iy[N-1];
      yy64 = 0;
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
#ifdef FIXED_POINT
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = yy64;
#endif
   }
#endif
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
691 | | |
#ifndef OVERRIDE_renormalise_vector
/* Rescale X in place so its L2 norm equals (a scaling of) gain:
   X <- gain * X / sqrt(EPSILON + |X|^2), computed via a reciprocal square
   root.  The vector is temporarily scaled down by NORM_SHIFT-14 so the
   energy accumulation fits the fixed-point inner-product range.  */
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   norm_scaledown(X, N, NORM_SHIFT-14);
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
#ifdef FIXED_POINT
   /* Half the log2 of the energy: block exponent for the rsqrt argument. */
   k = celt_ilog2(E)>>1;
#endif
   /* NOTE: k is referenced below even in float builds; that is safe because
      the float VSHR32/PSHR32 macros discard their shift argument during
      preprocessing, so k never reaches the compiler there. */
   t = VSHR32(E, 2*(k-7));
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
      xptr++;
   }
   norm_scaleup(X, N, NORM_SHIFT-14);
   /*return celt_sqrt(E);*/
}
#endif /* OVERRIDE_renormalise_vector */
721 | | |
/* Computes the stereo angle itheta = atan2(|side|, |mid|) used to split bits
   between the mid and side signals.
   X      : first (left/mid) normalized vector
   Y      : second (right/side) normalized vector
   stereo : non-zero when X/Y are still L/R and mid/side must be formed here;
            zero when X and Y are already the two channels to compare directly
   N      : vector length
   arch   : architecture id forwarded to the inner-product kernel
   Returns the angle; in the fixed-point build this is whatever Q-format
   celt_atan2p_norm() produces, and in the float build it is scaled by
   65536*16384 = 2^30, so the two builds are presumably format-compatible.
   NOTE(review): the 2^30 scaling match between the two #if branches is
   inferred from the float constant only — confirm against celt_atan2p_norm. */
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val32 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = 0;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         /* Form mid = X+Y and side = X-Y, downshifted by NORM_SHIFT-13 so
            that the squared terms accumulated below fit in 32 bits
            (PSHR32/MAC16_16 are project macros; in the float build the shift
            is a no-op). */
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      /* Channels are already separated: take each channel's energy directly.
         The _shift variant presumably applies the same NORM_SHIFT-13 style
         downscaling as the loop above — verify against its definition. */
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   }
   /* Magnitudes of the two components; the angle between them encodes the
      mid/side energy ratio. */
   mid = celt_sqrt32(Emid);
   side = celt_sqrt32(Eside);
#if defined(FIXED_POINT)
   itheta = celt_atan2p_norm(side, mid);
#else
   /* Scale the float angle up so both builds return integers on the same scale. */
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
#endif

   return itheta;
}
754 | | |
755 | | #ifdef ENABLE_QEXT |
756 | | |
/* Reconstructs a unit-norm vector (scaled by `gain`) from a point quantized
   on the surface of an N-dimensional hypercube.
   X    : output vector of length N
   iy   : decoded per-coordinate indices in [0, K-1]
   N    : vector length
   K    : number of quantization levels per coordinate
   face : index of the coordinate pinned to the cube face (largest magnitude)
   sign : non-zero if the face coordinate is negative
   gain : overall gain applied after normalization
   Each coordinate is first mapped to the odd integer (1+2*iy[i])-K in
   (-K, K), the face coordinate is forced to +/-K, and the whole vector is
   then renormalized to magnitude `gain`. */
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   int i;
   opus_val32 sum=0;
   opus_val32 mag;
#ifdef FIXED_POINT
   int sum_shift;
   /* Pre-shift so the energy accumulation below cannot overflow 32 bits:
      each term is < (2K)^2 and there are N of them. */
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
#endif
   for (i=0;i<N;i++) {
      /* Map index iy[i] in [0,K-1] to the odd value (1+2*iy[i])-K in (-K,K). */
      X[i] = (1+2*iy[i])-K;
   }
   X[face] = sign ? -K : K;
   /* Accumulate the squared magnitude. NOTE(review): `shift` is only declared
      under FIXED_POINT but referenced here unconditionally — presumably safe
      because the float build's PSHR32(a,shift) macro discards its shift
      argument before `shift` is ever evaluated; confirm against arch.h. */
   for (i=0;i<N;i++) {
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   }
#ifdef FIXED_POINT
   /* Normalize `sum` into the input range of celt_rsqrt_norm32, then scale
      each coordinate by gain/sqrt(sum), undoing all intermediate shifts so
      the result lands in the celt_norm Q-format (NORM_SHIFT). */
   sum_shift = (29-celt_ilog2(sum))>>1;
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   for (i=0;i<N;i++) {
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   }
#else
   /* Float build: straightforward normalization to magnitude `gain`. */
   mag = 1.f/sqrt(sum);
   for (i=0;i<N;i++) {
      X[i] *= mag*gain;
   }
#endif
}
785 | | |
/* Encodes the unit vector X by quantizing it onto the surface of an
   N-dimensional hypercube: the largest-magnitude coordinate picks the cube
   face (coded with ec_enc_uint plus one sign bit), and every other
   coordinate is scalar-quantized to `res` bits relative to that face.
   X       : vector to quantize (length N); overwritten with the decoded
             reconstruction when resynth is non-zero
   N       : vector length
   res     : bits per remaining coordinate (K = 2^res levels)
   B       : number of blocks; B != 1 (transient) forces an odd K
   enc     : range encoder the symbols are written to
   gain    : gain used for resynthesis
   resynth : non-zero to reconstruct X in place after encoding
   Returns (1<<B)-1 (collapse mask with all blocks marked active), or 0 in
   the degenerate K==1 case where nothing is coded and X decodes to zero. */
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   int i;
   int face=0;
   int K;
   VARDECL(int, iy);
   /* Start below any possible |X[i]| so the first coordinate always wins. */
   celt_norm faceval=-1;
   opus_val32 norm;
   int sign;
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      /* A single level carries no information: code nothing, decode zeros. */
      if (resynth) OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   /* Find the largest-magnitude coordinate: it defines the cube face. */
   for (i=0;i<N;i++) {
      if (ABS32(X[i]) > faceval) {
         faceval = ABS32(X[i]);
         face = i;
      }
   }
   sign = X[face]<0;
   ec_enc_uint(enc, face, N);
   ec_enc_bits(enc, sign, 1);
#ifdef FIXED_POINT
   if (faceval != 0) {
      /* Normalize faceval into [2^30, 2^31) and compute norm ~= K/(2*faceval)
         so each coordinate maps into [0, K). */
      int face_shift = 30-celt_ilog2(faceval);
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
      norm = MULT16_32_Q15(K, norm);
      for (i=0;i<N;i++) {
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
      }
   } else {
      /* All-zero input: nothing to scale, all indices are zero. */
      OPUS_CLEAR(iy, N);
   }
#else
   /* Float build: map X[i] in [-faceval, faceval] linearly onto [0, K). */
   norm = .5f*K/(faceval+EPSILON);
   for (i=0;i<N;i++) {
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   }
#endif
   /* The face coordinate is fully determined by (face, sign), so only the
      other N-1 indices are written to the bitstream. */
   for (i=0;i<N;i++) {
      if (i != face) ec_enc_bits(enc, iy[i], res);
   }
   if (resynth) {
      cubic_synthesis(X, iy, N, K, face, sign, gain);
   }
   RESTORE_STACK;
   return (1<<B)-1;
}
840 | | |
841 | 59.9k | unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) { |
842 | 59.9k | int i; |
843 | 59.9k | int face; |
844 | 59.9k | int sign; |
845 | 59.9k | int K; |
846 | 59.9k | VARDECL(int, iy); |
847 | 59.9k | SAVE_STACK; |
848 | 59.9k | ALLOC(iy, N, int); |
849 | 59.9k | K = 1<<res; |
850 | | /* Using odd K on transients to avoid adding pre-echo. */ |
851 | 59.9k | if (B!=1) K=IMAX(1, K-1); |
852 | 59.9k | if (K==1) { |
853 | 3.16k | OPUS_CLEAR(X, N); |
854 | 3.16k | RESTORE_STACK; |
855 | 3.16k | return 0; |
856 | 3.16k | } |
857 | 56.8k | face = ec_dec_uint(dec, N); |
858 | 56.8k | sign = ec_dec_bits(dec, 1); |
859 | 664k | for (i=0;i<N;i++) { |
860 | 608k | if (i != face) iy[i] = ec_dec_bits(dec, res); |
861 | 608k | } |
862 | 56.8k | iy[face]=0; |
863 | 56.8k | cubic_synthesis(X, iy, N, K, face, sign, gain); |
864 | 56.8k | RESTORE_STACK; |
865 | 56.8k | return (1<<B)-1; |
866 | 59.9k | } |
867 | | #endif |