Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2002-2008 Jean-Marc Valin |
2 | | Copyright (c) 2007-2008 CSIRO |
3 | | Copyright (c) 2007-2009 Xiph.Org Foundation |
4 | | Copyright (c) 2024 Arm Limited |
5 | | Written by Jean-Marc Valin */ |
6 | | /** |
7 | | @file mathops.h |
8 | | @brief Various math functions |
9 | | */ |
10 | | /* |
11 | | Redistribution and use in source and binary forms, with or without |
12 | | modification, are permitted provided that the following conditions |
13 | | are met: |
14 | | |
15 | | - Redistributions of source code must retain the above copyright |
16 | | notice, this list of conditions and the following disclaimer. |
17 | | |
18 | | - Redistributions in binary form must reproduce the above copyright |
19 | | notice, this list of conditions and the following disclaimer in the |
20 | | documentation and/or other materials provided with the distribution. |
21 | | |
22 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
23 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
24 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
25 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
26 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
27 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
28 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
29 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
30 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
31 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
32 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | */ |
34 | | |
35 | | #ifdef HAVE_CONFIG_H |
36 | | #include "config.h" |
37 | | #endif |
38 | | |
39 | | #include "float_cast.h" |
40 | | #include "mathops.h" |
41 | | |
42 | | /*Compute floor(sqrt(_val)) with exact arithmetic. |
43 | | _val must be greater than 0. |
44 | | This has been tested on all possible 32-bit inputs greater than 0.*/ |
45 | 488k | unsigned isqrt32(opus_uint32 _val){ |
46 | 488k | unsigned b; |
47 | 488k | unsigned g; |
48 | 488k | int bshift; |
49 | | /*Uses the second method from |
50 | | http://www.azillionmonkeys.com/qed/sqroot.html |
51 | | The main idea is to search for the largest binary digit b such that |
52 | | (g+b)*(g+b) <= _val, and add it to the solution g.*/ |
53 | 488k | g=0; |
54 | 488k | bshift=(EC_ILOG(_val)-1)>>1; |
55 | 488k | b=1U<<bshift; |
56 | 2.93M | do{ |
57 | 2.93M | opus_uint32 t; |
58 | 2.93M | t=(((opus_uint32)g<<1)+b)<<bshift; |
59 | 2.93M | if(t<=_val){ |
60 | 1.70M | g+=b; |
61 | 1.70M | _val-=t; |
62 | 1.70M | } |
63 | 2.93M | b>>=1; |
64 | 2.93M | bshift--; |
65 | 2.93M | } |
66 | 2.93M | while(bshift>=0); |
67 | 488k | return g; |
68 | 488k | } |
69 | | |
70 | | #ifdef FIXED_POINT |
71 | | |
72 | | opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b) |
73 | 3.70M | { |
74 | 3.70M | opus_val16 rcp; |
75 | 3.70M | opus_val32 result, rem; |
76 | 3.70M | int shift = celt_ilog2(b)-29; |
77 | 3.70M | a = VSHR32(a,shift); |
78 | 3.70M | b = VSHR32(b,shift); |
79 | | /* 16-bit reciprocal */ |
80 | 3.70M | rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); |
81 | 3.70M | result = MULT16_32_Q15(rcp, a); |
82 | 3.70M | rem = PSHR32(a,2)-MULT32_32_Q31(result, b); |
83 | 3.70M | result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); |
84 | 3.70M | return result; |
85 | 3.70M | } |
86 | | |
87 | 3.47M | opus_val32 frac_div32(opus_val32 a, opus_val32 b) { |
88 | 3.47M | opus_val32 result = frac_div32_q29(a,b); |
89 | 3.47M | if (result >= 536870912) /* 2^29 */ |
90 | 1.32k | return 2147483647; /* 2^31 - 1 */ |
91 | 3.47M | else if (result <= -536870912) /* -2^29 */ |
92 | 0 | return -2147483647; /* -2^31 */ |
93 | 3.47M | else |
94 | 3.47M | return SHL32(result, 2); |
95 | 3.47M | } |
96 | | |
97 | | /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ |
98 | | opus_val16 celt_rsqrt_norm(opus_val32 x) |
99 | 16.4M | { |
100 | 16.4M | opus_val16 n; |
101 | 16.4M | opus_val16 r; |
102 | 16.4M | opus_val16 r2; |
103 | 16.4M | opus_val16 y; |
104 | | /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
105 | 16.4M | n = x-32768; |
106 | | /* Get a rough initial guess for the root. |
107 | | The optimal minimax quadratic approximation (using relative error) is |
108 | | r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
109 | | Coefficients here, and the final result r, are Q14.*/ |
110 | 16.4M | r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); |
111 | | /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. |
112 | | We can compute the result from n and r using Q15 multiplies with some |
113 | | adjustment, carefully done to avoid overflow. |
114 | | Range of y is [-1564,1594]. */ |
115 | 16.4M | r2 = MULT16_16_Q15(r, r); |
116 | 16.4M | y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); |
117 | | /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). |
118 | | This yields the Q14 reciprocal square root of the Q16 x, with a maximum |
119 | | relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a |
120 | | peak absolute error of 2.26591/16384. */ |
121 | 16.4M | return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, |
122 | 16.4M | SUB16(MULT16_16_Q15(y, 12288), 16384)))); |
123 | 16.4M | } |
124 | | |
125 | | /** Reciprocal sqrt approximation in the range [0.25,1) (Q31 in, Q29 out) */ |
126 | | opus_val32 celt_rsqrt_norm32(opus_val32 x) |
127 | 14.1M | { |
128 | 14.1M | opus_int32 tmp; |
129 | | /* Use the first-order Newton-Raphson method to refine the root estimate. |
130 | | * r = r * (1.5 - 0.5*x*r*r) */ |
131 | 14.1M | opus_int32 r_q29 = SHL32(celt_rsqrt_norm(SHR32(x, 31-16)), 15); |
132 | | /* Split evaluation in steps to avoid exploding macro expansion. */ |
133 | 14.1M | tmp = MULT32_32_Q31(r_q29, r_q29); |
134 | 14.1M | tmp = MULT32_32_Q31(1073741824 /* Q31 */, tmp); |
135 | 14.1M | tmp = MULT32_32_Q31(x, tmp); |
136 | 14.1M | return SHL32(MULT32_32_Q31(r_q29, SUB32(201326592 /* Q27 */, tmp)), 4); |
137 | 14.1M | } |
138 | | |
139 | | /** Sqrt approximation (QX input, QX/2 output) */ |
140 | | opus_val32 celt_sqrt(opus_val32 x) |
141 | 4.16M | { |
142 | 4.16M | int k; |
143 | 4.16M | opus_val16 n; |
144 | 4.16M | opus_val32 rt; |
145 | | /* These coeffs are optimized in fixed-point to minimize both RMS and max error |
146 | | of sqrt(x) over .25<x<1 without exceeding 32767. |
147 | | The RMS error is 3.4e-5 and the max is 8.2e-5. */ |
148 | 4.16M | static const opus_val16 C[6] = {23171, 11574, -2901, 1592, -1002, 336}; |
149 | 4.16M | if (x==0) |
150 | 161k | return 0; |
151 | 4.00M | else if (x>=1073741824) |
152 | 65.5k | return 32767; |
153 | 3.93M | k = (celt_ilog2(x)>>1)-7; |
154 | 3.93M | x = VSHR32(x, 2*k); |
155 | 3.93M | n = x-32768; |
156 | 3.93M | rt = ADD32(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], |
157 | 3.93M | MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, ADD16(C[4], MULT16_16_Q15(n, (C[5]))))))))))); |
158 | 3.93M | rt = VSHR32(rt,7-k); |
159 | 3.93M | return rt; |
160 | 4.16M | } |
161 | | |
162 | | /* Perform fixed-point arithmetic to approximate the square root. When the input |
163 | | * is in Qx format, the output will be in Q(x/2 + 16) format. */ |
164 | | opus_val32 celt_sqrt32(opus_val32 x) |
165 | 10.9M | { |
166 | 10.9M | int k; |
167 | 10.9M | opus_int32 x_frac; |
168 | 10.9M | if (x==0) |
169 | 357k | return 0; |
170 | 10.5M | else if (x>=1073741824) |
171 | 0 | return 2147483647; /* 2^31 -1 */ |
172 | 10.5M | k = (celt_ilog2(x)>>1); |
173 | 10.5M | x_frac = VSHR32(x, 2*(k-14)-1); |
174 | 10.5M | x_frac = MULT32_32_Q31(celt_rsqrt_norm32(x_frac), x_frac); |
175 | 10.5M | if (k < 12) return PSHR32(x_frac, 12-k); |
176 | 9.75M | else return SHL32(x_frac, k-12); |
177 | 10.5M | } |
178 | | |
179 | | #define L1 32767 |
180 | | #define L2 -7651 |
181 | | #define L3 8277 |
182 | | #define L4 -626 |
183 | | |
184 | | static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) |
185 | 1.11M | { |
186 | 1.11M | opus_val16 x2; |
187 | | |
188 | 1.11M | x2 = MULT16_16_P15(x,x); |
189 | 1.11M | return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 |
190 | 1.11M | )))))))); |
191 | 1.11M | } |
192 | | |
193 | | #undef L1 |
194 | | #undef L2 |
195 | | #undef L3 |
196 | | #undef L4 |
197 | | |
198 | | opus_val16 celt_cos_norm(opus_val32 x) |
199 | 1.11M | { |
200 | 1.11M | x = x&0x0001ffff; |
201 | 1.11M | if (x>SHL32(EXTEND32(1), 16)) |
202 | 0 | x = SUB32(SHL32(EXTEND32(1), 17),x); |
203 | 1.11M | if (x&0x00007fff) |
204 | 1.11M | { |
205 | 1.11M | if (x<SHL32(EXTEND32(1), 15)) |
206 | 1.11M | { |
207 | 1.11M | return _celt_cos_pi_2(EXTRACT16(x)); |
208 | 1.11M | } else { |
209 | 0 | return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); |
210 | 0 | } |
211 | 1.11M | } else { |
212 | 0 | if (x&0x0000ffff) |
213 | 0 | return 0; |
214 | 0 | else if (x&0x0001ffff) |
215 | 0 | return -32767; |
216 | 0 | else |
217 | 0 | return 32767; |
218 | 0 | } |
219 | 1.11M | } |
220 | | |
221 | | /* Calculates the cosine of (PI*0.5*x) where the input x ranges from -1 to 1 and |
222 | | * is in Q30 format. The output will also be in Q31 format. */ |
223 | | opus_val32 celt_cos_norm32(opus_val32 x) |
224 | 2.86M | { |
225 | 2.86M | static const opus_val32 COS_NORM_COEFF_A0 = 134217720; /* Q27 */ |
226 | 2.86M | static const opus_val32 COS_NORM_COEFF_A1 = -662336704; /* Q29 */ |
227 | 2.86M | static const opus_val32 COS_NORM_COEFF_A2 = 544710848; /* Q31 */ |
228 | 2.86M | static const opus_val32 COS_NORM_COEFF_A3 = -178761936; /* Q33 */ |
229 | 2.86M | static const opus_val32 COS_NORM_COEFF_A4 = 29487206; /* Q35 */ |
230 | 2.86M | opus_int32 x_sq_q29, tmp; |
231 | | /* The expected x is in the range of [-1.0f, 1.0f] */ |
232 | 2.86M | celt_sig_assert((x >= -1073741824) && (x <= 1073741824)); |
233 | | /* Make cos(+/- pi/2) exactly zero. */ |
234 | 2.86M | if (ABS32(x) == 1<<30) return 0; |
235 | 2.20M | x_sq_q29 = MULT32_32_Q31(x, x); |
236 | | /* Split evaluation in steps to avoid exploding macro expansion. */ |
237 | 2.20M | tmp = ADD32(COS_NORM_COEFF_A3, MULT32_32_Q31(x_sq_q29, COS_NORM_COEFF_A4)); |
238 | 2.20M | tmp = ADD32(COS_NORM_COEFF_A2, MULT32_32_Q31(x_sq_q29, tmp)); |
239 | 2.20M | tmp = ADD32(COS_NORM_COEFF_A1, MULT32_32_Q31(x_sq_q29, tmp)); |
240 | 2.20M | return SHL32(ADD32(COS_NORM_COEFF_A0, MULT32_32_Q31(x_sq_q29, tmp)), 4); |
241 | 2.86M | } |
242 | | |
243 | | /* Computes a 16 bit approximate reciprocal (1/x) for a normalized Q15 input, |
244 | | * resulting in a Q15 output. */ |
245 | | opus_val16 celt_rcp_norm16(opus_val16 x) |
246 | 9.80M | { |
247 | 9.80M | opus_val16 r; |
248 | | /* Start with a linear approximation: |
249 | | r = 1.8823529411764706-0.9411764705882353*n. |
250 | | The coefficients and the result are Q14 in the range [15420,30840].*/ |
251 | 9.80M | r = ADD16(30840, MULT16_16_Q15(-15420, x)); |
252 | | /* Perform two Newton iterations: |
253 | | r -= r*((r*n)+(r-1.Q15)) |
254 | | = r*((r*n)+(r-1.Q15)). */ |
255 | 9.80M | r = SUB16(r, MULT16_16_Q15(r, |
256 | 9.80M | ADD16(MULT16_16_Q15(r, x), ADD16(r, -32768)))); |
257 | | /* We subtract an extra 1 in the second iteration to avoid overflow; it also |
258 | | neatly compensates for truncation error in the rest of the process. */ |
259 | 9.80M | return SUB16(r, ADD16(1, MULT16_16_Q15(r, |
260 | 9.80M | ADD16(MULT16_16_Q15(r, x), ADD16(r, -32768))))); |
261 | 9.80M | } |
262 | | |
263 | | /* Computes a 32 bit approximated reciprocal (1/x) for a normalized Q31 input, |
264 | | * resulting in a Q30 output. The expected input range is [0.5f, 1.0f) in Q31 |
265 | | * and the expected output range is [1.0f, 2.0f) in Q30. */ |
266 | | opus_val32 celt_rcp_norm32(opus_val32 x) |
267 | 3.42M | { |
268 | 3.42M | opus_val32 r_q30; |
269 | 3.42M | celt_sig_assert(x >= 1073741824); |
270 | 3.42M | r_q30 = SHL32(EXTEND32(celt_rcp_norm16(SHR32(x, 15)-32768)), 16); |
271 | | /* Solving f(y) = a - 1/y using the Newton Method |
272 | | * Note: f(y)' = 1/y^2 |
273 | | * r = r - f(r)/f(r)' = r - (x * r*r - r) |
274 | | * = r - r*(r*x - 1) |
275 | | * where |
276 | | * - r means 1/y's approximation. |
277 | | * - x means a, the input of function. |
278 | | * Please note that: |
279 | | * - It adds 1 to avoid overflow |
280 | | * - -1.0f in Q30 is -1073741824. */ |
281 | 3.42M | return SUB32(r_q30, ADD32(SHL32( |
282 | 3.42M | MULT32_32_Q31(ADD32(MULT32_32_Q31(r_q30, x), -1073741824), |
283 | 3.42M | r_q30), 1), 1)); |
284 | 3.42M | } |
285 | | |
286 | | /** Reciprocal approximation (Q15 input, Q16 output) */ |
287 | | opus_val32 celt_rcp(opus_val32 x) |
288 | 6.38M | { |
289 | 6.38M | int i; |
290 | 6.38M | opus_val16 r; |
291 | 6.38M | celt_sig_assert(x>0); |
292 | 6.38M | i = celt_ilog2(x); |
293 | | |
294 | | /* Compute the reciprocal of a Q15 number in the range [0, 1). */ |
295 | 6.38M | r = celt_rcp_norm16(VSHR32(x,i-15)-32768); |
296 | | |
297 | | /* r is now the Q15 solution to 2/(n+1), with a maximum relative error |
298 | | of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute |
299 | | error of 1.24665/32768. */ |
300 | 6.38M | return VSHR32(EXTEND32(r),i-16); |
301 | 6.38M | } |
302 | | |
303 | | #endif |
304 | | |
305 | | #ifndef DISABLE_FLOAT_API |
306 | | |
307 | | void celt_float2int16_c(const float * OPUS_RESTRICT in, short * OPUS_RESTRICT out, int cnt) |
308 | 0 | { |
309 | 0 | int i; |
310 | 0 | for (i = 0; i < cnt; i++) |
311 | 0 | { |
312 | 0 | out[i] = FLOAT2INT16(in[i]); |
313 | 0 | } |
314 | 0 | } |
315 | | |
316 | | int opus_limit2_checkwithin1_c(float * samples, int cnt) |
317 | 86.3k | { |
318 | 86.3k | int i; |
319 | 86.3k | if (cnt <= 0) |
320 | 0 | { |
321 | 0 | return 1; |
322 | 0 | } |
323 | | |
324 | 103M | for (i = 0; i < cnt; i++) |
325 | 103M | { |
326 | 103M | float clippedVal = samples[i]; |
327 | 103M | clippedVal = FMAX(-2.0f, clippedVal); |
328 | 103M | clippedVal = FMIN(2.0f, clippedVal); |
329 | 103M | samples[i] = clippedVal; |
330 | 103M | } |
331 | | |
332 | | /* C implementation can't provide quick hint. Assume it might exceed -1/+1. */ |
333 | 86.3k | return 0; |
334 | 86.3k | } |
335 | | |
336 | | #endif /* DISABLE_FLOAT_API */ |