Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2002-2008 Jean-Marc Valin |
2 | | Copyright (c) 2007-2008 CSIRO |
3 | | Copyright (c) 2007-2009 Xiph.Org Foundation |
4 | | Written by Jean-Marc Valin */ |
5 | | /** |
6 | | @file mathops.h |
7 | | @brief Various math functions |
8 | | */ |
9 | | /* |
10 | | Redistribution and use in source and binary forms, with or without |
11 | | modification, are permitted provided that the following conditions |
12 | | are met: |
13 | | |
14 | | - Redistributions of source code must retain the above copyright |
15 | | notice, this list of conditions and the following disclaimer. |
16 | | |
17 | | - Redistributions in binary form must reproduce the above copyright |
18 | | notice, this list of conditions and the following disclaimer in the |
19 | | documentation and/or other materials provided with the distribution. |
20 | | |
21 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
22 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
23 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
24 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
25 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
26 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
27 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
28 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
29 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
30 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
31 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #ifdef HAVE_CONFIG_H |
35 | | #include "config.h" |
36 | | #endif |
37 | | |
38 | | #include "mathops.h" |
39 | | |
40 | | /*Compute floor(sqrt(_val)) with exact arithmetic. |
41 | | _val must be greater than 0. |
42 | | This has been tested on all possible 32-bit inputs greater than 0.*/ |
43 | 0 | unsigned isqrt32(opus_uint32 _val){ |
44 | 0 | unsigned b; |
45 | 0 | unsigned g; |
46 | 0 | int bshift; |
47 | | /*Uses the second method from |
48 | | http://www.azillionmonkeys.com/qed/sqroot.html |
49 | | The main idea is to search for the largest binary digit b such that |
50 | | (g+b)*(g+b) <= _val, and add it to the solution g.*/ |
51 | 0 | g=0; |
52 | 0 | bshift=(EC_ILOG(_val)-1)>>1; |
53 | 0 | b=1U<<bshift; |
54 | 0 | do{ |
55 | 0 | opus_uint32 t; |
56 | 0 | t=(((opus_uint32)g<<1)+b)<<bshift; |
57 | 0 | if(t<=_val){ |
58 | 0 | g+=b; |
59 | 0 | _val-=t; |
60 | 0 | } |
61 | 0 | b>>=1; |
62 | 0 | bshift--; |
63 | 0 | } |
64 | 0 | while(bshift>=0); |
65 | 0 | return g; |
66 | 0 | } |
67 | | |
68 | | #ifdef FIXED_POINT |
69 | | |
70 | | opus_val32 frac_div32(opus_val32 a, opus_val32 b) |
71 | | { |
72 | | opus_val16 rcp; |
73 | | opus_val32 result, rem; |
74 | | int shift = celt_ilog2(b)-29; |
75 | | a = VSHR32(a,shift); |
76 | | b = VSHR32(b,shift); |
77 | | /* 16-bit reciprocal */ |
78 | | rcp = ROUND16(celt_rcp(ROUND16(b,16)),3); |
79 | | result = MULT16_32_Q15(rcp, a); |
80 | | rem = PSHR32(a,2)-MULT32_32_Q31(result, b); |
81 | | result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2)); |
82 | | if (result >= 536870912) /* 2^29 */ |
83 | | return 2147483647; /* 2^31 - 1 */ |
84 | | else if (result <= -536870912) /* -2^29 */ |
85 | | return -2147483647; /* -2^31 */ |
86 | | else |
87 | | return SHL32(result, 2); |
88 | | } |
89 | | |
90 | | /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ |
91 | | opus_val16 celt_rsqrt_norm(opus_val32 x) |
92 | | { |
93 | | opus_val16 n; |
94 | | opus_val16 r; |
95 | | opus_val16 r2; |
96 | | opus_val16 y; |
97 | | /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
98 | | n = x-32768; |
99 | | /* Get a rough initial guess for the root. |
100 | | The optimal minimax quadratic approximation (using relative error) is |
101 | | r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
102 | | Coefficients here, and the final result r, are Q14.*/ |
103 | | r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); |
104 | | /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. |
105 | | We can compute the result from n and r using Q15 multiplies with some |
106 | | adjustment, carefully done to avoid overflow. |
107 | | Range of y is [-1564,1594]. */ |
108 | | r2 = MULT16_16_Q15(r, r); |
109 | | y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); |
110 | | /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). |
111 | | This yields the Q14 reciprocal square root of the Q16 x, with a maximum |
112 | | relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a |
113 | | peak absolute error of 2.26591/16384. */ |
114 | | return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, |
115 | | SUB16(MULT16_16_Q15(y, 12288), 16384)))); |
116 | | } |
117 | | |
118 | | /** Sqrt approximation (QX input, QX/2 output) */ |
119 | | opus_val32 celt_sqrt(opus_val32 x) |
120 | | { |
121 | | int k; |
122 | | opus_val16 n; |
123 | | opus_val32 rt; |
124 | | static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664}; |
125 | | if (x==0) |
126 | | return 0; |
127 | | else if (x>=1073741824) |
128 | | return 32767; |
129 | | k = (celt_ilog2(x)>>1)-7; |
130 | | x = VSHR32(x, 2*k); |
131 | | n = x-32768; |
132 | | rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], |
133 | | MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); |
134 | | rt = VSHR32(rt,7-k); |
135 | | return rt; |
136 | | } |
137 | | |
138 | | #define L1 32767 |
139 | | #define L2 -7651 |
140 | | #define L3 8277 |
141 | | #define L4 -626 |
142 | | |
143 | | static OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x) |
144 | | { |
145 | | opus_val16 x2; |
146 | | |
147 | | x2 = MULT16_16_P15(x,x); |
148 | | return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 |
149 | | )))))))); |
150 | | } |
151 | | |
152 | | #undef L1 |
153 | | #undef L2 |
154 | | #undef L3 |
155 | | #undef L4 |
156 | | |
157 | | opus_val16 celt_cos_norm(opus_val32 x) |
158 | | { |
159 | | x = x&0x0001ffff; |
160 | | if (x>SHL32(EXTEND32(1), 16)) |
161 | | x = SUB32(SHL32(EXTEND32(1), 17),x); |
162 | | if (x&0x00007fff) |
163 | | { |
164 | | if (x<SHL32(EXTEND32(1), 15)) |
165 | | { |
166 | | return _celt_cos_pi_2(EXTRACT16(x)); |
167 | | } else { |
168 | | return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x))); |
169 | | } |
170 | | } else { |
171 | | if (x&0x0000ffff) |
172 | | return 0; |
173 | | else if (x&0x0001ffff) |
174 | | return -32767; |
175 | | else |
176 | | return 32767; |
177 | | } |
178 | | } |
179 | | |
180 | | /** Reciprocal approximation (Q15 input, Q16 output) */ |
181 | | opus_val32 celt_rcp(opus_val32 x) |
182 | | { |
183 | | int i; |
184 | | opus_val16 n; |
185 | | opus_val16 r; |
186 | | celt_sig_assert(x>0); |
187 | | i = celt_ilog2(x); |
188 | | /* n is Q15 with range [0,1). */ |
189 | | n = VSHR32(x,i-15)-32768; |
190 | | /* Start with a linear approximation: |
191 | | r = 1.8823529411764706-0.9411764705882353*n. |
192 | | The coefficients and the result are Q14 in the range [15420,30840].*/ |
193 | | r = ADD16(30840, MULT16_16_Q15(-15420, n)); |
194 | | /* Perform two Newton iterations: |
195 | | r -= r*((r*n)-1.Q15) |
196 | | = r*((r*n)+(r-1.Q15)). */ |
197 | | r = SUB16(r, MULT16_16_Q15(r, |
198 | | ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); |
199 | | /* We subtract an extra 1 in the second iteration to avoid overflow; it also |
200 | | neatly compensates for truncation error in the rest of the process. */ |
201 | | r = SUB16(r, ADD16(1, MULT16_16_Q15(r, |
202 | | ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); |
203 | | /* r is now the Q15 solution to 2/(n+1), with a maximum relative error |
204 | | of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute |
205 | | error of 1.24665/32768. */ |
206 | | return VSHR32(EXTEND32(r),i-16); |
207 | | } |
208 | | |
209 | | #endif |