Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (c) 2007-2008 CSIRO |
2 | | Copyright (c) 2007-2009 Xiph.Org Foundation |
3 | | Written by Jean-Marc Valin */ |
4 | | /* |
5 | | Redistribution and use in source and binary forms, with or without |
6 | | modification, are permitted provided that the following conditions |
7 | | are met: |
8 | | |
9 | | - Redistributions of source code must retain the above copyright |
10 | | notice, this list of conditions and the following disclaimer. |
11 | | |
12 | | - Redistributions in binary form must reproduce the above copyright |
13 | | notice, this list of conditions and the following disclaimer in the |
14 | | documentation and/or other materials provided with the distribution. |
15 | | |
16 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
17 | | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
18 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
19 | | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
20 | | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
21 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
22 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
23 | | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
24 | | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
25 | | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
26 | | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | | */ |
28 | | |
29 | | #ifdef HAVE_CONFIG_H |
30 | | #include "config.h" |
31 | | #endif |
32 | | |
33 | | #include "mathops.h" |
34 | | #include "cwrs.h" |
35 | | #include "vq.h" |
36 | | #include "arch.h" |
37 | | #include "os_support.h" |
38 | | #include "bands.h" |
39 | | #include "rate.h" |
40 | | #include "pitch.h" |
41 | | #include "SigProc_FIX.h" |
42 | | |
43 | | #if defined(MIPSr1_ASM) |
44 | | #include "mips/vq_mipsr1.h" |
45 | | #endif |
46 | | |
47 | | #if defined(FIXED_POINT) |
48 | | void norm_scaleup(celt_norm *X, int N, int shift) { |
49 | | int i; |
50 | | celt_assert(shift >= 0); |
51 | | if (shift <= 0) return; |
52 | | for (i=0;i<N;i++) X[i] = SHL32(X[i], shift); |
53 | | } |
54 | | |
55 | | void norm_scaledown(celt_norm *X, int N, int shift) { |
56 | | int i; |
57 | | celt_assert(shift >= 0); |
58 | | if (shift <= 0) return; |
59 | | for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift); |
60 | | } |
61 | | |
62 | | opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) { |
63 | | int i; |
64 | | opus_val32 sum = 0; |
65 | | (void)arch; |
66 | | for (i=0;i<len;i++) sum += x[i]*y[i]; |
67 | | return sum; |
68 | | } |
69 | | opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) { |
70 | | int i; |
71 | | opus_val64 sum = 0; |
72 | | (void)arch; |
73 | | for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i]; |
74 | | return sum>>2*(NORM_SHIFT-14); |
75 | | } |
76 | | #endif |
77 | | |
78 | | #ifndef OVERRIDE_vq_exp_rotation1 |
/* Applies a sequence of 2-D rotations (cos=c, sin=s) between elements that
   are `stride` apart, first in a forward pass over the vector and then in a
   backward pass. The input is scaled down to 14-bit headroom for the 16-bit
   rotation arithmetic and scaled back up afterwards. */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   ms = NEG16(s);
   /* Drop to Q14 so the MULT16_16/MAC16_16 products below cannot overflow. */
   norm_scaledown(X, len, NORM_SHIFT-14);
   /* Forward pass: rotate each pair (Xptr[0], Xptr[stride]). */
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
      *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Backward pass over the same pairs, walking from the end toward the
      start. */
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
      *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Restore the original scaling. */
   norm_scaleup(X, len, NORM_SHIFT-14);
}
106 | | #endif /* OVERRIDE_vq_exp_rotation1 */ |
107 | | |
/* Spreading rotation applied before quantization (dir=1) and inverted after
   decoding (dir=-1). The rotation angle shrinks as the number of pulses K
   grows relative to len, controlled by the spread setting. Does nothing when
   there are enough pulses (2*K>=len) or spreading is disabled. */
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   /* theta is derived from len/(len+factor*K): more pulses -> smaller
      rotation. */
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   /* Apply the rotation independently to each of the `stride` interleaved
      sub-vectors. The dir<0 branch applies the two rotations in reverse
      order with negated sines, which undoes the dir>0 branch. */
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}
152 | | |
/** Normalizes the decoded integer pvq codeword to unit norm.
    iy:    integer pulse vector (input)
    X:     output normalised vector
    Ryy:   energy (sum of squares) of iy, possibly pre-shifted by the caller
    gain:  overall gain to apply
    shift: extra-resolution shift used only in FIXED_POINT+ENABLE_QEXT
           builds; ignored otherwise. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val32 g;

   /* k ~ half the magnitude (log2) of Ryy, used to normalise the rsqrt
      argument. NOTE(review): in float builds k is never declared; this
      compiles because the shift macros below discard their shift argument
      in float mode — confirm against arch.h if touching this. */
#ifdef FIXED_POINT
   k = celt_ilog2(Ryy)>>1;
#endif
   t = VSHR32(Ryy, 2*(k-7)-15);
   /* g = gain/sqrt(Ryy) */
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   i=0;
   (void)shift;
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   if (shift>0) {
      int tot_shift = NORM_SHIFT+1-k-shift;
      if (tot_shift >= 0) {
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
         while (++i < N);
      } else {
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
         while (++i < N);
      }
   } else
#endif
   /* X = g*iy, rescaled to the NORM_SHIFT format. */
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   while (++i < N);
}
186 | | |
/* Builds a bitmask with bit b set iff sub-band b of iy contains at least one
   non-zero pulse. With a single band (B<=1) the band trivially has energy,
   so the mask is 1. */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned mask;
   int N0;
   int band;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = celt_udiv(N, B);
   mask = 0;
   band = 0;
   do {
      unsigned bits = 0;
      int k = 0;
      do {
         bits |= iy[band*N0+k];
      } while (++k<N0);
      mask |= (unsigned)(bits!=0)<<band;
   } while (++band<B);
   return mask;
}
208 | | |
/* Finds the K-pulse integer vector iy that best matches the direction of X
   (PVQ search). Works on |X| with the signs saved and re-applied at the end.
   Uses a pyramid-projection pre-search when K is large, then adds the
   remaining pulses greedily, one at a time, maximising Rxy^2/Ryy.
   Returns the (scaled) energy of the pulse vector, which the caller passes
   to normalise_residual(). X is modified in place. */
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
   VARDECL(celt_norm, y);
   VARDECL(int, signx);
   int i, j;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   SAVE_STACK;

   (void)arch;
   ALLOC(y, N, celt_norm);
   ALLOC(signx, N, int);
#ifdef FIXED_POINT
   /* Scale X down just enough that the correlations below fit in the
      16/32-bit arithmetic used by the search. */
   {
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
      norm_scaledown(X, N, shift);
   }
#endif
   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      signx[j] = X[j]<0;
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
      X[j] = ABS16(X[j]);
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      } while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
#ifdef FIXED_POINT
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
      /* Project onto the pyramid: iy[j] ~= K*X[j]/sum, rounded down so we
         never allocate more than K pulses. */
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y is kept at twice the actual value so the per-pulse update in
            the greedy loop below avoids a multiply. */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      } while (++j<N);
   }
   celt_sig_assert(pulsesLeft>=0);

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_sig_assert(pulsesLeft<=N+3);
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy search: place the remaining pulses one at a time in the
      position that maximises the (squared) match Rxy^2/Ryy. */
   for (i=0;i<pulsesLeft;i++)
   {
      opus_val16 Rxy, Ryy;
      int best_id;
      opus_val32 best_num;
      opus_val16 best_den;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);

      /* Calculations for position 0 are out of the loop, in part to reduce
         mispredicted branches (since the if condition is usually false)
         in the loop. */
      /* Temporary sums of the new pulse(s) */
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
      /* We're multiplying y[j] by two so we don't have to do it here */
      Ryy = ADD16(yy, y[0]);

      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
         Rxy is positive because the sign is pre-computed) */
      Rxy = MULT16_16_Q15(Rxy,Rxy);
      best_den = Ryy;
      best_num = Rxy;
      j=1;
      do {
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: It's not clear whether a cmov is faster than a branch here
            since the condition is more often false than true and using
            a cmov introduces data dependencies across iterations. The optimal
            choice may be architecture-dependent. */
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
      /* OPT: This is more likely to be compiled without a branch than the code above
         but has the same performance otherwise. */
      iy[j] = (iy[j]^-signx[j]) + signx[j];
   } while (++j<N);
   RESTORE_STACK;
   return yy;
}
379 | | |
380 | | #ifdef ENABLE_QEXT |
381 | | #include "macros.h" |
382 | | |
/* Special-case PVQ search for N==2 with QEXT extra resolution. Computes both
   the coarse codeword iy (K pulses) and the up-sampled codeword up_iy
   (up*K pulses), and returns the offset between them in *refine so it can be
   entropy-coded separately. Returns the energy of up_iy, shifted down by
   2*shift in fixed point. */
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   opus_val32 sum;
   opus_val32 rcp_sum;
   int offset;
   sum = ABS32(X[0]) + ABS32(X[1]);
   /* Degenerate input: put all pulses on the first dimension. */
   if (sum < EPSILON) {
      iy[0] = K;
      up_iy[0] = up*K;
      iy[1]=up_iy[1]=0;
      *refine=0;
#ifdef FIXED_POINT
      return (opus_val64)K*K*up*up>>2*shift;
#else
      (void)shift;
      return K*(float)K*up*up;
#endif
   }
   /* Project X[0]/(|X[0]|+|X[1]|) onto both the coarse and up-sampled
      pyramids, with rounding to nearest. */
#ifdef FIXED_POINT
   int sum_shift;
   opus_val32 X0;
   sum_shift = 30-celt_ilog2(sum);
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
#else
   rcp_sum = 1.f/sum;
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
#endif
   /* Clamp the fine codeword to within (up-1)/2 of the scaled coarse one so
      the refinement offset stays codable. */
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   offset = up_iy[0] - up*iy[0];
   /* The second dimension takes all remaining pulses. */
   iy[1] = K-abs(iy[0]);
   up_iy[1] = up*K-abs(up_iy[0]);
   if (X[1] < 0) {
      iy[1] = -iy[1];
      up_iy[1] = -up_iy[1];
      offset = -offset;
   }
   *refine = offset;
#ifdef FIXED_POINT
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
#else
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
#endif
}
429 | | |
430 | | static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) { |
431 | | int i; |
432 | | int dir; |
433 | | VARDECL(opus_val32, rounding); |
434 | | int iysum = 0; |
435 | | SAVE_STACK; |
436 | | ALLOC(rounding, N, opus_val32); |
437 | | for (i=0;i<N;i++) { |
438 | | opus_val32 tmp; |
439 | | tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]); |
440 | | #ifdef FIXED_POINT |
441 | | iy[i] = (tmp+64) >> 7; |
442 | | #else |
443 | | iy[i] = (int)floor(.5+tmp); |
444 | | #endif |
445 | | rounding[i] = tmp - SHL32(iy[i], 7); |
446 | | } |
447 | | if (iy != iy0) { |
448 | | for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i])); |
449 | | } |
450 | | for (i=0;i<N;i++) iysum += iy[i]; |
451 | | if (abs(iysum - K) > 32) { |
452 | | return 1; |
453 | | } |
454 | | dir = iysum < K ? 1 : -1; |
455 | | while (iysum != K) { |
456 | | opus_val32 roundval=-1000000*dir; |
457 | | int roundpos=0; |
458 | | for (i=0;i<N;i++) { |
459 | | if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) { |
460 | | roundval = rounding[i]; |
461 | | roundpos = i; |
462 | | } |
463 | | } |
464 | | iy[roundpos] += dir; |
465 | | rounding[roundpos] -= SHL32(dir, 15); |
466 | | iysum+=dir; |
467 | | } |
468 | | return 0; |
469 | | } |
470 | | |
471 | | static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) { |
472 | | opus_val32 rcp_sum; |
473 | | opus_val32 sum=0; |
474 | | int i; |
475 | | int failed=0; |
476 | | opus_val64 yy=0; |
477 | | VARDECL(opus_val32, Xn); |
478 | | SAVE_STACK; |
479 | | for (i=0;i<N;i++) sum += ABS32(X[i]); |
480 | | ALLOC(Xn, N, opus_val32); |
481 | | if (sum < EPSILON) |
482 | | failed = 1; |
483 | | else { |
484 | | #ifdef FIXED_POINT |
485 | | int sum_shift = 30-celt_ilog2(sum); |
486 | | rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift)); |
487 | | for (i=0;i<N;i++) { |
488 | | Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum); |
489 | | } |
490 | | #else |
491 | | rcp_sum = celt_rcp(sum); |
492 | | for (i=0;i<N;i++) { |
493 | | Xn[i] = ABS32(X[i])*rcp_sum; |
494 | | } |
495 | | #endif |
496 | | } |
497 | | failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N); |
498 | | failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N); |
499 | | if (failed) { |
500 | | iy[0] = K; |
501 | | for (i=1;i<N;i++) iy[i] = 0; |
502 | | up_iy[0] = up*K; |
503 | | for (i=1;i<N;i++) up_iy[i] = 0; |
504 | | } |
505 | | for (i=0;i<N;i++) { |
506 | | yy += up_iy[i]*(opus_val64)up_iy[i]; |
507 | | if (X[i] < 0) { |
508 | | iy[i] = -iy[i]; |
509 | | up_iy[i] = -up_iy[i]; |
510 | | } |
511 | | refine[i] = up_iy[i]-up*iy[i]; |
512 | | } |
513 | | #ifdef FIXED_POINT |
514 | | return (yy + (1<<2*shift>>1))>>2*shift; |
515 | | #else |
516 | | (void)shift; |
517 | | return yy; |
518 | | #endif |
519 | | } |
520 | | #endif |
521 | | |
522 | | #ifdef ENABLE_QEXT |
523 | | /* Take advantage of the fact that "large" refine values are much less likely |
524 | | than smaller ones. */ |
525 | | static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) { |
526 | | int large; |
527 | | large = abs(refine)>up/2; |
528 | | ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1); |
529 | | if (large) { |
530 | | ec_enc_bits(enc, refine < 0, 1); |
531 | | ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1); |
532 | | } else { |
533 | | ec_enc_bits(enc, refine+up/2, extra_bits); |
534 | | } |
535 | | } |
536 | | |
537 | | static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) { |
538 | | int large, refine; |
539 | | large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1); |
540 | | if (large) { |
541 | | int sign = ec_dec_bits(dec, 1); |
542 | | refine = ec_dec_bits(dec, extra_bits-1) + up/2+1; |
543 | | if (sign) refine = -refine; |
544 | | } else { |
545 | | refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2; |
546 | | } |
547 | | return refine; |
548 | | } |
549 | | #endif |
550 | | |
/* Quantizes the normalised vector X with K pulses using PVQ, writing the
   codeword to the range coder enc. When ENABLE_QEXT and extra_bits>=2, a
   finer up-sampled codeword is searched and its refinement is written to
   ext_enc. If resynth is non-zero, X is replaced by the decoder-side
   reconstruction (including the inverse spreading rotation).
   Returns the collapse mask (one bit per sub-band with energy). */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
      opus_val32 gain, int resynth
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
{
   VARDECL(int, iy);
   opus_val32 yy;
   unsigned collapse_mask;
#ifdef ENABLE_QEXT
   int yy_shift = 0;
#endif
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   /* Covers vectorization by up to 4. */
   ALLOC(iy, N+3, int);

   /* Forward spreading rotation (undone below when resynthesizing). */
   exp_rotation(X, N, 1, B, K, spread);

#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      int refine;
      int up_iy[2];
      int up;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* N==2 needs a single refinement value, coded as a uint. */
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else if (extra_bits >= 2) {
      int i;
      VARDECL(int, up_iy);
      VARDECL(int, refine);
      int up, use_entropy;
      ALLOC(up_iy, N, int);
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* Entropy-code the refinements only if enough bits remain. */
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
      /* The last refinement is implied by the pulse count; only its sign is
         ambiguous when the coarse codeword has no pulse there. */
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else
#endif
   {
      yy = op_pvq_search(X, iy, K, N, arch);
      collapse_mask = extract_collapse_mask(iy, N, B);
      encode_pulses(iy, N, K, enc);
      if (resynth)
         normalise_residual(iy, X, N, yy, gain, 0);
   }

   if (resynth)
      exp_rotation(X, N, -1, B, K, spread);

   RESTORE_STACK;
   return collapse_mask;
}
617 | | |
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.
    With ENABLE_QEXT and extra_bits>=2, reads per-dimension refinements from
    ext_dec to reconstruct the up-sampled codeword (mirroring alg_quant).
    Returns the collapse mask. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val32 gain
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   int yy_shift=0;
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      int up;
      int refine;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Single refinement value for N==2, mirroring the encoder. */
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
      iy[0] *= up;
      iy[1] *= up;
      /* Apply the refinement while keeping |iy[0]|+|iy[1]| == up*K. */
      if (iy[1] == 0) {
         iy[1] = (iy[0] > 0) ? -refine : refine;
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
      } else if (iy[1] > 0) {
         iy[0] += refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      } else {
         iy[0] -= refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      }
      /* Recompute the energy of the refined codeword. */
#ifdef FIXED_POINT
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
#endif
   } else if (extra_bits >= 2) {
      int i;
      opus_val64 yy64;
      VARDECL(int, refine);
      int up, use_entropy;
      int sign=0;
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Must match the encoder's use_entropy decision bit-for-bit. */
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
      /* Sign of the last dimension: explicit bit only when the coarse
         codeword had no pulse there. */
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
      else sign = iy[N-1] < 0;
      for (i=0;i<N-1;i++) {
         iy[i] = iy[i]*up + refine[i];
      }
      /* The last dimension absorbs whatever pulses remain. */
      iy[N-1] = up*K;
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
      if (sign) iy[N-1] = -iy[N-1];
      yy64 = 0;
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
#ifdef FIXED_POINT
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = yy64;
#endif
   }
#endif
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
692 | | |
693 | | #ifndef OVERRIDE_renormalise_vector |
/* Rescales X in place so its L2 norm equals gain. The vector is temporarily
   scaled down to 14-bit headroom for the energy computation and the
   multiply, then scaled back to the NORM_SHIFT format. */
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   norm_scaledown(X, N, NORM_SHIFT-14);
   /* EPSILON guards against a division-by-zero-like blowup on silence. */
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
   /* NOTE(review): k exists only in fixed-point builds; the shift macros
      below discard their shift argument in float mode. */
#ifdef FIXED_POINT
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   /* g = gain/sqrt(E) */
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
      xptr++;
   }
   norm_scaleup(X, N, NORM_SHIFT-14);
   /*return celt_sqrt(E);*/
}
721 | | #endif /* OVERRIDE_renormalise_vector */ |
722 | | |
/* Computes the stereo angle between X and Y as atan2(|side|, |mid|).
   In stereo mode, mid/side energies are accumulated from the per-sample
   sums and differences; otherwise X and Y are treated as already-separated
   channels. Fixed-point returns the normalised atan2 directly; float scales
   it to the same integer range. */
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val32 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = 0;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         /* Shift down before squaring to keep MAC16_16 in range. */
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   }
   mid = celt_sqrt32(Emid);
   side = celt_sqrt32(Eside);
#if defined(FIXED_POINT)
   itheta = celt_atan2p_norm(side, mid);
#else
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
#endif

   return itheta;
}
755 | | |
756 | | #ifdef ENABLE_QEXT |
757 | | |
/* Reconstructs a unit-norm (times gain) vector from a cubic (per-coordinate)
   codeword: each iy[i] in [0,K-1] maps to the odd value (1+2*iy[i])-K, the
   `face` coordinate is pinned to +/-K, and the result is normalised.
   NOTE(review): `shift` is declared only under FIXED_POINT; in float builds
   the shift macros discard their shift argument, so it is never referenced
   there. */
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   int i;
   opus_val32 sum=0;
   opus_val32 mag;
#ifdef FIXED_POINT
   int sum_shift;
   /* Headroom so the squared sums below cannot overflow. */
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
#endif
   for (i=0;i<N;i++) {
      X[i] = (1+2*iy[i])-K;
   }
   X[face] = sign ? -K : K;
   for (i=0;i<N;i++) {
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   }
   /* Normalise: X *= gain/sqrt(sum). */
#ifdef FIXED_POINT
   sum_shift = (29-celt_ilog2(sum))>>1;
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   for (i=0;i<N;i++) {
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   }
#else
   mag = 1.f/sqrt(sum);
   for (i=0;i<N;i++) {
      X[i] *= mag*gain;
   }
#endif
}
786 | | |
/* Cubic (per-coordinate scalar) quantizer used for QEXT: finds the largest-
   magnitude coordinate (`face`), codes its index and sign, then codes every
   other coordinate with `res` bits relative to the face value. With resynth,
   X is replaced by the reconstruction. Returns the collapse mask. */
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   int i;
   int face=0;
   int K;
   VARDECL(int, iy);
   celt_norm faceval=-1;
   opus_val32 norm;
   int sign;
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      /* Nothing to code at this resolution. */
      if (resynth) OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   /* Find the dominant coordinate. */
   for (i=0;i<N;i++) {
      if (ABS32(X[i]) > faceval) {
         faceval = ABS32(X[i]);
         face = i;
      }
   }
   sign = X[face]<0;
   ec_enc_uint(enc, face, N);
   ec_enc_bits(enc, sign, 1);
   /* Quantize each coordinate of (X+faceval)/(2*faceval) to K levels. */
#ifdef FIXED_POINT
   if (faceval != 0) {
      int face_shift = 30-celt_ilog2(faceval);
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
      norm = MULT16_32_Q15(K, norm);
      for (i=0;i<N;i++) {
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
      }
   } else {
      OPUS_CLEAR(iy, N);
   }
#else
   norm = .5f*K/(faceval+EPSILON);
   for (i=0;i<N;i++) {
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   }
#endif
   /* The face coordinate is implied, so it is not coded. */
   for (i=0;i<N;i++) {
      if (i != face) ec_enc_bits(enc, iy[i], res);
   }
   if (resynth) {
      cubic_synthesis(X, iy, N, K, face, sign, gain);
   }
   RESTORE_STACK;
   return (1<<B)-1;
}
841 | | |
842 | | unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) { |
843 | | int i; |
844 | | int face; |
845 | | int sign; |
846 | | int K; |
847 | | VARDECL(int, iy); |
848 | | SAVE_STACK; |
849 | | ALLOC(iy, N, int); |
850 | | K = 1<<res; |
851 | | /* Using odd K on transients to avoid adding pre-echo. */ |
852 | | if (B!=1) K=IMAX(1, K-1); |
853 | | if (K==1) { |
854 | | OPUS_CLEAR(X, N); |
855 | | return 0; |
856 | | } |
857 | | face = ec_dec_uint(dec, N); |
858 | | sign = ec_dec_bits(dec, 1); |
859 | | for (i=0;i<N;i++) { |
860 | | if (i != face) iy[i] = ec_dec_bits(dec, res); |
861 | | } |
862 | | iy[face]=0; |
863 | | cubic_synthesis(X, iy, N, K, face, sign, gain); |
864 | | RESTORE_STACK; |
865 | | return (1<<B)-1; |
866 | | } |
867 | | #endif |