Coverage Report

Created: 2026-02-14 07:28

/src/opus/celt/vq.c
  Line |  Count | Source
     1 |        | /* Copyright (c) 2007-2008 CSIRO
     2 |        |    Copyright (c) 2007-2009 Xiph.Org Foundation
     3 |        |    Written by Jean-Marc Valin */
     4 |        | /*
     5 |        |    Redistribution and use in source and binary forms, with or without
     6 |        |    modification, are permitted provided that the following conditions
     7 |        |    are met:
     8 |        |
     9 |        |    - Redistributions of source code must retain the above copyright
    10 |        |    notice, this list of conditions and the following disclaimer.
    11 |        |
    12 |        |    - Redistributions in binary form must reproduce the above copyright
    13 |        |    notice, this list of conditions and the following disclaimer in the
    14 |        |    documentation and/or other materials provided with the distribution.
    15 |        |
    16 |        |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    17 |        |    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    18 |        |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    19 |        |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
    20 |        |    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    21 |        |    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    22 |        |    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    23 |        |    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    24 |        |    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    25 |        |    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    26 |        |    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    27 |        | */
    28 |        |
    29 |        | #ifdef HAVE_CONFIG_H
    30 |        | #include "config.h"
    31 |        | #endif
    32 |        |
    33 |        | #include "mathops.h"
    34 |        | #include "cwrs.h"
    35 |        | #include "vq.h"
    36 |        | #include "arch.h"
    37 |        | #include "os_support.h"
    38 |        | #include "bands.h"
    39 |        | #include "rate.h"
    40 |        | #include "pitch.h"
    41 |        | #include "SigProc_FIX.h"
    42 |        |
    43 |        | #if defined(FIXED_POINT)
    44 |        | void norm_scaleup(celt_norm *X, int N, int shift) {
    45 |        |    int i;
    46 |        |    celt_assert(shift >= 0);
    47 |        |    if (shift <= 0) return;
    48 |        |    for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
    49 |        | }
    50 |        |
    51 |        | void norm_scaledown(celt_norm *X, int N, int shift) {
    52 |        |    int i;
    53 |        |    celt_assert(shift >= 0);
    54 |        |    if (shift <= 0) return;
    55 |        |    for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift);
    56 |        | }
    57 |        |
    58 |        | opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
    59 |        |    int i;
    60 |        |    opus_val32 sum = 0;
    61 |        |    (void)arch;
    62 |        |    for (i=0;i<len;i++) sum += x[i]*y[i];
    63 |        |    return sum;
    64 |        | }
    65 |        | opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
    66 |        |    int i;
    67 |        |    opus_val64 sum = 0;
    68 |        |    (void)arch;
    69 |        |    for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
    70 |        |    return sum>>2*(NORM_SHIFT-14);
    71 |        | }
    72 |        | #endif
    73 |        |
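Note on the two inner products above: in fixed point, band coefficients carry NORM_SHIFT fractional bits while much of the PVQ code below works at Q14, which is what norm_scaleup()/norm_scaledown() convert between. celt_inner_prod_norm_shift() accumulates in 64 bits and then drops 2*(NORM_SHIFT-14) bits so its result lands on the Q28 scale a Q14-by-Q14 sum would have. A minimal sketch of that bookkeeping (NORM_SHIFT itself is defined in arch.h, not in this file, and the Q-format reading of celt_norm is an assumption here):

    /* Sketch: x_i and y_i each carry NORM_SHIFT fractional bits, so their
       product carries 2*NORM_SHIFT; dropping 2*(NORM_SHIFT-14) of them
       leaves 2*14 = 28, i.e. the scale of a Q14*Q14 sum. */
    opus_val64 prod = (opus_val64)x_i*y_i;                     /* Q(2*NORM_SHIFT) */
    opus_val32 q28  = (opus_val32)(prod >> 2*(NORM_SHIFT-14)); /* Q28 */
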
    74 |        | #ifndef OVERRIDE_vq_exp_rotation1
    75 |        | static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
    76 |  19.8M | {
    77 |  19.8M |    int i;
    78 |  19.8M |    opus_val16 ms;
    79 |  19.8M |    celt_norm *Xptr;
    80 |  19.8M |    Xptr = X;
    81 |  19.8M |    ms = NEG16(s);
    82 |  19.8M |    norm_scaledown(X, len, NORM_SHIFT-14);
    83 |   158M |    for (i=0;i<len-stride;i++)
    84 |   138M |    {
    85 |   138M |       celt_norm x1, x2;
    86 |   138M |       x1 = Xptr[0];
    87 |   138M |       x2 = Xptr[stride];
    88 |   138M |       Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
    89 |   138M |       *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
    90 |   138M |    }
    91 |  19.8M |    Xptr = &X[len-2*stride-1];
    92 |   128M |    for (i=len-2*stride-1;i>=0;i--)
    93 |   108M |    {
    94 |   108M |       celt_norm x1, x2;
    95 |   108M |       x1 = Xptr[0];
    96 |   108M |       x2 = Xptr[stride];
    97 |   108M |       Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
    98 |   108M |       *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
    99 |   108M |    }
   100 |  19.8M |    norm_scaleup(X, len, NORM_SHIFT-14);
   101 |  19.8M | }
   102 |        | #endif /* OVERRIDE_vq_exp_rotation1 */
   103 |        |
   104 |        | void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
   105 |  29.0M | {
   106 |  29.0M |    static const int SPREAD_FACTOR[3]={15,10,5};
   107 |  29.0M |    int i;
   108 |  29.0M |    opus_val16 c, s;
   109 |  29.0M |    opus_val16 gain, theta;
   110 |  29.0M |    int stride2=0;
   111 |  29.0M |    int factor;
   112 |        |
   113 |  29.0M |    if (2*K>=len || spread==SPREAD_NONE)
   114 |  23.3M |       return;
   115 |  5.67M |    factor = SPREAD_FACTOR[spread-1];
   116 |        |
   117 |  5.67M |    gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   118 |  5.67M |    theta = HALF16(MULT16_16_Q15(gain,gain));
   119 |        |
   120 |  5.67M |    c = celt_cos_norm(EXTEND32(theta));
   121 |  5.67M |    s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
   122 |        |
   123 |  5.67M |    if (len>=8*stride)
   124 |  3.71M |    {
   125 |  3.71M |       stride2 = 1;
   126 |        |       /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
   127 |        |          It's basically incrementing as long as (stride2+0.5)^2 < len/stride, i.e. (multiplying through by stride) while (stride2*stride2+stride2)*stride + stride/4 < len. */
   128 |  15.1M |       while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
   129 |  11.3M |          stride2++;
   130 |  3.71M |    }
   131 |        |    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
   132 |        |       extract_collapse_mask().*/
   133 |  5.67M |    len = celt_udiv(len, stride);
   134 |  21.1M |    for (i=0;i<stride;i++)
   135 |  15.4M |    {
   136 |  15.4M |       if (dir < 0)
   137 |  2.36M |       {
   138 |  2.36M |          if (stride2)
   139 |   472k |             exp_rotation1(X+i*len, len, stride2, s, c);
   140 |  2.36M |          exp_rotation1(X+i*len, len, 1, c, s);
   141 |  13.1M |       } else {
   142 |  13.1M |          exp_rotation1(X+i*len, len, 1, c, -s);
   143 |  13.1M |          if (stride2)
   144 |  3.91M |             exp_rotation1(X+i*len, len, stride2, s, -c);
   145 |  13.1M |       }
   146 |  15.4M |    }
   147 |  5.67M | }
   148 |        |
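Note: exp_rotation1() is two in-place passes of a Givens rotation over element pairs `stride` apart, and exp_rotation() chooses the angle so that spreading weakens as the pulse density K/len grows. A minimal floating-point sketch of one call, without the fixed-point macros (rotate_pass() is a hypothetical name, not part of vq.c):

    /* Rotate pairs (X[i], X[i+stride]) by the angle given by c = cos(theta)
       and s = sin(theta): a forward pass, then a backward pass, mirroring
       the two loops of exp_rotation1() above. */
    static void rotate_pass(float *X, int len, int stride, float c, float s)
    {
       int i;
       for (i=0;i<len-stride;i++) {       /* forward pass */
          float x1 = X[i], x2 = X[i+stride];
          X[i+stride] = c*x2 + s*x1;
          X[i]        = c*x1 - s*x2;
       }
       for (i=len-2*stride-1;i>=0;i--) {  /* backward pass */
          float x1 = X[i], x2 = X[i+stride];
          X[i+stride] = c*x2 + s*x1;
          X[i]        = c*x1 - s*x2;
       }
    }
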
   149 |        | /** Normalizes the decoded integer pvq codeword to unit norm. */
   150 |        | static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
   151 |        |       int N, opus_val32 Ryy, opus_val32 gain, int shift)
   152 |  3.46M | {
   153 |  3.46M |    int i;
   154 |        | #ifdef FIXED_POINT
   155 |        |    int k;
   156 |        | #endif
   157 |  3.46M |    opus_val32 t;
   158 |  3.46M |    opus_val32 g;
   159 |        |
   160 |        | #ifdef FIXED_POINT
   161 |        |    k = celt_ilog2(Ryy)>>1;
   162 |        | #endif
   163 |  3.46M |    t = VSHR32(Ryy, 2*(k-7)-15);
   164 |  3.46M |    g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   165 |  3.46M |    i=0;
   166 |  3.46M |    (void)shift;
   167 |        | #if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   168 |        |    if (shift>0) {
   169 |        |       int tot_shift = NORM_SHIFT+1-k-shift;
   170 |        |       if (tot_shift >= 0) {
   171 |        |          do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
   172 |        |          while (++i < N);
   173 |        |       } else {
   174 |        |          do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
   175 |        |          while (++i < N);
   176 |        |       }
   177 |        |    } else
   178 |        | #endif
   179 |  26.2M |    do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   180 |  26.2M |    while (++i < N);
   181 |  3.46M | }
   182 |        |
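Note: stripped of the Q-format bookkeeping, normalise_residual() computes X = gain*iy/sqrt(Ryy), where Ryy is the squared norm of the integer codeword. A float sketch (a codeword with K > 0 pulses always has Ryy >= 1, so the division is safe):

    #include <math.h>

    /* Scale the integer PVQ codeword iy to length `gain`, writing into X. */
    static void normalise_residual_float(const int *iy, float *X, int N,
                                         float Ryy, float gain)
    {
       int i;
       float g = gain/sqrtf(Ryy);
       for (i=0;i<N;i++)
          X[i] = g*iy[i];
    }
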
   183 |        | static unsigned extract_collapse_mask(int *iy, int N, int B)
   184 |  25.5M | {
   185 |  25.5M |    unsigned collapse_mask;
   186 |  25.5M |    int N0;
   187 |  25.5M |    int i;
   188 |  25.5M |    if (B<=1)
   189 |  18.9M |       return 1;
   190 |        |    /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
   191 |        |       exp_rotation().*/
   192 |  6.66M |    N0 = celt_udiv(N, B);
   193 |  6.66M |    collapse_mask = 0;
   194 |  31.3M |    i=0; do {
   195 |  31.3M |       int j;
   196 |  31.3M |       unsigned tmp=0;
   197 |  66.8M |       j=0; do {
   198 |  66.8M |          tmp |= iy[i*N0+j];
   199 |  66.8M |       } while (++j<N0);
   200 |  31.3M |       collapse_mask |= (tmp!=0)<<i;
   201 |  31.3M |    } while (++i<B);
   202 |  6.66M |    return collapse_mask;
   203 |  25.5M | }
   204 |        |
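Note: the collapse mask has one bit per block of N0 = N/B samples, set when that block received at least one pulse. A small worked example (values chosen purely for illustration):

    /* N=8, B=2 splits iy into two blocks of N0=4 samples each.
       iy = {0,0,0,0, 3,-1,0,0}: block 0 is all zeros, block 1 is not,
       so extract_collapse_mask(iy, 8, 2) returns binary 10, i.e. 2. */
    int iy_demo[8] = {0,0,0,0, 3,-1,0,0};
    unsigned mask = extract_collapse_mask(iy_demo, 8, 2); /* == 2 */
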
   205 |        | opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
   206 |      0 | {
   207 |      0 |    VARDECL(celt_norm, y);
   208 |      0 |    VARDECL(int, signx);
   209 |      0 |    int i, j;
   210 |      0 |    int pulsesLeft;
   211 |      0 |    opus_val32 sum;
   212 |      0 |    opus_val32 xy;
   213 |      0 |    opus_val16 yy;
   214 |      0 |    SAVE_STACK;
   215 |        |
   216 |      0 |    (void)arch;
   217 |      0 |    ALLOC(y, N, celt_norm);
   218 |      0 |    ALLOC(signx, N, int);
   219 |        | #ifdef FIXED_POINT
   220 |        |    {
   221 |        |       int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
   222 |        |       shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
   223 |        |       norm_scaledown(X, N, shift);
   224 |        |    }
   225 |        | #endif
   226 |        |    /* Get rid of the sign */
   227 |      0 |    sum = 0;
   228 |      0 |    j=0; do {
   229 |      0 |       signx[j] = X[j]<0;
   230 |        |       /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
   231 |      0 |       X[j] = ABS16(X[j]);
   232 |      0 |       iy[j] = 0;
   233 |      0 |       y[j] = 0;
   234 |      0 |    } while (++j<N);
   235 |        |
   236 |      0 |    xy = yy = 0;
   237 |        |
   238 |      0 |    pulsesLeft = K;
   239 |        |
   240 |        |    /* Do a pre-search by projecting on the pyramid */
   241 |      0 |    if (K > (N>>1))
   242 |      0 |    {
   243 |      0 |       opus_val16 rcp;
   244 |      0 |       j=0; do {
   245 |      0 |          sum += X[j];
   246 |      0 |       }  while (++j<N);
   247 |        |
   248 |        |       /* If X is too small, just replace it with a pulse at 0 */
   249 |        | #ifdef FIXED_POINT
   250 |        |       if (sum <= K)
   251 |        | #else
   252 |        |       /* Prevents infinities and NaNs from causing too many pulses
   253 |        |          to be allocated. 64 is an approximation of infinity here. */
   254 |      0 |       if (!(sum > EPSILON && sum < 64))
   255 |      0 | #endif
   256 |      0 |       {
   257 |      0 |          X[0] = QCONST16(1.f,14);
   258 |      0 |          j=1; do
   259 |      0 |             X[j]=0;
   260 |      0 |          while (++j<N);
   261 |      0 |          sum = QCONST16(1.f,14);
   262 |      0 |       }
   263 |        | #ifdef FIXED_POINT
   264 |        |       rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
   265 |        | #else
   266 |        |       /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
   267 |      0 |       rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
   268 |      0 | #endif
   269 |      0 |       j=0; do {
   270 |        | #ifdef FIXED_POINT
   271 |        |          /* It's really important to round *towards zero* here */
   272 |        |          iy[j] = MULT16_16_Q15(X[j],rcp);
   273 |        | #else
   274 |      0 |          iy[j] = (int)floor(rcp*X[j]);
   275 |      0 | #endif
   276 |      0 |          y[j] = (celt_norm)iy[j];
   277 |      0 |          yy = MAC16_16(yy, y[j],y[j]);
   278 |      0 |          xy = MAC16_16(xy, X[j],y[j]);
   279 |      0 |          y[j] *= 2;
   280 |      0 |          pulsesLeft -= iy[j];
   281 |      0 |       }  while (++j<N);
   282 |      0 |    }
   283 |      0 |    celt_sig_assert(pulsesLeft>=0);
   284 |        |
   285 |        |    /* This should never happen, but just in case it does (e.g. on silence)
   286 |        |       we fill the first bin with pulses. */
   287 |        | #ifdef FIXED_POINT_DEBUG
   288 |        |    celt_sig_assert(pulsesLeft<=N+3);
   289 |        | #endif
   290 |      0 |    if (pulsesLeft > N+3)
   291 |      0 |    {
   292 |      0 |       opus_val16 tmp = (opus_val16)pulsesLeft;
   293 |      0 |       yy = MAC16_16(yy, tmp, tmp);
   294 |      0 |       yy = MAC16_16(yy, tmp, y[0]);
   295 |      0 |       iy[0] += pulsesLeft;
   296 |      0 |       pulsesLeft=0;
   297 |      0 |    }
   298 |        |
   299 |      0 |    for (i=0;i<pulsesLeft;i++)
   300 |      0 |    {
   301 |      0 |       opus_val16 Rxy, Ryy;
   302 |      0 |       int best_id;
   303 |      0 |       opus_val32 best_num;
   304 |      0 |       opus_val16 best_den;
   305 |        | #ifdef FIXED_POINT
   306 |        |       int rshift;
   307 |        | #endif
   308 |        | #ifdef FIXED_POINT
   309 |        |       rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
   310 |        | #endif
   311 |      0 |       best_id = 0;
   312 |        |       /* The squared magnitude term gets added anyway, so we might as well
   313 |        |          add it outside the loop */
   314 |      0 |       yy = ADD16(yy, 1);
   315 |        |
   316 |        |       /* Calculations for position 0 are out of the loop, in part to reduce
   317 |        |          mispredicted branches (since the if condition is usually false)
   318 |        |          in the loop. */
   319 |        |       /* Temporary sums of the new pulse(s) */
   320 |      0 |       Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
   321 |        |       /* We're multiplying y[j] by two so we don't have to do it here */
   322 |      0 |       Ryy = ADD16(yy, y[0]);
   323 |        |
   324 |        |       /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
   325 |        |          Rxy is positive because the sign is pre-computed) */
   326 |      0 |       Rxy = MULT16_16_Q15(Rxy,Rxy);
   327 |      0 |       best_den = Ryy;
   328 |      0 |       best_num = Rxy;
   329 |      0 |       j=1;
   330 |      0 |       do {
   331 |        |          /* Temporary sums of the new pulse(s) */
   332 |      0 |          Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
   333 |        |          /* We're multiplying y[j] by two so we don't have to do it here */
   334 |      0 |          Ryy = ADD16(yy, y[j]);
   335 |        |
   336 |        |          /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
   337 |        |             Rxy is positive because the sign is pre-computed) */
   338 |      0 |          Rxy = MULT16_16_Q15(Rxy,Rxy);
   339 |        |          /* The idea is to check for num/den >= best_num/best_den, but this way
   340 |        |             we can do it without any division */
   341 |        |          /* OPT: It's not clear whether a cmov is faster than a branch here
   342 |        |             since the condition is more often false than true and using
   343 |        |             a cmov introduces data dependencies across iterations. The optimal
   344 |        |             choice may be architecture-dependent. */
   345 |      0 |          if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
   346 |      0 |          {
   347 |      0 |             best_den = Ryy;
   348 |      0 |             best_num = Rxy;
   349 |      0 |             best_id = j;
   350 |      0 |          }
   351 |      0 |       } while (++j<N);
   352 |        |
   353 |        |       /* Updating the sums of the new pulse(s) */
   354 |      0 |       xy = ADD32(xy, EXTEND32(X[best_id]));
   355 |        |       /* We're multiplying y[j] by two so we don't have to do it here */
   356 |      0 |       yy = ADD16(yy, y[best_id]);
   357 |        |
   358 |        |       /* Only now that we've made the final choice, update y/iy */
   359 |        |       /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
   360 |      0 |       y[best_id] += 2;
   361 |      0 |       iy[best_id]++;
   362 |      0 |    }
   363 |        |
   364 |        |    /* Put the original sign back */
   365 |      0 |    j=0;
   366 |      0 |    do {
   367 |        |       /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
   368 |        |       /* OPT: This is more likely to be compiled without a branch than the code above
   369 |        |          but has the same performance otherwise. */
   370 |      0 |       iy[j] = (iy[j]^-signx[j]) + signx[j];
   371 |      0 |    } while (++j<N);
   372 |      0 |    RESTORE_STACK;
   373 |      0 |    return yy;
   374 |      0 | }
   375 |        |
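Note: op_pvq_search_c() seeds iy by projecting X onto the pyramid sum(|iy|) = K, then places the remaining pulses one at a time, each time maximising Rxy^2/Ryy using the cross-multiplied comparison seen above. A compact float sketch of just the greedy phase (pre-search, scaling and sign handling omitted; X must already be non-negative):

    /* Greedy pulse placement: xy and yy carry the running correlation and
       energy terms for the pulses already in iy, exactly as in the fixed-point
       code above (there y[j] caches 2*iy[j]). */
    static void pvq_greedy_float(const float *X, int *iy, int N, int pulsesLeft,
                                 float xy, float yy)
    {
       int i;
       for (i=0;i<pulsesLeft;i++) {
          int j, best_id = 0;
          float Rxy, Ryy, best_num, best_den;
          yy += 1;            /* the new pulse's own square, same for every j */
          best_num = (xy + X[0])*(xy + X[0]);
          best_den = yy + 2*iy[0];
          for (j=1;j<N;j++) {
             Rxy = xy + X[j];     /* correlation if the pulse lands at j */
             Ryy = yy + 2*iy[j];  /* energy if the pulse lands at j */
             /* Rxy^2/Ryy > best_num/best_den, cross-multiplied: no division */
             if (best_den*Rxy*Rxy > Ryy*best_num) {
                best_num = Rxy*Rxy;
                best_den = Ryy;
                best_id = j;
             }
          }
          xy += X[best_id];
          yy += 2*iy[best_id];
          iy[best_id]++;
       }
    }
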
   376 |        | #ifdef ENABLE_QEXT
   377 |        | #include "macros.h"
   378 |        |
   379 |        | static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   380 |        |    opus_val32 sum;
   381 |        |    opus_val32 rcp_sum;
   382 |        |    int offset;
   383 |        |    sum = ABS32(X[0]) + ABS32(X[1]);
   384 |        |    if (sum < EPSILON) {
   385 |        |       iy[0] = K;
   386 |        |       up_iy[0] = up*K;
   387 |        |       iy[1]=up_iy[1]=0;
   388 |        |       *refine=0;
   389 |        | #ifdef FIXED_POINT
   390 |        |       return (opus_val64)K*K*up*up>>2*shift;
   391 |        | #else
   392 |        |       (void)shift;
   393 |        |       return K*(float)K*up*up;
   394 |        | #endif
   395 |        |    }
   396 |        | #ifdef FIXED_POINT
   397 |        |    int sum_shift;
   398 |        |    opus_val32 X0;
   399 |        |    sum_shift = 30-celt_ilog2(sum);
   400 |        |    rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   401 |        |    X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   402 |        |    iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   403 |        |    up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
   404 |        | #else
   405 |        |    rcp_sum = 1.f/sum;
   406 |        |    iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   407 |        |    up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
   408 |        | #endif
   409 |        |    up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   410 |        |    offset = up_iy[0] - up*iy[0];
   411 |        |    iy[1] = K-abs(iy[0]);
   412 |        |    up_iy[1] = up*K-abs(up_iy[0]);
   413 |        |    if (X[1] < 0) {
   414 |        |       iy[1] = -iy[1];
   415 |        |       up_iy[1] = -up_iy[1];
   416 |        |       offset = -offset;
   417 |        |    }
   418 |        |    *refine = offset;
   419 |        | #ifdef FIXED_POINT
   420 |        |    return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
   421 |        | #else
   422 |        |    return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
   423 |        | #endif
   424 |        | }
   425 |        |
   426 |        | static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
   427 |        |    int i;
   428 |        |    int dir;
   429 |        |    VARDECL(opus_val32, rounding);
   430 |        |    int iysum = 0;
   431 |        |    SAVE_STACK;
   432 |        |    ALLOC(rounding, N, opus_val32);
   433 |        |    for (i=0;i<N;i++) {
   434 |        |       opus_val32 tmp;
   435 |        |       tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
   436 |        | #ifdef FIXED_POINT
   437 |        |       iy[i] = (tmp+64) >> 7;
   438 |        | #else
   439 |        |       iy[i] = (int)floor(.5+tmp);
   440 |        | #endif
   441 |        |       rounding[i] = tmp - SHL32(iy[i], 7);
   442 |        |    }
   443 |        |    if (iy != iy0) {
   444 |        |       for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
   445 |        |    }
   446 |        |    for (i=0;i<N;i++) iysum += iy[i];
   447 |        |    if (abs(iysum - K) > 32) {
   448 |        |       RESTORE_STACK;
   449 |        |       return 1;
   450 |        |    }
   451 |        |    dir = iysum < K ? 1 : -1;
   452 |        |    while (iysum != K) {
   453 |        |       opus_val32 roundval=-1000000*dir;
   454 |        |       int roundpos=0;
   455 |        |       for (i=0;i<N;i++) {
   456 |        |          if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
   457 |        |             roundval = rounding[i];
   458 |        |             roundpos = i;
   459 |        |          }
   460 |        |       }
   461 |        |       iy[roundpos] += dir;
   462 |        |       rounding[roundpos] -= SHL32(dir, 15);
   463 |        |       iysum+=dir;
   464 |        |    }
   465 |        |    RESTORE_STACK;
   466 |        |    return 0;
   467 |        | }
   468 |        |
   469 |        | static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
   470 |        |    opus_val32 rcp_sum;
   471 |        |    opus_val32 sum=0;
   472 |        |    int i;
   473 |        |    int failed=0;
   474 |        |    opus_val64 yy=0;
   475 |        |    VARDECL(opus_val32, Xn);
   476 |        |    SAVE_STACK;
   477 |        |    for (i=0;i<N;i++) sum += ABS32(X[i]);
   478 |        |    ALLOC(Xn, N, opus_val32);
   479 |        |    if (sum < EPSILON)
   480 |        |       failed = 1;
   481 |        |    else {
   482 |        | #ifdef FIXED_POINT
   483 |        |       int sum_shift = 30-celt_ilog2(sum);
   484 |        |       rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   485 |        |       for (i=0;i<N;i++) {
   486 |        |          Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
   487 |        |       }
   488 |        | #else
   489 |        |       rcp_sum = celt_rcp(sum);
   490 |        |       for (i=0;i<N;i++) {
   491 |        |          Xn[i] = ABS32(X[i])*rcp_sum;
   492 |        |       }
   493 |        | #endif
   494 |        |    }
   495 |        |    failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
   496 |        |    failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
   497 |        |    if (failed) {
   498 |        |       iy[0] = K;
   499 |        |       for (i=1;i<N;i++) iy[i] = 0;
   500 |        |       up_iy[0] = up*K;
   501 |        |       for (i=1;i<N;i++) up_iy[i] = 0;
   502 |        |    }
   503 |        |    for (i=0;i<N;i++) {
   504 |        |       yy += up_iy[i]*(opus_val64)up_iy[i];
   505 |        |       if (X[i] < 0) {
   506 |        |          iy[i] = -iy[i];
   507 |        |          up_iy[i] = -up_iy[i];
   508 |        |       }
   509 |        |       refine[i] = up_iy[i]-up*iy[i];
   510 |        |    }
   511 |        |    RESTORE_STACK;
   512 |        | #ifdef FIXED_POINT
   513 |        |    return (yy + (1<<2*shift>>1))>>2*shift;
   514 |        | #else
   515 |        |    (void)shift;
   516 |        |    return yy;
   517 |        | #endif
   518 |        | }
   519 |        | #endif
   520 |        |
   521 |        | #ifdef ENABLE_QEXT
   522 |        | /* Take advantage of the fact that "large" refine values are much less likely
   523 |        |    than smaller ones. */
   524 |        | static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
   525 |        |    int large;
   526 |        |    large = abs(refine)>up/2;
   527 |        |    ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1);
   528 |        |    if (large) {
   529 |        |       ec_enc_bits(enc, refine < 0, 1);
   530 |        |       ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
   531 |        |    } else {
   532 |        |       ec_enc_bits(enc, refine+up/2, extra_bits);
   533 |        |    }
   534 |        | }
   535 |        |
   536 |        | static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
   537 |        |    int large, refine;
   538 |        |    large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1);
   539 |        |    if (large) {
   540 |        |       int sign = ec_dec_bits(dec, 1);
   541 |        |       refine = ec_dec_bits(dec, extra_bits-1) + up/2+1;
   542 |        |       if (sign) refine = -refine;
   543 |        |    } else {
   544 |        |       refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
   545 |        |    }
   546 |        |    return refine;
   547 |        | }
   548 |        | #endif
   549 |        |
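Note: ec_enc_refine()/ec_dec_refine() split the refine range in two: a "small" case |refine| <= up/2 sent as refine+up/2 in extra_bits raw bits, and a rarer "large" case flagged by a low-probability bit (probability 1/8 when use_entropy is set, 1/2 otherwise) followed by a sign bit and |refine|-up/2-1 in extra_bits-1 raw bits. A worked example with assumed up = 15 and extra_bits = 4:

    /* small: refine = -3 -> flag 0, then -3+7 = 4 in 4 raw bits;
       large: refine = -9 -> flag 1, sign 1, then 9-7-1 = 1 in 3 raw bits.
       The decoder reverses this exactly: 1 + 7 + 1 = 9, negated by the sign. */
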
   550 |        | unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
   551 |        |       opus_val32 gain, int resynth
   552 |        |       ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
   553 |  25.5M | {
   554 |  25.5M |    VARDECL(int, iy);
   555 |  25.5M |    opus_val32 yy;
   556 |  25.5M |    unsigned collapse_mask;
   557 |        | #ifdef ENABLE_QEXT
   558 |        |    int yy_shift = 0;
   559 |        | #endif
   560 |  25.5M |    SAVE_STACK;
   561 |        |
   562 |  25.5M |    celt_assert2(K>0, "alg_quant() needs at least one pulse");
   563 |  25.5M |    celt_assert2(N>1, "alg_quant() needs at least two dimensions");
   564 |        |
   565 |        |    /* Covers vectorization by up to 4. */
   566 |  25.5M |    ALLOC(iy, N+3, int);
   567 |        |
   568 |  25.5M |    exp_rotation(X, N, 1, B, K, spread);
   569 |        |
   570 |        | #ifdef ENABLE_QEXT
   571 |        |    if (N==2 && extra_bits >= 2) {
   572 |        |       int refine;
   573 |        |       int up_iy[2];
   574 |        |       int up;
   575 |        |       yy_shift = IMAX(0, extra_bits-7);
   576 |        |       up = (1<<extra_bits)-1;
   577 |        |       yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
   578 |        |       collapse_mask = extract_collapse_mask(up_iy, N, B);
   579 |        |       encode_pulses(iy, N, K, enc);
   580 |        |       ec_enc_uint(ext_enc, refine+(up-1)/2, up);
   581 |        |       if (resynth)
   582 |        |          normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   583 |        |    } else if (extra_bits >= 2) {
   584 |        |       int i;
   585 |        |       VARDECL(int, up_iy);
   586 |        |       VARDECL(int, refine);
   587 |        |       int up, use_entropy;
   588 |        |       ALLOC(up_iy, N, int);
   589 |        |       ALLOC(refine, N, int);
   590 |        |       yy_shift = IMAX(0, extra_bits-7);
   591 |        |       up = (1<<extra_bits)-1;
   592 |        |       yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
   593 |        |       collapse_mask = extract_collapse_mask(up_iy, N, B);
   594 |        |       encode_pulses(iy, N, K, enc);
   595 |        |       use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
   596 |        |       for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
   597 |        |       if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
   598 |        |       if (resynth)
   599 |        |          normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   600 |        |    } else
   601 |        | #endif
   602 |  25.5M |    {
   603 |  25.5M |       yy = op_pvq_search(X, iy, K, N, arch);
   604 |  25.5M |       collapse_mask = extract_collapse_mask(iy, N, B);
   605 |  25.5M |       encode_pulses(iy, N, K, enc);
   606 |  25.5M |       if (resynth)
   607 |  3.46M |          normalise_residual(iy, X, N, yy, gain, 0);
   608 |  25.5M |    }
   609 |        |
   610 |  25.5M |    if (resynth)
   611 |  3.46M |       exp_rotation(X, N, -1, B, K, spread);
   612 |        |
   613 |  25.5M |    RESTORE_STACK;
   614 |  25.5M |    return collapse_mask;
   615 |  25.5M | }
   616 |        |
   617 |        | /** Decode pulse vector and combine the result with the pitch vector to produce
   618 |        |     the final normalised signal in the current band. */
   619 |        | unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
   620 |        |       ec_dec *dec, opus_val32 gain
   621 |        |       ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
   622 |      0 | {
   623 |      0 |    opus_val32 Ryy;
   624 |      0 |    unsigned collapse_mask;
   625 |      0 |    VARDECL(int, iy);
   626 |      0 |    int yy_shift=0;
   627 |      0 |    SAVE_STACK;
   628 |        |
   629 |      0 |    celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   630 |      0 |    celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   631 |      0 |    ALLOC(iy, N, int);
   632 |      0 |    Ryy = decode_pulses(iy, N, K, dec);
   633 |        | #ifdef ENABLE_QEXT
   634 |        |    if (N==2 && extra_bits >= 2) {
   635 |        |       int up;
   636 |        |       int refine;
   637 |        |       yy_shift = IMAX(0, extra_bits-7);
   638 |        |       up = (1<<extra_bits)-1;
   639 |        |       refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
   640 |        |       iy[0] *= up;
   641 |        |       iy[1] *= up;
   642 |        |       if (iy[1] == 0) {
   643 |        |          iy[1] = (iy[0] > 0) ? -refine : refine;
   644 |        |          iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
   645 |        |       } else if (iy[1] > 0) {
   646 |        |          iy[0] += refine;
   647 |        |          iy[1] -= refine*(iy[0]>0?1:-1);
   648 |        |       } else {
   649 |        |          iy[0] -= refine;
   650 |        |          iy[1] -= refine*(iy[0]>0?1:-1);
   651 |        |       }
   652 |        | #ifdef FIXED_POINT
   653 |        |       Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
   654 |        | #else
   655 |        |       Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
   656 |        | #endif
   657 |        |    } else if (extra_bits >= 2) {
   658 |        |       int i;
   659 |        |       opus_val64 yy64;
   660 |        |       VARDECL(int, refine);
   661 |        |       int up, use_entropy;
   662 |        |       int sign=0;
   663 |        |       ALLOC(refine, N, int);
   664 |        |       yy_shift = IMAX(0, extra_bits-7);
   665 |        |       up = (1<<extra_bits)-1;
   666 |        |       use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
   667 |        |       for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
   668 |        |       if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
   669 |        |       else sign = iy[N-1] < 0;
   670 |        |       for (i=0;i<N-1;i++) {
   671 |        |          iy[i] = iy[i]*up + refine[i];
   672 |        |       }
   673 |        |       iy[N-1] = up*K;
   674 |        |       for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
   675 |        |       if (sign) iy[N-1] = -iy[N-1];
   676 |        |       yy64 = 0;
   677 |        |       for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
   678 |        | #ifdef FIXED_POINT
   679 |        |       Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
   680 |        | #else
   681 |        |       Ryy = yy64;
   682 |        | #endif
   683 |        |    }
   684 |        | #endif
   685 |      0 |    normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   686 |      0 |    exp_rotation(X, N, -1, B, K, spread);
   687 |      0 |    collapse_mask = extract_collapse_mask(iy, N, B);
   688 |      0 |    RESTORE_STACK;
   689 |      0 |    return collapse_mask;
   690 |      0 | }
   691 |        |
   692 |        | #ifndef OVERRIDE_renormalise_vector
   693 |        | void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
   694 |  44.1M | {
   695 |  44.1M |    int i;
   696 |        | #ifdef FIXED_POINT
   697 |        |    int k;
   698 |        | #endif
   699 |  44.1M |    opus_val32 E;
   700 |  44.1M |    opus_val16 g;
   701 |  44.1M |    opus_val32 t;
   702 |  44.1M |    celt_norm *xptr;
   703 |  44.1M |    norm_scaledown(X, N, NORM_SHIFT-14);
   704 |  44.1M |    E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
   705 |        | #ifdef FIXED_POINT
   706 |        |    k = celt_ilog2(E)>>1;
   707 |        | #endif
   708 |  44.1M |    t = VSHR32(E, 2*(k-7));
   709 |  44.1M |    g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
   710 |        |
   711 |  44.1M |    xptr = X;
   712 |   652M |    for (i=0;i<N;i++)
   713 |   608M |    {
   714 |   608M |       *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
   715 |   608M |       xptr++;
   716 |   608M |    }
   717 |  44.1M |    norm_scaleup(X, N, NORM_SHIFT-14);
   718 |        |    /*return celt_sqrt(E);*/
   719 |  44.1M | }
   720 |        | #endif /* OVERRIDE_renormalise_vector */
   721 |        |
   722 |        | opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
   723 |   127M | {
   724 |   127M |    int i;
   725 |   127M |    int itheta;
   726 |   127M |    opus_val32 mid, side;
   727 |   127M |    opus_val32 Emid, Eside;
   728 |        |
   729 |   127M |    Emid = Eside = 0;
   730 |   127M |    if (stereo)
   731 |   114M |    {
   732 |  1.45G |       for (i=0;i<N;i++)
   733 |  1.34G |       {
   734 |  1.34G |          celt_norm m, s;
   735 |  1.34G |          m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
   736 |  1.34G |          s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
   737 |  1.34G |          Emid = MAC16_16(Emid, m, m);
   738 |  1.34G |          Eside = MAC16_16(Eside, s, s);
   739 |  1.34G |       }
   740 |   114M |    } else {
   741 |  13.4M |       Emid += celt_inner_prod_norm_shift(X, X, N, arch);
   742 |  13.4M |       Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   743 |  13.4M |    }
   744 |   127M |    mid = celt_sqrt32(Emid);
   745 |   127M |    side = celt_sqrt32(Eside);
   746 |        | #if defined(FIXED_POINT)
   747 |        |    itheta = celt_atan2p_norm(side, mid);
   748 |        | #else
   749 |   127M |    itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
   750 |   127M | #endif
   751 |        |
   752 |   127M |    return itheta;
   753 |   127M | }
   754 |        |
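Note: stereo_itheta() measures the mid/side balance as an angle, itheta ~ atan2(||side||, ||mid||). The float build scales the result by 65536*16384 = 2^30, which suggests celt_atan2p_norm() maps the angle range [0, pi/2] onto [0, 1]; a plain-C rendering under that assumption (celt_atan2p_norm() itself lives in mathops.h):

    #include <math.h>

    /* Angle of (mid, side) mapped onto [0, 2^30]: 0 = pure mid, 2^30 = pure side. */
    static opus_int32 itheta_float(float mid, float side)
    {
       float theta_unit = atan2f(side, mid)/(float)(M_PI/2);   /* [0, 1] */
       return (opus_int32)floorf(.5f + 1073741824.f*theta_unit);
    }
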
   755 |        | #ifdef ENABLE_QEXT
   756 |        |
   757 |        | static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   758 |        |    int i;
   759 |        |    opus_val32 sum=0;
   760 |        |    opus_val32 mag;
   761 |        | #ifdef FIXED_POINT
   762 |        |    int sum_shift;
   763 |        |    int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
   764 |        | #endif
   765 |        |    for (i=0;i<N;i++) {
   766 |        |       X[i] = (1+2*iy[i])-K;
   767 |        |    }
   768 |        |    X[face] = sign ? -K : K;
   769 |        |    for (i=0;i<N;i++) {
   770 |        |       sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   771 |        |    }
   772 |        | #ifdef FIXED_POINT
   773 |        |    sum_shift = (29-celt_ilog2(sum))>>1;
   774 |        |    mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   775 |        |    for (i=0;i<N;i++) {
   776 |        |       X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   777 |        |    }
   778 |        | #else
   779 |        |    mag = 1.f/sqrt(sum);
   780 |        |    for (i=0;i<N;i++) {
   781 |        |       X[i] *= mag*gain;
   782 |        |    }
   783 |        | #endif
   784 |        | }
   785 |        |
   786 |        | unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   787 |        |    int i;
   788 |        |    int face=0;
   789 |        |    int K;
   790 |        |    VARDECL(int, iy);
   791 |        |    celt_norm faceval=-1;
   792 |        |    opus_val32 norm;
   793 |        |    int sign;
   794 |        |    SAVE_STACK;
   795 |        |    ALLOC(iy, N, int);
   796 |        |    K = 1<<res;
   797 |        |    /* Using odd K on transients to avoid adding pre-echo. */
   798 |        |    if (B!=1) K=IMAX(1, K-1);
   799 |        |    if (K==1) {
   800 |        |       if (resynth) OPUS_CLEAR(X, N);
   801 |        |       RESTORE_STACK;
   802 |        |       return 0;
   803 |        |    }
   804 |        |    for (i=0;i<N;i++) {
   805 |        |       if (ABS32(X[i]) > faceval) {
   806 |        |          faceval = ABS32(X[i]);
   807 |        |          face = i;
   808 |        |       }
   809 |        |    }
   810 |        |    sign = X[face]<0;
   811 |        |    ec_enc_uint(enc, face, N);
   812 |        |    ec_enc_bits(enc, sign, 1);
   813 |        | #ifdef FIXED_POINT
   814 |        |    if (faceval != 0) {
   815 |        |       int face_shift = 30-celt_ilog2(faceval);
   816 |        |       norm = celt_rcp_norm32(SHL32(faceval, face_shift));
   817 |        |       norm = MULT16_32_Q15(K, norm);
   818 |        |       for (i=0;i<N;i++) {
   819 |        |          /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
   820 |        |          iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
   821 |        |       }
   822 |        |    } else {
   823 |        |       OPUS_CLEAR(iy, N);
   824 |        |    }
   825 |        | #else
   826 |        |    norm = .5f*K/(faceval+EPSILON);
   827 |        |    for (i=0;i<N;i++) {
   828 |        |       iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   829 |        |    }
   830 |        | #endif
   831 |        |    for (i=0;i<N;i++) {
   832 |        |       if (i != face) ec_enc_bits(enc, iy[i], res);
   833 |        |    }
   834 |        |    if (resynth) {
   835 |        |       cubic_synthesis(X, iy, N, K, face, sign, gain);
   836 |        |    }
   837 |        |    RESTORE_STACK;
   838 |        |    return (1<<B)-1;
   839 |        | }
   840 |        |
   841 |        | unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
   842 |        |    int i;
   843 |        |    int face;
   844 |        |    int sign;
   845 |        |    int K;
   846 |        |    VARDECL(int, iy);
   847 |        |    SAVE_STACK;
   848 |        |    ALLOC(iy, N, int);
   849 |        |    K = 1<<res;
   850 |        |    /* Using odd K on transients to avoid adding pre-echo. */
   851 |        |    if (B!=1) K=IMAX(1, K-1);
   852 |        |    if (K==1) {
   853 |        |       OPUS_CLEAR(X, N);
   854 |        |       RESTORE_STACK;
   855 |        |       return 0;
   856 |        |    }
   857 |        |    face = ec_dec_uint(dec, N);
   858 |        |    sign = ec_dec_bits(dec, 1);
   859 |        |    for (i=0;i<N;i++) {
   860 |        |       if (i != face) iy[i] = ec_dec_bits(dec, res);
   861 |        |    }
   862 |        |    iy[face]=0;
   863 |        |    cubic_synthesis(X, iy, N, K, face, sign, gain);
   864 |        |    RESTORE_STACK;
   865 |        |    return (1<<B)-1;
   866 |        | }
   867 |        | #endif
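Note: the cubic quantizer above codes the largest-magnitude coordinate as a "face" of the hypercube (an index plus a sign) and sends each remaining coordinate as res raw bits, so a band costs roughly log2(N) + 1 + (N-1)*res bits. A worked example with assumed N = 4 and res = 2 (so K = 4 when B == 1):

    /* cubic_quant(): face index via ec_enc_uint(enc, face, 4)   -> 2 bits
       sign via ec_enc_bits(enc, sign, 1)                        -> 1 bit
       the other 3 coordinates at 2 raw bits each                -> 6 bits
       cubic_unquant() reads the same fields back, and cubic_synthesis()
       rebuilds the face coordinate as +/-K while mapping each other iy[i]
       to the cell centre (1+2*iy[i])-K in [-K+1, K-1], then renormalises. */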