Coverage Report

Created: 2025-07-01 07:04

/src/opus/celt/vq.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Written by Jean-Marc Valin */
4
/*
5
   Redistribution and use in source and binary forms, with or without
6
   modification, are permitted provided that the following conditions
7
   are met:
8
9
   - Redistributions of source code must retain the above copyright
10
   notice, this list of conditions and the following disclaimer.
11
12
   - Redistributions in binary form must reproduce the above copyright
13
   notice, this list of conditions and the following disclaimer in the
14
   documentation and/or other materials provided with the distribution.
15
16
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
#ifdef HAVE_CONFIG_H
30
#include "config.h"
31
#endif
32
33
#include "mathops.h"
34
#include "cwrs.h"
35
#include "vq.h"
36
#include "arch.h"
37
#include "os_support.h"
38
#include "bands.h"
39
#include "rate.h"
40
#include "pitch.h"
41
#include "SigProc_FIX.h"
42
43
#if defined(MIPSr1_ASM)
44
#include "mips/vq_mipsr1.h"
45
#endif
46
47
#if defined(FIXED_POINT)
48
void norm_scaleup(celt_norm *X, int N, int shift) {
49
   int i;
50
   celt_assert(shift >= 0);
51
   if (shift <= 0) return;
52
   for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
53
}
54
55
/** Scales each of the N values of X down by 2^shift with rounding, in place.
    shift must be non-negative; shift==0 is a no-op. */
void norm_scaledown(celt_norm *X, int N, int shift) {
   int j;
   celt_assert(shift >= 0);
   if (shift <= 0)
      return;
   for (j=0;j<N;j++)
      X[j] = PSHR32(X[j], shift);
}
61
62
/** Plain inner product of two length-len vectors (32-bit accumulator).
    The arch argument is accepted for API symmetry but unused here. */
opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
   opus_val32 acc = 0;
   int j;
   (void)arch;
   for (j=0;j<len;j++)
      acc += x[j]*y[j];
   return acc;
}
69
opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
70
   int i;
71
   opus_val64 sum = 0;
72
   (void)arch;
73
   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
74
   return sum>>2*(NORM_SHIFT-14);
75
}
76
#endif
77
78
#ifndef OVERRIDE_vq_exp_rotation1
79
/** One pass of the spreading rotation: applies a sequence of 2-D Givens
    rotations (cos=c, sin=s) between samples that are `stride` apart, first
    forward over the vector and then backward over the lower part.  Operates
    in place; the temporary scaledown/scaleup keeps values in the 16-bit
    range that the MULT16_16-based rotation math expects. */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   ms = NEG16(s);
   /* Bring X down to Q14 so each sample fits in 16 bits for the rotations. */
   norm_scaledown(X, len, NORM_SHIFT-14);
   /* Forward pass: rotate (Xptr[0], Xptr[stride]) pairs left to right. */
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Backward pass over the first len-stride elements, right to left. */
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Restore the normal Q format. */
   norm_scaleup(X, len, NORM_SHIFT-14);
}
106
#endif /* OVERRIDE_vq_exp_rotation1 */
107
108
/** Applies (dir=+1) or undoes (dir=-1) the spreading rotation on X so that
    isolated pulses get smeared over neighbouring samples.  The rotation
    angle depends on len, K and the spread setting; it is a no-op when the
    vector already has plenty of pulses (2*K>=len) or spreading is off. */
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   factor = SPREAD_FACTOR[spread-1];

   /* gain = len/(len+factor*K) in Q15; smaller K -> stronger rotation. */
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   /* Apply the rotation per interleaved sub-vector; decoding (dir<0) applies
      the two passes in the reverse order of encoding so it inverts them. */
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}
152
153
/** Normalizes the decoded integer pvq codeword to unit norm.
    iy is the integer pulse vector, Ryy its energy (sum of squares, possibly
    pre-shifted by the caller), gain the target gain, and shift the extra
    qext up-sampling shift (0 outside FIXED_POINT+ENABLE_QEXT builds).
    The result is written to X. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val32 g;

#ifdef FIXED_POINT
   /* k ~ half the magnitude (log2) of Ryy, used to pre-normalize for rsqrt. */
   k = celt_ilog2(Ryy)>>1;
#endif
   t = VSHR32(Ryy, 2*(k-7)-15);
   /* g = gain/sqrt(Ryy), up to the fixed-point scaling handled below. */
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   i=0;
   (void)shift;
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   /* qext path: iy carries extra precision, so fold the extra shift into
      the final scaling, choosing left or right shift to avoid overflow. */
   if (shift>0) {
      int tot_shift = NORM_SHIFT+1-k-shift;
      if (tot_shift >= 0) {
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
         while (++i < N);
      } else {
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
         while (++i < N);
      }
   } else
#endif
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   while (++i < N);
}
186
187
/** Builds a B-bit mask with bit i set iff sub-band i of iy (N/B consecutive
    entries) contains at least one non-zero pulse.  Used for anti-collapse. */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned collapse_mask;
   int N0;
   int i;
   /* With a single block there is nothing to distinguish: always "has energy". */
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   N0 = celt_udiv(N, B);
   collapse_mask = 0;
   i=0; do {
      int j;
      unsigned tmp=0;
      /* OR all pulses of block i together; any non-zero bit marks the block. */
      j=0; do {
         tmp |= iy[i*N0+j];
      } while (++j<N0);
      collapse_mask |= (tmp!=0)<<i;
   } while (++i<B);
   return collapse_mask;
}
208
209
/** PVQ codebook search: finds the vector iy of K signed integer pulses in N
    dimensions that best matches the direction of X (maximizing xy/sqrt(yy)).
    On return X holds the magnitudes |X| (possibly rescaled in FIXED_POINT),
    iy the signed pulse vector, and the return value is the pulse energy yy
    later consumed by normalise_residual(). */
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
   VARDECL(celt_norm, y);
   VARDECL(int, signx);
   int i, j;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   SAVE_STACK;

   (void)arch;
   ALLOC(y, N, celt_norm);
   ALLOC(signx, N, int);
#ifdef FIXED_POINT
   {
      /* Scale X down so that the 16-bit MACs below cannot overflow. */
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
      norm_scaledown(X, N, shift);
   }
#endif
   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      signx[j] = X[j]<0;
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
      X[j] = ABS16(X[j]);
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
#ifdef FIXED_POINT
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         /* y is kept at twice the pulse count so the greedy update below can
            skip a multiply (see the "multiplying y[j] by two" comments). */
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_sig_assert(pulsesLeft>=0);

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_sig_assert(pulsesLeft<=N+3);
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy search: place the remaining pulses one at a time at the position
      that maximizes the (squared) correlation-to-energy ratio. */
   for (i=0;i<pulsesLeft;i++)
   {
      opus_val16 Rxy, Ryy;
      int best_id;
      opus_val32 best_num;
      opus_val16 best_den;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);

      /* Calculations for position 0 are out of the loop, in part to reduce
         mispredicted branches (since the if condition is usually false)
         in the loop. */
      /* Temporary sums of the new pulse(s) */
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
      /* We're multiplying y[j] by two so we don't have to do it here */
      Ryy = ADD16(yy, y[0]);

      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
         Rxy is positive because the sign is pre-computed) */
      Rxy = MULT16_16_Q15(Rxy,Rxy);
      best_den = Ryy;
      best_num = Rxy;
      j=1;
      do {
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: It's not clear whether a cmov is faster than a branch here
            since the condition is more often false than true and using
            a cmov introduces data dependencies across iterations. The optimal
            choice may be architecture-dependent. */
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
      /* OPT: This is more likely to be compiled without a branch than the code above
         but has the same performance otherwise. */
      iy[j] = (iy[j]^-signx[j]) + signx[j];
   } while (++j<N);
   RESTORE_STACK;
   return yy;
}
379
380
#ifdef ENABLE_QEXT
381
#include "macros.h"
382
383
/** Specialized PVQ search for N==2 with qext up-sampling: computes both the
    coarse codeword iy (K pulses) and the refined codeword up_iy (up*K
    pulses) directly from the ratio X[0]/(|X[0]|+|X[1]|).  *refine receives
    the offset up_iy[0]-up*iy[0] (sign-adjusted), which the encoder transmits
    separately.  Returns the energy of up_iy, right-shifted by 2*shift in
    FIXED_POINT builds. */
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   opus_val32 sum;
   opus_val32 rcp_sum;
   int offset;
   sum = ABS32(X[0]) + ABS32(X[1]);
   /* Degenerate input: put all pulses on the first coefficient. */
   if (sum < EPSILON) {
      iy[0] = K;
      up_iy[0] = up*K;
      iy[1]=up_iy[1]=0;
      *refine=0;
#ifdef FIXED_POINT
      return (opus_val64)K*K*up*up>>2*shift;
#else
      (void)shift;
      return K*(float)K*up*up;
#endif
   }
#ifdef FIXED_POINT
   int sum_shift;
   opus_val32 X0;
   /* Normalize sum to Q30 before taking its reciprocal. */
   sum_shift = 30-celt_ilog2(sum);
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
#else
   rcp_sum = 1.f/sum;
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
#endif
   /* Clamp the refined value to within (up-1)/2 of the coarse value so the
      offset fits the range the refinement coding expects. */
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   offset = up_iy[0] - up*iy[0];
   /* The second coefficient takes whatever pulses remain. */
   iy[1] = K-abs(iy[0]);
   up_iy[1] = up*K-abs(up_iy[0]);
   if (X[1] < 0) {
      iy[1] = -iy[1];
      up_iy[1] = -up_iy[1];
      offset = -offset;
   }
   *refine = offset;
#ifdef FIXED_POINT
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
#else
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
#endif
}
429
430
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
431
   int i;
432
   int dir;
433
   VARDECL(opus_val32, rounding);
434
   int iysum = 0;
435
   SAVE_STACK;
436
   ALLOC(rounding, N, opus_val32);
437
   for (i=0;i<N;i++) {
438
      opus_val32 tmp;
439
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
440
#ifdef FIXED_POINT
441
      iy[i] = (tmp+64) >> 7;
442
#else
443
      iy[i] = (int)floor(.5+tmp);
444
#endif
445
      rounding[i] = tmp - SHL32(iy[i], 7);
446
   }
447
   if (iy != iy0) {
448
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
449
   }
450
   for (i=0;i<N;i++) iysum += iy[i];
451
   if (abs(iysum - K) > 32) {
452
      return 1;
453
   }
454
   dir = iysum < K ? 1 : -1;
455
   while (iysum != K) {
456
      opus_val32 roundval=-1000000*dir;
457
      int roundpos=0;
458
      for (i=0;i<N;i++) {
459
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
460
            roundval = rounding[i];
461
            roundpos = i;
462
         }
463
      }
464
      iy[roundpos] += dir;
465
      rounding[roundpos] -= SHL32(dir, 15);
466
      iysum+=dir;
467
   }
468
   return 0;
469
}
470
471
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
472
   opus_val32 rcp_sum;
473
   opus_val32 sum=0;
474
   int i;
475
   int failed=0;
476
   opus_val64 yy=0;
477
   VARDECL(opus_val32, Xn);
478
   SAVE_STACK;
479
   for (i=0;i<N;i++) sum += ABS32(X[i]);
480
   ALLOC(Xn, N, opus_val32);
481
   if (sum < EPSILON)
482
      failed = 1;
483
   else {
484
#ifdef FIXED_POINT
485
      int sum_shift = 30-celt_ilog2(sum);
486
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
487
      for (i=0;i<N;i++) {
488
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
489
      }
490
#else
491
      rcp_sum = celt_rcp(sum);
492
      for (i=0;i<N;i++) {
493
         Xn[i] = ABS32(X[i])*rcp_sum;
494
      }
495
#endif
496
   }
497
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
498
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
499
   if (failed) {
500
      iy[0] = K;
501
      for (i=1;i<N;i++) iy[i] = 0;
502
      up_iy[0] = up*K;
503
      for (i=1;i<N;i++) up_iy[i] = 0;
504
   }
505
   for (i=0;i<N;i++) {
506
      yy += up_iy[i]*(opus_val64)up_iy[i];
507
      if (X[i] < 0) {
508
         iy[i] = -iy[i];
509
         up_iy[i] = -up_iy[i];
510
      }
511
      refine[i] = up_iy[i]-up*iy[i];
512
   }
513
#ifdef FIXED_POINT
514
   return (yy + (1<<2*shift>>1))>>2*shift;
515
#else
516
   (void)shift;
517
   return yy;
518
#endif
519
}
520
#endif
521
522
#ifdef ENABLE_QEXT
523
/* Take advantage of the fact that "large" refine values are much less likely
524
   than smaller ones. */
525
/* Encodes one refinement offset.  Values within +/-up/2 ("small") are coded
   directly in extra_bits; larger ones get a sign bit plus the excess
   magnitude in extra_bits-1.  The small/large flag uses 3 bits of
   probability when the budget allows entropy coding, else 1. */
static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
   const opus_int32 half = up/2;
   int is_large = abs(refine) > half;
   ec_enc_bit_logp(enc, is_large, use_entropy ? 3 : 1);
   if (!is_large) {
      ec_enc_bits(enc, refine+half, extra_bits);
   } else {
      ec_enc_bits(enc, refine < 0, 1);
      ec_enc_bits(enc, abs(refine)-half-1, extra_bits-1);
   }
}
536
537
/* Decodes one refinement offset written by ec_enc_refine(), mirroring its
   small/large split exactly. */
static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
   const opus_int32 half = up/2;
   int refine;
   if (ec_dec_bit_logp(dec, use_entropy ? 3 : 1)) {
      /* Large value: sign bit, then the magnitude beyond half. */
      int negative = ec_dec_bits(dec, 1);
      refine = ec_dec_bits(dec, extra_bits-1) + half + 1;
      if (negative)
         refine = -refine;
   } else {
      /* Small value: biased by half so it was coded unsigned. */
      refine = (opus_int32)ec_dec_bits(dec, extra_bits) - half;
   }
   return refine;
}
549
#endif
550
551
/** Quantizes the band X with K pulses: spreading rotation, PVQ search,
    pulse encoding, and (if resynth) reconstruction of the unit-norm result
    in X.  With ENABLE_QEXT and enough extra bits, a refined (up-scaled)
    codeword is additionally coded into ext_enc.  Returns the collapse mask
    used by the anti-collapse logic. */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
      opus_val32 gain, int resynth
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
{
   VARDECL(int, iy);
   opus_val32 yy;
   unsigned collapse_mask;
#ifdef ENABLE_QEXT
   int yy_shift = 0;
#endif
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   /* Covers vectorization by up to 4. */
   ALLOC(iy, N+3, int);

   exp_rotation(X, N, 1, B, K, spread);

#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* qext, N==2: refined codeword computed in closed form; the single
         refinement offset is coded as a uint. */
      int refine;
      int up_iy[2];
      int up;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else if (extra_bits >= 2) {
      /* qext, general N: code one refinement offset per coefficient except
         the last, which is implied by the pulse total (plus a sign bit when
         the coarse value was zero). */
      int i;
      VARDECL(int, up_iy);
      VARDECL(int, refine);
      int up, use_entropy;
      ALLOC(up_iy, N, int);
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* Only use entropy coding for the flags when the budget cannot run out. */
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else
#endif
   {
      /* Standard PVQ path. */
      yy = op_pvq_search(X, iy, K, N, arch);
      collapse_mask = extract_collapse_mask(iy, N, B);
      encode_pulses(iy, N, K, enc);
      if (resynth)
         normalise_residual(iy, X, N, yy, gain, 0);
   }

   if (resynth)
      exp_rotation(X, N, -1, B, K, spread);

   RESTORE_STACK;
   return collapse_mask;
}
617
618
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.  With ENABLE_QEXT and
    enough extra bits, the refinement offsets coded by alg_quant() are read
    from ext_dec and folded into the pulse vector before normalization.
    Returns the collapse mask. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val32 gain
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   int yy_shift=0;
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* qext, N==2: read the single refinement offset and apply it to the
         up-scaled pulse pair, mirroring op_pvq_search_N2(). */
      int up;
      int refine;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
      iy[0] *= up;
      iy[1] *= up;
      if (iy[1] == 0) {
         iy[1] = (iy[0] > 0) ? -refine : refine;
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
      } else if (iy[1] > 0) {
         iy[0] += refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      } else {
         iy[0] -= refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      }
#ifdef FIXED_POINT
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
#endif
   } else if (extra_bits >= 2) {
      /* qext, general N: decode N-1 refinement offsets; the last coefficient
         is reconstructed from the pulse total (with an explicit sign bit if
         its coarse value was zero). */
      int i;
      opus_val64 yy64;
      VARDECL(int, refine);
      int up, use_entropy;
      int sign=0;
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Must match the encoder's budget test exactly. */
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
      else sign = iy[N-1] < 0;
      for (i=0;i<N-1;i++) {
         iy[i] = iy[i]*up + refine[i];
      }
      iy[N-1] = up*K;
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
      if (sign) iy[N-1] = -iy[N-1];
      yy64 = 0;
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
#ifdef FIXED_POINT
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = yy64;
#endif
   }
#endif
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
692
693
#ifndef OVERRIDE_renormalise_vector
694
/** Rescales X in place so its L2 norm equals gain: computes
    g = gain/sqrt(E) with E = EPSILON + <X,X> and multiplies through.
    The scaledown/scaleup pair keeps values in the 16-bit range the
    fixed-point multiplies expect. */
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   norm_scaledown(X, N, NORM_SHIFT-14);
   /* EPSILON guards against a zero vector (division by zero in rsqrt). */
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
#ifdef FIXED_POINT
   /* k ~ half the magnitude (log2) of E, used to pre-normalize for rsqrt. */
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
      xptr++;
   }
   norm_scaleup(X, N, NORM_SHIFT-14);
   /*return celt_sqrt(E);*/
}
721
#endif /* OVERRIDE_renormalise_vector */
722
723
/** Computes the stereo angle itheta = atan2(sqrt(Eside), sqrt(Emid)).
    With stereo!=0, mid/side are formed from (X+Y)/2-style sums of the two
    channels; otherwise X and Y are treated as already-separate vectors and
    only their energies are used.  Fixed-point returns celt_atan2p_norm's
    native scale; float scales to the same integer range. */
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val32 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = 0;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         /* Shift down so the 16-bit MACs below don't overflow. */
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   }
   mid = celt_sqrt32(Emid);
   side = celt_sqrt32(Eside);
#if defined(FIXED_POINT)
   itheta = celt_atan2p_norm(side, mid);
#else
   /* Scale the float atan2 result to the fixed-point integer range. */
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
#endif

   return itheta;
}
755
756
#ifdef ENABLE_QEXT
757
758
/** Reconstructs a unit-norm vector (times gain) from a cubic ("face")
    codeword: each coordinate is mapped to the cell center (1+2*iy[i])-K,
    the face coordinate is pinned to +/-K, and the result is normalized.
    NOTE(review): `shift` is only declared under FIXED_POINT but referenced
    in the shared sum loop — this compiles in float builds only because
    PSHR32 discards its shift argument there; confirm against arch.h. */
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   int i;
   opus_val32 sum=0;
   opus_val32 mag;
#ifdef FIXED_POINT
   int sum_shift;
   /* Headroom so the energy sum below cannot overflow 32 bits. */
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
#endif
   for (i=0;i<N;i++) {
      X[i] = (1+2*iy[i])-K;
   }
   X[face] = sign ? -K : K;
   for (i=0;i<N;i++) {
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   }
#ifdef FIXED_POINT
   /* mag = 1/sqrt(sum), computed on a Q-normalized argument. */
   sum_shift = (29-celt_ilog2(sum))>>1;
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   for (i=0;i<N;i++) {
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   }
#else
   mag = 1.f/sqrt(sum);
   for (i=0;i<N;i++) {
      X[i] *= mag*gain;
   }
#endif
}
786
787
/** Cubic (hypercube face) quantizer: codes the index and sign of the
    largest-magnitude coordinate, then each remaining coordinate with `res`
    bits on a K-step grid scaled by that maximum.  If resynth, X is replaced
    by the reconstruction.  Returns the all-ones collapse mask (or 0 when
    K==1, where the band is simply zeroed). */
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   int i;
   int face=0;
   int K;
   VARDECL(int, iy);
   celt_norm faceval=-1;
   opus_val32 norm;
   int sign;
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      if (resynth) OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   /* Find the coordinate with the largest magnitude (the "face"). */
   for (i=0;i<N;i++) {
      if (ABS32(X[i]) > faceval) {
         faceval = ABS32(X[i]);
         face = i;
      }
   }
   sign = X[face]<0;
   ec_enc_uint(enc, face, N);
   ec_enc_bits(enc, sign, 1);
#ifdef FIXED_POINT
   if (faceval != 0) {
      /* norm ~ K/(2*faceval); quantize each coordinate relative to the face. */
      int face_shift = 30-celt_ilog2(faceval);
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
      norm = MULT16_32_Q15(K, norm);
      for (i=0;i<N;i++) {
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
      }
   } else {
      OPUS_CLEAR(iy, N);
   }
#else
   norm = .5f*K/(faceval+EPSILON);
   for (i=0;i<N;i++) {
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   }
#endif
   /* The face coordinate is implied by (face, sign); code the others. */
   for (i=0;i<N;i++) {
      if (i != face) ec_enc_bits(enc, iy[i], res);
   }
   if (resynth) {
      cubic_synthesis(X, iy, N, K, face, sign, gain);
   }
   RESTORE_STACK;
   return (1<<B)-1;
}
841
842
/** Decodes a cubic (hypercube face) codeword written by cubic_quant() and
    synthesizes the band into X.  Returns the all-ones collapse mask, or 0
    when K==1 (band zeroed, nothing coded). */
unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
   int i;
   int face;
   int sign;
   int K;
   VARDECL(int, iy);
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      OPUS_CLEAR(X, N);
      /* Fix: balance SAVE_STACK on this early return, matching the K==1
         path of cubic_quant() (was missing). */
      RESTORE_STACK;
      return 0;
   }
   face = ec_dec_uint(dec, N);
   sign = ec_dec_bits(dec, 1);
   /* The face coordinate itself was not coded; it is implied. */
   for (i=0;i<N;i++) {
      if (i != face) iy[i] = ec_dec_bits(dec, res);
   }
   iy[face]=0;
   cubic_synthesis(X, iy, N, K, face, sign, gain);
   RESTORE_STACK;
   return (1<<B)-1;
}
867
#endif