Coverage Report

Created: 2025-11-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/celt/vq.c
Line
Count
Source
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Written by Jean-Marc Valin */
4
/*
5
   Redistribution and use in source and binary forms, with or without
6
   modification, are permitted provided that the following conditions
7
   are met:
8
9
   - Redistributions of source code must retain the above copyright
10
   notice, this list of conditions and the following disclaimer.
11
12
   - Redistributions in binary form must reproduce the above copyright
13
   notice, this list of conditions and the following disclaimer in the
14
   documentation and/or other materials provided with the distribution.
15
16
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
#ifdef HAVE_CONFIG_H
30
#include "config.h"
31
#endif
32
33
#include "mathops.h"
34
#include "cwrs.h"
35
#include "vq.h"
36
#include "arch.h"
37
#include "os_support.h"
38
#include "bands.h"
39
#include "rate.h"
40
#include "pitch.h"
41
#include "SigProc_FIX.h"
42
43
#if defined(FIXED_POINT)
44
/** Scale each of the N values in X up by 2^shift, in place.
    shift must be non-negative; shift==0 is a no-op. */
void norm_scaleup(celt_norm *X, int N, int shift) {
   celt_norm *p;
   const celt_norm *end = X + N;
   celt_assert(shift >= 0);
   if (shift <= 0) return;
   for (p = X; p != end; p++)
      *p = SHL32(*p, shift);
}
50
51
/** Scale each of the N values in X down by 2^shift with rounding, in place.
    shift must be non-negative; shift==0 is a no-op. */
void norm_scaledown(celt_norm *X, int N, int shift) {
   celt_norm *p;
   const celt_norm *end = X + N;
   celt_assert(shift >= 0);
   if (shift <= 0) return;
   for (p = X; p != end; p++)
      *p = PSHR32(*p, shift);
}
57
58
/** Plain inner product of two length-len celt_norm vectors, accumulated
    in opus_val32.  The arch argument is accepted for API symmetry with the
    optimized variants but unused here. */
opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
   opus_val32 acc = 0;
   int j = 0;
   (void)arch;
   while (j < len) {
      acc += x[j]*y[j];
      j++;
   }
   return acc;
}
65
opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
66
   int i;
67
   opus_val64 sum = 0;
68
   (void)arch;
69
   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
70
   return sum>>2*(NORM_SHIFT-14);
71
}
72
#endif
73
74
#ifndef OVERRIDE_vq_exp_rotation1
75
/** One in-place spreading-rotation pass over X: applies 2-D rotations with
    coefficients (c, s) to element pairs a distance 'stride' apart — a forward
    sweep followed by a backward sweep.  The vector is brought down to 14-bit
    headroom first and restored afterwards (norm_scaledown/scaleup are no-ops
    in the float build — TODO confirm against arch.h). */
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
   int i;
   opus_val16 ms;
   celt_norm *Xptr;
   Xptr = X;
   /* ms = -s so the second output of each butterfly uses the inverse sign. */
   ms = NEG16(s);
   norm_scaledown(X, len, NORM_SHIFT-14);
   /* Forward pass: rotate (Xptr[0], Xptr[stride]) pairs, advancing by one. */
   for (i=0;i<len-stride;i++)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   /* Backward pass over the remaining pairs; note the order dependence —
      each butterfly reads values the previous one wrote. */
   Xptr = &X[len-2*stride-1];
   for (i=len-2*stride-1;i>=0;i--)
   {
      celt_norm x1, x2;
      x1 = Xptr[0];
      x2 = Xptr[stride];
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   }
   norm_scaleup(X, len, NORM_SHIFT-14);
}
102
#endif /* OVERRIDE_vq_exp_rotation1 */
103
104
/** Spreading rotation applied to X before PVQ encoding (dir>0) or after
    decoding (dir<0).  Does nothing when the pulse density is high enough
    (2*K>=len) or spreading is disabled.  The rotation angle depends on the
    spread setting via SPREAD_FACTOR; for long blocks an additional
    interleaved rotation with stride2 is applied. */
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
   static const int SPREAD_FACTOR[3]={15,10,5};
   int i;
   opus_val16 c, s;
   opus_val16 gain, theta;
   int stride2=0;
   int factor;

   if (2*K>=len || spread==SPREAD_NONE)
      return;
   /* spread is 1..3 here (SPREAD_NONE was handled above). */
   factor = SPREAD_FACTOR[spread-1];

   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
   theta = HALF16(MULT16_16_Q15(gain,gain));

   c = celt_cos_norm(EXTEND32(theta));
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */

   if (len>=8*stride)
   {
      stride2 = 1;
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
         It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
         stride2++;
   }
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      extract_collapse_mask().*/
   len = celt_udiv(len, stride);
   for (i=0;i<stride;i++)
   {
      if (dir < 0)
      {
         /* Inverse rotation: undo the forward passes in reverse order. */
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, c);
         exp_rotation1(X+i*len, len, 1, c, s);
      } else {
         exp_rotation1(X+i*len, len, 1, c, -s);
         if (stride2)
            exp_rotation1(X+i*len, len, stride2, s, -c);
      }
   }
}
148
149
/** Normalizes the decoded integer pvq codeword to unit norm. */
150
/** Normalizes the decoded integer pvq codeword to unit norm.
    X[i] = gain * iy[i] / sqrt(Ryy), where Ryy is the sum of squares of iy.
    'shift' is only meaningful in FIXED_POINT+QEXT builds (extra precision
    of the upsampled codeword); it is ignored otherwise. */
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 t;
   opus_val32 g;

#ifdef FIXED_POINT
   /* k ~ half the magnitude (log2) of Ryy, used to pre-normalize for rsqrt. */
   k = celt_ilog2(Ryy)>>1;
#endif
   /* NOTE: k appears below outside the #ifdef; in the float build VSHR32
      discards its shift argument so k is never referenced — TODO confirm
      against the float definitions in arch.h. */
   t = VSHR32(Ryy, 2*(k-7)-15);
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
   i=0;
   (void)shift;
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
   if (shift>0) {
      /* High-precision path: pick a shift that lands the result at NORM_SHIFT. */
      int tot_shift = NORM_SHIFT+1-k-shift;
      if (tot_shift >= 0) {
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
         while (++i < N);
      } else {
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
         while (++i < N);
      }
   } else
#endif
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
   while (++i < N);
}
182
183
/** Build a B-bit mask with bit b set iff the b-th of the B interleaved
    sub-blocks of iy contains at least one nonzero pulse.  A single block
    (B<=1) trivially yields mask 1. */
static unsigned extract_collapse_mask(int *iy, int N, int B)
{
   unsigned mask;
   int blk_len;
   int blk;
   if (B<=1)
      return 1;
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
      exp_rotation().*/
   blk_len = celt_udiv(N, B);
   mask = 0;
   blk = 0;
   do {
      unsigned any;
      int k;
      any = 0;
      k = 0;
      do {
         any |= iy[blk*blk_len+k];
      } while (++k<blk_len);
      mask |= (any!=0)<<blk;
   } while (++blk<B);
   return mask;
}
204
205
/** Finds the integer pulse vector iy (with sum of |iy[i]| == K) that best
    matches direction X, by a projection pre-search followed by a greedy
    one-pulse-at-a-time refinement maximizing Rxy^2/Ryy.  On return X holds
    |X| (signs stripped) and the return value is the (scaled) energy of y.
    The 'arch' argument is unused in this C fallback. */
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
{
   VARDECL(celt_norm, y);
   VARDECL(int, signx);
   int i, j;
   int pulsesLeft;
   opus_val32 sum;
   opus_val32 xy;
   opus_val16 yy;
   SAVE_STACK;

   (void)arch;
   ALLOC(y, N, celt_norm);
   ALLOC(signx, N, int);
#ifdef FIXED_POINT
   {
      /* Pre-scale X so the 16-bit accumulations below cannot overflow. */
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
      norm_scaledown(X, N, shift);
   }
#endif
   /* Get rid of the sign */
   sum = 0;
   j=0; do {
      signx[j] = X[j]<0;
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
      X[j] = ABS16(X[j]);
      iy[j] = 0;
      y[j] = 0;
   } while (++j<N);

   xy = yy = 0;

   pulsesLeft = K;

   /* Do a pre-search by projecting on the pyramid */
   if (K > (N>>1))
   {
      opus_val16 rcp;
      j=0; do {
         sum += X[j];
      }  while (++j<N);

      /* If X is too small, just replace it with a pulse at 0 */
#ifdef FIXED_POINT
      if (sum <= K)
#else
      /* Prevents infinities and NaNs from causing too many pulses
         to be allocated. 64 is an approximation of infinity here. */
      if (!(sum > EPSILON && sum < 64))
#endif
      {
         X[0] = QCONST16(1.f,14);
         j=1; do
            X[j]=0;
         while (++j<N);
         sum = QCONST16(1.f,14);
      }
#ifdef FIXED_POINT
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
#else
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
#endif
      j=0; do {
#ifdef FIXED_POINT
         /* It's really important to round *towards zero* here */
         iy[j] = MULT16_16_Q15(X[j],rcp);
#else
         iy[j] = (int)floor(rcp*X[j]);
#endif
         y[j] = (celt_norm)iy[j];
         yy = MAC16_16(yy, y[j],y[j]);
         xy = MAC16_16(xy, X[j],y[j]);
         y[j] *= 2;
         pulsesLeft -= iy[j];
      }  while (++j<N);
   }
   celt_sig_assert(pulsesLeft>=0);

   /* This should never happen, but just in case it does (e.g. on silence)
      we fill the first bin with pulses. */
#ifdef FIXED_POINT_DEBUG
   celt_sig_assert(pulsesLeft<=N+3);
#endif
   if (pulsesLeft > N+3)
   {
      opus_val16 tmp = (opus_val16)pulsesLeft;
      yy = MAC16_16(yy, tmp, tmp);
      yy = MAC16_16(yy, tmp, y[0]);
      iy[0] += pulsesLeft;
      pulsesLeft=0;
   }

   /* Greedy search: place the remaining pulses one at a time in the position
      that maximizes the (squared) correlation-to-energy ratio. */
   for (i=0;i<pulsesLeft;i++)
   {
      opus_val16 Rxy, Ryy;
      int best_id;
      opus_val32 best_num;
      opus_val16 best_den;
#ifdef FIXED_POINT
      int rshift;
#endif
#ifdef FIXED_POINT
      /* rshift keeps Rxy within 16 bits as pulses accumulate; in the float
         build SHR32 ignores it, so it is only declared for fixed point. */
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
#endif
      best_id = 0;
      /* The squared magnitude term gets added anyway, so we might as well
         add it outside the loop */
      yy = ADD16(yy, 1);

      /* Calculations for position 0 are out of the loop, in part to reduce
         mispredicted branches (since the if condition is usually false)
         in the loop. */
      /* Temporary sums of the new pulse(s) */
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
      /* We're multiplying y[j] by two so we don't have to do it here */
      Ryy = ADD16(yy, y[0]);

      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
         Rxy is positive because the sign is pre-computed) */
      Rxy = MULT16_16_Q15(Rxy,Rxy);
      best_den = Ryy;
      best_num = Rxy;
      j=1;
      do {
         /* Temporary sums of the new pulse(s) */
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
         /* We're multiplying y[j] by two so we don't have to do it here */
         Ryy = ADD16(yy, y[j]);

         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
            Rxy is positive because the sign is pre-computed) */
         Rxy = MULT16_16_Q15(Rxy,Rxy);
         /* The idea is to check for num/den >= best_num/best_den, but that way
            we can do it without any division */
         /* OPT: It's not clear whether a cmov is faster than a branch here
            since the condition is more often false than true and using
            a cmov introduces data dependencies across iterations. The optimal
            choice may be architecture-dependent. */
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
         {
            best_den = Ryy;
            best_num = Rxy;
            best_id = j;
         }
      } while (++j<N);

      /* Updating the sums of the new pulse(s) */
      xy = ADD32(xy, EXTEND32(X[best_id]));
      /* We're multiplying y[j] by two so we don't have to do it here */
      yy = ADD16(yy, y[best_id]);

      /* Only now that we've made the final choice, update y/iy */
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
      y[best_id] += 2;
      iy[best_id]++;
   }

   /* Put the original sign back */
   j=0;
   do {
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
      /* OPT: This is more likely to be compiled without a branch than the code above
         but has the same performance otherwise. */
      iy[j] = (iy[j]^-signx[j]) + signx[j];
   } while (++j<N);
   RESTORE_STACK;
   return yy;
}
375
376
#ifdef ENABLE_QEXT
377
#include "macros.h"
378
379
/** Specialized PVQ search for N==2 with QEXT refinement: computes both the
    base codeword iy (K pulses) and the upsampled codeword up_iy (up*K pulses)
    directly from the ratio X[0]/(|X[0]|+|X[1]|), and returns the energy of
    up_iy scaled down by 2*shift.  *refine receives the signed offset of
    up_iy from up*iy, to be entropy-coded by the caller. */
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
   opus_val32 sum;
   opus_val32 rcp_sum;
   int offset;
   sum = ABS32(X[0]) + ABS32(X[1]);
   /* Degenerate (near-silent) input: put everything on the first dimension. */
   if (sum < EPSILON) {
      iy[0] = K;
      up_iy[0] = up*K;
      iy[1]=up_iy[1]=0;
      *refine=0;
#ifdef FIXED_POINT
      return (opus_val64)K*K*up*up>>2*shift;
#else
      (void)shift;
      return K*(float)K*up*up;
#endif
   }
#ifdef FIXED_POINT
   /* C99 mixed declarations: only reached when sum >= EPSILON. */
   int sum_shift;
   opus_val32 X0;
   sum_shift = 30-celt_ilog2(sum);
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
#else
   rcp_sum = 1.f/sum;
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
#endif
   /* Clamp the high-resolution value to within (up-1)/2 of up*iy[0] so the
      refinement offset stays codable. */
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
   offset = up_iy[0] - up*iy[0];
   /* Second component takes whatever pulses remain. */
   iy[1] = K-abs(iy[0]);
   up_iy[1] = up*K-abs(up_iy[0]);
   if (X[1] < 0) {
      iy[1] = -iy[1];
      up_iy[1] = -up_iy[1];
      offset = -offset;
   }
   *refine = offset;
#ifdef FIXED_POINT
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
#else
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
#endif
}
425
426
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
427
   int i;
428
   int dir;
429
   VARDECL(opus_val32, rounding);
430
   int iysum = 0;
431
   SAVE_STACK;
432
   ALLOC(rounding, N, opus_val32);
433
   for (i=0;i<N;i++) {
434
      opus_val32 tmp;
435
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
436
#ifdef FIXED_POINT
437
      iy[i] = (tmp+64) >> 7;
438
#else
439
      iy[i] = (int)floor(.5+tmp);
440
#endif
441
      rounding[i] = tmp - SHL32(iy[i], 7);
442
   }
443
   if (iy != iy0) {
444
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
445
   }
446
   for (i=0;i<N;i++) iysum += iy[i];
447
   if (abs(iysum - K) > 32) {
448
      return 1;
449
   }
450
   dir = iysum < K ? 1 : -1;
451
   while (iysum != K) {
452
      opus_val32 roundval=-1000000*dir;
453
      int roundpos=0;
454
      for (i=0;i<N;i++) {
455
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
456
            roundval = rounding[i];
457
            roundpos = i;
458
         }
459
      }
460
      iy[roundpos] += dir;
461
      rounding[roundpos] -= SHL32(dir, 15);
462
      iysum+=dir;
463
   }
464
   return 0;
465
}
466
467
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
468
   opus_val32 rcp_sum;
469
   opus_val32 sum=0;
470
   int i;
471
   int failed=0;
472
   opus_val64 yy=0;
473
   VARDECL(opus_val32, Xn);
474
   SAVE_STACK;
475
   for (i=0;i<N;i++) sum += ABS32(X[i]);
476
   ALLOC(Xn, N, opus_val32);
477
   if (sum < EPSILON)
478
      failed = 1;
479
   else {
480
#ifdef FIXED_POINT
481
      int sum_shift = 30-celt_ilog2(sum);
482
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
483
      for (i=0;i<N;i++) {
484
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
485
      }
486
#else
487
      rcp_sum = celt_rcp(sum);
488
      for (i=0;i<N;i++) {
489
         Xn[i] = ABS32(X[i])*rcp_sum;
490
      }
491
#endif
492
   }
493
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
494
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
495
   if (failed) {
496
      iy[0] = K;
497
      for (i=1;i<N;i++) iy[i] = 0;
498
      up_iy[0] = up*K;
499
      for (i=1;i<N;i++) up_iy[i] = 0;
500
   }
501
   for (i=0;i<N;i++) {
502
      yy += up_iy[i]*(opus_val64)up_iy[i];
503
      if (X[i] < 0) {
504
         iy[i] = -iy[i];
505
         up_iy[i] = -up_iy[i];
506
      }
507
      refine[i] = up_iy[i]-up*iy[i];
508
   }
509
#ifdef FIXED_POINT
510
   return (yy + (1<<2*shift>>1))>>2*shift;
511
#else
512
   (void)shift;
513
   return yy;
514
#endif
515
}
516
#endif
517
518
#ifdef ENABLE_QEXT
519
/* Take advantage of the fact that "large" refine values are much less likely
520
   than smaller ones. */
521
/* Take advantage of the fact that "large" refine values are much less likely
   than smaller ones: code a large/small flag first, then the value with
   extra_bits (small) or extra_bits-1 plus a sign bit (large). */
static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
   int is_large;
   is_large = abs(refine) > up/2;
   ec_enc_bit_logp(enc, is_large, use_entropy ? 3 : 1);
   if (!is_large) {
      ec_enc_bits(enc, refine+up/2, extra_bits);
   } else {
      ec_enc_bits(enc, refine < 0, 1);
      ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
   }
}
532
533
/* Inverse of ec_enc_refine(): reads the large/small flag, then decodes the
   refinement value with the matching layout. */
static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
   int value;
   if (ec_dec_bit_logp(dec, use_entropy ? 3 : 1)) {
      /* "Large" value: magnitude beyond up/2, with an explicit sign bit. */
      int negative = ec_dec_bits(dec, 1);
      value = ec_dec_bits(dec, extra_bits-1) + up/2+1;
      if (negative) value = -value;
   } else {
      /* "Small" value: biased by up/2 so it is coded unsigned. */
      value = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
   }
   return value;
}
545
#endif
546
547
/** Quantizes the band X as a PVQ codeword with K pulses spread over B
    interleaved blocks, writing the pulse indices to 'enc' (and, under QEXT
    with enough extra bits, the refinement data to 'ext_enc').  When resynth
    is set, X is replaced by the unit-norm reconstruction scaled by gain.
    Returns the collapse mask (one bit per block with energy). */
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
      opus_val32 gain, int resynth
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
{
   VARDECL(int, iy);
   opus_val32 yy;
   unsigned collapse_mask;
#ifdef ENABLE_QEXT
   int yy_shift = 0;
#endif
   SAVE_STACK;

   celt_assert2(K>0, "alg_quant() needs at least one pulse");
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");

   /* Covers vectorization by up to 4. */
   ALLOC(iy, N+3, int);

   /* Forward spreading rotation before the search. */
   exp_rotation(X, N, 1, B, K, spread);

#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* Specialized 2-D path: search base + upsampled codeword together and
         code a single refinement offset. */
      int refine;
      int up_iy[2];
      int up;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else if (extra_bits >= 2) {
      /* General QEXT path: per-dimension refinement offsets. */
      int i;
      VARDECL(int, up_iy);
      VARDECL(int, refine);
      int up, use_entropy;
      ALLOC(up_iy, N, int);
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
      collapse_mask = extract_collapse_mask(up_iy, N, B);
      encode_pulses(iy, N, K, enc);
      /* Only use the entropy-coded flag when enough bits remain. */
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
      /* The last refinement is implied; only its sign can be ambiguous. */
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
      if (resynth)
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
   } else
#endif
   {
      yy = op_pvq_search(X, iy, K, N, arch);
      collapse_mask = extract_collapse_mask(iy, N, B);
      encode_pulses(iy, N, K, enc);
      if (resynth)
         normalise_residual(iy, X, N, yy, gain, 0);
   }

   if (resynth)
      exp_rotation(X, N, -1, B, K, spread);

   RESTORE_STACK;
   return collapse_mask;
}
613
614
/** Decode pulse vector and combine the result with the pitch vector to produce
615
    the final normalised signal in the current band. */
616
/** Decode pulse vector and combine the result with the pitch vector to produce
    the final normalised signal in the current band.  Under QEXT with enough
    extra bits, additionally decodes refinement data from 'ext_dec' to
    reconstruct the upsampled codeword.  Returns the collapse mask. */
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
      ec_dec *dec, opus_val32 gain
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
{
   opus_val32 Ryy;
   unsigned collapse_mask;
   VARDECL(int, iy);
   int yy_shift=0;
   SAVE_STACK;

   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
   ALLOC(iy, N, int);
   Ryy = decode_pulses(iy, N, K, dec);
#ifdef ENABLE_QEXT
   if (N==2 && extra_bits >= 2) {
      /* 2-D path: apply the single coded refinement offset; mirrors the
         encoder's op_pvq_search_N2 construction. */
      int up;
      int refine;
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
      iy[0] *= up;
      iy[1] *= up;
      if (iy[1] == 0) {
         /* Base codeword put all pulses on dim 0: refinement moves some
            magnitude onto dim 1, away from dim 0. */
         iy[1] = (iy[0] > 0) ? -refine : refine;
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
      } else if (iy[1] > 0) {
         iy[0] += refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      } else {
         iy[0] -= refine;
         iy[1] -= refine*(iy[0]>0?1:-1);
      }
#ifdef FIXED_POINT
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
#endif
   } else if (extra_bits >= 2) {
      /* General path: decode per-dimension refinements; the last dimension
         is reconstructed from the pulse-count invariant sum|iy| == up*K. */
      int i;
      opus_val64 yy64;
      VARDECL(int, refine);
      int up, use_entropy;
      int sign=0;
      ALLOC(refine, N, int);
      yy_shift = IMAX(0, extra_bits-7);
      up = (1<<extra_bits)-1;
      /* Must match the encoder's use_entropy decision bit-for-bit. */
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
      else sign = iy[N-1] < 0;
      for (i=0;i<N-1;i++) {
         iy[i] = iy[i]*up + refine[i];
      }
      iy[N-1] = up*K;
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
      if (sign) iy[N-1] = -iy[N-1];
      yy64 = 0;
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
#ifdef FIXED_POINT
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
#else
      Ryy = yy64;
#endif
   }
#endif
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
   exp_rotation(X, N, -1, B, K, spread);
   collapse_mask = extract_collapse_mask(iy, N, B);
   RESTORE_STACK;
   return collapse_mask;
}
688
689
#ifndef OVERRIDE_renormalise_vector
690
/** Rescales X in place to unit norm times gain: X *= gain/sqrt(sum(X^2)).
    Works at 14-bit headroom (scaledown/scaleup by NORM_SHIFT-14, no-ops in
    the float build — TODO confirm against arch.h). */
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
{
   int i;
#ifdef FIXED_POINT
   int k;
#endif
   opus_val32 E;
   opus_val16 g;
   opus_val32 t;
   celt_norm *xptr;
   norm_scaledown(X, N, NORM_SHIFT-14);
   /* EPSILON guards against a zero vector (division by zero in rsqrt). */
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
#ifdef FIXED_POINT
   /* k ~ half the magnitude (log2) of E; in the float build VSHR32/PSHR32
      discard their shift argument so k is never referenced. */
   k = celt_ilog2(E)>>1;
#endif
   t = VSHR32(E, 2*(k-7));
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);

   xptr = X;
   for (i=0;i<N;i++)
   {
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
      xptr++;
   }
   norm_scaleup(X, N, NORM_SHIFT-14);
   /*return celt_sqrt(E);*/
}
717
#endif /* OVERRIDE_renormalise_vector */
718
719
/** Computes the stereo (or split) angle between X and Y as
    atan2(sqrt(Eside), sqrt(Emid)).  In stereo mode the mid/side energies are
    built from (X+Y)/2-style sums; otherwise X and Y are treated as the two
    halves directly.  Returns the angle in the codec's Q-format (float build
    scales by 65536*16384). */
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
{
   int i;
   int itheta;
   opus_val32 mid, side;
   opus_val32 Emid, Eside;

   Emid = Eside = 0;
   if (stereo)
   {
      for (i=0;i<N;i++)
      {
         celt_norm m, s;
         /* Mid/side at reduced precision to keep the 16x16 MACs in range. */
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
         Emid = MAC16_16(Emid, m, m);
         Eside = MAC16_16(Eside, s, s);
      }
   } else {
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
   }
   mid = celt_sqrt32(Emid);
   side = celt_sqrt32(Eside);
#if defined(FIXED_POINT)
   itheta = celt_atan2p_norm(side, mid);
#else
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
#endif

   return itheta;
}
751
752
#ifdef ENABLE_QEXT
753
754
/** Reconstructs the unit-norm (times gain) vector for a cubic ("face")
    codeword: each iy[i] in [0,K-1] maps to the odd lattice point 1+2*iy[i]-K,
    except the 'face' dimension which is pinned to +/-K per 'sign'.  The
    result is normalized by 1/sqrt(sum of squares) and scaled by gain. */
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
   int i;
   opus_val32 sum=0;
   opus_val32 mag;
#ifdef FIXED_POINT
   int sum_shift;
   /* Headroom so the squared sums below cannot overflow; 'shift' is only
      referenced through PSHR32/VSHR32, which discard it in the float build. */
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
#endif
   for (i=0;i<N;i++) {
      X[i] = (1+2*iy[i])-K;
   }
   X[face] = sign ? -K : K;
   for (i=0;i<N;i++) {
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
   }
#ifdef FIXED_POINT
   sum_shift = (29-celt_ilog2(sum))>>1;
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
   for (i=0;i<N;i++) {
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
   }
#else
   mag = 1.f/sqrt(sum);
   for (i=0;i<N;i++) {
      X[i] *= mag*gain;
   }
#endif
}
782
783
/** Cube-face quantizer: codes the index and sign of the largest-magnitude
    dimension ("face"), then each remaining dimension as a res-bit index on
    that face.  With resynth set, X is replaced by the reconstruction.
    Returns the all-ones collapse mask (1<<B)-1, or 0 when K degenerates
    to 1 (nothing coded, X cleared). */
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
   int i;
   int face=0;
   int K;
   VARDECL(int, iy);
   celt_norm faceval=-1;
   opus_val32 norm;
   int sign;
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      if (resynth) OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   /* Find the dominant dimension; faceval starts at -1 so even an all-zero
      vector selects face 0. */
   for (i=0;i<N;i++) {
      if (ABS32(X[i]) > faceval) {
         faceval = ABS32(X[i]);
         face = i;
      }
   }
   sign = X[face]<0;
   ec_enc_uint(enc, face, N);
   ec_enc_bits(enc, sign, 1);
#ifdef FIXED_POINT
   if (faceval != 0) {
      int face_shift = 30-celt_ilog2(faceval);
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
      norm = MULT16_32_Q15(K, norm);
      for (i=0;i<N;i++) {
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
      }
   } else {
      OPUS_CLEAR(iy, N);
   }
#else
   norm = .5f*K/(faceval+EPSILON);
   for (i=0;i<N;i++) {
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
   }
#endif
   /* The face dimension itself carries no index — it is implied by 'face'. */
   for (i=0;i<N;i++) {
      if (i != face) ec_enc_bits(enc, iy[i], res);
   }
   if (resynth) {
      cubic_synthesis(X, iy, N, K, face, sign, gain);
   }
   RESTORE_STACK;
   return (1<<B)-1;
}
837
838
/** Inverse of cubic_quant(): decodes the face index, its sign, and the
    per-dimension res-bit indices, then synthesizes the unit-norm (times
    gain) vector into X.  Returns the all-ones collapse mask (1<<B)-1, or 0
    when K degenerates to 1 (X is simply cleared).
    Fix: the K==1 early return now executes RESTORE_STACK, matching the same
    path in cubic_quant() — the original skipped it after SAVE_STACK/ALLOC,
    leaking the pseudostack allocator in NONTHREADSAFE_PSEUDOSTACK builds. */
unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
   int i;
   int face;
   int sign;
   int K;
   VARDECL(int, iy);
   SAVE_STACK;
   ALLOC(iy, N, int);
   K = 1<<res;
   /* Using odd K on transients to avoid adding pre-echo. */
   if (B!=1) K=IMAX(1, K-1);
   if (K==1) {
      OPUS_CLEAR(X, N);
      RESTORE_STACK;
      return 0;
   }
   face = ec_dec_uint(dec, N);
   sign = ec_dec_bits(dec, 1);
   for (i=0;i<N;i++) {
      if (i != face) iy[i] = ec_dec_bits(dec, res);
   }
   /* The face dimension carries no coded index; cubic_synthesis pins it. */
   iy[face]=0;
   cubic_synthesis(X, iy, N, K, face, sign, gain);
   RESTORE_STACK;
   return (1<<B)-1;
}
863
#endif