Coverage Report

Created: 2025-12-31 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/celt/vq.c
Line
Count
Source
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Written by Jean-Marc Valin */
4
/*
5
   Redistribution and use in source and binary forms, with or without
6
   modification, are permitted provided that the following conditions
7
   are met:
8
9
   - Redistributions of source code must retain the above copyright
10
   notice, this list of conditions and the following disclaimer.
11
12
   - Redistributions in binary form must reproduce the above copyright
13
   notice, this list of conditions and the following disclaimer in the
14
   documentation and/or other materials provided with the distribution.
15
16
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
#ifdef HAVE_CONFIG_H
30
#include "config.h"
31
#endif
32
33
#include "mathops.h"
34
#include "cwrs.h"
35
#include "vq.h"
36
#include "arch.h"
37
#include "os_support.h"
38
#include "bands.h"
39
#include "rate.h"
40
#include "pitch.h"
41
#include "SigProc_FIX.h"
42
43
#if defined(FIXED_POINT)
44
void norm_scaleup(celt_norm *X, int N, int shift) {
45
   int i;
46
   celt_assert(shift >= 0);
47
   if (shift <= 0) return;
48
   for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
49
}
50
51
void norm_scaledown(celt_norm *X, int N, int shift) {
52
   int i;
53
   celt_assert(shift >= 0);
54
   if (shift <= 0) return;
55
   for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift);
56
}
57
58
opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
59
   int i;
60
   opus_val32 sum = 0;
61
   (void)arch;
62
   for (i=0;i<len;i++) sum += x[i]*y[i];
63
   return sum;
64
}
65
opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
66
   int i;
67
   opus_val64 sum = 0;
68
   (void)arch;
69
   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
70
   return sum>>2*(NORM_SHIFT-14);
71
}
72
#endif
73
74
#ifndef OVERRIDE_vq_exp_rotation1
75
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
76
189k
{
77
189k
   int i;
78
189k
   opus_val16 ms;
79
189k
   celt_norm *Xptr;
80
189k
   Xptr = X;
81
189k
   ms = NEG16(s);
82
189k
   norm_scaledown(X, len, NORM_SHIFT-14);
83
1.14M
   for (i=0;i<len-stride;i++)
84
950k
   {
85
950k
      celt_norm x1, x2;
86
950k
      x1 = Xptr[0];
87
950k
      x2 = Xptr[stride];
88
950k
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
89
950k
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
90
950k
   }
91
189k
   Xptr = &X[len-2*stride-1];
92
908k
   for (i=len-2*stride-1;i>=0;i--)
93
718k
   {
94
718k
      celt_norm x1, x2;
95
718k
      x1 = Xptr[0];
96
718k
      x2 = Xptr[stride];
97
718k
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
98
718k
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
99
718k
   }
100
189k
   norm_scaleup(X, len, NORM_SHIFT-14);
101
189k
}
102
#endif /* OVERRIDE_vq_exp_rotation1 */
103
104
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
105
863k
{
106
863k
   static const int SPREAD_FACTOR[3]={15,10,5};
107
863k
   int i;
108
863k
   opus_val16 c, s;
109
863k
   opus_val16 gain, theta;
110
863k
   int stride2=0;
111
863k
   int factor;
112
113
863k
   if (2*K>=len || spread==SPREAD_NONE)
114
790k
      return;
115
72.9k
   factor = SPREAD_FACTOR[spread-1];
116
117
72.9k
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
118
72.9k
   theta = HALF16(MULT16_16_Q15(gain,gain));
119
120
72.9k
   c = celt_cos_norm(EXTEND32(theta));
121
72.9k
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
122
123
72.9k
   if (len>=8*stride)
124
34.5k
   {
125
34.5k
      stride2 = 1;
126
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
127
         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
128
124k
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
129
89.7k
         stride2++;
130
34.5k
   }
131
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
132
      extract_collapse_mask().*/
133
72.9k
   len = celt_udiv(len, stride);
134
225k
   for (i=0;i<stride;i++)
135
152k
   {
136
152k
      if (dir < 0)
137
152k
      {
138
152k
         if (stride2)
139
37.5k
            exp_rotation1(X+i*len, len, stride2, s, c);
140
152k
         exp_rotation1(X+i*len, len, 1, c, s);
141
152k
      } else {
142
0
         exp_rotation1(X+i*len, len, 1, c, -s);
143
0
         if (stride2)
144
0
            exp_rotation1(X+i*len, len, stride2, s, -c);
145
0
      }
146
152k
   }
147
72.9k
}
148
149
/** Normalizes the decoded integer pvq codeword to unit norm. */
150
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
151
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
152
863k
{
153
863k
   int i;
154
#ifdef FIXED_POINT
155
   int k;
156
#endif
157
863k
   opus_val32 t;
158
863k
   opus_val32 g;
159
160
#ifdef FIXED_POINT
161
   k = celt_ilog2(Ryy)>>1;
162
#endif
163
863k
   t = VSHR32(Ryy, 2*(k-7)-15);
164
863k
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
165
863k
   i=0;
166
863k
   (void)shift;
167
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
168
   if (shift>0) {
169
      int tot_shift = NORM_SHIFT+1-k-shift;
170
      if (tot_shift >= 0) {
171
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
172
         while (++i < N);
173
      } else {
174
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
175
         while (++i < N);
176
      }
177
   } else
178
#endif
179
3.88M
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
180
3.88M
   while (++i < N);
181
863k
}
182
183
static unsigned extract_collapse_mask(int *iy, int N, int B)
184
863k
{
185
863k
   unsigned collapse_mask;
186
863k
   int N0;
187
863k
   int i;
188
863k
   if (B<=1)
189
680k
      return 1;
190
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
191
      exp_rotation().*/
192
182k
   N0 = celt_udiv(N, B);
193
182k
   collapse_mask = 0;
194
581k
   i=0; do {
195
581k
      int j;
196
581k
      unsigned tmp=0;
197
1.01M
      j=0; do {
198
1.01M
         tmp |= iy[i*N0+j];
199
1.01M
      } while (++j<N0);
200
581k
      collapse_mask |= (tmp!=0)<<i;
201
581k
   } while (++i<B);
202
182k
   return collapse_mask;
203
863k
}
204
205
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
206
0
{
207
0
   VARDECL(celt_norm, y);
208
0
   VARDECL(int, signx);
209
0
   int i, j;
210
0
   int pulsesLeft;
211
0
   opus_val32 sum;
212
0
   opus_val32 xy;
213
0
   opus_val16 yy;
214
0
   SAVE_STACK;
215
216
0
   (void)arch;
217
0
   ALLOC(y, N, celt_norm);
218
0
   ALLOC(signx, N, int);
219
#ifdef FIXED_POINT
220
   {
221
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
222
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
223
      norm_scaledown(X, N, shift);
224
   }
225
#endif
226
   /* Get rid of the sign */
227
0
   sum = 0;
228
0
   j=0; do {
229
0
      signx[j] = X[j]<0;
230
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
231
0
      X[j] = ABS16(X[j]);
232
0
      iy[j] = 0;
233
0
      y[j] = 0;
234
0
   } while (++j<N);
235
236
0
   xy = yy = 0;
237
238
0
   pulsesLeft = K;
239
240
   /* Do a pre-search by projecting on the pyramid */
241
0
   if (K > (N>>1))
242
0
   {
243
0
      opus_val16 rcp;
244
0
      j=0; do {
245
0
         sum += X[j];
246
0
      }  while (++j<N);
247
248
      /* If X is too small, just replace it with a pulse at 0 */
249
#ifdef FIXED_POINT
250
      if (sum <= K)
251
#else
252
      /* Prevents infinities and NaNs from causing too many pulses
253
         to be allocated. 64 is an approximation of infinity here. */
254
0
      if (!(sum > EPSILON && sum < 64))
255
0
#endif
256
0
      {
257
0
         X[0] = QCONST16(1.f,14);
258
0
         j=1; do
259
0
            X[j]=0;
260
0
         while (++j<N);
261
0
         sum = QCONST16(1.f,14);
262
0
      }
263
#ifdef FIXED_POINT
264
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
265
#else
266
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
267
0
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
268
0
#endif
269
0
      j=0; do {
270
#ifdef FIXED_POINT
271
         /* It's really important to round *towards zero* here */
272
         iy[j] = MULT16_16_Q15(X[j],rcp);
273
#else
274
0
         iy[j] = (int)floor(rcp*X[j]);
275
0
#endif
276
0
         y[j] = (celt_norm)iy[j];
277
0
         yy = MAC16_16(yy, y[j],y[j]);
278
0
         xy = MAC16_16(xy, X[j],y[j]);
279
0
         y[j] *= 2;
280
0
         pulsesLeft -= iy[j];
281
0
      }  while (++j<N);
282
0
   }
283
0
   celt_sig_assert(pulsesLeft>=0);
284
285
   /* This should never happen, but just in case it does (e.g. on silence)
286
      we fill the first bin with pulses. */
287
#ifdef FIXED_POINT_DEBUG
288
   celt_sig_assert(pulsesLeft<=N+3);
289
#endif
290
0
   if (pulsesLeft > N+3)
291
0
   {
292
0
      opus_val16 tmp = (opus_val16)pulsesLeft;
293
0
      yy = MAC16_16(yy, tmp, tmp);
294
0
      yy = MAC16_16(yy, tmp, y[0]);
295
0
      iy[0] += pulsesLeft;
296
0
      pulsesLeft=0;
297
0
   }
298
299
0
   for (i=0;i<pulsesLeft;i++)
300
0
   {
301
0
      opus_val16 Rxy, Ryy;
302
0
      int best_id;
303
0
      opus_val32 best_num;
304
0
      opus_val16 best_den;
305
#ifdef FIXED_POINT
306
      int rshift;
307
#endif
308
#ifdef FIXED_POINT
309
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
310
#endif
311
0
      best_id = 0;
312
      /* The squared magnitude term gets added anyway, so we might as well
313
         add it outside the loop */
314
0
      yy = ADD16(yy, 1);
315
316
      /* Calculations for position 0 are out of the loop, in part to reduce
317
         mispredicted branches (since the if condition is usually false)
318
         in the loop. */
319
      /* Temporary sums of the new pulse(s) */
320
0
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
321
      /* We're multiplying y[j] by two so we don't have to do it here */
322
0
      Ryy = ADD16(yy, y[0]);
323
324
      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
325
         Rxy is positive because the sign is pre-computed) */
326
0
      Rxy = MULT16_16_Q15(Rxy,Rxy);
327
0
      best_den = Ryy;
328
0
      best_num = Rxy;
329
0
      j=1;
330
0
      do {
331
         /* Temporary sums of the new pulse(s) */
332
0
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
333
         /* We're multiplying y[j] by two so we don't have to do it here */
334
0
         Ryy = ADD16(yy, y[j]);
335
336
         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
337
            Rxy is positive because the sign is pre-computed) */
338
0
         Rxy = MULT16_16_Q15(Rxy,Rxy);
339
         /* The idea is to check for num/den >= best_num/best_den, but that way
340
            we can do it without any division */
341
         /* OPT: It's not clear whether a cmov is faster than a branch here
342
            since the condition is more often false than true and using
343
            a cmov introduces data dependencies across iterations. The optimal
344
            choice may be architecture-dependent. */
345
0
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
346
0
         {
347
0
            best_den = Ryy;
348
0
            best_num = Rxy;
349
0
            best_id = j;
350
0
         }
351
0
      } while (++j<N);
352
353
      /* Updating the sums of the new pulse(s) */
354
0
      xy = ADD32(xy, EXTEND32(X[best_id]));
355
      /* We're multiplying y[j] by two so we don't have to do it here */
356
0
      yy = ADD16(yy, y[best_id]);
357
358
      /* Only now that we've made the final choice, update y/iy */
359
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
360
0
      y[best_id] += 2;
361
0
      iy[best_id]++;
362
0
   }
363
364
   /* Put the original sign back */
365
0
   j=0;
366
0
   do {
367
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
368
      /* OPT: The is more likely to be compiled without a branch than the code above
369
         but has the same performance otherwise. */
370
0
      iy[j] = (iy[j]^-signx[j]) + signx[j];
371
0
   } while (++j<N);
372
0
   RESTORE_STACK;
373
0
   return yy;
374
0
}
375
376
#ifdef ENABLE_QEXT
377
#include "macros.h"
378
379
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
380
   opus_val32 sum;
381
   opus_val32 rcp_sum;
382
   int offset;
383
   sum = ABS32(X[0]) + ABS32(X[1]);
384
   if (sum < EPSILON) {
385
      iy[0] = K;
386
      up_iy[0] = up*K;
387
      iy[1]=up_iy[1]=0;
388
      *refine=0;
389
#ifdef FIXED_POINT
390
      return (opus_val64)K*K*up*up>>2*shift;
391
#else
392
      (void)shift;
393
      return K*(float)K*up*up;
394
#endif
395
   }
396
#ifdef FIXED_POINT
397
   int sum_shift;
398
   opus_val32 X0;
399
   sum_shift = 30-celt_ilog2(sum);
400
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
401
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
402
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
403
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
404
#else
405
   rcp_sum = 1.f/sum;
406
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
407
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
408
#endif
409
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
410
   offset = up_iy[0] - up*iy[0];
411
   iy[1] = K-abs(iy[0]);
412
   up_iy[1] = up*K-abs(up_iy[0]);
413
   if (X[1] < 0) {
414
      iy[1] = -iy[1];
415
      up_iy[1] = -up_iy[1];
416
      offset = -offset;
417
   }
418
   *refine = offset;
419
#ifdef FIXED_POINT
420
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
421
#else
422
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
423
#endif
424
}
425
426
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
427
   int i;
428
   int dir;
429
   VARDECL(opus_val32, rounding);
430
   int iysum = 0;
431
   SAVE_STACK;
432
   ALLOC(rounding, N, opus_val32);
433
   for (i=0;i<N;i++) {
434
      opus_val32 tmp;
435
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
436
#ifdef FIXED_POINT
437
      iy[i] = (tmp+64) >> 7;
438
#else
439
      iy[i] = (int)floor(.5+tmp);
440
#endif
441
      rounding[i] = tmp - SHL32(iy[i], 7);
442
   }
443
   if (iy != iy0) {
444
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
445
   }
446
   for (i=0;i<N;i++) iysum += iy[i];
447
   if (abs(iysum - K) > 32) {
448
      RESTORE_STACK;
449
      return 1;
450
   }
451
   dir = iysum < K ? 1 : -1;
452
   while (iysum != K) {
453
      opus_val32 roundval=-1000000*dir;
454
      int roundpos=0;
455
      for (i=0;i<N;i++) {
456
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
457
            roundval = rounding[i];
458
            roundpos = i;
459
         }
460
      }
461
      iy[roundpos] += dir;
462
      rounding[roundpos] -= SHL32(dir, 15);
463
      iysum+=dir;
464
   }
465
   RESTORE_STACK;
466
   return 0;
467
}
468
469
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
470
   opus_val32 rcp_sum;
471
   opus_val32 sum=0;
472
   int i;
473
   int failed=0;
474
   opus_val64 yy=0;
475
   VARDECL(opus_val32, Xn);
476
   SAVE_STACK;
477
   for (i=0;i<N;i++) sum += ABS32(X[i]);
478
   ALLOC(Xn, N, opus_val32);
479
   if (sum < EPSILON)
480
      failed = 1;
481
   else {
482
#ifdef FIXED_POINT
483
      int sum_shift = 30-celt_ilog2(sum);
484
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
485
      for (i=0;i<N;i++) {
486
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
487
      }
488
#else
489
      rcp_sum = celt_rcp(sum);
490
      for (i=0;i<N;i++) {
491
         Xn[i] = ABS32(X[i])*rcp_sum;
492
      }
493
#endif
494
   }
495
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
496
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
497
   if (failed) {
498
      iy[0] = K;
499
      for (i=1;i<N;i++) iy[i] = 0;
500
      up_iy[0] = up*K;
501
      for (i=1;i<N;i++) up_iy[i] = 0;
502
   }
503
   for (i=0;i<N;i++) {
504
      yy += up_iy[i]*(opus_val64)up_iy[i];
505
      if (X[i] < 0) {
506
         iy[i] = -iy[i];
507
         up_iy[i] = -up_iy[i];
508
      }
509
      refine[i] = up_iy[i]-up*iy[i];
510
   }
511
   RESTORE_STACK;
512
#ifdef FIXED_POINT
513
   return (yy + (1<<2*shift>>1))>>2*shift;
514
#else
515
   (void)shift;
516
   return yy;
517
#endif
518
}
519
#endif
520
521
#ifdef ENABLE_QEXT
522
/* Take advantage of the fact that "large" refine values are much less likely
523
   than smaller ones. */
524
static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
525
   int large;
526
   large = abs(refine)>up/2;
527
   ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1);
528
   if (large) {
529
      ec_enc_bits(enc, refine < 0, 1);
530
      ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
531
   } else {
532
      ec_enc_bits(enc, refine+up/2, extra_bits);
533
   }
534
}
535
536
static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
537
   int large, refine;
538
   large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1);
539
   if (large) {
540
      int sign = ec_dec_bits(dec, 1);
541
      refine = ec_dec_bits(dec, extra_bits-1) + up/2+1;
542
      if (sign) refine = -refine;
543
   } else {
544
      refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
545
   }
546
   return refine;
547
}
548
#endif
549
550
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
551
      opus_val32 gain, int resynth
552
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
553
0
{
554
0
   VARDECL(int, iy);
555
0
   opus_val32 yy;
556
0
   unsigned collapse_mask;
557
#ifdef ENABLE_QEXT
558
   int yy_shift = 0;
559
#endif
560
0
   SAVE_STACK;
561
562
0
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
0
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
0
   ALLOC(iy, N+3, int);
567
568
0
   exp_rotation(X, N, 1, B, K, spread);
569
570
#ifdef ENABLE_QEXT
571
   if (N==2 && extra_bits >= 2) {
572
      int refine;
573
      int up_iy[2];
574
      int up;
575
      yy_shift = IMAX(0, extra_bits-7);
576
      up = (1<<extra_bits)-1;
577
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
      encode_pulses(iy, N, K, enc);
580
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
      if (resynth)
582
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
   } else if (extra_bits >= 2) {
584
      int i;
585
      VARDECL(int, up_iy);
586
      VARDECL(int, refine);
587
      int up, use_entropy;
588
      ALLOC(up_iy, N, int);
589
      ALLOC(refine, N, int);
590
      yy_shift = IMAX(0, extra_bits-7);
591
      up = (1<<extra_bits)-1;
592
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
      encode_pulses(iy, N, K, enc);
595
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
      if (resynth)
599
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
   } else
601
#endif
602
0
   {
603
0
      yy = op_pvq_search(X, iy, K, N, arch);
604
0
      collapse_mask = extract_collapse_mask(iy, N, B);
605
0
      encode_pulses(iy, N, K, enc);
606
0
      if (resynth)
607
0
         normalise_residual(iy, X, N, yy, gain, 0);
608
0
   }
609
610
0
   if (resynth)
611
0
      exp_rotation(X, N, -1, B, K, spread);
612
613
0
   RESTORE_STACK;
614
0
   return collapse_mask;
615
0
}
616
617
/** Decode pulse vector and combine the result with the pitch vector to produce
618
    the final normalised signal in the current band. */
619
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
620
      ec_dec *dec, opus_val32 gain
621
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
622
863k
{
623
863k
   opus_val32 Ryy;
624
863k
   unsigned collapse_mask;
625
863k
   VARDECL(int, iy);
626
863k
   int yy_shift=0;
627
863k
   SAVE_STACK;
628
629
863k
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
630
863k
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
631
863k
   ALLOC(iy, N, int);
632
863k
   Ryy = decode_pulses(iy, N, K, dec);
633
#ifdef ENABLE_QEXT
634
   if (N==2 && extra_bits >= 2) {
635
      int up;
636
      int refine;
637
      yy_shift = IMAX(0, extra_bits-7);
638
      up = (1<<extra_bits)-1;
639
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
640
      iy[0] *= up;
641
      iy[1] *= up;
642
      if (iy[1] == 0) {
643
         iy[1] = (iy[0] > 0) ? -refine : refine;
644
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
645
      } else if (iy[1] > 0) {
646
         iy[0] += refine;
647
         iy[1] -= refine*(iy[0]>0?1:-1);
648
      } else {
649
         iy[0] -= refine;
650
         iy[1] -= refine*(iy[0]>0?1:-1);
651
      }
652
#ifdef FIXED_POINT
653
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
654
#else
655
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
656
#endif
657
   } else if (extra_bits >= 2) {
658
      int i;
659
      opus_val64 yy64;
660
      VARDECL(int, refine);
661
      int up, use_entropy;
662
      int sign=0;
663
      ALLOC(refine, N, int);
664
      yy_shift = IMAX(0, extra_bits-7);
665
      up = (1<<extra_bits)-1;
666
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
667
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
668
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
669
      else sign = iy[N-1] < 0;
670
      for (i=0;i<N-1;i++) {
671
         iy[i] = iy[i]*up + refine[i];
672
      }
673
      iy[N-1] = up*K;
674
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
675
      if (sign) iy[N-1] = -iy[N-1];
676
      yy64 = 0;
677
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
678
#ifdef FIXED_POINT
679
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
680
#else
681
      Ryy = yy64;
682
#endif
683
   }
684
#endif
685
863k
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
686
863k
   exp_rotation(X, N, -1, B, K, spread);
687
863k
   collapse_mask = extract_collapse_mask(iy, N, B);
688
863k
   RESTORE_STACK;
689
863k
   return collapse_mask;
690
863k
}
691
692
#ifndef OVERRIDE_renormalise_vector
693
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
694
2.96M
{
695
2.96M
   int i;
696
#ifdef FIXED_POINT
697
   int k;
698
#endif
699
2.96M
   opus_val32 E;
700
2.96M
   opus_val16 g;
701
2.96M
   opus_val32 t;
702
2.96M
   celt_norm *xptr;
703
2.96M
   norm_scaledown(X, N, NORM_SHIFT-14);
704
2.96M
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
705
#ifdef FIXED_POINT
706
   k = celt_ilog2(E)>>1;
707
#endif
708
2.96M
   t = VSHR32(E, 2*(k-7));
709
2.96M
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
710
711
2.96M
   xptr = X;
712
62.8M
   for (i=0;i<N;i++)
713
59.8M
   {
714
59.8M
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
715
59.8M
      xptr++;
716
59.8M
   }
717
2.96M
   norm_scaleup(X, N, NORM_SHIFT-14);
718
   /*return celt_sqrt(E);*/
719
2.96M
}
720
#endif /* OVERRIDE_renormalise_vector */
721
722
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
723
0
{
724
0
   int i;
725
0
   int itheta;
726
0
   opus_val32 mid, side;
727
0
   opus_val32 Emid, Eside;
728
729
0
   Emid = Eside = 0;
730
0
   if (stereo)
731
0
   {
732
0
      for (i=0;i<N;i++)
733
0
      {
734
0
         celt_norm m, s;
735
0
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
736
0
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
737
0
         Emid = MAC16_16(Emid, m, m);
738
0
         Eside = MAC16_16(Eside, s, s);
739
0
      }
740
0
   } else {
741
0
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
742
0
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
743
0
   }
744
0
   mid = celt_sqrt32(Emid);
745
0
   side = celt_sqrt32(Eside);
746
#if defined(FIXED_POINT)
747
   itheta = celt_atan2p_norm(side, mid);
748
#else
749
0
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
750
0
#endif
751
752
0
   return itheta;
753
0
}
754
755
#ifdef ENABLE_QEXT
756
757
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
758
   int i;
759
   opus_val32 sum=0;
760
   opus_val32 mag;
761
#ifdef FIXED_POINT
762
   int sum_shift;
763
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
764
#endif
765
   for (i=0;i<N;i++) {
766
      X[i] = (1+2*iy[i])-K;
767
   }
768
   X[face] = sign ? -K : K;
769
   for (i=0;i<N;i++) {
770
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
771
   }
772
#ifdef FIXED_POINT
773
   sum_shift = (29-celt_ilog2(sum))>>1;
774
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
775
   for (i=0;i<N;i++) {
776
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
777
   }
778
#else
779
   mag = 1.f/sqrt(sum);
780
   for (i=0;i<N;i++) {
781
      X[i] *= mag*gain;
782
   }
783
#endif
784
}
785
786
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
787
   int i;
788
   int face=0;
789
   int K;
790
   VARDECL(int, iy);
791
   celt_norm faceval=-1;
792
   opus_val32 norm;
793
   int sign;
794
   SAVE_STACK;
795
   ALLOC(iy, N, int);
796
   K = 1<<res;
797
   /* Using odd K on transients to avoid adding pre-echo. */
798
   if (B!=1) K=IMAX(1, K-1);
799
   if (K==1) {
800
      if (resynth) OPUS_CLEAR(X, N);
801
      RESTORE_STACK;
802
      return 0;
803
   }
804
   for (i=0;i<N;i++) {
805
      if (ABS32(X[i]) > faceval) {
806
         faceval = ABS32(X[i]);
807
         face = i;
808
      }
809
   }
810
   sign = X[face]<0;
811
   ec_enc_uint(enc, face, N);
812
   ec_enc_bits(enc, sign, 1);
813
#ifdef FIXED_POINT
814
   if (faceval != 0) {
815
      int face_shift = 30-celt_ilog2(faceval);
816
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
817
      norm = MULT16_32_Q15(K, norm);
818
      for (i=0;i<N;i++) {
819
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
820
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
821
      }
822
   } else {
823
      OPUS_CLEAR(iy, N);
824
   }
825
#else
826
   norm = .5f*K/(faceval+EPSILON);
827
   for (i=0;i<N;i++) {
828
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
829
   }
830
#endif
831
   for (i=0;i<N;i++) {
832
      if (i != face) ec_enc_bits(enc, iy[i], res);
833
   }
834
   if (resynth) {
835
      cubic_synthesis(X, iy, N, K, face, sign, gain);
836
   }
837
   RESTORE_STACK;
838
   return (1<<B)-1;
839
}
840
841
unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
842
   int i;
843
   int face;
844
   int sign;
845
   int K;
846
   VARDECL(int, iy);
847
   SAVE_STACK;
848
   ALLOC(iy, N, int);
849
   K = 1<<res;
850
   /* Using odd K on transients to avoid adding pre-echo. */
851
   if (B!=1) K=IMAX(1, K-1);
852
   if (K==1) {
853
      OPUS_CLEAR(X, N);
854
      RESTORE_STACK;
855
      return 0;
856
   }
857
   face = ec_dec_uint(dec, N);
858
   sign = ec_dec_bits(dec, 1);
859
   for (i=0;i<N;i++) {
860
      if (i != face) iy[i] = ec_dec_bits(dec, res);
861
   }
862
   iy[face]=0;
863
   cubic_synthesis(X, iy, N, K, face, sign, gain);
864
   RESTORE_STACK;
865
   return (1<<B)-1;
866
}
867
#endif