Coverage Report

Created: 2025-11-11 07:28

/src/opus/celt/vq.c
 Line|  Count|Source
    1|       |/* Copyright (c) 2007-2008 CSIRO
    2|       |   Copyright (c) 2007-2009 Xiph.Org Foundation
    3|       |   Written by Jean-Marc Valin */
    4|       |/*
    5|       |   Redistribution and use in source and binary forms, with or without
    6|       |   modification, are permitted provided that the following conditions
    7|       |   are met:
    8|       |
    9|       |   - Redistributions of source code must retain the above copyright
   10|       |   notice, this list of conditions and the following disclaimer.
   11|       |
   12|       |   - Redistributions in binary form must reproduce the above copyright
   13|       |   notice, this list of conditions and the following disclaimer in the
   14|       |   documentation and/or other materials provided with the distribution.
   15|       |
   16|       |   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   17|       |   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   18|       |   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   19|       |   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   20|       |   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   21|       |   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   22|       |   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   23|       |   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   24|       |   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   25|       |   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   26|       |   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27|       |*/
   28|       |
   29|       |#ifdef HAVE_CONFIG_H
   30|       |#include "config.h"
   31|       |#endif
   32|       |
   33|       |#include "mathops.h"
   34|       |#include "cwrs.h"
   35|       |#include "vq.h"
   36|       |#include "arch.h"
   37|       |#include "os_support.h"
   38|       |#include "bands.h"
   39|       |#include "rate.h"
   40|       |#include "pitch.h"
   41|       |#include "SigProc_FIX.h"
   42|       |
   43|       |#if defined(FIXED_POINT)
   44|       |void norm_scaleup(celt_norm *X, int N, int shift) {
   45|       |   int i;
   46|       |   celt_assert(shift >= 0);
   47|       |   if (shift <= 0) return;
   48|       |   for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
   49|       |}
   50|       |
   51|       |void norm_scaledown(celt_norm *X, int N, int shift) {
   52|       |   int i;
   53|       |   celt_assert(shift >= 0);
   54|       |   if (shift <= 0) return;
   55|       |   for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift);
   56|       |}
   57|       |
   58|       |opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
   59|       |   int i;
   60|       |   opus_val32 sum = 0;
   61|       |   (void)arch;
   62|       |   for (i=0;i<len;i++) sum += x[i]*y[i];
   63|       |   return sum;
   64|       |}
   65|       |opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
   66|       |   int i;
   67|       |   opus_val64 sum = 0;
   68|       |   (void)arch;
   69|       |   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
   70|       |   return sum>>2*(NORM_SHIFT-14);
   71|       |}
   72|       |#endif
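
Note on the fixed-point helpers above: celt_norm values carry NORM_SHIFT fractional bits in this build, while the legacy CELT fixed-point code works at the classic Q14 scale (that reading of NORM_SHIFT is an assumption, not something the report states). Since a QN times QN product is Q(2N), the `sum >> 2*(NORM_SHIFT-14)` in celt_inner_prod_norm_shift() brings the accumulated Q(2*NORM_SHIFT) inner product back to the legacy Q28 = Q(2*14) scale, and norm_scaledown()/norm_scaleup() likewise move whole vectors between the two precisions by shift = NORM_SHIFT-14.
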
   73|       |
   74|       |#ifndef OVERRIDE_vq_exp_rotation1
   75|       |static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
   76|  24.2M|{
   77|  24.2M|   int i;
   78|  24.2M|   opus_val16 ms;
   79|  24.2M|   celt_norm *Xptr;
   80|  24.2M|   Xptr = X;
   81|  24.2M|   ms = NEG16(s);
   82|  24.2M|   norm_scaledown(X, len, NORM_SHIFT-14);
   83|   199M|   for (i=0;i<len-stride;i++)
   84|   175M|   {
   85|   175M|      celt_norm x1, x2;
   86|   175M|      x1 = Xptr[0];
   87|   175M|      x2 = Xptr[stride];
   88|   175M|      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
   89|   175M|      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   90|   175M|   }
   91|  24.2M|   Xptr = &X[len-2*stride-1];
   92|   163M|   for (i=len-2*stride-1;i>=0;i--)
   93|   139M|   {
   94|   139M|      celt_norm x1, x2;
   95|   139M|      x1 = Xptr[0];
   96|   139M|      x2 = Xptr[stride];
   97|   139M|      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
   98|   139M|      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
   99|   139M|   }
  100|  24.2M|   norm_scaleup(X, len, NORM_SHIFT-14);
  101|  24.2M|}
  102|       |#endif /* OVERRIDE_vq_exp_rotation1 */
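
The two passes in exp_rotation1() form a chain of 2-D Givens rotations between elements `stride` apart. Stripped of the fixed-point macros, a single call computes roughly the following float sketch (hypothetical helper, not the shipped code):

   static void exp_rotation1_float(float *X, int len, int stride, float c, float s)
   {
      int i;
      /* Forward pass: rotate each pair (X[i], X[i+stride]). */
      for (i=0;i<len-stride;i++) {
         float x1 = X[i];
         float x2 = X[i+stride];
         X[i+stride] = c*x2 + s*x1;
         X[i]        = c*x1 - s*x2;
      }
      /* Backward pass over the same kind of pairs, in reverse order. */
      for (i=len-2*stride-1;i>=0;i--) {
         float x1 = X[i];
         float x2 = X[i+stride];
         X[i+stride] = c*x2 + s*x1;
         X[i]        = c*x1 - s*x2;
      }
   }

Negating s inverts each elementary rotation, which is why exp_rotation() below applies the passes with opposite signs of s (and in mirrored order) for dir > 0 versus dir < 0: the decoder's rotations undo the encoder's spreading.
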
  103|       |
  104|       |void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
  105|  37.5M|{
  106|  37.5M|   static const int SPREAD_FACTOR[3]={15,10,5};
  107|  37.5M|   int i;
  108|  37.5M|   opus_val16 c, s;
  109|  37.5M|   opus_val16 gain, theta;
  110|  37.5M|   int stride2=0;
  111|  37.5M|   int factor;
  112|       |
  113|  37.5M|   if (2*K>=len || spread==SPREAD_NONE)
  114|  30.3M|      return;
  115|  7.21M|   factor = SPREAD_FACTOR[spread-1];
  116|       |
  117|  7.21M|   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
  118|  7.21M|   theta = HALF16(MULT16_16_Q15(gain,gain));
  119|       |
  120|  7.21M|   c = celt_cos_norm(EXTEND32(theta));
  121|  7.21M|   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
  122|       |
  123|  7.21M|   if (len>=8*stride)
  124|  4.45M|   {
  125|  4.45M|      stride2 = 1;
  126|       |      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
  127|       |         It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */
  128|  18.3M|      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
  129|  13.8M|         stride2++;
  130|  4.45M|   }
  131|       |   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
  132|       |      extract_collapse_mask().*/
  133|  7.21M|   len = celt_udiv(len, stride);
  134|  26.1M|   for (i=0;i<stride;i++)
  135|  18.9M|   {
  136|  18.9M|      if (dir < 0)
  137|  2.82M|      {
  138|  2.82M|         if (stride2)
  139|   607k|            exp_rotation1(X+i*len, len, stride2, s, c);
  140|  2.82M|         exp_rotation1(X+i*len, len, 1, c, s);
  141|  16.1M|      } else {
  142|  16.1M|         exp_rotation1(X+i*len, len, 1, c, -s);
  143|  16.1M|         if (stride2)
  144|  4.65M|            exp_rotation1(X+i*len, len, stride2, s, -c);
  145|  16.1M|      }
  146|  18.9M|   }
  147|  7.21M|}
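
A concrete check of the stride2 loop above: for len = 96 and stride = 3 (len/stride = 32, sqrt(32) ~ 5.66, stride>>2 = 0), the condition (stride2*stride2+stride2)*stride + (stride>>2) < len still holds at stride2 = 5 ((25+5)*3 = 90 < 96) and fails at stride2 = 6 ((36+6)*3 = 126), so the loop exits with stride2 = 6, the square root rounded to nearest.
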
  148|       |
  149|       |/** Normalizes the decoded integer pvq codeword to unit norm. */
  150|       |static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
  151|       |      int N, opus_val32 Ryy, opus_val32 gain, int shift)
  152|  5.60M|{
  153|  5.60M|   int i;
  154|       |#ifdef FIXED_POINT
  155|       |   int k;
  156|       |#endif
  157|  5.60M|   opus_val32 t;
  158|  5.60M|   opus_val32 g;
  159|       |
  160|       |#ifdef FIXED_POINT
  161|       |   k = celt_ilog2(Ryy)>>1;
  162|       |#endif
  163|  5.60M|   t = VSHR32(Ryy, 2*(k-7)-15);
  164|  5.60M|   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
  165|  5.60M|   i=0;
  166|  5.60M|   (void)shift;
  167|       |#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
  168|       |   if (shift>0) {
  169|       |      int tot_shift = NORM_SHIFT+1-k-shift;
  170|       |      if (tot_shift >= 0) {
  171|       |         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
  172|       |         while (++i < N);
  173|       |      } else {
  174|       |         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
  175|       |         while (++i < N);
  176|       |      }
  177|       |   } else
  178|       |#endif
  179|  37.1M|   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
  180|  37.1M|   while (++i < N);
  181|  5.60M|}
  182|       |
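
In float terms, normalise_residual() computes g = gain/sqrt(Ryy) and scales the integer codeword by g; the k and shift plumbing above is fixed-point bookkeeping for that one operation. A minimal float sketch (hypothetical helper name):

   #include <math.h>

   static void normalise_residual_float(const int *iy, float *X, int N,
                                        float Ryy, float gain)
   {
      int i;
      /* Ryy is the codeword's squared norm: the sum of iy[i]*iy[i]. */
      float g = gain/(float)sqrt(Ryy);
      for (i=0;i<N;i++)
         X[i] = g*iy[i];
   }
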
  183|       |static unsigned extract_collapse_mask(int *iy, int N, int B)
  184|  31.9M|{
  185|  31.9M|   unsigned collapse_mask;
  186|  31.9M|   int N0;
  187|  31.9M|   int i;
  188|  31.9M|   if (B<=1)
  189|  24.0M|      return 1;
  190|       |   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
  191|       |      exp_rotation().*/
  192|  7.88M|   N0 = celt_udiv(N, B);
  193|  7.88M|   collapse_mask = 0;
  194|  35.5M|   i=0; do {
  195|  35.5M|      int j;
  196|  35.5M|      unsigned tmp=0;
  197|  76.9M|      j=0; do {
  198|  76.9M|         tmp |= iy[i*N0+j];
  199|  76.9M|      } while (++j<N0);
  200|  35.5M|      collapse_mask |= (tmp!=0)<<i;
  201|  35.5M|   } while (++i<B);
  202|  7.88M|   return collapse_mask;
  203|  31.9M|}
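
Worked example for extract_collapse_mask(): with N = 8, B = 2 and iy = {0,0,0,0, 1,0,-2,0}, block 0 (iy[0..3]) is all zeros while block 1 (iy[4..7]) is not, so the function returns binary 10, i.e. 2. Each of the B bits simply records whether the corresponding length-N/B block received any pulse at all.
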
  204|       |
  205|       |opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
  206|      0|{
  207|      0|   VARDECL(celt_norm, y);
  208|      0|   VARDECL(int, signx);
  209|      0|   int i, j;
  210|      0|   int pulsesLeft;
  211|      0|   opus_val32 sum;
  212|      0|   opus_val32 xy;
  213|      0|   opus_val16 yy;
  214|      0|   SAVE_STACK;
  215|       |
  216|      0|   (void)arch;
  217|      0|   ALLOC(y, N, celt_norm);
  218|      0|   ALLOC(signx, N, int);
  219|       |#ifdef FIXED_POINT
  220|       |   {
  221|       |      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
  222|       |      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
  223|       |      norm_scaledown(X, N, shift);
  224|       |   }
  225|       |#endif
  226|       |   /* Get rid of the sign */
  227|      0|   sum = 0;
  228|      0|   j=0; do {
  229|      0|      signx[j] = X[j]<0;
  230|       |      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
  231|      0|      X[j] = ABS16(X[j]);
  232|      0|      iy[j] = 0;
  233|      0|      y[j] = 0;
  234|      0|   } while (++j<N);
  235|       |
  236|      0|   xy = yy = 0;
  237|       |
  238|      0|   pulsesLeft = K;
  239|       |
  240|       |   /* Do a pre-search by projecting on the pyramid */
  241|      0|   if (K > (N>>1))
  242|      0|   {
  243|      0|      opus_val16 rcp;
  244|      0|      j=0; do {
  245|      0|         sum += X[j];
  246|      0|      }  while (++j<N);
  247|       |
  248|       |      /* If X is too small, just replace it with a pulse at 0 */
  249|       |#ifdef FIXED_POINT
  250|       |      if (sum <= K)
  251|       |#else
  252|       |      /* Prevents infinities and NaNs from causing too many pulses
  253|       |         to be allocated. 64 is an approximation of infinity here. */
  254|      0|      if (!(sum > EPSILON && sum < 64))
  255|      0|#endif
  256|      0|      {
  257|      0|         X[0] = QCONST16(1.f,14);
  258|      0|         j=1; do
  259|      0|            X[j]=0;
  260|      0|         while (++j<N);
  261|      0|         sum = QCONST16(1.f,14);
  262|      0|      }
  263|       |#ifdef FIXED_POINT
  264|       |      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
  265|       |#else
  266|       |      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
  267|      0|      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
  268|      0|#endif
  269|      0|      j=0; do {
  270|       |#ifdef FIXED_POINT
  271|       |         /* It's really important to round *towards zero* here */
  272|       |         iy[j] = MULT16_16_Q15(X[j],rcp);
  273|       |#else
  274|      0|         iy[j] = (int)floor(rcp*X[j]);
  275|      0|#endif
  276|      0|         y[j] = (celt_norm)iy[j];
  277|      0|         yy = MAC16_16(yy, y[j],y[j]);
  278|      0|         xy = MAC16_16(xy, X[j],y[j]);
  279|      0|         y[j] *= 2;
  280|      0|         pulsesLeft -= iy[j];
  281|      0|      }  while (++j<N);
  282|      0|   }
  283|      0|   celt_sig_assert(pulsesLeft>=0);
  284|       |
  285|       |   /* This should never happen, but just in case it does (e.g. on silence)
  286|       |      we fill the first bin with pulses. */
  287|       |#ifdef FIXED_POINT_DEBUG
  288|       |   celt_sig_assert(pulsesLeft<=N+3);
  289|       |#endif
  290|      0|   if (pulsesLeft > N+3)
  291|      0|   {
  292|      0|      opus_val16 tmp = (opus_val16)pulsesLeft;
  293|      0|      yy = MAC16_16(yy, tmp, tmp);
  294|      0|      yy = MAC16_16(yy, tmp, y[0]);
  295|      0|      iy[0] += pulsesLeft;
  296|      0|      pulsesLeft=0;
  297|      0|   }
  298|       |
  299|      0|   for (i=0;i<pulsesLeft;i++)
  300|      0|   {
  301|      0|      opus_val16 Rxy, Ryy;
  302|      0|      int best_id;
  303|      0|      opus_val32 best_num;
  304|      0|      opus_val16 best_den;
  305|       |#ifdef FIXED_POINT
  306|       |      int rshift;
  307|       |#endif
  308|       |#ifdef FIXED_POINT
  309|       |      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
  310|       |#endif
  311|      0|      best_id = 0;
  312|       |      /* The squared magnitude term gets added anyway, so we might as well
  313|       |         add it outside the loop */
  314|      0|      yy = ADD16(yy, 1);
  315|       |
  316|       |      /* Calculations for position 0 are out of the loop, in part to reduce
  317|       |         mispredicted branches (since the if condition is usually false)
  318|       |         in the loop. */
  319|       |      /* Temporary sums of the new pulse(s) */
  320|      0|      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
  321|       |      /* We're multiplying y[j] by two so we don't have to do it here */
  322|      0|      Ryy = ADD16(yy, y[0]);
  323|       |
  324|       |      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
  325|       |         Rxy is positive because the sign is pre-computed) */
  326|      0|      Rxy = MULT16_16_Q15(Rxy,Rxy);
  327|      0|      best_den = Ryy;
  328|      0|      best_num = Rxy;
  329|      0|      j=1;
  330|      0|      do {
  331|       |         /* Temporary sums of the new pulse(s) */
  332|      0|         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
  333|       |         /* We're multiplying y[j] by two so we don't have to do it here */
  334|      0|         Ryy = ADD16(yy, y[j]);
  335|       |
  336|       |         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
  337|       |            Rxy is positive because the sign is pre-computed) */
  338|      0|         Rxy = MULT16_16_Q15(Rxy,Rxy);
  339|       |         /* The idea is to check for num/den >= best_num/best_den, but that way
  340|       |            we can do it without any division */
  341|       |         /* OPT: It's not clear whether a cmov is faster than a branch here
  342|       |            since the condition is more often false than true and using
  343|       |            a cmov introduces data dependencies across iterations. The optimal
  344|       |            choice may be architecture-dependent. */
  345|      0|         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
  346|      0|         {
  347|      0|            best_den = Ryy;
  348|      0|            best_num = Rxy;
  349|      0|            best_id = j;
  350|      0|         }
  351|      0|      } while (++j<N);
  352|       |
  353|       |      /* Updating the sums of the new pulse(s) */
  354|      0|      xy = ADD32(xy, EXTEND32(X[best_id]));
  355|       |      /* We're multiplying y[j] by two so we don't have to do it here */
  356|      0|      yy = ADD16(yy, y[best_id]);
  357|       |
  358|       |      /* Only now that we've made the final choice, update y/iy */
  359|       |      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
  360|      0|      y[best_id] += 2;
  361|      0|      iy[best_id]++;
  362|      0|   }
  363|       |
  364|       |   /* Put the original sign back */
  365|      0|   j=0;
  366|      0|   do {
  367|       |      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
  368|       |      /* OPT: This is more likely to be compiled without a branch than the code above
  369|       |         but has the same performance otherwise. */
  370|      0|      iy[j] = (iy[j]^-signx[j]) + signx[j];
  371|      0|   } while (++j<N);
  372|      0|   RESTORE_STACK;
  373|      0|   return yy;
  374|      0|}
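
Worked example of the search above, ignoring fixed-point scaling: with pulse magnitudes X = (2, 1, 1) and K = 2, the pyramid projection (taken since K > N/2) places one pulse, iy = (1, 0, 0), leaving xy = 2, yy = 1 and one pulse to go. The greedy pass then compares, per position j, the score (xy + X[j])^2 / (yy + 1 + 2*iy[j]): 16/4 = 4 at j = 0 against 9/2 = 4.5 at j = 1 and j = 2; the strict '>' keeps the first of the tied candidates, so the last pulse lands at j = 1 and the result is iy = (1, 1, 0). The comparison MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num) is exactly that ratio test cross-multiplied to avoid a division.
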
  375|       |
  376|       |#ifdef ENABLE_QEXT
  377|       |#include "macros.h"
  378|       |
  379|       |static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
  380|       |   opus_val32 sum;
  381|       |   opus_val32 rcp_sum;
  382|       |   int offset;
  383|       |   sum = ABS32(X[0]) + ABS32(X[1]);
  384|       |   if (sum < EPSILON) {
  385|       |      iy[0] = K;
  386|       |      up_iy[0] = up*K;
  387|       |      iy[1]=up_iy[1]=0;
  388|       |      *refine=0;
  389|       |#ifdef FIXED_POINT
  390|       |      return (opus_val64)K*K*up*up>>2*shift;
  391|       |#else
  392|       |      (void)shift;
  393|       |      return K*(float)K*up*up;
  394|       |#endif
  395|       |   }
  396|       |#ifdef FIXED_POINT
  397|       |   int sum_shift;
  398|       |   opus_val32 X0;
  399|       |   sum_shift = 30-celt_ilog2(sum);
  400|       |   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
  401|       |   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
  402|       |   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
  403|       |   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
  404|       |#else
  405|       |   rcp_sum = 1.f/sum;
  406|       |   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
  407|       |   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
  408|       |#endif
  409|       |   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
  410|       |   offset = up_iy[0] - up*iy[0];
  411|       |   iy[1] = K-abs(iy[0]);
  412|       |   up_iy[1] = up*K-abs(up_iy[0]);
  413|       |   if (X[1] < 0) {
  414|       |      iy[1] = -iy[1];
  415|       |      up_iy[1] = -up_iy[1];
  416|       |      offset = -offset;
  417|       |   }
  418|       |   *refine = offset;
  419|       |#ifdef FIXED_POINT
  420|       |   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
  421|       |#else
  422|       |   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
  423|       |#endif
  424|       |}
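
Example for the N == 2 search: with K = 4, up = 7 (extra_bits = 3) and X = (0.3, -0.7), sum = 1.0, so iy[0] = round(4*0.3) = 1 and up_iy[0] = round(28*0.3) = 8, already inside the allowed window [7*1-3, 7*1+3]. The pulse budgets then give iy = (1, -3) and up_iy = (8, -20) after the signs are restored from X[1] < 0 (note |8| + |-20| = 28 = up*K, as required), and the offset up_iy[0] - up*iy[0] = 1 is negated along with the second component, so *refine = -1.
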
  425|       |
  426|       |static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
  427|       |   int i;
  428|       |   int dir;
  429|       |   VARDECL(opus_val32, rounding);
  430|       |   int iysum = 0;
  431|       |   SAVE_STACK;
  432|       |   ALLOC(rounding, N, opus_val32);
  433|       |   for (i=0;i<N;i++) {
  434|       |      opus_val32 tmp;
  435|       |      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
  436|       |#ifdef FIXED_POINT
  437|       |      iy[i] = (tmp+64) >> 7;
  438|       |#else
  439|       |      iy[i] = (int)floor(.5+tmp);
  440|       |#endif
  441|       |      rounding[i] = tmp - SHL32(iy[i], 7);
  442|       |   }
  443|       |   if (iy != iy0) {
  444|       |      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
  445|       |   }
  446|       |   for (i=0;i<N;i++) iysum += iy[i];
  447|       |   if (abs(iysum - K) > 32) {
  448|       |      return 1;
  449|       |   }
  450|       |   dir = iysum < K ? 1 : -1;
  451|       |   while (iysum != K) {
  452|       |      opus_val32 roundval=-1000000*dir;
  453|       |      int roundpos=0;
  454|       |      for (i=0;i<N;i++) {
  455|       |         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
  456|       |            roundval = rounding[i];
  457|       |            roundpos = i;
  458|       |         }
  459|       |      }
  460|       |      iy[roundpos] += dir;
  461|       |      rounding[roundpos] -= SHL32(dir, 15);
  462|       |      iysum+=dir;
  463|       |   }
  464|       |   return 0;
  465|       |}
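
op_pvq_refine() rounds K*Xn[i] to integers, then walks the total back to exactly K one pulse at a time, always adjusting the entry whose rounding error is largest in the needed direction (staying within `margin` of up*iy0[i] and never driving an entry below zero). For instance, with Xn = (0.5, 0.25, 0.25) and K = 2, rounding gives (1, 1, 1), one pulse too many; the entry with the most negative rounding error (index 1, error -0.5, found first) is decremented, yielding (1, 0, 1).
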
  466|       |
  467|       |static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
  468|       |   opus_val32 rcp_sum;
  469|       |   opus_val32 sum=0;
  470|       |   int i;
  471|       |   int failed=0;
  472|       |   opus_val64 yy=0;
  473|       |   VARDECL(opus_val32, Xn);
  474|       |   SAVE_STACK;
  475|       |   for (i=0;i<N;i++) sum += ABS32(X[i]);
  476|       |   ALLOC(Xn, N, opus_val32);
  477|       |   if (sum < EPSILON)
  478|       |      failed = 1;
  479|       |   else {
  480|       |#ifdef FIXED_POINT
  481|       |      int sum_shift = 30-celt_ilog2(sum);
  482|       |      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
  483|       |      for (i=0;i<N;i++) {
  484|       |         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
  485|       |      }
  486|       |#else
  487|       |      rcp_sum = celt_rcp(sum);
  488|       |      for (i=0;i<N;i++) {
  489|       |         Xn[i] = ABS32(X[i])*rcp_sum;
  490|       |      }
  491|       |#endif
  492|       |   }
  493|       |   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
  494|       |   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
  495|       |   if (failed) {
  496|       |      iy[0] = K;
  497|       |      for (i=1;i<N;i++) iy[i] = 0;
  498|       |      up_iy[0] = up*K;
  499|       |      for (i=1;i<N;i++) up_iy[i] = 0;
  500|       |   }
  501|       |   for (i=0;i<N;i++) {
  502|       |      yy += up_iy[i]*(opus_val64)up_iy[i];
  503|       |      if (X[i] < 0) {
  504|       |         iy[i] = -iy[i];
  505|       |         up_iy[i] = -up_iy[i];
  506|       |      }
  507|       |      refine[i] = up_iy[i]-up*iy[i];
  508|       |   }
  509|       |#ifdef FIXED_POINT
  510|       |   return (yy + (1<<2*shift>>1))>>2*shift;
  511|       |#else
  512|       |   (void)shift;
  513|       |   return yy;
  514|       |#endif
  515|       |}
  516|       |#endif
  517|       |
  518|       |#ifdef ENABLE_QEXT
  519|       |/* Take advantage of the fact that "large" refine values are much less likely
  520|       |   than smaller ones. */
  521|       |static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
  522|       |   int large;
  523|       |   large = abs(refine)>up/2;
  524|       |   ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1);
  525|       |   if (large) {
  526|       |      ec_enc_bits(enc, refine < 0, 1);
  527|       |      ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
  528|       |   } else {
  529|       |      ec_enc_bits(enc, refine+up/2, extra_bits);
  530|       |   }
  531|       |}
  532|       |
  533|       |static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
  534|       |   int large, refine;
  535|       |   large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1);
  536|       |   if (large) {
  537|       |      int sign = ec_dec_bits(dec, 1);
  538|       |      refine = ec_dec_bits(dec, extra_bits-1) + up/2+1;
  539|       |      if (sign) refine = -refine;
  540|       |   } else {
  541|       |      refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
  542|       |   }
  543|       |   return refine;
  544|       |}
  545|       |#endif
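
Bit-cost example for the refine coder above, with extra_bits = 3 (up = 7): values |refine| <= 3 take the "small" path, one flag plus 3 bits for refine+3 in [0, 6]; values 4 <= |refine| <= 7 take the "large" path, one flag, one sign bit and abs(refine)-4 in 2 bits. With use_entropy set, the flag is coded by ec_enc_bit_logp(..., 3), so the common small case spends about 0.19 bits on the flag and the rare large case about 3 bits, which is what the skewed prior in the comment is buying.
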
  546|       |
  547|       |unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
  548|       |      opus_val32 gain, int resynth
  549|       |      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
  550|  31.9M|{
  551|  31.9M|   VARDECL(int, iy);
  552|  31.9M|   opus_val32 yy;
  553|  31.9M|   unsigned collapse_mask;
  554|       |#ifdef ENABLE_QEXT
  555|       |   int yy_shift = 0;
  556|       |#endif
  557|  31.9M|   SAVE_STACK;
  558|       |
  559|  31.9M|   celt_assert2(K>0, "alg_quant() needs at least one pulse");
  560|  31.9M|   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
  561|       |
  562|       |   /* Covers vectorization by up to 4. */
  563|  31.9M|   ALLOC(iy, N+3, int);
  564|       |
  565|  31.9M|   exp_rotation(X, N, 1, B, K, spread);
  566|       |
  567|       |#ifdef ENABLE_QEXT
  568|       |   if (N==2 && extra_bits >= 2) {
  569|       |      int refine;
  570|       |      int up_iy[2];
  571|       |      int up;
  572|       |      yy_shift = IMAX(0, extra_bits-7);
  573|       |      up = (1<<extra_bits)-1;
  574|       |      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
  575|       |      collapse_mask = extract_collapse_mask(up_iy, N, B);
  576|       |      encode_pulses(iy, N, K, enc);
  577|       |      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
  578|       |      if (resynth)
  579|       |         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
  580|       |   } else if (extra_bits >= 2) {
  581|       |      int i;
  582|       |      VARDECL(int, up_iy);
  583|       |      VARDECL(int, refine);
  584|       |      int up, use_entropy;
  585|       |      ALLOC(up_iy, N, int);
  586|       |      ALLOC(refine, N, int);
  587|       |      yy_shift = IMAX(0, extra_bits-7);
  588|       |      up = (1<<extra_bits)-1;
  589|       |      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
  590|       |      collapse_mask = extract_collapse_mask(up_iy, N, B);
  591|       |      encode_pulses(iy, N, K, enc);
  592|       |      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
  593|       |      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
  594|       |      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
  595|       |      if (resynth)
  596|       |         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
  597|       |   } else
  598|       |#endif
  599|  31.9M|   {
  600|  31.9M|      yy = op_pvq_search(X, iy, K, N, arch);
  601|  31.9M|      collapse_mask = extract_collapse_mask(iy, N, B);
  602|  31.9M|      encode_pulses(iy, N, K, enc);
  603|  31.9M|      if (resynth)
  604|  5.60M|         normalise_residual(iy, X, N, yy, gain, 0);
  605|  31.9M|   }
  606|       |
  607|  31.9M|   if (resynth)
  608|  5.60M|      exp_rotation(X, N, -1, B, K, spread);
  609|       |
  610|  31.9M|   RESTORE_STACK;
  611|  31.9M|   return collapse_mask;
  612|  31.9M|}
  613|       |
  614|       |/** Decode pulse vector and combine the result with the pitch vector to produce
  615|       |    the final normalised signal in the current band. */
  616|       |unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
  617|       |      ec_dec *dec, opus_val32 gain
  618|       |      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
  619|      0|{
  620|      0|   opus_val32 Ryy;
  621|      0|   unsigned collapse_mask;
  622|      0|   VARDECL(int, iy);
  623|      0|   int yy_shift=0;
  624|      0|   SAVE_STACK;
  625|       |
  626|      0|   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
  627|      0|   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
  628|      0|   ALLOC(iy, N, int);
  629|      0|   Ryy = decode_pulses(iy, N, K, dec);
  630|       |#ifdef ENABLE_QEXT
  631|       |   if (N==2 && extra_bits >= 2) {
  632|       |      int up;
  633|       |      int refine;
  634|       |      yy_shift = IMAX(0, extra_bits-7);
  635|       |      up = (1<<extra_bits)-1;
  636|       |      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
  637|       |      iy[0] *= up;
  638|       |      iy[1] *= up;
  639|       |      if (iy[1] == 0) {
  640|       |         iy[1] = (iy[0] > 0) ? -refine : refine;
  641|       |         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
  642|       |      } else if (iy[1] > 0) {
  643|       |         iy[0] += refine;
  644|       |         iy[1] -= refine*(iy[0]>0?1:-1);
  645|       |      } else {
  646|       |         iy[0] -= refine;
  647|       |         iy[1] -= refine*(iy[0]>0?1:-1);
  648|       |      }
  649|       |#ifdef FIXED_POINT
  650|       |      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
  651|       |#else
  652|       |      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
  653|       |#endif
  654|       |   } else if (extra_bits >= 2) {
  655|       |      int i;
  656|       |      opus_val64 yy64;
  657|       |      VARDECL(int, refine);
  658|       |      int up, use_entropy;
  659|       |      int sign=0;
  660|       |      ALLOC(refine, N, int);
  661|       |      yy_shift = IMAX(0, extra_bits-7);
  662|       |      up = (1<<extra_bits)-1;
  663|       |      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
  664|       |      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
  665|       |      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
  666|       |      else sign = iy[N-1] < 0;
  667|       |      for (i=0;i<N-1;i++) {
  668|       |         iy[i] = iy[i]*up + refine[i];
  669|       |      }
  670|       |      iy[N-1] = up*K;
  671|       |      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
  672|       |      if (sign) iy[N-1] = -iy[N-1];
  673|       |      yy64 = 0;
  674|       |      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
  675|       |#ifdef FIXED_POINT
  676|       |      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
  677|       |#else
  678|       |      Ryy = yy64;
  679|       |#endif
  680|       |   }
  681|       |#endif
  682|      0|   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
  683|      0|   exp_rotation(X, N, -1, B, K, spread);
  684|      0|   collapse_mask = extract_collapse_mask(iy, N, B);
  685|      0|   RESTORE_STACK;
  686|      0|   return collapse_mask;
  687|      0|}
  688|       |
  689|       |#ifndef OVERRIDE_renormalise_vector
  690|       |void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
  691|  85.7M|{
  692|  85.7M|   int i;
  693|       |#ifdef FIXED_POINT
  694|       |   int k;
  695|       |#endif
  696|  85.7M|   opus_val32 E;
  697|  85.7M|   opus_val16 g;
  698|  85.7M|   opus_val32 t;
  699|  85.7M|   celt_norm *xptr;
  700|  85.7M|   norm_scaledown(X, N, NORM_SHIFT-14);
  701|  85.7M|   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
  702|       |#ifdef FIXED_POINT
  703|       |   k = celt_ilog2(E)>>1;
  704|       |#endif
  705|  85.7M|   t = VSHR32(E, 2*(k-7));
  706|  85.7M|   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
  707|       |
  708|  85.7M|   xptr = X;
  709|   928M|   for (i=0;i<N;i++)
  710|   842M|   {
  711|   842M|      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
  712|   842M|      xptr++;
  713|   842M|   }
  714|  85.7M|   norm_scaleup(X, N, NORM_SHIFT-14);
  715|       |   /*return celt_sqrt(E);*/
  716|  85.7M|}
  717|       |#endif /* OVERRIDE_renormalise_vector */
  718|       |
  719|       |opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
  720|   208M|{
  721|   208M|   int i;
  722|   208M|   int itheta;
  723|   208M|   opus_val32 mid, side;
  724|   208M|   opus_val32 Emid, Eside;
  725|       |
  726|   208M|   Emid = Eside = 0;
  727|   208M|   if (stereo)
  728|   187M|   {
  729|  1.88G|      for (i=0;i<N;i++)
  730|  1.69G|      {
  731|  1.69G|         celt_norm m, s;
  732|  1.69G|         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
  733|  1.69G|         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
  734|  1.69G|         Emid = MAC16_16(Emid, m, m);
  735|  1.69G|         Eside = MAC16_16(Eside, s, s);
  736|  1.69G|      }
  737|   187M|   } else {
  738|  20.8M|      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
  739|  20.8M|      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
  740|  20.8M|   }
  741|   208M|   mid = celt_sqrt32(Emid);
  742|   208M|   side = celt_sqrt32(Eside);
  743|       |#if defined(FIXED_POINT)
  744|       |   itheta = celt_atan2p_norm(side, mid);
  745|       |#else
  746|   208M|   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
  747|   208M|#endif
  748|       |
  749|   208M|   return itheta;
  750|   208M|}
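
Assuming celt_atan2p_norm() returns atan2(y, x) normalized to [0, 1] over the first quadrant (an assumption about a helper defined elsewhere), stereo_itheta() returns the stereo angle theta = atan2(sqrt(Eside), sqrt(Emid))/(pi/2), scaled in the float build by 65536*16384 = 2^30. Equal mid and side energy, for example, gives theta = 1/2 and itheta = 2^29, half of full scale.
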
  751|       |
  752|       |#ifdef ENABLE_QEXT
  753|       |
  754|       |static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
  755|       |   int i;
  756|       |   opus_val32 sum=0;
  757|       |   opus_val32 mag;
  758|       |#ifdef FIXED_POINT
  759|       |   int sum_shift;
  760|       |   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
  761|       |#endif
  762|       |   for (i=0;i<N;i++) {
  763|       |      X[i] = (1+2*iy[i])-K;
  764|       |   }
  765|       |   X[face] = sign ? -K : K;
  766|       |   for (i=0;i<N;i++) {
  767|       |      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
  768|       |   }
  769|       |#ifdef FIXED_POINT
  770|       |   sum_shift = (29-celt_ilog2(sum))>>1;
  771|       |   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
  772|       |   for (i=0;i<N;i++) {
  773|       |      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
  774|       |   }
  775|       |#else
  776|       |   mag = 1.f/sqrt(sum);
  777|       |   for (i=0;i<N;i++) {
  778|       |      X[i] *= mag*gain;
  779|       |   }
  780|       |#endif
  781|       |}
  782|       |
  783|       |unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
  784|       |   int i;
  785|       |   int face=0;
  786|       |   int K;
  787|       |   VARDECL(int, iy);
  788|       |   celt_norm faceval=-1;
  789|       |   opus_val32 norm;
  790|       |   int sign;
  791|       |   SAVE_STACK;
  792|       |   ALLOC(iy, N, int);
  793|       |   K = 1<<res;
  794|       |   /* Using odd K on transients to avoid adding pre-echo. */
  795|       |   if (B!=1) K=IMAX(1, K-1);
  796|       |   if (K==1) {
  797|       |      if (resynth) OPUS_CLEAR(X, N);
  798|       |      RESTORE_STACK;
  799|       |      return 0;
  800|       |   }
  801|       |   for (i=0;i<N;i++) {
  802|       |      if (ABS32(X[i]) > faceval) {
  803|       |         faceval = ABS32(X[i]);
  804|       |         face = i;
  805|       |      }
  806|       |   }
  807|       |   sign = X[face]<0;
  808|       |   ec_enc_uint(enc, face, N);
  809|       |   ec_enc_bits(enc, sign, 1);
  810|       |#ifdef FIXED_POINT
  811|       |   if (faceval != 0) {
  812|       |      int face_shift = 30-celt_ilog2(faceval);
  813|       |      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
  814|       |      norm = MULT16_32_Q15(K, norm);
  815|       |      for (i=0;i<N;i++) {
  816|       |         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
  817|       |         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
  818|       |      }
  819|       |   } else {
  820|       |      OPUS_CLEAR(iy, N);
  821|       |   }
  822|       |#else
  823|       |   norm = .5f*K/(faceval+EPSILON);
  824|       |   for (i=0;i<N;i++) {
  825|       |      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
  826|       |   }
  827|       |#endif
  828|       |   for (i=0;i<N;i++) {
  829|       |      if (i != face) ec_enc_bits(enc, iy[i], res);
  830|       |   }
  831|       |   if (resynth) {
  832|       |      cubic_synthesis(X, iy, N, K, face, sign, gain);
  833|       |   }
  834|       |   RESTORE_STACK;
  835|       |   return (1<<B)-1;
  836|       |}
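
cubic_quant() quantizes on the surface of a hypercube rather than the PVQ pyramid: the largest-magnitude coordinate is pinned to a "face" (its index coded by ec_enc_uint(), its sign in one bit), and every other coordinate is quantized uniformly to K levels across that face, each sent raw in `res` bits. For example, with N = 3, res = 2 (so K = 4 when B == 1) and X = (0.1, -0.8, 0.4): face = 1, sign = 1, norm = 0.5*4/0.8 = 2.5, so iy[0] = floor(0.9*2.5) = 2 and iy[2] = floor(1.2*2.5) = 3. cubic_synthesis() then rebuilds X[i] = (1+2*iy[i])-K = (1, -4, 3), with the face coordinate forced to -K, and rescales the vector to norm `gain`.
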
  837|       |
  838|       |unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
  839|       |   int i;
  840|       |   int face;
  841|       |   int sign;
  842|       |   int K;
  843|       |   VARDECL(int, iy);
  844|       |   SAVE_STACK;
  845|       |   ALLOC(iy, N, int);
  846|       |   K = 1<<res;
  847|       |   /* Using odd K on transients to avoid adding pre-echo. */
  848|       |   if (B!=1) K=IMAX(1, K-1);
  849|       |   if (K==1) {
  850|       |      OPUS_CLEAR(X, N);
  851|       |      return 0;
  852|       |   }
  853|       |   face = ec_dec_uint(dec, N);
  854|       |   sign = ec_dec_bits(dec, 1);
  855|       |   for (i=0;i<N;i++) {
  856|       |      if (i != face) iy[i] = ec_dec_bits(dec, res);
  857|       |   }
  858|       |   iy[face]=0;
  859|       |   cubic_synthesis(X, iy, N, K, face, sign, gain);
  860|       |   RESTORE_STACK;
  861|       |   return (1<<B)-1;
  862|       |}
  863|       |#endif