Coverage Report

Created: 2025-08-29 07:42

/src/opus/celt/bands.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Copyright (c) 2008-2009 Gregory Maxwell
4
   Written by Jean-Marc Valin and Gregory Maxwell */
5
/*
6
   Redistribution and use in source and binary forms, with or without
7
   modification, are permitted provided that the following conditions
8
   are met:
9
10
   - Redistributions of source code must retain the above copyright
11
   notice, this list of conditions and the following disclaimer.
12
13
   - Redistributions in binary form must reproduce the above copyright
14
   notice, this list of conditions and the following disclaimer in the
15
   documentation and/or other materials provided with the distribution.
16
17
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#ifdef HAVE_CONFIG_H
31
#include "config.h"
32
#endif
33
34
#include <math.h>
35
#include "bands.h"
36
#include "modes.h"
37
#include "vq.h"
38
#include "cwrs.h"
39
#include "stack_alloc.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "rate.h"
43
#include "quant_bands.h"
44
#include "pitch.h"
45
46
int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47
16.7M
{
48
16.7M
   int i;
49
250M
   for (i=0;i<N;i++)
50
250M
   {
51
250M
      if (val < thresholds[i])
52
16.7M
         break;
53
250M
   }
54
16.7M
   if (i>prev && val < thresholds[prev]+hysteresis[prev])
55
27.8k
      i=prev;
56
16.7M
   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57
10.2k
      i=prev;
58
16.7M
   return i;
59
16.7M
}
60
61
opus_uint32 celt_lcg_rand(opus_uint32 seed)
62
857M
{
63
857M
   return 1664525 * seed + 1013904223;
64
857M
}
65
66
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67
   with this approximation is important because it has an impact on the bit allocation */
68
opus_int16 bitexact_cos(opus_int16 x)
69
18.0M
{
70
18.0M
   opus_int32 tmp;
71
18.0M
   opus_int16 x2;
72
18.0M
   tmp = (4096+((opus_int32)(x)*(x)))>>13;
73
18.0M
   celt_sig_assert(tmp<=32767);
74
18.0M
   x2 = tmp;
75
18.0M
   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76
18.0M
   celt_sig_assert(x2<=32766);
77
18.0M
   return 1+x2;
78
18.0M
}
79
80
int bitexact_log2tan(int isin,int icos)
81
9.03M
{
82
9.03M
   int lc;
83
9.03M
   int ls;
84
9.03M
   lc=EC_ILOG(icos);
85
9.03M
   ls=EC_ILOG(isin);
86
9.03M
   icos<<=15-lc;
87
9.03M
   isin<<=15-ls;
88
9.03M
   return (ls-lc)*(1<<11)
89
9.03M
         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90
9.03M
         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91
9.03M
}
92
93
#ifdef FIXED_POINT
94
/* Compute the amplitude (sqrt energy) in each of the bands */
95
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
96
{
97
   int i, c, N;
98
   const opus_int16 *eBands = m->eBands;
99
   (void)arch;
100
   N = m->shortMdctSize<<LM;
101
   c=0; do {
102
      for (i=0;i<end;i++)
103
      {
104
         int j;
105
         opus_val32 maxval=0;
106
         opus_val32 sum = 0;
107
108
         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
109
         if (maxval > 0)
110
         {
111
            int shift = IMAX(0, 30 - celt_ilog2(maxval+(maxval>>14)+1) - ((((m->logN[i]+7)>>BITRES)+LM+1)>>1));
112
            j=eBands[i]<<LM; do {
113
               opus_val32 x = SHL32(X[j+c*N],shift);
114
               sum = ADD32(sum, MULT32_32_Q31(x, x));
115
            } while (++j<eBands[i+1]<<LM);
116
            bandE[i+c*m->nbEBands] = MAX32(maxval, PSHR32(celt_sqrt32(SHR32(sum,1)), shift));
117
         } else {
118
            bandE[i+c*m->nbEBands] = EPSILON;
119
         }
120
      }
121
   } while (++c<C);
122
}
123
124
/* Normalise each band such that the energy is one. */
125
void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
126
{
127
   int i, c, N;
128
   const opus_int16 *eBands = m->eBands;
129
   N = M*m->shortMdctSize;
130
   c=0; do {
131
      i=0; do {
132
         int j,shift;
133
         opus_val32 E;
134
         opus_val32 g;
135
         E = bandE[i+c*m->nbEBands];
136
         /* For very low energies, we need this to make sure not to prevent energy rounding from
137
            blowing up the normalized signal. */
138
         if (E < 10) E += EPSILON;
139
         shift = 30-celt_zlog2(E);
140
         E = SHL32(E, shift);
141
         g = celt_rcp_norm32(E);
142
         j=M*eBands[i]; do {
143
            X[j+c*N] = PSHR32(MULT32_32_Q31(g, SHL32(freq[j+c*N], shift)), 30-NORM_SHIFT);
144
         } while (++j<M*eBands[i+1]);
145
      } while (++i<end);
146
   } while (++c<C);
147
}
148
149
#else /* FIXED_POINT */
150
/* Compute the amplitude (sqrt energy) in each of the bands */
151
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
152
55.3M
{
153
55.3M
   int i, c, N;
154
55.3M
   const opus_int16 *eBands = m->eBands;
155
55.3M
   N = m->shortMdctSize<<LM;
156
72.0M
   c=0; do {
157
1.21G
      for (i=0;i<end;i++)
158
1.14G
      {
159
1.14G
         opus_val32 sum;
160
1.14G
         sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
161
1.14G
         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
162
         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
163
1.14G
      }
164
72.0M
   } while (++c<C);
165
   /*printf ("\n");*/
166
55.3M
}
167
168
/* Normalise each band such that the energy is one. */
169
void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
170
55.1M
{
171
55.1M
   int i, c, N;
172
55.1M
   const opus_int16 *eBands = m->eBands;
173
55.1M
   N = M*m->shortMdctSize;
174
71.8M
   c=0; do {
175
1.21G
      for (i=0;i<end;i++)
176
1.13G
      {
177
1.13G
         int j;
178
1.13G
         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
179
8.83G
         for (j=M*eBands[i];j<M*eBands[i+1];j++)
180
7.69G
            X[j+c*N] = freq[j+c*N]*g;
181
1.13G
      }
182
71.8M
   } while (++c<C);
183
55.1M
}
184
185
#endif /* FIXED_POINT */
186
187
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
188
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
189
      celt_sig * OPUS_RESTRICT freq, const celt_glog *bandLogE, int start,
190
      int end, int M, int downsample, int silence)
191
0
{
192
0
   int i, N;
193
0
   int bound;
194
0
   celt_sig * OPUS_RESTRICT f;
195
0
   const celt_norm * OPUS_RESTRICT x;
196
0
   const opus_int16 *eBands = m->eBands;
197
0
   N = M*m->shortMdctSize;
198
0
   bound = M*eBands[end];
199
0
   if (downsample!=1)
200
0
      bound = IMIN(bound, N/downsample);
201
0
   if (silence)
202
0
   {
203
0
      bound = 0;
204
0
      start = end = 0;
205
0
   }
206
0
   f = freq;
207
0
   x = X+M*eBands[start];
208
0
   if (start != 0)
209
0
   {
210
0
      for (i=0;i<M*eBands[start];i++)
211
0
         *f++ = 0;
212
0
   } else {
213
0
      f += M*eBands[start];
214
0
   }
215
0
   for (i=start;i<end;i++)
216
0
   {
217
0
      int j, band_end;
218
0
      opus_val32 g;
219
0
      celt_glog lg;
220
#ifdef FIXED_POINT
221
      int shift;
222
#endif
223
0
      j=M*eBands[i];
224
0
      band_end = M*eBands[i+1];
225
0
      lg = ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],DB_SHIFT-4));
226
0
#ifndef FIXED_POINT
227
0
      g = celt_exp2_db(MIN32(32.f, lg));
228
#else
229
      /* Handle the integer part of the log energy */
230
      shift = 17-(lg>>DB_SHIFT);
231
      if (shift>=31)
232
      {
233
         shift=0;
234
         g=0;
235
      } else {
236
         /* Handle the fractional part. */
237
         g = SHL32(celt_exp2_db_frac((lg&((1<<DB_SHIFT)-1))), 2);
238
      }
239
      /* Handle extreme gains with negative shift. */
240
      if (shift<0)
241
      {
242
         /* To avoid overflow, we're
243
            capping the gain here, which is equivalent to a cap of 18 on lg.
244
            This shouldn't trigger unless the bitstream is already corrupted. */
245
         g = 2147483647;
246
         shift = 0;
247
      }
248
#endif
249
0
      do {
250
0
         *f++ = PSHR32(MULT32_32_Q31(SHL32(*x, 30-NORM_SHIFT), g), shift);
251
0
         x++;
252
0
      } while (++j<band_end);
253
0
   }
254
0
   celt_assert(start <= end);
255
0
   OPUS_CLEAR(&freq[bound], N-bound);
256
0
}
257
258
/* This prevents energy collapse for transients with multiple short MDCTs */
259
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
260
      int start, int end, const celt_glog *logE, const celt_glog *prev1logE,
261
      const celt_glog *prev2logE, const int *pulses, opus_uint32 seed, int encode, int arch)
262
0
{
263
0
   int c, i, j, k;
264
0
   for (i=start;i<end;i++)
265
0
   {
266
0
      int N0;
267
0
      opus_val16 thresh, sqrt_1;
268
0
      int depth;
269
#ifdef FIXED_POINT
270
      int shift;
271
      opus_val32 thresh32;
272
#endif
273
274
0
      N0 = m->eBands[i+1]-m->eBands[i];
275
      /* depth in 1/8 bits */
276
0
      celt_sig_assert(pulses[i]>=0);
277
0
      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
278
279
#ifdef FIXED_POINT
280
      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
281
      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
282
      {
283
         opus_val32 t;
284
         t = N0<<LM;
285
         shift = celt_ilog2(t)>>1;
286
         t = SHL32(t, (7-shift)<<1);
287
         sqrt_1 = celt_rsqrt_norm(t);
288
      }
289
#else
290
0
      thresh = .5f*celt_exp2(-.125f*depth);
291
0
      sqrt_1 = celt_rsqrt(N0<<LM);
292
0
#endif
293
294
0
      c=0; do
295
0
      {
296
0
         celt_norm *X;
297
0
         celt_glog prev1;
298
0
         celt_glog prev2;
299
0
         opus_val32 Ediff;
300
0
         celt_norm r;
301
0
         int renormalize=0;
302
0
         prev1 = prev1logE[c*m->nbEBands+i];
303
0
         prev2 = prev2logE[c*m->nbEBands+i];
304
0
         if (!encode && C==1)
305
0
         {
306
0
            prev1 = MAXG(prev1,prev1logE[m->nbEBands+i]);
307
0
            prev2 = MAXG(prev2,prev2logE[m->nbEBands+i]);
308
0
         }
309
0
         Ediff = logE[c*m->nbEBands+i]-MING(prev1,prev2);
310
0
         Ediff = MAX32(0, Ediff);
311
312
#ifdef FIXED_POINT
313
         if (Ediff < GCONST(16.f))
314
         {
315
            opus_val32 r32 = SHR32(celt_exp2_db(-Ediff),1);
316
            r = 2*MIN16(16383,r32);
317
         } else {
318
            r = 0;
319
         }
320
         if (LM==3)
321
            r = MULT16_16_Q14(23170, MIN32(23169, r));
322
         r = SHR16(MIN16(thresh, r),1);
323
         r = VSHR32(MULT16_16_Q15(sqrt_1, r),shift+14-NORM_SHIFT);
324
#else
325
         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
326
            short blocks don't have the same energy as long */
327
0
         r = 2.f*celt_exp2_db(-Ediff);
328
0
         if (LM==3)
329
0
            r *= 1.41421356f;
330
0
         r = MIN16(thresh, r);
331
0
         r = r*sqrt_1;
332
0
#endif
333
0
         X = X_+c*size+(m->eBands[i]<<LM);
334
0
         for (k=0;k<1<<LM;k++)
335
0
         {
336
            /* Detect collapse */
337
0
            if (!(collapse_masks[i*C+c]&1<<k))
338
0
            {
339
               /* Fill with noise */
340
0
               for (j=0;j<N0;j++)
341
0
               {
342
0
                  seed = celt_lcg_rand(seed);
343
0
                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
344
0
               }
345
0
               renormalize = 1;
346
0
            }
347
0
         }
348
         /* We just added some energy, so we need to renormalise */
349
0
         if (renormalize)
350
0
            renormalise_vector(X, N0<<LM, Q31ONE, arch);
351
0
      } while (++c<C);
352
0
   }
353
0
}
354
355
/* Compute the weights to use for optimizing normalized distortion across
356
   channels. We use the amplitude to weight square distortion, which means
357
   that we use the square root of the value we would have been using if we
358
   wanted to minimize the MSE in the non-normalized domain. This roughly
359
   corresponds to some quick-and-dirty perceptual experiments I ran to
360
   measure inter-aural masking (there doesn't seem to be any published data
361
   on the topic). */
362
static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
363
2.15M
{
364
2.15M
   celt_ener minE;
365
#ifdef FIXED_POINT
366
   int shift;
367
#endif
368
2.15M
   minE = MIN32(Ex, Ey);
369
   /* Adjustment to make the weights a bit more conservative. */
370
2.15M
   Ex = ADD32(Ex, minE/3);
371
2.15M
   Ey = ADD32(Ey, minE/3);
372
#ifdef FIXED_POINT
373
   shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
374
#endif
375
2.15M
   w[0] = VSHR32(Ex, shift);
376
2.15M
   w[1] = VSHR32(Ey, shift);
377
2.15M
}
378
379
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
380
189M
{
381
189M
   int i = bandID;
382
189M
   int j;
383
189M
   opus_val16 a1, a2;
384
189M
   opus_val16 left, right;
385
189M
   opus_val16 norm;
386
#ifdef FIXED_POINT
387
   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
388
#endif
389
189M
   left = VSHR32(bandE[i],shift);
390
189M
   right = VSHR32(bandE[i+m->nbEBands],shift);
391
189M
   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
392
#ifdef FIXED_POINT
393
   left = MIN32(left, norm-1);
394
   right = MIN32(right, norm-1);
395
#endif
396
189M
   a1 = DIV32_16(SHL32(EXTEND32(left),15),norm);
397
189M
   a2 = DIV32_16(SHL32(EXTEND32(right),15),norm);
398
1.91G
   for (j=0;j<N;j++)
399
1.72G
   {
400
1.72G
      X[j] = ADD32(MULT16_32_Q15(a1, X[j]), MULT16_32_Q15(a2, Y[j]));
401
      /* Side is not encoded, no need to calculate */
402
1.72G
   }
403
189M
}
404
405
static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
406
1.77M
{
407
1.77M
   int j;
408
22.4M
   for (j=0;j<N;j++)
409
20.6M
   {
410
20.6M
      opus_val32 r, l;
411
20.6M
      l = MULT32_32_Q31(QCONST32(.70710678f,31), X[j]);
412
20.6M
      r = MULT32_32_Q31(QCONST32(.70710678f,31), Y[j]);
413
20.6M
      X[j] = ADD32(l, r);
414
20.6M
      Y[j] = SUB32(r, l);
415
20.6M
   }
416
1.77M
}
417
418
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val32 mid, int N, int arch)
419
64.8M
{
420
64.8M
   int j;
421
64.8M
   opus_val32 xp=0, side=0;
422
64.8M
   opus_val32 El, Er;
423
#ifdef FIXED_POINT
424
   int kl, kr;
425
#endif
426
64.8M
   opus_val32 t, lgain, rgain;
427
428
   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
429
64.8M
   xp = celt_inner_prod_norm_shift(Y, X, N, arch);
430
64.8M
   side = celt_inner_prod_norm_shift(Y, Y, N, arch);
431
   /* Compensating for the mid normalization */
432
64.8M
   xp = MULT32_32_Q31(mid, xp);
433
   /* mid and side are in Q15, not Q14 like X and Y */
434
64.8M
   El = SHR32(MULT32_32_Q31(mid, mid),3) + side - 2*xp;
435
64.8M
   Er = SHR32(MULT32_32_Q31(mid, mid),3) + side + 2*xp;
436
64.8M
   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
437
3.64k
   {
438
3.64k
      OPUS_COPY(Y, X, N);
439
3.64k
      return;
440
3.64k
   }
441
442
#ifdef FIXED_POINT
443
   kl = celt_ilog2(El)>>1;
444
   kr = celt_ilog2(Er)>>1;
445
#endif
446
64.8M
   t = VSHR32(El, (kl<<1)-29);
447
64.8M
   lgain = celt_rsqrt_norm32(t);
448
64.8M
   t = VSHR32(Er, (kr<<1)-29);
449
64.8M
   rgain = celt_rsqrt_norm32(t);
450
451
#ifdef FIXED_POINT
452
   if (kl < 7)
453
      kl = 7;
454
   if (kr < 7)
455
      kr = 7;
456
#endif
457
458
910M
   for (j=0;j<N;j++)
459
845M
   {
460
845M
      celt_norm r, l;
461
      /* Apply mid scaling (side is already scaled) */
462
845M
      l = MULT32_32_Q31(mid, X[j]);
463
845M
      r = Y[j];
464
845M
      X[j] = VSHR32(MULT32_32_Q31(lgain, SUB32(l,r)), kl-15);
465
845M
      Y[j] = VSHR32(MULT32_32_Q31(rgain, ADD32(l,r)), kr-15);
466
845M
   }
467
64.8M
}
468
469
/* Decide whether we should spread the pulses in the current frame */
470
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
471
      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
472
      int end, int C, int M, const int *spread_weight)
473
927k
{
474
927k
   int i, c, N0;
475
927k
   int sum = 0, nbBands=0;
476
927k
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
477
927k
   int decision;
478
927k
   int hf_sum=0;
479
480
927k
   celt_assert(end>0);
481
482
927k
   N0 = M*m->shortMdctSize;
483
484
927k
   if (M*(eBands[end]-eBands[end-1]) <= 8)
485
405k
      return SPREAD_NONE;
486
661k
   c=0; do {
487
12.4M
      for (i=0;i<end;i++)
488
11.8M
      {
489
11.8M
         int j, N, tmp=0;
490
11.8M
         int tcount[3] = {0,0,0};
491
11.8M
         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
492
11.8M
         N = M*(eBands[i+1]-eBands[i]);
493
11.8M
         if (N<=8)
494
8.81M
            continue;
495
         /* Compute rough CDF of |x[j]| */
496
92.5M
         for (j=0;j<N;j++)
497
89.5M
         {
498
89.5M
            opus_val32 x2N; /* Q13 */
499
500
89.5M
            x2N = MULT16_16(MULT16_16_Q15(SHR32(x[j], NORM_SHIFT-14), SHR32(x[j], NORM_SHIFT-14)), N);
501
89.5M
            if (x2N < QCONST16(0.25f,13))
502
36.0M
               tcount[0]++;
503
89.5M
            if (x2N < QCONST16(0.0625f,13))
504
20.0M
               tcount[1]++;
505
89.5M
            if (x2N < QCONST16(0.015625f,13))
506
10.8M
               tcount[2]++;
507
89.5M
         }
508
509
         /* Only include four last bands (8 kHz and up) */
510
2.98M
         if (i>m->nbEBands-4)
511
622k
            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
512
2.98M
         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
513
2.98M
         sum += tmp*spread_weight[i];
514
2.98M
         nbBands+=spread_weight[i];
515
2.98M
      }
516
661k
   } while (++c<C);
517
518
521k
   if (update_hf)
519
24.8k
   {
520
24.8k
      if (hf_sum)
521
22.2k
         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
522
24.8k
      *hf_average = (*hf_average+hf_sum)>>1;
523
24.8k
      hf_sum = *hf_average;
524
24.8k
      if (*tapset_decision==2)
525
5.78k
         hf_sum += 4;
526
19.0k
      else if (*tapset_decision==0)
527
17.3k
         hf_sum -= 4;
528
24.8k
      if (hf_sum > 22)
529
5.79k
         *tapset_decision=2;
530
19.0k
      else if (hf_sum > 18)
531
1.70k
         *tapset_decision=1;
532
17.3k
      else
533
17.3k
         *tapset_decision=0;
534
24.8k
   }
535
   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
536
521k
   celt_assert(nbBands>0); /* end has to be non-zero */
537
521k
   celt_assert(sum>=0);
538
521k
   sum = celt_udiv((opus_int32)sum<<8, nbBands);
539
   /* Recursive averaging */
540
521k
   sum = (sum+*average)>>1;
541
521k
   *average = sum;
542
   /* Hysteresis */
543
521k
   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
544
521k
   if (sum < 80)
545
310k
   {
546
310k
      decision = SPREAD_AGGRESSIVE;
547
310k
   } else if (sum < 256)
548
173k
   {
549
173k
      decision = SPREAD_NORMAL;
550
173k
   } else if (sum < 384)
551
22.4k
   {
552
22.4k
      decision = SPREAD_LIGHT;
553
22.4k
   } else {
554
15.0k
      decision = SPREAD_NONE;
555
15.0k
   }
556
#ifdef FUZZING
557
   decision = rand()&0x3;
558
   *tapset_decision=rand()%3;
559
#endif
560
521k
   return decision;
561
521k
}
562
563
/* Indexing table for converting from natural Hadamard to ordery Hadamard
564
   This is essentially a bit-reversed Gray, on top of which we've added
565
   an inversion of the order because we want the DC at the end rather than
566
   the beginning. The lines are for N=2, 4, 8, 16 */
567
static const int ordery_table[] = {
568
       1,  0,
569
       3,  0,  2,  1,
570
       7,  0,  4,  3,  6,  1,  5,  2,
571
      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
572
};
573
574
static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
575
8.11M
{
576
8.11M
   int i,j;
577
8.11M
   VARDECL(celt_norm, tmp);
578
8.11M
   int N;
579
8.11M
   SAVE_STACK;
580
8.11M
   N = N0*stride;
581
8.11M
   ALLOC(tmp, N, celt_norm);
582
8.11M
   celt_assert(stride>0);
583
8.11M
   if (hadamard)
584
1.66M
   {
585
1.66M
      const int *ordery = ordery_table+stride-2;
586
8.24M
      for (i=0;i<stride;i++)
587
6.57M
      {
588
36.7M
         for (j=0;j<N0;j++)
589
30.1M
            tmp[ordery[i]*N0+j] = X[j*stride+i];
590
6.57M
      }
591
6.44M
   } else {
592
54.0M
      for (i=0;i<stride;i++)
593
170M
         for (j=0;j<N0;j++)
594
122M
            tmp[i*N0+j] = X[j*stride+i];
595
6.44M
   }
596
8.11M
   OPUS_COPY(X, tmp, N);
597
8.11M
   RESTORE_STACK;
598
8.11M
}
599
600
static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
601
1.55M
{
602
1.55M
   int i,j;
603
1.55M
   VARDECL(celt_norm, tmp);
604
1.55M
   int N;
605
1.55M
   SAVE_STACK;
606
1.55M
   N = N0*stride;
607
1.55M
   ALLOC(tmp, N, celt_norm);
608
1.55M
   if (hadamard)
609
594k
   {
610
594k
      const int *ordery = ordery_table+stride-2;
611
2.97M
      for (i=0;i<stride;i++)
612
12.1M
         for (j=0;j<N0;j++)
613
9.76M
            tmp[j*stride+i] = X[ordery[i]*N0+j];
614
956k
   } else {
615
9.84M
      for (i=0;i<stride;i++)
616
27.6M
         for (j=0;j<N0;j++)
617
18.7M
            tmp[j*stride+i] = X[i*N0+j];
618
956k
   }
619
1.55M
   OPUS_COPY(X, tmp, N);
620
1.55M
   RESTORE_STACK;
621
1.55M
}
622
623
void haar1(celt_norm *X, int N0, int stride)
624
398M
{
625
398M
   int i, j;
626
398M
   N0 >>= 1;
627
1.27G
   for (i=0;i<stride;i++)
628
3.84G
      for (j=0;j<N0;j++)
629
2.97G
      {
630
2.97G
         opus_val32 tmp1, tmp2;
631
2.97G
         tmp1 = MULT32_32_Q31(QCONST32(.70710678f,31), X[stride*2*j+i]);
632
2.97G
         tmp2 = MULT32_32_Q31(QCONST32(.70710678f,31), X[stride*(2*j+1)+i]);
633
2.97G
         X[stride*2*j+i] = ADD32(tmp1, tmp2);
634
2.97G
         X[stride*(2*j+1)+i] = SUB32(tmp1, tmp2);
635
2.97G
      }
636
398M
}
637
638
static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
639
213M
{
640
213M
   static const opus_int16 exp2_table8[8] =
641
213M
      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
642
213M
   int qn, qb;
643
213M
   int N2 = 2*N-1;
644
213M
   if (stereo && N==2)
645
62.1M
      N2--;
646
   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
647
       always have enough bits left over to code at least one pulse in the
648
       side; otherwise it would collapse, since it doesn't get folded. */
649
213M
   qb = celt_sudiv(b+N2*offset, N2);
650
213M
   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
651
652
213M
   qb = IMIN(8<<BITRES, qb);
653
654
213M
   if (qb<(1<<BITRES>>1)) {
655
187M
      qn = 1;
656
187M
   } else {
657
26.1M
      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
658
26.1M
      qn = (qn+1)>>1<<1;
659
26.1M
   }
660
213M
   celt_assert(qn <= 256);
661
213M
   return qn;
662
213M
}
663
664
struct band_ctx {
665
   int encode;
666
   int resynth;
667
   const CELTMode *m;
668
   int i;
669
   int intensity;
670
   int spread;
671
   int tf_change;
672
   ec_ctx *ec;
673
   opus_int32 remaining_bits;
674
   const celt_ener *bandE;
675
   opus_uint32 seed;
676
   int arch;
677
   int theta_round;
678
   int disable_inv;
679
   int avoid_split_noise;
680
#ifdef ENABLE_QEXT
681
   ec_ctx *ext_ec;
682
   int extra_bits;
683
   opus_int32 ext_total_bits;
684
   int extra_bands;
685
#endif
686
};
687
688
struct split_ctx {
689
   int inv;
690
   int imid;
691
   int iside;
692
   int delta;
693
   int itheta;
694
#ifdef ENABLE_QEXT
695
   int itheta_q30;
696
#endif
697
   int qalloc;
698
};
699
700
static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
701
      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
702
      int LM,
703
      int stereo, int *fill ARG_QEXT(int *ext_b))
704
213M
{
705
213M
   int qn;
706
213M
   int itheta=0;
707
213M
   int itheta_q30=0;
708
213M
   int delta;
709
213M
   int imid, iside;
710
213M
   int qalloc;
711
213M
   int pulse_cap;
712
213M
   int offset;
713
213M
   opus_int32 tell;
714
213M
   int inv=0;
715
213M
   int encode;
716
213M
   const CELTMode *m;
717
213M
   int i;
718
213M
   int intensity;
719
213M
   ec_ctx *ec;
720
213M
   const celt_ener *bandE;
721
722
213M
   encode = ctx->encode;
723
213M
   m = ctx->m;
724
213M
   i = ctx->i;
725
213M
   intensity = ctx->intensity;
726
213M
   ec = ctx->ec;
727
213M
   bandE = ctx->bandE;
728
729
   /* Decide on the resolution to give to the split parameter theta */
730
213M
   pulse_cap = m->logN[i]+LM*(1<<BITRES);
731
213M
   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
732
213M
   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
733
213M
   if (stereo && i>=intensity)
734
188M
      qn = 1;
735
213M
   if (encode)
736
213M
   {
737
      /* theta is the atan() of the ratio between the (normalized)
738
         side and mid. With just that parameter, we can re-scale both
739
         mid and side because we know that 1) they have unit norm and
740
         2) they are orthogonal. */
741
213M
      itheta_q30 = stereo_itheta(X, Y, stereo, N, ctx->arch);
742
213M
      itheta = itheta_q30>>16;
743
213M
   }
744
213M
   tell = ec_tell_frac(ec);
745
213M
   if (qn!=1)
746
25.0M
   {
747
25.0M
      if (encode)
748
25.0M
      {
749
25.0M
         if (!stereo || ctx->theta_round == 0)
750
23.1M
         {
751
23.1M
            itheta = (itheta*(opus_int32)qn+8192)>>14;
752
23.1M
            if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
753
280k
            {
754
               /* Check if the selected value of theta will cause the bit allocation
755
                  to inject noise on one side. If so, make sure the energy of that side
756
                  is zero. */
757
280k
               int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
758
280k
               imid = bitexact_cos((opus_int16)unquantized);
759
280k
               iside = bitexact_cos((opus_int16)(16384-unquantized));
760
280k
               delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
761
280k
               if (delta > *b)
762
911
                  itheta = qn;
763
279k
               else if (delta < -*b)
764
1.36k
                  itheta = 0;
765
280k
            }
766
23.1M
         } else {
767
1.93M
            int down;
768
            /* Bias quantization towards itheta=0 and itheta=16384. */
769
1.93M
            int bias = itheta > 8192 ? 32767/qn : -32767/qn;
770
1.93M
            down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
771
1.93M
            if (ctx->theta_round < 0)
772
965k
               itheta = down;
773
965k
            else
774
965k
               itheta = down+1;
775
1.93M
         }
776
25.0M
      }
777
      /* Entropy coding of the angle. We use a uniform pdf for the
778
         time split, a step for stereo, and a triangular one for the rest. */
779
25.0M
      if (stereo && N>2)
780
2.26M
      {
781
2.26M
         int p0 = 3;
782
2.26M
         int x = itheta;
783
2.26M
         int x0 = qn/2;
784
2.26M
         int ft = p0*(x0+1) + x0;
785
         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
786
2.26M
         if (encode)
787
2.26M
         {
788
2.26M
            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
789
2.26M
         } else {
790
0
            int fs;
791
0
            fs=ec_decode(ec,ft);
792
0
            if (fs<(x0+1)*p0)
793
0
               x=fs/p0;
794
0
            else
795
0
               x=x0+1+(fs-(x0+1)*p0);
796
0
            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
797
0
            itheta = x;
798
0
         }
799
22.7M
      } else if (B0>1 || stereo) {
800
         /* Uniform pdf */
801
4.45M
         if (encode)
802
4.45M
            ec_enc_uint(ec, itheta, qn+1);
803
0
         else
804
0
            itheta = ec_dec_uint(ec, qn+1);
805
18.3M
      } else {
806
18.3M
         int fs=1, ft;
807
18.3M
         ft = ((qn>>1)+1)*((qn>>1)+1);
808
18.3M
         if (encode)
809
18.3M
         {
810
18.3M
            int fl;
811
812
18.3M
            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
813
18.3M
            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
814
18.3M
             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
815
816
18.3M
            ec_encode(ec, fl, fl+fs, ft);
817
18.3M
         } else {
818
            /* Triangular pdf */
819
0
            int fl=0;
820
0
            int fm;
821
0
            fm = ec_decode(ec, ft);
822
823
0
            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
824
0
            {
825
0
               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
826
0
               fs = itheta + 1;
827
0
               fl = itheta*(itheta + 1)>>1;
828
0
            }
829
0
            else
830
0
            {
831
0
               itheta = (2*(qn + 1)
832
0
                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
833
0
               fs = qn + 1 - itheta;
834
0
               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
835
0
            }
836
837
0
            ec_dec_update(ec, fl, fl+fs, ft);
838
0
         }
839
18.3M
      }
840
25.0M
      celt_assert(itheta>=0);
841
25.0M
      itheta = celt_udiv((opus_int32)itheta*16384, qn);
842
#ifdef ENABLE_QEXT
843
      *ext_b = IMIN(*ext_b, ctx->ext_total_bits - (opus_int32)ec_tell_frac(ctx->ext_ec));
844
      if (*ext_b >= 2*N<<BITRES && ctx->ext_total_bits-ec_tell_frac(ctx->ext_ec)-1 > 2<<BITRES) {
845
         int extra_bits;
846
         int ext_tell = ec_tell_frac(ctx->ext_ec);
847
         extra_bits = IMIN(12, IMAX(2, celt_sudiv(*ext_b, (2*N-1)<<BITRES)));
848
         if (encode) {
849
            itheta_q30 = itheta_q30 - (itheta<<16);
850
            itheta_q30 = (itheta_q30*(opus_int64)qn*((1<<extra_bits)-1)+(1<<29))>>30;
851
            itheta_q30 += (1<<(extra_bits-1))-1;
852
            itheta_q30 = IMAX(0, IMIN((1<<extra_bits)-2, itheta_q30));
853
            ec_enc_uint(ctx->ext_ec, itheta_q30, (1<<extra_bits)-1);
854
         } else {
855
            itheta_q30 = ec_dec_uint(ctx->ext_ec, (1<<extra_bits)-1);
856
         }
857
         itheta_q30 -= (1<<(extra_bits-1))-1;
858
         itheta_q30 = (itheta<<16) + itheta_q30*(opus_int64)(1<<30)/(qn*((1<<extra_bits)-1));
859
         /* Hard bounds on itheta (can only trigger on corrupted bitstreams). */
860
         itheta_q30 = IMAX(0, IMIN(itheta_q30, 1073741824));
861
         *ext_b -= ec_tell_frac(ctx->ext_ec) - ext_tell;
862
      } else {
863
         itheta_q30 = (opus_int32)itheta<<16;
864
      }
865
#endif
866
25.0M
      if (encode && stereo)
867
2.78M
      {
868
2.78M
         if (itheta==0)
869
1.01M
            intensity_stereo(m, X, Y, bandE, i, N);
870
1.77M
         else
871
1.77M
            stereo_split(X, Y, N);
872
2.78M
      }
873
      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
874
               Let's do that at higher complexity */
875
188M
   } else if (stereo) {
876
188M
      if (encode)
877
188M
      {
878
188M
         inv = itheta > 8192 && !ctx->disable_inv;
879
188M
         if (inv)
880
252k
         {
881
252k
            int j;
882
5.28M
            for (j=0;j<N;j++)
883
5.03M
               Y[j] = -Y[j];
884
252k
         }
885
188M
         intensity_stereo(m, X, Y, bandE, i, N);
886
188M
      }
887
188M
      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
888
2.03M
      {
889
2.03M
         if (encode)
890
2.03M
            ec_enc_bit_logp(ec, inv, 2);
891
0
         else
892
0
            inv = ec_dec_bit_logp(ec, 2);
893
2.03M
      } else
894
186M
         inv = 0;
895
      /* inv flag override to avoid problems with downmixing. */
896
188M
      if (ctx->disable_inv)
897
89.3M
         inv = 0;
898
188M
      itheta = 0;
899
188M
      itheta_q30 = 0;
900
188M
   }
901
213M
   qalloc = ec_tell_frac(ec) - tell;
902
213M
   *b -= qalloc;
903
904
213M
   if (itheta == 0)
905
204M
   {
906
204M
      imid = 32767;
907
204M
      iside = 0;
908
204M
      *fill &= (1<<B)-1;
909
204M
      delta = -16384;
910
204M
   } else if (itheta == 16384)
911
464k
   {
912
464k
      imid = 0;
913
464k
      iside = 32767;
914
464k
      *fill &= ((1<<B)-1)<<B;
915
464k
      delta = 16384;
916
8.75M
   } else {
917
8.75M
      imid = bitexact_cos((opus_int16)itheta);
918
8.75M
      iside = bitexact_cos((opus_int16)(16384-itheta));
919
      /* This is the mid vs side allocation that minimizes squared error
920
         in that band. */
921
8.75M
      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
922
8.75M
   }
923
924
213M
   sctx->inv = inv;
925
213M
   sctx->imid = imid;
926
213M
   sctx->iside = iside;
927
213M
   sctx->delta = delta;
928
213M
   sctx->itheta = itheta;
929
#ifdef ENABLE_QEXT
930
   sctx->itheta_q30 = itheta_q30;
931
#endif
932
213M
   sctx->qalloc = qalloc;
933
213M
}
934
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
935
      celt_norm *lowband_out)
936
260M
{
937
260M
   int c;
938
260M
   int stereo;
939
260M
   celt_norm *x = X;
940
260M
   int encode;
941
260M
   ec_ctx *ec;
942
943
260M
   encode = ctx->encode;
944
260M
   ec = ctx->ec;
945
946
260M
   stereo = Y != NULL;
947
332M
   c=0; do {
948
332M
      int sign=0;
949
332M
      if (ctx->remaining_bits>=1<<BITRES)
950
13.1M
      {
951
13.1M
         if (encode)
952
13.1M
         {
953
13.1M
            sign = x[0]<0;
954
13.1M
            ec_enc_bits(ec, sign, 1);
955
13.1M
         } else {
956
0
            sign = ec_dec_bits(ec, 1);
957
0
         }
958
13.1M
         ctx->remaining_bits -= 1<<BITRES;
959
13.1M
      }
960
332M
      if (ctx->resynth)
961
52.2M
         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
962
332M
      x = Y;
963
332M
   } while (++c<1+stereo);
964
260M
   if (lowband_out)
965
260M
      lowband_out[0] = SHR32(X[0],4);
966
260M
   return 1;
967
260M
}
968
969
/* This function is responsible for encoding and decoding a mono partition.
970
   It can split the band in two and transmit the energy difference with
971
   the two half-bands. It can be called recursively so bands can end up being
972
   split in 8 parts. */
973
static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
974
      int N, int b, int B, celt_norm *lowband,
975
      int LM,
976
      opus_val32 gain, int fill
977
      ARG_QEXT(int ext_b))
978
756M
{
979
756M
   const unsigned char *cache;
980
756M
   int q;
981
756M
   int curr_bits;
982
756M
   int imid=0, iside=0;
983
756M
   int B0=B;
984
756M
   opus_val32 mid=0, side=0;
985
756M
   unsigned cm=0;
986
756M
   celt_norm *Y=NULL;
987
756M
   int encode;
988
756M
   const CELTMode *m;
989
756M
   int i;
990
756M
   int spread;
991
756M
   ec_ctx *ec;
992
993
756M
   encode = ctx->encode;
994
756M
   m = ctx->m;
995
756M
   i = ctx->i;
996
756M
   spread = ctx->spread;
997
756M
   ec = ctx->ec;
998
999
   /* If we need 1.5 more bit than we can produce, split the band in two. */
1000
756M
   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
1001
756M
   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
1002
22.2M
   {
1003
22.2M
      int mbits, sbits, delta;
1004
22.2M
      int itheta;
1005
22.2M
      int qalloc;
1006
22.2M
      struct split_ctx sctx;
1007
22.2M
      celt_norm *next_lowband2=NULL;
1008
22.2M
      opus_int32 rebalance;
1009
1010
22.2M
      N >>= 1;
1011
22.2M
      Y = X+N;
1012
22.2M
      LM -= 1;
1013
22.2M
      if (B==1)
1014
18.3M
         fill = (fill&1)|(fill<<1);
1015
22.2M
      B = (B+1)>>1;
1016
1017
22.2M
      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill ARG_QEXT(&ext_b));
1018
22.2M
      imid = sctx.imid;
1019
22.2M
      iside = sctx.iside;
1020
22.2M
      delta = sctx.delta;
1021
22.2M
      itheta = sctx.itheta;
1022
22.2M
      qalloc = sctx.qalloc;
1023
#ifdef FIXED_POINT
1024
# ifdef ENABLE_QEXT
1025
      (void)imid;
1026
      (void)iside;
1027
      mid = celt_cos_norm32(sctx.itheta_q30);
1028
      side = celt_cos_norm32((1<<30)-sctx.itheta_q30);
1029
# else
1030
      mid = SHL32(EXTEND32(imid), 16);
1031
      side = SHL32(EXTEND32(iside), 16);
1032
# endif
1033
#else
1034
# ifdef ENABLE_QEXT
1035
      (void)imid;
1036
      (void)iside;
1037
      mid = celt_cos_norm2(sctx.itheta_q30*(1.f/(1<<30)));
1038
      side = celt_cos_norm2(1.f-sctx.itheta_q30*(1.f/(1<<30)));
1039
# else
1040
22.2M
      mid = (1.f/32768)*imid;
1041
22.2M
      side = (1.f/32768)*iside;
1042
22.2M
# endif
1043
22.2M
#endif
1044
1045
      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
1046
22.2M
      if (B0>1 && (itheta&0x3fff))
1047
3.19M
      {
1048
3.19M
         if (itheta > 8192)
1049
            /* Rough approximation for pre-echo masking */
1050
1.25M
            delta -= delta>>(4-LM);
1051
1.93M
         else
1052
            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
1053
1.93M
            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
1054
3.19M
      }
1055
22.2M
      mbits = IMAX(0, IMIN(b, (b-delta)/2));
1056
22.2M
      sbits = b-mbits;
1057
22.2M
      ctx->remaining_bits -= qalloc;
1058
1059
22.2M
      if (lowband)
1060
837k
         next_lowband2 = lowband+N; /* >32-bit split case */
1061
1062
22.2M
      rebalance = ctx->remaining_bits;
1063
22.2M
      if (mbits >= sbits)
1064
18.5M
      {
1065
18.5M
         cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
1066
18.5M
               MULT32_32_Q31(gain,mid), fill ARG_QEXT(ext_b/2));
1067
18.5M
         rebalance = mbits - (rebalance-ctx->remaining_bits);
1068
18.5M
         if (rebalance > 3<<BITRES && itheta!=0)
1069
506k
            sbits += rebalance - (3<<BITRES);
1070
18.5M
         cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1071
18.5M
               MULT32_32_Q31(gain,side), fill>>B ARG_QEXT(ext_b/2))<<(B0>>1);
1072
18.5M
      } else {
1073
3.68M
         cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1074
3.68M
               MULT32_32_Q31(gain,side), fill>>B ARG_QEXT(ext_b/2))<<(B0>>1);
1075
3.68M
         rebalance = sbits - (rebalance-ctx->remaining_bits);
1076
3.68M
         if (rebalance > 3<<BITRES && itheta!=16384)
1077
383k
            mbits += rebalance - (3<<BITRES);
1078
3.68M
         cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
1079
3.68M
               MULT32_32_Q31(gain,mid), fill ARG_QEXT(ext_b/2));
1080
3.68M
      }
1081
734M
   } else {
1082
#ifdef ENABLE_QEXT
1083
      int extra_bits;
1084
      int ext_remaining_bits;
1085
      extra_bits = ext_b/(N-1)>>BITRES;
1086
      ext_remaining_bits = ctx->ext_total_bits-(opus_int32)ec_tell_frac(ctx->ext_ec);
1087
      if (ext_remaining_bits < ((extra_bits+1)*(N-1)+N)<<BITRES) {
1088
         extra_bits = (ext_remaining_bits-(N<<BITRES))/(N-1)>>BITRES;
1089
         extra_bits = IMAX(extra_bits-1, 0);
1090
      }
1091
      extra_bits = IMIN(12, extra_bits);
1092
#endif
1093
      /* This is the basic no-split case */
1094
734M
      q = bits2pulses(m, i, LM, b);
1095
734M
      curr_bits = pulses2bits(m, i, LM, q);
1096
734M
      ctx->remaining_bits -= curr_bits;
1097
1098
      /* Ensures we can never bust the budget */
1099
735M
      while (ctx->remaining_bits < 0 && q > 0)
1100
1.07M
      {
1101
1.07M
         ctx->remaining_bits += curr_bits;
1102
1.07M
         q--;
1103
1.07M
         curr_bits = pulses2bits(m, i, LM, q);
1104
1.07M
         ctx->remaining_bits -= curr_bits;
1105
1.07M
      }
1106
1107
734M
      if (q!=0)
1108
33.1M
      {
1109
33.1M
         int K = get_pulses(q);
1110
1111
         /* Finally do the actual quantization */
1112
33.1M
         if (encode)
1113
33.1M
         {
1114
33.1M
            cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth
1115
33.1M
                           ARG_QEXT(ctx->ext_ec) ARG_QEXT(extra_bits),
1116
33.1M
                           ctx->arch);
1117
33.1M
         } else {
1118
0
            cm = alg_unquant(X, N, K, spread, B, ec, gain
1119
0
                             ARG_QEXT(ctx->ext_ec) ARG_QEXT(extra_bits));
1120
0
         }
1121
#ifdef ENABLE_QEXT
1122
      } else if (ext_b > 2*N<<BITRES)
1123
      {
1124
         extra_bits = ext_b/(N-1)>>BITRES;
1125
         ext_remaining_bits = ctx->ext_total_bits-ec_tell_frac(ctx->ext_ec);
1126
         if (ext_remaining_bits < ((extra_bits+1)*(N-1)+N)<<BITRES) {
1127
            extra_bits = (ext_remaining_bits-(N<<BITRES))/(N-1)>>BITRES;
1128
            extra_bits = IMAX(extra_bits-1, 0);
1129
         }
1130
         extra_bits = IMIN(14, extra_bits);
1131
         if (encode) cm = cubic_quant(X, N, extra_bits, B, ctx->ext_ec, gain, ctx->resynth);
1132
         else cm = cubic_unquant(X, N, extra_bits, B, ctx->ext_ec, gain);
1133
#endif
1134
700M
      } else {
1135
         /* If there's no pulse, fill the band anyway */
1136
700M
         int j;
1137
700M
         if (ctx->resynth)
1138
150M
         {
1139
150M
            unsigned cm_mask;
1140
            /* B can be as large as 16, so this shift might overflow an int on a
1141
               16-bit platform; use a long to get defined behavior.*/
1142
150M
            cm_mask = (unsigned)(1UL<<B)-1;
1143
150M
            fill &= cm_mask;
1144
150M
            if (!fill)
1145
64.4M
            {
1146
64.4M
               OPUS_CLEAR(X, N);
1147
86.1M
            } else {
1148
86.1M
               if (lowband == NULL)
1149
3.75M
               {
1150
                  /* Noise */
1151
25.3M
                  for (j=0;j<N;j++)
1152
21.6M
                  {
1153
21.6M
                     ctx->seed = celt_lcg_rand(ctx->seed);
1154
21.6M
                     X[j] = SHL32((celt_norm)((opus_int32)ctx->seed>>20), NORM_SHIFT-14);
1155
21.6M
                  }
1156
3.75M
                  cm = cm_mask;
1157
82.4M
               } else {
1158
                  /* Folded spectrum */
1159
918M
                  for (j=0;j<N;j++)
1160
835M
                  {
1161
835M
                     opus_val16 tmp;
1162
835M
                     ctx->seed = celt_lcg_rand(ctx->seed);
1163
                     /* About 48 dB below the "normal" folding level */
1164
835M
                     tmp = QCONST16(1.0f/256, NORM_SHIFT-4);
1165
835M
                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1166
835M
                     X[j] = lowband[j]+tmp;
1167
835M
                  }
1168
82.4M
                  cm = fill;
1169
82.4M
               }
1170
86.1M
               renormalise_vector(X, N, gain, ctx->arch);
1171
86.1M
            }
1172
150M
         }
1173
700M
      }
1174
734M
   }
1175
1176
756M
   return cm;
1177
756M
}
1178
1179
#ifdef ENABLE_QEXT
1180
static unsigned cubic_quant_partition(struct band_ctx *ctx, celt_norm *X, int N, int b, int B, ec_ctx *ec, int LM, opus_val32 gain, int resynth, int encode)
1181
{
1182
   celt_assert(LM>=0);
1183
   ctx->remaining_bits = ctx->ec->storage*8*8 - ec_tell_frac(ctx->ec);
1184
   b = IMIN(b, ctx->remaining_bits);
1185
   /* As long as we have at least two bits of depth, split all the way to LM=0 (not -1 like PVQ). */
1186
   if (LM==0 || b<=2*N<<BITRES) {
1187
      int res, ret;
1188
      b = IMIN(b + ((N-1)<<BITRES)/2, ctx->remaining_bits);
1189
      /* Resolution left after taking into account coding the cube face. */
1190
      res = (b-(1<<BITRES)-ctx->m->logN[ctx->i]-(LM<<BITRES)-1)/(N-1)>>BITRES;
1191
      res = IMIN(14, IMAX(0, res));
1192
      if (encode) ret = cubic_quant(X, N, res, B, ec, gain, resynth);
1193
      else ret = cubic_unquant(X, N, res, B, ec, gain);
1194
      ctx->remaining_bits = ctx->ec->storage*8*8 - ec_tell_frac(ctx->ec);
1195
      return ret;
1196
   } else {
1197
      celt_norm *Y;
1198
      opus_int32 itheta_q30;
1199
      opus_val32 g1, g2;
1200
      opus_int32 theta_res;
1201
      opus_int32 qtheta;
1202
      int delta;
1203
      int b1, b2;
1204
      int cm;
1205
      int N0;
1206
      N0 = N;
1207
      N >>= 1;
1208
      Y = X+N;
1209
      LM -= 1;
1210
      B = (B+1)>>1;
1211
      theta_res = IMIN(16, (b>>BITRES)/(N0-1) + 1);
1212
      if (encode) {
1213
         itheta_q30 = stereo_itheta(X, Y, 0, N, ctx->arch);
1214
         qtheta = (itheta_q30+(1<<(29-theta_res)))>>(30-theta_res);
1215
         ec_enc_uint(ec, qtheta, (1<<theta_res)+1);
1216
      } else {
1217
         qtheta = ec_dec_uint(ec, (1<<theta_res)+1);
1218
      }
1219
      itheta_q30 = qtheta<<(30-theta_res);
1220
      b -= theta_res<<BITRES;
1221
      delta = (N0-1) * 23 * ((itheta_q30>>16)-8192) >> (17-BITRES);
1222
1223
#ifdef FIXED_POINT
1224
      g1 = celt_cos_norm32(itheta_q30);
1225
      g2 = celt_cos_norm32((1<<30)-itheta_q30);
1226
#else
1227
      g1 = celt_cos_norm2(itheta_q30*(1.f/(1<<30)));
1228
      g2 = celt_cos_norm2(1.f-itheta_q30*(1.f/(1<<30)));
1229
#endif
1230
      if (itheta_q30 == 0) {
1231
         b1=b;
1232
         b2=0;
1233
      } else if (itheta_q30==1073741824) {
1234
         b1=0;
1235
         b2=b;
1236
      } else {
1237
         b1 = IMIN(b, IMAX(0, (b-delta)/2));
1238
         b2 = b-b1;
1239
      }
1240
      cm  = cubic_quant_partition(ctx, X, N, b1, B, ec, LM, MULT32_32_Q31(gain, g1), resynth, encode);
1241
      cm |= cubic_quant_partition(ctx, Y, N, b2, B, ec, LM, MULT32_32_Q31(gain, g2), resynth, encode);
1242
      return cm;
1243
   }
1244
}
1245
#endif
1246
1247
/* This function is responsible for encoding and decoding a band for the mono case. */
1248
static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1249
      int N, int b, int B, celt_norm *lowband,
1250
      int LM, celt_norm *lowband_out,
1251
      opus_val32 gain, celt_norm *lowband_scratch, int fill
1252
      ARG_QEXT(int ext_b))
1253
899M
{
1254
899M
   int N0=N;
1255
899M
   int N_B=N;
1256
899M
   int N_B0;
1257
899M
   int B0=B;
1258
899M
   int time_divide=0;
1259
899M
   int recombine=0;
1260
899M
   int longBlocks;
1261
899M
   unsigned cm=0;
1262
899M
   int k;
1263
899M
   int encode;
1264
899M
   int tf_change;
1265
1266
899M
   encode = ctx->encode;
1267
899M
   tf_change = ctx->tf_change;
1268
1269
899M
   longBlocks = B0==1;
1270
1271
899M
   N_B = celt_udiv(N_B, B);
1272
1273
   /* Special case for one sample */
1274
899M
   if (N==1)
1275
187M
   {
1276
187M
      return quant_band_n1(ctx, X, NULL, lowband_out);
1277
187M
   }
1278
1279
711M
   if (tf_change>0)
1280
1.46M
      recombine = tf_change;
1281
   /* Band recombining to increase frequency resolution */
1282
1283
711M
   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1284
829k
   {
1285
829k
      OPUS_COPY(lowband_scratch, lowband, N);
1286
829k
      lowband = lowband_scratch;
1287
829k
   }
1288
1289
714M
   for (k=0;k<recombine;k++)
1290
2.80M
   {
1291
2.80M
      static const unsigned char bit_interleave_table[16]={
1292
2.80M
            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1293
2.80M
      };
1294
2.80M
      if (encode)
1295
2.80M
         haar1(X, N>>k, 1<<k);
1296
2.80M
      if (lowband)
1297
282k
         haar1(lowband, N>>k, 1<<k);
1298
2.80M
      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1299
2.80M
   }
1300
711M
   B>>=recombine;
1301
711M
   N_B<<=recombine;
1302
1303
   /* Increasing the time resolution */
1304
716M
   while ((N_B&1) == 0 && tf_change<0)
1305
4.74M
   {
1306
4.74M
      if (encode)
1307
4.74M
         haar1(X, N_B, B);
1308
4.74M
      if (lowband)
1309
703k
         haar1(lowband, N_B, B);
1310
4.74M
      fill |= fill<<B;
1311
4.74M
      B <<= 1;
1312
4.74M
      N_B >>= 1;
1313
4.74M
      time_divide++;
1314
4.74M
      tf_change++;
1315
4.74M
   }
1316
711M
   B0=B;
1317
711M
   N_B0 = N_B;
1318
1319
   /* Reorganize the samples in time order instead of frequency order */
1320
711M
   if (B0>1)
1321
7.36M
   {
1322
7.36M
      if (encode)
1323
7.36M
         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1324
7.36M
      if (lowband)
1325
747k
         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1326
7.36M
   }
1327
1328
#ifdef ENABLE_QEXT
1329
   if (ctx->extra_bands && b > (3*N<<BITRES)+(ctx->m->logN[ctx->i]+8+8*LM)) {
1330
      cm = cubic_quant_partition(ctx, X, N, b, B, ctx->ec, LM, gain, ctx->resynth, encode);
1331
   } else
1332
#endif
1333
711M
   {
1334
711M
      cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill ARG_QEXT(ext_b));
1335
711M
   }
1336
1337
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1338
711M
   if (ctx->resynth)
1339
153M
   {
1340
      /* Undo the sample reorganization going from time order to frequency order */
1341
153M
      if (B0>1)
1342
1.55M
         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1343
1344
      /* Undo time-freq changes that we did earlier */
1345
153M
      N_B = N_B0;
1346
153M
      B = B0;
1347
155M
      for (k=0;k<time_divide;k++)
1348
1.45M
      {
1349
1.45M
         B >>= 1;
1350
1.45M
         N_B <<= 1;
1351
1.45M
         cm |= cm>>B;
1352
1.45M
         haar1(X, N_B, B);
1353
1.45M
      }
1354
1355
154M
      for (k=0;k<recombine;k++)
1356
607k
      {
1357
607k
         static const unsigned char bit_deinterleave_table[16]={
1358
607k
               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1359
607k
               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1360
607k
         };
1361
607k
         cm = bit_deinterleave_table[cm];
1362
607k
         haar1(X, N0>>k, 1<<k);
1363
607k
      }
1364
153M
      B<<=recombine;
1365
1366
      /* Scale output for later folding */
1367
153M
      if (lowband_out)
1368
82.0M
      {
1369
82.0M
         int j;
1370
82.0M
         opus_val16 n;
1371
82.0M
         n = celt_sqrt(SHL32(EXTEND32(N0),22));
1372
800M
         for (j=0;j<N0;j++)
1373
718M
            lowband_out[j] = MULT16_32_Q15(n,X[j]);
1374
82.0M
      }
1375
153M
      cm &= (1<<B)-1;
1376
153M
   }
1377
711M
   return cm;
1378
899M
}
1379
1380
#ifdef FIXED_POINT
1381
#define MIN_STEREO_ENERGY 2
1382
#else
1383
388M
#define MIN_STEREO_ENERGY 1e-10f
1384
#endif
1385
1386
/* This function is responsible for encoding and decoding a band for the stereo case. */
1387
static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1388
      int N, int b, int B, celt_norm *lowband,
1389
      int LM, celt_norm *lowband_out,
1390
      celt_norm *lowband_scratch, int fill
1391
      ARG_QEXT(int ext_b) ARG_QEXT(const int *cap))
1392
264M
{
1393
264M
   int imid=0, iside=0;
1394
264M
   int inv = 0;
1395
264M
   opus_val32 mid=0, side=0;
1396
264M
   unsigned cm=0;
1397
264M
   int mbits, sbits, delta;
1398
264M
   int itheta;
1399
264M
   int qalloc;
1400
264M
   struct split_ctx sctx;
1401
264M
   int orig_fill;
1402
264M
   int encode;
1403
264M
   ec_ctx *ec;
1404
1405
264M
   encode = ctx->encode;
1406
264M
   ec = ctx->ec;
1407
1408
   /* Special case for one sample */
1409
264M
   if (N==1)
1410
72.5M
   {
1411
72.5M
      return quant_band_n1(ctx, X, Y, lowband_out);
1412
72.5M
   }
1413
1414
191M
   orig_fill = fill;
1415
1416
191M
   if (encode) {
1417
191M
      if (ctx->bandE[ctx->i] < MIN_STEREO_ENERGY || ctx->bandE[ctx->m->nbEBands+ctx->i] < MIN_STEREO_ENERGY) {
1418
185M
         if (ctx->bandE[ctx->i] > ctx->bandE[ctx->m->nbEBands+ctx->i]) OPUS_COPY(Y, X, N);
1419
185M
         else OPUS_COPY(X, Y, N);
1420
185M
      }
1421
191M
   }
1422
191M
   compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill ARG_QEXT(&ext_b));
1423
191M
   inv = sctx.inv;
1424
191M
   imid = sctx.imid;
1425
191M
   iside = sctx.iside;
1426
191M
   delta = sctx.delta;
1427
191M
   itheta = sctx.itheta;
1428
191M
   qalloc = sctx.qalloc;
1429
#ifdef FIXED_POINT
1430
# ifdef ENABLE_QEXT
1431
   (void)imid;
1432
   (void)iside;
1433
   mid = celt_cos_norm32(sctx.itheta_q30);
1434
   side = celt_cos_norm32((1<<30)-sctx.itheta_q30);
1435
# else
1436
   mid = SHL32(EXTEND32(imid), 16);
1437
   side = SHL32(EXTEND32(iside), 16);
1438
# endif
1439
#else
1440
# ifdef ENABLE_QEXT
1441
   (void)imid;
1442
   (void)iside;
1443
   mid = celt_cos_norm2(sctx.itheta_q30*(1.f/(1<<30)));
1444
   side = celt_cos_norm2(1.f-sctx.itheta_q30*(1.f/(1<<30)));
1445
# else
1446
191M
   mid = (1.f/32768)*imid;
1447
191M
   side = (1.f/32768)*iside;
1448
191M
# endif
1449
191M
#endif
1450
1451
   /* This is a special case for N=2 that only works for stereo and takes
1452
      advantage of the fact that mid and side are orthogonal to encode
1453
      the side with just one bit. */
1454
191M
   if (N==2)
1455
62.1M
   {
1456
62.1M
      int c;
1457
62.1M
      int sign=0;
1458
62.1M
      celt_norm *x2, *y2;
1459
62.1M
      mbits = b;
1460
62.1M
      sbits = 0;
1461
      /* Only need one bit for the side. */
1462
62.1M
      if (itheta != 0 && itheta != 16384)
1463
278k
         sbits = 1<<BITRES;
1464
62.1M
      mbits -= sbits;
1465
62.1M
      c = itheta > 8192;
1466
62.1M
      ctx->remaining_bits -= qalloc+sbits;
1467
1468
62.1M
      x2 = c ? Y : X;
1469
62.1M
      y2 = c ? X : Y;
1470
62.1M
      if (sbits)
1471
278k
      {
1472
278k
         if (encode)
1473
278k
         {
1474
            /* Here we only need to encode a sign for the side. */
1475
            /* FIXME: Need to increase fixed-point precision? */
1476
278k
            sign = MULT32_32_Q31(x2[0],y2[1]) - MULT32_32_Q31(x2[1],y2[0]) < 0;
1477
278k
            ec_enc_bits(ec, sign, 1);
1478
278k
         } else {
1479
0
            sign = ec_dec_bits(ec, 1);
1480
0
         }
1481
278k
      }
1482
62.1M
      sign = 1-2*sign;
1483
      /* We use orig_fill here because we want to fold the side, but if
1484
         itheta==16384, we'll have cleared the low bits of fill. */
1485
62.1M
      cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1486
62.1M
            lowband_scratch, orig_fill ARG_QEXT(ext_b));
1487
      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1488
         and there's no need to worry about mixing with the other channel. */
1489
62.1M
      y2[0] = -sign*x2[1];
1490
62.1M
      y2[1] = sign*x2[0];
1491
62.1M
      if (ctx->resynth)
1492
24.1M
      {
1493
24.1M
         celt_norm tmp;
1494
24.1M
         X[0] = MULT32_32_Q31(mid, X[0]);
1495
24.1M
         X[1] = MULT32_32_Q31(mid, X[1]);
1496
24.1M
         Y[0] = MULT32_32_Q31(side, Y[0]);
1497
24.1M
         Y[1] = MULT32_32_Q31(side, Y[1]);
1498
24.1M
         tmp = X[0];
1499
24.1M
         X[0] = SUB32(tmp,Y[0]);
1500
24.1M
         Y[0] = ADD32(tmp,Y[0]);
1501
24.1M
         tmp = X[1];
1502
24.1M
         X[1] = SUB32(tmp,Y[1]);
1503
24.1M
         Y[1] = ADD32(tmp,Y[1]);
1504
24.1M
      }
1505
129M
   } else {
1506
      /* "Normal" split code */
1507
129M
      opus_int32 rebalance;
1508
1509
129M
      mbits = IMAX(0, IMIN(b, (b-delta)/2));
1510
129M
      sbits = b-mbits;
1511
129M
      ctx->remaining_bits -= qalloc;
1512
1513
129M
      rebalance = ctx->remaining_bits;
1514
129M
      if (mbits >= sbits)
1515
128M
      {
1516
#ifdef ENABLE_QEXT
1517
         int qext_extra = 0;
1518
         /* Reallocate any mid bits that cannot be used to extra mid bits. */
1519
         if (cap != NULL && ext_b != 0) qext_extra = IMAX(0, IMIN(ext_b/2, mbits - cap[ctx->i]/2));
1520
#endif
1521
         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1522
            mid for folding later. */
1523
128M
         cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1524
128M
               lowband_scratch, fill ARG_QEXT(ext_b/2+qext_extra));
1525
128M
         rebalance = mbits - (rebalance-ctx->remaining_bits);
1526
128M
         if (rebalance > 3<<BITRES && itheta!=0)
1527
46.2k
            sbits += rebalance - (3<<BITRES);
1528
#ifdef ENABLE_QEXT
1529
         /* Guard against overflowing the EC with the angle if the cubic quant used too many bits for the mid. */
1530
         if (ctx->extra_bands) sbits = IMIN(sbits, ctx->remaining_bits);
1531
#endif
1532
         /* For a stereo split, the high bits of fill are always zero, so no
1533
            folding will be done to the side. */
1534
128M
         cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B ARG_QEXT(ext_b/2-qext_extra));
1535
128M
      } else {
1536
#ifdef ENABLE_QEXT
1537
         int qext_extra = 0;
1538
         /* Reallocate any side bits that cannot be used to extra side bits. */
1539
         if (cap != NULL && ext_b != 0) qext_extra = IMAX(0, IMIN(ext_b/2, sbits - cap[ctx->i]/2));
1540
#endif
1541
         /* For a stereo split, the high bits of fill are always zero, so no
1542
            folding will be done to the side. */
1543
344k
         cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B ARG_QEXT(ext_b/2+qext_extra));
1544
344k
         rebalance = sbits - (rebalance-ctx->remaining_bits);
1545
344k
         if (rebalance > 3<<BITRES && itheta!=16384)
1546
10.2k
            mbits += rebalance - (3<<BITRES);
1547
#ifdef ENABLE_QEXT
1548
         /* Guard against overflowing the EC with the angle if the cubic quant used too many bits for the side. */
1549
         if (ctx->extra_bands) mbits = IMIN(mbits, ctx->remaining_bits);
1550
#endif
1551
         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1552
            mid for folding later. */
1553
344k
         cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1554
344k
               lowband_scratch, fill ARG_QEXT(ext_b/2-qext_extra));
1555
344k
      }
1556
129M
   }
1557
1558
1559
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1560
191M
   if (ctx->resynth)
1561
89.0M
   {
1562
89.0M
      if (N!=2)
1563
64.8M
         stereo_merge(X, Y, mid, N, ctx->arch);
1564
89.0M
      if (inv)
1565
99.5k
      {
1566
99.5k
         int j;
1567
1.44M
         for (j=0;j<N;j++)
1568
1.34M
            Y[j] = -Y[j];
1569
99.5k
      }
1570
89.0M
   }
1571
191M
   return cm;
1572
264M
}
1573
1574
#ifndef DISABLE_UPDATE_DRAFT
1575
static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1576
55.3M
{
1577
55.3M
   int n1, n2;
1578
55.3M
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1579
55.3M
   n1 = M*(eBands[start+1]-eBands[start]);
1580
55.3M
   n2 = M*(eBands[start+2]-eBands[start+1]);
1581
   /* Duplicate enough of the first band folding data to be able to fold the second band.
1582
      Copies no data for CELT-only mode. */
1583
55.3M
   OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1584
55.3M
   if (dual_stereo)
1585
41.3k
      OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1586
55.3M
}
1587
#endif
1588
1589
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1590
      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1591
      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1592
      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1593
      opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1594
      opus_uint32 *seed, int complexity, int arch, int disable_inv
1595
      ARG_QEXT(ec_ctx *ext_ec) ARG_QEXT(int *extra_pulses)
1596
      ARG_QEXT(opus_int32 ext_total_bits) ARG_QEXT(const int *cap))
1597
55.1M
{
1598
55.1M
   int i;
1599
55.1M
   opus_int32 remaining_bits;
1600
55.1M
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1601
55.1M
   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1602
55.1M
   VARDECL(celt_norm, _norm);
1603
55.1M
   VARDECL(celt_norm, _lowband_scratch);
1604
55.1M
   VARDECL(celt_norm, X_save);
1605
55.1M
   VARDECL(celt_norm, Y_save);
1606
55.1M
   VARDECL(celt_norm, X_save2);
1607
55.1M
   VARDECL(celt_norm, Y_save2);
1608
55.1M
   VARDECL(celt_norm, norm_save2);
1609
55.1M
   int resynth_alloc;
1610
55.1M
   celt_norm *lowband_scratch;
1611
55.1M
   int B;
1612
55.1M
   int M;
1613
55.1M
   int lowband_offset;
1614
55.1M
   int update_lowband = 1;
1615
55.1M
   int C = Y_ != NULL ? 2 : 1;
1616
55.1M
   int norm_offset;
1617
55.1M
   int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1618
#ifdef RESYNTH
1619
   int resynth = 1;
1620
#else
1621
55.1M
   int resynth = !encode || theta_rdo;
1622
55.1M
#endif
1623
55.1M
   struct band_ctx ctx;
1624
#ifdef ENABLE_QEXT
1625
   int ext_b;
1626
   opus_int32 ext_balance=0;
1627
   opus_int32 ext_tell=0;
1628
#endif
1629
55.1M
   SAVE_STACK;
1630
1631
55.1M
   M = 1<<LM;
1632
55.1M
   B = shortBlocks ? M : 1;
1633
55.1M
   norm_offset = M*eBands[start];
1634
   /* No need to allocate norm for the last band because we don't need an
1635
      output in that band. */
1636
55.1M
   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1637
55.1M
   norm = _norm;
1638
55.1M
   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1639
1640
   /* For decoding, we can use the last band as scratch space because we don't need that
1641
      scratch space for the last band and we don't care about the data there until we're
1642
      decoding the last band. */
1643
55.1M
   if (encode && resynth)
1644
6.93M
      resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1645
48.2M
   else
1646
48.2M
      resynth_alloc = ALLOC_NONE;
1647
55.1M
   ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1648
55.1M
   if (encode && resynth)
1649
6.93M
      lowband_scratch = _lowband_scratch;
1650
48.2M
   else
1651
48.2M
      lowband_scratch = X_+M*eBands[m->effEBands-1];
1652
55.1M
   ALLOC(X_save, resynth_alloc, celt_norm);
1653
55.1M
   ALLOC(Y_save, resynth_alloc, celt_norm);
1654
55.1M
   ALLOC(X_save2, resynth_alloc, celt_norm);
1655
55.1M
   ALLOC(Y_save2, resynth_alloc, celt_norm);
1656
55.1M
   ALLOC(norm_save2, resynth_alloc, celt_norm);
1657
1658
55.1M
   lowband_offset = 0;
1659
55.1M
   ctx.bandE = bandE;
1660
55.1M
   ctx.ec = ec;
1661
55.1M
   ctx.encode = encode;
1662
55.1M
   ctx.intensity = intensity;
1663
55.1M
   ctx.m = m;
1664
55.1M
   ctx.seed = *seed;
1665
55.1M
   ctx.spread = spread;
1666
55.1M
   ctx.arch = arch;
1667
55.1M
   ctx.disable_inv = disable_inv;
1668
55.1M
   ctx.resynth = resynth;
1669
55.1M
   ctx.theta_round = 0;
1670
#ifdef ENABLE_QEXT
1671
   ctx.ext_ec = ext_ec;
1672
   ctx.ext_total_bits = ext_total_bits;
1673
   ctx.extra_bands = end == NB_QEXT_BANDS || end == 2;
1674
   if (ctx.extra_bands) theta_rdo = 0;
1675
#endif
1676
   /* Avoid injecting noise in the first band on transients. */
1677
55.1M
   ctx.avoid_split_noise = B > 1;
1678
895M
   for (i=start;i<end;i++)
1679
840M
   {
1680
840M
      opus_int32 tell;
1681
840M
      int b;
1682
840M
      int N;
1683
840M
      opus_int32 curr_balance;
1684
840M
      int effective_lowband=-1;
1685
840M
      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1686
840M
      int tf_change=0;
1687
840M
      unsigned x_cm;
1688
840M
      unsigned y_cm;
1689
840M
      int last;
1690
1691
840M
      ctx.i = i;
1692
840M
      last = (i==end-1);
1693
1694
840M
      X = X_+M*eBands[i];
1695
840M
      if (Y_!=NULL)
1696
262M
         Y = Y_+M*eBands[i];
1697
577M
      else
1698
577M
         Y = NULL;
1699
840M
      N = M*eBands[i+1]-M*eBands[i];
1700
840M
      celt_assert(N > 0);
1701
840M
      tell = ec_tell_frac(ec);
1702
1703
      /* Compute how many bits we want to allocate to this band */
1704
840M
      if (i != start)
1705
784M
         balance -= tell;
1706
840M
      remaining_bits = total_bits-tell-1;
1707
840M
      ctx.remaining_bits = remaining_bits;
1708
#ifdef ENABLE_QEXT
1709
      if (i != start) {
1710
         ext_balance += extra_pulses[i-1] + ext_tell;
1711
      }
1712
      ext_tell = ec_tell_frac(ext_ec);
1713
      ctx.extra_bits = extra_pulses[i];
1714
      if (i != start)
1715
         ext_balance -= ext_tell;
1716
      if (i <= codedBands-1)
1717
      {
1718
         opus_int32 ext_curr_balance = celt_sudiv(ext_balance, IMIN(3, codedBands-i));
1719
         ext_b = IMAX(0, IMIN(16383, IMIN(ext_total_bits-ext_tell,extra_pulses[i]+ext_curr_balance)));
1720
      } else {
1721
         ext_b = 0;
1722
      }
1723
#endif
1724
840M
      if (i <= codedBands-1)
1725
84.5M
      {
1726
84.5M
         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1727
84.5M
         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1728
755M
      } else {
1729
755M
         b = 0;
1730
755M
      }
1731
1732
840M
#ifndef DISABLE_UPDATE_DRAFT
1733
840M
      if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1734
9.02M
            lowband_offset = i;
1735
840M
      if (i == start+1)
1736
55.1M
         special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1737
#else
1738
      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1739
            lowband_offset = i;
1740
#endif
1741
1742
840M
      tf_change = tf_res[i];
1743
840M
      ctx.tf_change = tf_change;
1744
840M
      if (i>=m->effEBands)
1745
0
      {
1746
0
         X=norm;
1747
0
         if (Y_!=NULL)
1748
0
            Y = norm;
1749
0
         lowband_scratch = NULL;
1750
0
      }
1751
840M
      if (last && !theta_rdo)
1752
48.2M
         lowband_scratch = NULL;
1753
1754
      /* Get a conservative estimate of the collapse_mask's for the bands we're
1755
         going to be folding from. */
1756
840M
      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1757
105M
      {
1758
105M
         int fold_start;
1759
105M
         int fold_end;
1760
105M
         int fold_i;
1761
         /* This ensures we never repeat spectral content within one band */
1762
105M
         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1763
105M
         fold_start = lowband_offset;
1764
105M
         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1765
105M
         fold_end = lowband_offset-1;
1766
105M
#ifndef DISABLE_UPDATE_DRAFT
1767
283M
         while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1768
#else
1769
         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1770
#endif
1771
105M
         x_cm = y_cm = 0;
1772
284M
         fold_i = fold_start; do {
1773
284M
           x_cm |= collapse_masks[fold_i*C+0];
1774
284M
           y_cm |= collapse_masks[fold_i*C+C-1];
1775
284M
         } while (++fold_i<fold_end);
1776
105M
      }
1777
      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1778
         always) be non-zero. */
1779
734M
      else
1780
734M
         x_cm = y_cm = (1<<B)-1;
1781
1782
840M
      if (dual_stereo && i==intensity)
1783
26.9k
      {
1784
26.9k
         int j;
1785
1786
         /* Switch off dual stereo to do intensity. */
1787
26.9k
         dual_stereo = 0;
1788
26.9k
         if (resynth)
1789
0
            for (j=0;j<M*eBands[i]-norm_offset;j++)
1790
0
               norm[j] = HALF32(norm[j]+norm2[j]);
1791
26.9k
      }
1792
840M
      if (dual_stereo)
1793
543k
      {
1794
543k
         x_cm = quant_band(&ctx, X, N, b/2, B,
1795
543k
               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1796
543k
               last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm ARG_QEXT(ext_b/2));
1797
543k
         y_cm = quant_band(&ctx, Y, N, b/2, B,
1798
543k
               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1799
543k
               last?NULL:norm2+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, y_cm ARG_QEXT(ext_b/2));
1800
839M
      } else {
1801
839M
         if (Y!=NULL)
1802
261M
         {
1803
261M
            if (theta_rdo && i < intensity)
1804
2.15M
            {
1805
2.15M
               ec_ctx ec_save, ec_save2;
1806
2.15M
               struct band_ctx ctx_save, ctx_save2;
1807
2.15M
               opus_val32 dist0, dist1;
1808
2.15M
               unsigned cm, cm2;
1809
2.15M
               int nstart_bytes, nend_bytes, save_bytes;
1810
2.15M
               unsigned char *bytes_buf;
1811
2.15M
               unsigned char bytes_save[1275];
1812
#ifdef ENABLE_QEXT
1813
               ec_ctx ext_ec_save, ext_ec_save2;
1814
               unsigned char *ext_bytes_buf;
1815
               int ext_nstart_bytes, ext_nend_bytes, ext_save_bytes;
1816
               unsigned char ext_bytes_save[QEXT_PACKET_SIZE_CAP];
1817
#endif
1818
2.15M
               opus_val16 w[2];
1819
2.15M
               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1820
               /* Make a copy. */
1821
2.15M
               cm = x_cm|y_cm;
1822
2.15M
               ec_save = *ec;
1823
#ifdef ENABLE_QEXT
1824
               ext_ec_save = *ext_ec;
1825
#endif
1826
2.15M
               ctx_save = ctx;
1827
2.15M
               OPUS_COPY(X_save, X, N);
1828
2.15M
               OPUS_COPY(Y_save, Y, N);
1829
               /* Encode and round down. */
1830
2.15M
               ctx.theta_round = -1;
1831
2.15M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1832
2.15M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1833
2.15M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1834
2.15M
               dist0 = MULT16_32_Q15(w[0], celt_inner_prod_norm_shift(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod_norm_shift(Y_save, Y, N, arch));
1835
1836
               /* Save first result. */
1837
2.15M
               cm2 = x_cm;
1838
2.15M
               ec_save2 = *ec;
1839
#ifdef ENABLE_QEXT
1840
               ext_ec_save2 = *ext_ec;
1841
#endif
1842
2.15M
               ctx_save2 = ctx;
1843
2.15M
               OPUS_COPY(X_save2, X, N);
1844
2.15M
               OPUS_COPY(Y_save2, Y, N);
1845
2.15M
               if (!last)
1846
2.12M
                  OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1847
2.15M
               nstart_bytes = ec_save.offs;
1848
2.15M
               nend_bytes = ec_save.storage;
1849
2.15M
               bytes_buf = ec_save.buf+nstart_bytes;
1850
2.15M
               save_bytes = nend_bytes-nstart_bytes;
1851
2.15M
               OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1852
#ifdef ENABLE_QEXT
1853
               ext_nstart_bytes = ext_ec_save.offs;
1854
               ext_nend_bytes = ext_ec_save.storage;
1855
               ext_bytes_buf = ext_ec_save.buf!=NULL ? ext_ec_save.buf+ext_nstart_bytes : NULL;
1856
               ext_save_bytes = ext_nend_bytes-ext_nstart_bytes;
1857
               if (ext_save_bytes) OPUS_COPY(ext_bytes_save, ext_bytes_buf, ext_save_bytes);
1858
#endif
1859
               /* Restore */
1860
2.15M
               *ec = ec_save;
1861
#ifdef ENABLE_QEXT
1862
               *ext_ec = ext_ec_save;
1863
#endif
1864
2.15M
               ctx = ctx_save;
1865
2.15M
               OPUS_COPY(X, X_save, N);
1866
2.15M
               OPUS_COPY(Y, Y_save, N);
1867
2.15M
#ifndef DISABLE_UPDATE_DRAFT
1868
2.15M
               if (i == start+1)
1869
196k
                  special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1870
2.15M
#endif
1871
               /* Encode and round up. */
1872
2.15M
               ctx.theta_round = 1;
1873
2.15M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1874
2.15M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1875
2.15M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1876
2.15M
               dist1 = MULT16_32_Q15(w[0], celt_inner_prod_norm_shift(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod_norm_shift(Y_save, Y, N, arch));
1877
2.15M
               if (dist0 >= dist1) {
1878
1.83M
                  x_cm = cm2;
1879
1.83M
                  *ec = ec_save2;
1880
#ifdef ENABLE_QEXT
1881
                  *ext_ec = ext_ec_save2;
1882
#endif
1883
1.83M
                  ctx = ctx_save2;
1884
1.83M
                  OPUS_COPY(X, X_save2, N);
1885
1.83M
                  OPUS_COPY(Y, Y_save2, N);
1886
1.83M
                  if (!last)
1887
1.82M
                     OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1888
1.83M
                  OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1889
#ifdef ENABLE_QEXT
1890
                  if (ext_save_bytes) OPUS_COPY(ext_bytes_buf, ext_bytes_save, ext_save_bytes);
1891
#endif
1892
1.83M
               }
1893
259M
            } else {
1894
259M
               ctx.theta_round = 0;
1895
259M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1896
259M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1897
259M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1898
259M
            }
1899
577M
         } else {
1900
577M
            x_cm = quant_band(&ctx, X, N, b, B,
1901
577M
                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1902
577M
                  last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm|y_cm ARG_QEXT(ext_b));
1903
577M
         }
1904
839M
         y_cm = x_cm;
1905
839M
      }
1906
840M
      collapse_masks[i*C+0] = (unsigned char)x_cm;
1907
840M
      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1908
840M
      balance += pulses[i] + tell;
1909
1910
      /* Update the folding position only as long as we have 1 bit/sample depth. */
1911
840M
      update_lowband = b>(N<<BITRES);
1912
      /* We only need to avoid noise on a split for the first band. After that, we
1913
         have folding. */
1914
840M
      ctx.avoid_split_noise = 0;
1915
840M
   }
1916
55.1M
   *seed = ctx.seed;
1917
1918
55.1M
   RESTORE_STACK;
1919
55.1M
}