Coverage Report

Created: 2025-12-31 07:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/celt/bands.c
Line
Count
Source
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Copyright (c) 2008-2009 Gregory Maxwell
4
   Written by Jean-Marc Valin and Gregory Maxwell */
5
/*
6
   Redistribution and use in source and binary forms, with or without
7
   modification, are permitted provided that the following conditions
8
   are met:
9
10
   - Redistributions of source code must retain the above copyright
11
   notice, this list of conditions and the following disclaimer.
12
13
   - Redistributions in binary form must reproduce the above copyright
14
   notice, this list of conditions and the following disclaimer in the
15
   documentation and/or other materials provided with the distribution.
16
17
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
*/
29
30
#ifdef HAVE_CONFIG_H
31
#include "config.h"
32
#endif
33
34
#include <math.h>
35
#include "bands.h"
36
#include "modes.h"
37
#include "vq.h"
38
#include "cwrs.h"
39
#include "stack_alloc.h"
40
#include "os_support.h"
41
#include "mathops.h"
42
#include "rate.h"
43
#include "quant_bands.h"
44
#include "pitch.h"
45
46
int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47
16.4M
{
48
16.4M
   int i;
49
242M
   for (i=0;i<N;i++)
50
242M
   {
51
242M
      if (val < thresholds[i])
52
16.4M
         break;
53
242M
   }
54
16.4M
   if (i>prev && val < thresholds[prev]+hysteresis[prev])
55
28.9k
      i=prev;
56
16.4M
   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57
10.2k
      i=prev;
58
16.4M
   return i;
59
16.4M
}
60
61
opus_uint32 celt_lcg_rand(opus_uint32 seed)
62
841M
{
63
841M
   return 1664525 * seed + 1013904223;
64
841M
}
65
66
/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67
   with this approximation is important because it has an impact on the bit allocation */
68
opus_int16 bitexact_cos(opus_int16 x)
69
18.8M
{
70
18.8M
   opus_int32 tmp;
71
18.8M
   opus_int16 x2;
72
18.8M
   tmp = (4096+((opus_int32)(x)*(x)))>>13;
73
18.8M
   celt_sig_assert(tmp<=32767);
74
18.8M
   x2 = tmp;
75
18.8M
   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76
18.8M
   celt_sig_assert(x2<=32766);
77
18.8M
   return 1+x2;
78
18.8M
}
79
80
int bitexact_log2tan(int isin,int icos)
81
9.42M
{
82
9.42M
   int lc;
83
9.42M
   int ls;
84
9.42M
   lc=EC_ILOG(icos);
85
9.42M
   ls=EC_ILOG(isin);
86
9.42M
   icos<<=15-lc;
87
9.42M
   isin<<=15-ls;
88
9.42M
   return (ls-lc)*(1<<11)
89
9.42M
         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90
9.42M
         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91
9.42M
}
92
93
#ifdef FIXED_POINT
94
/* Compute the amplitude (sqrt energy) in each of the bands */
95
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
96
{
97
   int i, c, N;
98
   const opus_int16 *eBands = m->eBands;
99
   (void)arch;
100
   N = m->shortMdctSize<<LM;
101
   c=0; do {
102
      for (i=0;i<end;i++)
103
      {
104
         int j;
105
         opus_val32 maxval=0;
106
         opus_val32 sum = 0;
107
108
         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
109
         if (maxval > 0)
110
         {
111
            int shift = IMAX(0, 30 - celt_ilog2(maxval+(maxval>>14)+1) - ((((m->logN[i]+7)>>BITRES)+LM+1)>>1));
112
            j=eBands[i]<<LM; do {
113
               opus_val32 x = SHL32(X[j+c*N],shift);
114
               sum = ADD32(sum, MULT32_32_Q31(x, x));
115
            } while (++j<eBands[i+1]<<LM);
116
            bandE[i+c*m->nbEBands] = MAX32(maxval, PSHR32(celt_sqrt32(SHR32(sum,1)), shift));
117
         } else {
118
            bandE[i+c*m->nbEBands] = EPSILON;
119
         }
120
      }
121
   } while (++c<C);
122
}
123
124
/* Normalise each band such that the energy is one. */
125
void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
126
{
127
   int i, c, N;
128
   const opus_int16 *eBands = m->eBands;
129
   N = M*m->shortMdctSize;
130
   c=0; do {
131
      i=0; do {
132
         int j,shift;
133
         opus_val32 E;
134
         opus_val32 g;
135
         E = bandE[i+c*m->nbEBands];
136
         /* For very low energies, we need this to make sure not to prevent energy rounding from
137
            blowing up the normalized signal. */
138
         if (E < 10) E += EPSILON;
139
         shift = 30-celt_zlog2(E);
140
         E = SHL32(E, shift);
141
         g = celt_rcp_norm32(E);
142
         j=M*eBands[i]; do {
143
            X[j+c*N] = PSHR32(MULT32_32_Q31(g, SHL32(freq[j+c*N], shift)), 30-NORM_SHIFT);
144
         } while (++j<M*eBands[i+1]);
145
      } while (++i<end);
146
   } while (++c<C);
147
}
148
149
#else /* FIXED_POINT */
150
/* Compute the amplitude (sqrt energy) in each of the bands */
151
void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM, int arch)
152
53.7M
{
153
53.7M
   int i, c, N;
154
53.7M
   const opus_int16 *eBands = m->eBands;
155
53.7M
   N = m->shortMdctSize<<LM;
156
70.2M
   c=0; do {
157
1.18G
      for (i=0;i<end;i++)
158
1.11G
      {
159
1.11G
         opus_val32 sum;
160
1.11G
         sum = 1e-27f + celt_inner_prod(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM, arch);
161
1.11G
         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
162
         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
163
1.11G
      }
164
70.2M
   } while (++c<C);
165
   /*printf ("\n");*/
166
53.7M
}
167
168
/* Normalise each band such that the energy is one. */
169
void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
170
53.5M
{
171
53.5M
   int i, c, N;
172
53.5M
   const opus_int16 *eBands = m->eBands;
173
53.5M
   N = M*m->shortMdctSize;
174
69.9M
   c=0; do {
175
1.17G
      for (i=0;i<end;i++)
176
1.10G
      {
177
1.10G
         int j;
178
1.10G
         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
179
8.59G
         for (j=M*eBands[i];j<M*eBands[i+1];j++)
180
7.48G
            X[j+c*N] = freq[j+c*N]*g;
181
1.10G
      }
182
69.9M
   } while (++c<C);
183
53.5M
}
184
185
#endif /* FIXED_POINT */
186
187
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
188
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
189
      celt_sig * OPUS_RESTRICT freq, const celt_glog *bandLogE, int start,
190
      int end, int M, int downsample, int silence)
191
0
{
192
0
   int i, N;
193
0
   int bound;
194
0
   celt_sig * OPUS_RESTRICT f;
195
0
   const celt_norm * OPUS_RESTRICT x;
196
0
   const opus_int16 *eBands = m->eBands;
197
0
   N = M*m->shortMdctSize;
198
0
   bound = M*eBands[end];
199
0
   if (downsample!=1)
200
0
      bound = IMIN(bound, N/downsample);
201
0
   if (silence)
202
0
   {
203
0
      bound = 0;
204
0
      start = end = 0;
205
0
   }
206
0
   f = freq;
207
0
   x = X+M*eBands[start];
208
0
   if (start != 0)
209
0
   {
210
0
      for (i=0;i<M*eBands[start];i++)
211
0
         *f++ = 0;
212
0
   } else {
213
0
      f += M*eBands[start];
214
0
   }
215
0
   for (i=start;i<end;i++)
216
0
   {
217
0
      int j, band_end;
218
0
      opus_val32 g;
219
0
      celt_glog lg;
220
#ifdef FIXED_POINT
221
      int shift;
222
#endif
223
0
      j=M*eBands[i];
224
0
      band_end = M*eBands[i+1];
225
0
      lg = ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],DB_SHIFT-4));
226
0
#ifndef FIXED_POINT
227
0
      g = celt_exp2_db(MIN32(32.f, lg));
228
#else
229
      /* Handle the integer part of the log energy */
230
      shift = 17-(lg>>DB_SHIFT);
231
      if (shift>=31)
232
      {
233
         shift=0;
234
         g=0;
235
      } else {
236
         /* Handle the fractional part. */
237
         g = SHL32(celt_exp2_db_frac((lg&((1<<DB_SHIFT)-1))), 2);
238
      }
239
      /* Handle extreme gains with negative shift. */
240
      if (shift<0)
241
      {
242
         /* To avoid overflow, we're
243
            capping the gain here, which is equivalent to a cap of 18 on lg.
244
            This shouldn't trigger unless the bitstream is already corrupted. */
245
         g = 2147483647;
246
         shift = 0;
247
      }
248
#endif
249
0
      do {
250
0
         *f++ = PSHR32(MULT32_32_Q31(SHL32(*x, 30-NORM_SHIFT), g), shift);
251
0
         x++;
252
0
      } while (++j<band_end);
253
0
   }
254
0
   celt_assert(start <= end);
255
0
   OPUS_CLEAR(&freq[bound], N-bound);
256
0
}
257
258
/* This prevents energy collapse for transients with multiple short MDCTs */
259
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
260
      int start, int end, const celt_glog *logE, const celt_glog *prev1logE,
261
      const celt_glog *prev2logE, const int *pulses, opus_uint32 seed, int encode, int arch)
262
0
{
263
0
   int c, i, j, k;
264
0
   for (i=start;i<end;i++)
265
0
   {
266
0
      int N0;
267
0
      opus_val16 thresh, sqrt_1;
268
0
      int depth;
269
#ifdef FIXED_POINT
270
      int shift;
271
      opus_val32 thresh32;
272
#endif
273
274
0
      N0 = m->eBands[i+1]-m->eBands[i];
275
      /* depth in 1/8 bits */
276
0
      celt_sig_assert(pulses[i]>=0);
277
0
      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
278
279
#ifdef FIXED_POINT
280
      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
281
      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
282
      {
283
         opus_val32 t;
284
         t = N0<<LM;
285
         shift = celt_ilog2(t)>>1;
286
         t = SHL32(t, (7-shift)<<1);
287
         sqrt_1 = celt_rsqrt_norm(t);
288
      }
289
#else
290
0
      thresh = .5f*celt_exp2(-.125f*depth);
291
0
      sqrt_1 = celt_rsqrt(N0<<LM);
292
0
#endif
293
294
0
      c=0; do
295
0
      {
296
0
         celt_norm *X;
297
0
         celt_glog prev1;
298
0
         celt_glog prev2;
299
0
         opus_val32 Ediff;
300
0
         celt_norm r;
301
0
         int renormalize=0;
302
0
         prev1 = prev1logE[c*m->nbEBands+i];
303
0
         prev2 = prev2logE[c*m->nbEBands+i];
304
0
         if (!encode && C==1)
305
0
         {
306
0
            prev1 = MAXG(prev1,prev1logE[m->nbEBands+i]);
307
0
            prev2 = MAXG(prev2,prev2logE[m->nbEBands+i]);
308
0
         }
309
0
         Ediff = logE[c*m->nbEBands+i]-MING(prev1,prev2);
310
0
         Ediff = MAX32(0, Ediff);
311
312
#ifdef FIXED_POINT
313
         if (Ediff < GCONST(16.f))
314
         {
315
            opus_val32 r32 = SHR32(celt_exp2_db(-Ediff),1);
316
            r = 2*MIN16(16383,r32);
317
         } else {
318
            r = 0;
319
         }
320
         if (LM==3)
321
            r = MULT16_16_Q14(23170, MIN32(23169, r));
322
         r = SHR16(MIN16(thresh, r),1);
323
         r = VSHR32(MULT16_16_Q15(sqrt_1, r),shift+14-NORM_SHIFT);
324
#else
325
         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
326
            short blocks don't have the same energy as long */
327
0
         r = 2.f*celt_exp2_db(-Ediff);
328
0
         if (LM==3)
329
0
            r *= 1.41421356f;
330
0
         r = MIN16(thresh, r);
331
0
         r = r*sqrt_1;
332
0
#endif
333
0
         X = X_+c*size+(m->eBands[i]<<LM);
334
0
         for (k=0;k<1<<LM;k++)
335
0
         {
336
            /* Detect collapse */
337
0
            if (!(collapse_masks[i*C+c]&1<<k))
338
0
            {
339
               /* Fill with noise */
340
0
               for (j=0;j<N0;j++)
341
0
               {
342
0
                  seed = celt_lcg_rand(seed);
343
0
                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
344
0
               }
345
0
               renormalize = 1;
346
0
            }
347
0
         }
348
         /* We just added some energy, so we need to renormalise */
349
0
         if (renormalize)
350
0
            renormalise_vector(X, N0<<LM, Q31ONE, arch);
351
0
      } while (++c<C);
352
0
   }
353
0
}
354
355
/* Compute the weights to use for optimizing normalized distortion across
356
   channels. We use the amplitude to weight square distortion, which means
357
   that we use the square root of the value we would have been using if we
358
   wanted to minimize the MSE in the non-normalized domain. This roughly
359
   corresponds to some quick-and-dirty perceptual experiments I ran to
360
   measure inter-aural masking (there doesn't seem to be any published data
361
   on the topic). */
362
static void compute_channel_weights(celt_ener Ex, celt_ener Ey, opus_val16 w[2])
363
2.11M
{
364
2.11M
   celt_ener minE;
365
#ifdef FIXED_POINT
366
   int shift;
367
#endif
368
2.11M
   minE = MIN32(Ex, Ey);
369
   /* Adjustment to make the weights a bit more conservative. */
370
2.11M
   Ex = ADD32(Ex, minE/3);
371
2.11M
   Ey = ADD32(Ey, minE/3);
372
#ifdef FIXED_POINT
373
   shift = celt_ilog2(EPSILON+MAX32(Ex, Ey))-14;
374
#endif
375
2.11M
   w[0] = VSHR32(Ex, shift);
376
2.11M
   w[1] = VSHR32(Ey, shift);
377
2.11M
}
378
379
static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
380
184M
{
381
184M
   int i = bandID;
382
184M
   int j;
383
184M
   opus_val16 a1, a2;
384
184M
   opus_val16 left, right;
385
184M
   opus_val16 norm;
386
#ifdef FIXED_POINT
387
   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
388
#endif
389
184M
   left = VSHR32(bandE[i],shift);
390
184M
   right = VSHR32(bandE[i+m->nbEBands],shift);
391
184M
   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
392
#ifdef FIXED_POINT
393
   left = MIN32(left, norm-1);
394
   right = MIN32(right, norm-1);
395
#endif
396
184M
   a1 = DIV32_16(SHL32(EXTEND32(left),15),norm);
397
184M
   a2 = DIV32_16(SHL32(EXTEND32(right),15),norm);
398
1.88G
   for (j=0;j<N;j++)
399
1.69G
   {
400
1.69G
      X[j] = ADD32(MULT16_32_Q15(a1, X[j]), MULT16_32_Q15(a2, Y[j]));
401
      /* Side is not encoded, no need to calculate */
402
1.69G
   }
403
184M
}
404
405
static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
406
2.10M
{
407
2.10M
   int j;
408
23.6M
   for (j=0;j<N;j++)
409
21.5M
   {
410
21.5M
      opus_val32 r, l;
411
21.5M
      l = MULT32_32_Q31(QCONST32(.70710678f,31), X[j]);
412
21.5M
      r = MULT32_32_Q31(QCONST32(.70710678f,31), Y[j]);
413
21.5M
      X[j] = ADD32(l, r);
414
21.5M
      Y[j] = SUB32(r, l);
415
21.5M
   }
416
2.10M
}
417
418
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val32 mid, int N, int arch)
419
63.2M
{
420
63.2M
   int j;
421
63.2M
   opus_val32 xp=0, side=0;
422
63.2M
   opus_val32 El, Er;
423
#ifdef FIXED_POINT
424
   int kl, kr;
425
#endif
426
63.2M
   opus_val32 t, lgain, rgain;
427
428
   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
429
63.2M
   xp = celt_inner_prod_norm_shift(Y, X, N, arch);
430
63.2M
   side = celt_inner_prod_norm_shift(Y, Y, N, arch);
431
   /* Compensating for the mid normalization */
432
63.2M
   xp = MULT32_32_Q31(mid, xp);
433
   /* mid and side are in Q15, not Q14 like X and Y */
434
63.2M
   El = SHR32(MULT32_32_Q31(mid, mid),3) + side - 2*xp;
435
63.2M
   Er = SHR32(MULT32_32_Q31(mid, mid),3) + side + 2*xp;
436
63.2M
   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
437
1.55k
   {
438
1.55k
      OPUS_COPY(Y, X, N);
439
1.55k
      return;
440
1.55k
   }
441
442
#ifdef FIXED_POINT
443
   kl = celt_ilog2(El)>>1;
444
   kr = celt_ilog2(Er)>>1;
445
#endif
446
63.2M
   t = VSHR32(El, (kl<<1)-29);
447
63.2M
   lgain = celt_rsqrt_norm32(t);
448
63.2M
   t = VSHR32(Er, (kr<<1)-29);
449
63.2M
   rgain = celt_rsqrt_norm32(t);
450
451
#ifdef FIXED_POINT
452
   if (kl < 7)
453
      kl = 7;
454
   if (kr < 7)
455
      kr = 7;
456
#endif
457
458
891M
   for (j=0;j<N;j++)
459
827M
   {
460
827M
      celt_norm r, l;
461
      /* Apply mid scaling (side is already scaled) */
462
827M
      l = MULT32_32_Q31(mid, X[j]);
463
827M
      r = Y[j];
464
827M
      X[j] = VSHR32(MULT32_32_Q31(lgain, SUB32(l,r)), kl-15);
465
827M
      Y[j] = VSHR32(MULT32_32_Q31(rgain, ADD32(l,r)), kr-15);
466
827M
   }
467
63.2M
}
468
469
/* Decide whether we should spread the pulses in the current frame */
470
int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
471
      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
472
      int end, int C, int M, const int *spread_weight)
473
953k
{
474
953k
   int i, c, N0;
475
953k
   int sum = 0, nbBands=0;
476
953k
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
477
953k
   int decision;
478
953k
   int hf_sum=0;
479
480
953k
   celt_assert(end>0);
481
482
953k
   N0 = M*m->shortMdctSize;
483
484
953k
   if (M*(eBands[end]-eBands[end-1]) <= 8)
485
468k
      return SPREAD_NONE;
486
623k
   c=0; do {
487
11.5M
      for (i=0;i<end;i++)
488
10.9M
      {
489
10.9M
         int j, N, tmp=0;
490
10.9M
         int tcount[3] = {0,0,0};
491
10.9M
         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
492
10.9M
         N = M*(eBands[i+1]-eBands[i]);
493
10.9M
         if (N<=8)
494
8.01M
            continue;
495
         /* Compute rough CDF of |x[j]| */
496
90.4M
         for (j=0;j<N;j++)
497
87.4M
         {
498
87.4M
            opus_val32 x2N; /* Q13 */
499
500
87.4M
            x2N = MULT16_16(MULT16_16_Q15(SHR32(x[j], NORM_SHIFT-14), SHR32(x[j], NORM_SHIFT-14)), N);
501
87.4M
            if (x2N < QCONST16(0.25f,13))
502
35.3M
               tcount[0]++;
503
87.4M
            if (x2N < QCONST16(0.0625f,13))
504
19.4M
               tcount[1]++;
505
87.4M
            if (x2N < QCONST16(0.015625f,13))
506
10.2M
               tcount[2]++;
507
87.4M
         }
508
509
         /* Only include four last bands (8 kHz and up) */
510
2.95M
         if (i>m->nbEBands-4)
511
516k
            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
512
2.95M
         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
513
2.95M
         sum += tmp*spread_weight[i];
514
2.95M
         nbBands+=spread_weight[i];
515
2.95M
      }
516
623k
   } while (++c<C);
517
518
485k
   if (update_hf)
519
26.2k
   {
520
26.2k
      if (hf_sum)
521
23.6k
         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
522
26.2k
      *hf_average = (*hf_average+hf_sum)>>1;
523
26.2k
      hf_sum = *hf_average;
524
26.2k
      if (*tapset_decision==2)
525
5.14k
         hf_sum += 4;
526
21.0k
      else if (*tapset_decision==0)
527
19.4k
         hf_sum -= 4;
528
26.2k
      if (hf_sum > 22)
529
5.14k
         *tapset_decision=2;
530
21.0k
      else if (hf_sum > 18)
531
1.65k
         *tapset_decision=1;
532
19.4k
      else
533
19.4k
         *tapset_decision=0;
534
26.2k
   }
535
   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
536
485k
   celt_assert(nbBands>0); /* end has to be non-zero */
537
485k
   celt_assert(sum>=0);
538
485k
   sum = celt_udiv((opus_int32)sum<<8, nbBands);
539
   /* Recursive averaging */
540
485k
   sum = (sum+*average)>>1;
541
485k
   *average = sum;
542
   /* Hysteresis */
543
485k
   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
544
485k
   if (sum < 80)
545
287k
   {
546
287k
      decision = SPREAD_AGGRESSIVE;
547
287k
   } else if (sum < 256)
548
159k
   {
549
159k
      decision = SPREAD_NORMAL;
550
159k
   } else if (sum < 384)
551
22.8k
   {
552
22.8k
      decision = SPREAD_LIGHT;
553
22.8k
   } else {
554
14.8k
      decision = SPREAD_NONE;
555
14.8k
   }
556
#ifdef FUZZING
557
   decision = rand()&0x3;
558
   *tapset_decision=rand()%3;
559
#endif
560
485k
   return decision;
561
485k
}
562
563
/* Indexing table for converting from natural Hadamard to ordery Hadamard
564
   This is essentially a bit-reversed Gray, on top of which we've added
565
   an inversion of the order because we want the DC at the end rather than
566
   the beginning. The lines are for N=2, 4, 8, 16 */
567
static const int ordery_table[] = {
568
       1,  0,
569
       3,  0,  2,  1,
570
       7,  0,  4,  3,  6,  1,  5,  2,
571
      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
572
};
573
574
static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
575
9.07M
{
576
9.07M
   int i,j;
577
9.07M
   VARDECL(celt_norm, tmp);
578
9.07M
   int N;
579
9.07M
   SAVE_STACK;
580
9.07M
   N = N0*stride;
581
9.07M
   ALLOC(tmp, N, celt_norm);
582
9.07M
   celt_assert(stride>0);
583
9.07M
   if (hadamard)
584
1.65M
   {
585
1.65M
      const int *ordery = ordery_table+stride-2;
586
8.17M
      for (i=0;i<stride;i++)
587
6.52M
      {
588
36.0M
         for (j=0;j<N0;j++)
589
29.4M
            tmp[ordery[i]*N0+j] = X[j*stride+i];
590
6.52M
      }
591
7.42M
   } else {
592
57.9M
      for (i=0;i<stride;i++)
593
181M
         for (j=0;j<N0;j++)
594
130M
            tmp[i*N0+j] = X[j*stride+i];
595
7.42M
   }
596
9.07M
   OPUS_COPY(X, tmp, N);
597
9.07M
   RESTORE_STACK;
598
9.07M
}
599
600
static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
601
2.01M
{
602
2.01M
   int i,j;
603
2.01M
   VARDECL(celt_norm, tmp);
604
2.01M
   int N;
605
2.01M
   SAVE_STACK;
606
2.01M
   N = N0*stride;
607
2.01M
   ALLOC(tmp, N, celt_norm);
608
2.01M
   if (hadamard)
609
603k
   {
610
603k
      const int *ordery = ordery_table+stride-2;
611
3.01M
      for (i=0;i<stride;i++)
612
12.2M
         for (j=0;j<N0;j++)
613
9.83M
            tmp[j*stride+i] = X[ordery[i]*N0+j];
614
1.41M
   } else {
615
11.9M
      for (i=0;i<stride;i++)
616
32.3M
         for (j=0;j<N0;j++)
617
21.8M
            tmp[j*stride+i] = X[i*N0+j];
618
1.41M
   }
619
2.01M
   OPUS_COPY(X, tmp, N);
620
2.01M
   RESTORE_STACK;
621
2.01M
}
622
623
void haar1(celt_norm *X, int N0, int stride)
624
396M
{
625
396M
   int i, j;
626
396M
   N0 >>= 1;
627
1.26G
   for (i=0;i<stride;i++)
628
3.77G
      for (j=0;j<N0;j++)
629
2.90G
      {
630
2.90G
         opus_val32 tmp1, tmp2;
631
2.90G
         tmp1 = MULT32_32_Q31(QCONST32(.70710678f,31), X[stride*2*j+i]);
632
2.90G
         tmp2 = MULT32_32_Q31(QCONST32(.70710678f,31), X[stride*(2*j+1)+i]);
633
2.90G
         X[stride*2*j+i] = ADD32(tmp1, tmp2);
634
2.90G
         X[stride*(2*j+1)+i] = SUB32(tmp1, tmp2);
635
2.90G
      }
636
396M
}
637
638
static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
639
208M
{
640
208M
   static const opus_int16 exp2_table8[8] =
641
208M
      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
642
208M
   int qn, qb;
643
208M
   int N2 = 2*N-1;
644
208M
   if (stereo && N==2)
645
61.0M
      N2--;
646
   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
647
       always have enough bits left over to code at least one pulse in the
648
       side; otherwise it would collapse, since it doesn't get folded. */
649
208M
   qb = celt_sudiv(b+N2*offset, N2);
650
208M
   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
651
652
208M
   qb = IMIN(8<<BITRES, qb);
653
654
208M
   if (qb<(1<<BITRES>>1)) {
655
182M
      qn = 1;
656
182M
   } else {
657
26.0M
      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
658
26.0M
      qn = (qn+1)>>1<<1;
659
26.0M
   }
660
208M
   celt_assert(qn <= 256);
661
208M
   return qn;
662
208M
}
663
664
struct band_ctx {
665
   int encode;
666
   int resynth;
667
   const CELTMode *m;
668
   int i;
669
   int intensity;
670
   int spread;
671
   int tf_change;
672
   ec_ctx *ec;
673
   opus_int32 remaining_bits;
674
   const celt_ener *bandE;
675
   opus_uint32 seed;
676
   int arch;
677
   int theta_round;
678
   int disable_inv;
679
   int avoid_split_noise;
680
#ifdef ENABLE_QEXT
681
   ec_ctx *ext_ec;
682
   int extra_bits;
683
   opus_int32 ext_total_bits;
684
   int extra_bands;
685
#endif
686
};
687
688
struct split_ctx {
689
   int inv;
690
   int imid;
691
   int iside;
692
   int delta;
693
   int itheta;
694
#ifdef ENABLE_QEXT
695
   int itheta_q30;
696
#endif
697
   int qalloc;
698
};
699
700
static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
701
      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
702
      int LM,
703
      int stereo, int *fill ARG_QEXT(int *ext_b))
704
208M
{
705
208M
   int qn;
706
208M
   int itheta=0;
707
208M
   int itheta_q30=0;
708
208M
   int delta;
709
208M
   int imid, iside;
710
208M
   int qalloc;
711
208M
   int pulse_cap;
712
208M
   int offset;
713
208M
   opus_int32 tell;
714
208M
   int inv=0;
715
208M
   int encode;
716
208M
   const CELTMode *m;
717
208M
   int i;
718
208M
   int intensity;
719
208M
   ec_ctx *ec;
720
208M
   const celt_ener *bandE;
721
722
208M
   encode = ctx->encode;
723
208M
   m = ctx->m;
724
208M
   i = ctx->i;
725
208M
   intensity = ctx->intensity;
726
208M
   ec = ctx->ec;
727
208M
   bandE = ctx->bandE;
728
729
   /* Decide on the resolution to give to the split parameter theta */
730
208M
   pulse_cap = m->logN[i]+LM*(1<<BITRES);
731
208M
   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
732
208M
   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
733
208M
   if (stereo && i>=intensity)
734
183M
      qn = 1;
735
208M
   if (encode)
736
208M
   {
737
      /* theta is the atan() of the ratio between the (normalized)
738
         side and mid. With just that parameter, we can re-scale both
739
         mid and side because we know that 1) they have unit norm and
740
         2) they are orthogonal. */
741
208M
      itheta_q30 = stereo_itheta(X, Y, stereo, N, ctx->arch);
742
208M
      itheta = itheta_q30>>16;
743
208M
   }
744
208M
   tell = ec_tell_frac(ec);
745
208M
   if (qn!=1)
746
24.8M
   {
747
24.8M
      if (encode)
748
24.8M
      {
749
24.8M
         if (!stereo || ctx->theta_round == 0)
750
22.7M
         {
751
22.7M
            itheta = (itheta*(opus_int32)qn+8192)>>14;
752
22.7M
            if (!stereo && ctx->avoid_split_noise && itheta > 0 && itheta < qn)
753
317k
            {
754
               /* Check if the selected value of theta will cause the bit allocation
755
                  to inject noise on one side. If so, make sure the energy of that side
756
                  is zero. */
757
317k
               int unquantized = celt_udiv((opus_int32)itheta*16384, qn);
758
317k
               imid = bitexact_cos((opus_int16)unquantized);
759
317k
               iside = bitexact_cos((opus_int16)(16384-unquantized));
760
317k
               delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
761
317k
               if (delta > *b)
762
929
                  itheta = qn;
763
316k
               else if (delta < -*b)
764
1.43k
                  itheta = 0;
765
317k
            }
766
22.7M
         } else {
767
2.08M
            int down;
768
            /* Bias quantization towards itheta=0 and itheta=16384. */
769
2.08M
            int bias = itheta > 8192 ? 32767/qn : -32767/qn;
770
2.08M
            down = IMIN(qn-1, IMAX(0, (itheta*(opus_int32)qn + bias)>>14));
771
2.08M
            if (ctx->theta_round < 0)
772
1.04M
               itheta = down;
773
1.04M
            else
774
1.04M
               itheta = down+1;
775
2.08M
         }
776
24.8M
      }
777
      /* Entropy coding of the angle. We use a uniform pdf for the
778
         time split, a step for stereo, and a triangular one for the rest. */
779
24.8M
      if (stereo && N>2)
780
2.33M
      {
781
2.33M
         int p0 = 3;
782
2.33M
         int x = itheta;
783
2.33M
         int x0 = qn/2;
784
2.33M
         int ft = p0*(x0+1) + x0;
785
         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
786
2.33M
         if (encode)
787
2.33M
         {
788
2.33M
            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
789
2.33M
         } else {
790
0
            int fs;
791
0
            fs=ec_decode(ec,ft);
792
0
            if (fs<(x0+1)*p0)
793
0
               x=fs/p0;
794
0
            else
795
0
               x=x0+1+(fs-(x0+1)*p0);
796
0
            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
797
0
            itheta = x;
798
0
         }
799
22.4M
      } else if (B0>1 || stereo) {
800
         /* Uniform pdf */
801
4.76M
         if (encode)
802
4.76M
            ec_enc_uint(ec, itheta, qn+1);
803
0
         else
804
0
            itheta = ec_dec_uint(ec, qn+1);
805
17.6M
      } else {
806
17.6M
         int fs=1, ft;
807
17.6M
         ft = ((qn>>1)+1)*((qn>>1)+1);
808
17.6M
         if (encode)
809
17.6M
         {
810
17.6M
            int fl;
811
812
17.6M
            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
813
17.6M
            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
814
17.6M
             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
815
816
17.6M
            ec_encode(ec, fl, fl+fs, ft);
817
17.6M
         } else {
818
            /* Triangular pdf */
819
0
            int fl=0;
820
0
            int fm;
821
0
            fm = ec_decode(ec, ft);
822
823
0
            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
824
0
            {
825
0
               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
826
0
               fs = itheta + 1;
827
0
               fl = itheta*(itheta + 1)>>1;
828
0
            }
829
0
            else
830
0
            {
831
0
               itheta = (2*(qn + 1)
832
0
                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
833
0
               fs = qn + 1 - itheta;
834
0
               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
835
0
            }
836
837
0
            ec_dec_update(ec, fl, fl+fs, ft);
838
0
         }
839
17.6M
      }
840
24.8M
      celt_assert(itheta>=0);
841
24.8M
      itheta = celt_udiv((opus_int32)itheta*16384, qn);
842
#ifdef ENABLE_QEXT
843
      *ext_b = IMIN(*ext_b, ctx->ext_total_bits - (opus_int32)ec_tell_frac(ctx->ext_ec));
844
      if (*ext_b >= 2*N<<BITRES && ctx->ext_total_bits-ec_tell_frac(ctx->ext_ec)-1 > 2<<BITRES) {
845
         int extra_bits;
846
         int ext_tell = ec_tell_frac(ctx->ext_ec);
847
         extra_bits = IMIN(12, IMAX(2, celt_sudiv(*ext_b, (2*N-1)<<BITRES)));
848
         if (encode) {
849
            itheta_q30 = itheta_q30 - (itheta<<16);
850
            itheta_q30 = (itheta_q30*(opus_int64)qn*((1<<extra_bits)-1)+(1<<29))>>30;
851
            itheta_q30 += (1<<(extra_bits-1))-1;
852
            itheta_q30 = IMAX(0, IMIN((1<<extra_bits)-2, itheta_q30));
853
            ec_enc_uint(ctx->ext_ec, itheta_q30, (1<<extra_bits)-1);
854
         } else {
855
            itheta_q30 = ec_dec_uint(ctx->ext_ec, (1<<extra_bits)-1);
856
         }
857
         itheta_q30 -= (1<<(extra_bits-1))-1;
858
         itheta_q30 = (itheta<<16) + itheta_q30*(opus_int64)(1<<30)/(qn*((1<<extra_bits)-1));
859
         /* Hard bounds on itheta (can only trigger on corrupted bitstreams). */
860
         itheta_q30 = IMAX(0, IMIN(itheta_q30, 1073741824));
861
         *ext_b -= ec_tell_frac(ctx->ext_ec) - ext_tell;
862
      } else {
863
         itheta_q30 = (opus_int32)itheta<<16;
864
      }
865
#endif
866
24.8M
      if (encode && stereo)
867
2.99M
      {
868
2.99M
         if (itheta==0)
869
890k
            intensity_stereo(m, X, Y, bandE, i, N);
870
2.10M
         else
871
2.10M
            stereo_split(X, Y, N);
872
2.99M
      }
873
      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
874
               Let's do that at higher complexity */
875
183M
   } else if (stereo) {
876
183M
      if (encode)
877
183M
      {
878
183M
         inv = itheta > 8192 && !ctx->disable_inv;
879
183M
         if (inv)
880
370k
         {
881
370k
            int j;
882
7.05M
            for (j=0;j<N;j++)
883
6.68M
               Y[j] = -Y[j];
884
370k
         }
885
183M
         intensity_stereo(m, X, Y, bandE, i, N);
886
183M
      }
887
183M
      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
888
2.11M
      {
889
2.11M
         if (encode)
890
2.11M
            ec_enc_bit_logp(ec, inv, 2);
891
0
         else
892
0
            inv = ec_dec_bit_logp(ec, 2);
893
2.11M
      } else
894
181M
         inv = 0;
895
      /* inv flag override to avoid problems with downmixing. */
896
183M
      if (ctx->disable_inv)
897
86.1M
         inv = 0;
898
183M
      itheta = 0;
899
183M
      itheta_q30 = 0;
900
183M
   }
901
208M
   qalloc = ec_tell_frac(ec) - tell;
902
208M
   *b -= qalloc;
903
904
208M
   if (itheta == 0)
905
198M
   {
906
198M
      imid = 32767;
907
198M
      iside = 0;
908
198M
      *fill &= (1<<B)-1;
909
198M
      delta = -16384;
910
198M
   } else if (itheta == 16384)
911
548k
   {
912
548k
      imid = 0;
913
548k
      iside = 32767;
914
548k
      *fill &= ((1<<B)-1)<<B;
915
548k
      delta = 16384;
916
9.10M
   } else {
917
9.10M
      imid = bitexact_cos((opus_int16)itheta);
918
9.10M
      iside = bitexact_cos((opus_int16)(16384-itheta));
919
      /* This is the mid vs side allocation that minimizes squared error
920
         in that band. */
921
9.10M
      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
922
9.10M
   }
923
924
208M
   sctx->inv = inv;
925
208M
   sctx->imid = imid;
926
208M
   sctx->iside = iside;
927
208M
   sctx->delta = delta;
928
208M
   sctx->itheta = itheta;
929
#ifdef ENABLE_QEXT
930
   sctx->itheta_q30 = itheta_q30;
931
#endif
932
208M
   sctx->qalloc = qalloc;
933
208M
}
934
static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
935
      celt_norm *lowband_out)
936
249M
{
937
249M
   int c;
938
249M
   int stereo;
939
249M
   celt_norm *x = X;
940
249M
   int encode;
941
249M
   ec_ctx *ec;
942
943
249M
   encode = ctx->encode;
944
249M
   ec = ctx->ec;
945
946
249M
   stereo = Y != NULL;
947
320M
   c=0; do {
948
320M
      int sign=0;
949
320M
      if (ctx->remaining_bits>=1<<BITRES)
950
11.4M
      {
951
11.4M
         if (encode)
952
11.4M
         {
953
11.4M
            sign = x[0]<0;
954
11.4M
            ec_enc_bits(ec, sign, 1);
955
11.4M
         } else {
956
0
            sign = ec_dec_bits(ec, 1);
957
0
         }
958
11.4M
         ctx->remaining_bits -= 1<<BITRES;
959
11.4M
      }
960
320M
      if (ctx->resynth)
961
52.7M
         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
962
320M
      x = Y;
963
320M
   } while (++c<1+stereo);
964
249M
   if (lowband_out)
965
249M
      lowband_out[0] = SHR32(X[0],4);
966
249M
   return 1;
967
249M
}
968
969
/* This function is responsible for encoding and decoding a mono partition.
970
   It can split the band in two and transmit the energy difference with
971
   the two half-bands. It can be called recursively so bands can end up being
972
   split in 8 parts. */
973
static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
974
      int N, int b, int B, celt_norm *lowband,
975
      int LM,
976
      opus_val32 gain, int fill
977
      ARG_QEXT(int ext_b))
978
734M
{
979
734M
   const unsigned char *cache;
980
734M
   int q;
981
734M
   int curr_bits;
982
734M
   int imid=0, iside=0;
983
734M
   int B0=B;
984
734M
   opus_val32 mid=0, side=0;
985
734M
   unsigned cm=0;
986
734M
   celt_norm *Y=NULL;
987
734M
   int encode;
988
734M
   const CELTMode *m;
989
734M
   int i;
990
734M
   int spread;
991
734M
   ec_ctx *ec;
992
993
734M
   encode = ctx->encode;
994
734M
   m = ctx->m;
995
734M
   i = ctx->i;
996
734M
   spread = ctx->spread;
997
734M
   ec = ctx->ec;
998
999
   /* If we need 1.5 more bit than we can produce, split the band in two. */
1000
734M
   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
1001
734M
   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
1002
21.8M
   {
1003
21.8M
      int mbits, sbits, delta;
1004
21.8M
      int itheta;
1005
21.8M
      int qalloc;
1006
21.8M
      struct split_ctx sctx;
1007
21.8M
      celt_norm *next_lowband2=NULL;
1008
21.8M
      opus_int32 rebalance;
1009
1010
21.8M
      N >>= 1;
1011
21.8M
      Y = X+N;
1012
21.8M
      LM -= 1;
1013
21.8M
      if (B==1)
1014
17.6M
         fill = (fill&1)|(fill<<1);
1015
21.8M
      B = (B+1)>>1;
1016
1017
21.8M
      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0, LM, 0, &fill ARG_QEXT(&ext_b));
1018
21.8M
      imid = sctx.imid;
1019
21.8M
      iside = sctx.iside;
1020
21.8M
      delta = sctx.delta;
1021
21.8M
      itheta = sctx.itheta;
1022
21.8M
      qalloc = sctx.qalloc;
1023
#ifdef FIXED_POINT
1024
# ifdef ENABLE_QEXT
1025
      (void)imid;
1026
      (void)iside;
1027
      mid = celt_cos_norm32(sctx.itheta_q30);
1028
      side = celt_cos_norm32((1<<30)-sctx.itheta_q30);
1029
# else
1030
      mid = SHL32(EXTEND32(imid), 16);
1031
      side = SHL32(EXTEND32(iside), 16);
1032
# endif
1033
#else
1034
# ifdef ENABLE_QEXT
1035
      (void)imid;
1036
      (void)iside;
1037
      mid = celt_cos_norm2(sctx.itheta_q30*(1.f/(1<<30)));
1038
      side = celt_cos_norm2(1.f-sctx.itheta_q30*(1.f/(1<<30)));
1039
# else
1040
21.8M
      mid = (1.f/32768)*imid;
1041
21.8M
      side = (1.f/32768)*iside;
1042
21.8M
# endif
1043
21.8M
#endif
1044
1045
      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
1046
21.8M
      if (B0>1 && (itheta&0x3fff))
1047
3.32M
      {
1048
3.32M
         if (itheta > 8192)
1049
            /* Rough approximation for pre-echo masking */
1050
1.24M
            delta -= delta>>(4-LM);
1051
2.07M
         else
1052
            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
1053
2.07M
            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
1054
3.32M
      }
1055
21.8M
      mbits = IMAX(0, IMIN(b, (b-delta)/2));
1056
21.8M
      sbits = b-mbits;
1057
21.8M
      ctx->remaining_bits -= qalloc;
1058
1059
21.8M
      if (lowband)
1060
794k
         next_lowband2 = lowband+N; /* >32-bit split case */
1061
1062
21.8M
      rebalance = ctx->remaining_bits;
1063
21.8M
      if (mbits >= sbits)
1064
17.9M
      {
1065
17.9M
         cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
1066
17.9M
               MULT32_32_Q31(gain,mid), fill ARG_QEXT(ext_b/2));
1067
17.9M
         rebalance = mbits - (rebalance-ctx->remaining_bits);
1068
17.9M
         if (rebalance > 3<<BITRES && itheta!=0)
1069
548k
            sbits += rebalance - (3<<BITRES);
1070
17.9M
         cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1071
17.9M
               MULT32_32_Q31(gain,side), fill>>B ARG_QEXT(ext_b/2))<<(B0>>1);
1072
17.9M
      } else {
1073
3.81M
         cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
1074
3.81M
               MULT32_32_Q31(gain,side), fill>>B ARG_QEXT(ext_b/2))<<(B0>>1);
1075
3.81M
         rebalance = sbits - (rebalance-ctx->remaining_bits);
1076
3.81M
         if (rebalance > 3<<BITRES && itheta!=16384)
1077
434k
            mbits += rebalance - (3<<BITRES);
1078
3.81M
         cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
1079
3.81M
               MULT32_32_Q31(gain,mid), fill ARG_QEXT(ext_b/2));
1080
3.81M
      }
1081
713M
   } else {
1082
#ifdef ENABLE_QEXT
1083
      int extra_bits;
1084
      int ext_remaining_bits;
1085
      extra_bits = ext_b/(N-1)>>BITRES;
1086
      ext_remaining_bits = ctx->ext_total_bits-(opus_int32)ec_tell_frac(ctx->ext_ec);
1087
      if (ext_remaining_bits < ((extra_bits+1)*(N-1)+N)<<BITRES) {
1088
         extra_bits = (ext_remaining_bits-(N<<BITRES))/(N-1)>>BITRES;
1089
         extra_bits = IMAX(extra_bits-1, 0);
1090
      }
1091
      extra_bits = IMIN(12, extra_bits);
1092
#endif
1093
      /* This is the basic no-split case */
1094
713M
      q = bits2pulses(m, i, LM, b);
1095
713M
      curr_bits = pulses2bits(m, i, LM, q);
1096
713M
      ctx->remaining_bits -= curr_bits;
1097
1098
      /* Ensures we can never bust the budget */
1099
714M
      while (ctx->remaining_bits < 0 && q > 0)
1100
1.01M
      {
1101
1.01M
         ctx->remaining_bits += curr_bits;
1102
1.01M
         q--;
1103
1.01M
         curr_bits = pulses2bits(m, i, LM, q);
1104
1.01M
         ctx->remaining_bits -= curr_bits;
1105
1.01M
      }
1106
1107
713M
      if (q!=0)
1108
32.9M
      {
1109
32.9M
         int K = get_pulses(q);
1110
1111
         /* Finally do the actual quantization */
1112
32.9M
         if (encode)
1113
32.9M
         {
1114
32.9M
            cm = alg_quant(X, N, K, spread, B, ec, gain, ctx->resynth
1115
32.9M
                           ARG_QEXT(ctx->ext_ec) ARG_QEXT(extra_bits),
1116
32.9M
                           ctx->arch);
1117
32.9M
         } else {
1118
0
            cm = alg_unquant(X, N, K, spread, B, ec, gain
1119
0
                             ARG_QEXT(ctx->ext_ec) ARG_QEXT(extra_bits));
1120
0
         }
1121
#ifdef ENABLE_QEXT
1122
      } else if (ext_b > 2*N<<BITRES)
1123
      {
1124
         extra_bits = ext_b/(N-1)>>BITRES;
1125
         ext_remaining_bits = ctx->ext_total_bits-ec_tell_frac(ctx->ext_ec);
1126
         if (ext_remaining_bits < ((extra_bits+1)*(N-1)+N)<<BITRES) {
1127
            extra_bits = (ext_remaining_bits-(N<<BITRES))/(N-1)>>BITRES;
1128
            extra_bits = IMAX(extra_bits-1, 0);
1129
         }
1130
         extra_bits = IMIN(14, extra_bits);
1131
         if (encode) cm = cubic_quant(X, N, extra_bits, B, ctx->ext_ec, gain, ctx->resynth);
1132
         else cm = cubic_unquant(X, N, extra_bits, B, ctx->ext_ec, gain);
1133
#endif
1134
680M
      } else {
1135
         /* If there's no pulse, fill the band anyway */
1136
680M
         int j;
1137
680M
         if (ctx->resynth)
1138
147M
         {
1139
147M
            unsigned cm_mask;
1140
            /* B can be as large as 16, so this shift might overflow an int on a
1141
               16-bit platform; use a long to get defined behavior.*/
1142
147M
            cm_mask = (unsigned)(1UL<<B)-1;
1143
147M
            fill &= cm_mask;
1144
147M
            if (!fill)
1145
62.6M
            {
1146
62.6M
               OPUS_CLEAR(X, N);
1147
84.5M
            } else {
1148
84.5M
               if (lowband == NULL)
1149
3.71M
               {
1150
                  /* Noise */
1151
25.9M
                  for (j=0;j<N;j++)
1152
22.2M
                  {
1153
22.2M
                     ctx->seed = celt_lcg_rand(ctx->seed);
1154
22.2M
                     X[j] = SHL32((celt_norm)((opus_int32)ctx->seed>>20), NORM_SHIFT-14);
1155
22.2M
                  }
1156
3.71M
                  cm = cm_mask;
1157
80.8M
               } else {
1158
                  /* Folded spectrum */
1159
900M
                  for (j=0;j<N;j++)
1160
819M
                  {
1161
819M
                     opus_val16 tmp;
1162
819M
                     ctx->seed = celt_lcg_rand(ctx->seed);
1163
                     /* About 48 dB below the "normal" folding level */
1164
819M
                     tmp = QCONST16(1.0f/256, NORM_SHIFT-4);
1165
819M
                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1166
819M
                     X[j] = lowband[j]+tmp;
1167
819M
                  }
1168
80.8M
                  cm = fill;
1169
80.8M
               }
1170
84.5M
               renormalise_vector(X, N, gain, ctx->arch);
1171
84.5M
            }
1172
147M
         }
1173
680M
      }
1174
713M
   }
1175
1176
734M
   return cm;
1177
734M
}
1178
1179
#ifdef ENABLE_QEXT
1180
static unsigned cubic_quant_partition(struct band_ctx *ctx, celt_norm *X, int N, int b, int B, ec_ctx *ec, int LM, opus_val32 gain, int resynth, int encode)
1181
{
1182
   celt_assert(LM>=0);
1183
   ctx->remaining_bits = ctx->ec->storage*8*8 - ec_tell_frac(ctx->ec);
1184
   b = IMIN(b, ctx->remaining_bits);
1185
   /* As long as we have at least two bits of depth, split all the way to LM=0 (not -1 like PVQ). */
1186
   if (LM==0 || b<=2*N<<BITRES) {
1187
      int res, ret;
1188
      b = IMIN(b + ((N-1)<<BITRES)/2, ctx->remaining_bits);
1189
      /* Resolution left after taking into account coding the cube face. */
1190
      res = (b-(1<<BITRES)-ctx->m->logN[ctx->i]-(LM<<BITRES)-1)/(N-1)>>BITRES;
1191
      res = IMIN(14, IMAX(0, res));
1192
      if (encode) ret = cubic_quant(X, N, res, B, ec, gain, resynth);
1193
      else ret = cubic_unquant(X, N, res, B, ec, gain);
1194
      ctx->remaining_bits = ctx->ec->storage*8*8 - ec_tell_frac(ctx->ec);
1195
      return ret;
1196
   } else {
1197
      celt_norm *Y;
1198
      opus_int32 itheta_q30;
1199
      opus_val32 g1, g2;
1200
      opus_int32 theta_res;
1201
      opus_int32 qtheta;
1202
      int delta;
1203
      int b1, b2;
1204
      int cm;
1205
      int N0;
1206
      N0 = N;
1207
      N >>= 1;
1208
      Y = X+N;
1209
      LM -= 1;
1210
      B = (B+1)>>1;
1211
      theta_res = IMIN(16, (b>>BITRES)/(N0-1) + 1);
1212
      if (encode) {
1213
         itheta_q30 = stereo_itheta(X, Y, 0, N, ctx->arch);
1214
         qtheta = (itheta_q30+(1<<(29-theta_res)))>>(30-theta_res);
1215
         ec_enc_uint(ec, qtheta, (1<<theta_res)+1);
1216
      } else {
1217
         qtheta = ec_dec_uint(ec, (1<<theta_res)+1);
1218
      }
1219
      itheta_q30 = qtheta<<(30-theta_res);
1220
      b -= theta_res<<BITRES;
1221
      delta = (N0-1) * 23 * ((itheta_q30>>16)-8192) >> (17-BITRES);
1222
1223
#ifdef FIXED_POINT
1224
      g1 = celt_cos_norm32(itheta_q30);
1225
      g2 = celt_cos_norm32((1<<30)-itheta_q30);
1226
#else
1227
      g1 = celt_cos_norm2(itheta_q30*(1.f/(1<<30)));
1228
      g2 = celt_cos_norm2(1.f-itheta_q30*(1.f/(1<<30)));
1229
#endif
1230
      if (itheta_q30 == 0) {
1231
         b1=b;
1232
         b2=0;
1233
      } else if (itheta_q30==1073741824) {
1234
         b1=0;
1235
         b2=b;
1236
      } else {
1237
         b1 = IMIN(b, IMAX(0, (b-delta)/2));
1238
         b2 = b-b1;
1239
      }
1240
      cm  = cubic_quant_partition(ctx, X, N, b1, B, ec, LM, MULT32_32_Q31(gain, g1), resynth, encode);
1241
      cm |= cubic_quant_partition(ctx, Y, N, b2, B, ec, LM, MULT32_32_Q31(gain, g2), resynth, encode);
1242
      return cm;
1243
   }
1244
}
1245
#endif
1246
1247
/* This function is responsible for encoding and decoding a band for the mono case. */
1248
static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1249
      int N, int b, int B, celt_norm *lowband,
1250
      int LM, celt_norm *lowband_out,
1251
      opus_val32 gain, celt_norm *lowband_scratch, int fill
1252
      ARG_QEXT(int ext_b))
1253
869M
{
1254
869M
   int N0=N;
1255
869M
   int N_B=N;
1256
869M
   int N_B0;
1257
869M
   int B0=B;
1258
869M
   int time_divide=0;
1259
869M
   int recombine=0;
1260
869M
   int longBlocks;
1261
869M
   unsigned cm=0;
1262
869M
   int k;
1263
869M
   int encode;
1264
869M
   int tf_change;
1265
1266
869M
   encode = ctx->encode;
1267
869M
   tf_change = ctx->tf_change;
1268
1269
869M
   longBlocks = B0==1;
1270
1271
869M
   N_B = celt_udiv(N_B, B);
1272
1273
   /* Special case for one sample */
1274
869M
   if (N==1)
1275
178M
   {
1276
178M
      return quant_band_n1(ctx, X, NULL, lowband_out);
1277
178M
   }
1278
1279
691M
   if (tf_change>0)
1280
1.32M
      recombine = tf_change;
1281
   /* Band recombining to increase frequency resolution */
1282
1283
691M
   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1284
1.09M
   {
1285
1.09M
      OPUS_COPY(lowband_scratch, lowband, N);
1286
1.09M
      lowband = lowband_scratch;
1287
1.09M
   }
1288
1289
693M
   for (k=0;k<recombine;k++)
1290
2.21M
   {
1291
2.21M
      static const unsigned char bit_interleave_table[16]={
1292
2.21M
            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1293
2.21M
      };
1294
2.21M
      if (encode)
1295
2.21M
         haar1(X, N>>k, 1<<k);
1296
2.21M
      if (lowband)
1297
235k
         haar1(lowband, N>>k, 1<<k);
1298
2.21M
      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1299
2.21M
   }
1300
691M
   B>>=recombine;
1301
691M
   N_B<<=recombine;
1302
1303
   /* Increasing the time resolution */
1304
696M
   while ((N_B&1) == 0 && tf_change<0)
1305
4.95M
   {
1306
4.95M
      if (encode)
1307
4.95M
         haar1(X, N_B, B);
1308
4.95M
      if (lowband)
1309
817k
         haar1(lowband, N_B, B);
1310
4.95M
      fill |= fill<<B;
1311
4.95M
      B <<= 1;
1312
4.95M
      N_B >>= 1;
1313
4.95M
      time_divide++;
1314
4.95M
      tf_change++;
1315
4.95M
   }
1316
691M
   B0=B;
1317
691M
   N_B0 = N_B;
1318
1319
   /* Reorganize the samples in time order instead of frequency order */
1320
691M
   if (B0>1)
1321
8.04M
   {
1322
8.04M
      if (encode)
1323
8.04M
         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1324
8.04M
      if (lowband)
1325
1.02M
         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1326
8.04M
   }
1327
1328
#ifdef ENABLE_QEXT
1329
   if (ctx->extra_bands && b > (3*N<<BITRES)+(ctx->m->logN[ctx->i]+8+8*LM)) {
1330
      cm = cubic_quant_partition(ctx, X, N, b, B, ctx->ec, LM, gain, ctx->resynth, encode);
1331
   } else
1332
#endif
1333
691M
   {
1334
691M
      cm = quant_partition(ctx, X, N, b, B, lowband, LM, gain, fill ARG_QEXT(ext_b));
1335
691M
   }
1336
1337
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1338
691M
   if (ctx->resynth)
1339
150M
   {
1340
      /* Undo the sample reorganization going from time order to frequency order */
1341
150M
      if (B0>1)
1342
2.01M
         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1343
1344
      /* Undo time-freq changes that we did earlier */
1345
150M
      N_B = N_B0;
1346
150M
      B = B0;
1347
152M
      for (k=0;k<time_divide;k++)
1348
1.68M
      {
1349
1.68M
         B >>= 1;
1350
1.68M
         N_B <<= 1;
1351
1.68M
         cm |= cm>>B;
1352
1.68M
         haar1(X, N_B, B);
1353
1.68M
      }
1354
1355
151M
      for (k=0;k<recombine;k++)
1356
502k
      {
1357
502k
         static const unsigned char bit_deinterleave_table[16]={
1358
502k
               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1359
502k
               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1360
502k
         };
1361
502k
         cm = bit_deinterleave_table[cm];
1362
502k
         haar1(X, N0>>k, 1<<k);
1363
502k
      }
1364
150M
      B<<=recombine;
1365
1366
      /* Scale output for later folding */
1367
150M
      if (lowband_out)
1368
80.6M
      {
1369
80.6M
         int j;
1370
80.6M
         opus_val16 n;
1371
80.6M
         n = celt_sqrt(SHL32(EXTEND32(N0),22));
1372
785M
         for (j=0;j<N0;j++)
1373
704M
            lowband_out[j] = MULT16_32_Q15(n,X[j]);
1374
80.6M
      }
1375
150M
      cm &= (1<<B)-1;
1376
150M
   }
1377
691M
   return cm;
1378
869M
}
1379
1380
#ifdef FIXED_POINT
1381
#define MIN_STEREO_ENERGY 2
1382
#else
1383
379M
#define MIN_STEREO_ENERGY 1e-10f
1384
#endif
1385
1386
/* This function is responsible for encoding and decoding a band for the stereo case. */
1387
static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1388
      int N, int b, int B, celt_norm *lowband,
1389
      int LM, celt_norm *lowband_out,
1390
      celt_norm *lowband_scratch, int fill
1391
      ARG_QEXT(int ext_b) ARG_QEXT(const int *cap))
1392
257M
{
1393
257M
   int imid=0, iside=0;
1394
257M
   int inv = 0;
1395
257M
   opus_val32 mid=0, side=0;
1396
257M
   unsigned cm=0;
1397
257M
   int mbits, sbits, delta;
1398
257M
   int itheta;
1399
257M
   int qalloc;
1400
257M
   struct split_ctx sctx;
1401
257M
   int orig_fill;
1402
257M
   int encode;
1403
257M
   ec_ctx *ec;
1404
1405
257M
   encode = ctx->encode;
1406
257M
   ec = ctx->ec;
1407
1408
   /* Special case for one sample */
1409
257M
   if (N==1)
1410
70.9M
   {
1411
70.9M
      return quant_band_n1(ctx, X, Y, lowband_out);
1412
70.9M
   }
1413
1414
186M
   orig_fill = fill;
1415
1416
186M
   if (encode) {
1417
186M
      if (ctx->bandE[ctx->i] < MIN_STEREO_ENERGY || ctx->bandE[ctx->m->nbEBands+ctx->i] < MIN_STEREO_ENERGY) {
1418
180M
         if (ctx->bandE[ctx->i] > ctx->bandE[ctx->m->nbEBands+ctx->i]) OPUS_COPY(Y, X, N);
1419
179M
         else OPUS_COPY(X, Y, N);
1420
180M
      }
1421
186M
   }
1422
186M
   compute_theta(ctx, &sctx, X, Y, N, &b, B, B, LM, 1, &fill ARG_QEXT(&ext_b));
1423
186M
   inv = sctx.inv;
1424
186M
   imid = sctx.imid;
1425
186M
   iside = sctx.iside;
1426
186M
   delta = sctx.delta;
1427
186M
   itheta = sctx.itheta;
1428
186M
   qalloc = sctx.qalloc;
1429
#ifdef FIXED_POINT
1430
# ifdef ENABLE_QEXT
1431
   (void)imid;
1432
   (void)iside;
1433
   mid = celt_cos_norm32(sctx.itheta_q30);
1434
   side = celt_cos_norm32((1<<30)-sctx.itheta_q30);
1435
# else
1436
   mid = SHL32(EXTEND32(imid), 16);
1437
   side = SHL32(EXTEND32(iside), 16);
1438
# endif
1439
#else
1440
# ifdef ENABLE_QEXT
1441
   (void)imid;
1442
   (void)iside;
1443
   mid = celt_cos_norm2(sctx.itheta_q30*(1.f/(1<<30)));
1444
   side = celt_cos_norm2(1.f-sctx.itheta_q30*(1.f/(1<<30)));
1445
# else
1446
186M
   mid = (1.f/32768)*imid;
1447
186M
   side = (1.f/32768)*iside;
1448
186M
# endif
1449
186M
#endif
1450
1451
   /* This is a special case for N=2 that only works for stereo and takes
1452
      advantage of the fact that mid and side are orthogonal to encode
1453
      the side with just one bit. */
1454
186M
   if (N==2)
1455
61.0M
   {
1456
61.0M
      int c;
1457
61.0M
      int sign=0;
1458
61.0M
      celt_norm *x2, *y2;
1459
61.0M
      mbits = b;
1460
61.0M
      sbits = 0;
1461
      /* Only need one bit for the side. */
1462
61.0M
      if (itheta != 0 && itheta != 16384)
1463
387k
         sbits = 1<<BITRES;
1464
61.0M
      mbits -= sbits;
1465
61.0M
      c = itheta > 8192;
1466
61.0M
      ctx->remaining_bits -= qalloc+sbits;
1467
1468
61.0M
      x2 = c ? Y : X;
1469
61.0M
      y2 = c ? X : Y;
1470
61.0M
      if (sbits)
1471
387k
      {
1472
387k
         if (encode)
1473
387k
         {
1474
            /* Here we only need to encode a sign for the side. */
1475
            /* FIXME: Need to increase fixed-point precision? */
1476
387k
            sign = MULT32_32_Q31(x2[0],y2[1]) - MULT32_32_Q31(x2[1],y2[0]) < 0;
1477
387k
            ec_enc_bits(ec, sign, 1);
1478
387k
         } else {
1479
0
            sign = ec_dec_bits(ec, 1);
1480
0
         }
1481
387k
      }
1482
61.0M
      sign = 1-2*sign;
1483
      /* We use orig_fill here because we want to fold the side, but if
1484
         itheta==16384, we'll have cleared the low bits of fill. */
1485
61.0M
      cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1486
61.0M
            lowband_scratch, orig_fill ARG_QEXT(ext_b));
1487
      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1488
         and there's no need to worry about mixing with the other channel. */
1489
61.0M
      y2[0] = -sign*x2[1];
1490
61.0M
      y2[1] = sign*x2[0];
1491
61.0M
      if (ctx->resynth)
1492
24.3M
      {
1493
24.3M
         celt_norm tmp;
1494
24.3M
         X[0] = MULT32_32_Q31(mid, X[0]);
1495
24.3M
         X[1] = MULT32_32_Q31(mid, X[1]);
1496
24.3M
         Y[0] = MULT32_32_Q31(side, Y[0]);
1497
24.3M
         Y[1] = MULT32_32_Q31(side, Y[1]);
1498
24.3M
         tmp = X[0];
1499
24.3M
         X[0] = SUB32(tmp,Y[0]);
1500
24.3M
         Y[0] = ADD32(tmp,Y[0]);
1501
24.3M
         tmp = X[1];
1502
24.3M
         X[1] = SUB32(tmp,Y[1]);
1503
24.3M
         Y[1] = ADD32(tmp,Y[1]);
1504
24.3M
      }
1505
125M
   } else {
1506
      /* "Normal" split code */
1507
125M
      opus_int32 rebalance;
1508
1509
125M
      mbits = IMAX(0, IMIN(b, (b-delta)/2));
1510
125M
      sbits = b-mbits;
1511
125M
      ctx->remaining_bits -= qalloc;
1512
1513
125M
      rebalance = ctx->remaining_bits;
1514
125M
      if (mbits >= sbits)
1515
125M
      {
1516
#ifdef ENABLE_QEXT
1517
         int qext_extra = 0;
1518
         /* Reallocate any mid bits that cannot be used to extra mid bits. */
1519
         if (cap != NULL && ext_b != 0) qext_extra = IMAX(0, IMIN(ext_b/2, mbits - cap[ctx->i]/2));
1520
#endif
1521
         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1522
            mid for folding later. */
1523
125M
         cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1524
125M
               lowband_scratch, fill ARG_QEXT(ext_b/2+qext_extra));
1525
125M
         rebalance = mbits - (rebalance-ctx->remaining_bits);
1526
125M
         if (rebalance > 3<<BITRES && itheta!=0)
1527
33.8k
            sbits += rebalance - (3<<BITRES);
1528
#ifdef ENABLE_QEXT
1529
         /* Guard against overflowing the EC with the angle if the cubic quant used too many bits for the mid. */
1530
         if (ctx->extra_bands) sbits = IMIN(sbits, ctx->remaining_bits);
1531
#endif
1532
         /* For a stereo split, the high bits of fill are always zero, so no
1533
            folding will be done to the side. */
1534
125M
         cm |= quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B ARG_QEXT(ext_b/2-qext_extra));
1535
125M
      } else {
1536
#ifdef ENABLE_QEXT
1537
         int qext_extra = 0;
1538
         /* Reallocate any side bits that cannot be used to extra side bits. */
1539
         if (cap != NULL && ext_b != 0) qext_extra = IMAX(0, IMIN(ext_b/2, sbits - cap[ctx->i]/2));
1540
#endif
1541
         /* For a stereo split, the high bits of fill are always zero, so no
1542
            folding will be done to the side. */
1543
490k
         cm = quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B ARG_QEXT(ext_b/2+qext_extra));
1544
490k
         rebalance = sbits - (rebalance-ctx->remaining_bits);
1545
490k
         if (rebalance > 3<<BITRES && itheta!=16384)
1546
18.0k
            mbits += rebalance - (3<<BITRES);
1547
#ifdef ENABLE_QEXT
1548
         /* Guard against overflowing the EC with the angle if the cubic quant used too many bits for the side. */
1549
         if (ctx->extra_bands) mbits = IMIN(mbits, ctx->remaining_bits);
1550
#endif
1551
         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1552
            mid for folding later. */
1553
490k
         cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
1554
490k
               lowband_scratch, fill ARG_QEXT(ext_b/2-qext_extra));
1555
490k
      }
1556
125M
   }
1557
1558
1559
   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1560
186M
   if (ctx->resynth)
1561
87.6M
   {
1562
87.6M
      if (N!=2)
1563
63.2M
         stereo_merge(X, Y, mid, N, ctx->arch);
1564
87.6M
      if (inv)
1565
136k
      {
1566
136k
         int j;
1567
2.05M
         for (j=0;j<N;j++)
1568
1.91M
            Y[j] = -Y[j];
1569
136k
      }
1570
87.6M
   }
1571
186M
   return cm;
1572
257M
}
1573
1574
#ifndef DISABLE_UPDATE_DRAFT
1575
static void special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt_norm *norm2, int start, int M, int dual_stereo)
1576
53.7M
{
1577
53.7M
   int n1, n2;
1578
53.7M
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1579
53.7M
   n1 = M*(eBands[start+1]-eBands[start]);
1580
53.7M
   n2 = M*(eBands[start+2]-eBands[start+1]);
1581
   /* Duplicate enough of the first band folding data to be able to fold the second band.
1582
      Copies no data for CELT-only mode. */
1583
53.7M
   OPUS_COPY(&norm[n1], &norm[2*n1 - n2], n2-n1);
1584
53.7M
   if (dual_stereo)
1585
47.3k
      OPUS_COPY(&norm2[n1], &norm2[2*n1 - n2], n2-n1);
1586
53.7M
}
1587
#endif
1588
1589
void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1590
      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1591
      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1592
      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1593
      opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1594
      opus_uint32 *seed, int complexity, int arch, int disable_inv
1595
      ARG_QEXT(ec_ctx *ext_ec) ARG_QEXT(int *extra_pulses)
1596
      ARG_QEXT(opus_int32 ext_total_bits) ARG_QEXT(const int *cap))
1597
53.5M
{
1598
53.5M
   int i;
1599
53.5M
   opus_int32 remaining_bits;
1600
53.5M
   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1601
53.5M
   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1602
53.5M
   VARDECL(celt_norm, _norm);
1603
53.5M
   VARDECL(celt_norm, _lowband_scratch);
1604
53.5M
   VARDECL(celt_norm, X_save);
1605
53.5M
   VARDECL(celt_norm, Y_save);
1606
53.5M
   VARDECL(celt_norm, X_save2);
1607
53.5M
   VARDECL(celt_norm, Y_save2);
1608
53.5M
   VARDECL(celt_norm, norm_save2);
1609
53.5M
   int resynth_alloc;
1610
53.5M
   celt_norm *lowband_scratch;
1611
53.5M
   int B;
1612
53.5M
   int M;
1613
53.5M
   int lowband_offset;
1614
53.5M
   int update_lowband = 1;
1615
53.5M
   int C = Y_ != NULL ? 2 : 1;
1616
53.5M
   int norm_offset;
1617
53.5M
   int theta_rdo = encode && Y_!=NULL && !dual_stereo && complexity>=8;
1618
#ifdef RESYNTH
1619
   int resynth = 1;
1620
#else
1621
53.5M
   int resynth = !encode || theta_rdo;
1622
53.5M
#endif
1623
53.5M
   struct band_ctx ctx;
1624
#ifdef ENABLE_QEXT
1625
   int ext_b;
1626
   opus_int32 ext_balance=0;
1627
   opus_int32 ext_tell=0;
1628
#endif
1629
53.5M
   SAVE_STACK;
1630
1631
53.5M
   M = 1<<LM;
1632
53.5M
   B = shortBlocks ? M : 1;
1633
53.5M
   norm_offset = M*eBands[start];
1634
   /* No need to allocate norm for the last band because we don't need an
1635
      output in that band. */
1636
53.5M
   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1637
53.5M
   norm = _norm;
1638
53.5M
   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1639
1640
   /* For decoding, we can use the last band as scratch space because we don't need that
1641
      scratch space for the last band and we don't care about the data there until we're
1642
      decoding the last band. */
1643
53.5M
   if (encode && resynth)
1644
6.96M
      resynth_alloc = M*(eBands[m->nbEBands]-eBands[m->nbEBands-1]);
1645
46.5M
   else
1646
46.5M
      resynth_alloc = ALLOC_NONE;
1647
53.5M
   ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
1648
53.5M
   if (encode && resynth)
1649
6.96M
      lowband_scratch = _lowband_scratch;
1650
46.5M
   else
1651
46.5M
      lowband_scratch = X_+M*eBands[m->effEBands-1];
1652
53.5M
   ALLOC(X_save, resynth_alloc, celt_norm);
1653
53.5M
   ALLOC(Y_save, resynth_alloc, celt_norm);
1654
53.5M
   ALLOC(X_save2, resynth_alloc, celt_norm);
1655
53.5M
   ALLOC(Y_save2, resynth_alloc, celt_norm);
1656
53.5M
   ALLOC(norm_save2, resynth_alloc, celt_norm);
1657
1658
53.5M
   lowband_offset = 0;
1659
53.5M
   ctx.bandE = bandE;
1660
53.5M
   ctx.ec = ec;
1661
53.5M
   ctx.encode = encode;
1662
53.5M
   ctx.intensity = intensity;
1663
53.5M
   ctx.m = m;
1664
53.5M
   ctx.seed = *seed;
1665
53.5M
   ctx.spread = spread;
1666
53.5M
   ctx.arch = arch;
1667
53.5M
   ctx.disable_inv = disable_inv;
1668
53.5M
   ctx.resynth = resynth;
1669
53.5M
   ctx.theta_round = 0;
1670
#ifdef ENABLE_QEXT
1671
   ctx.ext_ec = ext_ec;
1672
   ctx.ext_total_bits = ext_total_bits;
1673
   ctx.extra_bands = end == NB_QEXT_BANDS || end == 2;
1674
   if (ctx.extra_bands) theta_rdo = 0;
1675
#endif
1676
   /* Avoid injecting noise in the first band on transients. */
1677
53.5M
   ctx.avoid_split_noise = B > 1;
1678
865M
   for (i=start;i<end;i++)
1679
812M
   {
1680
812M
      opus_int32 tell;
1681
812M
      int b;
1682
812M
      int N;
1683
812M
      opus_int32 curr_balance;
1684
812M
      int effective_lowband=-1;
1685
812M
      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1686
812M
      int tf_change=0;
1687
812M
      unsigned x_cm;
1688
812M
      unsigned y_cm;
1689
812M
      int last;
1690
1691
812M
      ctx.i = i;
1692
812M
      last = (i==end-1);
1693
1694
812M
      X = X_+M*eBands[i];
1695
812M
      if (Y_!=NULL)
1696
256M
         Y = Y_+M*eBands[i];
1697
556M
      else
1698
556M
         Y = NULL;
1699
812M
      N = M*eBands[i+1]-M*eBands[i];
1700
812M
      celt_assert(N > 0);
1701
812M
      tell = ec_tell_frac(ec);
1702
1703
      /* Compute how many bits we want to allocate to this band */
1704
812M
      if (i != start)
1705
758M
         balance -= tell;
1706
812M
      remaining_bits = total_bits-tell-1;
1707
812M
      ctx.remaining_bits = remaining_bits;
1708
#ifdef ENABLE_QEXT
1709
      if (i != start) {
1710
         ext_balance += extra_pulses[i-1] + ext_tell;
1711
      }
1712
      ext_tell = ec_tell_frac(ext_ec);
1713
      ctx.extra_bits = extra_pulses[i];
1714
      if (i != start)
1715
         ext_balance -= ext_tell;
1716
      if (i <= codedBands-1)
1717
      {
1718
         opus_int32 ext_curr_balance = celt_sudiv(ext_balance, IMIN(3, codedBands-i));
1719
         ext_b = IMAX(0, IMIN(16383, IMIN(ext_total_bits-ext_tell,extra_pulses[i]+ext_curr_balance)));
1720
      } else {
1721
         ext_b = 0;
1722
      }
1723
#endif
1724
812M
      if (i <= codedBands-1)
1725
80.9M
      {
1726
80.9M
         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1727
80.9M
         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1728
731M
      } else {
1729
731M
         b = 0;
1730
731M
      }
1731
1732
812M
#ifndef DISABLE_UPDATE_DRAFT
1733
812M
      if (resynth && (M*eBands[i]-N >= M*eBands[start] || i==start+1) && (update_lowband || lowband_offset==0))
1734
9.10M
            lowband_offset = i;
1735
812M
      if (i == start+1)
1736
53.5M
         special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1737
#else
1738
      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1739
            lowband_offset = i;
1740
#endif
1741
1742
812M
      tf_change = tf_res[i];
1743
812M
      ctx.tf_change = tf_change;
1744
812M
      if (i>=m->effEBands)
1745
0
      {
1746
0
         X=norm;
1747
0
         if (Y_!=NULL)
1748
0
            Y = norm;
1749
0
         lowband_scratch = NULL;
1750
0
      }
1751
812M
      if (last && !theta_rdo)
1752
46.5M
         lowband_scratch = NULL;
1753
1754
      /* Get a conservative estimate of the collapse_mask's for the bands we're
1755
         going to be folding from. */
1756
812M
      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1757
104M
      {
1758
104M
         int fold_start;
1759
104M
         int fold_end;
1760
104M
         int fold_i;
1761
         /* This ensures we never repeat spectral content within one band */
1762
104M
         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1763
104M
         fold_start = lowband_offset;
1764
105M
         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1765
104M
         fold_end = lowband_offset-1;
1766
104M
#ifndef DISABLE_UPDATE_DRAFT
1767
272M
         while(++fold_end < i && M*eBands[fold_end] < effective_lowband+norm_offset+N);
1768
#else
1769
         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1770
#endif
1771
104M
         x_cm = y_cm = 0;
1772
272M
         fold_i = fold_start; do {
1773
272M
           x_cm |= collapse_masks[fold_i*C+0];
1774
272M
           y_cm |= collapse_masks[fold_i*C+C-1];
1775
272M
         } while (++fold_i<fold_end);
1776
104M
      }
1777
      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1778
         always) be non-zero. */
1779
707M
      else
1780
707M
         x_cm = y_cm = (1<<B)-1;
1781
1782
812M
      if (dual_stereo && i==intensity)
1783
33.0k
      {
1784
33.0k
         int j;
1785
1786
         /* Switch off dual stereo to do intensity. */
1787
33.0k
         dual_stereo = 0;
1788
33.0k
         if (resynth)
1789
0
            for (j=0;j<M*eBands[i]-norm_offset;j++)
1790
0
               norm[j] = HALF32(norm[j]+norm2[j]);
1791
33.0k
      }
1792
812M
      if (dual_stereo)
1793
617k
      {
1794
617k
         x_cm = quant_band(&ctx, X, N, b/2, B,
1795
617k
               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1796
617k
               last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm ARG_QEXT(ext_b/2));
1797
617k
         y_cm = quant_band(&ctx, Y, N, b/2, B,
1798
617k
               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1799
617k
               last?NULL:norm2+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, y_cm ARG_QEXT(ext_b/2));
1800
811M
      } else {
1801
811M
         if (Y!=NULL)
1802
255M
         {
1803
255M
            if (theta_rdo && i < intensity)
1804
2.11M
            {
1805
2.11M
               ec_ctx ec_save, ec_save2;
1806
2.11M
               struct band_ctx ctx_save, ctx_save2;
1807
2.11M
               opus_val32 dist0, dist1;
1808
2.11M
               unsigned cm, cm2;
1809
2.11M
               int nstart_bytes, nend_bytes, save_bytes;
1810
2.11M
               unsigned char *bytes_buf;
1811
2.11M
               unsigned char bytes_save[1275];
1812
#ifdef ENABLE_QEXT
1813
               ec_ctx ext_ec_save, ext_ec_save2;
1814
               unsigned char *ext_bytes_buf;
1815
               int ext_nstart_bytes, ext_nend_bytes, ext_save_bytes;
1816
               unsigned char ext_bytes_save[QEXT_PACKET_SIZE_CAP];
1817
#endif
1818
2.11M
               opus_val16 w[2];
1819
2.11M
               compute_channel_weights(bandE[i], bandE[i+m->nbEBands], w);
1820
               /* Make a copy. */
1821
2.11M
               cm = x_cm|y_cm;
1822
2.11M
               ec_save = *ec;
1823
#ifdef ENABLE_QEXT
1824
               ext_ec_save = *ext_ec;
1825
#endif
1826
2.11M
               ctx_save = ctx;
1827
2.11M
               OPUS_COPY(X_save, X, N);
1828
2.11M
               OPUS_COPY(Y_save, Y, N);
1829
               /* Encode and round down. */
1830
2.11M
               ctx.theta_round = -1;
1831
2.11M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1832
2.11M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1833
2.11M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1834
2.11M
               dist0 = MULT16_32_Q15(w[0], celt_inner_prod_norm_shift(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod_norm_shift(Y_save, Y, N, arch));
1835
1836
               /* Save first result. */
1837
2.11M
               cm2 = x_cm;
1838
2.11M
               ec_save2 = *ec;
1839
#ifdef ENABLE_QEXT
1840
               ext_ec_save2 = *ext_ec;
1841
#endif
1842
2.11M
               ctx_save2 = ctx;
1843
2.11M
               OPUS_COPY(X_save2, X, N);
1844
2.11M
               OPUS_COPY(Y_save2, Y, N);
1845
2.11M
               if (!last)
1846
2.08M
                  OPUS_COPY(norm_save2, norm+M*eBands[i]-norm_offset, N);
1847
2.11M
               nstart_bytes = ec_save.offs;
1848
2.11M
               nend_bytes = ec_save.storage;
1849
2.11M
               bytes_buf = ec_save.buf+nstart_bytes;
1850
2.11M
               save_bytes = nend_bytes-nstart_bytes;
1851
2.11M
               OPUS_COPY(bytes_save, bytes_buf, save_bytes);
1852
#ifdef ENABLE_QEXT
1853
               ext_nstart_bytes = ext_ec_save.offs;
1854
               ext_nend_bytes = ext_ec_save.storage;
1855
               ext_bytes_buf = ext_ec_save.buf!=NULL ? ext_ec_save.buf+ext_nstart_bytes : NULL;
1856
               ext_save_bytes = ext_nend_bytes-ext_nstart_bytes;
1857
               if (ext_save_bytes) OPUS_COPY(ext_bytes_save, ext_bytes_buf, ext_save_bytes);
1858
#endif
1859
               /* Restore */
1860
2.11M
               *ec = ec_save;
1861
#ifdef ENABLE_QEXT
1862
               *ext_ec = ext_ec_save;
1863
#endif
1864
2.11M
               ctx = ctx_save;
1865
2.11M
               OPUS_COPY(X, X_save, N);
1866
2.11M
               OPUS_COPY(Y, Y_save, N);
1867
2.11M
#ifndef DISABLE_UPDATE_DRAFT
1868
2.11M
               if (i == start+1)
1869
194k
                  special_hybrid_folding(m, norm, norm2, start, M, dual_stereo);
1870
2.11M
#endif
1871
               /* Encode and round up. */
1872
2.11M
               ctx.theta_round = 1;
1873
2.11M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1874
2.11M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1875
2.11M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1876
2.11M
               dist1 = MULT16_32_Q15(w[0], celt_inner_prod_norm_shift(X_save, X, N, arch)) + MULT16_32_Q15(w[1], celt_inner_prod_norm_shift(Y_save, Y, N, arch));
1877
2.11M
               if (dist0 >= dist1) {
1878
1.70M
                  x_cm = cm2;
1879
1.70M
                  *ec = ec_save2;
1880
#ifdef ENABLE_QEXT
1881
                  *ext_ec = ext_ec_save2;
1882
#endif
1883
1.70M
                  ctx = ctx_save2;
1884
1.70M
                  OPUS_COPY(X, X_save2, N);
1885
1.70M
                  OPUS_COPY(Y, Y_save2, N);
1886
1.70M
                  if (!last)
1887
1.69M
                     OPUS_COPY(norm+M*eBands[i]-norm_offset, norm_save2, N);
1888
1.70M
                  OPUS_COPY(bytes_buf, bytes_save, save_bytes);
1889
#ifdef ENABLE_QEXT
1890
                  if (ext_save_bytes) OPUS_COPY(ext_bytes_buf, ext_bytes_save, ext_save_bytes);
1891
#endif
1892
1.70M
               }
1893
253M
            } else {
1894
253M
               ctx.theta_round = 0;
1895
253M
               x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1896
253M
                     effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1897
253M
                     last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm ARG_QEXT(ext_b) ARG_QEXT(cap));
1898
253M
            }
1899
556M
         } else {
1900
556M
            x_cm = quant_band(&ctx, X, N, b, B,
1901
556M
                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1902
556M
                  last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm|y_cm ARG_QEXT(ext_b));
1903
556M
         }
1904
811M
         y_cm = x_cm;
1905
811M
      }
1906
812M
      collapse_masks[i*C+0] = (unsigned char)x_cm;
1907
812M
      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1908
812M
      balance += pulses[i] + tell;
1909
1910
      /* Update the folding position only as long as we have 1 bit/sample depth. */
1911
812M
      update_lowband = b>(N<<BITRES);
1912
      /* We only need to avoid noise on a split for the first band. After that, we
1913
         have folding. */
1914
812M
      ctx.avoid_split_noise = 0;
1915
812M
   }
1916
53.5M
   *seed = ctx.seed;
1917
1918
53.5M
   RESTORE_STACK;
1919
53.5M
}