Coverage Report

Created: 2026-03-19 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/opus/celt/vq.c
Line
Count
Source
1
/* Copyright (c) 2007-2008 CSIRO
2
   Copyright (c) 2007-2009 Xiph.Org Foundation
3
   Written by Jean-Marc Valin */
4
/*
5
   Redistribution and use in source and binary forms, with or without
6
   modification, are permitted provided that the following conditions
7
   are met:
8
9
   - Redistributions of source code must retain the above copyright
10
   notice, this list of conditions and the following disclaimer.
11
12
   - Redistributions in binary form must reproduce the above copyright
13
   notice, this list of conditions and the following disclaimer in the
14
   documentation and/or other materials provided with the distribution.
15
16
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
*/
28
29
#ifdef HAVE_CONFIG_H
30
#include "config.h"
31
#endif
32
33
#include "mathops.h"
34
#include "cwrs.h"
35
#include "vq.h"
36
#include "arch.h"
37
#include "os_support.h"
38
#include "bands.h"
39
#include "rate.h"
40
#include "pitch.h"
41
#include "SigProc_FIX.h"
42
43
#if defined(FIXED_POINT)
44
108M
void norm_scaleup(celt_norm *X, int N, int shift) {
45
108M
   int i;
46
108M
   celt_assert(shift >= 0);
47
108M
   if (shift <= 0) return;
48
1.30G
   for (i=0;i<N;i++) X[i] = SHL32(X[i], shift);
49
108M
}
50
51
578M
void norm_scaledown(celt_norm *X, int N, int shift) {
52
578M
   int i;
53
578M
   celt_assert(shift >= 0);
54
578M
   if (shift <= 0) return;
55
2.34G
   for (i=0;i<N;i++) X[i] = PSHR32(X[i], shift);
56
274M
}
57
58
55.2M
opus_val32 celt_inner_prod_norm(const celt_norm *x, const celt_norm *y, int len, int arch) {
59
55.2M
   int i;
60
55.2M
   opus_val32 sum = 0;
61
55.2M
   (void)arch;
62
682M
   for (i=0;i<len;i++) sum += x[i]*y[i];
63
55.2M
   return sum;
64
55.2M
}
65
1.19G
opus_val32 celt_inner_prod_norm_shift(const celt_norm *x, const celt_norm *y, int len, int arch) {
66
1.19G
   int i;
67
1.19G
   opus_val64 sum = 0;
68
1.19G
   (void)arch;
69
10.4G
   for (i=0;i<len;i++) sum += x[i]*(opus_val64)y[i];
70
1.19G
   return sum>>2*(NORM_SHIFT-14);
71
1.19G
}
72
#endif
73
74
#ifndef OVERRIDE_vq_exp_rotation1
75
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
76
111M
{
77
111M
   int i;
78
111M
   opus_val16 ms;
79
111M
   celt_norm *Xptr;
80
111M
   Xptr = X;
81
111M
   ms = NEG16(s);
82
111M
   norm_scaledown(X, len, NORM_SHIFT-14);
83
1.06G
   for (i=0;i<len-stride;i++)
84
949M
   {
85
949M
      celt_norm x1, x2;
86
949M
      x1 = Xptr[0];
87
949M
      x2 = Xptr[stride];
88
949M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
89
949M
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
90
949M
   }
91
111M
   Xptr = &X[len-2*stride-1];
92
869M
   for (i=len-2*stride-1;i>=0;i--)
93
757M
   {
94
757M
      celt_norm x1, x2;
95
757M
      x1 = Xptr[0];
96
757M
      x2 = Xptr[stride];
97
757M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
98
757M
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
99
757M
   }
100
111M
   norm_scaleup(X, len, NORM_SHIFT-14);
101
111M
}
vq.c:exp_rotation1
Line
Count
Source
76
55.9M
{
77
55.9M
   int i;
78
55.9M
   opus_val16 ms;
79
55.9M
   celt_norm *Xptr;
80
55.9M
   Xptr = X;
81
55.9M
   ms = NEG16(s);
82
55.9M
   norm_scaledown(X, len, NORM_SHIFT-14);
83
530M
   for (i=0;i<len-stride;i++)
84
474M
   {
85
474M
      celt_norm x1, x2;
86
474M
      x1 = Xptr[0];
87
474M
      x2 = Xptr[stride];
88
474M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
89
474M
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
90
474M
   }
91
55.9M
   Xptr = &X[len-2*stride-1];
92
434M
   for (i=len-2*stride-1;i>=0;i--)
93
378M
   {
94
378M
      celt_norm x1, x2;
95
378M
      x1 = Xptr[0];
96
378M
      x2 = Xptr[stride];
97
378M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
98
378M
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
99
378M
   }
100
55.9M
   norm_scaleup(X, len, NORM_SHIFT-14);
101
55.9M
}
vq.c:exp_rotation1
Line
Count
Source
76
55.9M
{
77
55.9M
   int i;
78
55.9M
   opus_val16 ms;
79
55.9M
   celt_norm *Xptr;
80
55.9M
   Xptr = X;
81
55.9M
   ms = NEG16(s);
82
55.9M
   norm_scaledown(X, len, NORM_SHIFT-14);
83
530M
   for (i=0;i<len-stride;i++)
84
474M
   {
85
474M
      celt_norm x1, x2;
86
474M
      x1 = Xptr[0];
87
474M
      x2 = Xptr[stride];
88
474M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
89
474M
      *Xptr++      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
90
474M
   }
91
55.9M
   Xptr = &X[len-2*stride-1];
92
434M
   for (i=len-2*stride-1;i>=0;i--)
93
378M
   {
94
378M
      celt_norm x1, x2;
95
378M
      x1 = Xptr[0];
96
378M
      x2 = Xptr[stride];
97
378M
      Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2),  s, x1), 15));
98
378M
      *Xptr--      = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
99
378M
   }
100
55.9M
   norm_scaleup(X, len, NORM_SHIFT-14);
101
55.9M
}
102
#endif /* OVERRIDE_vq_exp_rotation1 */
103
104
void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
105
1.05G
{
106
1.05G
   static const int SPREAD_FACTOR[3]={15,10,5};
107
1.05G
   int i;
108
1.05G
   opus_val16 c, s;
109
1.05G
   opus_val16 gain, theta;
110
1.05G
   int stride2=0;
111
1.05G
   int factor;
112
113
1.05G
   if (2*K>=len || spread==SPREAD_NONE)
114
1.00G
      return;
115
42.6M
   factor = SPREAD_FACTOR[spread-1];
116
117
42.6M
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
118
42.6M
   theta = HALF16(MULT16_16_Q15(gain,gain));
119
120
42.6M
   c = celt_cos_norm(EXTEND32(theta));
121
42.6M
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
122
123
42.6M
   if (len>=8*stride)
124
30.6M
   {
125
30.6M
      stride2 = 1;
126
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
127
         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
128
120M
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
129
89.4M
         stride2++;
130
30.6M
   }
131
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
132
      extract_collapse_mask().*/
133
42.6M
   len = celt_udiv(len, stride);
134
121M
   for (i=0;i<stride;i++)
135
78.7M
   {
136
78.7M
      if (dir < 0)
137
7.70M
      {
138
7.70M
         if (stride2)
139
1.11M
            exp_rotation1(X+i*len, len, stride2, s, c);
140
7.70M
         exp_rotation1(X+i*len, len, 1, c, s);
141
71.0M
      } else {
142
71.0M
         exp_rotation1(X+i*len, len, 1, c, -s);
143
71.0M
         if (stride2)
144
32.1M
            exp_rotation1(X+i*len, len, stride2, s, -c);
145
71.0M
      }
146
78.7M
   }
147
42.6M
}
exp_rotation
Line
Count
Source
105
525M
{
106
525M
   static const int SPREAD_FACTOR[3]={15,10,5};
107
525M
   int i;
108
525M
   opus_val16 c, s;
109
525M
   opus_val16 gain, theta;
110
525M
   int stride2=0;
111
525M
   int factor;
112
113
525M
   if (2*K>=len || spread==SPREAD_NONE)
114
504M
      return;
115
21.3M
   factor = SPREAD_FACTOR[spread-1];
116
117
21.3M
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
118
21.3M
   theta = HALF16(MULT16_16_Q15(gain,gain));
119
120
21.3M
   c = celt_cos_norm(EXTEND32(theta));
121
21.3M
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
122
123
21.3M
   if (len>=8*stride)
124
15.3M
   {
125
15.3M
      stride2 = 1;
126
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
127
         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
128
60.0M
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
129
44.7M
         stride2++;
130
15.3M
   }
131
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
132
      extract_collapse_mask().*/
133
21.3M
   len = celt_udiv(len, stride);
134
60.6M
   for (i=0;i<stride;i++)
135
39.3M
   {
136
39.3M
      if (dir < 0)
137
3.85M
      {
138
3.85M
         if (stride2)
139
556k
            exp_rotation1(X+i*len, len, stride2, s, c);
140
3.85M
         exp_rotation1(X+i*len, len, 1, c, s);
141
35.5M
      } else {
142
35.5M
         exp_rotation1(X+i*len, len, 1, c, -s);
143
35.5M
         if (stride2)
144
16.0M
            exp_rotation1(X+i*len, len, stride2, s, -c);
145
35.5M
      }
146
39.3M
   }
147
21.3M
}
exp_rotation
Line
Count
Source
105
525M
{
106
525M
   static const int SPREAD_FACTOR[3]={15,10,5};
107
525M
   int i;
108
525M
   opus_val16 c, s;
109
525M
   opus_val16 gain, theta;
110
525M
   int stride2=0;
111
525M
   int factor;
112
113
525M
   if (2*K>=len || spread==SPREAD_NONE)
114
504M
      return;
115
21.3M
   factor = SPREAD_FACTOR[spread-1];
116
117
21.3M
   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
118
21.3M
   theta = HALF16(MULT16_16_Q15(gain,gain));
119
120
21.3M
   c = celt_cos_norm(EXTEND32(theta));
121
21.3M
   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
122
123
21.3M
   if (len>=8*stride)
124
15.3M
   {
125
15.3M
      stride2 = 1;
126
      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
127
         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
128
60.0M
      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
129
44.7M
         stride2++;
130
15.3M
   }
131
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
132
      extract_collapse_mask().*/
133
21.3M
   len = celt_udiv(len, stride);
134
60.6M
   for (i=0;i<stride;i++)
135
39.3M
   {
136
39.3M
      if (dir < 0)
137
3.85M
      {
138
3.85M
         if (stride2)
139
556k
            exp_rotation1(X+i*len, len, stride2, s, c);
140
3.85M
         exp_rotation1(X+i*len, len, 1, c, s);
141
35.5M
      } else {
142
35.5M
         exp_rotation1(X+i*len, len, 1, c, -s);
143
35.5M
         if (stride2)
144
16.0M
            exp_rotation1(X+i*len, len, stride2, s, -c);
145
35.5M
      }
146
39.3M
   }
147
21.3M
}
148
149
/** Normalizes the decoded integer pvq codeword to unit norm. */
150
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
151
      int N, opus_val32 Ryy, opus_val32 gain, int shift)
152
53.0M
{
153
53.0M
   int i;
154
#ifdef FIXED_POINT
155
   int k;
156
#endif
157
53.0M
   opus_val32 t;
158
53.0M
   opus_val32 g;
159
160
#ifdef FIXED_POINT
161
   k = celt_ilog2(Ryy)>>1;
162
#endif
163
53.0M
   t = VSHR32(Ryy, 2*(k-7)-15);
164
53.0M
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
165
53.0M
   i=0;
166
53.0M
   (void)shift;
167
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
168
7.64M
   if (shift>0) {
169
10.1k
      int tot_shift = NORM_SHIFT+1-k-shift;
170
10.1k
      if (tot_shift >= 0) {
171
39.0k
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
172
39.0k
         while (++i < N);
173
10.1k
      } else {
174
0
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
175
0
         while (++i < N);
176
0
      }
177
10.1k
   } else
178
7.63M
#endif
179
376M
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
180
376M
   while (++i < N);
181
53.0M
}
vq.c:normalise_residual
Line
Count
Source
152
44.0M
{
153
44.0M
   int i;
154
44.0M
#ifdef FIXED_POINT
155
44.0M
   int k;
156
44.0M
#endif
157
44.0M
   opus_val32 t;
158
44.0M
   opus_val32 g;
159
160
44.0M
#ifdef FIXED_POINT
161
44.0M
   k = celt_ilog2(Ryy)>>1;
162
44.0M
#endif
163
44.0M
   t = VSHR32(Ryy, 2*(k-7)-15);
164
44.0M
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
165
44.0M
   i=0;
166
44.0M
   (void)shift;
167
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
168
   if (shift>0) {
169
      int tot_shift = NORM_SHIFT+1-k-shift;
170
      if (tot_shift >= 0) {
171
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
172
         while (++i < N);
173
      } else {
174
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
175
         while (++i < N);
176
      }
177
   } else
178
#endif
179
299M
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
180
299M
   while (++i < N);
181
44.0M
}
vq.c:normalise_residual
Line
Count
Source
152
7.64M
{
153
7.64M
   int i;
154
7.64M
#ifdef FIXED_POINT
155
7.64M
   int k;
156
7.64M
#endif
157
7.64M
   opus_val32 t;
158
7.64M
   opus_val32 g;
159
160
7.64M
#ifdef FIXED_POINT
161
7.64M
   k = celt_ilog2(Ryy)>>1;
162
7.64M
#endif
163
7.64M
   t = VSHR32(Ryy, 2*(k-7)-15);
164
7.64M
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
165
7.64M
   i=0;
166
7.64M
   (void)shift;
167
7.64M
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
168
7.64M
   if (shift>0) {
169
10.1k
      int tot_shift = NORM_SHIFT+1-k-shift;
170
10.1k
      if (tot_shift >= 0) {
171
39.0k
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
172
39.0k
         while (++i < N);
173
10.1k
      } else {
174
0
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
175
0
         while (++i < N);
176
0
      }
177
10.1k
   } else
178
7.63M
#endif
179
68.6M
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
180
68.6M
   while (++i < N);
181
7.64M
}
vq.c:normalise_residual
Line
Count
Source
152
1.42M
{
153
1.42M
   int i;
154
#ifdef FIXED_POINT
155
   int k;
156
#endif
157
1.42M
   opus_val32 t;
158
1.42M
   opus_val32 g;
159
160
#ifdef FIXED_POINT
161
   k = celt_ilog2(Ryy)>>1;
162
#endif
163
1.42M
   t = VSHR32(Ryy, 2*(k-7)-15);
164
1.42M
   g = MULT32_32_Q31(celt_rsqrt_norm32(t),gain);
165
1.42M
   i=0;
166
1.42M
   (void)shift;
167
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
168
   if (shift>0) {
169
      int tot_shift = NORM_SHIFT+1-k-shift;
170
      if (tot_shift >= 0) {
171
         do X[i] = MULT32_32_Q31(g, SHL32(iy[i], tot_shift));
172
         while (++i < N);
173
      } else {
174
         do X[i] = MULT32_32_Q31(g, PSHR32(iy[i], -tot_shift));
175
         while (++i < N);
176
      }
177
   } else
178
#endif
179
7.35M
   do X[i] = VSHR32(MULT16_32_Q15(iy[i], g), k+15-NORM_SHIFT);
180
7.35M
   while (++i < N);
181
1.42M
}
182
183
static unsigned extract_collapse_mask(int *iy, int N, int B)
184
474M
{
185
474M
   unsigned collapse_mask;
186
474M
   int N0;
187
474M
   int i;
188
474M
   if (B<=1)
189
452M
      return 1;
190
   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
191
      exp_rotation().*/
192
21.9M
   N0 = celt_udiv(N, B);
193
21.9M
   collapse_mask = 0;
194
82.8M
   i=0; do {
195
82.8M
      int j;
196
82.8M
      unsigned tmp=0;
197
170M
      j=0; do {
198
170M
         tmp |= iy[i*N0+j];
199
170M
      } while (++j<N0);
200
82.8M
      collapse_mask |= (tmp!=0)<<i;
201
82.8M
   } while (++i<B);
202
21.9M
   return collapse_mask;
203
474M
}
204
205
opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
206
469M
{
207
469M
   VARDECL(celt_norm, y);
208
469M
   VARDECL(int, signx);
209
469M
   int i, j;
210
469M
   int pulsesLeft;
211
469M
   opus_val32 sum;
212
469M
   opus_val32 xy;
213
469M
   opus_val16 yy;
214
469M
   SAVE_STACK;
215
216
469M
   (void)arch;
217
469M
   ALLOC(y, N, celt_norm);
218
469M
   ALLOC(signx, N, int);
219
#ifdef FIXED_POINT
220
   {
221
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
222
469M
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
223
      norm_scaledown(X, N, shift);
224
   }
225
#endif
226
   /* Get rid of the sign */
227
469M
   sum = 0;
228
2.88G
   j=0; do {
229
2.88G
      signx[j] = X[j]<0;
230
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
231
2.88G
      X[j] = ABS16(X[j]);
232
2.88G
      iy[j] = 0;
233
2.88G
      y[j] = 0;
234
2.88G
   } while (++j<N);
235
236
469M
   xy = yy = 0;
237
238
469M
   pulsesLeft = K;
239
240
   /* Do a pre-search by projecting on the pyramid */
241
469M
   if (K > (N>>1))
242
332M
   {
243
332M
      opus_val16 rcp;
244
1.48G
      j=0; do {
245
1.48G
         sum += X[j];
246
1.48G
      }  while (++j<N);
247
248
      /* If X is too small, just replace it with a pulse at 0 */
249
#ifdef FIXED_POINT
250
332M
      if (sum <= K)
251
#else
252
      /* Prevents infinities and NaNs from causing too many pulses
253
         to be allocated. 64 is an approximation of infinity here. */
254
0
      if (!(sum > EPSILON && sum < 64))
255
0
#endif
256
187M
      {
257
187M
         X[0] = QCONST16(1.f,14);
258
187M
         j=1; do
259
686M
            X[j]=0;
260
686M
         while (++j<N);
261
187M
         sum = QCONST16(1.f,14);
262
187M
      }
263
#ifdef FIXED_POINT
264
332M
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
265
#else
266
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
267
0
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
268
#endif
269
1.48G
      j=0; do {
270
#ifdef FIXED_POINT
271
         /* It's really important to round *towards zero* here */
272
1.48G
         iy[j] = MULT16_16_Q15(X[j],rcp);
273
#else
274
         iy[j] = (int)floor(rcp*X[j]);
275
#endif
276
1.48G
         y[j] = (celt_norm)iy[j];
277
1.48G
         yy = MAC16_16(yy, y[j],y[j]);
278
1.48G
         xy = MAC16_16(xy, X[j],y[j]);
279
1.48G
         y[j] *= 2;
280
1.48G
         pulsesLeft -= iy[j];
281
1.48G
      }  while (++j<N);
282
332M
   }
283
469M
   celt_sig_assert(pulsesLeft>=0);
284
285
   /* This should never happen, but just in case it does (e.g. on silence)
286
      we fill the first bin with pulses. */
287
#ifdef FIXED_POINT_DEBUG
288
   celt_sig_assert(pulsesLeft<=N+3);
289
#endif
290
469M
   if (pulsesLeft > N+3)
291
0
   {
292
0
      opus_val16 tmp = (opus_val16)pulsesLeft;
293
0
      yy = MAC16_16(yy, tmp, tmp);
294
0
      yy = MAC16_16(yy, tmp, y[0]);
295
0
      iy[0] += pulsesLeft;
296
0
      pulsesLeft=0;
297
0
   }
298
299
1.30G
   for (i=0;i<pulsesLeft;i++)
300
830M
   {
301
830M
      opus_val16 Rxy, Ryy;
302
830M
      int best_id;
303
830M
      opus_val32 best_num;
304
830M
      opus_val16 best_den;
305
#ifdef FIXED_POINT
306
      int rshift;
307
#endif
308
#ifdef FIXED_POINT
309
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
310
#endif
311
830M
      best_id = 0;
312
      /* The squared magnitude term gets added anyway, so we might as well
313
         add it outside the loop */
314
830M
      yy = ADD16(yy, 1);
315
316
      /* Calculations for position 0 are out of the loop, in part to reduce
317
         mispredicted branches (since the if condition is usually false)
318
         in the loop. */
319
      /* Temporary sums of the new pulse(s) */
320
830M
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
321
      /* We're multiplying y[j] by two so we don't have to do it here */
322
830M
      Ryy = ADD16(yy, y[0]);
323
324
      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
325
         Rxy is positive because the sign is pre-computed) */
326
830M
      Rxy = MULT16_16_Q15(Rxy,Rxy);
327
830M
      best_den = Ryy;
328
830M
      best_num = Rxy;
329
830M
      j=1;
330
6.50G
      do {
331
         /* Temporary sums of the new pulse(s) */
332
6.50G
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
333
         /* We're multiplying y[j] by two so we don't have to do it here */
334
6.50G
         Ryy = ADD16(yy, y[j]);
335
336
         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
337
            Rxy is positive because the sign is pre-computed) */
338
6.50G
         Rxy = MULT16_16_Q15(Rxy,Rxy);
339
         /* The idea is to check for num/den >= best_num/best_den, but that way
340
            we can do it without any division */
341
         /* OPT: It's not clear whether a cmov is faster than a branch here
342
            since the condition is more often false than true and using
343
            a cmov introduces data dependencies across iterations. The optimal
344
            choice may be architecture-dependent. */
345
6.50G
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
346
471M
         {
347
471M
            best_den = Ryy;
348
471M
            best_num = Rxy;
349
471M
            best_id = j;
350
471M
         }
351
6.50G
      } while (++j<N);
352
353
      /* Updating the sums of the new pulse(s) */
354
830M
      xy = ADD32(xy, EXTEND32(X[best_id]));
355
      /* We're multiplying y[j] by two so we don't have to do it here */
356
830M
      yy = ADD16(yy, y[best_id]);
357
358
      /* Only now that we've made the final choice, update y/iy */
359
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
360
830M
      y[best_id] += 2;
361
830M
      iy[best_id]++;
362
830M
   }
363
364
   /* Put the original sign back */
365
469M
   j=0;
366
2.88G
   do {
367
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
368
      /* OPT: The is more likely to be compiled without a branch than the code above
369
         but has the same performance otherwise. */
370
2.88G
      iy[j] = (iy[j]^-signx[j]) + signx[j];
371
2.88G
   } while (++j<N);
372
469M
   RESTORE_STACK;
373
469M
   return yy;
374
469M
}
op_pvq_search_c
Line
Count
Source
206
469M
{
207
469M
   VARDECL(celt_norm, y);
208
469M
   VARDECL(int, signx);
209
469M
   int i, j;
210
469M
   int pulsesLeft;
211
469M
   opus_val32 sum;
212
469M
   opus_val32 xy;
213
469M
   opus_val16 yy;
214
469M
   SAVE_STACK;
215
216
469M
   (void)arch;
217
469M
   ALLOC(y, N, celt_norm);
218
469M
   ALLOC(signx, N, int);
219
469M
#ifdef FIXED_POINT
220
469M
   {
221
469M
      int shift = (celt_ilog2(1+celt_inner_prod_norm_shift(X, X, N, arch))+1)/2;
222
469M
      shift = IMAX(0, shift+(NORM_SHIFT-14)-14);
223
469M
      norm_scaledown(X, N, shift);
224
469M
   }
225
469M
#endif
226
   /* Get rid of the sign */
227
469M
   sum = 0;
228
2.88G
   j=0; do {
229
2.88G
      signx[j] = X[j]<0;
230
      /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
231
2.88G
      X[j] = ABS16(X[j]);
232
2.88G
      iy[j] = 0;
233
2.88G
      y[j] = 0;
234
2.88G
   } while (++j<N);
235
236
469M
   xy = yy = 0;
237
238
469M
   pulsesLeft = K;
239
240
   /* Do a pre-search by projecting on the pyramid */
241
469M
   if (K > (N>>1))
242
332M
   {
243
332M
      opus_val16 rcp;
244
1.48G
      j=0; do {
245
1.48G
         sum += X[j];
246
1.48G
      }  while (++j<N);
247
248
      /* If X is too small, just replace it with a pulse at 0 */
249
332M
#ifdef FIXED_POINT
250
332M
      if (sum <= K)
251
#else
252
      /* Prevents infinities and NaNs from causing too many pulses
253
         to be allocated. 64 is an approximation of infinity here. */
254
      if (!(sum > EPSILON && sum < 64))
255
#endif
256
187M
      {
257
187M
         X[0] = QCONST16(1.f,14);
258
187M
         j=1; do
259
686M
            X[j]=0;
260
686M
         while (++j<N);
261
187M
         sum = QCONST16(1.f,14);
262
187M
      }
263
332M
#ifdef FIXED_POINT
264
332M
      rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
265
#else
266
      /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
267
      rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
268
#endif
269
1.48G
      j=0; do {
270
1.48G
#ifdef FIXED_POINT
271
         /* It's really important to round *towards zero* here */
272
1.48G
         iy[j] = MULT16_16_Q15(X[j],rcp);
273
#else
274
         iy[j] = (int)floor(rcp*X[j]);
275
#endif
276
1.48G
         y[j] = (celt_norm)iy[j];
277
1.48G
         yy = MAC16_16(yy, y[j],y[j]);
278
1.48G
         xy = MAC16_16(xy, X[j],y[j]);
279
1.48G
         y[j] *= 2;
280
1.48G
         pulsesLeft -= iy[j];
281
1.48G
      }  while (++j<N);
282
332M
   }
283
469M
   celt_sig_assert(pulsesLeft>=0);
284
285
   /* This should never happen, but just in case it does (e.g. on silence)
286
      we fill the first bin with pulses. */
287
#ifdef FIXED_POINT_DEBUG
288
   celt_sig_assert(pulsesLeft<=N+3);
289
#endif
290
469M
   if (pulsesLeft > N+3)
291
0
   {
292
0
      opus_val16 tmp = (opus_val16)pulsesLeft;
293
0
      yy = MAC16_16(yy, tmp, tmp);
294
0
      yy = MAC16_16(yy, tmp, y[0]);
295
0
      iy[0] += pulsesLeft;
296
0
      pulsesLeft=0;
297
0
   }
298
299
1.30G
   for (i=0;i<pulsesLeft;i++)
300
830M
   {
301
830M
      opus_val16 Rxy, Ryy;
302
830M
      int best_id;
303
830M
      opus_val32 best_num;
304
830M
      opus_val16 best_den;
305
830M
#ifdef FIXED_POINT
306
830M
      int rshift;
307
830M
#endif
308
830M
#ifdef FIXED_POINT
309
830M
      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
310
830M
#endif
311
830M
      best_id = 0;
312
      /* The squared magnitude term gets added anyway, so we might as well
313
         add it outside the loop */
314
830M
      yy = ADD16(yy, 1);
315
316
      /* Calculations for position 0 are out of the loop, in part to reduce
317
         mispredicted branches (since the if condition is usually false)
318
         in the loop. */
319
      /* Temporary sums of the new pulse(s) */
320
830M
      Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
321
      /* We're multiplying y[j] by two so we don't have to do it here */
322
830M
      Ryy = ADD16(yy, y[0]);
323
324
      /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
325
         Rxy is positive because the sign is pre-computed) */
326
830M
      Rxy = MULT16_16_Q15(Rxy,Rxy);
327
830M
      best_den = Ryy;
328
830M
      best_num = Rxy;
329
830M
      j=1;
330
6.50G
      do {
331
         /* Temporary sums of the new pulse(s) */
332
6.50G
         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
333
         /* We're multiplying y[j] by two so we don't have to do it here */
334
6.50G
         Ryy = ADD16(yy, y[j]);
335
336
         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
337
            Rxy is positive because the sign is pre-computed) */
338
6.50G
         Rxy = MULT16_16_Q15(Rxy,Rxy);
339
         /* The idea is to check for num/den >= best_num/best_den, but that way
340
            we can do it without any division */
341
         /* OPT: It's not clear whether a cmov is faster than a branch here
342
            since the condition is more often false than true and using
343
            a cmov introduces data dependencies across iterations. The optimal
344
            choice may be architecture-dependent. */
345
6.50G
         if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
346
471M
         {
347
471M
            best_den = Ryy;
348
471M
            best_num = Rxy;
349
471M
            best_id = j;
350
471M
         }
351
6.50G
      } while (++j<N);
352
353
      /* Updating the sums of the new pulse(s) */
354
830M
      xy = ADD32(xy, EXTEND32(X[best_id]));
355
      /* We're multiplying y[j] by two so we don't have to do it here */
356
830M
      yy = ADD16(yy, y[best_id]);
357
358
      /* Only now that we've made the final choice, update y/iy */
359
      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
360
830M
      y[best_id] += 2;
361
830M
      iy[best_id]++;
362
830M
   }
363
364
   /* Put the original sign back */
365
469M
   j=0;
366
2.88G
   do {
367
      /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
368
      /* OPT: The is more likely to be compiled without a branch than the code above
369
         but has the same performance otherwise. */
370
2.88G
      iy[j] = (iy[j]^-signx[j]) + signx[j];
371
2.88G
   } while (++j<N);
372
469M
   RESTORE_STACK;
373
469M
   return yy;
374
469M
}
Unexecuted instantiation: op_pvq_search_c
375
376
#ifdef ENABLE_QEXT
377
#include "macros.h"
378
379
303k
static opus_val32 op_pvq_search_N2(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int shift) {
380
303k
   opus_val32 sum;
381
303k
   opus_val32 rcp_sum;
382
303k
   int offset;
383
303k
   sum = ABS32(X[0]) + ABS32(X[1]);
384
303k
   if (sum < EPSILON) {
385
100k
      iy[0] = K;
386
100k
      up_iy[0] = up*K;
387
100k
      iy[1]=up_iy[1]=0;
388
100k
      *refine=0;
389
100k
#ifdef FIXED_POINT
390
100k
      return (opus_val64)K*K*up*up>>2*shift;
391
#else
392
      (void)shift;
393
      return K*(float)K*up*up;
394
#endif
395
100k
   }
396
202k
#ifdef FIXED_POINT
397
202k
   int sum_shift;
398
202k
   opus_val32 X0;
399
202k
   sum_shift = 30-celt_ilog2(sum);
400
202k
   rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
401
202k
   X0 = MULT32_32_Q31(SHL32(X[0], sum_shift), rcp_sum);
402
202k
   iy[0] = PSHR32(MULT32_32_Q31(SHL32(K, 8), X0), 7);
403
202k
   up_iy[0] = PSHR32(MULT32_32_Q31(SHL32(up*K, 8), X0), 7);
404
#else
405
   rcp_sum = 1.f/sum;
406
   iy[0] = (int)floor(.5f+K*X[0]*rcp_sum);
407
   up_iy[0] = (int)floor(.5f+up*K*X[0]*rcp_sum);
408
#endif
409
202k
   up_iy[0] = IMAX(up*iy[0] - (up-1)/2, IMIN(up*iy[0] + (up-1)/2, up_iy[0]));
410
202k
   offset = up_iy[0] - up*iy[0];
411
202k
   iy[1] = K-abs(iy[0]);
412
202k
   up_iy[1] = up*K-abs(up_iy[0]);
413
202k
   if (X[1] < 0) {
414
178k
      iy[1] = -iy[1];
415
178k
      up_iy[1] = -up_iy[1];
416
178k
      offset = -offset;
417
178k
   }
418
202k
   *refine = offset;
419
202k
#ifdef FIXED_POINT
420
202k
   return (up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1] + (1<<2*shift>>1))>>2*shift;
421
#else
422
   return up_iy[0]*(opus_val64)up_iy[0] + up_iy[1]*(opus_val64)up_iy[1];
423
#endif
424
303k
}
425
426
1.43M
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
427
1.43M
   int i;
428
1.43M
   int dir;
429
1.43M
   VARDECL(opus_val32, rounding);
430
1.43M
   int iysum = 0;
431
1.43M
   SAVE_STACK;
432
1.43M
   ALLOC(rounding, N, opus_val32);
433
16.2M
   for (i=0;i<N;i++) {
434
14.8M
      opus_val32 tmp;
435
14.8M
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
436
#ifdef FIXED_POINT
437
      iy[i] = (tmp+64) >> 7;
438
#else
439
      iy[i] = (int)floor(.5+tmp);
440
#endif
441
14.8M
      rounding[i] = tmp - SHL32(iy[i], 7);
442
14.8M
   }
443
1.43M
   if (iy != iy0) {
444
8.12M
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
445
719k
   }
446
16.2M
   for (i=0;i<N;i++) iysum += iy[i];
447
1.43M
   if (abs(iysum - K) > 32) {
448
0
      RESTORE_STACK;
449
0
      return 1;
450
0
   }
451
1.43M
   dir = iysum < K ? 1 : -1;
452
2.78M
   while (iysum != K) {
453
1.34M
      opus_val32 roundval=-1000000*dir;
454
1.34M
      int roundpos=0;
455
20.7M
      for (i=0;i<N;i++) {
456
19.4M
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
457
3.51M
            roundval = rounding[i];
458
3.51M
            roundpos = i;
459
3.51M
         }
460
19.4M
      }
461
1.34M
      iy[roundpos] += dir;
462
1.34M
      rounding[roundpos] -= SHL32(dir, 15);
463
1.34M
      iysum+=dir;
464
1.34M
   }
465
1.43M
   RESTORE_STACK;
466
1.43M
   return 0;
467
1.43M
}
vq.c:op_pvq_refine
Line
Count
Source
426
719k
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
427
719k
   int i;
428
719k
   int dir;
429
719k
   VARDECL(opus_val32, rounding);
430
719k
   int iysum = 0;
431
719k
   SAVE_STACK;
432
719k
   ALLOC(rounding, N, opus_val32);
433
8.12M
   for (i=0;i<N;i++) {
434
7.41M
      opus_val32 tmp;
435
7.41M
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
436
7.41M
#ifdef FIXED_POINT
437
7.41M
      iy[i] = (tmp+64) >> 7;
438
#else
439
      iy[i] = (int)floor(.5+tmp);
440
#endif
441
7.41M
      rounding[i] = tmp - SHL32(iy[i], 7);
442
7.41M
   }
443
719k
   if (iy != iy0) {
444
4.06M
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
445
359k
   }
446
8.12M
   for (i=0;i<N;i++) iysum += iy[i];
447
719k
   if (abs(iysum - K) > 32) {
448
0
      RESTORE_STACK;
449
0
      return 1;
450
0
   }
451
719k
   dir = iysum < K ? 1 : -1;
452
1.39M
   while (iysum != K) {
453
672k
      opus_val32 roundval=-1000000*dir;
454
672k
      int roundpos=0;
455
10.3M
      for (i=0;i<N;i++) {
456
9.71M
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
457
1.75M
            roundval = rounding[i];
458
1.75M
            roundpos = i;
459
1.75M
         }
460
9.71M
      }
461
672k
      iy[roundpos] += dir;
462
672k
      rounding[roundpos] -= SHL32(dir, 15);
463
672k
      iysum+=dir;
464
672k
   }
465
719k
   RESTORE_STACK;
466
719k
   return 0;
467
719k
}
vq.c:op_pvq_refine
Line
Count
Source
426
719k
static int op_pvq_refine(const opus_val32 *Xn, int *iy, int *iy0, int K, int up, int margin, int N) {
427
719k
   int i;
428
719k
   int dir;
429
719k
   VARDECL(opus_val32, rounding);
430
719k
   int iysum = 0;
431
719k
   SAVE_STACK;
432
719k
   ALLOC(rounding, N, opus_val32);
433
8.12M
   for (i=0;i<N;i++) {
434
7.41M
      opus_val32 tmp;
435
7.41M
      tmp = MULT32_32_Q31(SHL32(K, 8), Xn[i]);
436
#ifdef FIXED_POINT
437
      iy[i] = (tmp+64) >> 7;
438
#else
439
7.41M
      iy[i] = (int)floor(.5+tmp);
440
7.41M
#endif
441
7.41M
      rounding[i] = tmp - SHL32(iy[i], 7);
442
7.41M
   }
443
719k
   if (iy != iy0) {
444
4.06M
      for (i=0;i<N;i++) iy[i] = IMIN(up*iy0[i]+up-1, IMAX(up*iy0[i]-up+1, iy[i]));
445
359k
   }
446
8.12M
   for (i=0;i<N;i++) iysum += iy[i];
447
719k
   if (abs(iysum - K) > 32) {
448
0
      RESTORE_STACK;
449
0
      return 1;
450
0
   }
451
719k
   dir = iysum < K ? 1 : -1;
452
1.39M
   while (iysum != K) {
453
672k
      opus_val32 roundval=-1000000*dir;
454
672k
      int roundpos=0;
455
10.3M
      for (i=0;i<N;i++) {
456
9.71M
         if ((rounding[i]-roundval)*dir > 0 && abs(iy[i]-up*iy0[i]) < (margin-1) && !(dir==-1 && iy[i] == 0)) {
457
1.75M
            roundval = rounding[i];
458
1.75M
            roundpos = i;
459
1.75M
         }
460
9.71M
      }
461
672k
      iy[roundpos] += dir;
462
672k
      rounding[roundpos] -= SHL32(dir, 15);
463
672k
      iysum+=dir;
464
672k
   }
465
719k
   RESTORE_STACK;
466
719k
   return 0;
467
719k
}
468
469
460k
static opus_val32 op_pvq_search_extra(const celt_norm *X, int *iy, int *up_iy, int K, int up, int *refine, int N, int shift) {
470
460k
   opus_val32 rcp_sum;
471
460k
   opus_val32 sum=0;
472
460k
   int i;
473
460k
   int failed=0;
474
460k
   opus_val64 yy=0;
475
460k
   VARDECL(opus_val32, Xn);
476
460k
   SAVE_STACK;
477
4.69M
   for (i=0;i<N;i++) sum += ABS32(X[i]);
478
460k
   ALLOC(Xn, N, opus_val32);
479
460k
   if (sum < EPSILON)
480
100k
      failed = 1;
481
359k
   else {
482
359k
#ifdef FIXED_POINT
483
359k
      int sum_shift = 30-celt_ilog2(sum);
484
359k
      rcp_sum = celt_rcp_norm32(SHL32(sum, sum_shift));
485
4.06M
      for (i=0;i<N;i++) {
486
3.70M
         Xn[i] = MULT32_32_Q31(SHL32(ABS32(X[i]), sum_shift), rcp_sum);
487
3.70M
      }
488
#else
489
      rcp_sum = celt_rcp(sum);
490
      for (i=0;i<N;i++) {
491
         Xn[i] = ABS32(X[i])*rcp_sum;
492
      }
493
#endif
494
359k
   }
495
460k
   failed = failed || op_pvq_refine(Xn, iy, iy, K, 1, K+1, N);
496
460k
   failed = failed || op_pvq_refine(Xn, up_iy, iy, up*K, up, up, N);
497
460k
   if (failed) {
498
100k
      iy[0] = K;
499
533k
      for (i=1;i<N;i++) iy[i] = 0;
500
100k
      up_iy[0] = up*K;
501
533k
      for (i=1;i<N;i++) up_iy[i] = 0;
502
100k
   }
503
4.69M
   for (i=0;i<N;i++) {
504
4.23M
      yy += up_iy[i]*(opus_val64)up_iy[i];
505
4.23M
      if (X[i] < 0) {
506
1.75M
         iy[i] = -iy[i];
507
1.75M
         up_iy[i] = -up_iy[i];
508
1.75M
      }
509
4.23M
      refine[i] = up_iy[i]-up*iy[i];
510
4.23M
   }
511
460k
   RESTORE_STACK;
512
460k
#ifdef FIXED_POINT
513
460k
   return (yy + (1<<2*shift>>1))>>2*shift;
514
#else
515
   (void)shift;
516
   return yy;
517
#endif
518
460k
}
519
#endif
520
521
#ifdef ENABLE_QEXT
522
/* Take advantage of the fact that "large" refine values are much less likely
523
   than smaller ones. */
524
3.77M
static void ec_enc_refine(ec_enc *enc, opus_int32 refine, opus_int32 up, int extra_bits, int use_entropy) {
525
3.77M
   int large;
526
3.77M
   large = abs(refine)>up/2;
527
3.77M
   ec_enc_bit_logp(enc, large, use_entropy ? 3 : 1);
528
3.77M
   if (large) {
529
344k
      ec_enc_bits(enc, refine < 0, 1);
530
344k
      ec_enc_bits(enc, abs(refine)-up/2-1, extra_bits-1);
531
3.43M
   } else {
532
3.43M
      ec_enc_bits(enc, refine+up/2, extra_bits);
533
3.43M
   }
534
3.77M
}
535
536
136k
static int ec_dec_refine(ec_enc *dec, opus_int32 up, int extra_bits, int use_entropy) {
537
136k
   int large, refine;
538
136k
   large = ec_dec_bit_logp(dec, use_entropy ? 3 : 1);
539
136k
   if (large) {
540
21.7k
      int sign = ec_dec_bits(dec, 1);
541
21.7k
      refine = ec_dec_bits(dec, extra_bits-1) + up/2+1;
542
21.7k
      if (sign) refine = -refine;
543
114k
   } else {
544
114k
      refine = (opus_int32)ec_dec_bits(dec, extra_bits)-up/2;
545
114k
   }
546
136k
   return refine;
547
136k
}
548
#endif
549
550
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
551
      opus_val32 gain, int resynth
552
      ARG_QEXT(ec_enc *ext_enc) ARG_QEXT(int extra_bits), int arch)
553
945M
{
554
945M
   VARDECL(int, iy);
555
945M
   opus_val32 yy;
556
945M
   unsigned collapse_mask;
557
#ifdef ENABLE_QEXT
558
   int yy_shift = 0;
559
#endif
560
945M
   SAVE_STACK;
561
562
945M
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
945M
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
945M
   ALLOC(iy, N+3, int);
567
568
945M
   exp_rotation(X, N, 1, B, K, spread);
569
570
#ifdef ENABLE_QEXT
571
375M
   if (N==2 && extra_bits >= 2) {
572
606k
      int refine;
573
606k
      int up_iy[2];
574
606k
      int up;
575
606k
      yy_shift = IMAX(0, extra_bits-7);
576
606k
      up = (1<<extra_bits)-1;
577
606k
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
606k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
606k
      encode_pulses(iy, N, K, enc);
580
606k
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
606k
      if (resynth)
582
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
374M
   } else if (extra_bits >= 2) {
584
920k
      int i;
585
920k
      VARDECL(int, up_iy);
586
920k
      VARDECL(int, refine);
587
920k
      int up, use_entropy;
588
920k
      ALLOC(up_iy, N, int);
589
920k
      ALLOC(refine, N, int);
590
920k
      yy_shift = IMAX(0, extra_bits-7);
591
920k
      up = (1<<extra_bits)-1;
592
920k
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
920k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
920k
      encode_pulses(iy, N, K, enc);
595
920k
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
8.47M
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
920k
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
920k
      if (resynth)
599
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
920k
   } else
601
373M
#endif
602
373M
   {
603
943M
      yy = op_pvq_search(X, iy, K, N, arch);
604
373M
      collapse_mask = extract_collapse_mask(iy, N, B);
605
373M
      encode_pulses(iy, N, K, enc);
606
943M
      if (resynth)
607
102M
         normalise_residual(iy, X, N, yy, gain, 0);
608
373M
   }
609
610
945M
   if (resynth)
611
102M
      exp_rotation(X, N, -1, B, K, spread);
612
613
945M
   RESTORE_STACK;
614
945M
   return collapse_mask;
615
945M
}
alg_quant
Line
Count
Source
553
284M
{
554
284M
   VARDECL(int, iy);
555
284M
   opus_val32 yy;
556
284M
   unsigned collapse_mask;
557
#ifdef ENABLE_QEXT
558
   int yy_shift = 0;
559
#endif
560
284M
   SAVE_STACK;
561
562
284M
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
284M
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
284M
   ALLOC(iy, N+3, int);
567
568
284M
   exp_rotation(X, N, 1, B, K, spread);
569
570
#ifdef ENABLE_QEXT
571
   if (N==2 && extra_bits >= 2) {
572
      int refine;
573
      int up_iy[2];
574
      int up;
575
      yy_shift = IMAX(0, extra_bits-7);
576
      up = (1<<extra_bits)-1;
577
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
      encode_pulses(iy, N, K, enc);
580
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
      if (resynth)
582
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
   } else if (extra_bits >= 2) {
584
      int i;
585
      VARDECL(int, up_iy);
586
      VARDECL(int, refine);
587
      int up, use_entropy;
588
      ALLOC(up_iy, N, int);
589
      ALLOC(refine, N, int);
590
      yy_shift = IMAX(0, extra_bits-7);
591
      up = (1<<extra_bits)-1;
592
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
      encode_pulses(iy, N, K, enc);
595
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
      if (resynth)
599
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
   } else
601
#endif
602
284M
   {
603
284M
      yy = op_pvq_search(X, iy, K, N, arch);
604
284M
      collapse_mask = extract_collapse_mask(iy, N, B);
605
284M
      encode_pulses(iy, N, K, enc);
606
284M
      if (resynth)
607
43.9M
         normalise_residual(iy, X, N, yy, gain, 0);
608
284M
   }
609
610
284M
   if (resynth)
611
43.9M
      exp_rotation(X, N, -1, B, K, spread);
612
613
284M
   RESTORE_STACK;
614
284M
   return collapse_mask;
615
284M
}
alg_quant
Line
Count
Source
553
187M
{
554
187M
   VARDECL(int, iy);
555
187M
   opus_val32 yy;
556
187M
   unsigned collapse_mask;
557
187M
#ifdef ENABLE_QEXT
558
187M
   int yy_shift = 0;
559
187M
#endif
560
187M
   SAVE_STACK;
561
562
187M
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
187M
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
187M
   ALLOC(iy, N+3, int);
567
568
187M
   exp_rotation(X, N, 1, B, K, spread);
569
570
187M
#ifdef ENABLE_QEXT
571
187M
   if (N==2 && extra_bits >= 2) {
572
303k
      int refine;
573
303k
      int up_iy[2];
574
303k
      int up;
575
303k
      yy_shift = IMAX(0, extra_bits-7);
576
303k
      up = (1<<extra_bits)-1;
577
303k
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
303k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
303k
      encode_pulses(iy, N, K, enc);
580
303k
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
303k
      if (resynth)
582
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
187M
   } else if (extra_bits >= 2) {
584
460k
      int i;
585
460k
      VARDECL(int, up_iy);
586
460k
      VARDECL(int, refine);
587
460k
      int up, use_entropy;
588
460k
      ALLOC(up_iy, N, int);
589
460k
      ALLOC(refine, N, int);
590
460k
      yy_shift = IMAX(0, extra_bits-7);
591
460k
      up = (1<<extra_bits)-1;
592
460k
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
460k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
460k
      encode_pulses(iy, N, K, enc);
595
460k
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
4.23M
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
460k
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
460k
      if (resynth)
599
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
460k
   } else
601
186M
#endif
602
186M
   {
603
186M
      yy = op_pvq_search(X, iy, K, N, arch);
604
186M
      collapse_mask = extract_collapse_mask(iy, N, B);
605
186M
      encode_pulses(iy, N, K, enc);
606
186M
      if (resynth)
607
7.40M
         normalise_residual(iy, X, N, yy, gain, 0);
608
186M
   }
609
610
187M
   if (resynth)
611
7.40M
      exp_rotation(X, N, -1, B, K, spread);
612
613
187M
   RESTORE_STACK;
614
187M
   return collapse_mask;
615
187M
}
alg_quant
Line
Count
Source
553
187M
{
554
187M
   VARDECL(int, iy);
555
187M
   opus_val32 yy;
556
187M
   unsigned collapse_mask;
557
187M
#ifdef ENABLE_QEXT
558
187M
   int yy_shift = 0;
559
187M
#endif
560
187M
   SAVE_STACK;
561
562
187M
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
187M
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
187M
   ALLOC(iy, N+3, int);
567
568
187M
   exp_rotation(X, N, 1, B, K, spread);
569
570
187M
#ifdef ENABLE_QEXT
571
187M
   if (N==2 && extra_bits >= 2) {
572
303k
      int refine;
573
303k
      int up_iy[2];
574
303k
      int up;
575
303k
      yy_shift = IMAX(0, extra_bits-7);
576
303k
      up = (1<<extra_bits)-1;
577
303k
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
303k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
303k
      encode_pulses(iy, N, K, enc);
580
303k
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
303k
      if (resynth)
582
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
187M
   } else if (extra_bits >= 2) {
584
460k
      int i;
585
460k
      VARDECL(int, up_iy);
586
460k
      VARDECL(int, refine);
587
460k
      int up, use_entropy;
588
460k
      ALLOC(up_iy, N, int);
589
460k
      ALLOC(refine, N, int);
590
460k
      yy_shift = IMAX(0, extra_bits-7);
591
460k
      up = (1<<extra_bits)-1;
592
460k
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
460k
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
460k
      encode_pulses(iy, N, K, enc);
595
460k
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
4.23M
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
460k
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
460k
      if (resynth)
599
0
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
460k
   } else
601
186M
#endif
602
186M
   {
603
186M
      yy = op_pvq_search(X, iy, K, N, arch);
604
186M
      collapse_mask = extract_collapse_mask(iy, N, B);
605
186M
      encode_pulses(iy, N, K, enc);
606
186M
      if (resynth)
607
7.40M
         normalise_residual(iy, X, N, yy, gain, 0);
608
186M
   }
609
610
187M
   if (resynth)
611
7.40M
      exp_rotation(X, N, -1, B, K, spread);
612
613
187M
   RESTORE_STACK;
614
187M
   return collapse_mask;
615
187M
}
alg_quant
Line
Count
Source
553
284M
{
554
284M
   VARDECL(int, iy);
555
284M
   opus_val32 yy;
556
284M
   unsigned collapse_mask;
557
#ifdef ENABLE_QEXT
558
   int yy_shift = 0;
559
#endif
560
284M
   SAVE_STACK;
561
562
284M
   celt_assert2(K>0, "alg_quant() needs at least one pulse");
563
284M
   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
564
565
   /* Covers vectorization by up to 4. */
566
284M
   ALLOC(iy, N+3, int);
567
568
284M
   exp_rotation(X, N, 1, B, K, spread);
569
570
#ifdef ENABLE_QEXT
571
   if (N==2 && extra_bits >= 2) {
572
      int refine;
573
      int up_iy[2];
574
      int up;
575
      yy_shift = IMAX(0, extra_bits-7);
576
      up = (1<<extra_bits)-1;
577
      yy = op_pvq_search_N2(X, iy, up_iy, K, up, &refine, yy_shift);
578
      collapse_mask = extract_collapse_mask(up_iy, N, B);
579
      encode_pulses(iy, N, K, enc);
580
      ec_enc_uint(ext_enc, refine+(up-1)/2, up);
581
      if (resynth)
582
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
583
   } else if (extra_bits >= 2) {
584
      int i;
585
      VARDECL(int, up_iy);
586
      VARDECL(int, refine);
587
      int up, use_entropy;
588
      ALLOC(up_iy, N, int);
589
      ALLOC(refine, N, int);
590
      yy_shift = IMAX(0, extra_bits-7);
591
      up = (1<<extra_bits)-1;
592
      yy = op_pvq_search_extra(X, iy, up_iy, K, up, refine, N, yy_shift);
593
      collapse_mask = extract_collapse_mask(up_iy, N, B);
594
      encode_pulses(iy, N, K, enc);
595
      use_entropy = (ext_enc->storage*8 - ec_tell(ext_enc)) > (unsigned)(N-1)*(extra_bits+3)+1;
596
      for (i=0;i<N-1;i++) ec_enc_refine(ext_enc, refine[i], up, extra_bits, use_entropy);
597
      if (iy[N-1]==0) ec_enc_bits(ext_enc, up_iy[N-1]<0, 1);
598
      if (resynth)
599
         normalise_residual(up_iy, X, N, yy, gain, yy_shift);
600
   } else
601
#endif
602
284M
   {
603
284M
      yy = op_pvq_search(X, iy, K, N, arch);
604
284M
      collapse_mask = extract_collapse_mask(iy, N, B);
605
284M
      encode_pulses(iy, N, K, enc);
606
284M
      if (resynth)
607
43.9M
         normalise_residual(iy, X, N, yy, gain, 0);
608
284M
   }
609
610
284M
   if (resynth)
611
43.9M
      exp_rotation(X, N, -1, B, K, spread);
612
613
284M
   RESTORE_STACK;
614
284M
   return collapse_mask;
615
284M
}
616
617
/** Decode pulse vector and combine the result with the pitch vector to produce
618
    the final normalised signal in the current band. */
619
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
620
      ec_dec *dec, opus_val32 gain
621
      ARG_QEXT(ec_enc *ext_dec) ARG_QEXT(int extra_bits))
622
1.72M
{
623
1.72M
   opus_val32 Ryy;
624
1.72M
   unsigned collapse_mask;
625
1.72M
   VARDECL(int, iy);
626
1.72M
   int yy_shift=0;
627
1.72M
   SAVE_STACK;
628
629
1.72M
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
630
1.72M
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
631
1.72M
   ALLOC(iy, N, int);
632
1.72M
   Ryy = decode_pulses(iy, N, K, dec);
633
#ifdef ENABLE_QEXT
634
1.00M
   if (N==2 && extra_bits >= 2) {
635
22.8k
      int up;
636
22.8k
      int refine;
637
22.8k
      yy_shift = IMAX(0, extra_bits-7);
638
22.8k
      up = (1<<extra_bits)-1;
639
22.8k
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
640
22.8k
      iy[0] *= up;
641
22.8k
      iy[1] *= up;
642
22.8k
      if (iy[1] == 0) {
643
2.14k
         iy[1] = (iy[0] > 0) ? -refine : refine;
644
2.14k
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
645
20.6k
      } else if (iy[1] > 0) {
646
10.5k
         iy[0] += refine;
647
10.5k
         iy[1] -= refine*(iy[0]>0?1:-1);
648
10.5k
      } else {
649
10.1k
         iy[0] -= refine;
650
10.1k
         iy[1] -= refine*(iy[0]>0?1:-1);
651
10.1k
      }
652
22.8k
#ifdef FIXED_POINT
653
22.8k
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
654
#else
655
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
656
#endif
657
981k
   } else if (extra_bits >= 2) {
658
22.2k
      int i;
659
22.2k
      opus_val64 yy64;
660
22.2k
      VARDECL(int, refine);
661
22.2k
      int up, use_entropy;
662
22.2k
      int sign=0;
663
22.2k
      ALLOC(refine, N, int);
664
22.2k
      yy_shift = IMAX(0, extra_bits-7);
665
22.2k
      up = (1<<extra_bits)-1;
666
22.2k
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
667
158k
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
668
22.2k
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
669
17.3k
      else sign = iy[N-1] < 0;
670
158k
      for (i=0;i<N-1;i++) {
671
136k
         iy[i] = iy[i]*up + refine[i];
672
136k
      }
673
22.2k
      iy[N-1] = up*K;
674
158k
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
675
22.2k
      if (sign) iy[N-1] = -iy[N-1];
676
22.2k
      yy64 = 0;
677
180k
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
678
22.2k
#ifdef FIXED_POINT
679
22.2k
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
680
#else
681
      Ryy = yy64;
682
#endif
683
22.2k
   }
684
#endif
685
1.72M
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
686
1.72M
   exp_rotation(X, N, -1, B, K, spread);
687
1.72M
   collapse_mask = extract_collapse_mask(iy, N, B);
688
1.72M
   RESTORE_STACK;
689
1.72M
   return collapse_mask;
690
1.72M
}
alg_unquant
Line
Count
Source
622
722k
{
623
722k
   opus_val32 Ryy;
624
722k
   unsigned collapse_mask;
625
722k
   VARDECL(int, iy);
626
722k
   int yy_shift=0;
627
722k
   SAVE_STACK;
628
629
722k
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
630
722k
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
631
722k
   ALLOC(iy, N, int);
632
722k
   Ryy = decode_pulses(iy, N, K, dec);
633
#ifdef ENABLE_QEXT
634
   if (N==2 && extra_bits >= 2) {
635
      int up;
636
      int refine;
637
      yy_shift = IMAX(0, extra_bits-7);
638
      up = (1<<extra_bits)-1;
639
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
640
      iy[0] *= up;
641
      iy[1] *= up;
642
      if (iy[1] == 0) {
643
         iy[1] = (iy[0] > 0) ? -refine : refine;
644
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
645
      } else if (iy[1] > 0) {
646
         iy[0] += refine;
647
         iy[1] -= refine*(iy[0]>0?1:-1);
648
      } else {
649
         iy[0] -= refine;
650
         iy[1] -= refine*(iy[0]>0?1:-1);
651
      }
652
#ifdef FIXED_POINT
653
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
654
#else
655
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
656
#endif
657
   } else if (extra_bits >= 2) {
658
      int i;
659
      opus_val64 yy64;
660
      VARDECL(int, refine);
661
      int up, use_entropy;
662
      int sign=0;
663
      ALLOC(refine, N, int);
664
      yy_shift = IMAX(0, extra_bits-7);
665
      up = (1<<extra_bits)-1;
666
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
667
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
668
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
669
      else sign = iy[N-1] < 0;
670
      for (i=0;i<N-1;i++) {
671
         iy[i] = iy[i]*up + refine[i];
672
      }
673
      iy[N-1] = up*K;
674
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
675
      if (sign) iy[N-1] = -iy[N-1];
676
      yy64 = 0;
677
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
678
#ifdef FIXED_POINT
679
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
680
#else
681
      Ryy = yy64;
682
#endif
683
   }
684
#endif
685
722k
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
686
722k
   exp_rotation(X, N, -1, B, K, spread);
687
722k
   collapse_mask = extract_collapse_mask(iy, N, B);
688
722k
   RESTORE_STACK;
689
722k
   return collapse_mask;
690
722k
}
alg_unquant
Line
Count
Source
622
1.00M
{
623
1.00M
   opus_val32 Ryy;
624
1.00M
   unsigned collapse_mask;
625
1.00M
   VARDECL(int, iy);
626
1.00M
   int yy_shift=0;
627
1.00M
   SAVE_STACK;
628
629
1.00M
   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
630
1.00M
   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
631
1.00M
   ALLOC(iy, N, int);
632
1.00M
   Ryy = decode_pulses(iy, N, K, dec);
633
1.00M
#ifdef ENABLE_QEXT
634
1.00M
   if (N==2 && extra_bits >= 2) {
635
22.8k
      int up;
636
22.8k
      int refine;
637
22.8k
      yy_shift = IMAX(0, extra_bits-7);
638
22.8k
      up = (1<<extra_bits)-1;
639
22.8k
      refine = (opus_int32)ec_dec_uint(ext_dec, up) - (up-1)/2;
640
22.8k
      iy[0] *= up;
641
22.8k
      iy[1] *= up;
642
22.8k
      if (iy[1] == 0) {
643
2.14k
         iy[1] = (iy[0] > 0) ? -refine : refine;
644
2.14k
         iy[0] += (refine*(opus_int64)iy[0] > 0) ? -refine : refine;
645
20.6k
      } else if (iy[1] > 0) {
646
10.5k
         iy[0] += refine;
647
10.5k
         iy[1] -= refine*(iy[0]>0?1:-1);
648
10.5k
      } else {
649
10.1k
         iy[0] -= refine;
650
10.1k
         iy[1] -= refine*(iy[0]>0?1:-1);
651
10.1k
      }
652
22.8k
#ifdef FIXED_POINT
653
22.8k
      Ryy = (iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1] + (1<<2*yy_shift>>1)) >> 2*yy_shift;
654
#else
655
      Ryy = iy[0]*(opus_val64)iy[0] + iy[1]*(opus_val64)iy[1];
656
#endif
657
981k
   } else if (extra_bits >= 2) {
658
22.2k
      int i;
659
22.2k
      opus_val64 yy64;
660
22.2k
      VARDECL(int, refine);
661
22.2k
      int up, use_entropy;
662
22.2k
      int sign=0;
663
22.2k
      ALLOC(refine, N, int);
664
22.2k
      yy_shift = IMAX(0, extra_bits-7);
665
22.2k
      up = (1<<extra_bits)-1;
666
22.2k
      use_entropy = (ext_dec->storage*8 - ec_tell(ext_dec)) > (unsigned)(N-1)*(extra_bits+3)+1;
667
158k
      for (i=0;i<N-1;i++) refine[i] = ec_dec_refine(ext_dec, up, extra_bits, use_entropy);
668
22.2k
      if (iy[N-1]==0) sign = ec_dec_bits(ext_dec, 1);
669
17.3k
      else sign = iy[N-1] < 0;
670
158k
      for (i=0;i<N-1;i++) {
671
136k
         iy[i] = iy[i]*up + refine[i];
672
136k
      }
673
22.2k
      iy[N-1] = up*K;
674
158k
      for (i=0;i<N-1;i++) iy[N-1] -= abs(iy[i]);
675
22.2k
      if (sign) iy[N-1] = -iy[N-1];
676
22.2k
      yy64 = 0;
677
180k
      for (i=0;i<N;i++) yy64 += iy[i]*(opus_val64)iy[i];
678
22.2k
#ifdef FIXED_POINT
679
22.2k
      Ryy = (yy64 + (1<<2*yy_shift>>1)) >> 2*yy_shift;
680
#else
681
      Ryy = yy64;
682
#endif
683
22.2k
   }
684
1.00M
#endif
685
1.00M
   normalise_residual(iy, X, N, Ryy, gain, yy_shift);
686
1.00M
   exp_rotation(X, N, -1, B, K, spread);
687
1.00M
   collapse_mask = extract_collapse_mask(iy, N, B);
688
1.00M
   RESTORE_STACK;
689
1.00M
   return collapse_mask;
690
1.00M
}
691
692
#ifndef OVERRIDE_renormalise_vector
693
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
694
56.8M
{
695
56.8M
   int i;
696
#ifdef FIXED_POINT
697
   int k;
698
#endif
699
56.8M
   opus_val32 E;
700
56.8M
   opus_val16 g;
701
56.8M
   opus_val32 t;
702
56.8M
   celt_norm *xptr;
703
56.8M
   norm_scaledown(X, N, NORM_SHIFT-14);
704
56.8M
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
705
#ifdef FIXED_POINT
706
   k = celt_ilog2(E)>>1;
707
#endif
708
56.8M
   t = VSHR32(E, 2*(k-7));
709
56.8M
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
710
711
56.8M
   xptr = X;
712
709M
   for (i=0;i<N;i++)
713
652M
   {
714
652M
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
715
652M
      xptr++;
716
652M
   }
717
56.8M
   norm_scaleup(X, N, NORM_SHIFT-14);
718
   /*return celt_sqrt(E);*/
719
56.8M
}
renormalise_vector
Line
Count
Source
694
55.2M
{
695
55.2M
   int i;
696
55.2M
#ifdef FIXED_POINT
697
55.2M
   int k;
698
55.2M
#endif
699
55.2M
   opus_val32 E;
700
55.2M
   opus_val16 g;
701
55.2M
   opus_val32 t;
702
55.2M
   celt_norm *xptr;
703
55.2M
   norm_scaledown(X, N, NORM_SHIFT-14);
704
55.2M
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
705
55.2M
#ifdef FIXED_POINT
706
55.2M
   k = celt_ilog2(E)>>1;
707
55.2M
#endif
708
55.2M
   t = VSHR32(E, 2*(k-7));
709
55.2M
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
710
711
55.2M
   xptr = X;
712
682M
   for (i=0;i<N;i++)
713
626M
   {
714
626M
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
715
626M
      xptr++;
716
626M
   }
717
55.2M
   norm_scaleup(X, N, NORM_SHIFT-14);
718
   /*return celt_sqrt(E);*/
719
55.2M
}
renormalise_vector
Line
Count
Source
694
1.57M
{
695
1.57M
   int i;
696
#ifdef FIXED_POINT
697
   int k;
698
#endif
699
1.57M
   opus_val32 E;
700
1.57M
   opus_val16 g;
701
1.57M
   opus_val32 t;
702
1.57M
   celt_norm *xptr;
703
1.57M
   norm_scaledown(X, N, NORM_SHIFT-14);
704
1.57M
   E = EPSILON + celt_inner_prod_norm(X, X, N, arch);
705
#ifdef FIXED_POINT
706
   k = celt_ilog2(E)>>1;
707
#endif
708
1.57M
   t = VSHR32(E, 2*(k-7));
709
1.57M
   g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
710
711
1.57M
   xptr = X;
712
27.5M
   for (i=0;i<N;i++)
713
26.0M
   {
714
26.0M
      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+15-14));
715
26.0M
      xptr++;
716
26.0M
   }
717
1.57M
   norm_scaleup(X, N, NORM_SHIFT-14);
718
   /*return celt_sqrt(E);*/
719
1.57M
}
720
#endif /* OVERRIDE_renormalise_vector */
721
722
opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
723
909M
{
724
909M
   int i;
725
909M
   int itheta;
726
909M
   opus_val32 mid, side;
727
909M
   opus_val32 Emid, Eside;
728
729
909M
   Emid = Eside = 0;
730
909M
   if (stereo)
731
516M
   {
732
5.63G
      for (i=0;i<N;i++)
733
5.12G
      {
734
5.12G
         celt_norm m, s;
735
5.12G
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
736
5.12G
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
737
5.12G
         Emid = MAC16_16(Emid, m, m);
738
5.12G
         Eside = MAC16_16(Eside, s, s);
739
5.12G
      }
740
516M
   } else {
741
392M
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
742
392M
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
743
392M
   }
744
909M
   mid = celt_sqrt32(Emid);
745
909M
   side = celt_sqrt32(Eside);
746
#if defined(FIXED_POINT)
747
   itheta = celt_atan2p_norm(side, mid);
748
#else
749
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
750
#endif
751
752
909M
   return itheta;
753
909M
}
stereo_itheta
Line
Count
Source
723
454M
{
724
454M
   int i;
725
454M
   int itheta;
726
454M
   opus_val32 mid, side;
727
454M
   opus_val32 Emid, Eside;
728
729
454M
   Emid = Eside = 0;
730
454M
   if (stereo)
731
258M
   {
732
2.81G
      for (i=0;i<N;i++)
733
2.56G
      {
734
2.56G
         celt_norm m, s;
735
2.56G
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
736
2.56G
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
737
2.56G
         Emid = MAC16_16(Emid, m, m);
738
2.56G
         Eside = MAC16_16(Eside, s, s);
739
2.56G
      }
740
258M
   } else {
741
196M
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
742
196M
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
743
196M
   }
744
454M
   mid = celt_sqrt32(Emid);
745
454M
   side = celt_sqrt32(Eside);
746
454M
#if defined(FIXED_POINT)
747
454M
   itheta = celt_atan2p_norm(side, mid);
748
#else
749
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
750
#endif
751
752
454M
   return itheta;
753
454M
}
stereo_itheta
Line
Count
Source
723
454M
{
724
454M
   int i;
725
454M
   int itheta;
726
454M
   opus_val32 mid, side;
727
454M
   opus_val32 Emid, Eside;
728
729
454M
   Emid = Eside = 0;
730
454M
   if (stereo)
731
258M
   {
732
2.81G
      for (i=0;i<N;i++)
733
2.56G
      {
734
2.56G
         celt_norm m, s;
735
2.56G
         m = PSHR32(ADD32(X[i], Y[i]), NORM_SHIFT-13);
736
2.56G
         s = PSHR32(SUB32(X[i], Y[i]), NORM_SHIFT-13);
737
2.56G
         Emid = MAC16_16(Emid, m, m);
738
2.56G
         Eside = MAC16_16(Eside, s, s);
739
2.56G
      }
740
258M
   } else {
741
196M
      Emid += celt_inner_prod_norm_shift(X, X, N, arch);
742
196M
      Eside += celt_inner_prod_norm_shift(Y, Y, N, arch);
743
196M
   }
744
454M
   mid = celt_sqrt32(Emid);
745
454M
   side = celt_sqrt32(Eside);
746
#if defined(FIXED_POINT)
747
   itheta = celt_atan2p_norm(side, mid);
748
#else
749
454M
   itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
750
454M
#endif
751
752
454M
   return itheta;
753
454M
}
754
755
#ifdef ENABLE_QEXT
756
757
56.8k
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
758
56.8k
   int i;
759
56.8k
   opus_val32 sum=0;
760
56.8k
   opus_val32 mag;
761
#ifdef FIXED_POINT
762
   int sum_shift;
763
23.2k
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
764
#endif
765
664k
   for (i=0;i<N;i++) {
766
608k
      X[i] = (1+2*iy[i])-K;
767
608k
   }
768
56.8k
   X[face] = sign ? -K : K;
769
664k
   for (i=0;i<N;i++) {
770
608k
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
771
608k
   }
772
#ifdef FIXED_POINT
773
   sum_shift = (29-celt_ilog2(sum))>>1;
774
23.2k
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
775
272k
   for (i=0;i<N;i++) {
776
248k
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
777
248k
   }
778
#else
779
   mag = 1.f/sqrt(sum);
780
392k
   for (i=0;i<N;i++) {
781
359k
      X[i] *= mag*gain;
782
359k
   }
783
#endif
784
56.8k
}
vq.c:cubic_synthesis
Line
Count
Source
757
23.2k
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
758
23.2k
   int i;
759
23.2k
   opus_val32 sum=0;
760
23.2k
   opus_val32 mag;
761
23.2k
#ifdef FIXED_POINT
762
23.2k
   int sum_shift;
763
23.2k
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
764
23.2k
#endif
765
272k
   for (i=0;i<N;i++) {
766
248k
      X[i] = (1+2*iy[i])-K;
767
248k
   }
768
23.2k
   X[face] = sign ? -K : K;
769
272k
   for (i=0;i<N;i++) {
770
248k
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
771
248k
   }
772
23.2k
#ifdef FIXED_POINT
773
23.2k
   sum_shift = (29-celt_ilog2(sum))>>1;
774
23.2k
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
775
272k
   for (i=0;i<N;i++) {
776
248k
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
777
248k
   }
778
#else
779
   mag = 1.f/sqrt(sum);
780
   for (i=0;i<N;i++) {
781
      X[i] *= mag*gain;
782
   }
783
#endif
784
23.2k
}
vq.c:cubic_synthesis
Line
Count
Source
757
33.5k
static void cubic_synthesis(celt_norm *X, int *iy, int N, int K, int face, int sign, opus_val32 gain) {
758
33.5k
   int i;
759
33.5k
   opus_val32 sum=0;
760
33.5k
   opus_val32 mag;
761
#ifdef FIXED_POINT
762
   int sum_shift;
763
   int shift = IMAX(celt_ilog2(K) + celt_ilog2(N)/2 - 13, 0);
764
#endif
765
392k
   for (i=0;i<N;i++) {
766
359k
      X[i] = (1+2*iy[i])-K;
767
359k
   }
768
33.5k
   X[face] = sign ? -K : K;
769
392k
   for (i=0;i<N;i++) {
770
359k
      sum += PSHR32(MULT16_16(X[i],X[i]), 2*shift);
771
359k
   }
772
#ifdef FIXED_POINT
773
   sum_shift = (29-celt_ilog2(sum))>>1;
774
   mag = celt_rsqrt_norm32(SHL32(sum, 2*sum_shift+1));
775
   for (i=0;i<N;i++) {
776
      X[i] = VSHR32(MULT16_32_Q15(X[i],MULT32_32_Q31(mag,gain)), shift-sum_shift+29-NORM_SHIFT);
777
   }
778
#else
779
33.5k
   mag = 1.f/sqrt(sum);
780
392k
   for (i=0;i<N;i++) {
781
359k
      X[i] *= mag*gain;
782
359k
   }
783
33.5k
#endif
784
33.5k
}
785
786
298k
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
787
298k
   int i;
788
298k
   int face=0;
789
298k
   int K;
790
298k
   VARDECL(int, iy);
791
298k
   celt_norm faceval=-1;
792
298k
   opus_val32 norm;
793
298k
   int sign;
794
298k
   SAVE_STACK;
795
298k
   ALLOC(iy, N, int);
796
298k
   K = 1<<res;
797
   /* Using odd K on transients to avoid adding pre-echo. */
798
298k
   if (B!=1) K=IMAX(1, K-1);
799
298k
   if (K==1) {
800
62
      if (resynth) OPUS_CLEAR(X, N);
801
62
      RESTORE_STACK;
802
62
      return 0;
803
62
   }
804
5.09M
   for (i=0;i<N;i++) {
805
4.80M
      if (ABS32(X[i]) > faceval) {
806
298k
         faceval = ABS32(X[i]);
807
298k
         face = i;
808
298k
      }
809
4.80M
   }
810
298k
   sign = X[face]<0;
811
298k
   ec_enc_uint(enc, face, N);
812
298k
   ec_enc_bits(enc, sign, 1);
813
#ifdef FIXED_POINT
814
298k
   if (faceval != 0) {
815
928
      int face_shift = 30-celt_ilog2(faceval);
816
928
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
817
928
      norm = MULT16_32_Q15(K, norm);
818
5.42k
      for (i=0;i<N;i++) {
819
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
820
4.49k
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
821
4.49k
      }
822
297k
   } else {
823
297k
      OPUS_CLEAR(iy, N);
824
297k
   }
825
#else
826
0
   norm = .5f*K/(faceval+EPSILON);
827
0
   for (i=0;i<N;i++) {
828
0
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
829
0
   }
830
#endif
831
5.09M
   for (i=0;i<N;i++) {
832
4.80M
      if (i != face) ec_enc_bits(enc, iy[i], res);
833
4.80M
   }
834
298k
   if (resynth) {
835
0
      cubic_synthesis(X, iy, N, K, face, sign, gain);
836
0
   }
837
298k
   RESTORE_STACK;
838
298k
   return (1<<B)-1;
839
298k
}
cubic_quant
Line
Count
Source
786
298k
unsigned cubic_quant(celt_norm *X, int N, int res, int B, ec_enc *enc, opus_val32 gain, int resynth) {
787
298k
   int i;
788
298k
   int face=0;
789
298k
   int K;
790
298k
   VARDECL(int, iy);
791
298k
   celt_norm faceval=-1;
792
298k
   opus_val32 norm;
793
298k
   int sign;
794
298k
   SAVE_STACK;
795
298k
   ALLOC(iy, N, int);
796
298k
   K = 1<<res;
797
   /* Using odd K on transients to avoid adding pre-echo. */
798
298k
   if (B!=1) K=IMAX(1, K-1);
799
298k
   if (K==1) {
800
62
      if (resynth) OPUS_CLEAR(X, N);
801
62
      RESTORE_STACK;
802
62
      return 0;
803
62
   }
804
5.09M
   for (i=0;i<N;i++) {
805
4.80M
      if (ABS32(X[i]) > faceval) {
806
298k
         faceval = ABS32(X[i]);
807
298k
         face = i;
808
298k
      }
809
4.80M
   }
810
298k
   sign = X[face]<0;
811
298k
   ec_enc_uint(enc, face, N);
812
298k
   ec_enc_bits(enc, sign, 1);
813
298k
#ifdef FIXED_POINT
814
298k
   if (faceval != 0) {
815
928
      int face_shift = 30-celt_ilog2(faceval);
816
928
      norm = celt_rcp_norm32(SHL32(faceval, face_shift));
817
928
      norm = MULT16_32_Q15(K, norm);
818
5.42k
      for (i=0;i<N;i++) {
819
         /* By computing X[i]+faceval inside the shift, the result is guaranteed non-negative. */
820
4.49k
         iy[i] = IMIN(K-1, (MULT32_32_Q31(SHL32(X[i]+faceval, face_shift-1), norm)) >> 15);
821
4.49k
      }
822
297k
   } else {
823
297k
      OPUS_CLEAR(iy, N);
824
297k
   }
825
#else
826
   norm = .5f*K/(faceval+EPSILON);
827
   for (i=0;i<N;i++) {
828
      iy[i] = IMIN(K-1, (int)floor((X[i]+faceval)*norm));
829
   }
830
#endif
831
5.09M
   for (i=0;i<N;i++) {
832
4.80M
      if (i != face) ec_enc_bits(enc, iy[i], res);
833
4.80M
   }
834
298k
   if (resynth) {
835
0
      cubic_synthesis(X, iy, N, K, face, sign, gain);
836
0
   }
837
298k
   RESTORE_STACK;
838
298k
   return (1<<B)-1;
839
298k
}
Unexecuted instantiation: cubic_quant
840
841
59.9k
unsigned cubic_unquant(celt_norm *X, int N, int res, int B, ec_dec *dec, opus_val32 gain) {
842
59.9k
   int i;
843
59.9k
   int face;
844
59.9k
   int sign;
845
59.9k
   int K;
846
59.9k
   VARDECL(int, iy);
847
59.9k
   SAVE_STACK;
848
59.9k
   ALLOC(iy, N, int);
849
59.9k
   K = 1<<res;
850
   /* Using odd K on transients to avoid adding pre-echo. */
851
59.9k
   if (B!=1) K=IMAX(1, K-1);
852
59.9k
   if (K==1) {
853
3.16k
      OPUS_CLEAR(X, N);
854
3.16k
      RESTORE_STACK;
855
3.16k
      return 0;
856
3.16k
   }
857
56.8k
   face = ec_dec_uint(dec, N);
858
56.8k
   sign = ec_dec_bits(dec, 1);
859
664k
   for (i=0;i<N;i++) {
860
608k
      if (i != face) iy[i] = ec_dec_bits(dec, res);
861
608k
   }
862
56.8k
   iy[face]=0;
863
56.8k
   cubic_synthesis(X, iy, N, K, face, sign, gain);
864
56.8k
   RESTORE_STACK;
865
56.8k
   return (1<<B)-1;
866
59.9k
}
867
#endif