Coverage Report

Created: 2025-11-11 06:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/speex/libspeex/cb_search.c
Line
Count
Source
1
/* Copyright (C) 2002-2006 Jean-Marc Valin
2
   File: cb_search.c
3
4
   Redistribution and use in source and binary forms, with or without
5
   modification, are permitted provided that the following conditions
6
   are met:
7
8
   - Redistributions of source code must retain the above copyright
9
   notice, this list of conditions and the following disclaimer.
10
11
   - Redistributions in binary form must reproduce the above copyright
12
   notice, this list of conditions and the following disclaimer in the
13
   documentation and/or other materials provided with the distribution.
14
15
   - Neither the name of the Xiph.org Foundation nor the names of its
16
   contributors may be used to endorse or promote products derived from
17
   this software without specific prior written permission.
18
19
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
23
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
*/
31
32
#ifdef HAVE_CONFIG_H
33
#include "config.h"
34
#endif
35
36
#include "cb_search.h"
37
#include "filters.h"
38
#include "stack_alloc.h"
39
#include "vq.h"
40
#include "arch.h"
41
#include "math_approx.h"
42
#include "os_support.h"
43
44
#ifdef _USE_SSE
45
#include "cb_search_sse.h"
46
#elif defined(ARM4_ASM) || defined(ARM5E_ASM)
47
#include "cb_search_arm4.h"
48
#elif defined(BFIN_ASM)
49
#include "cb_search_bfin.h"
50
#endif
51
52
#ifndef DISABLE_ENCODER
53
54
#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
55
static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
56
46.2k
{
57
46.2k
   int i, j, k;
58
46.2k
   VARDECL(spx_word16_t *shape);
59
46.2k
   ALLOC(shape, subvect_size, spx_word16_t);
60
4.18M
   for (i=0;i<shape_cb_size;i++)
61
4.13M
   {
62
4.13M
      spx_word16_t *res;
63
64
4.13M
      res = resp+i*subvect_size;
65
34.5M
      for (k=0;k<subvect_size;k++)
66
30.4M
         shape[k] = (spx_word16_t)shape_cb[i*subvect_size+k];
67
4.13M
      E[i]=0;
68
69
      /* Compute codeword response using convolution with impulse response */
70
34.5M
      for(j=0;j<subvect_size;j++)
71
30.4M
      {
72
30.4M
         spx_word32_t resj=0;
73
30.4M
         spx_word16_t res16;
74
185M
         for (k=0;k<=j;k++)
75
154M
            resj = MAC16_16(resj,shape[k],r[j-k]);
76
30.4M
#ifdef FIXED_POINT
77
30.4M
         res16 = EXTRACT16(SHR32(resj, 13));
78
#else
79
         res16 = 0.03125f*resj;
80
#endif
81
         /* Compute codeword energy */
82
30.4M
         E[i]=MAC16_16(E[i],res16,res16);
83
30.4M
         res[j] = res16;
84
         /*printf ("%d\n", (int)res[j]);*/
85
30.4M
      }
86
4.13M
   }
87
88
46.2k
}
89
#endif
90
91
#ifndef OVERRIDE_TARGET_UPDATE
92
static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *r, int len)
93
17.0M
{
94
17.0M
   int n;
95
277M
   for (n=0;n<len;n++)
96
260M
      t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
97
17.0M
}
cb_search.c:target_update
Line
Count
Source
93
8.51M
{
94
8.51M
   int n;
95
138M
   for (n=0;n<len;n++)
96
130M
      t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
97
8.51M
}
cb_search.c:target_update
Line
Count
Source
93
8.51M
{
94
8.51M
   int n;
95
138M
   for (n=0;n<len;n++)
96
130M
      t[n] = SUB16(t[n],PSHR32(MULT16_16(g,r[n]),13));
97
8.51M
}
98
#endif
99
100
101
102
static void split_cb_search_shape_sign_N1(
103
spx_word16_t target[],      /* target vector */
104
spx_coef_t ak[],      /* LPCs for this subframe */
105
spx_coef_t awk1[],      /* Weighted LPCs for this subframe */
106
spx_coef_t awk2[],      /* Weighted LPCs for this subframe */
107
const void *par,                      /* Codebook/search parameters*/
108
int   p,                        /* number of LPC coeffs */
109
int   nsf,                      /* number of samples in subframe */
110
spx_sig_t *exc,
111
spx_word16_t *r,
112
SpeexBits *bits,
113
char *stack,
114
int   update_target
115
)
116
19.9k
{
117
19.9k
   int i,j,m,q;
118
19.9k
   VARDECL(spx_word16_t *resp);
119
#ifdef _USE_SSE
120
   VARDECL(__m128 *resp2);
121
   VARDECL(__m128 *E);
122
#else
123
   spx_word16_t *resp2;
124
   VARDECL(spx_word32_t *E);
125
#endif
126
19.9k
   VARDECL(spx_word16_t *t);
127
19.9k
   VARDECL(spx_sig_t *e);
128
19.9k
   const signed char *shape_cb;
129
19.9k
   int shape_cb_size, subvect_size, nb_subvect;
130
19.9k
   const split_cb_params *params;
131
19.9k
   int best_index;
132
19.9k
   spx_word32_t best_dist;
133
19.9k
   int have_sign;
134
135
19.9k
   params = (const split_cb_params *) par;
136
19.9k
   subvect_size = params->subvect_size;
137
19.9k
   nb_subvect = params->nb_subvect;
138
19.9k
   shape_cb_size = 1<<params->shape_bits;
139
19.9k
   shape_cb = params->shape_cb;
140
19.9k
   have_sign = params->have_sign;
141
19.9k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
142
#ifdef _USE_SSE
143
5.54k
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
144
5.54k
   ALLOC(E, shape_cb_size>>2, __m128);
145
#else
146
   resp2 = resp;
147
14.4k
   ALLOC(E, shape_cb_size, spx_word32_t);
148
#endif
149
19.9k
   ALLOC(t, nsf, spx_word16_t);
150
19.9k
   ALLOC(e, nsf, spx_sig_t);
151
152
   /* FIXME: Do we still need to copy the target? */
153
19.9k
   SPEEX_COPY(t, target, nsf);
154
155
19.9k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
156
157
120k
   for (i=0;i<nb_subvect;i++)
158
100k
   {
159
100k
      spx_word16_t *x=t+subvect_size*i;
160
      /*Find new n-best based on previous n-best j*/
161
100k
#ifndef DISABLE_WIDEBAND
162
100k
      if (have_sign)
163
13.9k
         vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
164
86.2k
      else
165
86.2k
#endif /* DISABLE_WIDEBAND */
166
86.2k
         vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
167
168
100k
      speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
169
170
100k
      {
171
100k
         int rind;
172
100k
         spx_word16_t *res;
173
100k
         spx_word16_t sign=1;
174
100k
         rind = best_index;
175
100k
         if (rind>=shape_cb_size)
176
10.6k
         {
177
10.6k
            sign=-1;
178
10.6k
            rind-=shape_cb_size;
179
10.6k
         }
180
100k
         res = resp+rind*subvect_size;
181
100k
         if (sign>0)
182
803k
            for (m=0;m<subvect_size;m++)
183
714k
               t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
184
10.6k
         else
185
95.6k
            for (m=0;m<subvect_size;m++)
186
85.0k
               t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
187
188
#ifdef FIXED_POINT
189
73.3k
         if (sign==1)
190
66.2k
         {
191
587k
            for (j=0;j<subvect_size;j++)
192
520k
               e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
193
66.2k
         } else {
194
63.9k
            for (j=0;j<subvect_size;j++)
195
56.8k
               e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
196
7.10k
         }
197
#else
198
248k
         for (j=0;j<subvect_size;j++)
199
221k
            e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
200
#endif
201
202
100k
      }
203
204
899k
      for (m=0;m<subvect_size;m++)
205
799k
      {
206
799k
         spx_word16_t g;
207
799k
         int rind;
208
799k
         spx_word16_t sign=1;
209
799k
         rind = best_index;
210
799k
         if (rind>=shape_cb_size)
211
85.0k
         {
212
85.0k
            sign=-1;
213
85.0k
            rind-=shape_cb_size;
214
85.0k
         }
215
216
799k
         q=subvect_size-m;
217
#ifdef FIXED_POINT
218
         g=sign*shape_cb[rind*subvect_size+m];
219
#else
220
         g=sign*0.03125*shape_cb[rind*subvect_size+m];
221
#endif
222
799k
         target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
223
799k
      }
224
100k
   }
225
226
   /* Update excitation */
227
   /* FIXME: We could update the excitation directly above */
228
819k
   for (j=0;j<nsf;j++)
229
799k
      exc[j]=ADD32(exc[j],e[j]);
230
231
   /* Update target: only update target if necessary */
232
19.9k
   if (update_target)
233
1.88k
   {
234
1.88k
      VARDECL(spx_word16_t *r2);
235
1.88k
      ALLOC(r2, nsf, spx_word16_t);
236
77.4k
      for (j=0;j<nsf;j++)
237
75.5k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
238
1.88k
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
239
77.4k
      for (j=0;j<nsf;j++)
240
75.5k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
241
1.88k
   }
242
19.9k
}
cb_search.c:split_cb_search_shape_sign_N1
Line
Count
Source
116
5.54k
{
117
5.54k
   int i,j,m,q;
118
5.54k
   VARDECL(spx_word16_t *resp);
119
5.54k
#ifdef _USE_SSE
120
5.54k
   VARDECL(__m128 *resp2);
121
5.54k
   VARDECL(__m128 *E);
122
#else
123
   spx_word16_t *resp2;
124
   VARDECL(spx_word32_t *E);
125
#endif
126
5.54k
   VARDECL(spx_word16_t *t);
127
5.54k
   VARDECL(spx_sig_t *e);
128
5.54k
   const signed char *shape_cb;
129
5.54k
   int shape_cb_size, subvect_size, nb_subvect;
130
5.54k
   const split_cb_params *params;
131
5.54k
   int best_index;
132
5.54k
   spx_word32_t best_dist;
133
5.54k
   int have_sign;
134
135
5.54k
   params = (const split_cb_params *) par;
136
5.54k
   subvect_size = params->subvect_size;
137
5.54k
   nb_subvect = params->nb_subvect;
138
5.54k
   shape_cb_size = 1<<params->shape_bits;
139
5.54k
   shape_cb = params->shape_cb;
140
5.54k
   have_sign = params->have_sign;
141
5.54k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
142
5.54k
#ifdef _USE_SSE
143
5.54k
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
144
5.54k
   ALLOC(E, shape_cb_size>>2, __m128);
145
#else
146
   resp2 = resp;
147
   ALLOC(E, shape_cb_size, spx_word32_t);
148
#endif
149
5.54k
   ALLOC(t, nsf, spx_word16_t);
150
5.54k
   ALLOC(e, nsf, spx_sig_t);
151
152
   /* FIXME: Do we still need to copy the target? */
153
5.54k
   SPEEX_COPY(t, target, nsf);
154
155
5.54k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
156
157
32.4k
   for (i=0;i<nb_subvect;i++)
158
26.8k
   {
159
26.8k
      spx_word16_t *x=t+subvect_size*i;
160
      /*Find new n-best based on previous n-best j*/
161
26.8k
#ifndef DISABLE_WIDEBAND
162
26.8k
      if (have_sign)
163
3.90k
         vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
164
22.9k
      else
165
22.9k
#endif /* DISABLE_WIDEBAND */
166
22.9k
         vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
167
168
26.8k
      speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
169
170
26.8k
      {
171
26.8k
         int rind;
172
26.8k
         spx_word16_t *res;
173
26.8k
         spx_word16_t sign=1;
174
26.8k
         rind = best_index;
175
26.8k
         if (rind>=shape_cb_size)
176
3.52k
         {
177
3.52k
            sign=-1;
178
3.52k
            rind-=shape_cb_size;
179
3.52k
         }
180
26.8k
         res = resp+rind*subvect_size;
181
26.8k
         if (sign>0)
182
216k
            for (m=0;m<subvect_size;m++)
183
193k
               t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
184
3.52k
         else
185
31.7k
            for (m=0;m<subvect_size;m++)
186
28.2k
               t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
187
188
#ifdef FIXED_POINT
189
         if (sign==1)
190
         {
191
            for (j=0;j<subvect_size;j++)
192
               e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
193
         } else {
194
            for (j=0;j<subvect_size;j++)
195
               e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
196
         }
197
#else
198
248k
         for (j=0;j<subvect_size;j++)
199
221k
            e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
200
26.8k
#endif
201
202
26.8k
      }
203
204
248k
      for (m=0;m<subvect_size;m++)
205
221k
      {
206
221k
         spx_word16_t g;
207
221k
         int rind;
208
221k
         spx_word16_t sign=1;
209
221k
         rind = best_index;
210
221k
         if (rind>=shape_cb_size)
211
28.2k
         {
212
28.2k
            sign=-1;
213
28.2k
            rind-=shape_cb_size;
214
28.2k
         }
215
216
221k
         q=subvect_size-m;
217
#ifdef FIXED_POINT
218
         g=sign*shape_cb[rind*subvect_size+m];
219
#else
220
221k
         g=sign*0.03125*shape_cb[rind*subvect_size+m];
221
221k
#endif
222
221k
         target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
223
221k
      }
224
26.8k
   }
225
226
   /* Update excitation */
227
   /* FIXME: We could update the excitation directly above */
228
227k
   for (j=0;j<nsf;j++)
229
221k
      exc[j]=ADD32(exc[j],e[j]);
230
231
   /* Update target: only update target if necessary */
232
5.54k
   if (update_target)
233
460
   {
234
460
      VARDECL(spx_word16_t *r2);
235
460
      ALLOC(r2, nsf, spx_word16_t);
236
18.8k
      for (j=0;j<nsf;j++)
237
18.4k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
238
460
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
239
18.8k
      for (j=0;j<nsf;j++)
240
18.4k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
241
460
   }
242
5.54k
}
cb_search.c:split_cb_search_shape_sign_N1
Line
Count
Source
116
14.4k
{
117
14.4k
   int i,j,m,q;
118
14.4k
   VARDECL(spx_word16_t *resp);
119
#ifdef _USE_SSE
120
   VARDECL(__m128 *resp2);
121
   VARDECL(__m128 *E);
122
#else
123
14.4k
   spx_word16_t *resp2;
124
14.4k
   VARDECL(spx_word32_t *E);
125
14.4k
#endif
126
14.4k
   VARDECL(spx_word16_t *t);
127
14.4k
   VARDECL(spx_sig_t *e);
128
14.4k
   const signed char *shape_cb;
129
14.4k
   int shape_cb_size, subvect_size, nb_subvect;
130
14.4k
   const split_cb_params *params;
131
14.4k
   int best_index;
132
14.4k
   spx_word32_t best_dist;
133
14.4k
   int have_sign;
134
135
14.4k
   params = (const split_cb_params *) par;
136
14.4k
   subvect_size = params->subvect_size;
137
14.4k
   nb_subvect = params->nb_subvect;
138
14.4k
   shape_cb_size = 1<<params->shape_bits;
139
14.4k
   shape_cb = params->shape_cb;
140
14.4k
   have_sign = params->have_sign;
141
14.4k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
142
#ifdef _USE_SSE
143
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
144
   ALLOC(E, shape_cb_size>>2, __m128);
145
#else
146
14.4k
   resp2 = resp;
147
14.4k
   ALLOC(E, shape_cb_size, spx_word32_t);
148
14.4k
#endif
149
14.4k
   ALLOC(t, nsf, spx_word16_t);
150
14.4k
   ALLOC(e, nsf, spx_sig_t);
151
152
   /* FIXME: Do we still need to copy the target? */
153
14.4k
   SPEEX_COPY(t, target, nsf);
154
155
14.4k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
156
157
87.7k
   for (i=0;i<nb_subvect;i++)
158
73.3k
   {
159
73.3k
      spx_word16_t *x=t+subvect_size*i;
160
      /*Find new n-best based on previous n-best j*/
161
73.3k
#ifndef DISABLE_WIDEBAND
162
73.3k
      if (have_sign)
163
10.0k
         vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
164
63.2k
      else
165
63.2k
#endif /* DISABLE_WIDEBAND */
166
63.2k
         vq_nbest(x, resp2, subvect_size, shape_cb_size, E, 1, &best_index, &best_dist, stack);
167
168
73.3k
      speex_bits_pack(bits,best_index,params->shape_bits+have_sign);
169
170
73.3k
      {
171
73.3k
         int rind;
172
73.3k
         spx_word16_t *res;
173
73.3k
         spx_word16_t sign=1;
174
73.3k
         rind = best_index;
175
73.3k
         if (rind>=shape_cb_size)
176
7.10k
         {
177
7.10k
            sign=-1;
178
7.10k
            rind-=shape_cb_size;
179
7.10k
         }
180
73.3k
         res = resp+rind*subvect_size;
181
73.3k
         if (sign>0)
182
587k
            for (m=0;m<subvect_size;m++)
183
520k
               t[subvect_size*i+m] = SUB16(t[subvect_size*i+m], res[m]);
184
7.10k
         else
185
63.9k
            for (m=0;m<subvect_size;m++)
186
56.8k
               t[subvect_size*i+m] = ADD16(t[subvect_size*i+m], res[m]);
187
188
73.3k
#ifdef FIXED_POINT
189
73.3k
         if (sign==1)
190
66.2k
         {
191
587k
            for (j=0;j<subvect_size;j++)
192
520k
               e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
193
66.2k
         } else {
194
63.9k
            for (j=0;j<subvect_size;j++)
195
56.8k
               e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
196
7.10k
         }
197
#else
198
         for (j=0;j<subvect_size;j++)
199
            e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
200
#endif
201
202
73.3k
      }
203
204
651k
      for (m=0;m<subvect_size;m++)
205
577k
      {
206
577k
         spx_word16_t g;
207
577k
         int rind;
208
577k
         spx_word16_t sign=1;
209
577k
         rind = best_index;
210
577k
         if (rind>=shape_cb_size)
211
56.8k
         {
212
56.8k
            sign=-1;
213
56.8k
            rind-=shape_cb_size;
214
56.8k
         }
215
216
577k
         q=subvect_size-m;
217
577k
#ifdef FIXED_POINT
218
577k
         g=sign*shape_cb[rind*subvect_size+m];
219
#else
220
         g=sign*0.03125*shape_cb[rind*subvect_size+m];
221
#endif
222
577k
         target_update(t+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
223
577k
      }
224
73.3k
   }
225
226
   /* Update excitation */
227
   /* FIXME: We could update the excitation directly above */
228
592k
   for (j=0;j<nsf;j++)
229
577k
      exc[j]=ADD32(exc[j],e[j]);
230
231
   /* Update target: only update target if necessary */
232
14.4k
   if (update_target)
233
1.42k
   {
234
1.42k
      VARDECL(spx_word16_t *r2);
235
1.42k
      ALLOC(r2, nsf, spx_word16_t);
236
58.5k
      for (j=0;j<nsf;j++)
237
57.1k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
238
1.42k
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
239
58.5k
      for (j=0;j<nsf;j++)
240
57.1k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
241
1.42k
   }
242
14.4k
}
243
244
245
246
void split_cb_search_shape_sign(
247
spx_word16_t target[],      /* target vector */
248
spx_coef_t ak[],      /* LPCs for this subframe */
249
spx_coef_t awk1[],      /* Weighted LPCs for this subframe */
250
spx_coef_t awk2[],      /* Weighted LPCs for this subframe */
251
const void *par,                      /* Codebook/search parameters*/
252
int   p,                        /* number of LPC coeffs */
253
int   nsf,                      /* number of samples in subframe */
254
spx_sig_t *exc,
255
spx_word16_t *r,
256
SpeexBits *bits,
257
char *stack,
258
int   complexity,
259
int   update_target
260
)
261
64.8k
{
262
64.8k
   int i,j,k,m,n,q;
263
64.8k
   VARDECL(spx_word16_t *resp);
264
#ifdef _USE_SSE
265
   VARDECL(__m128 *resp2);
266
   VARDECL(__m128 *E);
267
#else
268
   spx_word16_t *resp2;
269
   VARDECL(spx_word32_t *E);
270
#endif
271
64.8k
   VARDECL(spx_word16_t *t);
272
64.8k
   VARDECL(spx_sig_t *e);
273
64.8k
   VARDECL(spx_word16_t *tmp);
274
64.8k
   VARDECL(spx_word32_t *ndist);
275
64.8k
   VARDECL(spx_word32_t *odist);
276
64.8k
   VARDECL(int *itmp);
277
64.8k
   VARDECL(spx_word16_t **ot2);
278
64.8k
   VARDECL(spx_word16_t **nt2);
279
64.8k
   spx_word16_t **ot, **nt;
280
64.8k
   VARDECL(int **nind);
281
64.8k
   VARDECL(int **oind);
282
64.8k
   VARDECL(int *ind);
283
64.8k
   const signed char *shape_cb;
284
64.8k
   int shape_cb_size, subvect_size, nb_subvect;
285
64.8k
   const split_cb_params *params;
286
64.8k
   int N=2;
287
64.8k
   VARDECL(int *best_index);
288
64.8k
   VARDECL(spx_word32_t *best_dist);
289
64.8k
   VARDECL(int *best_nind);
290
64.8k
   VARDECL(int *best_ntarget);
291
64.8k
   int have_sign;
292
64.8k
   N=complexity;
293
64.8k
   if (N>10)
294
0
      N=10;
295
   /* Complexity isn't as important for the codebooks as it is for the pitch */
296
64.8k
   N=(2*N)/3;
297
64.8k
   if (N<1)
298
14.7k
      N=1;
299
64.8k
   if (N==1)
300
19.9k
   {
301
19.9k
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
302
19.9k
      return;
303
19.9k
   }
304
44.8k
   ALLOC(ot2, N, spx_word16_t*);
305
44.8k
   ALLOC(nt2, N, spx_word16_t*);
306
44.8k
   ALLOC(oind, N, int*);
307
44.8k
   ALLOC(nind, N, int*);
308
309
44.8k
   params = (const split_cb_params *) par;
310
44.8k
   subvect_size = params->subvect_size;
311
44.8k
   nb_subvect = params->nb_subvect;
312
44.8k
   shape_cb_size = 1<<params->shape_bits;
313
44.8k
   shape_cb = params->shape_cb;
314
44.8k
   have_sign = params->have_sign;
315
44.8k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
316
#ifdef _USE_SSE
317
13.0k
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
318
13.0k
   ALLOC(E, shape_cb_size>>2, __m128);
319
#else
320
   resp2 = resp;
321
31.8k
   ALLOC(E, shape_cb_size, spx_word32_t);
322
#endif
323
44.8k
   ALLOC(t, nsf, spx_word16_t);
324
44.8k
   ALLOC(e, nsf, spx_sig_t);
325
44.8k
   ALLOC(ind, nb_subvect, int);
326
327
44.8k
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
328
237k
   for (i=0;i<N;i++)
329
192k
   {
330
192k
      ot2[i]=tmp+2*i*nsf;
331
192k
      nt2[i]=tmp+(2*i+1)*nsf;
332
192k
   }
333
44.8k
   ot=ot2;
334
44.8k
   nt=nt2;
335
44.8k
   ALLOC(best_index, N, int);
336
44.8k
   ALLOC(best_dist, N, spx_word32_t);
337
44.8k
   ALLOC(best_nind, N, int);
338
44.8k
   ALLOC(best_ntarget, N, int);
339
44.8k
   ALLOC(ndist, N, spx_word32_t);
340
44.8k
   ALLOC(odist, N, spx_word32_t);
341
342
44.8k
   ALLOC(itmp, 2*N*nb_subvect, int);
343
237k
   for (i=0;i<N;i++)
344
192k
   {
345
192k
      nind[i]=itmp+2*i*nb_subvect;
346
192k
      oind[i]=itmp+(2*i+1)*nb_subvect;
347
192k
   }
348
349
44.8k
   SPEEX_COPY(t, target, nsf);
350
351
237k
   for (j=0;j<N;j++)
352
192k
      SPEEX_COPY(&ot[j][0], t, nsf);
353
354
   /* Pre-compute codewords response and energy */
355
44.8k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
356
357
237k
   for (j=0;j<N;j++)
358
192k
      odist[j]=0;
359
360
   /*For all subvectors*/
361
285k
   for (i=0;i<nb_subvect;i++)
362
240k
   {
363
      /*"erase" nbest list*/
364
1.26M
      for (j=0;j<N;j++)
365
1.02M
         ndist[j]=VERY_LARGE32;
366
      /* This is not strictly necessary, but it provides an additional safety
367
         to prevent crashes in case something goes wrong in the previous
368
         steps (e.g. NaNs) */
369
1.26M
      for (j=0;j<N;j++)
370
1.02M
         best_nind[j] = best_ntarget[j] = 0;
371
      /*For all n-bests of previous subvector*/
372
1.07M
      for (j=0;j<N;j++)
373
880k
      {
374
880k
         spx_word16_t *x=ot[j]+subvect_size*i;
375
880k
         spx_word32_t tener = 0;
376
7.21M
         for (m=0;m<subvect_size;m++)
377
6.33M
            tener = MAC16_16(tener, x[m],x[m]);
378
#ifdef FIXED_POINT
379
616k
         tener = SHR32(tener,1);
380
#else
381
         tener *= .5;
382
#endif
383
         /*Find new n-best based on previous n-best j*/
384
880k
#ifndef DISABLE_WIDEBAND
385
880k
         if (have_sign)
386
116k
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
387
763k
         else
388
763k
#endif /* DISABLE_WIDEBAND */
389
763k
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
390
391
         /*For all new n-bests*/
392
5.01M
         for (k=0;k<N;k++)
393
4.13M
         {
394
            /* Compute total distance (including previous sub-vectors */
395
4.13M
            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
396
397
            /*update n-best list*/
398
4.13M
            if (err<ndist[N-1])
399
1.61M
            {
400
5.06M
               for (m=0;m<N;m++)
401
5.06M
               {
402
5.06M
                  if (err < ndist[m])
403
1.61M
                  {
404
4.32M
                     for (n=N-1;n>m;n--)
405
2.70M
                     {
406
2.70M
                        ndist[n] = ndist[n-1];
407
2.70M
                        best_nind[n] = best_nind[n-1];
408
2.70M
                        best_ntarget[n] = best_ntarget[n-1];
409
2.70M
                     }
410
                     /* n is equal to m here, so they're interchangeable */
411
1.61M
                     ndist[m] = err;
412
1.61M
                     best_nind[n] = best_index[k];
413
1.61M
                     best_ntarget[n] = j;
414
1.61M
                     break;
415
1.61M
                  }
416
5.06M
               }
417
1.61M
            }
418
4.13M
         }
419
880k
         if (i==0)
420
44.8k
            break;
421
880k
      }
422
1.26M
      for (j=0;j<N;j++)
423
1.02M
      {
424
         /*previous target (we don't care what happened before*/
425
17.7M
         for (m=(i+1)*subvect_size;m<nsf;m++)
426
16.7M
            nt[j][m]=ot[best_ntarget[j]][m];
427
428
         /* New code: update the rest of the target only if it's worth it */
429
8.74M
         for (m=0;m<subvect_size;m++)
430
7.71M
         {
431
7.71M
            spx_word16_t g;
432
7.71M
            int rind;
433
7.71M
            spx_word16_t sign=1;
434
7.71M
            rind = best_nind[j];
435
7.71M
            if (rind>=shape_cb_size)
436
739k
            {
437
739k
               sign=-1;
438
739k
               rind-=shape_cb_size;
439
739k
            }
440
441
7.71M
            q=subvect_size-m;
442
#ifdef FIXED_POINT
443
            g=sign*shape_cb[rind*subvect_size+m];
444
#else
445
            g=sign*0.03125*shape_cb[rind*subvect_size+m];
446
#endif
447
7.71M
            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
448
7.71M
         }
449
450
7.39M
         for (q=0;q<nb_subvect;q++)
451
6.36M
            nind[j][q]=oind[best_ntarget[j]][q];
452
1.02M
         nind[j][i]=best_nind[j];
453
1.02M
      }
454
455
      /*update old-new data*/
456
      /* just swap pointers instead of a long copy */
457
240k
      {
458
240k
         spx_word16_t **tmp2;
459
240k
         tmp2=ot;
460
240k
         ot=nt;
461
240k
         nt=tmp2;
462
240k
      }
463
1.26M
      for (j=0;j<N;j++)
464
7.39M
         for (m=0;m<nb_subvect;m++)
465
6.36M
            oind[j][m]=nind[j][m];
466
1.26M
      for (j=0;j<N;j++)
467
1.02M
         odist[j]=ndist[j];
468
240k
   }
469
470
   /*save indices*/
471
285k
   for (i=0;i<nb_subvect;i++)
472
240k
   {
473
240k
      ind[i]=nind[0][i];
474
240k
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
475
240k
   }
476
477
   /* Put everything back together */
478
285k
   for (i=0;i<nb_subvect;i++)
479
240k
   {
480
240k
      int rind;
481
240k
      spx_word16_t sign=1;
482
240k
      rind = ind[i];
483
240k
      if (rind>=shape_cb_size)
484
25.6k
      {
485
25.6k
         sign=-1;
486
25.6k
         rind-=shape_cb_size;
487
25.6k
      }
488
#ifdef FIXED_POINT
489
170k
      if (sign==1)
490
154k
      {
491
1.29M
         for (j=0;j<subvect_size;j++)
492
1.14M
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
493
154k
      } else {
494
147k
         for (j=0;j<subvect_size;j++)
495
131k
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
496
16.4k
      }
497
#else
498
590k
      for (j=0;j<subvect_size;j++)
499
520k
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
500
#endif
501
240k
   }
502
   /* Update excitation */
503
1.83M
   for (j=0;j<nsf;j++)
504
1.79M
      exc[j]=ADD32(exc[j],e[j]);
505
506
   /* Update target: only update target if necessary */
507
44.8k
   if (update_target)
508
3.88k
   {
509
3.88k
      VARDECL(spx_word16_t *r2);
510
3.88k
      ALLOC(r2, nsf, spx_word16_t);
511
159k
      for (j=0;j<nsf;j++)
512
155k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
513
3.88k
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
514
159k
      for (j=0;j<nsf;j++)
515
155k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
516
3.88k
   }
517
44.8k
}
split_cb_search_shape_sign
Line
Count
Source
261
18.5k
{
262
18.5k
   int i,j,k,m,n,q;
263
18.5k
   VARDECL(spx_word16_t *resp);
264
18.5k
#ifdef _USE_SSE
265
18.5k
   VARDECL(__m128 *resp2);
266
18.5k
   VARDECL(__m128 *E);
267
#else
268
   spx_word16_t *resp2;
269
   VARDECL(spx_word32_t *E);
270
#endif
271
18.5k
   VARDECL(spx_word16_t *t);
272
18.5k
   VARDECL(spx_sig_t *e);
273
18.5k
   VARDECL(spx_word16_t *tmp);
274
18.5k
   VARDECL(spx_word32_t *ndist);
275
18.5k
   VARDECL(spx_word32_t *odist);
276
18.5k
   VARDECL(int *itmp);
277
18.5k
   VARDECL(spx_word16_t **ot2);
278
18.5k
   VARDECL(spx_word16_t **nt2);
279
18.5k
   spx_word16_t **ot, **nt;
280
18.5k
   VARDECL(int **nind);
281
18.5k
   VARDECL(int **oind);
282
18.5k
   VARDECL(int *ind);
283
18.5k
   const signed char *shape_cb;
284
18.5k
   int shape_cb_size, subvect_size, nb_subvect;
285
18.5k
   const split_cb_params *params;
286
18.5k
   int N=2;
287
18.5k
   VARDECL(int *best_index);
288
18.5k
   VARDECL(spx_word32_t *best_dist);
289
18.5k
   VARDECL(int *best_nind);
290
18.5k
   VARDECL(int *best_ntarget);
291
18.5k
   int have_sign;
292
18.5k
   N=complexity;
293
18.5k
   if (N>10)
294
0
      N=10;
295
   /* Complexity isn't as important for the codebooks as it is for the pitch */
296
18.5k
   N=(2*N)/3;
297
18.5k
   if (N<1)
298
3.70k
      N=1;
299
18.5k
   if (N==1)
300
5.54k
   {
301
5.54k
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
302
5.54k
      return;
303
5.54k
   }
304
13.0k
   ALLOC(ot2, N, spx_word16_t*);
305
13.0k
   ALLOC(nt2, N, spx_word16_t*);
306
13.0k
   ALLOC(oind, N, int*);
307
13.0k
   ALLOC(nind, N, int*);
308
309
13.0k
   params = (const split_cb_params *) par;
310
13.0k
   subvect_size = params->subvect_size;
311
13.0k
   nb_subvect = params->nb_subvect;
312
13.0k
   shape_cb_size = 1<<params->shape_bits;
313
13.0k
   shape_cb = params->shape_cb;
314
13.0k
   have_sign = params->have_sign;
315
13.0k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
316
13.0k
#ifdef _USE_SSE
317
13.0k
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
318
13.0k
   ALLOC(E, shape_cb_size>>2, __m128);
319
#else
320
   resp2 = resp;
321
   ALLOC(E, shape_cb_size, spx_word32_t);
322
#endif
323
13.0k
   ALLOC(t, nsf, spx_word16_t);
324
13.0k
   ALLOC(e, nsf, spx_sig_t);
325
13.0k
   ALLOC(ind, nb_subvect, int);
326
327
13.0k
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
328
70.9k
   for (i=0;i<N;i++)
329
57.9k
   {
330
57.9k
      ot2[i]=tmp+2*i*nsf;
331
57.9k
      nt2[i]=tmp+(2*i+1)*nsf;
332
57.9k
   }
333
13.0k
   ot=ot2;
334
13.0k
   nt=nt2;
335
13.0k
   ALLOC(best_index, N, int);
336
13.0k
   ALLOC(best_dist, N, spx_word32_t);
337
13.0k
   ALLOC(best_nind, N, int);
338
13.0k
   ALLOC(best_ntarget, N, int);
339
13.0k
   ALLOC(ndist, N, spx_word32_t);
340
13.0k
   ALLOC(odist, N, spx_word32_t);
341
342
13.0k
   ALLOC(itmp, 2*N*nb_subvect, int);
343
70.9k
   for (i=0;i<N;i++)
344
57.9k
   {
345
57.9k
      nind[i]=itmp+2*i*nb_subvect;
346
57.9k
      oind[i]=itmp+(2*i+1)*nb_subvect;
347
57.9k
   }
348
349
13.0k
   SPEEX_COPY(t, target, nsf);
350
351
70.9k
   for (j=0;j<N;j++)
352
57.9k
      SPEEX_COPY(&ot[j][0], t, nsf);
353
354
   /* Pre-compute codewords response and energy */
355
13.0k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
356
357
70.9k
   for (j=0;j<N;j++)
358
57.9k
      odist[j]=0;
359
360
   /*For all subvectors*/
361
82.7k
   for (i=0;i<nb_subvect;i++)
362
69.7k
   {
363
      /*"erase" nbest list*/
364
378k
      for (j=0;j<N;j++)
365
308k
         ndist[j]=VERY_LARGE32;
366
      /* This is not strictly necessary, but it provides an additional safety
367
         to prevent crashes in case something goes wrong in the previous
368
         steps (e.g. NaNs) */
369
378k
      for (j=0;j<N;j++)
370
308k
         best_nind[j] = best_ntarget[j] = 0;
371
      /*For all n-bests of previous subvector*/
372
320k
      for (j=0;j<N;j++)
373
263k
      {
374
263k
         spx_word16_t *x=ot[j]+subvect_size*i;
375
263k
         spx_word32_t tener = 0;
376
2.15M
         for (m=0;m<subvect_size;m++)
377
1.89M
            tener = MAC16_16(tener, x[m],x[m]);
378
#ifdef FIXED_POINT
379
         tener = SHR32(tener,1);
380
#else
381
263k
         tener *= .5;
382
263k
#endif
383
         /*Find new n-best based on previous n-best j*/
384
263k
#ifndef DISABLE_WIDEBAND
385
263k
         if (have_sign)
386
36.4k
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
387
226k
         else
388
226k
#endif /* DISABLE_WIDEBAND */
389
226k
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
390
391
         /*For all new n-bests*/
392
1.54M
         for (k=0;k<N;k++)
393
1.28M
         {
394
            /* Compute total distance (including previous sub-vectors) */
395
1.28M
            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
396
397
            /*update n-best list*/
398
1.28M
            if (err<ndist[N-1])
399
485k
            {
400
1.58M
               for (m=0;m<N;m++)
401
1.58M
               {
402
1.58M
                  if (err < ndist[m])
403
485k
                  {
404
1.33M
                     for (n=N-1;n>m;n--)
405
848k
                     {
406
848k
                        ndist[n] = ndist[n-1];
407
848k
                        best_nind[n] = best_nind[n-1];
408
848k
                        best_ntarget[n] = best_ntarget[n-1];
409
848k
                     }
410
                     /* n is equal to m here, so they're interchangeable */
411
485k
                     ndist[m] = err;
412
485k
                     best_nind[n] = best_index[k];
413
485k
                     best_ntarget[n] = j;
414
485k
                     break;
415
485k
                  }
416
1.58M
               }
417
485k
            }
418
1.28M
         }
419
263k
         if (i==0)
420
13.0k
            break;
421
263k
      }
422
378k
      for (j=0;j<N;j++)
423
308k
      {
424
         /* previous target (we don't care what happened before) */
425
5.31M
         for (m=(i+1)*subvect_size;m<nsf;m++)
426
5.00M
            nt[j][m]=ot[best_ntarget[j]][m];
427
428
         /* New code: update the rest of the target only if it's worth it */
429
2.62M
         for (m=0;m<subvect_size;m++)
430
2.31M
         {
431
2.31M
            spx_word16_t g;
432
2.31M
            int rind;
433
2.31M
            spx_word16_t sign=1;
434
2.31M
            rind = best_nind[j];
435
2.31M
            if (rind>=shape_cb_size)
436
229k
            {
437
229k
               sign=-1;
438
229k
               rind-=shape_cb_size;
439
229k
            }
440
441
2.31M
            q=subvect_size-m;
442
#ifdef FIXED_POINT
443
            g=sign*shape_cb[rind*subvect_size+m];
444
#else
445
2.31M
            g=sign*0.03125*shape_cb[rind*subvect_size+m];
446
2.31M
#endif
447
2.31M
            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
448
2.31M
         }
449
450
2.22M
         for (q=0;q<nb_subvect;q++)
451
1.91M
            nind[j][q]=oind[best_ntarget[j]][q];
452
308k
         nind[j][i]=best_nind[j];
453
308k
      }
454
455
      /*update old-new data*/
456
      /* just swap pointers instead of a long copy */
457
69.7k
      {
458
69.7k
         spx_word16_t **tmp2;
459
69.7k
         tmp2=ot;
460
69.7k
         ot=nt;
461
69.7k
         nt=tmp2;
462
69.7k
      }
463
378k
      for (j=0;j<N;j++)
464
2.22M
         for (m=0;m<nb_subvect;m++)
465
1.91M
            oind[j][m]=nind[j][m];
466
378k
      for (j=0;j<N;j++)
467
308k
         odist[j]=ndist[j];
468
69.7k
   }
469
470
   /*save indices*/
471
82.7k
   for (i=0;i<nb_subvect;i++)
472
69.7k
   {
473
69.7k
      ind[i]=nind[0][i];
474
69.7k
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
475
69.7k
   }
476
477
   /* Put everything back together */
478
82.7k
   for (i=0;i<nb_subvect;i++)
479
69.7k
   {
480
69.7k
      int rind;
481
69.7k
      spx_word16_t sign=1;
482
69.7k
      rind = ind[i];
483
69.7k
      if (rind>=shape_cb_size)
484
9.23k
      {
485
9.23k
         sign=-1;
486
9.23k
         rind-=shape_cb_size;
487
9.23k
      }
488
#ifdef FIXED_POINT
489
      if (sign==1)
490
      {
491
         for (j=0;j<subvect_size;j++)
492
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
493
      } else {
494
         for (j=0;j<subvect_size;j++)
495
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
496
      }
497
#else
498
590k
      for (j=0;j<subvect_size;j++)
499
520k
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
500
69.7k
#endif
501
69.7k
   }
502
   /* Update excitation */
503
533k
   for (j=0;j<nsf;j++)
504
520k
      exc[j]=ADD32(exc[j],e[j]);
505
506
   /* Update target: only update target if necessary */
507
13.0k
   if (update_target)
508
1.24k
   {
509
1.24k
      VARDECL(spx_word16_t *r2);
510
1.24k
      ALLOC(r2, nsf, spx_word16_t);
511
51.1k
      for (j=0;j<nsf;j++)
512
49.9k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
513
1.24k
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
514
51.1k
      for (j=0;j<nsf;j++)
515
49.9k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
516
1.24k
   }
517
13.0k
}
split_cb_search_shape_sign
Line
Count
Source
261
46.2k
{
262
46.2k
   int i,j,k,m,n,q;
263
46.2k
   VARDECL(spx_word16_t *resp);
264
#ifdef _USE_SSE
265
   VARDECL(__m128 *resp2);
266
   VARDECL(__m128 *E);
267
#else
268
46.2k
   spx_word16_t *resp2;
269
46.2k
   VARDECL(spx_word32_t *E);
270
46.2k
#endif
271
46.2k
   VARDECL(spx_word16_t *t);
272
46.2k
   VARDECL(spx_sig_t *e);
273
46.2k
   VARDECL(spx_word16_t *tmp);
274
46.2k
   VARDECL(spx_word32_t *ndist);
275
46.2k
   VARDECL(spx_word32_t *odist);
276
46.2k
   VARDECL(int *itmp);
277
46.2k
   VARDECL(spx_word16_t **ot2);
278
46.2k
   VARDECL(spx_word16_t **nt2);
279
46.2k
   spx_word16_t **ot, **nt;
280
46.2k
   VARDECL(int **nind);
281
46.2k
   VARDECL(int **oind);
282
46.2k
   VARDECL(int *ind);
283
46.2k
   const signed char *shape_cb;
284
46.2k
   int shape_cb_size, subvect_size, nb_subvect;
285
46.2k
   const split_cb_params *params;
286
46.2k
   int N=2;
287
46.2k
   VARDECL(int *best_index);
288
46.2k
   VARDECL(spx_word32_t *best_dist);
289
46.2k
   VARDECL(int *best_nind);
290
46.2k
   VARDECL(int *best_ntarget);
291
46.2k
   int have_sign;
292
46.2k
   N=complexity;
293
46.2k
   if (N>10)
294
0
      N=10;
295
   /* Complexity isn't as important for the codebooks as it is for the pitch */
296
46.2k
   N=(2*N)/3;
297
46.2k
   if (N<1)
298
11.0k
      N=1;
299
46.2k
   if (N==1)
300
14.4k
   {
301
14.4k
      split_cb_search_shape_sign_N1(target,ak,awk1,awk2,par,p,nsf,exc,r,bits,stack,update_target);
302
14.4k
      return;
303
14.4k
   }
304
31.8k
   ALLOC(ot2, N, spx_word16_t*);
305
31.8k
   ALLOC(nt2, N, spx_word16_t*);
306
31.8k
   ALLOC(oind, N, int*);
307
31.8k
   ALLOC(nind, N, int*);
308
309
31.8k
   params = (const split_cb_params *) par;
310
31.8k
   subvect_size = params->subvect_size;
311
31.8k
   nb_subvect = params->nb_subvect;
312
31.8k
   shape_cb_size = 1<<params->shape_bits;
313
31.8k
   shape_cb = params->shape_cb;
314
31.8k
   have_sign = params->have_sign;
315
31.8k
   ALLOC(resp, shape_cb_size*subvect_size, spx_word16_t);
316
#ifdef _USE_SSE
317
   ALLOC(resp2, (shape_cb_size*subvect_size)>>2, __m128);
318
   ALLOC(E, shape_cb_size>>2, __m128);
319
#else
320
31.8k
   resp2 = resp;
321
31.8k
   ALLOC(E, shape_cb_size, spx_word32_t);
322
31.8k
#endif
323
31.8k
   ALLOC(t, nsf, spx_word16_t);
324
31.8k
   ALLOC(e, nsf, spx_sig_t);
325
31.8k
   ALLOC(ind, nb_subvect, int);
326
327
31.8k
   ALLOC(tmp, 2*N*nsf, spx_word16_t);
328
166k
   for (i=0;i<N;i++)
329
134k
   {
330
134k
      ot2[i]=tmp+2*i*nsf;
331
134k
      nt2[i]=tmp+(2*i+1)*nsf;
332
134k
   }
333
31.8k
   ot=ot2;
334
31.8k
   nt=nt2;
335
31.8k
   ALLOC(best_index, N, int);
336
31.8k
   ALLOC(best_dist, N, spx_word32_t);
337
31.8k
   ALLOC(best_nind, N, int);
338
31.8k
   ALLOC(best_ntarget, N, int);
339
31.8k
   ALLOC(ndist, N, spx_word32_t);
340
31.8k
   ALLOC(odist, N, spx_word32_t);
341
342
31.8k
   ALLOC(itmp, 2*N*nb_subvect, int);
343
166k
   for (i=0;i<N;i++)
344
134k
   {
345
134k
      nind[i]=itmp+2*i*nb_subvect;
346
134k
      oind[i]=itmp+(2*i+1)*nb_subvect;
347
134k
   }
348
349
31.8k
   SPEEX_COPY(t, target, nsf);
350
351
166k
   for (j=0;j<N;j++)
352
134k
      SPEEX_COPY(&ot[j][0], t, nsf);
353
354
   /* Pre-compute codewords response and energy */
355
31.8k
   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
356
357
166k
   for (j=0;j<N;j++)
358
134k
      odist[j]=0;
359
360
   /*For all subvectors*/
361
202k
   for (i=0;i<nb_subvect;i++)
362
170k
   {
363
      /*"erase" nbest list*/
364
890k
      for (j=0;j<N;j++)
365
719k
         ndist[j]=VERY_LARGE32;
366
      /* This is not strictly necessary, but it provides an additional safety
367
         to prevent crashes in case something goes wrong in the previous
368
         steps (e.g. NaNs) */
369
890k
      for (j=0;j<N;j++)
370
719k
         best_nind[j] = best_ntarget[j] = 0;
371
      /*For all n-bests of previous subvector*/
372
755k
      for (j=0;j<N;j++)
373
616k
      {
374
616k
         spx_word16_t *x=ot[j]+subvect_size*i;
375
616k
         spx_word32_t tener = 0;
376
5.05M
         for (m=0;m<subvect_size;m++)
377
4.44M
            tener = MAC16_16(tener, x[m],x[m]);
378
616k
#ifdef FIXED_POINT
379
616k
         tener = SHR32(tener,1);
380
#else
381
         tener *= .5;
382
#endif
383
         /*Find new n-best based on previous n-best j*/
384
616k
#ifndef DISABLE_WIDEBAND
385
616k
         if (have_sign)
386
80.3k
            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
387
536k
         else
388
536k
#endif /* DISABLE_WIDEBAND */
389
536k
            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
390
391
         /*For all new n-bests*/
392
3.46M
         for (k=0;k<N;k++)
393
2.84M
         {
394
            /* Compute total distance (including previous sub-vectors) */
395
2.84M
            spx_word32_t err = ADD32(ADD32(odist[j],best_dist[k]),tener);
396
397
            /*update n-best list*/
398
2.84M
            if (err<ndist[N-1])
399
1.12M
            {
400
3.48M
               for (m=0;m<N;m++)
401
3.48M
               {
402
3.48M
                  if (err < ndist[m])
403
1.12M
                  {
404
2.98M
                     for (n=N-1;n>m;n--)
405
1.86M
                     {
406
1.86M
                        ndist[n] = ndist[n-1];
407
1.86M
                        best_nind[n] = best_nind[n-1];
408
1.86M
                        best_ntarget[n] = best_ntarget[n-1];
409
1.86M
                     }
410
                     /* n is equal to m here, so they're interchangeable */
411
1.12M
                     ndist[m] = err;
412
1.12M
                     best_nind[n] = best_index[k];
413
1.12M
                     best_ntarget[n] = j;
414
1.12M
                     break;
415
1.12M
                  }
416
3.48M
               }
417
1.12M
            }
418
2.84M
         }
419
616k
         if (i==0)
420
31.8k
            break;
421
616k
      }
422
890k
      for (j=0;j<N;j++)
423
719k
      {
424
         /* previous target (we don't care what happened before) */
425
12.4M
         for (m=(i+1)*subvect_size;m<nsf;m++)
426
11.6M
            nt[j][m]=ot[best_ntarget[j]][m];
427
428
         /* New code: update the rest of the target only if it's worth it */
429
6.11M
         for (m=0;m<subvect_size;m++)
430
5.39M
         {
431
5.39M
            spx_word16_t g;
432
5.39M
            int rind;
433
5.39M
            spx_word16_t sign=1;
434
5.39M
            rind = best_nind[j];
435
5.39M
            if (rind>=shape_cb_size)
436
510k
            {
437
510k
               sign=-1;
438
510k
               rind-=shape_cb_size;
439
510k
            }
440
441
5.39M
            q=subvect_size-m;
442
5.39M
#ifdef FIXED_POINT
443
5.39M
            g=sign*shape_cb[rind*subvect_size+m];
444
#else
445
            g=sign*0.03125*shape_cb[rind*subvect_size+m];
446
#endif
447
5.39M
            target_update(nt[j]+subvect_size*(i+1), g, r+q, nsf-subvect_size*(i+1));
448
5.39M
         }
449
450
5.17M
         for (q=0;q<nb_subvect;q++)
451
4.45M
            nind[j][q]=oind[best_ntarget[j]][q];
452
719k
         nind[j][i]=best_nind[j];
453
719k
      }
454
455
      /*update old-new data*/
456
      /* just swap pointers instead of a long copy */
457
170k
      {
458
170k
         spx_word16_t **tmp2;
459
170k
         tmp2=ot;
460
170k
         ot=nt;
461
170k
         nt=tmp2;
462
170k
      }
463
890k
      for (j=0;j<N;j++)
464
5.17M
         for (m=0;m<nb_subvect;m++)
465
4.45M
            oind[j][m]=nind[j][m];
466
890k
      for (j=0;j<N;j++)
467
719k
         odist[j]=ndist[j];
468
170k
   }
469
470
   /*save indices*/
471
202k
   for (i=0;i<nb_subvect;i++)
472
170k
   {
473
170k
      ind[i]=nind[0][i];
474
170k
      speex_bits_pack(bits,ind[i],params->shape_bits+have_sign);
475
170k
   }
476
477
   /* Put everything back together */
478
202k
   for (i=0;i<nb_subvect;i++)
479
170k
   {
480
170k
      int rind;
481
170k
      spx_word16_t sign=1;
482
170k
      rind = ind[i];
483
170k
      if (rind>=shape_cb_size)
484
16.4k
      {
485
16.4k
         sign=-1;
486
16.4k
         rind-=shape_cb_size;
487
16.4k
      }
488
170k
#ifdef FIXED_POINT
489
170k
      if (sign==1)
490
154k
      {
491
1.29M
         for (j=0;j<subvect_size;j++)
492
1.14M
            e[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5);
493
154k
      } else {
494
147k
         for (j=0;j<subvect_size;j++)
495
131k
            e[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[rind*subvect_size+j]),SIG_SHIFT-5));
496
16.4k
      }
497
#else
498
      for (j=0;j<subvect_size;j++)
499
         e[subvect_size*i+j]=sign*0.03125*shape_cb[rind*subvect_size+j];
500
#endif
501
170k
   }
502
   /* Update excitation */
503
1.30M
   for (j=0;j<nsf;j++)
504
1.27M
      exc[j]=ADD32(exc[j],e[j]);
505
506
   /* Update target: only update target if necessary */
507
31.8k
   if (update_target)
508
2.64k
   {
509
2.64k
      VARDECL(spx_word16_t *r2);
510
2.64k
      ALLOC(r2, nsf, spx_word16_t);
511
108k
      for (j=0;j<nsf;j++)
512
105k
         r2[j] = EXTRACT16(PSHR32(e[j] ,6));
513
2.64k
      syn_percep_zero16(r2, ak, awk1, awk2, r2, nsf,p, stack);
514
108k
      for (j=0;j<nsf;j++)
515
105k
         target[j]=SUB16(target[j],PSHR16(r2[j],2));
516
2.64k
   }
517
31.8k
}
518
#endif /* DISABLE_ENCODER */
519
520
#ifndef DISABLE_DECODER
521
/* Decode a split shape(-sign) codebook innovation from the bit-stream.
   For each of params->nb_subvect sub-vectors this reads an optional sign bit
   (only when params->have_sign is non-zero) followed by a
   params->shape_bits-bit codebook index, then writes the selected codeword,
   scaled and sign-adjusted, into exc[subvect_size*i + j] for
   j in [0, subvect_size).
   NOTE(review): the FIXED_POINT build assigns to exc while the float build
   adds to it, so the float result depends on the previous contents of exc
   (presumably zeroed by the caller -- confirm). */
void split_cb_shape_sign_unquant(
522
spx_sig_t *exc,                       /* excitation buffer written by this function */
523
const void *par,                      /* non-overlapping codebook (cast to const split_cb_params *) */
524
int   nsf,                      /* number of samples in subframe (not referenced in the body) */
525
SpeexBits *bits,                      /* bit-stream the signs and indices are unpacked from */
526
char *stack,                          /* presumably the scratch area used by ALLOC -- confirm in stack_alloc.h */
527
spx_uint32_t *seed                    /* not referenced in the body */
528
)
529
104k
{
530
104k
   int i,j;
531
104k
   VARDECL(int *ind);
532
104k
   VARDECL(int *signs);
533
104k
   const signed char *shape_cb;
534
104k
   int subvect_size, nb_subvect;
535
104k
   const split_cb_params *params;
536
104k
   int have_sign;
537
538
104k
   params = (const split_cb_params *) par;
539
104k
   subvect_size = params->subvect_size;
540
104k
   nb_subvect = params->nb_subvect;
541
542
104k
   shape_cb = params->shape_cb;
543
104k
   have_sign = params->have_sign;
544
545
104k
   ALLOC(ind, nb_subvect, int);
546
104k
   ALLOC(signs, nb_subvect, int);
547
548
   /* Decode codeword indices and optional sign bits: per sub-vector the
      sign bit (if any) is read first, then the shape_bits-bit index */
549
638k
   for (i=0;i<nb_subvect;i++)
550
534k
   {
551
534k
      if (have_sign)
552
73.8k
         signs[i] = speex_bits_unpack_unsigned(bits, 1);
553
460k
      else
554
460k
         signs[i] = 0;
555
534k
      ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
556
534k
   }
557
   /* Compute decoded excitation */
558
638k
   for (i=0;i<nb_subvect;i++)
559
534k
   {
560
534k
      /* s is +1 unless the decoded sign bit is set, in which case it is -1 */
      spx_word16_t s=1;
561
534k
      if (signs[i])
562
1.28k
         s=-1;
563
#ifdef FIXED_POINT
564
309k
      /* Fixed-point: overwrite exc with the codeword shifted by SIG_SHIFT-5,
         negated when s is -1 */
      if (s==1)
565
308k
      {
566
2.72M
         for (j=0;j<subvect_size;j++)
567
2.41M
            exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
568
308k
      } else {
569
7.74k
         for (j=0;j<subvect_size;j++)
570
6.88k
            exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
571
861
      }
572
#else
573
1.99M
      /* Float: accumulate (+=) the signed codeword scaled by 0.03125 (1/32);
         presumably this mirrors the -5 in the fixed-point shift -- confirm */
      for (j=0;j<subvect_size;j++)
574
1.76M
         exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
575
#endif
576
534k
   }
577
104k
}
split_cb_shape_sign_unquant
Line
Count
Source
529
44.2k
{
530
44.2k
   int i,j;
531
44.2k
   VARDECL(int *ind);
532
44.2k
   VARDECL(int *signs);
533
44.2k
   const signed char *shape_cb;
534
44.2k
   int subvect_size, nb_subvect;
535
44.2k
   const split_cb_params *params;
536
44.2k
   int have_sign;
537
538
44.2k
   params = (const split_cb_params *) par;
539
44.2k
   subvect_size = params->subvect_size;
540
44.2k
   nb_subvect = params->nb_subvect;
541
542
44.2k
   shape_cb = params->shape_cb;
543
44.2k
   have_sign = params->have_sign;
544
545
44.2k
   ALLOC(ind, nb_subvect, int);
546
44.2k
   ALLOC(signs, nb_subvect, int);
547
548
   /* Decode codeword indices and optional sign bits */
549
268k
   for (i=0;i<nb_subvect;i++)
550
224k
   {
551
224k
      if (have_sign)
552
33.0k
         signs[i] = speex_bits_unpack_unsigned(bits, 1);
553
191k
      else
554
191k
         signs[i] = 0;
555
224k
      ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
556
224k
   }
557
   /* Compute decoded excitation */
558
268k
   for (i=0;i<nb_subvect;i++)
559
224k
   {
560
224k
      spx_word16_t s=1;
561
224k
      if (signs[i])
562
424
         s=-1;
563
#ifdef FIXED_POINT
564
      if (s==1)
565
      {
566
         for (j=0;j<subvect_size;j++)
567
            exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
568
      } else {
569
         for (j=0;j<subvect_size;j++)
570
            exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
571
      }
572
#else
573
1.99M
      for (j=0;j<subvect_size;j++)
574
1.76M
         exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
575
224k
#endif
576
224k
   }
577
44.2k
}
split_cb_shape_sign_unquant
Line
Count
Source
529
60.6k
{
530
60.6k
   int i,j;
531
60.6k
   VARDECL(int *ind);
532
60.6k
   VARDECL(int *signs);
533
60.6k
   const signed char *shape_cb;
534
60.6k
   int subvect_size, nb_subvect;
535
60.6k
   const split_cb_params *params;
536
60.6k
   int have_sign;
537
538
60.6k
   params = (const split_cb_params *) par;
539
60.6k
   subvect_size = params->subvect_size;
540
60.6k
   nb_subvect = params->nb_subvect;
541
542
60.6k
   shape_cb = params->shape_cb;
543
60.6k
   have_sign = params->have_sign;
544
545
60.6k
   ALLOC(ind, nb_subvect, int);
546
60.6k
   ALLOC(signs, nb_subvect, int);
547
548
   /* Decode codeword indices and optional sign bits */
549
370k
   for (i=0;i<nb_subvect;i++)
550
309k
   {
551
309k
      if (have_sign)
552
40.7k
         signs[i] = speex_bits_unpack_unsigned(bits, 1);
553
268k
      else
554
268k
         signs[i] = 0;
555
309k
      ind[i] = speex_bits_unpack_unsigned(bits, params->shape_bits);
556
309k
   }
557
   /* Compute decoded excitation */
558
370k
   for (i=0;i<nb_subvect;i++)
559
309k
   {
560
309k
      spx_word16_t s=1;
561
309k
      if (signs[i])
562
861
         s=-1;
563
309k
#ifdef FIXED_POINT
564
309k
      if (s==1)
565
308k
      {
566
2.72M
         for (j=0;j<subvect_size;j++)
567
2.41M
            exc[subvect_size*i+j]=SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5);
568
308k
      } else {
569
7.74k
         for (j=0;j<subvect_size;j++)
570
6.88k
            exc[subvect_size*i+j]=NEG32(SHL32(EXTEND32(shape_cb[ind[i]*subvect_size+j]),SIG_SHIFT-5));
571
861
      }
572
#else
573
      for (j=0;j<subvect_size;j++)
574
         exc[subvect_size*i+j]+=s*0.03125*shape_cb[ind[i]*subvect_size+j];
575
#endif
576
309k
   }
577
60.6k
}
578
#endif /* DISABLE_DECODER */
579
580
#ifndef DISABLE_ENCODER
581
/* "Noise codebook" quantizer: no codebook search is performed and nothing
   is written to the bit-stream. The target is passed through
   residue_percep_zero16() into a temporary buffer, that buffer (shifted left
   by 8) is added to exc, and target is then cleared with SPEEX_MEMSET.
   The parameters par, r, bits, complexity and update_target are not
   referenced in the body; target is cleared regardless of update_target. */
void noise_codebook_quant(
582
spx_word16_t target[],      /* target vector */
583
spx_coef_t ak[],      /* LPCs for this subframe */
584
spx_coef_t awk1[],      /* Weighted LPCs for this subframe */
585
spx_coef_t awk2[],      /* Weighted LPCs for this subframe */
586
const void *par,                      /* Codebook/search parameters*/
587
int   p,                        /* number of LPC coeffs */
588
int   nsf,                      /* number of samples in subframe */
589
spx_sig_t *exc,                       /* excitation buffer; the computed signal is added to it */
590
spx_word16_t *r,                      /* not referenced in the body */
591
SpeexBits *bits,                      /* not referenced in the body */
592
char *stack,                          /* passed on to residue_percep_zero16 (presumably also used by ALLOC -- confirm) */
593
int   complexity,                     /* not referenced in the body */
594
int   update_target                   /* not referenced in the body */
595
)
596
97.3k
{
597
97.3k
   int i;
598
97.3k
   VARDECL(spx_word16_t *tmp);
599
97.3k
   ALLOC(tmp, nsf, spx_word16_t);
600
97.3k
   /* tmp receives the output of residue_percep_zero16 for the nsf-sample target */
   residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
601
602
3.98M
   /* Accumulate into exc; each 16-bit sample is widened and shifted left by 8 */
   for (i=0;i<nsf;i++)
603
3.89M
      exc[i]+=SHL32(EXTEND32(tmp[i]),8);
604
97.3k
   /* Clear the first nsf entries of target */
   SPEEX_MEMSET(target, 0, nsf);
605
97.3k
}
noise_codebook_quant
Line
Count
Source
596
48.6k
{
597
48.6k
   int i;
598
48.6k
   VARDECL(spx_word16_t *tmp);
599
48.6k
   ALLOC(tmp, nsf, spx_word16_t);
600
48.6k
   residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
601
602
1.99M
   for (i=0;i<nsf;i++)
603
1.94M
      exc[i]+=SHL32(EXTEND32(tmp[i]),8);
604
48.6k
   SPEEX_MEMSET(target, 0, nsf);
605
48.6k
}
noise_codebook_quant
Line
Count
Source
596
48.6k
{
597
48.6k
   int i;
598
48.6k
   VARDECL(spx_word16_t *tmp);
599
48.6k
   ALLOC(tmp, nsf, spx_word16_t);
600
48.6k
   residue_percep_zero16(target, ak, awk1, awk2, tmp, nsf, p, stack);
601
602
1.99M
   for (i=0;i<nsf;i++)
603
1.94M
      exc[i]+=SHL32(EXTEND32(tmp[i]),8);
604
48.6k
   SPEEX_MEMSET(target, 0, nsf);
605
48.6k
}
606
#endif /* DISABLE_ENCODER */
607
608
#ifndef DISABLE_DECODER
609
/* "Noise codebook" decoder: reads nothing from the bit-stream and overwrites
   exc[0..nsf-1] with values from speex_rand(1, seed) shifted left by
   SIG_SHIFT. The parameters par, bits and stack are not referenced in the
   body. */
void noise_codebook_unquant(
610
spx_sig_t *exc,                       /* excitation buffer, overwritten */
611
const void *par,                      /* non-overlapping codebook (not referenced in the body) */
612
int   nsf,                      /* number of samples in subframe */
613
SpeexBits *bits,                      /* not referenced in the body */
614
char *stack,                          /* not referenced in the body */
615
spx_uint32_t *seed                    /* random-generator state handed to speex_rand */
616
)
617
32.0k
{
618
32.0k
   int i;
619
   /* FIXME: This is bad, but I don't think the function ever gets called anyway */
   /* NOTE(review): the hit counts recorded alongside this listing are
      non-zero, so the function does get executed in the covered runs. */
620
1.31M
   for (i=0;i<nsf;i++)
621
1.28M
      exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);
622
32.0k
}
noise_codebook_unquant
Line
Count
Source
617
16.0k
{
618
16.0k
   int i;
619
   /* FIXME: This is bad, but I don't think the function ever gets called anyway */
620
657k
   for (i=0;i<nsf;i++)
621
641k
      exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);
622
16.0k
}
noise_codebook_unquant
Line
Count
Source
617
16.0k
{
618
16.0k
   int i;
619
   /* FIXME: This is bad, but I don't think the function ever gets called anyway */
620
657k
   for (i=0;i<nsf;i++)
621
641k
      exc[i]=SHL32(EXTEND32(speex_rand(1, seed)),SIG_SHIFT);
622
16.0k
}
623
#endif /* DISABLE_DECODER */