/src/opus/celt/pitch.c

Source
/* Copyright (c) 2007-2008 CSIRO
   Copyright (c) 2007-2009 Xiph.Org Foundation
   Written by Jean-Marc Valin */
/**
   @file pitch.c
   @brief Pitch analysis
 */

/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "pitch.h"
#include "os_support.h"
#include "modes.h"
#include "stack_alloc.h"
#include "mathops.h"
#include "celt_lpc.h"

/* Scan a correlation curve and keep the two best pitch candidates.
 *
 * For every lag in [0, max_pitch) whose xcorr value is strictly positive, the
 * score is xcorr[lag]^2 divided by the energy of y[lag .. lag+len-1].  Scores
 * are compared by cross-multiplication (num_a*den_b > num_b*den_a), so no
 * division is performed.
 *
 * On return best_pitch[0] holds the lag with the highest score and
 * best_pitch[1] the runner-up.  They stay at 0 and 1 respectively when no lag
 * beats the initial (num=-1, den=0) state.
 *
 * y is read up to index max_pitch+len-1, because the running energy is
 * advanced after every lag, including the last one.
 *
 * Fixed-point only: yshift is the right shift applied to each squared sample
 * of y, and maxcorr is used to derive the shift that scales xcorr before it
 * is squared.
 */
static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                            int max_pitch, int *best_pitch
#ifdef FIXED_POINT
                            , int yshift, opus_val32 maxcorr
#endif
                            )
{
   int lag;
   int n;
   opus_val32 Syy;
   opus_val16 best_num[2];
   opus_val32 best_den[2];
#ifdef FIXED_POINT
   int xshift;

   xshift = celt_ilog2(maxcorr)-14;
#endif

   /* Start from a state that any positive-score candidate will beat. */
   best_num[0] = best_num[1] = -1;
   best_den[0] = best_den[1] = 0;
   best_pitch[0] = 0;
   best_pitch[1] = 1;

   /* Energy of the first window; seeded with 1 so it is never zero. */
   Syy = 1;
   for (n=0;n<len;n++)
      Syy = ADD32(Syy, SHR32(MULT16_16(y[n],y[n]), yshift));

   for (lag=0;lag<max_pitch;lag++)
   {
      if (xcorr[lag]>0)
      {
         opus_val16 num;
         opus_val32 xcorr16;
         xcorr16 = EXTRACT16(VSHR32(xcorr[lag], xshift));
#ifndef FIXED_POINT
         /* Considering the range of xcorr16, this should avoid both underflows
            and overflows (inf) when squaring xcorr16 */
         xcorr16 *= 1e-12f;
#endif
         num = MULT16_16_Q15(xcorr16,xcorr16);
         /* Only candidates that beat the current runner-up are interesting. */
         if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
         {
            int slot;
            if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
            {
               /* New leader: the previous leader becomes the runner-up. */
               best_num[1] = best_num[0];
               best_den[1] = best_den[0];
               best_pitch[1] = best_pitch[0];
               slot = 0;
            } else {
               slot = 1;
            }
            best_num[slot] = num;
            best_den[slot] = Syy;
            best_pitch[slot] = lag;
         }
      }
      /* Slide the energy window by one sample and keep it at least 1. */
      Syy += SHR32(MULT16_16(y[lag+len],y[lag+len]),yshift) - SHR32(MULT16_16(y[lag],y[lag]),yshift);
      Syy = MAX32(1, Syy);
   }
}

/* In-place 5-tap filter with zero initial history.
 *
 * Each output is the current input (shifted up by SIG_SHIFT) plus
 * num[0..4] multiplied by the five previous *input* samples, then rounded
 * back down by SIG_SHIFT.  x holds N samples and is overwritten; num holds
 * the five coefficients.
 */
static void celt_fir5(opus_val16 *x,
         const opus_val16 *num,
         int N)
{
   int i, k;
   opus_val16 taps[5];
   opus_val32 hist[5];

   for (k=0;k<5;k++)
   {
      taps[k] = num[k];
      hist[k] = 0;
   }
   for (i=0;i<N;i++)
   {
      opus_val32 acc = SHL32(EXTEND32(x[i]), SIG_SHIFT);
      /* Accumulate from the most recent past sample to the oldest. */
      for (k=0;k<5;k++)
         acc = MAC16_16(acc, taps[k], hist[k]);
      /* Age the history, then record the unfiltered input before it is
         overwritten below. */
      for (k=4;k>0;k--)
         hist[k] = hist[k-1];
      hist[0] = x[i];
      x[i] = ROUND16(acc, SIG_SHIFT);
   }
}


/* Build the low-rate signal used by the pitch search.
 *
 * x      one input pointer per channel; x[0] (and x[1] when C==2) is read up
 *        to index factor*(len-1)+factor/2.
 * x_lp   output, len samples.
 * len    number of output samples.
 * C      channel count; channel 1 is added into the output only when C==2.
 * factor spacing between the input samples that become output samples.
 * arch   passed through to _celt_autocorr().
 *
 * Steps: (1) each output sample is a [.25 .5 .25] weighted sum of the inputs
 * at factor*i-factor/2, factor*i and factor*i+factor/2 (sample 0 has no left
 * neighbour); (2) a 4th-order LPC is computed from the autocorrelation of
 * that signal, after a noise floor and lag window; (3) the LPC coefficients
 * are scaled by successive powers of 0.9, combined with one extra zero and
 * applied in place with celt_fir5().
 */
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
      int len, int C, int factor, int arch)
{
   int i;
   opus_val32 ac[5];
   opus_val16 decay=Q15ONE;
   opus_val16 lpc[4];
   opus_val16 lpc2[5];
   opus_val16 c1 = QCONST16(.8f,15);
   int half;
#ifdef FIXED_POINT
   int shift;
   opus_val32 maxabs;
#endif
   half = factor/2;
#ifdef FIXED_POINT
   /* Choose a right shift from the largest magnitude over all channels. */
   maxabs = celt_maxabs32(x[0], len*factor);
   if (C==2)
   {
      opus_val32 maxabs_1 = celt_maxabs32(x[1], len*factor);
      maxabs = MAX32(maxabs, maxabs_1);
   }
   maxabs = MAX32(1, maxabs);
   shift = celt_ilog2(maxabs)-10;
   if (shift<0)
      shift=0;
   /* One extra bit of headroom because two channels are summed. */
   if (C==2)
      shift++;
   x_lp[0] = SHR32(x[0][half], shift+2) + SHR32(x[0][0], shift+1);
   for (i=1;i<len;i++)
   {
      int mid = factor*i;
      x_lp[i] = SHR32(x[0][(mid-half)], shift+2) + SHR32(x[0][(mid+half)], shift+2) + SHR32(x[0][mid], shift+1);
   }
   if (C==2)
   {
      x_lp[0] += SHR32(x[1][half], shift+2) + SHR32(x[1][0], shift+1);
      for (i=1;i<len;i++)
      {
         int mid = factor*i;
         x_lp[i] += SHR32(x[1][(mid-half)], shift+2) + SHR32(x[1][(mid+half)], shift+2) + SHR32(x[1][mid], shift+1);
      }
   }
#else
   x_lp[0] = .25f*x[0][half] + .5f*x[0][0];
   for (i=1;i<len;i++)
   {
      int mid = factor*i;
      x_lp[i] = .25f*x[0][(mid-half)] + .25f*x[0][(mid+half)] + .5f*x[0][mid];
   }
   if (C==2)
   {
      x_lp[0] += .25f*x[1][half] + .5f*x[1][0];
      for (i=1;i<len;i++)
      {
         int mid = factor*i;
         x_lp[i] += .25f*x[1][(mid-half)] + .25f*x[1][(mid+half)] + .5f*x[1][mid];
      }
   }
#endif
   _celt_autocorr(x_lp, ac, NULL, 0,
                  4, len, arch);

   /* Noise floor -40 dB */
#ifdef FIXED_POINT
   ac[0] += SHR32(ac[0],13);
#else
   ac[0] *= 1.0001f;
#endif
   /* Lag windowing */
   for (i=1;i<=4;i++)
   {
      /*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
#ifdef FIXED_POINT
      ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
#else
      ac[i] -= ac[i]*(.008f*i)*(.008f*i);
#endif
   }

   _celt_lpc(lpc, ac, 4);
   /* Scale coefficient i by 0.9^(i+1). */
   for (i=0;i<4;i++)
   {
      decay = MULT16_16_Q15(QCONST16(.9f,15), decay);
      lpc[i] = MULT16_16_Q15(lpc[i], decay);
   }
   /* Add a zero */
   lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
   for (i=1;i<4;i++)
      lpc2[i] = lpc[i] + MULT16_16_Q15(c1,lpc[i-1]);
   lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
   celt_fir5(x_lp, lpc2, len);
}

/* Pure C implementation of the pitch cross-correlation.
 *
 * Fills xcorr[0 .. max_pitch-1].  The disabled reference branch below shows
 * the intended result directly: xcorr[i] = sum over j in [0,len) of
 * _x[j]*_y[i+j], so _y must be readable up to index max_pitch+len-2.
 *
 * In fixed-point builds the function returns the largest value written to
 * xcorr, floored at 1; in float builds it returns nothing.
 *
 * The active branch asserts max_pitch>0 and that _x is 4-byte aligned.
 */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
      opus_val32 *xcorr, int len, int max_pitch, int arch)
{

#if 0 /* This is a simple version of the pitch correlation that should work
         well on DSPs like Blackfin and TI C5x/C6x */
   int i, j;
#ifdef FIXED_POINT
   opus_val32 maxcorr=1;
#endif
#if !defined(OVERRIDE_PITCH_XCORR)
   (void)arch;
#endif
   for (i=0;i<max_pitch;i++)
   {
      opus_val32 sum = 0;
      for (j=0;j<len;j++)
         sum = MAC16_16(sum, _x[j], _y[i+j]);
      xcorr[i] = sum;
#ifdef FIXED_POINT
      maxcorr = MAX32(maxcorr, sum);
#endif
   }
#ifdef FIXED_POINT
   return maxcorr;
#endif

#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
   int i;
   /*The EDSP version requires that max_pitch is at least 1, and that _x is
      32-bit aligned.
     Since it's hard to put asserts in assembly, put them here.*/
#ifdef FIXED_POINT
   opus_val32 maxcorr=1;
#endif
   celt_assert(max_pitch>0);
   celt_sig_assert(((size_t)_x&3)==0);
   /* Main loop: four lags per iteration via xcorr_kernel(), which
      accumulates into sum[0..3] (presumably lags i..i+3 -- confirm against
      the kernel's definition). */
   for (i=0;i<max_pitch-3;i+=4)
   {
      opus_val32 sum[4]={0,0,0,0};
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
      {
         /* Cross-check the dispatched kernel against the C kernel. */
         opus_val32 sum_c[4]={0,0,0,0};
         xcorr_kernel_c(_x, _y+i, sum_c, len);
#endif
         xcorr_kernel(_x, _y+i, sum, len, arch);
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
      }
#endif
      xcorr[i]=sum[0];
      xcorr[i+1]=sum[1];
      xcorr[i+2]=sum[2];
      xcorr[i+3]=sum[3];
#ifdef FIXED_POINT
      /* Reduce the four sums to their maximum (sum[] is scratch from here). */
      sum[0] = MAX32(sum[0], sum[1]);
      sum[2] = MAX32(sum[2], sum[3]);
      sum[0] = MAX32(sum[0], sum[2]);
      maxcorr = MAX32(maxcorr, sum[0]);
#endif
   }
   /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
   for (;i<max_pitch;i++)
   {
      opus_val32 sum;
      sum = celt_inner_prod(_x, _y+i, len, arch);
      xcorr[i] = sum;
#ifdef FIXED_POINT
      maxcorr = MAX32(maxcorr, sum);
#endif
   }
#ifdef FIXED_POINT
   return maxcorr;
#endif
#endif
}

/* Two-stage pitch search.
 *
 * x_lp      target signal; the first len>>1 samples are used.
 * y         signal searched for a match; read up to roughly
 *           (len+max_pitch)>>1 samples.
 * len       must be > 0 (asserted).
 * max_pitch must be > 0 (asserted).
 * pitch     output: 2*best_lag - offset, where best_lag is the winner of the
 *           finer search and offset is in {-1,0,1}.
 * arch      passed through to the correlation helpers.
 *
 * Stage 1 keeps every second sample of x_lp and y and correlates over
 * max_pitch>>2 lags, retaining the two best candidates.  Stage 2 correlates
 * the undecimated x_lp/y only at lags within 2 of twice either candidate.
 * The final offset comes from comparing the winner's two neighbours.
 */
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                  int len, int max_pitch, int *pitch, int arch)
{
   int cand, n;
   int span;
   int best_pitch[2]={0,0};
   VARDECL(opus_val16, x_lp4);
   VARDECL(opus_val16, y_lp4);
   VARDECL(opus_val32, xcorr);
#ifdef FIXED_POINT
   opus_val32 maxcorr;
   opus_val32 xmax, ymax;
   int shift=0;
#endif
   int offset;

   SAVE_STACK;

   celt_assert(len>0);
   celt_assert(max_pitch>0);
   span = len+max_pitch;

   ALLOC(x_lp4, len>>2, opus_val16);
   ALLOC(y_lp4, span>>2, opus_val16);
   ALLOC(xcorr, max_pitch>>1, opus_val32);

   /* Downsample by 2 again */
   for (n=0;n<(len>>2);n++)
      x_lp4[n] = x_lp[2*n];
   for (n=0;n<(span>>2);n++)
      y_lp4[n] = y[2*n];

#ifdef FIXED_POINT
   xmax = celt_maxabs16(x_lp4, len>>2);
   ymax = celt_maxabs16(y_lp4, span>>2);
   shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax))) - 14 + celt_ilog2(len)/2;
   if (shift<=0)
   {
      shift = 0;
   } else {
      for (n=0;n<(len>>2);n++)
         x_lp4[n] = SHR16(x_lp4[n], shift);
      for (n=0;n<(span>>2);n++)
         y_lp4[n] = SHR16(y_lp4[n], shift);
      /* Use double the shift for a MAC */
      shift *= 2;
   }
#endif

   /* Coarse search with 4x decimation */

#ifdef FIXED_POINT
   maxcorr =
#endif
   celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);

   find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
#ifdef FIXED_POINT
                   , 0, maxcorr
#endif
                   );

   /* Finer search with 2x decimation */
#ifdef FIXED_POINT
   maxcorr=1;
#endif
   for (cand=0;cand<(max_pitch>>1);cand++)
   {
      opus_val32 sum;
      /* Only lags close to twice a coarse candidate are evaluated; every
         other lag gets a zero correlation. */
      if (abs(cand-2*best_pitch[0])<=2 || abs(cand-2*best_pitch[1])<=2)
      {
#ifdef FIXED_POINT
         sum = 0;
         for (n=0;n<(len>>1);n++)
            sum += SHR32(MULT16_16(x_lp[n],y[cand+n]), shift);
#else
         sum = celt_inner_prod(x_lp, y+cand, len>>1, arch);
#endif
         xcorr[cand] = MAX32(-1, sum);
#ifdef FIXED_POINT
         maxcorr = MAX32(maxcorr, sum);
#endif
      } else {
         xcorr[cand] = 0;
      }
   }
   find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
#ifdef FIXED_POINT
                   , shift+1, maxcorr
#endif
                   );

   /* Refine by pseudo-interpolation */
   offset = 0;
   if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
   {
      opus_val32 below, peak, above;
      below = xcorr[best_pitch[0]-1];
      peak = xcorr[best_pitch[0]];
      above = xcorr[best_pitch[0]+1];
      if ((above-below) > MULT16_32_Q15(QCONST16(.7f,15),peak-below))
         offset = 1;
      else if ((below-above) > MULT16_32_Q15(QCONST16(.7f,15),peak-above))
         offset = -1;
   }
   *pitch = 2*best_pitch[0]-offset;

   RESTORE_STACK;
}

#ifdef FIXED_POINT
/* Fixed-point pitch gain: approximately xy/sqrt(xx*yy), clamped to
   [-Q15ONE, Q15ONE].  Returns 0 when any of the three inputs is zero. */
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
   opus_val32 energy_prod;
   int xx_shift, yy_shift, total_shift;
   opus_val32 gain;
   opus_val16 inv_root;
   if (xy == 0 || xx == 0 || yy == 0)
      return 0;
   /* Scale both energies by their own shift before multiplying them. */
   xx_shift = celt_ilog2(xx)-14;
   yy_shift = celt_ilog2(yy)-14;
   total_shift = xx_shift + yy_shift;
   energy_prod = SHR32(MULT16_16(VSHR32(xx, xx_shift), VSHR32(yy, yy_shift)), 14);
   /* The square root halves the shift, so make the total shift even by
      moving one bit into or out of the product. */
   if ((total_shift & 1) != 0)
   {
      if (energy_prod >= 32768)
      {
         energy_prod >>= 1;
         total_shift++;
      } else {
         energy_prod <<= 1;
         total_shift--;
      }
   }
   inv_root = celt_rsqrt_norm(energy_prod);
   gain = MULT16_32_Q15(inv_root, xy);
   gain = VSHR32(gain, (total_shift>>1)-1);
   return EXTRACT16(MAX32(-Q15ONE, MIN32(gain, Q15ONE)));
}
#else
/* Floating-point pitch gain: xy / sqrt(1 + xx*yy).  The added 1 keeps the
   denominator non-zero when either energy is zero. */
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
   opus_val32 denom = celt_sqrt(1+xx*yy);
   return xy/denom;
}
#endif

/* Multiplier used to pick the second lag tested alongside T0/k:
   T1b is about second_check[k]*T0/k.  remove_doubling() only reads entries
   3..15; k==2 is handled by a dedicated branch. */
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};

/* Check whether a sub-multiple of the detected pitch period is a better
 * choice, and return a gain for the period finally selected.
 *
 * x           signal buffer; after the internal halving of maxperiod the
 *             function reads x[maxperiod-i] for i in 1..maxperiod and
 *             x[maxperiod .. maxperiod+N-1].
 * maxperiod   largest period, halved internally.
 * minperiod   smallest period, halved internally; the unhalved value is the
 *             lower bound applied to the returned period.
 * N           number of samples to analyse, halved internally.
 * T0_         in: initial period estimate; out: selected period
 *             (2*T + offset with offset in {-1,0,1}, never below minperiod).
 * prev_period period from the previous call, halved internally.
 * prev_gain   gain from the previous call; it lowers the acceptance
 *             threshold for candidates close to prev_period.
 * arch        passed through to the inner-product helpers.
 *
 * Returns the smaller of best_xy/(best_yy+1) (Q15ONE when best_yy<=best_xy)
 * and the normalised gain of the selected candidate.
 */
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
      int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
{
   int k, i, T, T0;
   opus_val16 g, g0;
   opus_val16 pg;
   opus_val32 xy,xx,yy,xy2;
   opus_val32 xcorr[3];
   opus_val32 best_xy, best_yy;
   int offset;
   int minperiod0;
   VARDECL(opus_val32, yy_lookup);
   SAVE_STACK;

   /* Everything below works at half the caller's resolution. */
   minperiod0 = minperiod;
   maxperiod /= 2;
   minperiod /= 2;
   *T0_ /= 2;
   prev_period /= 2;
   N /= 2;
   x += maxperiod;
   if (*T0_>=maxperiod)
      *T0_=maxperiod-1;

   T = T0 = *T0_;
   ALLOC(yy_lookup, maxperiod+1, opus_val32);
   dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
   /* yy_lookup[i] is the energy of x[-i .. N-1-i], built recursively from
      the energy at lag 0 and clamped at 0. */
   yy_lookup[0] = xx;
   yy=xx;
   for (i=1;i<=maxperiod;i++)
   {
      yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
      yy_lookup[i] = MAX32(0, yy);
   }
   yy = yy_lookup[T0];
   best_xy = xy;
   best_yy = yy;
   g = g0 = compute_pitch_gain(xy, xx, yy);
   /* Look for any pitch at T/k */
   for (k=2;k<=15;k++)
   {
      int T1, T1b;
      opus_val16 g1;
      opus_val16 cont=0;
      opus_val16 thresh;
      /* Rounded T0/k. */
      T1 = celt_udiv(2*T0+k, 2*k);
      if (T1 < minperiod)
         break;
      /* Look for another strong correlation at T1b */
      if (k==2)
      {
         if (T1+T0>maxperiod)
            T1b = T0;
         else
            T1b = T0+T1;
      } else
      {
         T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
      }
      dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
      /* Score the candidate on the average of both lags. */
      xy = HALF32(xy + xy2);
      yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
      g1 = compute_pitch_gain(xy, xx, yy);
      /* Continuity bonus: candidates near the previous period get a lower
         threshold. */
      if (abs(T1-prev_period)<=1)
         cont = prev_gain;
      else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
         cont = HALF16(prev_gain);
      else
         cont = 0;
      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
      /* Bias against very high pitch (very short period) to avoid false-positives
         due to short-term correlation.  The tighter 2*minperiod test must come
         first: T1<2*minperiod implies T1<3*minperiod, so with the tests in the
         opposite order the strictest threshold would never be applied. */
      if (T1<2*minperiod)
         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
      else if (T1<3*minperiod)
         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
      if (g1 > thresh)
      {
         best_xy = xy;
         best_yy = yy;
         T = T1;
         g = g1;
      }
   }
   best_xy = MAX32(0, best_xy);
   if (best_yy <= best_xy)
      pg = Q15ONE;
   else
      pg = SHR32(frac_div32(best_xy,best_yy+1),16);

   /* Compare the correlation at T-1, T and T+1 to pick the final offset. */
   for (k=0;k<3;k++)
      xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
   if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
      offset = 1;
   else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
      offset = -1;
   else
      offset = 0;
   if (pg > g)
      pg = g;
   /* Back to the caller's resolution. */
   *T0_ = 2*T+offset;

   if (*T0_<minperiod0)
      *T0_=minperiod0;
   RESTORE_STACK;
   return pg;
}

Coverage Report

Created: 2025-11-16 07:20

Line	Count	Source
1		/* Copyright (c) 2007-2008 CSIRO
2		Copyright (c) 2007-2009 Xiph.Org Foundation
3		Written by Jean-Marc Valin */
4		/**
5		@file pitch.c
6		@brief Pitch analysis
7		*/
8
9		/*
10		Redistribution and use in source and binary forms, with or without
11		modification, are permitted provided that the following conditions
12		are met:
13
14		- Redistributions of source code must retain the above copyright
15		notice, this list of conditions and the following disclaimer.
16
17		- Redistributions in binary form must reproduce the above copyright
18		notice, this list of conditions and the following disclaimer in the
19		documentation and/or other materials provided with the distribution.
20
21		THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22		``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23		LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24		A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25		OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26		EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27		PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28		PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29		LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30		NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31		SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32		*/
33
34		#ifdef HAVE_CONFIG_H
35		#include "config.h"
36		#endif
37
38		#include "pitch.h"
39		#include "os_support.h"
40		#include "modes.h"
41		#include "stack_alloc.h"
42		#include "mathops.h"
43		#include "celt_lpc.h"
44
45		static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
46		int max_pitch, int *best_pitch
47		#ifdef FIXED_POINT
48		, int yshift, opus_val32 maxcorr
49		#endif
50		)
51	36.2k	{
52	36.2k	int i, j;
53	36.2k	opus_val32 Syy=1;
54	36.2k	opus_val16 best_num[2];
55	36.2k	opus_val32 best_den[2];
56		#ifdef FIXED_POINT
57		int xshift;
58
59		xshift = celt_ilog2(maxcorr)-14;
60		#endif
61
62	36.2k	best_num[0] = -1;
63	36.2k	best_num[1] = -1;
64	36.2k	best_den[0] = 0;
65	36.2k	best_den[1] = 0;
66	36.2k	best_pitch[0] = 0;
67	36.2k	best_pitch[1] = 1;
68	18.0M	for (j=0;j<len;j++)
69	18.0M	Syy = ADD32(Syy, SHR32(MULT16_16(y[j],y[j]), yshift));
70	8.45M	for (i=0;i<max_pitch;i++)
71	8.42M	{
72	8.42M	if (xcorr[i]>0)
73	1.19M	{
74	1.19M	opus_val16 num;
75	1.19M	opus_val32 xcorr16;
76	1.19M	xcorr16 = EXTRACT16(VSHR32(xcorr[i], xshift));
77	1.19M	#ifndef FIXED_POINT
78		/* Considering the range of xcorr16, this should avoid both underflows
79		and overflows (inf) when squaring xcorr16 */
80	1.19M	xcorr16 *= 1e-12f;
81	1.19M	#endif
82	1.19M	num = MULT16_16_Q15(xcorr16,xcorr16);
83	1.19M	if (MULT16_32_Q15(num,best_den[1]) > MULT16_32_Q15(best_num[1],Syy))
84	303k	{
85	303k	if (MULT16_32_Q15(num,best_den[0]) > MULT16_32_Q15(best_num[0],Syy))
86	215k	{
87	215k	best_num[1] = best_num[0];
88	215k	best_den[1] = best_den[0];
89	215k	best_pitch[1] = best_pitch[0];
90	215k	best_num[0] = num;
91	215k	best_den[0] = Syy;
92	215k	best_pitch[0] = i;
93	215k	} else {
94	87.6k	best_num[1] = num;
95	87.6k	best_den[1] = Syy;
96	87.6k	best_pitch[1] = i;
97	87.6k	}
98	303k	}
99	1.19M	}
100	8.42M	Syy += SHR32(MULT16_16(y[i+len],y[i+len]),yshift) - SHR32(MULT16_16(y[i],y[i]),yshift);
101	8.42M	Syy = MAX32(1, Syy);
102	8.42M	}
103	36.2k	}
104
105		static void celt_fir5(opus_val16 *x,
106		const opus_val16 *num,
107		int N)
108	18.1k	{
109	18.1k	int i;
110	18.1k	opus_val16 num0, num1, num2, num3, num4;
111	18.1k	opus_val32 mem0, mem1, mem2, mem3, mem4;
112	18.1k	num0=num[0];
113	18.1k	num1=num[1];
114	18.1k	num2=num[2];
115	18.1k	num3=num[3];
116	18.1k	num4=num[4];
117	18.1k	mem0=0;
118	18.1k	mem1=0;
119	18.1k	mem2=0;
120	18.1k	mem3=0;
121	18.1k	mem4=0;
122	18.5M	for (i=0;i<N;i++)
123	18.5M	{
124	18.5M	opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
125	18.5M	sum = MAC16_16(sum,num0,mem0);
126	18.5M	sum = MAC16_16(sum,num1,mem1);
127	18.5M	sum = MAC16_16(sum,num2,mem2);
128	18.5M	sum = MAC16_16(sum,num3,mem3);
129	18.5M	sum = MAC16_16(sum,num4,mem4);
130	18.5M	mem4 = mem3;
131	18.5M	mem3 = mem2;
132	18.5M	mem2 = mem1;
133	18.5M	mem1 = mem0;
134	18.5M	mem0 = x[i];
135	18.5M	x[i] = ROUND16(sum, SIG_SHIFT);
136	18.5M	}
137	18.1k	}
138
139
140		void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
141		int len, int C, int factor, int arch)
142	18.1k	{
143	18.1k	int i;
144	18.1k	opus_val32 ac[5];
145	18.1k	opus_val16 tmp=Q15ONE;
146	18.1k	opus_val16 lpc[4];
147	18.1k	opus_val16 lpc2[5];
148	18.1k	opus_val16 c1 = QCONST16(.8f,15);
149	18.1k	int offset;
150		#ifdef FIXED_POINT
151		int shift;
152		opus_val32 maxabs;
153		#endif
154	18.1k	offset = factor/2;
155		#ifdef FIXED_POINT
156		maxabs = celt_maxabs32(x[0], len*factor);
157		if (C==2)
158		{
159		opus_val32 maxabs_1 = celt_maxabs32(x[1], len*factor);
160		maxabs = MAX32(maxabs, maxabs_1);
161		}
162		if (maxabs<1)
163		maxabs=1;
164		shift = celt_ilog2(maxabs)-10;
165		if (shift<0)
166		shift=0;
167		if (C==2)
168		shift++;
169		for (i=1;i<len;i++)
170		x_lp[i] = SHR32(x[0][(factor*i-offset)], shift+2) + SHR32(x[0][(factor*i+offset)], shift+2) + SHR32(x[0][factor*i], shift+1);
171		x_lp[0] = SHR32(x[0][offset], shift+2) + SHR32(x[0][0], shift+1);
172		if (C==2)
173		{
174		for (i=1;i<len;i++)
175		x_lp[i] += SHR32(x[1][(factor*i-offset)], shift+2) + SHR32(x[1][(factor*i+offset)], shift+2) + SHR32(x[1][factor*i], shift+1);
176		x_lp[0] += SHR32(x[1][offset], shift+2) + SHR32(x[1][0], shift+1);
177		}
178		#else
179	18.5M	for (i=1;i<len;i++)
180	18.5M	x_lp[i] = .25f*x[0][(factor*i-offset)] + .25f*x[0][(factor*i+offset)] + .5f*x[0][factor*i];
181	18.1k	x_lp[0] = .25f*x[0][offset] + .5f*x[0][0];
182	18.1k	if (C==2)
183	14.9k	{
184	15.2M	for (i=1;i<len;i++)
185	15.2M	x_lp[i] += .25f*x[1][(factor*i-offset)] + .25f*x[1][(factor*i+offset)] + .5f*x[1][factor*i];
186	14.9k	x_lp[0] += .25f*x[1][offset] + .5f*x[1][0];
187	14.9k	}
188	18.1k	#endif
189	18.1k	_celt_autocorr(x_lp, ac, NULL, 0,
190	18.1k	4, len, arch);
191
192		/* Noise floor -40 dB */
193		#ifdef FIXED_POINT
194		ac[0] += SHR32(ac[0],13);
195		#else
196	18.1k	ac[0] *= 1.0001f;
197	18.1k	#endif
198		/* Lag windowing */
199	90.5k	for (i=1;i<=4;i++)
200	72.4k	{
201		/*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
202		#ifdef FIXED_POINT
203		ac[i] -= MULT16_32_Q15(2*i*i, ac[i]);
204		#else
205	72.4k	ac[i] -= ac[i]*(.008f*i)*(.008f*i);
206	72.4k	#endif
207	72.4k	}
208
209	18.1k	_celt_lpc(lpc, ac, 4);
210	90.5k	for (i=0;i<4;i++)
211	72.4k	{
212	72.4k	tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
213	72.4k	lpc[i] = MULT16_16_Q15(lpc[i], tmp);
214	72.4k	}
215		/* Add a zero */
216	18.1k	lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT);
217	18.1k	lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]);
218	18.1k	lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]);
219	18.1k	lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]);
220	18.1k	lpc2[4] = MULT16_16_Q15(c1,lpc[3]);
221	18.1k	celt_fir5(x_lp, lpc2, len);
222	18.1k	}
223
224		/* Pure C implementation. */
225		#ifdef FIXED_POINT
226		opus_val32
227		#else
228		void
229		#endif
230		celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
231		opus_val32 *xcorr, int len, int max_pitch, int arch)
232	0	{
233
234		#if 0 /* This is a simple version of the pitch correlation that should work
235		well on DSPs like Blackfin and TI C5x/C6x */
236		int i, j;
237		#ifdef FIXED_POINT
238		opus_val32 maxcorr=1;
239		#endif
240		#if !defined(OVERRIDE_PITCH_XCORR)
241		(void)arch;
242		#endif
243		for (i=0;i<max_pitch;i++)
244		{
245		opus_val32 sum = 0;
246		for (j=0;j<len;j++)
247		sum = MAC16_16(sum, _x[j], _y[i+j]);
248		xcorr[i] = sum;
249		#ifdef FIXED_POINT
250		maxcorr = MAX32(maxcorr, sum);
251		#endif
252		}
253		#ifdef FIXED_POINT
254		return maxcorr;
255		#endif
256
257		#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
258	0	int i;
259		/*The EDSP version requires that max_pitch is at least 1, and that _x is
260		32-bit aligned.
261		Since it's hard to put asserts in assembly, put them here.*/
262		#ifdef FIXED_POINT
263		opus_val32 maxcorr=1;
264		#endif
265	0	celt_assert(max_pitch>0);
266	0	celt_sig_assert(((size_t)_x&3)==0);
267	0	for (i=0;i<max_pitch-3;i+=4)
268	0	{
269	0	opus_val32 sum[4]={0,0,0,0};
270		#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
271		{
272		opus_val32 sum_c[4]={0,0,0,0};
273		xcorr_kernel_c(_x, _y+i, sum_c, len);
274		#endif
275	0	xcorr_kernel(_x, _y+i, sum, len, arch);
276		#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
277		celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
278		}
279		#endif
280	0	xcorr[i]=sum[0];
281	0	xcorr[i+1]=sum[1];
282	0	xcorr[i+2]=sum[2];
283	0	xcorr[i+3]=sum[3];
284		#ifdef FIXED_POINT
285		sum[0] = MAX32(sum[0], sum[1]);
286		sum[2] = MAX32(sum[2], sum[3]);
287		sum[0] = MAX32(sum[0], sum[2]);
288		maxcorr = MAX32(maxcorr, sum[0]);
289		#endif
290	0	}
291		/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
292	0	for (;i<max_pitch;i++)
293	0	{
294	0	opus_val32 sum;
295	0	sum = celt_inner_prod(_x, _y+i, len, arch);
296	0	xcorr[i] = sum;
297		#ifdef FIXED_POINT
298		maxcorr = MAX32(maxcorr, sum);
299		#endif
300	0	}
301		#ifdef FIXED_POINT
302		return maxcorr;
303		#endif
304	0	#endif
305	0	}
306
307		void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
308		int len, int max_pitch, int *pitch, int arch)
309	18.1k	{
310	18.1k	int i, j;
311	18.1k	int lag;
312	18.1k	int best_pitch[2]={0,0};
313	18.1k	VARDECL(opus_val16, x_lp4);
314	18.1k	VARDECL(opus_val16, y_lp4);
315	18.1k	VARDECL(opus_val32, xcorr);
316		#ifdef FIXED_POINT
317		opus_val32 maxcorr;
318		opus_val32 xmax, ymax;
319		int shift=0;
320		#endif
321	18.1k	int offset;
322
323	18.1k	SAVE_STACK;
324
325	18.1k	celt_assert(len>0);
326	18.1k	celt_assert(max_pitch>0);
327	18.1k	lag = len+max_pitch;
328
329	18.1k	ALLOC(x_lp4, len>>2, opus_val16);
330	18.1k	ALLOC(y_lp4, lag>>2, opus_val16);
331	18.1k	ALLOC(xcorr, max_pitch>>1, opus_val32);
332
333		/* Downsample by 2 again */
334	6.03M	for (j=0;j<len>>2;j++)
335	6.01M	x_lp4[j] = x_lp[2*j];
336	8.83M	for (j=0;j<lag>>2;j++)
337	8.82M	y_lp4[j] = y[2*j];
338
339		#ifdef FIXED_POINT
340		xmax = celt_maxabs16(x_lp4, len>>2);
341		ymax = celt_maxabs16(y_lp4, lag>>2);
342		shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax))) - 14 + celt_ilog2(len)/2;
343		if (shift>0)
344		{
345		for (j=0;j<len>>2;j++)
346		x_lp4[j] = SHR16(x_lp4[j], shift);
347		for (j=0;j<lag>>2;j++)
348		y_lp4[j] = SHR16(y_lp4[j], shift);
349		/* Use double the shift for a MAC */
350		shift *= 2;
351		} else {
352		shift = 0;
353		}
354		#endif
355
356		/* Coarse search with 4x decimation */
357
358		#ifdef FIXED_POINT
359		maxcorr =
360		#endif
361	18.1k	celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2, arch);
362
363	18.1k	find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
364		#ifdef FIXED_POINT
365		, 0, maxcorr
366		#endif
367	18.1k	);
368
369		/* Finer search with 2x decimation */
370		#ifdef FIXED_POINT
371		maxcorr=1;
372		#endif
373	5.63M	for (i=0;i<max_pitch>>1;i++)
374	5.61M	{
375	5.61M	opus_val32 sum;
376	5.61M	xcorr[i] = 0;
377	5.61M	if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
378	5.45M	continue;
379		#ifdef FIXED_POINT
380		sum = 0;
381		for (j=0;j<len>>1;j++)
382		sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
383		#else
384	161k	sum = celt_inner_prod(x_lp, y+i, len>>1, arch);
385	161k	#endif
386	161k	xcorr[i] = MAX32(-1, sum);
387		#ifdef FIXED_POINT
388		maxcorr = MAX32(maxcorr, sum);
389		#endif
390	161k	}
391	18.1k	find_best_pitch(xcorr, y, len>>1, max_pitch>>1, best_pitch
392		#ifdef FIXED_POINT
393		, shift+1, maxcorr
394		#endif
395	18.1k	);
396
397		/* Refine by pseudo-interpolation */
398	18.1k	if (best_pitch[0]>0 && best_pitch[0]<(max_pitch>>1)-1)
399	16.3k	{
400	16.3k	opus_val32 a, b, c;
401	16.3k	a = xcorr[best_pitch[0]-1];
402	16.3k	b = xcorr[best_pitch[0]];
403	16.3k	c = xcorr[best_pitch[0]+1];
404	16.3k	if ((c-a) > MULT16_32_Q15(QCONST16(.7f,15),b-a))
405	2.60k	offset = 1;
406	13.7k	else if ((a-c) > MULT16_32_Q15(QCONST16(.7f,15),b-c))
407	3.70k	offset = -1;
408	10.0k	else
409	10.0k	offset = 0;
410	16.3k	} else {
411	1.72k	offset = 0;
412	1.72k	}
413	18.1k	*pitch = 2*best_pitch[0]-offset;
414
415	18.1k	RESTORE_STACK;
416	18.1k	}
417
418		#ifdef FIXED_POINT
419		static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
420		{
421		opus_val32 x2y2;
422		int sx, sy, shift;
423		opus_val32 g;
424		opus_val16 den;
425		if (xy == 0 || xx == 0 || yy == 0)
426		return 0;
427		sx = celt_ilog2(xx)-14;
428		sy = celt_ilog2(yy)-14;
429		shift = sx + sy;
430		x2y2 = SHR32(MULT16_16(VSHR32(xx, sx), VSHR32(yy, sy)), 14);
431		if (shift & 1) {
432		if (x2y2 < 32768)
433		{
434		x2y2 <<= 1;
435		shift--;
436		} else {
437		x2y2 >>= 1;
438		shift++;
439		}
440		}
441		den = celt_rsqrt_norm(x2y2);
442		g = MULT16_32_Q15(den, xy);
443		g = VSHR32(g, (shift>>1)-1);
444		return EXTRACT16(MAX32(-Q15ONE, MIN32(g, Q15ONE)));
445		}
446		#else
447		static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
448	0	{
449	0	return xy/celt_sqrt(1+xx*yy);
450	0	}
451		#endif
452
453		static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
454		opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
455		int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
456	0	{
457	0	int k, i, T, T0;
458	0	opus_val16 g, g0;
459	0	opus_val16 pg;
460	0	opus_val32 xy,xx,yy,xy2;
461	0	opus_val32 xcorr[3];
462	0	opus_val32 best_xy, best_yy;
463	0	int offset;
464	0	int minperiod0;
465	0	VARDECL(opus_val32, yy_lookup);
466	0	SAVE_STACK;
467
468	0	minperiod0 = minperiod;
469	0	maxperiod /= 2;
470	0	minperiod /= 2;
471	0	*T0_ /= 2;
472	0	prev_period /= 2;
473	0	N /= 2;
474	0	x += maxperiod;
475	0	if (*T0_>=maxperiod)
476	0	*T0_=maxperiod-1;
477
478	0	T = T0 = *T0_;
479	0	ALLOC(yy_lookup, maxperiod+1, opus_val32);
480	0	dual_inner_prod(x, x, x-T0, N, &xx, &xy, arch);
481	0	yy_lookup[0] = xx;
482	0	yy=xx;
483	0	for (i=1;i<=maxperiod;i++)
484	0	{
485	0	yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
486	0	yy_lookup[i] = MAX32(0, yy);
487	0	}
488	0	yy = yy_lookup[T0];
489	0	best_xy = xy;
490	0	best_yy = yy;
491	0	g = g0 = compute_pitch_gain(xy, xx, yy);
492		/* Look for any pitch at T/k */
493	0	for (k=2;k<=15;k++)
494	0	{
495	0	int T1, T1b;
496	0	opus_val16 g1;
497	0	opus_val16 cont=0;
498	0	opus_val16 thresh;
499	0	T1 = celt_udiv(2*T0+k, 2*k);
500	0	if (T1 < minperiod)
501	0	break;
502		/* Look for another strong correlation at T1b */
503	0	if (k==2)
504	0	{
505	0	if (T1+T0>maxperiod)
506	0	T1b = T0;
507	0	else
508	0	T1b = T0+T1;
509	0	} else
510	0	{
511	0	T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
512	0	}
513	0	dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
514	0	xy = HALF32(xy + xy2);
515	0	yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
516	0	g1 = compute_pitch_gain(xy, xx, yy);
517	0	if (abs(T1-prev_period)<=1)
518	0	cont = prev_gain;
519	0	else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
520	0	cont = HALF16(prev_gain);
521	0	else
522	0	cont = 0;
523	0	thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);
524		/* Bias against very high pitch (very short period) to avoid false-positives
525		due to short-term correlation */
526	0	if (T1<3*minperiod)
527	0	thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont);
528	0	else if (T1<2*minperiod)
529	0	thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont);
530	0	if (g1 > thresh)
531	0	{
532	0	best_xy = xy;
533	0	best_yy = yy;
534	0	T = T1;
535	0	g = g1;
536	0	}
537	0	}
538	0	best_xy = MAX32(0, best_xy);
539	0	if (best_yy <= best_xy)
540	0	pg = Q15ONE;
541	0	else
542	0	pg = SHR32(frac_div32(best_xy,best_yy+1),16);
543
544	0	for (k=0;k<3;k++)
545	0	xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
546	0	if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
547	0	offset = 1;
548	0	else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
549	0	offset = -1;
550	0	else
551	0	offset = 0;
552	0	if (pg > g)
553	0	pg = g;
554	0	*T0_ = 2*T+offset;
555
556	0	if (*T0_<minperiod0)
557	0	*T0_=minperiod0;
558	0	RESTORE_STACK;
559	0	return pg;
560	0	}