/src/aac/libFDK/src/dct.cpp

Source (jump to first uncovered line)
/* -----------------------------------------------------------------------------
Software License for The Fraunhofer FDK AAC Codec Library for Android

© Copyright  1995 - 2020 Fraunhofer-Gesellschaft zur Förderung der angewandten
Forschung e.V. All rights reserved.

 1.    INTRODUCTION
The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
scheme for digital audio. This FDK AAC Codec software is intended to be used on
a wide variety of Android devices.

AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
general perceptual audio codecs. AAC-ELD is considered the best-performing
full-bandwidth communications codec by independent studies and is widely
deployed. AAC has been standardized by ISO and IEC as part of the MPEG
specifications.

Patent licenses for necessary patent claims for the FDK AAC Codec (including
those of Fraunhofer) may be obtained through Via Licensing
(www.vialicensing.com) or through the respective patent owners individually for
the purpose of encoding or decoding bit streams in products that are compliant
with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
Android devices already license these patent claims through Via Licensing or
directly from the patent owners, and therefore FDK AAC Codec software may
already be covered under those patent licenses when it is used for those
licensed purposes only.

Commercially-licensed AAC software libraries, including floating-point versions
with enhanced sound quality, are also available from Fraunhofer. Users are
encouraged to check the Fraunhofer website for additional applications
information and documentation.

2.    COPYRIGHT LICENSE

Redistribution and use in source and binary forms, with or without modification,
are permitted without payment of copyright license fees provided that you
satisfy the following conditions:

You must retain the complete text of this software license in redistributions of
the FDK AAC Codec or your modifications thereto in source code form.

You must retain the complete text of this software license in the documentation
and/or other materials provided with redistributions of the FDK AAC Codec or
your modifications thereto in binary form. You must make available free of
charge copies of the complete source code of the FDK AAC Codec and your
modifications thereto to recipients of copies in binary form.

The name of Fraunhofer may not be used to endorse or promote products derived
from this library without prior written permission.

You may not charge copyright license fees for anyone to use, copy or distribute
the FDK AAC Codec software or your modifications thereto.

Your modified versions of the FDK AAC Codec must carry prominent notices stating
that you changed the software and the date of any change. For modified versions
of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
AAC Codec Library for Android."

3.    NO PATENT LICENSE

NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
Fraunhofer provides no warranty of patent non-infringement with respect to this
software.

You may use this FDK AAC Codec software or modifications thereto only for
purposes that are authorized by appropriate patent licenses.

4.    DISCLAIMER

This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
including but not limited to the implied warranties of merchantability and
fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
or consequential damages, including but not limited to procurement of substitute
goods or services; loss of use, data, or profits, or business interruption,
however caused and on any theory of liability, whether in contract, strict
liability, or tort (including negligence), arising in any way out of the use of
this software, even if advised of the possibility of such damage.

5.    CONTACT INFORMATION

Fraunhofer Institute for Integrated Circuits IIS
Attention: Audio and Multimedia Departments - FDK AAC LL
Am Wolfsmantel 33
91058 Erlangen, Germany

www.iis.fraunhofer.de/amm
amm-info@iis.fraunhofer.de
----------------------------------------------------------------------------- */

/******************* Library for basic calculation routines ********************

   Author(s):

   Description:

*******************************************************************************/

/*!
  \file   dct.cpp
  \brief  DCT Implementations
  Library functions to calculate standard DCTs. This will most likely be
  replaced by hand-optimized functions for the specific target processor.

  Three different implementations of the dct type II and the dct type III
  transforms are provided.

  By default implementations which are based on a single, standard complex
  FFT-kernel are used (dctII_f() and dctIII_f()). These are specifically helpful
  in cases where optimized FFT libraries are already available. The FFT used in
  these implementations is FFT rad2 from FDK_tools.

  Of course, one might also use DCT-libraries should they be available. The DCT
  and DST type IV implementations are only available in a version based on a
  complex FFT kernel.
*/

#include "dct.h"

#include "FDK_tools_rom.h"
#include "fft.h"

/*!
  \brief  Select the twiddle and sine tables matching a transform length.

  The table family is chosen from the most significant bits of \p length.
  Lengths that match no family (including every length below 4) yield NULL
  tables and a zero step, which then trips the assertion at the end of the
  function.

  \param ptwiddle     Optional output (may be NULL): receives the entry of
                      windowSlopes[0][...] selected for \p length. When it is
                      non-NULL a table must have been found (asserted).
  \param sin_twiddle  Output: receives the sine table selected for \p length
                      (SineTable1024, SineTable480, SineTable384 or
                      SineTable80), or NULL if the length is not supported.
  \param sin_step     Output: stride to be used when indexing *sin_twiddle;
                      0 if the length is not supported.
  \param length       Transform length.
*/
void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
                   int *sin_step, int length) {
  const FIXP_WTP *twiddle;
  int ld2_length;
  int selector;

  /* Get ld2 of length - 2 + 1
      -2: because first table entry is window of size 4
      +1: because we already include +1 because of ceil(log2(length)) */
  /* NOTE(review): this assumes fNormz() returns the number of leading zero
     bits of its argument - confirm against its definition. */
  ld2_length = DFRACT_BITS - 1 - fNormz((FIXP_DBL)length) - 1;

  /* Extract sort of "eigenvalue" of length: the shift keeps only its three
     most significant bits (0x4..0x7 for a supported length).
     For lengths below 4 ld2_length is smaller than 1, so the shift count
     would be negative, which is undefined behaviour. Such lengths are mapped
     to selector 0, which falls into the "unsupported" default branch. */
  selector = (ld2_length >= 1) ? (length >> (ld2_length - 1)) : 0;

  /* NOTE(review): there is no upper bound check; a very large length would
     make the shift counts below negative. Callers are expected to pass only
     supported lengths. */
  switch (selector) {
    case 0x4: /* radix 2 */
      *sin_twiddle = SineTable1024;
      *sin_step = 1 << (10 - ld2_length);
      twiddle = windowSlopes[0][0][ld2_length - 1];
      break;
    case 0x7: /* 10 ms */
      *sin_twiddle = SineTable480;
      *sin_step = 1 << (8 - ld2_length);
      twiddle = windowSlopes[0][1][ld2_length];
      break;
    case 0x6: /* 3/4 of radix 2 */
      *sin_twiddle = SineTable384;
      *sin_step = 1 << (8 - ld2_length);
      twiddle = windowSlopes[0][2][ld2_length];
      break;
    case 0x5: /* 5/16 of radix 2*/
      *sin_twiddle = SineTable80;
      *sin_step = 1 << (6 - ld2_length);
      twiddle = windowSlopes[0][3][ld2_length];
      break;
    default: /* unsupported length */
      *sin_twiddle = NULL;
      *sin_step = 0;
      twiddle = NULL;
      break;
  }

  if (ptwiddle != NULL) {
    FDK_ASSERT(twiddle != NULL);
    *ptwiddle = twiddle;
  }

  /* A zero step means that no table matched the requested length. */
  FDK_ASSERT(*sin_step > 0);
}

#if !defined(FUNCTION_dct_III)
/*!
  \brief  DCT type III of length L, computed with an FFT of length L/2.

  The input in pDat is pre-twiddled into tmp, fft() is run on tmp, and the
  result is interleaved back into pDat. L must be a multiple of 4 (asserted).
  tmp is accessed up to index L - 1, so it must hold at least L values.
  pDat_e is handed to fft() and additionally increased by 2 before returning;
  presumably it is the block exponent of the data - confirm with the callers.
*/
void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */
             FIXP_DBL *tmp,  /*!< pointer to temporary working buffer */
             int L,          /*!< length of transform */
             int *pDat_e) {
  const FIXP_WTP *sin_twiddle;
  int i;
  FIXP_DBL xr, accu1, accu2;
  int inc, index;
  int M = L >> 1;

  FDK_ASSERT(L % 4 == 0);
  /* Only the sine table and its step are needed here (ptwiddle == NULL).
     The step returned for length L is halved before use. */
  dct_getTables(NULL, &sin_twiddle, &inc, L);
  inc >>= 1;

  /* tmp is filled as pairs of values: pTmp_0 walks upwards from pair 1,
     pTmp_1 walks downwards from pair M - 1. Pairs 0 and M / 2 are written
     separately after the loop. */
  FIXP_DBL *pTmp_0 = &tmp[2];
  FIXP_DBL *pTmp_1 = &tmp[(M - 1) * 2];

  index = 4 * inc;

  /* This loop performs multiplication for index i (i*inc) */
  for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) {
    FIXP_DBL accu3, accu4, accu5, accu6;

    cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle[i * inc]);
    cplxMultDiv2(&accu4, &accu3, pDat[M + i], pDat[M - i],
                 sin_twiddle[(M - i) * inc]);
    accu3 >>= 1;
    accu4 >>= 1;

    /* This method is better for ARM926, that uses operand2 shifted right by 1
     * always */
    /* In the second half of the index range the two operands are swapped and
       the sign of accu6 is flipped, while 'index' runs back down the table
       (see the index update at the end of the loop body). */
    if (2 * i < (M / 2)) {
      cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1 >> 1)),
                   ((accu2 >> 1) + accu4), sin_twiddle[index]);
    } else {
      cplxMultDiv2(&accu6, &accu5, ((accu2 >> 1) + accu4),
                   (accu3 - (accu1 >> 1)), sin_twiddle[index]);
      accu6 = -accu6;
    }
    xr = (accu1 >> 1) + accu3;
    pTmp_0[0] = (xr >> 1) - accu5;
    pTmp_1[0] = (xr >> 1) + accu5;

    xr = (accu2 >> 1) - accu4;
    pTmp_0[1] = (xr >> 1) - accu6;
    pTmp_1[1] = -((xr >> 1) + accu6);

    /* Create index helper variables for (4*i)*inc indexed equivalent values of
     * short tables. */
    /* 'index' first steps up by 4 * inc per iteration and then steps back
       down, so it never grows to the full (4 * i) * inc. */
    if (2 * i < ((M / 2) - 1)) {
      index += 4 * inc;
    } else if (2 * i >= ((M / 2))) {
      index -= 4 * inc;
    }
  }

  /* Pair 0, built from pDat[0] and pDat[M]. */
  xr = fMultDiv2(pDat[M], sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */
  tmp[0] = ((pDat[0] >> 1) + xr) >> 1;
  tmp[1] = ((pDat[0] >> 1) - xr) >> 1;

  /* Pair M / 2, built from pDat[M / 2] and pDat[L - M / 2]. */
  cplxMultDiv2(&accu2, &accu1, pDat[L - (M / 2)], pDat[M / 2],
               sin_twiddle[M * inc / 2]);
  tmp[M] = accu1 >> 1;
  tmp[M + 1] = accu2 >> 1;

  /* dit_fft expects 1 bit scaled input values */
  fft(M, tmp, pDat_e);

  /* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */
  /* Interleave the FFT output back into pDat: two values are taken from the
     front of tmp and two from its back (read in descending order) for every
     four output values. Note that the local pointers tmp and pDat are
     advanced here. */
  pTmp_1 = &tmp[L];
  for (i = M >> 1; i--;) {
    FIXP_DBL tmp1, tmp2, tmp3, tmp4;
    tmp1 = *tmp++;
    tmp2 = *tmp++;
    tmp3 = *--pTmp_1;
    tmp4 = *--pTmp_1;
    *pDat++ = tmp1;
    *pDat++ = tmp3;
    *pDat++ = tmp2;
    *pDat++ = tmp4;
  }

  /* Presumably accounts for the down-scaling applied during the pre-twiddle
     above - TODO confirm. */
  *pDat_e += 2;
}

/*!
  \brief  DST type III of length L, derived from dct_III().

  The input is reversed in place, dct_III() is run on it, and every
  odd-indexed output value is negated. A dedicated DST III implementation
  might be more efficient than this reuse of the DCT III.
*/
void dst_III(FIXP_DBL *pDat, /*!< pointer to input/output */
             FIXP_DBL *tmp,  /*!< pointer to temporary working buffer */
             int L,          /*!< length of transform */
             int *pDat_e) {
  const int half = L >> 1;
  int lo, hi, k;

  /* Reverse the order of the input samples by swapping from both ends. */
  for (lo = 0, hi = L - 1; lo < half; lo++, hi--) {
    const FIXP_DBL held = pDat[hi];
    pDat[hi] = pDat[lo];
    pDat[lo] = held;
  }

  /* Transform the mirrored data with the DCT-III. */
  dct_III(pDat, tmp, L, pDat_e);

  /* Negate the results at odd indices. */
  for (k = 1; k < L; k += 2) {
    pDat[k] = -pDat[k];
  }
}

#endif

#if !defined(FUNCTION_dct_II)
/*!
  \brief  DCT type II of length L, computed with an FFT of length L/2.

  The input in pDat is reordered into tmp (scaled down by 2 bits), fft() is
  run on tmp, and the result is post-twiddled back into pDat. tmp is accessed
  up to index L - 1, so it must hold at least L values. pDat_e is handed to
  fft() and additionally increased by 2 before returning; presumably it is the
  block exponent of the data - confirm with the callers.
*/
void dct_II(
    FIXP_DBL *pDat, /*!< pointer to input/output */
    FIXP_DBL *tmp,  /*!< pointer to temporary working buffer */
    int L, /*!< length of transform (has to be a multiple of 8 (or 4 in case
              DCT_II_L_MULTIPLE_OF_4_SUPPORT is defined); note that this
              function itself only asserts L % 4 == 0 */
    int *pDat_e) {
  const FIXP_WTP *sin_twiddle;
  FIXP_DBL accu1, accu2;
  FIXP_DBL *pTmp_0, *pTmp_1;

  int i;
  int inc, index = 0;
  int M = L >> 1;

  FDK_ASSERT(L % 4 == 0);
  /* Only the sine table and its step are needed here (ptwiddle == NULL).
     The step returned for length L is halved before use. */
  dct_getTables(NULL, &sin_twiddle, &inc, L);
  inc >>= 1;

  {
    /* Even-indexed input samples go to the first half of tmp in order,
       odd-indexed samples go to the second half in reversed order. Every
       value is scaled down by 2 bits. */
    for (i = 0; i < M; i++) {
      tmp[i] = pDat[2 * i] >> 2;
      tmp[L - 1 - i] = pDat[2 * i + 1] >> 2;
    }
  }

  fft(M, tmp, pDat_e);

  /* The FFT output is read as pairs of values: pTmp_0 walks upwards from
     pair 1, pTmp_1 walks downwards from pair M - 1. Pairs 0 and M / 2 are
     handled separately after the loop. */
  pTmp_0 = &tmp[2];
  pTmp_1 = &tmp[(M - 1) * 2];

  index = inc * 4;

  for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) {
    FIXP_DBL a1, a2;
    FIXP_DBL accu3, accu4;

    a1 = ((pTmp_0[1] >> 1) + (pTmp_1[1] >> 1));
    a2 = ((pTmp_1[0] >> 1) - (pTmp_0[0] >> 1));

    /* In the second half of the index range the two operands are swapped and
       the sign of accu1 is flipped, while 'index' runs back down the table
       (see the index update at the end of the loop body). */
    if (2 * i < (M / 2)) {
      cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle[index]);
    } else {
      cplxMultDiv2(&accu1, &accu2, a1, a2, sin_twiddle[index]);
      accu1 = -accu1;
    }
    /* Shift left by one bit after the Div2 multiplication above. */
    accu1 <<= 1;
    accu2 <<= 1;

    a1 = ((pTmp_0[0] >> 1) + (pTmp_1[0] >> 1));
    a2 = ((pTmp_0[1] >> 1) - (pTmp_1[1] >> 1));

    /* Each iteration produces four output values: pDat[i], pDat[L - i],
       pDat[M - i] and pDat[M + i]. */
    cplxMult(&accu3, &accu4, (accu1 + a2), (a1 + accu2), sin_twiddle[i * inc]);
    pDat[L - i] = -accu3;
    pDat[i] = accu4;

    cplxMult(&accu3, &accu4, (accu1 - a2), (a1 - accu2),
             sin_twiddle[(M - i) * inc]);
    pDat[M + i] = -accu3;
    pDat[M - i] = accu4;

    /* Create index helper variables for (4*i)*inc indexed equivalent values of
     * short tables. */
    /* 'index' first steps up by 4 * inc per iteration and then steps back
       down, so it never grows to the full (4 * i) * inc. */
    if (2 * i < ((M / 2) - 1)) {
      index += 4 * inc;
    } else if (2 * i >= ((M / 2))) {
      index -= 4 * inc;
    }
  }

  /* Pair M / 2 yields pDat[M / 2] and pDat[L - M / 2]. */
  cplxMult(&accu1, &accu2, tmp[M], tmp[M + 1], sin_twiddle[(M / 2) * inc]);
  pDat[L - (M / 2)] = accu2;
  pDat[M / 2] = accu1;

  /* Pair 0 yields pDat[0] and pDat[M]. */
  pDat[0] = tmp[0] + tmp[1];
  pDat[M] = fMult(tmp[0] - tmp[1],
                  sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */

  /* Presumably accounts for the 2-bit down-scaling applied while filling
     tmp - TODO confirm. */
  *pDat_e += 2;
}
#endif

#if !defined(FUNCTION_dct_IV)

/*!
  \brief  In-place DCT type IV of length L, based on an FFT of length L/2.

  The data is pre-twiddled with the table returned in 'twiddle', transformed
  in place by fft(), and post-twiddled with the sine table.

  \param pDat    Pointer to input/output data (L values), modified in place.
  \param L       Length of the transform; must be at least 4 (asserted) and
                 must be a length for which dct_getTables() finds tables.
  \param pDat_e  In/out: handed to fft() and additionally increased by 2 for
                 the twiddling scale. Presumably the block exponent of the
                 data - confirm with the callers.
*/
void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
  int sin_step = 0;
  int M = L >> 1;

  const FIXP_WTP *twiddle;
  const FIXP_STP *sin_twiddle;

  FDK_ASSERT(L >= 4);

  dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);

  /* Pre-twiddle: two pairs per iteration, one taken from the front of the
     buffer (pDat_0) and one from the back (pDat_1). Every result is scaled
     down by 2 bits in total (Div2 multiplication plus a shift by 1). */
  {
    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
    int i;

    /* 29 cycles on ARM926 */
    for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) {
      FIXP_DBL accu1, accu2, accu3, accu4;

      accu1 = pDat_1[1];
      accu2 = pDat_0[0];
      accu3 = pDat_0[1];
      accu4 = pDat_1[0];

      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
      cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);

      pDat_0[0] = accu2 >> 1;
      pDat_0[1] = accu1 >> 1;
      pDat_1[0] = accu4 >> 1;
      pDat_1[1] = -(accu3 >> 1);
    }
    /* Odd M: one pair is left over in the middle of the buffer. */
    if (M & 1) {
      FIXP_DBL accu1, accu2;

      accu1 = pDat_1[1];
      accu2 = pDat_0[0];

      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);

      pDat_0[0] = accu2 >> 1;
      pDat_0[1] = accu1 >> 1;
    }
  }

  fft(M, pDat, pDat_e);

  /* Post-twiddle: walk inwards from both ends of the buffer. accu1/accu2
     carry the pair read from the back that is still to be rotated. */
  {
    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
    FIXP_DBL accu1, accu2, accu3, accu4;
    int idx, i;

    /* Sin and Cos values are 0.0f and 1.0f */
    accu1 = pDat_1[0];
    accu2 = pDat_1[1];

    pDat_1[1] = -pDat_0[1];

    /* 28 cycles for ARM926 */
    for (idx = sin_step, i = 1; i < ((M + 1) >> 1); i++, idx += sin_step) {
      FIXP_STP twd = sin_twiddle[idx];
      cplxMult(&accu3, &accu4, accu1, accu2, twd);
      pDat_0[1] = accu3;
      pDat_1[0] = accu4;

      pDat_0 += 2;
      pDat_1 -= 2;

      cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);

      /* Fetch the next back pair before it is overwritten below. */
      accu1 = pDat_1[0];
      accu2 = pDat_1[1];

      pDat_1[1] = -accu3;
      pDat_0[0] = accu4;
    }

    if ((M & 1) == 0) {
      /* Last Sin and Cos value pair are the same */
      accu1 = fMult(accu1, WTC(0x5a82799a));
      accu2 = fMult(accu2, WTC(0x5a82799a));

      pDat_1[0] = accu1 + accu2;
      pDat_0[1] = accu1 - accu2;
    }
  }

  /* Add twiddling scale. */
  *pDat_e += 2;
}
#endif /* !defined(FUNCTION_dct_IV) */

#if !defined(FUNCTION_dst_IV)
/*!
  \brief  In-place DST type IV of length L, based on an FFT of length L/2.

  The data is pre-twiddled with the table returned in 'twiddle', transformed
  in place by fft(), and post-twiddled with the sine table.

  \param pDat    Pointer to input/output data (L values), modified in place.
  \param L       Length of the transform; must be at least 4 (asserted) and
                 must be a length for which dct_getTables() finds tables.
  \param pDat_e  In/out: handed to fft() and additionally increased by 2 for
                 the twiddling scale. Presumably the block exponent of the
                 data - confirm with the callers.
*/
void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
  int sin_step = 0;
  int M = L >> 1;

  const FIXP_WTP *twiddle;
  const FIXP_STP *sin_twiddle;

  FDK_ASSERT(L >= 4);

  dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);

  /* Pre-twiddle: two pairs per iteration, one taken from the front of the
     buffer (pDat_0) and one from the back (pDat_1). The inputs are shifted
     right by 1 BEFORE they are negated, so that negating the most negative
     value cannot overflow. Together with the Div2 multiplication every
     result is scaled down by 2 bits in total. */
  {
    FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
    FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
    int i;

    /* 34 cycles on ARM926 */
    for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) {
      FIXP_DBL accu1, accu2, accu3, accu4;

      accu1 = pDat_1[1] >> 1;
      accu2 = -(pDat_0[0] >> 1);
      accu3 = pDat_0[1] >> 1;
      accu4 = -(pDat_1[0] >> 1);

      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
      cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);

      pDat_0[0] = accu2;
      pDat_0[1] = accu1;
      pDat_1[0] = accu4;
      pDat_1[1] = -accu3;
    }
    /* Odd M: one pair is left over in the middle of the buffer. It is
       handled exactly like the pairs in the loop above (shift first, then
       negate); the total scaling of 2 bits is the same. */
    if (M & 1) {
      FIXP_DBL accu1, accu2;

      accu1 = pDat_1[1] >> 1;
      accu2 = -(pDat_0[0] >> 1);

      cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);

      pDat_0[0] = accu2;
      pDat_0[1] = accu1;
    }
  }

  fft(M, pDat, pDat_e);

  /* Post-twiddle: walk inwards from both ends of the buffer. accu1/accu2
     carry the pair read from the back that is still to be rotated. */
  {
    FIXP_DBL *RESTRICT pDat_0;
    FIXP_DBL *RESTRICT pDat_1;
    FIXP_DBL accu1, accu2, accu3, accu4;
    int idx, i;

    pDat_0 = &pDat[0];
    pDat_1 = &pDat[L - 2];

    /* Sin and Cos values are 0.0f and 1.0f */
    accu1 = pDat_1[0];
    accu2 = pDat_1[1];

    pDat_1[1] = -pDat_0[0];
    pDat_0[0] = pDat_0[1];

    for (idx = sin_step, i = 1; i < ((M + 1) >> 1); i++, idx += sin_step) {
      FIXP_STP twd = sin_twiddle[idx];

      cplxMult(&accu3, &accu4, accu1, accu2, twd);
      pDat_1[0] = -accu3;
      pDat_0[1] = -accu4;

      pDat_0 += 2;
      pDat_1 -= 2;

      cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);

      /* Fetch the next back pair before it is overwritten below. */
      accu1 = pDat_1[0];
      accu2 = pDat_1[1];

      pDat_0[0] = accu3;
      pDat_1[1] = -accu4;
    }

    if ((M & 1) == 0) {
      /* Last Sin and Cos value pair are the same */
      accu1 = fMult(accu1, WTC(0x5a82799a));
      accu2 = fMult(accu2, WTC(0x5a82799a));

      pDat_0[1] = -accu1 - accu2;
      pDat_1[0] = accu2 - accu1;
    }
  }

  /* Add twiddling scale. */
  *pDat_e += 2;
}
#endif /* !defined(FUNCTION_dst_IV) */

Coverage Report

Created: 2025-09-05 06:55

Line	Count	Source (jump to first uncovered line)
1		/* -----------------------------------------------------------------------------
2		Software License for The Fraunhofer FDK AAC Codec Library for Android
3
4		© Copyright 1995 - 2020 Fraunhofer-Gesellschaft zur Förderung der angewandten
5		Forschung e.V. All rights reserved.
6
7		1. INTRODUCTION
8		The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
9		that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
10		scheme for digital audio. This FDK AAC Codec software is intended to be used on
11		a wide variety of Android devices.
12
13		AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
14		general perceptual audio codecs. AAC-ELD is considered the best-performing
15		full-bandwidth communications codec by independent studies and is widely
16		deployed. AAC has been standardized by ISO and IEC as part of the MPEG
17		specifications.
18
19		Patent licenses for necessary patent claims for the FDK AAC Codec (including
20		those of Fraunhofer) may be obtained through Via Licensing
21		(www.vialicensing.com) or through the respective patent owners individually for
22		the purpose of encoding or decoding bit streams in products that are compliant
23		with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
24		Android devices already license these patent claims through Via Licensing or
25		directly from the patent owners, and therefore FDK AAC Codec software may
26		already be covered under those patent licenses when it is used for those
27		licensed purposes only.
28
29		Commercially-licensed AAC software libraries, including floating-point versions
30		with enhanced sound quality, are also available from Fraunhofer. Users are
31		encouraged to check the Fraunhofer website for additional applications
32		information and documentation.
33
34		2. COPYRIGHT LICENSE
35
36		Redistribution and use in source and binary forms, with or without modification,
37		are permitted without payment of copyright license fees provided that you
38		satisfy the following conditions:
39
40		You must retain the complete text of this software license in redistributions of
41		the FDK AAC Codec or your modifications thereto in source code form.
42
43		You must retain the complete text of this software license in the documentation
44		and/or other materials provided with redistributions of the FDK AAC Codec or
45		your modifications thereto in binary form. You must make available free of
46		charge copies of the complete source code of the FDK AAC Codec and your
47		modifications thereto to recipients of copies in binary form.
48
49		The name of Fraunhofer may not be used to endorse or promote products derived
50		from this library without prior written permission.
51
52		You may not charge copyright license fees for anyone to use, copy or distribute
53		the FDK AAC Codec software or your modifications thereto.
54
55		Your modified versions of the FDK AAC Codec must carry prominent notices stating
56		that you changed the software and the date of any change. For modified versions
57		of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
58		must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
59		AAC Codec Library for Android."
60
61		3. NO PATENT LICENSE
62
63		NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
64		limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
65		Fraunhofer provides no warranty of patent non-infringement with respect to this
66		software.
67
68		You may use this FDK AAC Codec software or modifications thereto only for
69		purposes that are authorized by appropriate patent licenses.
70
71		4. DISCLAIMER
72
73		This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
74		holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
75		including but not limited to the implied warranties of merchantability and
76		fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
77		CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
78		or consequential damages, including but not limited to procurement of substitute
79		goods or services; loss of use, data, or profits, or business interruption,
80		however caused and on any theory of liability, whether in contract, strict
81		liability, or tort (including negligence), arising in any way out of the use of
82		this software, even if advised of the possibility of such damage.
83
84		5. CONTACT INFORMATION
85
86		Fraunhofer Institute for Integrated Circuits IIS
87		Attention: Audio and Multimedia Departments - FDK AAC LL
88		Am Wolfsmantel 33
89		91058 Erlangen, Germany
90
91		www.iis.fraunhofer.de/amm
92		amm-info@iis.fraunhofer.de
93		----------------------------------------------------------------------------- */
94
95		/***************** Library for basic calculation routines ******************
96
97		Author(s):
98
99		Description:
100
101		*******************************************************************************/
102
103		/*!
104		\file dct.cpp
105		\brief DCT Implementations
106		Library functions to calculate standard DCTs. This will most likely be
107		replaced by hand-optimized functions for the specific target processor.
108
109		Three different implementations of the dct type II and the dct type III
110		transforms are provided.
111
112		By default implementations which are based on a single, standard complex
113		FFT-kernel are used (dctII_f() and dctIII_f()). These are specifically helpful
114		in cases where optimized FFT libraries are already available. The FFT used in
115		these implementation is FFT rad2 from FDK_tools.
116
117		Of course, one might also use DCT-libraries should they be available. The DCT
118		and DST type IV implementations are only available in a version based on a
119		complex FFT kernel.
120		*/
121
122		#include "dct.h"
123
124		#include "FDK_tools_rom.h"
125		#include "fft.h"
126
127		void dct_getTables(const FIXP_WTP **ptwiddle, const FIXP_STP **sin_twiddle,
128	93.2M	int *sin_step, int length) {
129	93.2M	const FIXP_WTP *twiddle;
130	93.2M	int ld2_length;
131
132		/* Get ld2 of length - 2 + 1
133		-2: because first table entry is window of size 4
134		+1: because we already include +1 because of ceil(log2(length)) */
135	93.2M	ld2_length = DFRACT_BITS - 1 - fNormz((FIXP_DBL)length) - 1;
136
137		/* Extract sort of "eigenvalue" (the 4 left most bits) of length. */
138	93.2M	switch ((length) >> (ld2_length - 1)) {
139	74.7M	case 0x4: /* radix 2 */
140	74.7M	*sin_twiddle = SineTable1024;
141	74.7M	*sin_step = 1 << (10 - ld2_length);
142	74.7M	twiddle = windowSlopes[0][0][ld2_length - 1];
143	74.7M	break;
144	504k	case 0x7: /* 10 ms */
145	504k	*sin_twiddle = SineTable480;
146	504k	*sin_step = 1 << (8 - ld2_length);
147	504k	twiddle = windowSlopes[0][1][ld2_length];
148	504k	break;
149	17.7M	case 0x6: /* 3/4 of radix 2 */
150	17.7M	*sin_twiddle = SineTable384;
151	17.7M	*sin_step = 1 << (8 - ld2_length);
152	17.7M	twiddle = windowSlopes[0][2][ld2_length];
153	17.7M	break;
154	294k	case 0x5: /* 5/16 of radix 2*/
155	294k	*sin_twiddle = SineTable80;
156	294k	*sin_step = 1 << (6 - ld2_length);
157	294k	twiddle = windowSlopes[0][3][ld2_length];
158	294k	break;
159	0	default:
160	0	*sin_twiddle = NULL;
161	0	*sin_step = 0;
162	0	twiddle = NULL;
163	0	break;
164	93.2M	}
165
166	93.2M	if (ptwiddle != NULL) {
167	83.6M	FDK_ASSERT(twiddle != NULL);
168	83.6M	*ptwiddle = twiddle;
169	83.6M	}
170
171	93.2M	FDK_ASSERT(*sin_step > 0);
172	93.2M	}
173
174		#if !defined(FUNCTION_dct_III)
175		void dct_III(FIXP_DBL *pDat, /*!< pointer to input/output */
176		FIXP_DBL *tmp, /*!< pointer to temporal working buffer */
177		int L, /*!< lenght of transform */
178	1.36M	int *pDat_e) {
179	1.36M	const FIXP_WTP *sin_twiddle;
180	1.36M	int i;
181	1.36M	FIXP_DBL xr, accu1, accu2;
182	1.36M	int inc, index;
183	1.36M	int M = L >> 1;
184
185	1.36M	FDK_ASSERT(L % 4 == 0);
186	1.36M	dct_getTables(NULL, &sin_twiddle, &inc, L);
187	1.36M	inc >>= 1;
188
189	1.36M	FIXP_DBL *pTmp_0 = &tmp[2];
190	1.36M	FIXP_DBL *pTmp_1 = &tmp[(M - 1) * 2];
191
192	1.36M	index = 4 * inc;
193
194		/* This loop performs multiplication for index i (i*inc) */
195	10.9M	for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) {
196	9.54M	FIXP_DBL accu3, accu4, accu5, accu6;
197
198	9.54M	cplxMultDiv2(&accu2, &accu1, pDat[L - i], pDat[i], sin_twiddle[i * inc]);
199	9.54M	cplxMultDiv2(&accu4, &accu3, pDat[M + i], pDat[M - i],
200	9.54M	sin_twiddle[(M - i) * inc]);
201	9.54M	accu3 >>= 1;
202	9.54M	accu4 >>= 1;
203
204		/* This method is better for ARM926, that uses operand2 shifted right by 1
205		* always */
206	9.54M	if (2 * i < (M / 2)) {
207	4.08M	cplxMultDiv2(&accu6, &accu5, (accu3 - (accu1 >> 1)),
208	4.08M	((accu2 >> 1) + accu4), sin_twiddle[index]);
209	5.45M	} else {
210	5.45M	cplxMultDiv2(&accu6, &accu5, ((accu2 >> 1) + accu4),
211	5.45M	(accu3 - (accu1 >> 1)), sin_twiddle[index]);
212	5.45M	accu6 = -accu6;
213	5.45M	}
214	9.54M	xr = (accu1 >> 1) + accu3;
215	9.54M	pTmp_0[0] = (xr >> 1) - accu5;
216	9.54M	pTmp_1[0] = (xr >> 1) + accu5;
217
218	9.54M	xr = (accu2 >> 1) - accu4;
219	9.54M	pTmp_0[1] = (xr >> 1) - accu6;
220	9.54M	pTmp_1[1] = -((xr >> 1) + accu6);
221
222		/* Create index helper variables for (4*i)*inc indexed equivalent values of
223		* short tables. */
224	9.54M	if (2 * i < ((M / 2) - 1)) {
225	4.08M	index += 4 * inc;
226	5.45M	} else if (2 * i >= ((M / 2))) {
227	5.45M	index -= 4 * inc;
228	5.45M	}
229	9.54M	}
230
231	1.36M	xr = fMultDiv2(pDat[M], sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */
232	1.36M	tmp[0] = ((pDat[0] >> 1) + xr) >> 1;
233	1.36M	tmp[1] = ((pDat[0] >> 1) - xr) >> 1;
234
235	1.36M	cplxMultDiv2(&accu2, &accu1, pDat[L - (M / 2)], pDat[M / 2],
236	1.36M	sin_twiddle[M * inc / 2]);
237	1.36M	tmp[M] = accu1 >> 1;
238	1.36M	tmp[M + 1] = accu2 >> 1;
239
240		/* dit_fft expects 1 bit scaled input values */
241	1.36M	fft(M, tmp, pDat_e);
242
243		/* ARM926: 12 cycles per 2-iteration, no overhead code by compiler */
244	1.36M	pTmp_1 = &tmp[L];
245	12.2M	for (i = M >> 1; i--;) {
246	10.9M	FIXP_DBL tmp1, tmp2, tmp3, tmp4;
247	10.9M	tmp1 = *tmp++;
248	10.9M	tmp2 = *tmp++;
249	10.9M	tmp3 = *--pTmp_1;
250	10.9M	tmp4 = *--pTmp_1;
251	10.9M	*pDat++ = tmp1;
252	10.9M	*pDat++ = tmp3;
253	10.9M	*pDat++ = tmp2;
254	10.9M	*pDat++ = tmp4;
255	10.9M	}
256
257	1.36M	*pDat_e += 2;
258	1.36M	}
259
260		void dst_III(FIXP_DBL *pDat, /*!< pointer to input/output */
261		FIXP_DBL *tmp, /*!< pointer to temporal working buffer */
262		int L, /*!< lenght of transform */
263	0	int *pDat_e) {
264	0	int L2 = L >> 1;
265	0	int i;
266	0	FIXP_DBL t;
267
268		/* note: DCT III is reused here, direct DST III implementation might be more
269		* efficient */
270
271		/* mirror input */
272	0	for (i = 0; i < L2; i++) {
273	0	t = pDat[i];
274	0	pDat[i] = pDat[L - 1 - i];
275	0	pDat[L - 1 - i] = t;
276	0	}
277
278		/* DCT-III */
279	0	dct_III(pDat, tmp, L, pDat_e);
280
281		/* flip signs at odd indices */
282	0	for (i = 1; i < L; i += 2) pDat[i] = -pDat[i];
283	0	}
284
285		#endif
286
287		#if !defined(FUNCTION_dct_II)
288		void dct_II(
289		FIXP_DBL *pDat, /*!< pointer to input/output */
290		FIXP_DBL *tmp, /*!< pointer to temporal working buffer */
291		int L, /*!< lenght of transform (has to be a multiple of 8 (or 4 in case
292		DCT_II_L_MULTIPLE_OF_4_SUPPORT is defined) */
293	8.26M	int *pDat_e) {
294	8.26M	const FIXP_WTP *sin_twiddle;
295	8.26M	FIXP_DBL accu1, accu2;
296	8.26M	FIXP_DBL *pTmp_0, *pTmp_1;
297
298	8.26M	int i;
299	8.26M	int inc, index = 0;
300	8.26M	int M = L >> 1;
301
302	8.26M	FDK_ASSERT(L % 4 == 0);
303	8.26M	dct_getTables(NULL, &sin_twiddle, &inc, L);
304	8.26M	inc >>= 1;
305
306	8.26M	{
307	93.6M	for (i = 0; i < M; i++) {
308	85.4M	tmp[i] = pDat[2 * i] >> 2;
309	85.4M	tmp[L - 1 - i] = pDat[2 * i + 1] >> 2;
310	85.4M	}
311	8.26M	}
312
313	8.26M	fft(M, tmp, pDat_e);
314
315	8.26M	pTmp_0 = &tmp[2];
316	8.26M	pTmp_1 = &tmp[(M - 1) * 2];
317
318	8.26M	index = inc * 4;
319
320	42.7M	for (i = 1; i<M>> 1; i++, pTmp_0 += 2, pTmp_1 -= 2) {
321	34.4M	FIXP_DBL a1, a2;
322	34.4M	FIXP_DBL accu3, accu4;
323
324	34.4M	a1 = ((pTmp_0[1] >> 1) + (pTmp_1[1] >> 1));
325	34.4M	a2 = ((pTmp_1[0] >> 1) - (pTmp_0[0] >> 1));
326
327	34.4M	if (2 * i < (M / 2)) {
328	15.1M	cplxMultDiv2(&accu1, &accu2, a2, a1, sin_twiddle[index]);
329	19.3M	} else {
330	19.3M	cplxMultDiv2(&accu1, &accu2, a1, a2, sin_twiddle[index]);
331	19.3M	accu1 = -accu1;
332	19.3M	}
333	34.4M	accu1 <<= 1;
334	34.4M	accu2 <<= 1;
335
336	34.4M	a1 = ((pTmp_0[0] >> 1) + (pTmp_1[0] >> 1));
337	34.4M	a2 = ((pTmp_0[1] >> 1) - (pTmp_1[1] >> 1));
338
339	34.4M	cplxMult(&accu3, &accu4, (accu1 + a2), (a1 + accu2), sin_twiddle[i * inc]);
340	34.4M	pDat[L - i] = -accu3;
341	34.4M	pDat[i] = accu4;
342
343	34.4M	cplxMult(&accu3, &accu4, (accu1 - a2), (a1 - accu2),
344	34.4M	sin_twiddle[(M - i) * inc]);
345	34.4M	pDat[M + i] = -accu3;
346	34.4M	pDat[M - i] = accu4;
347
348		/* Create index helper variables for (4*i)*inc indexed equivalent values of
349		* short tables. */
350	34.4M	if (2 * i < ((M / 2) - 1)) {
351	11.0M	index += 4 * inc;
352	23.3M	} else if (2 * i >= ((M / 2))) {
353	19.3M	index -= 4 * inc;
354	19.3M	}
355	34.4M	}
356
357	8.26M	cplxMult(&accu1, &accu2, tmp[M], tmp[M + 1], sin_twiddle[(M / 2) * inc]);
358	8.26M	pDat[L - (M / 2)] = accu2;
359	8.26M	pDat[M / 2] = accu1;
360
361	8.26M	pDat[0] = tmp[0] + tmp[1];
362	8.26M	pDat[M] = fMult(tmp[0] - tmp[1],
363	8.26M	sin_twiddle[M * inc].v.re); /* cos((PI/(2*L))*M); */
364
365	8.26M	*pDat_e += 2;
366	8.26M	}
367		#endif
368
369		#if !defined(FUNCTION_dct_IV)
370
371	45.2M	void dct_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
372	45.2M	int sin_step = 0;
373	45.2M	int M = L >> 1;
374
375	45.2M	const FIXP_WTP *twiddle;
376	45.2M	const FIXP_STP *sin_twiddle;
377
378	45.2M	FDK_ASSERT(L >= 4);
379
380	45.2M	FDK_ASSERT(L >= 4);
381
382	45.2M	dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
383
384	45.2M	{
385	45.2M	FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
386	45.2M	FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
387	45.2M	int i;
388
389		/* 29 cycles on ARM926 */
390	670M	for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) {
391	625M	FIXP_DBL accu1, accu2, accu3, accu4;
392
393	625M	accu1 = pDat_1[1];
394	625M	accu2 = pDat_0[0];
395	625M	accu3 = pDat_0[1];
396	625M	accu4 = pDat_1[0];
397
398	625M	cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
399	625M	cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
400
401	625M	pDat_0[0] = accu2 >> 1;
402	625M	pDat_0[1] = accu1 >> 1;
403	625M	pDat_1[0] = accu4 >> 1;
404	625M	pDat_1[1] = -(accu3 >> 1);
405	625M	}
406	45.2M	if (M & 1) {
407	0	FIXP_DBL accu1, accu2;
408
409	0	accu1 = pDat_1[1];
410	0	accu2 = pDat_0[0];
411
412	0	cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
413
414	0	pDat_0[0] = accu2 >> 1;
415	0	pDat_0[1] = accu1 >> 1;
416	0	}
417	45.2M	}
418
419	45.2M	fft(M, pDat, pDat_e);
420
421	45.2M	{
422	45.2M	FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
423	45.2M	FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
424	45.2M	FIXP_DBL accu1, accu2, accu3, accu4;
425	45.2M	int idx, i;
426
427		/* Sin and Cos values are 0.0f and 1.0f */
428	45.2M	accu1 = pDat_1[0];
429	45.2M	accu2 = pDat_1[1];
430
431	45.2M	pDat_1[1] = -pDat_0[1];
432
433		/* 28 cycles for ARM926 */
434	625M	for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
435	580M	FIXP_STP twd = sin_twiddle[idx];
436	580M	cplxMult(&accu3, &accu4, accu1, accu2, twd);
437	580M	pDat_0[1] = accu3;
438	580M	pDat_1[0] = accu4;
439
440	580M	pDat_0 += 2;
441	580M	pDat_1 -= 2;
442
443	580M	cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
444
445	580M	accu1 = pDat_1[0];
446	580M	accu2 = pDat_1[1];
447
448	580M	pDat_1[1] = -accu3;
449	580M	pDat_0[0] = accu4;
450	580M	}
451
452	45.2M	if ((M & 1) == 0) {
453		/* Last Sin and Cos value pair are the same */
454	45.2M	accu1 = fMult(accu1, WTC(0x5a82799a));
455	45.2M	accu2 = fMult(accu2, WTC(0x5a82799a));
456
457	45.2M	pDat_1[0] = accu1 + accu2;
458	45.2M	pDat_0[1] = accu1 - accu2;
459	45.2M	}
460	45.2M	}
461
462		/* Add twiddeling scale. */
463	45.2M	*pDat_e += 2;
464	45.2M	}
465		#endif /* defined (FUNCTION_dct_IV) */
466
467		#if !defined(FUNCTION_dst_IV)
468	38.4M	void dst_IV(FIXP_DBL *pDat, int L, int *pDat_e) {
469	38.4M	int sin_step = 0;
470	38.4M	int M = L >> 1;
471
472	38.4M	const FIXP_WTP *twiddle;
473	38.4M	const FIXP_STP *sin_twiddle;
474
475	38.4M	FDK_ASSERT(L >= 4);
476
477	38.4M	FDK_ASSERT(L >= 4);
478
479	38.4M	dct_getTables(&twiddle, &sin_twiddle, &sin_step, L);
480
481	38.4M	{
482	38.4M	FIXP_DBL *RESTRICT pDat_0 = &pDat[0];
483	38.4M	FIXP_DBL *RESTRICT pDat_1 = &pDat[L - 2];
484	38.4M	int i;
485
486		/* 34 cycles on ARM926 */
487	465M	for (i = 0; i < M - 1; i += 2, pDat_0 += 2, pDat_1 -= 2) {
488	427M	FIXP_DBL accu1, accu2, accu3, accu4;
489
490	427M	accu1 = pDat_1[1] >> 1;
491	427M	accu2 = -(pDat_0[0] >> 1);
492	427M	accu3 = pDat_0[1] >> 1;
493	427M	accu4 = -(pDat_1[0] >> 1);
494
495	427M	cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
496	427M	cplxMultDiv2(&accu3, &accu4, accu4, accu3, twiddle[i + 1]);
497
498	427M	pDat_0[0] = accu2;
499	427M	pDat_0[1] = accu1;
500	427M	pDat_1[0] = accu4;
501	427M	pDat_1[1] = -accu3;
502	427M	}
503	38.4M	if (M & 1) {
504	0	FIXP_DBL accu1, accu2;
505
506	0	accu1 = pDat_1[1];
507	0	accu2 = -pDat_0[0];
508
509	0	cplxMultDiv2(&accu1, &accu2, accu1, accu2, twiddle[i]);
510
511	0	pDat_0[0] = accu2 >> 1;
512	0	pDat_0[1] = accu1 >> 1;
513	0	}
514	38.4M	}
515
516	38.4M	fft(M, pDat, pDat_e);
517
518	38.4M	{
519	38.4M	FIXP_DBL *RESTRICT pDat_0;
520	38.4M	FIXP_DBL *RESTRICT pDat_1;
521	38.4M	FIXP_DBL accu1, accu2, accu3, accu4;
522	38.4M	int idx, i;
523
524	38.4M	pDat_0 = &pDat[0];
525	38.4M	pDat_1 = &pDat[L - 2];
526
527		/* Sin and Cos values are 0.0f and 1.0f */
528	38.4M	accu1 = pDat_1[0];
529	38.4M	accu2 = pDat_1[1];
530
531	38.4M	pDat_1[1] = -pDat_0[0];
532	38.4M	pDat_0[0] = pDat_0[1];
533
534	427M	for (idx = sin_step, i = 1; i<(M + 1)>> 1; i++, idx += sin_step) {
535	388M	FIXP_STP twd = sin_twiddle[idx];
536
537	388M	cplxMult(&accu3, &accu4, accu1, accu2, twd);
538	388M	pDat_1[0] = -accu3;
539	388M	pDat_0[1] = -accu4;
540
541	388M	pDat_0 += 2;
542	388M	pDat_1 -= 2;
543
544	388M	cplxMult(&accu3, &accu4, pDat_0[1], pDat_0[0], twd);
545
546	388M	accu1 = pDat_1[0];
547	388M	accu2 = pDat_1[1];
548
549	388M	pDat_0[0] = accu3;
550	388M	pDat_1[1] = -accu4;
551	388M	}
552
553	38.4M	if ((M & 1) == 0) {
554		/* Last Sin and Cos value pair are the same */
555	38.4M	accu1 = fMult(accu1, WTC(0x5a82799a));
556	38.4M	accu2 = fMult(accu2, WTC(0x5a82799a));
557
558	38.4M	pDat_0[1] = -accu1 - accu2;
559	38.4M	pDat_1[0] = accu2 - accu1;
560	38.4M	}
561	38.4M	}
562
563		/* Add twiddeling scale. */
564	38.4M	*pDat_e += 2;
565	38.4M	}
566		#endif /* !defined(FUNCTION_dst_IV) */