/src/vvdec/source/Lib/CommonLib/TrQuant_EMT.cpp

Source
/* -----------------------------------------------------------------------------
The copyright in this software is being made available under the Clear BSD
License, included below. No patent rights, trademark rights and/or 
other Intellectual Property Rights other than the copyrights concerning 
the Software are granted under this license.

The Clear BSD License

Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted (subject to the limitations in the disclaimer below) provided that
the following conditions are met:

     * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

     * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

     * Neither the name of the copyright holder nor the names of its
     contributors may be used to endorse or promote products derived from this
     software without specific prior written permission.

NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.


------------------------------------------------------------------------------------------- */

/** \file     TrQuant_EMT.cpp
    \brief    inverse transform kernels (DCT-II, DST-VII, DCT-VIII) and the TCoeffOps function table
*/

#include "TrQuant_EMT.h"

#include "Rom.h"

#include <memory.h>

namespace vvdec
{

// Primary template of the matrix-multiplication based 1D inverse transform for a transform size of
// uiTrSize. It is only declared here; the declaration has to precede the explicit <2> specialization
// that follows, and the generic definition comes after that specialization.
template<int uiTrSize>
inline void _fastInverseMM( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT );

// 2-point specialization of the matrix-multiplication inverse transform.
//  src            input coefficients; coefficient row k of line i is read from src[k * line + i]
//  dst            output; the two results of line i are stored at dst[2 * i] and dst[2 * i + 1]
//  shift          right shift applied (after adding 1 << (shift - 1)) when clip is set
//  line           number of lines in the block, also the distance between coefficient rows in src
//  iSkipLine      number of trailing lines that are not transformed; their output is set to zero
//  iSkipLine2     number of trailing coefficient rows left out of the accumulation
//  clip           when true the sums are rounded, shifted and clamped to [outputMinimum, outputMaximum]
//  iT             2x2 transform matrix, row k starting at iT[2 * k]
template<>
inline void _fastInverseMM<2>( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT )
{
  const int roundOffset = 1 << ( shift - 1 );
  const int activeLines = line - iSkipLine;
  const int usedRows    = 2 - iSkipLine2;

  // the outputs are accumulated below, so start from zero
  memset( dst, 0, activeLines * 2 * sizeof( TCoeff ) );

  for( int row = 0; row < usedRows; row++ )
  {
    const TCoeff*       coeffRow = src + row * line;
    const TMatrixCoeff* basis    = iT  + ( row << 1 );

    for( int col = 0; col < activeLines; col++ )
    {
      const TCoeff coeff = coeffRow[col];
      TCoeff*      out   = dst + ( col << 1 );

      out[0] += coeff * basis[0];
      out[1] += coeff * basis[1];
    }
  }

  if( clip )
  {
    // the transformed lines are stored back to back, so one flat pass covers all of them
    const int numValues = activeLines * 2;
    for( int n = 0; n < numValues; n++ )
    {
      dst[n] = Clip3( outputMinimum, outputMaximum, ( int ) ( dst[n] + roundOffset ) >> shift );
    }
  }

  if( iSkipLine != 0 )
  {
    // zero the output of the skipped lines
    memset( dst + ( activeLines << 1 ), 0, ( iSkipLine << 1 ) * sizeof( TCoeff ) );
  }
}

// Generic matrix-multiplication inverse transform for transform size uiTrSize.
// Clears the complete line x uiTrSize output, lets the kernel registered in g_tCoeffOps.fastInvCore
// accumulate the products, and optionally rounds/clips the transformed lines.
//  src            input coefficients, handed to the kernel together with `line`
//  dst            output buffer of line * uiTrSize values
//  shift          right shift used by the rounding/clipping step (rounding offset 1 << (shift - 1))
//  line           number of lines in the block
//  iSkipLine      number of trailing lines that are not transformed; they keep the zero fill
//  iSkipLine2     number of trailing coefficient rows that are not passed to the kernel
//  clip           when true, g_tCoeffOps.roundClip4 (size 4) or roundClip8 (other sizes) is applied
//  iT             transform matrix for this size
template<int uiTrSize>
inline void _fastInverseMM( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT )
{
  const int  roundOffset = 1 << ( shift - 1 );
  const int  activeLines = line - iSkipLine;
  const int  usedRows    = uiTrSize - iSkipLine2;
  // fastInvCore[0] belongs to size 4, hence the offset of 2 on the log2 of the size
  const auto kernelIdx   = getLog2( uiTrSize ) - 2;

  memset( dst, 0, line * uiTrSize * sizeof( TCoeff ) );

  g_tCoeffOps.fastInvCore[kernelIdx]( iT, src, dst, line, activeLines, usedRows );

  if( !clip )
  {
    return;
  }

  if( uiTrSize != 4 )
  {
    g_tCoeffOps.roundClip8( dst, uiTrSize, activeLines, uiTrSize, outputMinimum, outputMaximum, roundOffset, shift );
  }
  else
  {
    g_tCoeffOps.roundClip4( dst, uiTrSize, activeLines, uiTrSize, outputMinimum, outputMaximum, roundOffset, shift );
  }
}


// ********************************** DCT-II **********************************

//Fast DCT-II transforms
void fastInverseDCT2_B2(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  int j;
  int E, O;
  int add = 1 << (shift - 1);
  TCoeff* dstOrg = dst;

  const TMatrixCoeff *iT = g_trCoreDCT2P2[0];

  const int  reducedLine = line - iSkipLine;

  for (j = 0; j<reducedLine; j++)
  {
    /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    E = iT[0] * (src[0] + src[line]);
    O = iT[2] * (src[0] - src[line]);

    /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
    dst[0] = E;
    dst[1] = O;

    src++;
    dst += 2;
  }

  if( clip )
  {
    for( int i = 0; i < reducedLine; i++ )
    {
      TCoeff* dstPtr = &dstOrg[i << 1];
      for( int j = 0; j < 2; j++, dstPtr++ )
      {
        *dstPtr = Clip3( outputMinimum, outputMaximum, ( int ) ( *dstPtr + add ) >> shift );
      }
    }
  }

  if (iSkipLine)
  {
    memset(dst, 0, (iSkipLine << 1) * sizeof(TCoeff));
  }
}

/** 4-point inverse DCT-II (1D) implemented using partial butterfly structure
*  \param src            input data (transform coefficients); each line reads src[0], src[line],
*                        src[2 * line] and src[3 * line] relative to its own start
*  \param dst            output data (residual); four consecutive values are written per line
*  \param shift          specifies right shift after 1D transform (only applied when clip is set)
*  \param line           number of lines in the block
*  \param iSkipLine      number of trailing lines that are not transformed; their output is zeroed
*  \param iSkipLine2     not used by the butterfly path (only forwarded by the disabled matrix path)
*  \param clip           when true the results are rounded, shifted and clamped via g_tCoeffOps.roundClip4
*  \param outputMinimum  minimum for clipping
*  \param outputMaximum  maximum for clipping
*/
void fastInverseDCT2_B4( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum )
{
#if 0
  // alternative path using the generic matrix multiplication (kept in sync with the _fastInverseMM signature)
  const TMatrixCoeff *iT = g_trCoreDCT2P4[0];

  _fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, iT );
#else
  int E[2], O[2];
  const int add = 1 << ( shift - 1 );

  const TMatrixCoeff *iT = g_trCoreDCT2P4[0];

  // remember the start of the output: dst is advanced while the lines are written
  TCoeff* orgDst = dst;

  const int  reducedLine = line - iSkipLine;
  for( int j = 0; j < reducedLine; j++ )
  {
    /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    O[0] = iT[1 * 4 + 0] * src[line] + iT[3 * 4 + 0] * src[3 * line];
    O[1] = iT[1 * 4 + 1] * src[line] + iT[3 * 4 + 1] * src[3 * line];
    E[0] = iT[0 * 4 + 0] * src[   0] + iT[2 * 4 + 0] * src[2 * line];
    E[1] = iT[0 * 4 + 1] * src[   0] + iT[2 * 4 + 1] * src[2 * line];

    /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
    dst[0] = E[0] + O[0];
    dst[1] = E[1] + O[1];
    dst[2] = E[1] - O[1];
    dst[3] = E[0] - O[0];

    src++;
    dst += 4;
  }

  if( clip )
    g_tCoeffOps.roundClip4( orgDst, 4, reducedLine, 4, outputMinimum, outputMaximum, add, shift );

  if( iSkipLine )
  {
    // dst points behind the last transformed line here
    memset( dst, 0, ( iSkipLine << 2 ) * sizeof( TCoeff ) );
  }
#endif
}

/** 8-point inverse DCT-II (1D)
*  The active (#if 1) path forwards all arguments to the matrix multiplication _fastInverseMM<8>
*  together with the g_trCoreDCT2P8 matrix. The #else path holds a disabled partial butterfly
*  implementation.
*  \param src            input data (transform coefficients)
*  \param dst            output data (residual)
*  \param shift          specifies right shift after 1D transform
*  \param line           number of lines in the block
*  \param iSkipLine      number of trailing lines that are not transformed
*  \param iSkipLine2     number of trailing coefficient rows left out (used by the active path only)
*  \param clip           when true the results are rounded, shifted and clamped
*  \param outputMinimum  minimum for clipping
*  \param outputMaximum  maximum for clipping
*/
void fastInverseDCT2_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
#if 1
  _fastInverseMM<8>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P8[0] );
#else
  // disabled partial butterfly variant
  int j, k;
  int E[4], O[4];
  int EE[2], EO[2];
  int add = 1 << (shift - 1);

  const TMatrixCoeff *iT = g_trCoreDCT2P8[0];

  // start of the output, needed for the clipping pass because dst is advanced below
  TCoeff *orgDst = dst;

  const int  reducedLine = line - iSkipLine;
  for( j = 0; j < reducedLine; j++ )
  {
    /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    for( k = 0; k < 4; k++ )
    {
      O[k] = iT[1 * 8 + k] * src[line] + iT[3 * 8 + k] * src[3 * line] + iT[5 * 8 + k] * src[5 * line] + iT[7 * 8 + k] * src[7 * line];
    }

    EO[0] = iT[2 * 8 + 0] * src[2 * line] + iT[6 * 8 + 0] * src[6 * line];
    EO[1] = iT[2 * 8 + 1] * src[2 * line] + iT[6 * 8 + 1] * src[6 * line];
    EE[0] = iT[0 * 8 + 0] * src[0       ] + iT[4 * 8 + 0] * src[4 * line];
    EE[1] = iT[0 * 8 + 1] * src[0       ] + iT[4 * 8 + 1] * src[4 * line];

    /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
    E[0] = EE[0] + EO[0];
    E[3] = EE[0] - EO[0];
    E[1] = EE[1] + EO[1];
    E[2] = EE[1] - EO[1];

    for( k = 0; k < 4; k++ )
    {
      dst[k    ] = E[    k] + O[    k];
      dst[k + 4] = E[3 - k] - O[3 - k];
    }
    src++;
    dst += 8;
  }

  if( clip )
    g_tCoeffOps.roundClip8( orgDst, 8, reducedLine, 8, outputMinimum, outputMaximum, add, shift );

  if( iSkipLine )
  {
    // dst points behind the last transformed line here; zero the skipped lines
    memset( dst, 0, ( iSkipLine << 3 ) * sizeof( TCoeff ) );
  }
#endif
}

/** 16x16 inverse transform implemented using partial butterfly structure (1D)
*  \param src            input data (transform coefficients)
*  \param dst            output data (residual)
*  \param shift          specifies right shift after 1D transform
*  \param line
*  \param outputMinimum  minimum for clipping
*  \param outputMaximum  maximum for clipping
*/
void fastInverseDCT2_B16( const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum )
{
  _fastInverseMM<16>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P16[0] );
}

/** 32x32 inverse transform implemented using partial butterfly structure (1D)
*  \param src   input data (transform coefficients)
*  \param dst   output data (residual)
*  \param shift specifies right shift after 1D transform
*  \param line
*  \param outputMinimum  minimum for clipping
*  \param outputMaximum  maximum for clipping
*/
void fastInverseDCT2_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM<32>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P32[0] );
}

void fastInverseDCT2_B64(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM<64>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P64[0] );
}

void fastInverseDST7_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P4[0] );
}

void fastInverseDST7_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 8 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P8[0]);
}

void fastInverseDST7_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P16[0] );
}

void fastInverseDST7_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P32[0] );
}


// ********************************** DCT-VIII **********************************

void fastInverseDCT8_B4(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P4[0] );
}

void fastInverseDCT8_B8(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 8 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P8[0] );
}

void fastInverseDCT8_B16(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P16[0] );
}

void fastInverseDCT8_B32(const TCoeff *src, TCoeff *dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
{
  _fastInverseMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P32[0] );
}

}

#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP 1

#include "Unit.h"
#include "Buffer.h"

namespace vvdec
{

// Copies coefficients from src to dst while rounding, shifting and clamping each value:
//   dst[ADDR] = Clip3( outputMin, outputMax, ( src[ADDR] + round ) >> shift )
// The traversal is generated by SIZE_AWARE_PER_EL_OP, which is defined outside this file;
// presumably it visits `height` rows of `width` elements and applies CPYRESI_INC once per row
// (TODO confirm against the macro definition).
//  src        input coefficients; CPYRESI_INC advances it by `width`
//  dst        output samples; CPYRESI_INC advances it by `stride`
//  stride     step applied to dst in CPYRESI_INC
//  width      step applied to src in CPYRESI_INC
//  height     not referenced directly here (presumably consumed by SIZE_AWARE_PER_EL_OP)
//  outputMin  lower clamp bound
//  outputMax  upper clamp bound
//  round      offset added before the shift
//  shift      right shift applied to the sum
void cpyResiClipCore( const TCoeff* src, Pel* dst, ptrdiff_t stride, unsigned width, unsigned height, const TCoeff outputMin, const TCoeff outputMax, const TCoeff round, const TCoeff shift )
{
  // per-element operation and per-step pointer update handed to the traversal macro
#define CPYRESI_OP( ADDR ) dst[ADDR] = Clip3( outputMin, outputMax, ( src[ADDR] + round ) >> shift )
#define CPYRESI_INC dst += stride; src += width;

  SIZE_AWARE_PER_EL_OP( CPYRESI_OP, CPYRESI_INC );

  // keep the helper macros local to this function
#undef CPYRESI_INC
#undef CPYRESI_OP
}

// Rounds, shifts and clamps the values of dst in place:
//   dst[ADDR] = Clip3( outputMin, outputMax, ( dst[ADDR] + round ) >> shift )
// The traversal is generated by SIZE_AWARE_PER_EL_OP, which is defined outside this file;
// presumably it visits `height` rows of `width` elements and applies CLIP_INC once per row
// (TODO confirm against the macro definition).
//  dst        buffer that is modified in place; CLIP_INC advances it by `stride`
//  width      not referenced directly here (presumably consumed by SIZE_AWARE_PER_EL_OP)
//  height     not referenced directly here (presumably consumed by SIZE_AWARE_PER_EL_OP)
//  stride     step applied to dst in CLIP_INC
//  outputMin  lower clamp bound
//  outputMax  upper clamp bound
//  round      offset added before the shift
//  shift      right shift applied to the sum
void clipCore( TCoeff *dst, unsigned width, unsigned height, unsigned stride, const TCoeff outputMin, const TCoeff outputMax, const TCoeff round, const TCoeff shift )
{
  // per-element operation and per-step pointer update handed to the traversal macro
#define CLIP_OP( ADDR ) dst[ADDR] = Clip3( outputMin, outputMax, ( dst[ADDR] + round ) >> shift )
#define CLIP_INC        dst      += stride

  SIZE_AWARE_PER_EL_OP( CLIP_OP, CLIP_INC );

  // keep the helper macros local to this function
#undef CLIP_INC
#undef CLIP_OP
}

template<int trSize>
void fastInvCore_( const TMatrixCoeff* it, const TCoeff* src, TCoeff* dst, unsigned lines, unsigned reducedLines, unsigned rows )
{
  for( int k = 0; k < rows; k++ )
  {
    const TCoeff* srcPtr = &src[k * lines];
    for( int i = 0; i < reducedLines; i++ )
    {
            TCoeff*       dstPtr = &dst[i * trSize];
      const TMatrixCoeff*  itPtr =  &it[k * trSize];
      for( int j = 0; j < trSize; j++ )
      {
        *dstPtr++ += *srcPtr * *itPtr++;
      }
      srcPtr++;
    }
  }
}

// Fills the function table with the generic C++ implementations defined in this file.
TCoeffOps::TCoeffOps()
{
  // slots 0..6 stand for the sizes 1, 2, 4, 8, 16, 32 and 64; all of them use the same routine
  for( int sizeIdx = 0; sizeIdx < 7; sizeIdx++ )
  {
    cpyResiClip[sizeIdx] = cpyResiClipCore;
  }

  // both rounding/clipping entries share one implementation
  roundClip4 = clipCore;
  roundClip8 = clipCore;

  // one accumulation kernel per transform size, index = log2( size ) - 2
  fastInvCore[0] = fastInvCore_< 4>;
  fastInvCore[1] = fastInvCore_< 8>;
  fastInvCore[2] = fastInvCore_<16>;
  fastInvCore[3] = fastInvCore_<32>;
  fastInvCore[4] = fastInvCore_<64>;
}

// Global function table used by the inverse transforms in this file (fastInvCore, roundClip4,
// roundClip8); its constructor installs the generic implementations defined above.
TCoeffOps g_tCoeffOps;

}

Coverage Report

Created: 2026-04-01 07:49

Line	Count	Source
1		/* -----------------------------------------------------------------------------
2		The copyright in this software is being made available under the Clear BSD
3		License, included below. No patent rights, trademark rights and/or
4		other Intellectual Property Rights other than the copyrights concerning
5		the Software are granted under this license.
6
7		The Clear BSD License
8
9		Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10		All rights reserved.
11
12		Redistribution and use in source and binary forms, with or without modification,
13		are permitted (subject to the limitations in the disclaimer below) provided that
14		the following conditions are met:
15
16		* Redistributions of source code must retain the above copyright notice,
17		this list of conditions and the following disclaimer.
18
19		* Redistributions in binary form must reproduce the above copyright
20		notice, this list of conditions and the following disclaimer in the
21		documentation and/or other materials provided with the distribution.
22
23		* Neither the name of the copyright holder nor the names of its
24		contributors may be used to endorse or promote products derived from this
25		software without specific prior written permission.
26
27		NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28		THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29		CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30		LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31		PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32		CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33		EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34		PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35		BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36		IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37		ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38		POSSIBILITY OF SUCH DAMAGE.
39
40
41		------------------------------------------------------------------------------------------- */
42
43		/** \file TrQuant_EMT.cpp
44		\brief transform and quantization class
45		*/
46
47		#include "TrQuant_EMT.h"
48
49		#include "Rom.h"
50
51		#include <memory.h>
52
53		namespace vvdec
54		{
55
56		template<int uiTrSize>
57		inline void _fastInverseMM( const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT );
58
59		template<>
60		inline void _fastInverseMM<2>( const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT )
61	0	{
62	0	const int rnd_factor = 1 << (shift - 1);
63	0	const int reducedLine = line - iSkipLine;
64	0	const int cutoff = 2 - iSkipLine2;
65	0
66	0	memset( dst, 0, reducedLine * 2 * sizeof( TCoeff ) );
67	0
68	0	for( int k = 0; k < cutoff; k++ )
69	0	{
70	0	const TCoeff* srcPtr = &src[k * line];
71	0	for( int i = 0; i < reducedLine; i++ )
72	0	{
73	0	TCoeff* dstPtr = &dst[i << 1];
74	0	const TMatrixCoeff* itPtr = &iT[k << 1];
75	0	const TCoeff srcVal = *srcPtr;
76	0	for( int j = 0; j < 2; j++ )
77	0	{
78	0	dstPtr++ += srcVal *itPtr++;
79	0	}
80	0	srcPtr++;
81	0	}
82	0	}
83	0
84	0	if( clip )
85	0	{
86	0	for( int i = 0; i < reducedLine; i++ )
87	0	{
88	0	TCoeff* dstPtr = &dst[i << 1];
89	0	for( int j = 0; j < 2; j++, dstPtr++ )
90	0	{
91	0	dstPtr = Clip3( outputMinimum, outputMaximum, ( int ) ( dstPtr + rnd_factor ) >> shift );
92	0	}
93	0	}
94	0	}
95	0
96	0	if( iSkipLine )
97	0	{
98	0	memset( dst + ( reducedLine << 1 ), 0, ( iSkipLine << 1 ) * sizeof( TCoeff ) );
99	0	}
100	0	}
101
102		template<int uiTrSize>
103		inline void _fastInverseMM( const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum, const TMatrixCoeff* iT )
104	0	{
105	0	const int rnd_factor = 1 << (shift - 1);
106	0	const int reducedLine = line - iSkipLine;
107	0	const int cutoff = uiTrSize - iSkipLine2;
108
109	0	memset( dst, 0, line * uiTrSize * sizeof( TCoeff ) );
110
111	0	g_tCoeffOps.fastInvCore[getLog2( uiTrSize ) - 2]( iT, src, dst, line, reducedLine, cutoff );
112
113	0	if( clip )
114	0	{
115	0	if( uiTrSize == 4 )
116	0	g_tCoeffOps.roundClip4( dst, uiTrSize, reducedLine, uiTrSize, outputMinimum, outputMaximum, rnd_factor, shift );
117	0	else
118	0	g_tCoeffOps.roundClip8( dst, uiTrSize, reducedLine, uiTrSize, outputMinimum, outputMaximum, rnd_factor, shift );
119	0	}
120	0	} Unexecuted instantiation: void vvdec::_fastInverseMM<8>(int const, int, int, int, int, int, bool, int, int, short const) Unexecuted instantiation: void vvdec::_fastInverseMM<16>(int const, int, int, int, int, int, bool, int, int, short const) Unexecuted instantiation: void vvdec::_fastInverseMM<32>(int const, int, int, int, int, int, bool, int, int, short const) Unexecuted instantiation: void vvdec::_fastInverseMM<64>(int const, int, int, int, int, int, bool, int, int, short const) Unexecuted instantiation: void vvdec::_fastInverseMM<4>(int const, int, int, int, int, int, bool, int, int, short const*)
121
122
123		// ******************************** DCT-II ********************************
124
125		//Fast DCT-II transforms
126		void fastInverseDCT2_B2(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
127	0	{
128	0	int j;
129	0	int E, O;
130	0	int add = 1 << (shift - 1);
131	0	TCoeff* dstOrg = dst;
132
133	0	const TMatrixCoeff *iT = g_trCoreDCT2P2[0];
134
135	0	const int reducedLine = line - iSkipLine;
136
137	0	for (j = 0; j<reducedLine; j++)
138	0	{
139		/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
140	0	E = iT[0] * (src[0] + src[line]);
141	0	O = iT[2] * (src[0] - src[line]);
142
143		/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
144	0	dst[0] = E;
145	0	dst[1] = O;
146
147	0	src++;
148	0	dst += 2;
149	0	}
150
151	0	if( clip )
152	0	{
153	0	for( int i = 0; i < reducedLine; i++ )
154	0	{
155	0	TCoeff* dstPtr = &dstOrg[i << 1];
156	0	for( int j = 0; j < 2; j++, dstPtr++ )
157	0	{
158	0	dstPtr = Clip3( outputMinimum, outputMaximum, ( int ) ( dstPtr + add ) >> shift );
159	0	}
160	0	}
161	0	}
162
163	0	if (iSkipLine)
164	0	{
165	0	memset(dst, 0, (iSkipLine << 1) * sizeof(TCoeff));
166	0	}
167	0	}
168
169		/** 4x4 inverse transform implemented using partial butterfly structure (1D)
170		* \param src input data (transform coefficients)
171		* \param dst output data (residual)
172		* \param shift specifies right shift after 1D transform
173		* \param line
174		* \param outputMinimum minimum for clipping
175		* \param outputMaximum maximum for clipping
176		*/
177		void fastInverseDCT2_B4( const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum )
178	0	{
179		#if 0
180		const TMatrixCoeff *iT = g_trCoreDCT2P4[0];
181
182		_fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, outputMinimum, outputMaximum, iT );
183		#else
184	0	int j;
185	0	int E[2], O[2];
186	0	int add = 1 << ( shift - 1 );
187
188	0	const TMatrixCoeff *iT = g_trCoreDCT2P4[0];
189
190	0	TCoeff* orgDst = dst;
191
192	0	const int reducedLine = line - iSkipLine;
193	0	for( j = 0; j < reducedLine; j++ )
194	0	{
195		/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
196	0	O[0] = iT[1 * 4 + 0] * src[line] + iT[3 * 4 + 0] * src[3 * line];
197	0	O[1] = iT[1 * 4 + 1] * src[line] + iT[3 * 4 + 1] * src[3 * line];
198	0	E[0] = iT[0 * 4 + 0] * src[ 0] + iT[2 * 4 + 0] * src[2 * line];
199	0	E[1] = iT[0 * 4 + 1] * src[ 0] + iT[2 * 4 + 1] * src[2 * line];
200
201		/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
202	0	dst[0] = E[0] + O[0];
203	0	dst[1] = E[1] + O[1];
204	0	dst[2] = E[1] - O[1];
205	0	dst[3] = E[0] - O[0];
206
207	0	src++;
208	0	dst += 4;
209	0	}
210
211	0	if( clip )
212	0	g_tCoeffOps.roundClip4( orgDst, 4, reducedLine, 4, outputMinimum, outputMaximum, add, shift );
213
214	0	if( iSkipLine )
215	0	{
216	0	memset( dst, 0, ( iSkipLine << 2 ) * sizeof( TCoeff ) );
217	0	}
218	0	#endif
219	0	}
220
221		/** 8x8 inverse transform implemented using partial butterfly structure (1D)
222		* \param src input data (transform coefficients)
223		* \param dst output data (residual)
224		* \param shift specifies right shift after 1D transform
225		* \param line
226		* \param outputMinimum minimum for clipping
227		* \param outputMaximum maximum for clipping
228		*/
229		void fastInverseDCT2_B8(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
230	0	{
231	0	#if 1
232	0	_fastInverseMM<8>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P8[0] );
233		#else
234		int j, k;
235		int E[4], O[4];
236		int EE[2], EO[2];
237		int add = 1 << (shift - 1);
238
239		const TMatrixCoeff *iT = g_trCoreDCT2P8[0];
240
241		TCoeff *orgDst = dst;
242
243		const int reducedLine = line - iSkipLine;
244		for( j = 0; j < reducedLine; j++ )
245		{
246		/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
247		for( k = 0; k < 4; k++ )
248		{
249		O[k] = iT[1 * 8 + k] * src[line] + iT[3 * 8 + k] * src[3 * line] + iT[5 * 8 + k] * src[5 * line] + iT[7 * 8 + k] * src[7 * line];
250		}
251
252		EO[0] = iT[2 * 8 + 0] * src[2 * line] + iT[6 * 8 + 0] * src[6 * line];
253		EO[1] = iT[2 * 8 + 1] * src[2 * line] + iT[6 * 8 + 1] * src[6 * line];
254		EE[0] = iT[0 * 8 + 0] * src[0 ] + iT[4 * 8 + 0] * src[4 * line];
255		EE[1] = iT[0 * 8 + 1] * src[0 ] + iT[4 * 8 + 1] * src[4 * line];
256
257		/* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
258		E[0] = EE[0] + EO[0];
259		E[3] = EE[0] - EO[0];
260		E[1] = EE[1] + EO[1];
261		E[2] = EE[1] - EO[1];
262
263		for( k = 0; k < 4; k++ )
264		{
265		dst[k ] = E[ k] + O[ k];
266		dst[k + 4] = E[3 - k] - O[3 - k];
267		}
268		src++;
269		dst += 8;
270		}
271
272		if( clip )
273		g_tCoeffOps.roundClip8( orgDst, 8, reducedLine, 8, outputMinimum, outputMaximum, add, shift );
274
275		if( iSkipLine )
276		{
277		memset( dst, 0, ( iSkipLine << 3 ) * sizeof( TCoeff ) );
278		}
279		#endif
280	0	}
281
282		/** 16x16 inverse transform implemented using partial butterfly structure (1D)
283		* \param src input data (transform coefficients)
284		* \param dst output data (residual)
285		* \param shift specifies right shift after 1D transform
286		* \param line
287		* \param outputMinimum minimum for clipping
288		* \param outputMaximum maximum for clipping
289		*/
290		void fastInverseDCT2_B16( const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum )
291	0	{
292	0	_fastInverseMM<16>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P16[0] );
293	0	}
294
295		/** 32x32 inverse transform implemented using partial butterfly structure (1D)
296		* \param src input data (transform coefficients)
297		* \param dst output data (residual)
298		* \param shift specifies right shift after 1D transform
299		* \param line
300		* \param outputMinimum minimum for clipping
301		* \param outputMaximum maximum for clipping
302		*/
303		void fastInverseDCT2_B32(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
304	0	{
305	0	_fastInverseMM<32>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P32[0] );
306	0	}
307
308		void fastInverseDCT2_B64(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
309	0	{
310	0	_fastInverseMM<64>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT2P64[0] );
311	0	}
312
313		void fastInverseDST7_B4(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
314	0	{
315	0	_fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P4[0] );
316	0	}
317
318		void fastInverseDST7_B8(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
319	0	{
320	0	_fastInverseMM< 8 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P8[0]);
321	0	}
322
323		void fastInverseDST7_B16(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
324	0	{
325	0	_fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P16[0] );
326	0	}
327
328		void fastInverseDST7_B32(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
329	0	{
330	0	_fastInverseMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDST7P32[0] );
331	0	}
332
333
334		// ******************************** DCT-VIII ********************************
335
336		void fastInverseDCT8_B4(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
337	0	{
338	0	_fastInverseMM<4>( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P4[0] );
339	0	}
340
341		void fastInverseDCT8_B8(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
342	0	{
343	0	_fastInverseMM< 8 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P8[0] );
344	0	}
345
346		void fastInverseDCT8_B16(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
347	0	{
348	0	_fastInverseMM< 16 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P16[0] );
349	0	}
350
351		void fastInverseDCT8_B32(const TCoeff src, TCoeff dst, int shift, int line, int iSkipLine, int iSkipLine2, bool clip, const TCoeff outputMinimum, const TCoeff outputMaximum)
352	0	{
353	0	_fastInverseMM< 32 >( src, dst, shift, line, iSkipLine, iSkipLine2, clip, outputMinimum, outputMaximum, g_trCoreDCT8P32[0] );
354	0	}
355
356		}
357
358		#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP 1
359
360		#include "Unit.h"
361		#include "Buffer.h"
362
363		namespace vvdec
364		{
365
366		void cpyResiClipCore( const TCoeff* src, Pel* dst, ptrdiff_t stride, unsigned width, unsigned height, const TCoeff outputMin, const TCoeff outputMax, const TCoeff round, const TCoeff shift )
367	0	{
368	0	#define CPYRESI_OP( ADDR ) dst[ADDR] = Clip3( outputMin, outputMax, ( src[ADDR] + round ) >> shift )
369	0	#define CPYRESI_INC dst += stride; src += width;
370
371	0	SIZE_AWARE_PER_EL_OP( CPYRESI_OP, CPYRESI_INC );
372
373	0	#undef CPYRESI_INC
374	0	#undef CPYRESI_OP
375	0	}
376
377		void clipCore( TCoeff *dst, unsigned width, unsigned height, unsigned stride, const TCoeff outputMin, const TCoeff outputMax, const TCoeff round, const TCoeff shift )
378	0	{
379	0	#define CLIP_OP( ADDR ) dst[ADDR] = Clip3( outputMin, outputMax, ( dst[ADDR] + round ) >> shift )
380	0	#define CLIP_INC dst += stride
381
382	0	SIZE_AWARE_PER_EL_OP( CLIP_OP, CLIP_INC );
383
384	0	#undef CLIP_INC
385	0	#undef CLIP_OP
386	0	}
387
388		template<int trSize>
389		void fastInvCore_( const TMatrixCoeff* it, const TCoeff* src, TCoeff* dst, unsigned lines, unsigned reducedLines, unsigned rows )
390	0	{
391	0	for( int k = 0; k < rows; k++ )
392	0	{
393	0	const TCoeff* srcPtr = &src[k * lines];
394	0	for( int i = 0; i < reducedLines; i++ )
395	0	{
396	0	TCoeff* dstPtr = &dst[i * trSize];
397	0	const TMatrixCoeff* itPtr = &it[k * trSize];
398	0	for( int j = 0; j < trSize; j++ )
399	0	{
400	0	dstPtr++ += srcPtr * *itPtr++;
401	0	}
402	0	srcPtr++;
403	0	}
404	0	}
405	0	} Unexecuted instantiation: void vvdec::fastInvCore_<4>(short const, int const, int, unsigned int, unsigned int, unsigned int) Unexecuted instantiation: void vvdec::fastInvCore_<8>(short const, int const, int, unsigned int, unsigned int, unsigned int) Unexecuted instantiation: void vvdec::fastInvCore_<16>(short const, int const, int, unsigned int, unsigned int, unsigned int) Unexecuted instantiation: void vvdec::fastInvCore_<32>(short const, int const, int, unsigned int, unsigned int, unsigned int) Unexecuted instantiation: void vvdec::fastInvCore_<64>(short const, int const, int*, unsigned int, unsigned int, unsigned int)
406
407		TCoeffOps::TCoeffOps()
408	256	{
409	256	cpyResiClip[0] = cpyResiClipCore; // 1
410	256	cpyResiClip[1] = cpyResiClipCore; // 2
411	256	cpyResiClip[2] = cpyResiClipCore; // 4
412	256	cpyResiClip[3] = cpyResiClipCore; // 8
413	256	cpyResiClip[4] = cpyResiClipCore; // 16
414	256	cpyResiClip[5] = cpyResiClipCore; // 32
415	256	cpyResiClip[6] = cpyResiClipCore; // 64
416	256	roundClip4 = clipCore;
417	256	roundClip8 = clipCore;
418	256	fastInvCore[0] = fastInvCore_< 4>;
419	256	fastInvCore[1] = fastInvCore_< 8>;
420	256	fastInvCore[2] = fastInvCore_<16>;
421	256	fastInvCore[3] = fastInvCore_<32>;
422	256	fastInvCore[4] = fastInvCore_<64>;
423	256	}
424
425		TCoeffOps g_tCoeffOps;
426
427		}