Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/TrQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     TrQuant.cpp
45
    \brief    transform and quantization class
46
*/
47
48
#include "TrQuant.h"
49
#include "TrQuant_EMT.h"
50
#include "QuantRDOQ.h"
51
#include "DepQuant.h"
52
#include "UnitTools.h"
53
#include "ContextModelling.h"
54
#include "CodingStructure.h"
55
#include "dtrace_buffer.h"
56
#include "TimeProfiler.h"
57
#include "SearchSpaceCounter.h"
58
59
#include <stdlib.h>
60
#include <memory.h>
61
62
//! \ingroup CommonLib
63
//! \{
64
65
namespace vvenc {
66
67
/** Plain aggregate of per-coefficient-group rate-distortion figures.
 *  NOTE(review): no user of this struct is visible in this part of the file;
 *  the member notes below restate the original remarks or are hedged guesses.
 */
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        // presumably the number of non-zero levels ahead of position 0 - confirm with the user of this struct
  double d64CodedLevelandDist;  // distortion and level cost only
  double d64UncodedDist;        // all zero coded block distortion
  double d64SigCost;            // significance cost (exact definition not visible here)
  double d64SigCost_0;          // significance cost, "_0" variant (exact definition not visible here)
};
75
76
// Forward 1-D transform kernels, addressed as [transform type][size index].
// The size index is log2(length) - 1, i.e. column 0 is the 2-point kernel and
// column 5 the 64-point kernel. A nullptr entry means that no kernel exists
// for that type/length combination.
FwdTrans *const fastFwdTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
{
  // DCT2: 2 .. 64 point
  {
    fastForwardDCT2_B2,
    fastForwardDCT2_B4,
    fastForwardDCT2_B8,
    fastForwardDCT2_B16,
    fastForwardDCT2_B32,
    fastForwardDCT2_B64
  },
  // DCT8: 4 .. 32 point only
  {
    nullptr,
    fastForwardDCT8_B4,
    fastForwardDCT8_B8,
    fastForwardDCT8_B16,
    fastForwardDCT8_B32,
    nullptr
  },
  // DST7: 4 .. 32 point only
  {
    nullptr,
    fastForwardDST7_B4,
    fastForwardDST7_B8,
    fastForwardDST7_B16,
    fastForwardDST7_B32,
    nullptr
  },
};
82
83
// Inverse 1-D transform kernels, addressed as [transform type][size index].
// The size index is log2(length) - 1, i.e. column 0 is the 2-point kernel and
// column 5 the 64-point kernel. A nullptr entry means that no kernel exists
// for that type/length combination.
InvTrans *const fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
{
  // DCT2: 2 .. 64 point
  {
    fastInverseDCT2_B2,
    fastInverseDCT2_B4,
    fastInverseDCT2_B8,
    fastInverseDCT2_B16,
    fastInverseDCT2_B32,
    fastInverseDCT2_B64
  },
  // DCT8: 4 .. 32 point only
  {
    nullptr,
    fastInverseDCT8_B4,
    fastInverseDCT8_B8,
    fastInverseDCT8_B16,
    fastInverseDCT8_B32,
    nullptr
  },
  // DST7: 4 .. 32 point only
  {
    nullptr,
    fastInverseDST7_B4,
    fastInverseDST7_B8,
    fastInverseDST7_B16,
    fastInverseDST7_B32,
    nullptr
  },
};
89
90
//! \ingroup CommonLib
91
//! \{
92
93
0
//! Returns d * d, evaluated in 64 bit so the product of two ints cannot overflow.
static inline int64_t square( const int d )
{
  const int64_t wide = d;
  return wide * wide;
}
94
95
template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2 )
96
0
{
97
0
  const Pel*  cb  = resCb.buf;
98
0
  const Pel*  cr  = resCr.buf;
99
0
  Pel*        c1  = resC1.buf;
100
0
  Pel*        c2  = resC2.buf;
101
0
  int64_t     d1  = 0;
102
0
  int64_t     d2  = 0;
103
0
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
0
  {
105
0
    for( SizeType x = 0; x < resCb.width; x++ )
106
0
    {
107
0
      int cbx = cb[x], crx = cr[x];
108
0
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
0
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
0
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
0
    }
144
0
  }
145
0
  return std::make_pair(d1,d2);
146
0
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<0>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
147
148
/** Inverse joint Cb/Cr mapping for one compile-time mode, done in place.
 *
 *  Modes +-1 and +-2 derive Cr from Cb, modes +-3 derive Cb from Cr; any other
 *  mode leaves both buffers untouched.
 */
template<int signedMode> void invTransformCbCr( PelBuf& resCb, PelBuf& resCr )
{
  Pel* rowCb = resCb.buf;
  Pel* rowCr = resCr.buf;

  for( SizeType row = 0; row < resCb.height; row++ )
  {
    for( SizeType col = 0; col < resCb.width; col++ )
    {
      switch( signedMode )
      {
      case  1: rowCr[col] =  rowCb[col] >> 1; break;
      case -1: rowCr[col] = -rowCb[col] >> 1; break;   // negate first, then shift
      case  2: rowCr[col] =  rowCb[col];      break;
      case -2: rowCr[col] = -rowCb[col];      break;
      case  3: rowCb[col] =  rowCr[col] >> 1; break;
      case -3: rowCb[col] = -rowCr[col] >> 1; break;   // negate first, then shift
      default:                                break;
      }
    }

    rowCb += resCb.stride;
    rowCr += resCr.stride;
  }
}
Unexecuted instantiation: void vvenc::invTransformCbCr<0>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
165
166
void xFwdLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
167
0
{
168
0
  const int8_t *trMat  = (size > 4) ? g_lfnstFwd8x8[mode][index][0] : g_lfnstFwd4x4[mode][index][0];
169
0
  const int     trSize = (size > 4) ? 48 : 16;
170
0
  int           coef;
171
0
  int *         out = dst;
172
173
0
  for (int j = 0; j < zeroOutSize; j++)
174
0
  {
175
0
    int *         srcPtr   = src;
176
0
    const int8_t *trMatTmp = trMat;
177
0
    coef                   = 0;
178
0
    for (int i = 0; i < trSize; i++)
179
0
    {
180
0
      coef += *srcPtr++ * *trMatTmp++;
181
0
    }
182
0
    *out++ = (coef + 64) >> 7;
183
0
    trMat += trSize;
184
0
  }
185
186
0
  ::memset(out, 0, (trSize - zeroOutSize) * sizeof(int));
187
0
}
188
189
190
void xInvLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
191
0
{
192
0
  int           maxLog2TrDynamicRange = 15;
193
0
  const TCoeff  outputMinimum         = -(1 << maxLog2TrDynamicRange);
194
0
  const TCoeff  outputMaximum         = (1 << maxLog2TrDynamicRange) - 1;
195
0
  const int8_t *trMat                 = (size > 4) ? g_lfnstInv8x8[mode][index][0] : g_lfnstInv4x4[mode][index][0];
196
0
  const int     trSize                = (size > 4) ? 48 : 16;
197
0
  int           resi;
198
0
  int *         out                   = dst;
199
200
0
  for( int j = 0; j < trSize; j++, trMat += 16 )
201
0
  {
202
0
    resi = 0;
203
0
    const int8_t* trMatTmp = trMat;
204
0
    int*          srcPtr   = src;
205
206
0
    for( int i = 0; i < zeroOutSize; i++ )
207
0
    {
208
0
      resi += *srcPtr++ * *trMatTmp++;
209
0
    }
210
211
0
    *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
212
0
  }
213
0
}
214
215
// ====================================================================================================================
216
// TrQuant class member functions
217
// ====================================================================================================================
218
0
/** Allocates the scratch coefficient buffers and wires up the function tables
 *  for the joint Cb/Cr mappings and the LFNST kernels. The quantizer itself is
 *  not created here; that happens in init().
 */
TrQuant::TrQuant() : m_scalingListEnabled(false), m_quant( nullptr )
{
  // scratch buffers, each large enough for one block of maximum transform size
  m_plTempCoeff = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
  m_tmp         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
  m_blk         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );

  // one coefficient buffer per MTS transform mode
  for( int modeIdx = 0; modeIdx < NUM_TRAFO_MODES_MTS; modeIdx++ )
  {
    m_mtsCoeffs[modeIdx] = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
  }

  // both ICT tables are addressed with a signed mode in [-3, 3]; the table
  // pointers are therefore moved to the middle of their backing arrays
  m_invICT     = m_invICTMem + maxAbsIctMode;
  m_fwdICT     = m_fwdICTMem + maxAbsIctMode;

  m_invICT[ 0] = invTransformCbCr< 0>;
  m_fwdICT[ 0] = fwdTransformCbCr< 0>;

  m_invICT[ 1] = invTransformCbCr< 1>;
  m_invICT[-1] = invTransformCbCr<-1>;
  m_fwdICT[ 1] = fwdTransformCbCr< 1>;
  m_fwdICT[-1] = fwdTransformCbCr<-1>;

  m_invICT[ 2] = invTransformCbCr< 2>;
  m_invICT[-2] = invTransformCbCr<-2>;
  m_fwdICT[ 2] = fwdTransformCbCr< 2>;
  m_fwdICT[-2] = fwdTransformCbCr<-2>;

  m_invICT[ 3] = invTransformCbCr< 3>;
  m_invICT[-3] = invTransformCbCr<-3>;
  m_fwdICT[ 3] = fwdTransformCbCr< 3>;
  m_fwdICT[-3] = fwdTransformCbCr<-3>;

  // scalar LFNST kernels as default
  m_fwdLfnstNxN = xFwdLfnstNxNCore;
  m_invLfnstNxN = xInvLfnstNxNCore;

#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_TRAFO
  // runs last, after the scalar defaults above have been set
  initTrQuantX86();
#endif
}
256
257
/** Destroys the quantizer and returns all scratch buffers that were allocated
 *  by the constructor.
 */
TrQuant::~TrQuant()
{
  // deleting a null pointer is a no-op, so no guard is needed
  delete m_quant;
  m_quant = nullptr;

  // frees a buffer if it is set and marks it as released
  auto releaseBuffer = []( TCoeff*& buffer )
  {
    if( buffer )
    {
      xFree( buffer );
      buffer = nullptr;
    }
  };

  releaseBuffer( m_plTempCoeff );
  releaseBuffer( m_blk );
  releaseBuffer( m_tmp );

  // the per-mode MTS buffers are handed to xFree unconditionally and the
  // pointers are left as they are
  for( int modeIdx = 0; modeIdx < NUM_TRAFO_MODES_MTS; modeIdx++ )
  {
    xFree( m_mtsCoeffs[modeIdx] );
  }
}
289
290
void TrQuant::xDeQuant(const TransformUnit& tu,
291
                             CoeffBuf      &dstCoeff,
292
                       const ComponentID   &compID,
293
                       const QpParam       &cQP)
294
0
{
295
0
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_DEQUANT );
296
0
  m_quant->dequant( tu, dstCoeff, compID, cQP );
297
0
}
298
299
void TrQuant::init( const Quant* otherQuant,
300
                    const int  rdoq,
301
                    const bool bUseRDOQTS,
302
                    const bool scalingListsEnabled,
303
                    const bool bEnc,
304
                    const int  thrVal
305
)
306
0
{
307
0
  m_bEnc = bEnc;
308
309
0
  delete m_quant;
310
0
  m_quant = nullptr;
311
312
0
  m_quant = new(std::nothrow) DepQuant( otherQuant, bEnc, scalingListsEnabled );
313
0
  CHECK( !m_quant, "allocation failed" );
314
0
  m_quant->init( rdoq, bUseRDOQTS, thrVal );
315
0
}
316
317
318
/** Reconstructs the residual of one component of a transform unit:
 *  de-quantization, optional inverse LFNST, then either the transform-skip
 *  copy or the inverse primary transform.
 *
 *  \param tu      transform unit to process
 *  \param compID  component to process
 *  \param pResi   receives the reconstructed residual
 *  \param cQP     quantization parameters for the de-quantization
 */
void TrQuant::invTransformNxN( TransformUnit& tu, const ComponentID compID, PelBuf& pResi, const QpParam& cQP )
{
  const CompArea& area    = tu.blocks[compID];
  const uint32_t uiWidth  = area.width;
  const uint32_t uiHeight = area.height;

  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );

  // de-quantized coefficients are kept in the member scratch buffer
  CoeffBuf tempCoeff( m_plTempCoeff, area );
  xDeQuant( tu, tempCoeff, compID, cQP );

  DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

  if( tu.cs->sps->LFNST )
  {
    xInvLfnst( tu, compID );
  }

  const bool isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP;
  if( isTransformSkip )
  {
    xITransformSkip( tempCoeff, pResi, tu, compID );
  }
  else
  {
    xIT( tu, compID, tempCoeff, pResi );
  }

  //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
  DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
}
349
350
std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit& tu, const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr )
351
0
{
352
0
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
353
0
  CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
354
0
  CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );
355
0
  return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 );
356
0
}
357
358
void TrQuant::invTransformICT( const TransformUnit& tu, PelBuf& resCb, PelBuf& resCr )
359
0
{
360
0
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
361
0
  (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr );
362
0
}
363
364
std::vector<int> TrQuant::selectICTCandidates( const TransformUnit& tu, CompStorage* resCb, CompStorage* resCr )
365
0
{
366
0
  CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
367
368
0
  if( !CU::isIntra( *tu.cu ) )
369
0
  {
370
0
    int cbfMask = 3;
371
0
    fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
372
0
    std::vector<int> cbfMasksToTest;
373
0
    cbfMasksToTest.push_back( cbfMask );
374
0
    return cbfMasksToTest;
375
0
  }
376
377
0
  std::pair<int64_t,int64_t> pairDist[4];
378
0
  for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
379
0
  {
380
0
    pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
381
0
  }
382
383
0
  std::vector<int> cbfMasksToTest;
384
0
  int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second );
385
0
  int64_t minDist2  = std::numeric_limits<int64_t>::max();
386
0
  int     cbfMask1  = 0;
387
0
  int     cbfMask2  = 0;
388
0
  for( int cbfMask : { 1, 2, 3 } )
389
0
  {
390
0
    if( pairDist[cbfMask].first < minDist1 )
391
0
    {
392
0
      cbfMask2  = cbfMask1; minDist2  = minDist1;
393
0
      cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
394
0
    }
395
0
    else if( pairDist[cbfMask].first < minDist2 )
396
0
    {
397
0
      cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
398
0
    }
399
0
  }
400
0
  if( cbfMask1 )
401
0
  {
402
0
    cbfMasksToTest.push_back( cbfMask1 );
403
0
  }
404
0
  if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) )
405
0
  {
406
0
    cbfMasksToTest.push_back( cbfMask2 );
407
0
  }
408
409
0
  return cbfMasksToTest;
410
0
}
411
412
413
414
// ------------------------------------------------------------------------------------------------
415
// Logical transform
416
// ------------------------------------------------------------------------------------------------
417
void TrQuant::xSetTrTypes( const TransformUnit& tu, const ComponentID compID, const int width, const int height, int &trTypeHor, int &trTypeVer )
418
0
{
419
0
  const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
420
0
  if (isISP && tu.cu->lfnstIdx)
421
0
  {
422
0
    return;
423
0
  }
424
0
  if (!tu.cs->sps->MTS)
425
0
  {
426
0
    return;
427
0
  }
428
0
  if (CU::isIntra(*tu.cu) && isLuma(compID) && ((tu.cs->sps->getUseImplicitMTS() && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0) || tu.cu->ispMode))
429
0
  {
430
0
    if (width >= 4 && width <= 16)
431
0
      trTypeHor = DST7;
432
0
    if (height >= 4 && height <= 16)
433
0
      trTypeVer = DST7;
434
0
  }
435
0
  else if( tu.cs->sps->MTS && tu.cu->sbtInfo && isLuma(compID)/*isSBT*/ )
436
0
  {
437
0
    const uint8_t sbtIdx = CU::getSbtIdx( tu.cu->sbtInfo );
438
0
    const uint8_t sbtPos = CU::getSbtPos( tu.cu->sbtInfo );
439
440
0
    if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
441
0
    {
442
0
      assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
443
0
      if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
444
0
      {
445
0
        trTypeHor = trTypeVer = DCT2;
446
0
      }
447
0
      else
448
0
      {
449
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
450
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
451
0
      }
452
0
    }
453
0
    else
454
0
    {
455
0
      assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
456
0
      if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
457
0
      {
458
0
        trTypeHor = trTypeVer = DCT2;
459
0
      }
460
0
      else
461
0
      {
462
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
463
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
464
0
      }
465
0
    }
466
0
  }
467
0
  const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->MTS : tu.cs->sps->MTSInter && CU::isInter(*tu.cu)) && isLuma(compID);
468
0
  if (isExplicitMTS)
469
0
  {
470
0
    if (tu.mtsIdx[compID] > MTS_SKIP)
471
0
    {
472
0
      int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
473
0
      int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
474
0
      trTypeHor  = indHor ? DCT8 : DST7;
475
0
      trTypeVer  = indVer ? DCT8 : DST7;
476
0
    }
477
0
  }
478
0
}
479
480
481
void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelBuf& resi, CoeffBuf& dstCoeff, const int width, const int height )
482
0
{
483
0
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
484
485
0
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
486
0
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
487
0
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
488
0
  const uint32_t transformWidthIndex    = Log2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
489
0
  const uint32_t transformHeightIndex   = Log2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
490
491
0
  int trTypeHor = DCT2;
492
0
  int trTypeVer = DCT2;
493
494
0
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
495
496
0
  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
497
0
  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
498
499
0
  if( tu.cu->lfnstIdx )
500
0
  {
501
0
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
502
0
    {
503
0
      skipWidth  = width - 4;
504
0
      skipHeight = height - 4;
505
0
    }
506
0
    else if ((width >= 8 && height >= 8))
507
0
    {
508
0
      skipWidth  = width - 8;
509
0
      skipHeight = height - 8;
510
0
    }
511
0
  }
512
513
0
  TCoeff* block = m_blk;
514
0
  TCoeff* tmp   = m_tmp;
515
516
0
  const Pel* resiBuf    = resi.buf;
517
0
  const int  resiStride = resi.stride;
518
519
0
#if ENABLE_SIMD_TRAFO
520
0
  if( width & 3 )
521
0
#endif
522
0
  {
523
0
    for( int y = 0; y < height; y++ )
524
0
    {
525
0
      for( int x = 0; x < width; x++ )
526
0
      {
527
0
        block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
528
0
      }
529
0
    }
530
0
  }
531
0
#if ENABLE_SIMD_TRAFO
532
0
  else if( width & 7 )
533
0
  {
534
0
    g_tCoeffOps.cpyCoeff4( resiBuf, resiStride, block, width, height );
535
0
  }
536
0
  else
537
0
  {
538
0
    g_tCoeffOps.cpyCoeff8( resiBuf, resiStride, block, width, height );
539
0
  }
540
0
#endif //ENABLE_SIMD_TRAFO
541
542
0
  if (width > 1 && height > 1)
543
0
  {
544
0
    const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
545
0
    const int shift_2nd =  (Log2(height))            + TRANSFORM_MATRIX_SHIFT;
546
0
    CHECK( shift_1st < 0, "Negative shift" );
547
0
    CHECK( shift_2nd < 0, "Negative shift" );
548
0
    fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
549
0
    fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
550
0
  }
551
0
  else if (height == 1)   // 1-D horizontal transform
552
0
  {
553
0
    const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
554
0
    CHECK( shift < 0, "Negative shift" );
555
0
    fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
556
0
  }
557
0
  else   // if (iWidth == 1) //1-D vertical transform
558
0
  {
559
0
    int shift = ((floorLog2(height)) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
560
0
    CHECK(shift < 0, "Negative shift");
561
0
    CHECKD((transformHeightIndex < 0), "There is a problem with the height.");
562
0
    fastFwdTrans[trTypeVer][transformHeightIndex](block, dstCoeff.buf, shift, 1, 0, skipHeight);
563
0
  }
564
0
}
565
566
567
void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pCoeff, PelBuf& pResidual )
568
0
{
569
0
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
570
571
0
  const int      width                  = pCoeff.width;
572
0
  const int      height                 = pCoeff.height;
573
0
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
574
0
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
575
0
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
576
0
  const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
577
0
  const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
578
0
  const uint32_t transformWidthIndex    = Log2(width )- 1;                                // nLog2WidthMinus1, since transform start from 2-point
579
0
  const uint32_t transformHeightIndex   = Log2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
580
581
582
0
  int trTypeHor = DCT2;
583
0
  int trTypeVer = DCT2;
584
585
0
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
586
587
0
  int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
588
0
  int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
589
590
0
  if (tu.cs->sps->LFNST && tu.cu->lfnstIdx)
591
0
  {
592
0
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
593
0
    {
594
0
      skipWidth = width - 4;
595
0
      skipHeight = height - 4;
596
0
    }
597
0
    else if ((width >= 8 && height >= 8))
598
0
    {
599
0
      skipWidth = width - 8;
600
0
      skipHeight = height - 8;
601
0
    }
602
0
  }
603
604
0
  TCoeff *block = m_blk;
605
0
  TCoeff *tmp   = m_tmp;
606
0
  if (width > 1 && height > 1)   // 2-D transform
607
0
  {
608
0
    const int shift_1st =   TRANSFORM_MATRIX_SHIFT + 1; // 1 has been added to shift_1st at the expense of shift_2nd
609
0
    const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth;
610
0
    CHECK( shift_1st < 0, "Negative shift" );
611
0
    CHECK( shift_2nd < 0, "Negative shift" );
612
0
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
613
0
    fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum);
614
0
  }
615
0
  else if (width == 1)   // 1-D vertical transform
616
0
  {
617
0
    int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
618
0
    CHECK(shift < 0, "Negative shift");
619
0
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, block, shift + 1, 1, 0, skipHeight, clipMinimum, clipMaximum);
620
0
  }
621
0
  else   // if(iHeight == 1) //1-D horizontal transform
622
0
  {
623
0
    const int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
624
0
    CHECK(shift < 0, "Negative shift");
625
0
    fastInvTrans[trTypeHor][transformWidthIndex](pCoeff.buf, block, shift + 1, 1, 0, skipWidth, clipMinimum, clipMaximum);
626
0
  }
627
628
0
#if ENABLE_SIMD_TRAFO
629
0
  if( width & 3 )
630
0
#endif //ENABLE_SIMD_TRAFO
631
0
  {
632
0
    Pel       *dst    = pResidual.buf;
633
0
    ptrdiff_t  stride = pResidual.stride;
634
635
0
    for( int y = 0; y < height; y++ )
636
0
    {
637
0
      for( int x = 0; x < width; x++ )
638
0
      {
639
0
        dst[x] = ( Pel ) *block++;
640
0
      }
641
642
0
      dst += stride;
643
0
    }
644
0
  }
645
0
#if ENABLE_SIMD_TRAFO
646
0
  else if( width & 7 )
647
0
  {
648
0
    g_tCoeffOps.cpyResi4( block, pResidual.buf, pResidual.stride, width, height );
649
0
  }
650
0
  else
651
0
  {
652
0
    g_tCoeffOps.cpyResi8( block, pResidual.buf, pResidual.stride, width, height );
653
0
  }
654
0
#endif //ENABLE_SIMD_TRAFO
655
0
}
656
657
/** Wrapper function between HM interface and core NxN transform skipping
658
 */
659
void TrQuant::xITransformSkip(const CCoeffBuf& pCoeff,
660
  PelBuf& pResidual,
661
  const TransformUnit& tu,
662
  const ComponentID compID)
663
0
{
664
0
  const CompArea& area = tu.blocks[compID];
665
0
  const int width = area.width;
666
0
  const int height = area.height;
667
668
0
  for (uint32_t y = 0; y < height; y++)
669
0
  {
670
0
    for (uint32_t x = 0; x < width; x++)
671
0
    {
672
0
      pResidual.at(x, y) = Pel(pCoeff.at(x, y));
673
0
    }
674
0
  }
675
0
}
676
677
/** Forwards the quantization of one component of a transform unit to the
 *  quantizer object, inside a profiler scope for the P_QUANT stage. Builds
 *  with ENABLE_MEASURE_SEARCH_SPACE additionally record the call in the
 *  search space counter.
 *
 *  \param tu        transform unit passed on to the quantizer
 *  \param compID    component to process
 *  \param pSrc      coefficients passed on as quantizer input
 *  \param uiAbsSum  passed on to the quantizer by reference
 *  \param cQP       quantization parameters
 *  \param ctx       context passed on to the quantizer
 */
void TrQuant::xQuant( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx )
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_QUANT );

  m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );

#if ENABLE_MEASURE_SEARCH_SPACE
  g_searchSpaceAcc.addQuant( tu, toChannelType( compID ) );
#endif
}
686
687
688
/** Forward path for one component of a transform unit: primary transform (or
 *  transform skip), optional forward LFNST, quantization and update of the
 *  coded block flag.
 *
 *  \param tu        transform unit to process; mtsIdx[compID] is forced to
 *                   MTS_SKIP when BDPCM is active for the channel type
 *  \param compID    component to process
 *  \param cQP       quantization parameters
 *  \param uiAbsSum  reset to 0 and then passed to the quantizer; the coded
 *                   block flag is set when it ends up above 0
 *  \param ctx       context passed on to the quantizer
 *  \param loadTr    when true, the transform stage is skipped and the
 *                   coefficients stored in m_mtsCoeffs for the current MTS
 *                   index are quantized instead
 */
void TrQuant::transformNxN(TransformUnit &tu, const ComponentID compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr)
{
        CodingStructure &cs = *tu.cs;
  const CompArea& rect      = tu.blocks[compID];
  const uint32_t uiWidth    = rect.width;
  const uint32_t uiHeight   = rect.height;

  const CPelBuf resiBuf     = cs.getResiBuf(rect);

  uiAbsSum = 0;

  if( tu.noResidual )
  {
    // nothing is coded for this unit: the cbf is cleared
    TU::setCbfAtDepth( tu, compID, tu.depth, uiAbsSum > 0 );
    return;
  }

  if (tu.cu->bdpcmM[toChannelType(compID)])
  {
    tu.mtsIdx[compID] = MTS_SKIP;
  }

  // validate both dimensions, as invTransformNxN does for the inverse path
  CHECK( cs.sps->getMaxTbSize() < uiWidth, "Unsupported transformation size" );
  CHECK( cs.sps->getMaxTbSize() < uiHeight, "Unsupported transformation size" );

  CoeffBuf tempCoeff(loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff, rect);

  if (!loadTr)
  {
    DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID );

    if (tu.mtsIdx[compID] == MTS_SKIP)
    {
      xTransformSkip(tu, compID, resiBuf, tempCoeff.buf);
    }
    else
    {
      xT(tu, compID, resiBuf, tempCoeff, uiWidth, uiHeight);
    }
  }

  if (cs.sps->LFNST)
  {
    xFwdLfnst(tu, compID, loadTr);
  }

  DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

  xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx );

  DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );

  // set coded block flag (CBF)
  TU::setCbfAtDepth (tu, compID, tu.depth, uiAbsSum > 0);
}
737
738
/** Pre-selects which transform candidates are worth a full test.
 *
 *  For every candidate in *trModes the residual is transformed (or copied for
 *  transform skip) into m_mtsCoeffs[mode] and the sum of absolute coefficients
 *  is used as a cheap cost. A candidate is then flagged for testing
 *  (TrMode::second) when its cost does not exceed a threshold derived from the
 *  cost of the FIRST candidate in the list, and the number of already accepted
 *  candidates does not exceed maxCand.
 *
 *  \param tu       transform unit; tu.mtsIdx[compID] is overwritten and left at
 *                  the last candidate visited
 *  \param trModes  list of (mode, test-flag) pairs; the flags are written here.
 *                  A null or empty list is accepted and leaves everything
 *                  untouched.
 *  \param maxCand  limit used in the "numTests <= maxCand" acceptance test
 *  \param compID   component to process
 */
void TrQuant::checktransformsNxN( TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, const ComponentID compID)
{
  CodingStructure &cs     = *tu.cs;
  const CompArea& rect    = tu.blocks[compID];
  const uint32_t   width  = rect.width;
  const uint32_t   height = rect.height;

  const CPelBuf resiBuf = cs.getResiBuf(rect);

  CHECK(cs.sps->getMaxTbSize() < width, "Unsupported transformation size");

  // Without candidates there is no reference cost: the threshold computation
  // below would dereference begin() of an empty vector (undefined behaviour).
  if (trModes == nullptr || trModes->empty())
  {
    return;
  }

  const double   facBB[]   = { 1.2, 1.3, 1.3, 1.4, 1.5 };
  const uint32_t numCoeffs = width * height;

  int                 pos = 0;   // index of the candidate inside *trModes
  std::vector<TrCost> trCosts;
  trCosts.reserve(trModes->size());

  for (const auto& mode : *trModes)
  {
    tu.mtsIdx[compID] = mode.first;

    if (tu.noResidual)
    {
      // No residual: every candidate gets a zero cost.
      trCosts.push_back(TrCost(0, pos++));
      continue;
    }

    CoeffBuf   tempCoeff(m_mtsCoeffs[tu.mtsIdx[compID]], rect);
    const bool isTrSkip = (tu.mtsIdx[compID] == MTS_SKIP);

    if (isTrSkip)
    {
      xTransformSkip(tu, compID, resiBuf, tempCoeff.buf);
    }
    else
    {
      xT(tu, compID, resiBuf, tempCoeff, width, height);
    }

    // Unsigned index with its own name: the original loop reused "pos",
    // shadowing the candidate counter, and compared int against unsigned.
    int sumAbs = 0;
    for (uint32_t coeffIdx = 0; coeffIdx < numCoeffs; coeffIdx++)
    {
      sumAbs += abs(tempCoeff.buf[coeffIdx]);
    }

    double scaleSAD = 1.0;
    if (isTrSkip)
    {
      if (((floorLog2(width) + floorLog2(height)) & 1) == 1)
      {
        scaleSAD = 1.0 / 1.414213562;   // compensate for not scaling transform skip coefficients by 1/sqrt(2)
      }
      // Transform-skip samples are not scaled by the transform; apply the shift here.
      int trShift = getTransformShift(tu.cu->slice->sps->bitDepths[CH_L], rect.size(), tu.cu->slice->sps->getMaxLog2TrDynamicRange());
      scaleSAD *= pow(2, trShift);
    }

    // Clamp to the int range before the narrowing conversion.
    trCosts.push_back(TrCost(int(std::min<double>(sumAbs * scaleSAD, std::numeric_limits<int>::max())), pos++));
  }

  // The first candidate is the reference; trCosts is non-empty at this point.
  int          numTests = 0;
  const double fac      = facBB[std::max(0, floorLog2(std::max(width, height)) - 2)];
  const double thr      = fac * trCosts.front().first;
  const double thrTS    = trCosts.front().first;

  for (const auto& cost : trCosts)
  {
    // Mode index 1 (presumably MTS_SKIP - confirm against the enum) gets the
    // tighter threshold without the size-dependent factor.
    const double limit  = (trModes->at(cost.second).first == 1) ? thrTS : thr;
    const bool   testTr = cost.first <= limit && numTests <= maxCand;

    trModes->at(cost.second).second = testTr;
    numTests += testTr;
  }
}
805
806
uint32_t TrQuant::xGetLFNSTIntraMode( const Area& tuArea, const uint32_t dirMode )
807
0
{
808
0
  if (dirMode < 2)
809
0
  {
810
0
    return dirMode;
811
0
  }
812
813
0
  static const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
814
815
0
  const int width  = int(tuArea.width);
816
0
  const int height = int(tuArea.height);
817
818
0
  if (width > height && dirMode < 2 + modeShift[floorLog2(width) - floorLog2(height)])
819
0
  {
820
0
    return dirMode + (VDIA_IDX - 1) + (NUM_EXT_LUMA_MODE >> 1);
821
0
  }
822
0
  else if (height > width && dirMode > VDIA_IDX - modeShift[floorLog2(height) - floorLog2(width)])
823
0
  {
824
0
    return dirMode - (VDIA_IDX + 1) + (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE;
825
0
  }
826
827
0
  return dirMode;
828
0
}
829
830
831
bool TrQuant::xGetTransposeFlag(uint32_t intraMode)
832
0
{
833
0
  return ((intraMode >= NUM_LUMA_MODE) && (intraMode >= (NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1))))
834
0
         || ((intraMode < NUM_LUMA_MODE) && (intraMode > DIA_IDX));
835
0
}
836
837
838
void TrQuant::xInvLfnst(const TransformUnit &tu, const ComponentID compID)
839
0
{
840
0
  const CompArea &area     = tu.blocks[compID];
841
0
  const uint32_t  width    = area.width;
842
0
  const uint32_t  height   = area.height;
843
0
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
844
0
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
845
0
  {
846
0
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
847
0
    const bool         whge3 = width >= 8 && height >= 8;
848
0
    const ScanElement *scan =
849
0
      whge3
850
0
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
851
0
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));
852
0
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
853
854
0
    if (CU::isLMCMode( cu.intraDir[toChannelType(compID)]))
855
0
    {
856
0
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
857
0
    }
858
0
    if (CU::isMIP(cu, toChannelType(compID)))
859
0
    {
860
0
      intraMode = PLANAR_IDX;
861
0
    }
862
0
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
863
864
0
    if (lfnstIdx < 3)
865
0
    {
866
0
      if (tu.cu->ispMode && isLuma(compID))
867
0
      {
868
0
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
869
0
      }
870
0
      else
871
0
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
872
0
      bool      transposeFlag = xGetTransposeFlag(intraMode);
873
0
      const int sbSize        = whge3 ? 8 : 4;
874
0
      bool      tu4x4Flag     = (width == 4 && height == 4);
875
0
      bool      tu8x8Flag     = (width == 8 && height == 8);
876
0
      TCoeff *  lfnstTemp;
877
0
      TCoeff *  coeffTemp;
878
0
      int       y;
879
0
      lfnstTemp                  = m_tempInMatrix;   // inverse spectral rearrangement
880
0
      coeffTemp                  = m_plTempCoeff;
881
0
      TCoeff *           dst     = lfnstTemp;
882
0
      const ScanElement *scanPtr = scan;
883
0
      for (y = 0; y < 16; y++)
884
0
      {
885
0
        *dst++ = coeffTemp[scanPtr->idx];
886
0
        scanPtr++;
887
0
      }
888
889
0
      m_invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
890
891
0
      lfnstTemp = m_tempOutMatrix;   // inverse spectral rearrangement
892
893
0
      if (transposeFlag)
894
0
      {
895
0
        if (sbSize == 4)
896
0
        {
897
0
          for (y = 0; y < 4; y++)
898
0
          {
899
0
            coeffTemp[0] = lfnstTemp[0];
900
0
            coeffTemp[1] = lfnstTemp[4];
901
0
            coeffTemp[2] = lfnstTemp[8];
902
0
            coeffTemp[3] = lfnstTemp[12];
903
0
            lfnstTemp++;
904
0
            coeffTemp += width;
905
0
          }
906
0
        }
907
0
        else   // ( sbSize == 8 )
908
0
        {
909
0
          for (y = 0; y < 8; y++)
910
0
          {
911
0
            coeffTemp[0] = lfnstTemp[0];
912
0
            coeffTemp[1] = lfnstTemp[8];
913
0
            coeffTemp[2] = lfnstTemp[16];
914
0
            coeffTemp[3] = lfnstTemp[24];
915
0
            if (y < 4)
916
0
            {
917
0
              coeffTemp[4] = lfnstTemp[32];
918
0
              coeffTemp[5] = lfnstTemp[36];
919
0
              coeffTemp[6] = lfnstTemp[40];
920
0
              coeffTemp[7] = lfnstTemp[44];
921
0
            }
922
0
            lfnstTemp++;
923
0
            coeffTemp += width;
924
0
          }
925
0
        }
926
0
      }
927
0
      else
928
0
      {
929
0
        for (y = 0; y < sbSize; y++)
930
0
        {
931
0
          uint32_t uiStride = (y < 4) ? sbSize : 4;
932
0
          ::memcpy(coeffTemp, lfnstTemp, uiStride * sizeof(TCoeff));
933
0
          lfnstTemp += uiStride;
934
0
          coeffTemp += width;
935
0
        }
936
0
      }
937
0
    }
938
0
  }
939
0
}
940
941
942
void TrQuant::xFwdLfnst(const TransformUnit &tu, const ComponentID compID, const bool loadTr)
943
0
{
944
0
  const CompArea &area     = tu.blocks[compID];
945
0
  const uint32_t  width    = area.width;
946
0
  const uint32_t  height   = area.height;
947
0
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
948
0
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
949
0
  {
950
0
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
951
0
    const bool         whge3 = width >= 8 && height >= 8;
952
0
    const ScanElement *scan =
953
0
      whge3
954
0
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
955
0
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));   
956
0
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
957
958
0
    if (CU::isLMCMode(cu.intraDir[toChannelType(compID)]))
959
0
    {
960
0
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
961
0
    }
962
0
    if (CU::isMIP(cu, toChannelType(compID)))
963
0
    {
964
0
      intraMode = PLANAR_IDX;
965
0
    }
966
0
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
967
968
0
    if (lfnstIdx < 3)
969
0
    {
970
0
      if (tu.cu->ispMode && isLuma(compID))
971
0
      {
972
0
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
973
0
      }
974
0
      else
975
0
      {
976
0
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
977
0
      }
978
0
      bool      transposeFlag = xGetTransposeFlag(intraMode);
979
0
      const int sbSize        = whge3 ? 8 : 4;
980
0
      bool      tu4x4Flag     = (width == 4 && height == 4);
981
0
      bool      tu8x8Flag     = (width == 8 && height == 8);
982
0
      TCoeff*   lfnstTemp;
983
0
      TCoeff*   coeffTemp;
984
0
      TCoeff*   tempCoeff     = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
985
986
0
      int y;
987
0
      lfnstTemp = m_tempInMatrix;   // forward low frequency non-separable transform
988
0
      coeffTemp = tempCoeff;
989
990
0
      if (transposeFlag)
991
0
      {
992
0
        if (sbSize == 4)
993
0
        {
994
0
          for (y = 0; y < 4; y++)
995
0
          {
996
0
            lfnstTemp[0]  = coeffTemp[0];
997
0
            lfnstTemp[4]  = coeffTemp[1];
998
0
            lfnstTemp[8]  = coeffTemp[2];
999
0
            lfnstTemp[12] = coeffTemp[3];
1000
0
            lfnstTemp++;
1001
0
            coeffTemp += width;
1002
0
          }
1003
0
        }
1004
0
        else   // ( sbSize == 8 )
1005
0
        {
1006
0
          for (y = 0; y < 8; y++)
1007
0
          {
1008
0
            lfnstTemp[0]  = coeffTemp[0];
1009
0
            lfnstTemp[8]  = coeffTemp[1];
1010
0
            lfnstTemp[16] = coeffTemp[2];
1011
0
            lfnstTemp[24] = coeffTemp[3];
1012
0
            if (y < 4)
1013
0
            {
1014
0
              lfnstTemp[32] = coeffTemp[4];
1015
0
              lfnstTemp[36] = coeffTemp[5];
1016
0
              lfnstTemp[40] = coeffTemp[6];
1017
0
              lfnstTemp[44] = coeffTemp[7];
1018
0
            }
1019
0
            lfnstTemp++;
1020
0
            coeffTemp += width;
1021
0
          }
1022
0
        }
1023
0
      }
1024
0
      else
1025
0
      {
1026
0
        for (y = 0; y < sbSize; y++)
1027
0
        {
1028
0
          uint32_t uiStride = (y < 4) ? sbSize : 4;
1029
0
          ::memcpy(lfnstTemp, coeffTemp, uiStride * sizeof(TCoeff));
1030
0
          lfnstTemp += uiStride;
1031
0
          coeffTemp += width;
1032
0
        }
1033
0
      }
1034
1035
0
      m_fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
1036
1037
0
      lfnstTemp                        = m_tempOutMatrix;   // forward spectral rearrangement
1038
0
      coeffTemp                        = tempCoeff;
1039
0
      const ScanElement *scanPtr       = scan;
1040
0
      int                lfnstCoeffNum = (sbSize == 4) ? sbSize * sbSize : 48;
1041
0
      for (y = 0; y < lfnstCoeffNum; y++)
1042
0
      {
1043
0
        coeffTemp[scanPtr->idx] = *lfnstTemp++;
1044
0
        scanPtr++;
1045
0
      }
1046
0
    }
1047
0
  }
1048
0
}
1049
1050
void TrQuant::xTransformSkip(const TransformUnit& tu, const ComponentID& compID, const CPelBuf& resi, TCoeff* psCoeff)
1051
0
{
1052
0
  const CompArea& rect = tu.blocks[compID];
1053
0
  const uint32_t width = rect.width;
1054
0
  const uint32_t height = rect.height;
1055
1056
0
  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
1057
0
  {
1058
0
    for (uint32_t x = 0; x < width; x++, coefficientIndex++)
1059
0
    {
1060
0
      psCoeff[coefficientIndex] = TCoeff(resi.at(x, y));
1061
0
    }
1062
0
  }
1063
0
}
1064
} // namespace vvenc
1065
1066
//! \}
1067