Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/TrQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     TrQuant.cpp
45
    \brief    transform and quantization class
46
*/
47
48
#include "TrQuant.h"
49
#include "TrQuant_EMT.h"
50
#include "QuantRDOQ.h"
51
#include "DepQuant.h"
52
#include "UnitTools.h"
53
#include "ContextModelling.h"
54
#include "CodingStructure.h"
55
#include "dtrace_buffer.h"
56
#include "TimeProfiler.h"
57
#include "SearchSpaceCounter.h"
58
59
#include <stdlib.h>
60
#include <memory.h>
61
62
//! \ingroup CommonLib
63
//! \{
64
65
namespace vvenc {
66
67
struct coeffGroupRDStats
68
{
69
  int    iNNZbeforePos0;
70
  double d64CodedLevelandDist; // distortion and level cost only
71
  double d64UncodedDist;    // all zero coded block distortion
72
  double d64SigCost;
73
  double d64SigCost_0;
74
};
75
76
FwdTrans *const fastFwdTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
77
{
78
  { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 },
79
  { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr },
80
  { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr },
81
};
82
83
InvTrans *const fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
84
{
85
  { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 },
86
  { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr },
87
  { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr },
88
};
89
90
//! \ingroup CommonLib
91
//! \{
92
93
311M
static inline int64_t square( const int d ) { return d * (int64_t)d; }
94
95
template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2 )
96
957k
{
97
957k
  const Pel*  cb  = resCb.buf;
98
957k
  const Pel*  cr  = resCr.buf;
99
957k
  Pel*        c1  = resC1.buf;
100
957k
  Pel*        c2  = resC2.buf;
101
957k
  int64_t     d1  = 0;
102
957k
  int64_t     d2  = 0;
103
12.6M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
11.7M
  {
105
167M
    for( SizeType x = 0; x < resCb.width; x++ )
106
155M
    {
107
155M
      int cbx = cb[x], crx = cr[x];
108
155M
      if      ( signedMode ==  1 )
109
38.9M
      {
110
38.9M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
38.9M
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
38.9M
      }
113
116M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
116M
      else if ( signedMode ==  2 )
119
38.9M
      {
120
38.9M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
38.9M
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
38.9M
      }
123
77.8M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
77.8M
      else if ( signedMode ==  3 )
129
38.9M
      {
130
38.9M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
38.9M
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
38.9M
      }
133
38.9M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
38.9M
      else
139
38.9M
      {
140
38.9M
        d1   += square( cbx );
141
38.9M
        d2   += square( crx );
142
38.9M
      }
143
155M
    }
144
11.7M
  }
145
957k
  return std::make_pair(d1,d2);
146
957k
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<0>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
239k
{
97
239k
  const Pel*  cb  = resCb.buf;
98
239k
  const Pel*  cr  = resCr.buf;
99
239k
  Pel*        c1  = resC1.buf;
100
239k
  Pel*        c2  = resC2.buf;
101
239k
  int64_t     d1  = 0;
102
239k
  int64_t     d2  = 0;
103
3.17M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
2.93M
  {
105
41.8M
    for( SizeType x = 0; x < resCb.width; x++ )
106
38.9M
    {
107
38.9M
      int cbx = cb[x], crx = cr[x];
108
38.9M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
38.9M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
38.9M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
38.9M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
38.9M
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
38.9M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
38.9M
      else
139
38.9M
      {
140
38.9M
        d1   += square( cbx );
141
38.9M
        d2   += square( crx );
142
38.9M
      }
143
38.9M
    }
144
2.93M
  }
145
239k
  return std::make_pair(d1,d2);
146
239k
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
239k
{
97
239k
  const Pel*  cb  = resCb.buf;
98
239k
  const Pel*  cr  = resCr.buf;
99
239k
  Pel*        c1  = resC1.buf;
100
239k
  Pel*        c2  = resC2.buf;
101
239k
  int64_t     d1  = 0;
102
239k
  int64_t     d2  = 0;
103
3.17M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
2.93M
  {
105
41.8M
    for( SizeType x = 0; x < resCb.width; x++ )
106
38.9M
    {
107
38.9M
      int cbx = cb[x], crx = cr[x];
108
38.9M
      if      ( signedMode ==  1 )
109
38.9M
      {
110
38.9M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
38.9M
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
38.9M
      }
113
0
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
0
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
38.9M
    }
144
2.93M
  }
145
239k
  return std::make_pair(d1,d2);
146
239k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
239k
{
97
239k
  const Pel*  cb  = resCb.buf;
98
239k
  const Pel*  cr  = resCr.buf;
99
239k
  Pel*        c1  = resC1.buf;
100
239k
  Pel*        c2  = resC2.buf;
101
239k
  int64_t     d1  = 0;
102
239k
  int64_t     d2  = 0;
103
3.17M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
2.93M
  {
105
41.8M
    for( SizeType x = 0; x < resCb.width; x++ )
106
38.9M
    {
107
38.9M
      int cbx = cb[x], crx = cr[x];
108
38.9M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
38.9M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
38.9M
      else if ( signedMode ==  2 )
119
38.9M
      {
120
38.9M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
38.9M
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
38.9M
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
38.9M
    }
144
2.93M
  }
145
239k
  return std::make_pair(d1,d2);
146
239k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
239k
{
97
239k
  const Pel*  cb  = resCb.buf;
98
239k
  const Pel*  cr  = resCr.buf;
99
239k
  Pel*        c1  = resC1.buf;
100
239k
  Pel*        c2  = resC2.buf;
101
239k
  int64_t     d1  = 0;
102
239k
  int64_t     d2  = 0;
103
3.17M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
2.93M
  {
105
41.8M
    for( SizeType x = 0; x < resCb.width; x++ )
106
38.9M
    {
107
38.9M
      int cbx = cb[x], crx = cr[x];
108
38.9M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
38.9M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
38.9M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
38.9M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
38.9M
      else if ( signedMode ==  3 )
129
38.9M
      {
130
38.9M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
38.9M
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
38.9M
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
38.9M
    }
144
2.93M
  }
145
239k
  return std::make_pair(d1,d2);
146
239k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
147
148
template<int signedMode> void invTransformCbCr( PelBuf& resCb, PelBuf& resCr )
149
236k
{
150
236k
  Pel*  cb  = resCb.buf;
151
236k
  Pel*  cr  = resCr.buf;
152
3.13M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
2.89M
  {
154
41.4M
    for( SizeType x = 0; x < resCb.width; x++ )
155
38.5M
    {
156
38.5M
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
38.5M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
38.5M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
177k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
177k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
38.5M
    }
163
2.89M
  }
164
236k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<0>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
235k
{
150
235k
  Pel*  cb  = resCb.buf;
151
235k
  Pel*  cr  = resCr.buf;
152
3.11M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
2.88M
  {
154
41.2M
    for( SizeType x = 0; x < resCb.width; x++ )
155
38.4M
    {
156
38.4M
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
38.4M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
38.4M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
0
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
0
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
38.4M
    }
163
2.88M
  }
164
235k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
1.45k
{
150
1.45k
  Pel*  cb  = resCb.buf;
151
1.45k
  Pel*  cr  = resCr.buf;
152
17.6k
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
16.2k
  {
154
193k
    for( SizeType x = 0; x < resCb.width; x++ )
155
177k
    {
156
177k
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
177k
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
177k
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
177k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
177k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
177k
    }
163
16.2k
  }
164
1.45k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
165
166
void xFwdLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
167
1.05M
{
168
1.05M
  const int8_t *trMat  = (size > 4) ? g_lfnstFwd8x8[mode][index][0] : g_lfnstFwd4x4[mode][index][0];
169
1.05M
  const int     trSize = (size > 4) ? 48 : 16;
170
1.05M
  int           coef;
171
1.05M
  int *         out = dst;
172
173
16.3M
  for (int j = 0; j < zeroOutSize; j++)
174
15.3M
  {
175
15.3M
    int *         srcPtr   = src;
176
15.3M
    const int8_t *trMatTmp = trMat;
177
15.3M
    coef                   = 0;
178
576M
    for (int i = 0; i < trSize; i++)
179
561M
    {
180
561M
      coef += *srcPtr++ * *trMatTmp++;
181
561M
    }
182
15.3M
    *out++ = (coef + 64) >> 7;
183
15.3M
    trMat += trSize;
184
15.3M
  }
185
186
1.05M
  ::memset(out, 0, (trSize - zeroOutSize) * sizeof(int));
187
1.05M
}
188
189
190
void xInvLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
191
450k
{
192
450k
  int           maxLog2TrDynamicRange = 15;
193
450k
  const TCoeff  outputMinimum         = -(1 << maxLog2TrDynamicRange);
194
450k
  const TCoeff  outputMaximum         = (1 << maxLog2TrDynamicRange) - 1;
195
450k
  const int8_t *trMat                 = (size > 4) ? g_lfnstInv8x8[mode][index][0] : g_lfnstInv4x4[mode][index][0];
196
450k
  const int     trSize                = (size > 4) ? 48 : 16;
197
450k
  int           resi;
198
450k
  int *         out                   = dst;
199
200
16.6M
  for( int j = 0; j < trSize; j++, trMat += 16 )
201
16.2M
  {
202
16.2M
    resi = 0;
203
16.2M
    const int8_t* trMatTmp = trMat;
204
16.2M
    int*          srcPtr   = src;
205
206
246M
    for( int i = 0; i < zeroOutSize; i++ )
207
229M
    {
208
229M
      resi += *srcPtr++ * *trMatTmp++;
209
229M
    }
210
211
16.2M
    *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
212
16.2M
  }
213
450k
}
214
215
// ====================================================================================================================
216
// TrQuant class member functions
217
// ====================================================================================================================
218
17.3k
TrQuant::TrQuant() : m_scalingListEnabled(false), m_quant( nullptr )
219
17.3k
{
220
  // allocate temporary buffers
221
17.3k
  m_plTempCoeff = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
222
17.3k
  m_tmp         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
223
17.3k
  m_blk         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
224
225
121k
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
226
104k
  {
227
104k
    m_mtsCoeffs[i] = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
228
104k
  }
229
230
17.3k
  {
231
17.3k
    m_invICT      = m_invICTMem + maxAbsIctMode;
232
17.3k
    m_invICT[ 0]  = invTransformCbCr< 0>;
233
17.3k
    m_invICT[ 1]  = invTransformCbCr< 1>;
234
17.3k
    m_invICT[-1]  = invTransformCbCr<-1>;
235
17.3k
    m_invICT[ 2]  = invTransformCbCr< 2>;
236
17.3k
    m_invICT[-2]  = invTransformCbCr<-2>;
237
17.3k
    m_invICT[ 3]  = invTransformCbCr< 3>;
238
17.3k
    m_invICT[-3]  = invTransformCbCr<-3>;
239
17.3k
    m_fwdICT      = m_fwdICTMem + maxAbsIctMode;
240
17.3k
    m_fwdICT[ 0]  = fwdTransformCbCr< 0>;
241
17.3k
    m_fwdICT[ 1]  = fwdTransformCbCr< 1>;
242
17.3k
    m_fwdICT[-1]  = fwdTransformCbCr<-1>;
243
17.3k
    m_fwdICT[ 2]  = fwdTransformCbCr< 2>;
244
17.3k
    m_fwdICT[-2]  = fwdTransformCbCr<-2>;
245
17.3k
    m_fwdICT[ 3]  = fwdTransformCbCr< 3>;
246
17.3k
    m_fwdICT[-3]  = fwdTransformCbCr<-3>;
247
17.3k
  }
248
249
17.3k
  m_invLfnstNxN = xInvLfnstNxNCore;
250
17.3k
  m_fwdLfnstNxN = xFwdLfnstNxNCore;
251
252
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_TRAFO
253
  initTrQuantX86();
254
#endif
255
17.3k
}
256
257
TrQuant::~TrQuant()
258
17.3k
{
259
17.3k
  if( m_quant )
260
17.3k
  {
261
17.3k
    delete m_quant;
262
17.3k
    m_quant = nullptr;
263
17.3k
  }
264
265
  // delete temporary buffers
266
17.3k
  if( m_plTempCoeff )
267
17.3k
  {
268
17.3k
    xFree( m_plTempCoeff );
269
17.3k
    m_plTempCoeff = nullptr;
270
17.3k
  }
271
272
17.3k
  if( m_blk )
273
17.3k
  {
274
17.3k
    xFree( m_blk );
275
17.3k
    m_blk = nullptr;
276
17.3k
  }
277
278
17.3k
  if( m_tmp )
279
17.3k
  {
280
17.3k
    xFree( m_tmp );
281
17.3k
    m_tmp = nullptr;
282
17.3k
  }
283
284
121k
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
285
104k
  {
286
104k
     xFree( m_mtsCoeffs[i] );
287
104k
  }
288
17.3k
}
289
290
void TrQuant::xDeQuant(const TransformUnit& tu,
291
                             CoeffBuf      &dstCoeff,
292
                       const ComponentID   &compID,
293
                       const QpParam       &cQP)
294
740k
{
295
740k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_DEQUANT );
296
740k
  m_quant->dequant( tu, dstCoeff, compID, cQP );
297
740k
}
298
299
void TrQuant::init( const Quant* otherQuant,
300
                    const int  rdoq,
301
                    const bool bUseRDOQTS,
302
                    const bool scalingListsEnabled,
303
                    const bool bEnc,
304
                    const int  thrVal
305
)
306
17.3k
{
307
17.3k
  m_bEnc = bEnc;
308
309
17.3k
  delete m_quant;
310
17.3k
  m_quant = nullptr;
311
312
17.3k
  m_quant = new(std::nothrow) DepQuant( otherQuant, bEnc, scalingListsEnabled );
313
17.3k
  CHECK( !m_quant, "allocation failed" );
314
17.3k
  m_quant->init( rdoq, bUseRDOQTS, thrVal );
315
17.3k
}
316
317
318
void TrQuant::invTransformNxN( TransformUnit& tu, const ComponentID compID, PelBuf& pResi, const QpParam& cQP )
319
740k
{
320
740k
  const CompArea& area    = tu.blocks[compID];
321
740k
  const uint32_t uiWidth  = area.width;
322
740k
  const uint32_t uiHeight = area.height;
323
324
740k
  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );
325
326
740k
  {
327
740k
    CoeffBuf tempCoeff = CoeffBuf( m_plTempCoeff, area );
328
740k
    xDeQuant( tu, tempCoeff, compID, cQP );
329
330
740k
    DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );
331
332
740k
    if (tu.cs->sps->LFNST)
333
740k
    {
334
740k
      xInvLfnst(tu, compID);
335
740k
    }
336
740k
    if (tu.mtsIdx[compID] == MTS_SKIP)
337
43.8k
    {
338
43.8k
      xITransformSkip(tempCoeff, pResi, tu, compID);
339
43.8k
    }
340
696k
    else
341
696k
    {
342
696k
      xIT(tu, compID, tempCoeff, pResi);
343
696k
    }
344
740k
  }
345
346
  //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
347
740k
  DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
348
740k
}
349
350
std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit& tu, const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr )
351
957k
{
352
957k
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
353
957k
  CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
354
957k
  CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );
355
957k
  return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 );
356
957k
}
357
358
void TrQuant::invTransformICT( const TransformUnit& tu, PelBuf& resCb, PelBuf& resCr )
359
236k
{
360
236k
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
361
236k
  (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr );
362
236k
}
363
364
std::vector<int> TrQuant::selectICTCandidates( const TransformUnit& tu, CompStorage* resCb, CompStorage* resCr )
365
239k
{
366
239k
  CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
367
368
239k
  if( !CU::isIntra( *tu.cu ) )
369
0
  {
370
0
    int cbfMask = 3;
371
0
    fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
372
0
    std::vector<int> cbfMasksToTest;
373
0
    cbfMasksToTest.push_back( cbfMask );
374
0
    return cbfMasksToTest;
375
0
  }
376
377
239k
  std::pair<int64_t,int64_t> pairDist[4];
378
1.19M
  for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
379
957k
  {
380
957k
    pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
381
957k
  }
382
383
239k
  std::vector<int> cbfMasksToTest;
384
239k
  int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second );
385
239k
  int64_t minDist2  = std::numeric_limits<int64_t>::max();
386
239k
  int     cbfMask1  = 0;
387
239k
  int     cbfMask2  = 0;
388
239k
  for( int cbfMask : { 1, 2, 3 } )
389
718k
  {
390
718k
    if( pairDist[cbfMask].first < minDist1 )
391
474k
    {
392
474k
      cbfMask2  = cbfMask1; minDist2  = minDist1;
393
474k
      cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
394
474k
    }
395
243k
    else if( pairDist[cbfMask].first < minDist2 )
396
239k
    {
397
239k
      cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
398
239k
    }
399
718k
  }
400
239k
  if( cbfMask1 )
401
239k
  {
402
239k
    cbfMasksToTest.push_back( cbfMask1 );
403
239k
  }
404
239k
  if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) )
405
0
  {
406
0
    cbfMasksToTest.push_back( cbfMask2 );
407
0
  }
408
409
239k
  return cbfMasksToTest;
410
239k
}
411
412
413
414
// ------------------------------------------------------------------------------------------------
415
// Logical transform
416
// ------------------------------------------------------------------------------------------------
417
void TrQuant::xSetTrTypes( const TransformUnit& tu, const ComponentID compID, const int width, const int height, int &trTypeHor, int &trTypeVer )
418
2.39M
{
419
2.39M
  const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
420
2.39M
  if (isISP && tu.cu->lfnstIdx)
421
17.1k
  {
422
17.1k
    return;
423
17.1k
  }
424
2.37M
  if (!tu.cs->sps->MTS)
425
0
  {
426
0
    return;
427
0
  }
428
2.37M
  if (CU::isIntra(*tu.cu) && isLuma(compID) && ((tu.cs->sps->getUseImplicitMTS() && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0) || tu.cu->ispMode))
429
75.1k
  {
430
75.1k
    if (width >= 4 && width <= 16)
431
34.5k
      trTypeHor = DST7;
432
75.1k
    if (height >= 4 && height <= 16)
433
33.3k
      trTypeVer = DST7;
434
75.1k
  }
435
2.29M
  else if( tu.cs->sps->MTS && tu.cu->sbtInfo && isLuma(compID)/*isSBT*/ )
436
0
  {
437
0
    const uint8_t sbtIdx = CU::getSbtIdx( tu.cu->sbtInfo );
438
0
    const uint8_t sbtPos = CU::getSbtPos( tu.cu->sbtInfo );
439
440
0
    if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
441
0
    {
442
0
      assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
443
0
      if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
444
0
      {
445
0
        trTypeHor = trTypeVer = DCT2;
446
0
      }
447
0
      else
448
0
      {
449
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
450
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
451
0
      }
452
0
    }
453
0
    else
454
0
    {
455
0
      assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
456
0
      if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
457
0
      {
458
0
        trTypeHor = trTypeVer = DCT2;
459
0
      }
460
0
      else
461
0
      {
462
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
463
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
464
0
      }
465
0
    }
466
0
  }
467
2.37M
  const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->MTS : tu.cs->sps->MTSInter && CU::isInter(*tu.cu)) && isLuma(compID);
468
2.37M
  if (isExplicitMTS)
469
176k
  {
470
176k
    if (tu.mtsIdx[compID] > MTS_SKIP)
471
0
    {
472
0
      int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
473
0
      int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
474
0
      trTypeHor  = indHor ? DCT8 : DST7;
475
0
      trTypeVer  = indVer ? DCT8 : DST7;
476
0
    }
477
176k
  }
478
2.37M
}
479
480
481
void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelBuf& resi, CoeffBuf& dstCoeff, const int width, const int height )
482
1.69M
{
483
1.69M
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
484
485
1.69M
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
486
1.69M
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
487
1.69M
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
488
1.69M
  const uint32_t transformWidthIndex    = Log2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
489
1.69M
  const uint32_t transformHeightIndex   = Log2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
490
491
1.69M
  int trTypeHor = DCT2;
492
1.69M
  int trTypeVer = DCT2;
493
494
1.69M
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
495
496
1.69M
  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
497
1.69M
  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
498
499
1.69M
  if( tu.cu->lfnstIdx )
500
1.05M
  {
501
1.05M
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
502
305k
    {
503
305k
      skipWidth  = width - 4;
504
305k
      skipHeight = height - 4;
505
305k
    }
506
749k
    else if ((width >= 8 && height >= 8))
507
682k
    {
508
682k
      skipWidth  = width - 8;
509
682k
      skipHeight = height - 8;
510
682k
    }
511
1.05M
  }
512
513
1.69M
  TCoeff* block = m_blk;
514
1.69M
  TCoeff* tmp   = m_tmp;
515
516
1.69M
  const Pel* resiBuf    = resi.buf;
517
1.69M
  const int  resiStride = resi.stride;
518
519
1.69M
#if ENABLE_SIMD_TRAFO
520
1.69M
  if( width & 3 )
521
0
#endif
522
0
  {
523
0
    for( int y = 0; y < height; y++ )
524
0
    {
525
0
      for( int x = 0; x < width; x++ )
526
0
      {
527
0
        block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
528
0
      }
529
0
    }
530
0
  }
531
1.69M
#if ENABLE_SIMD_TRAFO
532
1.69M
  else if( width & 7 )
533
328k
  {
534
328k
    g_tCoeffOps.cpyCoeff4( resiBuf, resiStride, block, width, height );
535
328k
  }
536
1.36M
  else
537
1.36M
  {
538
1.36M
    g_tCoeffOps.cpyCoeff8( resiBuf, resiStride, block, width, height );
539
1.36M
  }
540
1.69M
#endif //ENABLE_SIMD_TRAFO
541
542
1.69M
  if (width > 1 && height > 1)
543
1.69M
  {
544
1.69M
    const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
545
1.69M
    const int shift_2nd =  (Log2(height))            + TRANSFORM_MATRIX_SHIFT;
546
1.69M
    CHECK( shift_1st < 0, "Negative shift" );
547
1.69M
    CHECK( shift_2nd < 0, "Negative shift" );
548
1.69M
    fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
549
1.69M
    fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
550
1.69M
  }
551
278
  else if (height == 1)   // 1-D horizontal transform
552
288
  {
553
288
    const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
554
288
    CHECK( shift < 0, "Negative shift" );
555
288
    fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
556
288
  }
557
18.4E
  else   // if (iWidth == 1) //1-D vertical transform
558
18.4E
  {
559
18.4E
    int shift = ((floorLog2(height)) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
560
18.4E
    CHECK(shift < 0, "Negative shift");
561
18.4E
    CHECKD((transformHeightIndex < 0), "There is a problem with the height.");
562
18.4E
    fastFwdTrans[trTypeVer][transformHeightIndex](block, dstCoeff.buf, shift, 1, 0, skipHeight);
563
18.4E
  }
564
1.69M
}
565
566
567
void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pCoeff, PelBuf& pResidual )
568
696k
{
569
696k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
570
571
696k
  const int      width                  = pCoeff.width;
572
696k
  const int      height                 = pCoeff.height;
573
696k
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
574
696k
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
575
696k
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
576
696k
  const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
577
696k
  const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
578
696k
  const uint32_t transformWidthIndex    = Log2(width )- 1;                                // nLog2WidthMinus1, since transform start from 2-point
579
696k
  const uint32_t transformHeightIndex   = Log2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
580
581
582
696k
  int trTypeHor = DCT2;
583
696k
  int trTypeVer = DCT2;
584
585
696k
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
586
587
696k
  int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
588
696k
  int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
589
590
696k
  if (tu.cs->sps->LFNST && tu.cu->lfnstIdx)
591
450k
  {
592
450k
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
593
131k
    {
594
131k
      skipWidth = width - 4;
595
131k
      skipHeight = height - 4;
596
131k
    }
597
318k
    else if ((width >= 8 && height >= 8))
598
281k
    {
599
281k
      skipWidth = width - 8;
600
281k
      skipHeight = height - 8;
601
281k
    }
602
450k
  }
603
604
696k
  TCoeff *block = m_blk;
605
696k
  TCoeff *tmp   = m_tmp;
606
696k
  if (width > 1 && height > 1)   // 2-D transform
607
696k
  {
608
696k
    const int shift_1st =   TRANSFORM_MATRIX_SHIFT + 1; // 1 has been added to shift_1st at the expense of shift_2nd
609
696k
    const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth;
610
696k
    CHECK( shift_1st < 0, "Negative shift" );
611
696k
    CHECK( shift_2nd < 0, "Negative shift" );
612
696k
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
613
696k
    fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum);
614
696k
  }
615
72
  else if (width == 1)   // 1-D vertical transform
616
0
  {
617
0
    int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
618
0
    CHECK(shift < 0, "Negative shift");
619
0
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, block, shift + 1, 1, 0, skipHeight, clipMinimum, clipMaximum);
620
0
  }
621
72
  else   // if(iHeight == 1) //1-D horizontal transform
622
72
  {
623
72
    const int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
624
72
    CHECK(shift < 0, "Negative shift");
625
72
    fastInvTrans[trTypeHor][transformWidthIndex](pCoeff.buf, block, shift + 1, 1, 0, skipWidth, clipMinimum, clipMaximum);
626
72
  }
627
628
696k
#if ENABLE_SIMD_TRAFO
629
696k
  if( width & 3 )
630
0
#endif //ENABLE_SIMD_TRAFO
631
0
  {
632
0
    Pel       *dst    = pResidual.buf;
633
0
    ptrdiff_t  stride = pResidual.stride;
634
635
0
    for( int y = 0; y < height; y++ )
636
0
    {
637
0
      for( int x = 0; x < width; x++ )
638
0
      {
639
0
        dst[x] = ( Pel ) *block++;
640
0
      }
641
642
0
      dst += stride;
643
0
    }
644
0
  }
645
696k
#if ENABLE_SIMD_TRAFO
646
696k
  else if( width & 7 )
647
153k
  {
648
153k
    g_tCoeffOps.cpyResi4( block, pResidual.buf, pResidual.stride, width, height );
649
153k
  }
650
543k
  else
651
543k
  {
652
543k
    g_tCoeffOps.cpyResi8( block, pResidual.buf, pResidual.stride, width, height );
653
543k
  }
654
696k
#endif //ENABLE_SIMD_TRAFO
655
696k
}
656
657
/** Wrapper function between HM interface and core NxN transform skipping
658
 */
659
void TrQuant::xITransformSkip(const CCoeffBuf& pCoeff,
660
  PelBuf& pResidual,
661
  const TransformUnit& tu,
662
  const ComponentID compID)
663
43.8k
{
664
43.8k
  const CompArea& area = tu.blocks[compID];
665
43.8k
  const int width = area.width;
666
43.8k
  const int height = area.height;
667
668
436k
  for (uint32_t y = 0; y < height; y++)
669
392k
  {
670
4.33M
    for (uint32_t x = 0; x < width; x++)
671
3.93M
    {
672
3.93M
      pResidual.at(x, y) = Pel(pCoeff.at(x, y));
673
3.93M
    }
674
392k
  }
675
43.8k
}
676
677
/** Quantize the coefficients of one component of a TU.
 *
 *  Thin wrapper that forwards all arguments to m_quant->quant() inside a
 *  P_QUANT profiling scope. When ENABLE_MEASURE_SEARCH_SPACE is set, the call
 *  is additionally recorded in g_searchSpaceAcc.
 *
 *  \param tu        transform unit being quantized
 *  \param compID    component being quantized
 *  \param pSrc      transform coefficients to quantize
 *  \param uiAbsSum  passed through to quant() by reference
 *  \param cQP       quantization parameters
 *  \param ctx       context passed through to quant()
 */
void TrQuant::xQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_QUANT );

  // the actual work is done by the quantizer object
  m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );

#if ENABLE_MEASURE_SEARCH_SPACE
  // search-space statistics are tracked per channel type
  g_searchSpaceAcc.addQuant( tu, toChannelType( compID ) );
#endif
}
686
687
688
/** Transform and quantize one component of a transform unit.
 *
 *  Steps: (1) unless loadTr is set, compute the primary transform (or the
 *  transform-skip copy) of the residual into m_plTempCoeff; with loadTr the
 *  coefficients already stored in m_mtsCoeffs[mtsIdx] are used instead,
 *  (2) apply the forward LFNST when the SPS enables it, (3) quantize, and
 *  (4) set the coded block flag from the resulting absolute sum.
 *
 *  \param tu        transform unit; tu.mtsIdx[compID] is forced to MTS_SKIP for BDPCM CUs
 *  \param compID    component to process
 *  \param cQP       quantization parameters
 *  \param uiAbsSum  output: value produced by xQuant(); 0 when tu.noResidual is set
 *  \param ctx       context passed on to xQuant()
 *  \param loadTr    true to reuse stored transform coefficients instead of transforming
 */
void TrQuant::transformNxN(TransformUnit &tu, const ComponentID compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr)
{
  CodingStructure& cs       = *tu.cs;
  const CompArea&  rect     = tu.blocks[compID];
  const uint32_t   uiWidth  = rect.width;
  const uint32_t   uiHeight = rect.height;
  const CPelBuf    resiBuf  = cs.getResiBuf( rect );

  if( tu.noResidual )
  {
    // nothing is coded for this TU: report an empty block and clear the CBF
    uiAbsSum = 0;
    TU::setCbfAtDepth( tu, compID, tu.depth, false );
    return;
  }

  // BDPCM always goes through the transform-skip path
  if( tu.cu->bdpcmM[toChannelType( compID )] )
  {
    tu.mtsIdx[compID] = MTS_SKIP;
  }

  uiAbsSum = 0;
  CHECK( cs.sps->getMaxTbSize() < uiWidth, "Unsupported transformation size" );

  // coefficient storage: either the cached per-MTS buffer or the common temporary buffer
  TCoeff* const coeffStorage = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
  CoeffBuf      tempCoeff( coeffStorage, rect );

  if( !loadTr )
  {
    DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID );

    const bool useTrSkip = ( tu.mtsIdx[compID] == MTS_SKIP );
    if( useTrSkip )
    {
      xTransformSkip( tu, compID, resiBuf, tempCoeff.buf );
    }
    else
    {
      xT( tu, compID, resiBuf, tempCoeff, uiWidth, uiHeight );
    }
  }

  if( cs.sps->LFNST )
  {
    xFwdLfnst( tu, compID, loadTr );
  }

  DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

  xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx );

  DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );

  // the coded block flag is set exactly when quantization left something non-zero
  TU::setCbfAtDepth( tu, compID, tu.depth, uiAbsSum > 0 );
}
737
738
/** Pre-select which transform candidates are worth a full RD test.
 *
 *  For every candidate in trModes the transform (or transform-skip copy) of the
 *  residual is computed into m_mtsCoeffs[mtsIdx] and the sum of absolute
 *  coefficients is used as a cost. A candidate is then marked for testing
 *  (its .second is set) when its cost does not exceed a threshold derived from
 *  the cost of the first candidate and while numTests <= maxCand holds.
 *
 *  Side effects: tu.mtsIdx[compID] is left at the last candidate's index and
 *  m_mtsCoeffs[] holds the coefficients of the evaluated candidates.
 *
 *  \param tu       transform unit providing the residual area and SPS data
 *  \param trModes  in/out candidate list: .first is the MTS index, .second receives the decision
 *  \param maxCand  limit used in the numTests <= maxCand condition
 *  \param compID   component to evaluate
 */
void TrQuant::checktransformsNxN( TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, const ComponentID compID)
{
  CodingStructure &cs     = *tu.cs;
  const CompArea& rect    = tu.blocks[compID];
  const uint32_t   width  = rect.width;
  const uint32_t   height = rect.height;

  const CPelBuf resiBuf = cs.getResiBuf(rect);

  CHECK(cs.sps->getMaxTbSize() < width, "Unsupported transformation size");

  const double        facBB[]   = { 1.2, 1.3, 1.3, 1.4, 1.5 };
  const uint32_t      numCoeffs = width * height;
  std::vector<TrCost> trCosts;
  trCosts.reserve( trModes->size() );

  int pos = 0;   // index of the current candidate inside *trModes
  for( const auto& mode : *trModes )
  {
    tu.mtsIdx[compID] = mode.first;

    if (tu.noResidual)
    {
      // no residual: every candidate costs nothing
      trCosts.push_back(TrCost(0, pos++));
      continue;
    }

    CoeffBuf   tempCoeff(m_mtsCoeffs[tu.mtsIdx[compID]], rect);
    const bool isTrSkip = ( tu.mtsIdx[compID] == MTS_SKIP );

    if (isTrSkip)
    {
      xTransformSkip(tu, compID, resiBuf, tempCoeff.buf);
    }
    else
    {
      xT(tu, compID, resiBuf, tempCoeff, width, height);
    }

    int sumAbs = 0;
    for (uint32_t i = 0; i < numCoeffs; i++)
    {
      sumAbs += abs(tempCoeff.buf[i]);
    }

    double scaleSAD = 1.0;
    if (isTrSkip)
    {
      if (((floorLog2(width) + floorLog2(height)) & 1) == 1)
      {
        scaleSAD = 1.0 / 1.414213562;   // compensate for not scaling transform skip coefficients by 1/sqrt(2)
      }
      // bring the unscaled transform-skip coefficients to the transform's scale
      int trShift = getTransformShift(tu.cu->slice->sps->bitDepths[CH_L], rect.size(), tu.cu->slice->sps->getMaxLog2TrDynamicRange());
      scaleSAD *= pow(2, trShift);
    }

    // saturate at INT_MAX so the conversion back to int is well defined
    trCosts.push_back(TrCost(int(std::min<double>(sumAbs * scaleSAD, std::numeric_limits<int>::max())), pos++));
  }

  // An empty candidate list leaves nothing to decide; returning here also keeps
  // the reference-cost lookup below from touching an empty vector.
  if( trCosts.empty() )
  {
    return;
  }

  // size-dependent tolerance factor; the index is clamped to the table bounds
  const int    lastFac = int( sizeof( facBB ) / sizeof( facBB[0] ) ) - 1;
  const double fac     = facBB[std::min(lastFac, std::max(0, floorLog2(std::max(width, height)) - 2))];
  const double thr     = fac * trCosts.front().first;   // threshold for regular transforms
  const double thrTS   = trCosts.front().first;         // transform skip must not be worse than the first candidate

  int numTests = 0;
  for( const TrCost& cost : trCosts )
  {
    const bool   candIsTrSkip = ( trModes->at(cost.second).first == MTS_SKIP );
    const double limit        = candIsTrSkip ? thrTS : thr;
    const bool   testTr       = cost.first <= limit && numTests <= maxCand;

    trModes->at(cost.second).second = testTr;
    numTests += testTr;
  }
}
805
806
uint32_t TrQuant::xGetLFNSTIntraMode( const Area& tuArea, const uint32_t dirMode )
807
1.50M
{
808
1.50M
  if (dirMode < 2)
809
848k
  {
810
848k
    return dirMode;
811
848k
  }
812
813
657k
  static const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
814
815
657k
  const int width  = int(tuArea.width);
816
657k
  const int height = int(tuArea.height);
817
818
657k
  if (width > height && dirMode < 2 + modeShift[floorLog2(width) - floorLog2(height)])
819
196
  {
820
196
    return dirMode + (VDIA_IDX - 1) + (NUM_EXT_LUMA_MODE >> 1);
821
196
  }
822
657k
  else if (height > width && dirMode > VDIA_IDX - modeShift[floorLog2(height) - floorLog2(width)])
823
61.0k
  {
824
61.0k
    return dirMode - (VDIA_IDX + 1) + (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE;
825
61.0k
  }
826
827
595k
  return dirMode;
828
657k
}
829
830
831
bool TrQuant::xGetTransposeFlag(uint32_t intraMode)
832
1.50M
{
833
1.50M
  return ((intraMode >= NUM_LUMA_MODE) && (intraMode >= (NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1))))
834
1.50M
         || ((intraMode < NUM_LUMA_MODE) && (intraMode > DIA_IDX));
835
1.50M
}
836
837
838
void TrQuant::xInvLfnst(const TransformUnit &tu, const ComponentID compID)
839
740k
{
840
740k
  const CompArea &area     = tu.blocks[compID];
841
740k
  const uint32_t  width    = area.width;
842
740k
  const uint32_t  height   = area.height;
843
740k
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
844
740k
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
845
450k
  {
846
450k
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
847
450k
    const bool         whge3 = width >= 8 && height >= 8;
848
450k
    const ScanElement *scan =
849
450k
      whge3
850
450k
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
851
450k
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));
852
450k
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
853
854
450k
    if (CU::isLMCMode( cu.intraDir[toChannelType(compID)]))
855
74.7k
    {
856
74.7k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
857
74.7k
    }
858
450k
    if (CU::isMIP(cu, toChannelType(compID)))
859
4.37k
    {
860
4.37k
      intraMode = PLANAR_IDX;
861
4.37k
    }
862
450k
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
863
864
450k
    if (lfnstIdx < 3)
865
450k
    {
866
450k
      if (tu.cu->ispMode && isLuma(compID))
867
6.72k
      {
868
6.72k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
869
6.72k
      }
870
444k
      else
871
444k
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
872
450k
      bool      transposeFlag = xGetTransposeFlag(intraMode);
873
450k
      const int sbSize        = whge3 ? 8 : 4;
874
450k
      bool      tu4x4Flag     = (width == 4 && height == 4);
875
450k
      bool      tu8x8Flag     = (width == 8 && height == 8);
876
450k
      TCoeff *  lfnstTemp;
877
450k
      TCoeff *  coeffTemp;
878
450k
      int       y;
879
450k
      lfnstTemp                  = m_tempInMatrix;   // inverse spectral rearrangement
880
450k
      coeffTemp                  = m_plTempCoeff;
881
450k
      TCoeff *           dst     = lfnstTemp;
882
450k
      const ScanElement *scanPtr = scan;
883
7.66M
      for (y = 0; y < 16; y++)
884
7.21M
      {
885
7.21M
        *dst++ = coeffTemp[scanPtr->idx];
886
7.21M
        scanPtr++;
887
7.21M
      }
888
889
450k
      m_invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
890
891
450k
      lfnstTemp = m_tempOutMatrix;   // inverse spectral rearrangement
892
893
450k
      if (transposeFlag)
894
41.2k
      {
895
41.2k
        if (sbSize == 4)
896
8.44k
        {
897
42.2k
          for (y = 0; y < 4; y++)
898
33.7k
          {
899
33.7k
            coeffTemp[0] = lfnstTemp[0];
900
33.7k
            coeffTemp[1] = lfnstTemp[4];
901
33.7k
            coeffTemp[2] = lfnstTemp[8];
902
33.7k
            coeffTemp[3] = lfnstTemp[12];
903
33.7k
            lfnstTemp++;
904
33.7k
            coeffTemp += width;
905
33.7k
          }
906
8.44k
        }
907
32.7k
        else   // ( sbSize == 8 )
908
32.7k
        {
909
294k
          for (y = 0; y < 8; y++)
910
262k
          {
911
262k
            coeffTemp[0] = lfnstTemp[0];
912
262k
            coeffTemp[1] = lfnstTemp[8];
913
262k
            coeffTemp[2] = lfnstTemp[16];
914
262k
            coeffTemp[3] = lfnstTemp[24];
915
262k
            if (y < 4)
916
131k
            {
917
131k
              coeffTemp[4] = lfnstTemp[32];
918
131k
              coeffTemp[5] = lfnstTemp[36];
919
131k
              coeffTemp[6] = lfnstTemp[40];
920
131k
              coeffTemp[7] = lfnstTemp[44];
921
131k
            }
922
262k
            lfnstTemp++;
923
262k
            coeffTemp += width;
924
262k
          }
925
32.7k
        }
926
41.2k
      }
927
409k
      else
928
409k
      {
929
3.04M
        for (y = 0; y < sbSize; y++)
930
2.63M
        {
931
2.63M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
932
2.63M
          ::memcpy(coeffTemp, lfnstTemp, uiStride * sizeof(TCoeff));
933
2.63M
          lfnstTemp += uiStride;
934
2.63M
          coeffTemp += width;
935
2.63M
        }
936
409k
      }
937
450k
    }
938
450k
  }
939
740k
}
940
941
942
void TrQuant::xFwdLfnst(const TransformUnit &tu, const ComponentID compID, const bool loadTr)
943
1.78M
{
944
1.78M
  const CompArea &area     = tu.blocks[compID];
945
1.78M
  const uint32_t  width    = area.width;
946
1.78M
  const uint32_t  height   = area.height;
947
1.78M
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
948
1.78M
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
949
1.05M
  {
950
1.05M
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
951
1.05M
    const bool         whge3 = width >= 8 && height >= 8;
952
1.05M
    const ScanElement *scan =
953
1.05M
      whge3
954
1.05M
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
955
1.05M
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));   
956
1.05M
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
957
958
1.05M
    if (CU::isLMCMode(cu.intraDir[toChannelType(compID)]))
959
94.1k
    {
960
94.1k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
961
94.1k
    }
962
1.05M
    if (CU::isMIP(cu, toChannelType(compID)))
963
7.14k
    {
964
7.14k
      intraMode = PLANAR_IDX;
965
7.14k
    }
966
1.05M
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
967
968
1.05M
    if (lfnstIdx < 3)
969
1.05M
    {
970
1.05M
      if (tu.cu->ispMode && isLuma(compID))
971
10.4k
      {
972
10.4k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
973
10.4k
      }
974
1.04M
      else
975
1.04M
      {
976
1.04M
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
977
1.04M
      }
978
1.05M
      bool      transposeFlag = xGetTransposeFlag(intraMode);
979
1.05M
      const int sbSize        = whge3 ? 8 : 4;
980
1.05M
      bool      tu4x4Flag     = (width == 4 && height == 4);
981
1.05M
      bool      tu8x8Flag     = (width == 8 && height == 8);
982
1.05M
      TCoeff*   lfnstTemp;
983
1.05M
      TCoeff*   coeffTemp;
984
1.05M
      TCoeff*   tempCoeff     = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
985
986
1.05M
      int y;
987
1.05M
      lfnstTemp = m_tempInMatrix;   // forward low frequency non-separable transform
988
1.05M
      coeffTemp = tempCoeff;
989
990
1.05M
      if (transposeFlag)
991
224k
      {
992
224k
        if (sbSize == 4)
993
67.9k
        {
994
339k
          for (y = 0; y < 4; y++)
995
271k
          {
996
271k
            lfnstTemp[0]  = coeffTemp[0];
997
271k
            lfnstTemp[4]  = coeffTemp[1];
998
271k
            lfnstTemp[8]  = coeffTemp[2];
999
271k
            lfnstTemp[12] = coeffTemp[3];
1000
271k
            lfnstTemp++;
1001
271k
            coeffTemp += width;
1002
271k
          }
1003
67.9k
        }
1004
157k
        else   // ( sbSize == 8 )
1005
157k
        {
1006
1.41M
          for (y = 0; y < 8; y++)
1007
1.25M
          {
1008
1.25M
            lfnstTemp[0]  = coeffTemp[0];
1009
1.25M
            lfnstTemp[8]  = coeffTemp[1];
1010
1.25M
            lfnstTemp[16] = coeffTemp[2];
1011
1.25M
            lfnstTemp[24] = coeffTemp[3];
1012
1.25M
            if (y < 4)
1013
628k
            {
1014
628k
              lfnstTemp[32] = coeffTemp[4];
1015
628k
              lfnstTemp[36] = coeffTemp[5];
1016
628k
              lfnstTemp[40] = coeffTemp[6];
1017
628k
              lfnstTemp[44] = coeffTemp[7];
1018
628k
            }
1019
1.25M
            lfnstTemp++;
1020
1.25M
            coeffTemp += width;
1021
1.25M
          }
1022
157k
        }
1023
224k
      }
1024
830k
      else
1025
830k
      {
1026
6.25M
        for (y = 0; y < sbSize; y++)
1027
5.42M
        {
1028
5.42M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
1029
5.42M
          ::memcpy(lfnstTemp, coeffTemp, uiStride * sizeof(TCoeff));
1030
5.42M
          lfnstTemp += uiStride;
1031
5.42M
          coeffTemp += width;
1032
5.42M
        }
1033
830k
      }
1034
1035
1.05M
      m_fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
1036
1037
1.05M
      lfnstTemp                        = m_tempOutMatrix;   // forward spectral rearrangement
1038
1.05M
      coeffTemp                        = tempCoeff;
1039
1.05M
      const ScanElement *scanPtr       = scan;
1040
1.05M
      int                lfnstCoeffNum = (sbSize == 4) ? sbSize * sbSize : 48;
1041
39.7M
      for (y = 0; y < lfnstCoeffNum; y++)
1042
38.7M
      {
1043
38.7M
        coeffTemp[scanPtr->idx] = *lfnstTemp++;
1044
38.7M
        scanPtr++;
1045
38.7M
      }
1046
1.05M
    }
1047
1.05M
  }
1048
1.78M
}
1049
1050
void TrQuant::xTransformSkip(const TransformUnit& tu, const ComponentID& compID, const CPelBuf& resi, TCoeff* psCoeff)
1051
107k
{
1052
107k
  const CompArea& rect = tu.blocks[compID];
1053
107k
  const uint32_t width = rect.width;
1054
107k
  const uint32_t height = rect.height;
1055
1056
1.17M
  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
1057
1.06M
  {
1058
12.7M
    for (uint32_t x = 0; x < width; x++, coefficientIndex++)
1059
11.6M
    {
1060
11.6M
      psCoeff[coefficientIndex] = TCoeff(resi.at(x, y));
1061
11.6M
    }
1062
1.06M
  }
1063
107k
}
1064
} // namespace vvenc
1065
1066
//! \}
1067