Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/TrQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     TrQuant.cpp
45
    \brief    transform and quantization class
46
*/
47
48
#include "TrQuant.h"
49
#include "TrQuant_EMT.h"
50
#include "QuantRDOQ.h"
51
#include "DepQuant.h"
52
#include "UnitTools.h"
53
#include "ContextModelling.h"
54
#include "CodingStructure.h"
55
#include "dtrace_buffer.h"
56
#include "TimeProfiler.h"
57
#include "SearchSpaceCounter.h"
58
59
#include <stdlib.h>
60
#include <memory.h>
61
62
//! \ingroup CommonLib
63
//! \{
64
65
namespace vvenc {
66
67
// Rate-distortion bookkeeping record for one coefficient group.
// NOTE(review): nothing in the visible part of this file reads or writes this struct; the field
// notes below that are not original comments are inferred from the member names - confirm.
struct coeffGroupRDStats
68
{
69
  // presumably the count of non-zero coefficients located before position 0 - TODO confirm
  int    iNNZbeforePos0;
70
  double d64CodedLevelandDist; // distortion and level cost only
71
  double d64UncodedDist;    // all zero coded block distortion
72
  // presumably the significance-signalling cost of the group - TODO confirm
  double d64SigCost;
73
  // presumably the significance cost belonging to position 0 - TODO confirm
  double d64SigCost_0;
74
};
75
76
// Forward 1-D transform kernels, addressed as fastFwdTrans[transform type][size index].
// Columns hold the 2-, 4-, 8-, 16-, 32- and 64-point kernels; xT() derives the column as Log2(size) - 1.
// The three rows list the DCT2, DCT8 and DST7 kernels in that order. DCT8 and DST7 provide no
// 2-point or 64-point kernel, so those slots are nullptr and must never be called.
// NOTE(review): the row order presumably mirrors the DCT2/DCT8/DST7 enumerator values - confirm.
FwdTrans *const fastFwdTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
77
{
78
  { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 },
79
  { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr },
80
  { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr },
81
};
82
83
// Inverse 1-D transform kernels, laid out as [transform type][size index].
// Columns hold the 2-, 4-, 8-, 16-, 32- and 64-point kernels; the three rows list the DCT2, DCT8
// and DST7 kernels in that order. DCT8 and DST7 provide no 2-point or 64-point kernel, so those
// slots are nullptr and must never be called.
// NOTE(review): no caller is visible in this part of the file; the indexing convention is
// presumably the same as for the forward kernel table (size index = Log2(size) - 1) - confirm.
InvTrans *const fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
84
{
85
  { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 },
86
  { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr },
87
  { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr },
88
};
89
90
//! \ingroup CommonLib
91
//! \{
92
93
320M
// Returns d * d. The operand is widened to 64 bits before multiplying, so the product
// cannot overflow for any int input.
static inline int64_t square( const int d )
{
  const int64_t wide = d;
  return wide * wide;
}
94
95
// Forward joint Cb/Cr residual transform for one block; the formula is selected at compile time
// by the signedMode template argument.
//   signedMode +1/-1 : writes resC1 = ( 4*Cb +/- 2*Cr ) / 5
//   signedMode +2/-2 : writes resC1 = ( Cb +/- Cr ) / 2
//   signedMode +3/-3 : writes resC2 = ( 4*Cr +/- 2*Cb ) / 5
//   any other value  : writes nothing
// Returns { d1, d2 }. For the six joint modes d1 is the summed squared difference between the
// input residuals and the Cb/Cr pair rebuilt from the joint signal (the rebuild formulas are the
// ones used by invTransformCbCr), and d2 stays 0. For any other mode d1 is the sum of squared Cb
// samples and d2 the sum of squared Cr samples.
// Width and height are taken from resCb only; every buffer is advanced by its own stride.
template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2 )
96
972k
{
97
972k
  const Pel*  cb  = resCb.buf;
98
972k
  const Pel*  cr  = resCr.buf;
99
972k
  Pel*        c1  = resC1.buf;
100
972k
  Pel*        c2  = resC2.buf;
101
972k
  int64_t     d1  = 0;
102
972k
  int64_t     d2  = 0;
103
12.9M
  // all four row pointers move to the next row in the loop increment
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
12.0M
  {
105
172M
    for( SizeType x = 0; x < resCb.width; x++ )
106
160M
    {
107
160M
      // widen to int so the weighted sums below are not evaluated in Pel precision
      int cbx = cb[x], crx = cr[x];
108
160M
      // signedMode is a compile-time constant, so only one branch of this chain can ever be taken per instantiation
      if      ( signedMode ==  1 )
109
40.1M
      {
110
40.1M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
40.1M
        // rebuilt pair: Cb = c1, Cr = c1 >> 1
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
40.1M
      }
113
120M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        // rebuilt pair: Cb = c1, Cr = (-c1) >> 1 (unary minus binds tighter than the shift)
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
120M
      else if ( signedMode ==  2 )
119
40.1M
      {
120
40.1M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
40.1M
        // rebuilt pair: Cb = c1, Cr = c1
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
40.1M
      }
123
80.2M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        // rebuilt pair: Cb = c1, Cr = -c1
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
80.2M
      else if ( signedMode ==  3 )
129
40.1M
      {
130
40.1M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
40.1M
        // rebuilt pair: Cb = c2 >> 1, Cr = c2
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
40.1M
      }
133
40.1M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        // rebuilt pair: Cb = (-c2) >> 1, Cr = c2
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
40.1M
      else
139
40.1M
      {
140
40.1M
        // no joint signal: report the energy of each component separately
        d1   += square( cbx );
141
40.1M
        d2   += square( crx );
142
40.1M
      }
143
160M
    }
144
12.0M
  }
145
972k
  return std::make_pair(d1,d2);
146
972k
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<0>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
243k
{
97
243k
  const Pel*  cb  = resCb.buf;
98
243k
  const Pel*  cr  = resCr.buf;
99
243k
  Pel*        c1  = resC1.buf;
100
243k
  Pel*        c2  = resC2.buf;
101
243k
  int64_t     d1  = 0;
102
243k
  int64_t     d2  = 0;
103
3.24M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.00M
  {
105
43.1M
    for( SizeType x = 0; x < resCb.width; x++ )
106
40.1M
    {
107
40.1M
      int cbx = cb[x], crx = cr[x];
108
40.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
40.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
40.1M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
40.1M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
40.1M
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
40.1M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
40.1M
      else
139
40.1M
      {
140
40.1M
        d1   += square( cbx );
141
40.1M
        d2   += square( crx );
142
40.1M
      }
143
40.1M
    }
144
3.00M
  }
145
243k
  return std::make_pair(d1,d2);
146
243k
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
243k
{
97
243k
  const Pel*  cb  = resCb.buf;
98
243k
  const Pel*  cr  = resCr.buf;
99
243k
  Pel*        c1  = resC1.buf;
100
243k
  Pel*        c2  = resC2.buf;
101
243k
  int64_t     d1  = 0;
102
243k
  int64_t     d2  = 0;
103
3.24M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.00M
  {
105
43.1M
    for( SizeType x = 0; x < resCb.width; x++ )
106
40.1M
    {
107
40.1M
      int cbx = cb[x], crx = cr[x];
108
40.1M
      if      ( signedMode ==  1 )
109
40.1M
      {
110
40.1M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
40.1M
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
40.1M
      }
113
0
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
0
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
40.1M
    }
144
3.00M
  }
145
243k
  return std::make_pair(d1,d2);
146
243k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
243k
{
97
243k
  const Pel*  cb  = resCb.buf;
98
243k
  const Pel*  cr  = resCr.buf;
99
243k
  Pel*        c1  = resC1.buf;
100
243k
  Pel*        c2  = resC2.buf;
101
243k
  int64_t     d1  = 0;
102
243k
  int64_t     d2  = 0;
103
3.24M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.00M
  {
105
43.1M
    for( SizeType x = 0; x < resCb.width; x++ )
106
40.1M
    {
107
40.1M
      int cbx = cb[x], crx = cr[x];
108
40.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
40.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
40.1M
      else if ( signedMode ==  2 )
119
40.1M
      {
120
40.1M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
40.1M
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
40.1M
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
40.1M
    }
144
3.00M
  }
145
243k
  return std::make_pair(d1,d2);
146
243k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
243k
{
97
243k
  const Pel*  cb  = resCb.buf;
98
243k
  const Pel*  cr  = resCr.buf;
99
243k
  Pel*        c1  = resC1.buf;
100
243k
  Pel*        c2  = resC2.buf;
101
243k
  int64_t     d1  = 0;
102
243k
  int64_t     d2  = 0;
103
3.24M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.00M
  {
105
43.1M
    for( SizeType x = 0; x < resCb.width; x++ )
106
40.1M
    {
107
40.1M
      int cbx = cb[x], crx = cr[x];
108
40.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
40.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
40.1M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
40.1M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
40.1M
      else if ( signedMode ==  3 )
129
40.1M
      {
130
40.1M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
40.1M
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
40.1M
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
40.1M
    }
144
3.00M
  }
145
243k
  return std::make_pair(d1,d2);
146
243k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
147
148
// Inverse joint Cb/Cr residual transform, applied in place; the rule is selected at compile time
// by the signedMode template argument.
//   signedMode +1/-1 : Cr = (+/-Cb) >> 1   (Cb is left untouched)
//   signedMode +2/-2 : Cr =  +/-Cb         (Cb is left untouched)
//   signedMode +3/-3 : Cb = (+/-Cr) >> 1   (Cr is left untouched)
//   any other value  : both buffers are left untouched (the loops still run)
// Width and height are taken from resCb only; each buffer is advanced by its own stride.
template<int signedMode> void invTransformCbCr( PelBuf& resCb, PelBuf& resCr )
149
240k
{
150
240k
  Pel*  cb  = resCb.buf;
151
240k
  Pel*  cr  = resCr.buf;
152
3.21M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
2.97M
  {
154
42.7M
    for( SizeType x = 0; x < resCb.width; x++ )
155
39.7M
    {
156
39.7M
      // in the negative modes the sample is negated first and shifted afterwards (unary minus binds tighter than >>)
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
39.7M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
39.7M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
163k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
163k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
39.7M
    }
163
2.97M
  }
164
240k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<0>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
239k
{
150
239k
  Pel*  cb  = resCb.buf;
151
239k
  Pel*  cr  = resCr.buf;
152
3.19M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
2.95M
  {
154
42.5M
    for( SizeType x = 0; x < resCb.width; x++ )
155
39.6M
    {
156
39.6M
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
39.6M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
39.6M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
0
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
0
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
39.6M
    }
163
2.95M
  }
164
239k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
1.36k
{
150
1.36k
  Pel*  cb  = resCb.buf;
151
1.36k
  Pel*  cr  = resCr.buf;
152
16.1k
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
14.8k
  {
154
178k
    for( SizeType x = 0; x < resCb.width; x++ )
155
163k
    {
156
163k
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
163k
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
163k
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
163k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
163k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
163k
    }
163
14.8k
  }
164
1.36k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
165
166
// Forward LFNST kernel (plain C++ version): multiplies the input vector with the selected matrix.
//   src         : input samples; trSize of them are read (48 when size > 4, otherwise 16)
//   dst         : output buffer; exactly trSize ints are written
//   mode, index : select the matrix within g_lfnstFwd8x8 (size > 4) or g_lfnstFwd4x4
//   zeroOutSize : number of outputs that are actually computed; the remaining
//                 trSize - zeroOutSize outputs are set to 0
// NOTE(review): zeroOutSize is presumably never larger than trSize; if it were, the byte count
// passed to memset would be negative before its conversion to an unsigned size - confirm callers.
void xFwdLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
167
1.07M
{
168
1.07M
  const int8_t *trMat  = (size > 4) ? g_lfnstFwd8x8[mode][index][0] : g_lfnstFwd4x4[mode][index][0];
169
1.07M
  const int     trSize = (size > 4) ? 48 : 16;
170
1.07M
  int           coef;
171
1.07M
  int *         out = dst;
172
173
16.6M
  // one output coefficient per iteration: dot product of the whole input with one matrix row
  for (int j = 0; j < zeroOutSize; j++)
174
15.6M
  {
175
15.6M
    int *         srcPtr   = src;
176
15.6M
    const int8_t *trMatTmp = trMat;
177
15.6M
    coef                   = 0;
178
589M
    for (int i = 0; i < trSize; i++)
179
574M
    {
180
574M
      coef += *srcPtr++ * *trMatTmp++;
181
574M
    }
182
15.6M
    // add half of 128, then shift right by 7 to remove the matrix scaling
    *out++ = (coef + 64) >> 7;
183
15.6M
    // advance to the next matrix row (rows are trSize entries long)
    trMat += trSize;
184
15.6M
  }
185
186
1.07M
  // clear the outputs that were not computed
  ::memset(out, 0, (trSize - zeroOutSize) * sizeof(int));
187
1.07M
}
188
189
190
// Inverse LFNST kernel (plain C++ version).
//   src         : input coefficients; only the first zeroOutSize of them are read
//   dst         : output buffer; trSize ints are written (48 when size > 4, otherwise 16)
//   mode, index : select the matrix within g_lfnstInv8x8 (size > 4) or g_lfnstInv4x4
// Every output is rounded, scaled down by 7 bits and clipped to [ -(1 << 15), (1 << 15) - 1 ].
void xInvLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
191
458k
{
192
458k
  int           maxLog2TrDynamicRange = 15;
193
458k
  const TCoeff  outputMinimum         = -(1 << maxLog2TrDynamicRange);
194
458k
  const TCoeff  outputMaximum         = (1 << maxLog2TrDynamicRange) - 1;
195
458k
  const int8_t *trMat                 = (size > 4) ? g_lfnstInv8x8[mode][index][0] : g_lfnstInv4x4[mode][index][0];
196
458k
  const int     trSize                = (size > 4) ? 48 : 16;
197
458k
  int           resi;
198
458k
  int *         out                   = dst;
199
200
17.0M
  // the matrix pointer always advances by 16 entries per output, for both matrix sizes
  for( int j = 0; j < trSize; j++, trMat += 16 )
201
16.6M
  {
202
16.6M
    resi = 0;
203
16.6M
    const int8_t* trMatTmp = trMat;
204
16.6M
    int*          srcPtr   = src;
205
206
252M
    // only the first zeroOutSize inputs contribute to the sum
    for( int i = 0; i < zeroOutSize; i++ )
207
235M
    {
208
235M
      resi += *srcPtr++ * *trMatTmp++;
209
235M
    }
210
211
16.6M
    // the ( int ) cast applies to the parenthesised sum, the shift by 7 follows it
    *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
212
16.6M
  }
213
458k
}
214
215
// ====================================================================================================================
216
// TrQuant class member functions
217
// ====================================================================================================================
218
17.7k
// Constructs the transform/quantization helper: allocates the scratch coefficient buffers and
// installs the default function pointers for the joint Cb/Cr transforms and the LFNST kernels.
// m_quant stays nullptr here; it is created later by init().
TrQuant::TrQuant() : m_scalingListEnabled(false), m_quant( nullptr )
219
17.7k
{
220
  // allocate temporary buffers, each sized for MAX_TB_SIZEY x MAX_TB_SIZEY coefficients
221
17.7k
  m_plTempCoeff = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
222
17.7k
  m_tmp         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
223
17.7k
  m_blk         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
224
225
124k
  // one additional buffer of the same size per entry of m_mtsCoeffs
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
226
106k
  {
227
106k
    m_mtsCoeffs[i] = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
228
106k
  }
229
230
17.7k
  {
231
17.7k
    // m_invICT / m_fwdICT point maxAbsIctMode entries into their backing arrays, which is what
    // makes the negative mode indices used below addressable
    m_invICT      = m_invICTMem + maxAbsIctMode;
232
17.7k
    m_invICT[ 0]  = invTransformCbCr< 0>;
233
17.7k
    m_invICT[ 1]  = invTransformCbCr< 1>;
234
17.7k
    m_invICT[-1]  = invTransformCbCr<-1>;
235
17.7k
    m_invICT[ 2]  = invTransformCbCr< 2>;
236
17.7k
    m_invICT[-2]  = invTransformCbCr<-2>;
237
17.7k
    m_invICT[ 3]  = invTransformCbCr< 3>;
238
17.7k
    m_invICT[-3]  = invTransformCbCr<-3>;
239
17.7k
    m_fwdICT      = m_fwdICTMem + maxAbsIctMode;
240
17.7k
    m_fwdICT[ 0]  = fwdTransformCbCr< 0>;
241
17.7k
    m_fwdICT[ 1]  = fwdTransformCbCr< 1>;
242
17.7k
    m_fwdICT[-1]  = fwdTransformCbCr<-1>;
243
17.7k
    m_fwdICT[ 2]  = fwdTransformCbCr< 2>;
244
17.7k
    m_fwdICT[-2]  = fwdTransformCbCr<-2>;
245
17.7k
    m_fwdICT[ 3]  = fwdTransformCbCr< 3>;
246
17.7k
    m_fwdICT[-3]  = fwdTransformCbCr<-3>;
247
17.7k
  }
248
249
17.7k
  // default to the plain C++ LFNST kernels
  m_invLfnstNxN = xInvLfnstNxNCore;
250
17.7k
  m_fwdLfnstNxN = xFwdLfnstNxNCore;
251
252
  // NOTE(review): presumably swaps some of the pointers set above for x86 SIMD versions - confirm in initTrQuantX86()
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_TRAFO
253
  initTrQuantX86();
254
#endif
255
17.7k
}
256
257
// Releases the quantizer created by init() and every scratch buffer allocated by the constructor.
TrQuant::~TrQuant()
258
17.7k
{
259
17.7k
  if( m_quant )
260
17.7k
  {
261
17.7k
    delete m_quant;
262
17.7k
    m_quant = nullptr;
263
17.7k
  }
264
265
  // delete temporary buffers
266
17.7k
  if( m_plTempCoeff )
267
17.7k
  {
268
17.7k
    xFree( m_plTempCoeff );
269
17.7k
    m_plTempCoeff = nullptr;
270
17.7k
  }
271
272
17.7k
  if( m_blk )
273
17.7k
  {
274
17.7k
    xFree( m_blk );
275
17.7k
    m_blk = nullptr;
276
17.7k
  }
277
278
17.7k
  if( m_tmp )
279
17.7k
  {
280
17.7k
    xFree( m_tmp );
281
17.7k
    m_tmp = nullptr;
282
17.7k
  }
283
284
124k
  // unlike the buffers above, these are freed without a null test and the pointers are not reset
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
285
106k
  {
286
106k
     xFree( m_mtsCoeffs[i] );
287
106k
  }
288
17.7k
}
289
290
// Dequantizes one component of a transform unit by forwarding to the quantizer object.
//   tu       : transform unit that is being reconstructed
//   dstCoeff : receives the dequantized coefficients
//   compID   : colour component to process
//   cQP      : quantization parameter set handed through to the quantizer
// m_quant is dereferenced without a null test, so init() must have been called before.
void TrQuant::xDeQuant(const TransformUnit& tu,
291
                             CoeffBuf      &dstCoeff,
292
                       const ComponentID   &compID,
293
                       const QpParam       &cQP)
294
752k
{
295
752k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_DEQUANT );
296
752k
  m_quant->dequant( tu, dstCoeff, compID, cQP );
297
752k
}
298
299
// (Re)creates the quantizer. A quantizer left over from an earlier call is deleted first, then a
// new DepQuant is built from otherQuant, bEnc and scalingListsEnabled and initialised with rdoq,
// bUseRDOQTS and thrVal. bEnc is additionally stored in m_bEnc.
void TrQuant::init( const Quant* otherQuant,
300
                    const int  rdoq,
301
                    const bool bUseRDOQTS,
302
                    const bool scalingListsEnabled,
303
                    const bool bEnc,
304
                    const int  thrVal
305
)
306
17.7k
{
307
17.7k
  m_bEnc = bEnc;
308
309
17.7k
  delete m_quant;
310
17.7k
  m_quant = nullptr;
311
312
17.7k
  // nothrow new: a failed allocation yields nullptr, which the CHECK below reports
  m_quant = new(std::nothrow) DepQuant( otherQuant, bEnc, scalingListsEnabled );
313
17.7k
  CHECK( !m_quant, "allocation failed" );
314
17.7k
  m_quant->init( rdoq, bUseRDOQTS, thrVal );
315
17.7k
}
316
317
318
// Reconstructs the residual of one component of a transform unit:
// dequantization -> inverse LFNST (when the SPS enables LFNST) -> inverse transform or
// inverse transform-skip, depending on tu.mtsIdx[compID].
//   tu     : transform unit to process
//   compID : colour component to process
//   pResi  : receives the reconstructed residual
//   cQP    : quantization parameters used for dequantization
void TrQuant::invTransformNxN( TransformUnit& tu, const ComponentID compID, PelBuf& pResi, const QpParam& cQP )
319
752k
{
320
752k
  const CompArea& area    = tu.blocks[compID];
321
752k
  const uint32_t uiWidth  = area.width;
322
752k
  const uint32_t uiHeight = area.height;
323
324
752k
  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );
325
326
752k
  {
327
752k
    // the dequantized coefficients are placed in the scratch buffer m_plTempCoeff
    CoeffBuf tempCoeff = CoeffBuf( m_plTempCoeff, area );
328
752k
    xDeQuant( tu, tempCoeff, compID, cQP );
329
330
752k
    DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );
331
332
752k
    // NOTE(review): tempCoeff is not passed here; xInvLfnst presumably works on m_plTempCoeff directly - confirm
    if (tu.cs->sps->LFNST)
333
752k
    {
334
752k
      xInvLfnst(tu, compID);
335
752k
    }
336
752k
    if (tu.mtsIdx[compID] == MTS_SKIP)
337
44.3k
    {
338
44.3k
      xITransformSkip(tempCoeff, pResi, tu, compID);
339
44.3k
    }
340
708k
    else
341
708k
    {
342
708k
      xIT(tu, compID, tempCoeff, pResi);
343
708k
    }
344
752k
  }
345
346
  //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
347
752k
  DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
348
752k
}
349
350
// Runs the forward joint Cb/Cr transform variant chosen by TU::getICTMode( tu, jointCbCr ).
// All four buffers must have the same size. The call is dispatched through m_fwdICT, which the
// constructor fills with the fwdTransformCbCr<-3..3> instantiations; the returned pair is the
// pair produced by that instantiation.
std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit& tu, const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr )
351
972k
{
352
972k
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
353
972k
  CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
354
972k
  CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );
355
972k
  return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 );
356
972k
}
357
358
// Runs the inverse joint Cb/Cr transform variant chosen by TU::getICTMode( tu ), in place on
// resCb / resCr. Both buffers must have the same size. The call is dispatched through m_invICT,
// which the constructor fills with the invTransformCbCr<-3..3> instantiations.
void TrQuant::invTransformICT( const TransformUnit& tu, PelBuf& resCb, PelBuf& resCr )
359
240k
{
360
240k
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
361
240k
  (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr );
362
240k
}
363
364
// Chooses which joint Cb/Cr cbf masks are worth testing for this transform unit.
//   resCb, resCr : arrays indexed by cbf mask; entry 0 holds the plain Cb / Cr residuals and must
//                  be valid. The entries of the evaluated masks are handed to fwdTransformICT
//                  as its output buffers.
// Returns the masks to test, best candidate first; the list can be empty for intra CUs.
//   non-intra CU : only mask 3 is evaluated and returned.
//   intra CU     : masks 0..3 are evaluated; the mask (1..3) with the smallest distortion is
//                  returned if it beats the smaller of the two plain component energies, and the
//                  runner-up is added when its distortion is close enough to the best one.
std::vector<int> TrQuant::selectICTCandidates( const TransformUnit& tu, CompStorage* resCb, CompStorage* resCr )
365
243k
{
366
243k
  CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
367
368
243k
  if( !CU::isIntra( *tu.cu ) )
369
0
  {
370
0
    int cbfMask = 3;
371
0
    // the distortion pair returned here is not needed and therefore dropped
    fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
372
0
    std::vector<int> cbfMasksToTest;
373
0
    cbfMasksToTest.push_back( cbfMask );
374
0
    return cbfMasksToTest;
375
0
  }
376
377
243k
  std::pair<int64_t,int64_t> pairDist[4];
378
1.21M
  for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
379
972k
  {
380
972k
    pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
381
972k
  }
382
383
243k
  std::vector<int> cbfMasksToTest;
384
243k
  // reference value: the smaller of the two values returned for mask 0
  int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second );
385
243k
  int64_t minDist2  = std::numeric_limits<int64_t>::max();
386
243k
  int     cbfMask1  = 0;
387
243k
  int     cbfMask2  = 0;
388
243k
  // track the best (cbfMask1 / minDist1) and second best (cbfMask2 / minDist2) candidate
  for( int cbfMask : { 1, 2, 3 } )
389
729k
  {
390
729k
    if( pairDist[cbfMask].first < minDist1 )
391
482k
    {
392
482k
      // new best: the previous best becomes the runner-up
      cbfMask2  = cbfMask1; minDist2  = minDist1;
393
482k
      cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
394
482k
    }
395
247k
    else if( pairDist[cbfMask].first < minDist2 )
396
243k
    {
397
243k
      cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
398
243k
    }
399
729k
  }
400
243k
  // cbfMask1 == 0 means that no joint mask beat the reference value
  if( cbfMask1 )
401
243k
  {
402
243k
    cbfMasksToTest.push_back( cbfMask1 );
403
243k
  }
404
243k
  // runner-up is kept when it is within 9/8 of the best distortion, or within 3/2 of the
  // reference value when no joint mask won
  if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) )
405
0
  {
406
0
    cbfMasksToTest.push_back( cbfMask2 );
407
0
  }
408
409
243k
  return cbfMasksToTest;
410
243k
}
411
412
413
414
// ------------------------------------------------------------------------------------------------
415
// Logical transform
416
// ------------------------------------------------------------------------------------------------
417
// Selects the horizontal and vertical transform type for one component of a transform unit.
//   width, height        : block dimensions used by the implicit selection rule
//   trTypeHor, trTypeVer : in/out; they are only overwritten when one of the rules below applies,
//                          so the caller has to pre-set its default (xT() passes DCT2)
// Rules, in evaluation order:
//   1. ISP luma block with a non-zero lfnstIdx, or MTS disabled in the SPS: nothing is changed.
//   2. Implicit selection for intra luma (implicit MTS without LFNST and MIP, or any ISP block):
//      DST7 for each dimension that lies in the range 4..16.
//   3. Otherwise, luma blocks with SBT information: type pair derived from the SBT index/position.
//   4. Explicit MTS on luma with mtsIdx above MTS_SKIP: type pair decoded from mtsIdx. This step
//      runs after steps 2/3 and overwrites their result when it applies.
void TrQuant::xSetTrTypes( const TransformUnit& tu, const ComponentID compID, const int width, const int height, int &trTypeHor, int &trTypeVer )
418
2.43M
{
419
2.43M
  const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
420
2.43M
  if (isISP && tu.cu->lfnstIdx)
421
17.4k
  {
422
17.4k
    return;
423
17.4k
  }
424
2.41M
  if (!tu.cs->sps->MTS)
425
0
  {
426
0
    return;
427
0
  }
428
2.41M
  if (CU::isIntra(*tu.cu) && isLuma(compID) && ((tu.cs->sps->getUseImplicitMTS() && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0) || tu.cu->ispMode))
429
76.3k
  {
430
76.3k
    // each direction is decided on its own; a dimension outside 4..16 keeps the caller's type
    if (width >= 4 && width <= 16)
431
34.7k
      trTypeHor = DST7;
432
76.3k
    if (height >= 4 && height <= 16)
433
34.0k
      trTypeVer = DST7;
434
76.3k
  }
435
2.33M
  // the MTS test is always true at this point because of the early return above
  else if( tu.cs->sps->MTS && tu.cu->sbtInfo && isLuma(compID)/*isSBT*/ )
436
0
  {
437
0
    const uint8_t sbtIdx = CU::getSbtIdx( tu.cu->sbtInfo );
438
0
    const uint8_t sbtPos = CU::getSbtPos( tu.cu->sbtInfo );
439
440
0
    if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
441
0
    {
442
0
      assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
443
0
      // blocks taller than MTS_INTER_MAX_CU_SIZE use DCT2 in both directions
      if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
444
0
      {
445
0
        trTypeHor = trTypeVer = DCT2;
446
0
      }
447
0
      else
448
0
      {
449
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
450
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
451
0
      }
452
0
    }
453
0
    else
454
0
    {
455
0
      assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
456
0
      // blocks wider than MTS_INTER_MAX_CU_SIZE use DCT2 in both directions
      if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
457
0
      {
458
0
        trTypeHor = trTypeVer = DCT2;
459
0
      }
460
0
      else
461
0
      {
462
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
463
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
464
0
      }
465
0
    }
466
0
  }
467
2.41M
  // explicit MTS needs the SPS MTS flag for intra CUs, and the MTSInter flag for inter CUs; luma only
  const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->MTS : tu.cs->sps->MTSInter && CU::isInter(*tu.cu)) && isLuma(compID);
468
2.41M
  if (isExplicitMTS)
469
180k
  {
470
180k
    if (tu.mtsIdx[compID] > MTS_SKIP)
471
0
    {
472
0
      // offset from MTS_DST7_DST7: bit 0 picks the horizontal type, the bits above it the vertical type
      int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
473
0
      int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
474
0
      trTypeHor  = indHor ? DCT8 : DST7;
475
0
      trTypeVer  = indVer ? DCT8 : DST7;
476
0
    }
477
180k
  }
478
2.41M
}
479
480
481
void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelBuf& resi, CoeffBuf& dstCoeff, const int width, const int height )
482
1.72M
{
483
1.72M
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
484
485
1.72M
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
486
1.72M
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
487
1.72M
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
488
1.72M
  const uint32_t transformWidthIndex    = Log2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
489
1.72M
  const uint32_t transformHeightIndex   = Log2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
490
491
1.72M
  int trTypeHor = DCT2;
492
1.72M
  int trTypeVer = DCT2;
493
494
1.72M
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
495
496
1.72M
  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
497
1.72M
  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
498
499
1.72M
  if( tu.cu->lfnstIdx )
500
1.07M
  {
501
1.07M
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
502
308k
    {
503
308k
      skipWidth  = width - 4;
504
308k
      skipHeight = height - 4;
505
308k
    }
506
765k
    else if ((width >= 8 && height >= 8))
507
698k
    {
508
698k
      skipWidth  = width - 8;
509
698k
      skipHeight = height - 8;
510
698k
    }
511
1.07M
  }
512
513
1.72M
  TCoeff* block = m_blk;
514
1.72M
  TCoeff* tmp   = m_tmp;
515
516
1.72M
  const Pel* resiBuf    = resi.buf;
517
1.72M
  const int  resiStride = resi.stride;
518
519
1.72M
#if ENABLE_SIMD_TRAFO
520
1.72M
  if( width & 3 )
521
0
#endif
522
0
  {
523
0
    for( int y = 0; y < height; y++ )
524
0
    {
525
0
      for( int x = 0; x < width; x++ )
526
0
      {
527
0
        block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
528
0
      }
529
0
    }
530
0
  }
531
1.72M
#if ENABLE_SIMD_TRAFO
532
1.72M
  else if( width & 7 )
533
328k
  {
534
328k
    g_tCoeffOps.cpyCoeff4( resiBuf, resiStride, block, width, height );
535
328k
  }
536
1.39M
  else
537
1.39M
  {
538
1.39M
    g_tCoeffOps.cpyCoeff8( resiBuf, resiStride, block, width, height );
539
1.39M
  }
540
1.72M
#endif //ENABLE_SIMD_TRAFO
541
542
1.72M
  if (width > 1 && height > 1)
543
1.72M
  {
544
1.72M
    const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
545
1.72M
    const int shift_2nd =  (Log2(height))            + TRANSFORM_MATRIX_SHIFT;
546
1.72M
    CHECK( shift_1st < 0, "Negative shift" );
547
1.72M
    CHECK( shift_2nd < 0, "Negative shift" );
548
1.72M
    fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
549
1.72M
    fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
550
1.72M
  }
551
218
  else if (height == 1)   // 1-D horizontal transform
552
300
  {
553
300
    const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
554
300
    CHECK( shift < 0, "Negative shift" );
555
300
    fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
556
300
  }
557
18.4E
  else   // if (iWidth == 1) //1-D vertical transform
558
18.4E
  {
559
18.4E
    int shift = ((floorLog2(height)) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
560
18.4E
    CHECK(shift < 0, "Negative shift");
561
18.4E
    CHECKD((transformHeightIndex < 0), "There is a problem with the height.");
562
18.4E
    fastFwdTrans[trTypeVer][transformHeightIndex](block, dstCoeff.buf, shift, 1, 0, skipHeight);
563
18.4E
  }
564
1.72M
}
565
566
567
void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pCoeff, PelBuf& pResidual )
568
708k
{
569
708k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
570
571
708k
  const int      width                  = pCoeff.width;
572
708k
  const int      height                 = pCoeff.height;
573
708k
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
574
708k
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
575
708k
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
576
708k
  const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
577
708k
  const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
578
708k
  const uint32_t transformWidthIndex    = Log2(width )- 1;                                // nLog2WidthMinus1, since transform start from 2-point
579
708k
  const uint32_t transformHeightIndex   = Log2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
580
581
582
708k
  int trTypeHor = DCT2;
583
708k
  int trTypeVer = DCT2;
584
585
708k
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
586
587
708k
  int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
588
708k
  int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
589
590
708k
  if (tu.cs->sps->LFNST && tu.cu->lfnstIdx)
591
458k
  {
592
458k
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
593
133k
    {
594
133k
      skipWidth = width - 4;
595
133k
      skipHeight = height - 4;
596
133k
    }
597
325k
    else if ((width >= 8 && height >= 8))
598
289k
    {
599
289k
      skipWidth = width - 8;
600
289k
      skipHeight = height - 8;
601
289k
    }
602
458k
  }
603
604
708k
  TCoeff *block = m_blk;
605
708k
  TCoeff *tmp   = m_tmp;
606
708k
  if (width > 1 && height > 1)   // 2-D transform
607
708k
  {
608
708k
    const int shift_1st =   TRANSFORM_MATRIX_SHIFT + 1; // 1 has been added to shift_1st at the expense of shift_2nd
609
708k
    const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth;
610
708k
    CHECK( shift_1st < 0, "Negative shift" );
611
708k
    CHECK( shift_2nd < 0, "Negative shift" );
612
708k
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
613
708k
    fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum);
614
708k
  }
615
75
  else if (width == 1)   // 1-D vertical transform
616
0
  {
617
0
    int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
618
0
    CHECK(shift < 0, "Negative shift");
619
0
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, block, shift + 1, 1, 0, skipHeight, clipMinimum, clipMaximum);
620
0
  }
621
75
  else   // if(iHeight == 1) //1-D horizontal transform
622
75
  {
623
75
    const int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
624
75
    CHECK(shift < 0, "Negative shift");
625
75
    fastInvTrans[trTypeHor][transformWidthIndex](pCoeff.buf, block, shift + 1, 1, 0, skipWidth, clipMinimum, clipMaximum);
626
75
  }
627
628
708k
#if ENABLE_SIMD_TRAFO
629
708k
  if( width & 3 )
630
0
#endif //ENABLE_SIMD_TRAFO
631
0
  {
632
0
    Pel       *dst    = pResidual.buf;
633
0
    ptrdiff_t  stride = pResidual.stride;
634
635
0
    for( int y = 0; y < height; y++ )
636
0
    {
637
0
      for( int x = 0; x < width; x++ )
638
0
      {
639
0
        dst[x] = ( Pel ) *block++;
640
0
      }
641
642
0
      dst += stride;
643
0
    }
644
0
  }
645
708k
#if ENABLE_SIMD_TRAFO
646
708k
  else if( width & 7 )
647
154k
  {
648
154k
    g_tCoeffOps.cpyResi4( block, pResidual.buf, pResidual.stride, width, height );
649
154k
  }
650
554k
  else
651
554k
  {
652
554k
    g_tCoeffOps.cpyResi8( block, pResidual.buf, pResidual.stride, width, height );
653
554k
  }
654
708k
#endif //ENABLE_SIMD_TRAFO
655
708k
}
656
657
/** Wrapper function between HM interface and core NxN transform skipping
658
 */
659
void TrQuant::xITransformSkip(const CCoeffBuf& pCoeff,
660
  PelBuf& pResidual,
661
  const TransformUnit& tu,
662
  const ComponentID compID)
663
44.3k
{
664
44.3k
  const CompArea& area = tu.blocks[compID];
665
44.3k
  const int width = area.width;
666
44.3k
  const int height = area.height;
667
668
444k
  for (uint32_t y = 0; y < height; y++)
669
399k
  {
670
4.42M
    for (uint32_t x = 0; x < width; x++)
671
4.02M
    {
672
4.02M
      pResidual.at(x, y) = Pel(pCoeff.at(x, y));
673
4.02M
    }
674
399k
  }
675
44.3k
}
676
677
/** Quantizes the coefficients of one block by delegating to the active quantizer.
 *
 *  \param tu        transform unit that is quantized
 *  \param compID    colour component that is processed
 *  \param pSrc      transform coefficients handed to the quantizer
 *  \param uiAbsSum  forwarded to the quantizer (by reference)
 *  \param cQP       quantization parameters
 *  \param ctx       context models forwarded to the quantizer
 */
void TrQuant::xQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_QUANT );

  m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );

#if ENABLE_MEASURE_SEARCH_SPACE
  // search-space statistics are only gathered in instrumented builds
  g_searchSpaceAcc.addQuant( tu, toChannelType( compID ) );
#endif
}
686
687
688
/** Forward transform (or transform skip), optional LFNST and quantization of one block.
 *
 *  \param tu        transform unit that is processed; its mtsIdx is forced to MTS_SKIP for BDPCM blocks
 *  \param compID    colour component that is processed
 *  \param cQP       quantization parameters
 *  \param uiAbsSum  reset to 0 and then handed to xQuant(); the CBF is set when it ends up > 0
 *  \param ctx       context models forwarded to the quantizer
 *  \param loadTr    when true the primary transform is not recomputed and the coefficients
 *                   stored in m_mtsCoeffs for the current mtsIdx are used instead
 */
void TrQuant::transformNxN(TransformUnit &tu, const ComponentID compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr)
{
  CodingStructure& codStruct = *tu.cs;
  const CompArea&  blkArea   = tu.blocks[compID];
  const uint32_t   blkWidth  = blkArea.width;
  const uint32_t   blkHeight = blkArea.height;

  const CPelBuf resiBuf = codStruct.getResiBuf( blkArea );

  if( tu.noResidual )
  {
    // nothing to code: clear the sum and the coded block flag
    uiAbsSum = 0;
    TU::setCbfAtDepth( tu, compID, tu.depth, false );
    return;
  }

  if( tu.cu->bdpcmM[toChannelType( compID )] )
  {
    tu.mtsIdx[compID] = MTS_SKIP;
  }

  uiAbsSum = 0;
  CHECK( codStruct.sps->getMaxTbSize() < blkWidth, "Unsupported transformation size" );

  TCoeff* const coeffStorage = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
  CoeffBuf      tempCoeff( coeffStorage, blkArea );

  if( !loadTr )
  {
    DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID );

    if( tu.mtsIdx[compID] != MTS_SKIP )
    {
      xT( tu, compID, resiBuf, tempCoeff, blkWidth, blkHeight );
    }
    else
    {
      xTransformSkip( tu, compID, resiBuf, tempCoeff.buf );
    }
  }

  if( codStruct.sps->LFNST )
  {
    xFwdLfnst( tu, compID, loadTr );
  }

  DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

  xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx );

  DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );

  // set coded block flag (CBF)
  const bool hasCoeffs = uiAbsSum > 0;
  TU::setCbfAtDepth( tu, compID, tu.depth, hasCoeffs );
}
737
738
/** Pre-selects the transform candidates that are worth a full test.
 *
 *  Every candidate in trModes is applied to the residual (the coefficients are
 *  kept in m_mtsCoeffs) and the sum of absolute coefficients is used as a cost
 *  estimate. A candidate is then flagged for testing when its cost does not
 *  exceed a threshold derived from the cost of the first candidate and the
 *  number of already selected candidates does not exceed maxCand.
 *
 *  \param tu       transform unit; tu.mtsIdx[compID] is overwritten with each candidate in turn
 *  \param trModes  candidate list; the first member is the mts index, the second member
 *                  is set to the test decision. An empty list is left untouched.
 *  \param maxCand  limit used for the number of selected candidates
 *  \param compID   colour component that is processed
 */
void TrQuant::checktransformsNxN( TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, const ComponentID compID)
{
  CodingStructure &cs     = *tu.cs;
  const CompArea& rect    = tu.blocks[compID];
  const uint32_t   width  = rect.width;
  const uint32_t   height = rect.height;

  const CPelBuf resiBuf = cs.getResiBuf(rect);

  CHECK(cs.sps->getMaxTbSize() < width, "Unsupported transformation size");

  // Without candidates there is no reference cost; dereferencing trCosts.begin() below
  // would be undefined behaviour.
  if( trModes->empty() )
  {
    return;
  }

  const double   facBB[]   = { 1.2, 1.3, 1.3, 1.4, 1.5 };
  const uint32_t numCoeffs = width * height;

  std::vector<TrCost> trCosts;
  trCosts.reserve( trModes->size() );

  int candIdx = 0;
  for( const TrMode& mode : *trModes )
  {
    tu.mtsIdx[compID] = mode.first;

    if( tu.noResidual )
    {
      int sumAbs = 0;
      trCosts.push_back( TrCost( sumAbs, candIdx++ ) );
      continue;
    }

    CoeffBuf   tempCoeff( m_mtsCoeffs[tu.mtsIdx[compID]], rect );
    const bool isTrSkip = tu.mtsIdx[compID] == MTS_SKIP;

    if( isTrSkip )
    {
      xTransformSkip( tu, compID, resiBuf, tempCoeff.buf );
    }
    else
    {
      xT( tu, compID, resiBuf, tempCoeff, width, height );
    }

    int sumAbs = 0;
    for( uint32_t coeffIdx = 0; coeffIdx < numCoeffs; coeffIdx++ )
    {
      sumAbs += abs( tempCoeff.buf[coeffIdx] );
    }

    double scaleSAD = 1.0;
    if( isTrSkip )
    {
      if( ( ( floorLog2( width ) + floorLog2( height ) ) & 1 ) == 1 )
      {
        scaleSAD = 1.0 / 1.414213562;   // compensate for not scaling transform skip coefficients by 1/sqrt(2)
      }

      // use the bit depth of the processed component, as xT()/xIT() do
      const int trShift = getTransformShift( tu.cu->slice->sps->bitDepths[toChannelType( compID )], rect.size(), tu.cu->slice->sps->getMaxLog2TrDynamicRange() );
      scaleSAD *= pow( 2, trShift );
    }

    // clamp before the conversion so that the scaled cost cannot overflow an int
    trCosts.push_back( TrCost( int( std::min<double>( sumAbs * scaleSAD, std::numeric_limits<int>::max() ) ), candIdx++ ) );
  }

  int          numTests = 0;
  const double fac      = facBB[std::max( 0, floorLog2( std::max( width, height ) ) - 2 )];
  const double thr      = fac * trCosts.front().first;
  const double thrTS    = trCosts.front().first;

  for( const TrCost& cost : trCosts )
  {
    TrMode& mode = trModes->at( cost.second );

    // NOTE(review): mode index 1 gets the tighter threshold thrTS; presumably 1 is MTS_SKIP - confirm.
    const bool testTr = cost.first <= ( mode.first == 1 ? thrTS : thr ) && numTests <= maxCand;
    mode.second       = testTr;
    numTests += testTr;
  }
}
805
806
uint32_t TrQuant::xGetLFNSTIntraMode( const Area& tuArea, const uint32_t dirMode )
807
1.53M
{
808
1.53M
  if (dirMode < 2)
809
863k
  {
810
863k
    return dirMode;
811
863k
  }
812
813
669k
  static const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
814
815
669k
  const int width  = int(tuArea.width);
816
669k
  const int height = int(tuArea.height);
817
818
669k
  if (width > height && dirMode < 2 + modeShift[floorLog2(width) - floorLog2(height)])
819
198
  {
820
198
    return dirMode + (VDIA_IDX - 1) + (NUM_EXT_LUMA_MODE >> 1);
821
198
  }
822
669k
  else if (height > width && dirMode > VDIA_IDX - modeShift[floorLog2(height) - floorLog2(width)])
823
61.8k
  {
824
61.8k
    return dirMode - (VDIA_IDX + 1) + (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE;
825
61.8k
  }
826
827
607k
  return dirMode;
828
669k
}
829
830
831
bool TrQuant::xGetTransposeFlag(uint32_t intraMode)
832
1.53M
{
833
1.53M
  return ((intraMode >= NUM_LUMA_MODE) && (intraMode >= (NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1))))
834
1.53M
         || ((intraMode < NUM_LUMA_MODE) && (intraMode > DIA_IDX));
835
1.53M
}
836
837
838
void TrQuant::xInvLfnst(const TransformUnit &tu, const ComponentID compID)
839
752k
{
840
752k
  const CompArea &area     = tu.blocks[compID];
841
752k
  const uint32_t  width    = area.width;
842
752k
  const uint32_t  height   = area.height;
843
752k
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
844
752k
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
845
458k
  {
846
458k
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
847
458k
    const bool         whge3 = width >= 8 && height >= 8;
848
458k
    const ScanElement *scan =
849
458k
      whge3
850
458k
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
851
458k
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));
852
458k
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
853
854
458k
    if (CU::isLMCMode( cu.intraDir[toChannelType(compID)]))
855
75.6k
    {
856
75.6k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
857
75.6k
    }
858
458k
    if (CU::isMIP(cu, toChannelType(compID)))
859
4.48k
    {
860
4.48k
      intraMode = PLANAR_IDX;
861
4.48k
    }
862
458k
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
863
864
458k
    if (lfnstIdx < 3)
865
458k
    {
866
458k
      if (tu.cu->ispMode && isLuma(compID))
867
6.81k
      {
868
6.81k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
869
6.81k
      }
870
452k
      else
871
452k
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
872
458k
      bool      transposeFlag = xGetTransposeFlag(intraMode);
873
458k
      const int sbSize        = whge3 ? 8 : 4;
874
458k
      bool      tu4x4Flag     = (width == 4 && height == 4);
875
458k
      bool      tu8x8Flag     = (width == 8 && height == 8);
876
458k
      TCoeff *  lfnstTemp;
877
458k
      TCoeff *  coeffTemp;
878
458k
      int       y;
879
458k
      lfnstTemp                  = m_tempInMatrix;   // inverse spectral rearrangement
880
458k
      coeffTemp                  = m_plTempCoeff;
881
458k
      TCoeff *           dst     = lfnstTemp;
882
458k
      const ScanElement *scanPtr = scan;
883
7.80M
      for (y = 0; y < 16; y++)
884
7.34M
      {
885
7.34M
        *dst++ = coeffTemp[scanPtr->idx];
886
7.34M
        scanPtr++;
887
7.34M
      }
888
889
458k
      m_invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
890
891
458k
      lfnstTemp = m_tempOutMatrix;   // inverse spectral rearrangement
892
893
458k
      if (transposeFlag)
894
42.4k
      {
895
42.4k
        if (sbSize == 4)
896
8.63k
        {
897
43.1k
          for (y = 0; y < 4; y++)
898
34.5k
          {
899
34.5k
            coeffTemp[0] = lfnstTemp[0];
900
34.5k
            coeffTemp[1] = lfnstTemp[4];
901
34.5k
            coeffTemp[2] = lfnstTemp[8];
902
34.5k
            coeffTemp[3] = lfnstTemp[12];
903
34.5k
            lfnstTemp++;
904
34.5k
            coeffTemp += width;
905
34.5k
          }
906
8.63k
        }
907
33.7k
        else   // ( sbSize == 8 )
908
33.7k
        {
909
304k
          for (y = 0; y < 8; y++)
910
270k
          {
911
270k
            coeffTemp[0] = lfnstTemp[0];
912
270k
            coeffTemp[1] = lfnstTemp[8];
913
270k
            coeffTemp[2] = lfnstTemp[16];
914
270k
            coeffTemp[3] = lfnstTemp[24];
915
270k
            if (y < 4)
916
135k
            {
917
135k
              coeffTemp[4] = lfnstTemp[32];
918
135k
              coeffTemp[5] = lfnstTemp[36];
919
135k
              coeffTemp[6] = lfnstTemp[40];
920
135k
              coeffTemp[7] = lfnstTemp[44];
921
135k
            }
922
270k
            lfnstTemp++;
923
270k
            coeffTemp += width;
924
270k
          }
925
33.7k
        }
926
42.4k
      }
927
416k
      else
928
416k
      {
929
3.10M
        for (y = 0; y < sbSize; y++)
930
2.68M
        {
931
2.68M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
932
2.68M
          ::memcpy(coeffTemp, lfnstTemp, uiStride * sizeof(TCoeff));
933
2.68M
          lfnstTemp += uiStride;
934
2.68M
          coeffTemp += width;
935
2.68M
        }
936
416k
      }
937
458k
    }
938
458k
  }
939
752k
}
940
941
942
void TrQuant::xFwdLfnst(const TransformUnit &tu, const ComponentID compID, const bool loadTr)
943
1.81M
{
944
1.81M
  const CompArea &area     = tu.blocks[compID];
945
1.81M
  const uint32_t  width    = area.width;
946
1.81M
  const uint32_t  height   = area.height;
947
1.81M
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
948
1.81M
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
949
1.07M
  {
950
1.07M
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
951
1.07M
    const bool         whge3 = width >= 8 && height >= 8;
952
1.07M
    const ScanElement *scan =
953
1.07M
      whge3
954
1.07M
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
955
1.07M
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));   
956
1.07M
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
957
958
1.07M
    if (CU::isLMCMode(cu.intraDir[toChannelType(compID)]))
959
95.6k
    {
960
95.6k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
961
95.6k
    }
962
1.07M
    if (CU::isMIP(cu, toChannelType(compID)))
963
7.34k
    {
964
7.34k
      intraMode = PLANAR_IDX;
965
7.34k
    }
966
1.07M
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
967
968
1.07M
    if (lfnstIdx < 3)
969
1.07M
    {
970
1.07M
      if (tu.cu->ispMode && isLuma(compID))
971
10.6k
      {
972
10.6k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
973
10.6k
      }
974
1.06M
      else
975
1.06M
      {
976
1.06M
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
977
1.06M
      }
978
1.07M
      bool      transposeFlag = xGetTransposeFlag(intraMode);
979
1.07M
      const int sbSize        = whge3 ? 8 : 4;
980
1.07M
      bool      tu4x4Flag     = (width == 4 && height == 4);
981
1.07M
      bool      tu8x8Flag     = (width == 8 && height == 8);
982
1.07M
      TCoeff*   lfnstTemp;
983
1.07M
      TCoeff*   coeffTemp;
984
1.07M
      TCoeff*   tempCoeff     = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
985
986
1.07M
      int y;
987
1.07M
      lfnstTemp = m_tempInMatrix;   // forward low frequency non-separable transform
988
1.07M
      coeffTemp = tempCoeff;
989
990
1.07M
      if (transposeFlag)
991
229k
      {
992
229k
        if (sbSize == 4)
993
68.3k
        {
994
341k
          for (y = 0; y < 4; y++)
995
273k
          {
996
273k
            lfnstTemp[0]  = coeffTemp[0];
997
273k
            lfnstTemp[4]  = coeffTemp[1];
998
273k
            lfnstTemp[8]  = coeffTemp[2];
999
273k
            lfnstTemp[12] = coeffTemp[3];
1000
273k
            lfnstTemp++;
1001
273k
            coeffTemp += width;
1002
273k
          }
1003
68.3k
        }
1004
161k
        else   // ( sbSize == 8 )
1005
161k
        {
1006
1.45M
          for (y = 0; y < 8; y++)
1007
1.28M
          {
1008
1.28M
            lfnstTemp[0]  = coeffTemp[0];
1009
1.28M
            lfnstTemp[8]  = coeffTemp[1];
1010
1.28M
            lfnstTemp[16] = coeffTemp[2];
1011
1.28M
            lfnstTemp[24] = coeffTemp[3];
1012
1.28M
            if (y < 4)
1013
644k
            {
1014
644k
              lfnstTemp[32] = coeffTemp[4];
1015
644k
              lfnstTemp[36] = coeffTemp[5];
1016
644k
              lfnstTemp[40] = coeffTemp[6];
1017
644k
              lfnstTemp[44] = coeffTemp[7];
1018
644k
            }
1019
1.28M
            lfnstTemp++;
1020
1.28M
            coeffTemp += width;
1021
1.28M
          }
1022
161k
        }
1023
229k
      }
1024
844k
      else
1025
844k
      {
1026
6.37M
        for (y = 0; y < sbSize; y++)
1027
5.52M
        {
1028
5.52M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
1029
5.52M
          ::memcpy(lfnstTemp, coeffTemp, uiStride * sizeof(TCoeff));
1030
5.52M
          lfnstTemp += uiStride;
1031
5.52M
          coeffTemp += width;
1032
5.52M
        }
1033
844k
      }
1034
1035
1.07M
      m_fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
1036
1037
1.07M
      lfnstTemp                        = m_tempOutMatrix;   // forward spectral rearrangement
1038
1.07M
      coeffTemp                        = tempCoeff;
1039
1.07M
      const ScanElement *scanPtr       = scan;
1040
1.07M
      int                lfnstCoeffNum = (sbSize == 4) ? sbSize * sbSize : 48;
1041
40.6M
      for (y = 0; y < lfnstCoeffNum; y++)
1042
39.5M
      {
1043
39.5M
        coeffTemp[scanPtr->idx] = *lfnstTemp++;
1044
39.5M
        scanPtr++;
1045
39.5M
      }
1046
1.07M
    }
1047
1.07M
  }
1048
1.81M
}
1049
1050
void TrQuant::xTransformSkip(const TransformUnit& tu, const ComponentID& compID, const CPelBuf& resi, TCoeff* psCoeff)
1051
108k
{
1052
108k
  const CompArea& rect = tu.blocks[compID];
1053
108k
  const uint32_t width = rect.width;
1054
108k
  const uint32_t height = rect.height;
1055
1056
1.18M
  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
1057
1.07M
  {
1058
12.8M
    for (uint32_t x = 0; x < width; x++, coefficientIndex++)
1059
11.7M
    {
1060
11.7M
      psCoeff[coefficientIndex] = TCoeff(resi.at(x, y));
1061
11.7M
    }
1062
1.07M
  }
1063
108k
}
1064
} // namespace vvenc
1065
1066
//! \}
1067