Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/TrQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     TrQuant.cpp
45
    \brief    transform and quantization class
46
*/
47
48
#include "TrQuant.h"
49
#include "TrQuant_EMT.h"
50
#include "QuantRDOQ.h"
51
#include "DepQuant.h"
52
#include "UnitTools.h"
53
#include "ContextModelling.h"
54
#include "CodingStructure.h"
55
#include "dtrace_buffer.h"
56
#include "TimeProfiler.h"
57
#include "SearchSpaceCounter.h"
58
59
#include <stdlib.h>
60
#include <memory.h>
61
62
//! \ingroup CommonLib
63
//! \{
64
65
namespace vvenc {
66
67
struct coeffGroupRDStats
68
{
69
  int    iNNZbeforePos0;
70
  double d64CodedLevelandDist; // distortion and level cost only
71
  double d64UncodedDist;    // all zero coded block distortion
72
  double d64SigCost;
73
  double d64SigCost_0;
74
};
75
76
FwdTrans *const fastFwdTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
77
{
78
  { fastForwardDCT2_B2, fastForwardDCT2_B4, fastForwardDCT2_B8, fastForwardDCT2_B16, fastForwardDCT2_B32, fastForwardDCT2_B64 },
79
  { nullptr,            fastForwardDCT8_B4, fastForwardDCT8_B8, fastForwardDCT8_B16, fastForwardDCT8_B32, nullptr },
80
  { nullptr,            fastForwardDST7_B4, fastForwardDST7_B8, fastForwardDST7_B16, fastForwardDST7_B32, nullptr },
81
};
82
83
InvTrans *const fastInvTrans[NUM_TRANS_TYPE][g_numTransformMatrixSizes] =
84
{
85
  { fastInverseDCT2_B2, fastInverseDCT2_B4, fastInverseDCT2_B8, fastInverseDCT2_B16, fastInverseDCT2_B32, fastInverseDCT2_B64 },
86
  { nullptr,            fastInverseDCT8_B4, fastInverseDCT8_B8, fastInverseDCT8_B16, fastInverseDCT8_B32, nullptr },
87
  { nullptr,            fastInverseDST7_B4, fastInverseDST7_B8, fastInverseDST7_B16, fastInverseDST7_B32, nullptr },
88
};
89
90
//! \ingroup CommonLib
91
//! \{
92
93
385M
static inline int64_t square( const int d ) { return d * (int64_t)d; }
94
95
template<int signedMode> std::pair<int64_t,int64_t> fwdTransformCbCr( const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2 )
96
1.13M
{
97
1.13M
  const Pel*  cb  = resCb.buf;
98
1.13M
  const Pel*  cr  = resCr.buf;
99
1.13M
  Pel*        c1  = resC1.buf;
100
1.13M
  Pel*        c2  = resC2.buf;
101
1.13M
  int64_t     d1  = 0;
102
1.13M
  int64_t     d2  = 0;
103
15.3M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
14.2M
  {
105
206M
    for( SizeType x = 0; x < resCb.width; x++ )
106
192M
    {
107
192M
      int cbx = cb[x], crx = cr[x];
108
192M
      if      ( signedMode ==  1 )
109
48.1M
      {
110
48.1M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
48.1M
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
48.1M
      }
113
144M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
144M
      else if ( signedMode ==  2 )
119
48.1M
      {
120
48.1M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
48.1M
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
48.1M
      }
123
96.2M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
96.2M
      else if ( signedMode ==  3 )
129
48.1M
      {
130
48.1M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
48.1M
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
48.1M
      }
133
48.1M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
48.1M
      else
139
48.1M
      {
140
48.1M
        d1   += square( cbx );
141
48.1M
        d2   += square( crx );
142
48.1M
      }
143
192M
    }
144
14.2M
  }
145
1.13M
  return std::make_pair(d1,d2);
146
1.13M
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<0>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
283k
{
97
283k
  const Pel*  cb  = resCb.buf;
98
283k
  const Pel*  cr  = resCr.buf;
99
283k
  Pel*        c1  = resC1.buf;
100
283k
  Pel*        c2  = resC2.buf;
101
283k
  int64_t     d1  = 0;
102
283k
  int64_t     d2  = 0;
103
3.84M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.56M
  {
105
51.6M
    for( SizeType x = 0; x < resCb.width; x++ )
106
48.1M
    {
107
48.1M
      int cbx = cb[x], crx = cr[x];
108
48.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
48.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
48.1M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
48.1M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
48.1M
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
48.1M
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
48.1M
      else
139
48.1M
      {
140
48.1M
        d1   += square( cbx );
141
48.1M
        d2   += square( crx );
142
48.1M
      }
143
48.1M
    }
144
3.56M
  }
145
283k
  return std::make_pair(d1,d2);
146
283k
}
std::__1::pair<long, long> vvenc::fwdTransformCbCr<1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
283k
{
97
283k
  const Pel*  cb  = resCb.buf;
98
283k
  const Pel*  cr  = resCr.buf;
99
283k
  Pel*        c1  = resC1.buf;
100
283k
  Pel*        c2  = resC2.buf;
101
283k
  int64_t     d1  = 0;
102
283k
  int64_t     d2  = 0;
103
3.84M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.56M
  {
105
51.6M
    for( SizeType x = 0; x < resCb.width; x++ )
106
48.1M
    {
107
48.1M
      int cbx = cb[x], crx = cr[x];
108
48.1M
      if      ( signedMode ==  1 )
109
48.1M
      {
110
48.1M
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
48.1M
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
48.1M
      }
113
0
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
0
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
48.1M
    }
144
3.56M
  }
145
283k
  return std::make_pair(d1,d2);
146
283k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-1>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
283k
{
97
283k
  const Pel*  cb  = resCb.buf;
98
283k
  const Pel*  cr  = resCr.buf;
99
283k
  Pel*        c1  = resC1.buf;
100
283k
  Pel*        c2  = resC2.buf;
101
283k
  int64_t     d1  = 0;
102
283k
  int64_t     d2  = 0;
103
3.84M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.56M
  {
105
51.6M
    for( SizeType x = 0; x < resCb.width; x++ )
106
48.1M
    {
107
48.1M
      int cbx = cb[x], crx = cr[x];
108
48.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
48.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
48.1M
      else if ( signedMode ==  2 )
119
48.1M
      {
120
48.1M
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
48.1M
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
48.1M
      }
123
0
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
0
      else if ( signedMode ==  3 )
129
0
      {
130
0
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
0
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
0
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
48.1M
    }
144
3.56M
  }
145
283k
  return std::make_pair(d1,d2);
146
283k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-2>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
std::__1::pair<long, long> vvenc::fwdTransformCbCr<3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
96
283k
{
97
283k
  const Pel*  cb  = resCb.buf;
98
283k
  const Pel*  cr  = resCr.buf;
99
283k
  Pel*        c1  = resC1.buf;
100
283k
  Pel*        c2  = resC2.buf;
101
283k
  int64_t     d1  = 0;
102
283k
  int64_t     d2  = 0;
103
3.84M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride, c1 += resC1.stride, c2 += resC2.stride )
104
3.56M
  {
105
51.6M
    for( SizeType x = 0; x < resCb.width; x++ )
106
48.1M
    {
107
48.1M
      int cbx = cb[x], crx = cr[x];
108
48.1M
      if      ( signedMode ==  1 )
109
0
      {
110
0
        c1[x] = Pel( ( 4*cbx + 2*crx ) / 5 );
111
0
        d1   += square( cbx - c1[x] ) + square( crx - (c1[x]>>1) );
112
0
      }
113
48.1M
      else if ( signedMode == -1 )
114
0
      {
115
0
        c1[x] = Pel( ( 4*cbx - 2*crx ) / 5 );
116
0
        d1   += square( cbx - c1[x] ) + square( crx - (-c1[x]>>1) );
117
0
      }
118
48.1M
      else if ( signedMode ==  2 )
119
0
      {
120
0
        c1[x] = Pel( ( cbx + crx ) / 2 );
121
0
        d1   += square( cbx - c1[x] ) + square( crx - c1[x] );
122
0
      }
123
48.1M
      else if ( signedMode == -2 )
124
0
      {
125
0
        c1[x] = Pel( ( cbx - crx ) / 2 );
126
0
        d1   += square( cbx - c1[x] ) + square( crx + c1[x] );
127
0
      }
128
48.1M
      else if ( signedMode ==  3 )
129
48.1M
      {
130
48.1M
        c2[x] = Pel( ( 4*crx + 2*cbx ) / 5 );
131
48.1M
        d1   += square( cbx - (c2[x]>>1) ) + square( crx - c2[x] );
132
48.1M
      }
133
0
      else if ( signedMode == -3 )
134
0
      {
135
0
        c2[x] = Pel( ( 4*crx - 2*cbx ) / 5 );
136
0
        d1   += square( cbx - (-c2[x]>>1) ) + square( crx - c2[x] );
137
0
      }
138
0
      else
139
0
      {
140
0
        d1   += square( cbx );
141
0
        d2   += square( crx );
142
0
      }
143
48.1M
    }
144
3.56M
  }
145
283k
  return std::make_pair(d1,d2);
146
283k
}
Unexecuted instantiation: std::__1::pair<long, long> vvenc::fwdTransformCbCr<-3>(vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short> const&, vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
147
148
template<int signedMode> void invTransformCbCr( PelBuf& resCb, PelBuf& resCr )
149
279k
{
150
279k
  Pel*  cb  = resCb.buf;
151
279k
  Pel*  cr  = resCr.buf;
152
3.79M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
3.51M
  {
154
51.1M
    for( SizeType x = 0; x < resCb.width; x++ )
155
47.6M
    {
156
47.6M
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
47.6M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
47.6M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
239k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
239k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
47.6M
    }
163
3.51M
  }
164
279k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<0>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Unexecuted instantiation: void vvenc::invTransformCbCr<-1>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
278k
{
150
278k
  Pel*  cb  = resCb.buf;
151
278k
  Pel*  cr  = resCr.buf;
152
3.77M
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
3.49M
  {
154
50.9M
    for( SizeType x = 0; x < resCb.width; x++ )
155
47.4M
    {
156
47.4M
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
47.4M
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
47.4M
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
0
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
0
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
47.4M
    }
163
3.49M
  }
164
278k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-2>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
void vvenc::invTransformCbCr<3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
Line
Count
Source
149
1.79k
{
150
1.79k
  Pel*  cb  = resCb.buf;
151
1.79k
  Pel*  cr  = resCr.buf;
152
22.6k
  for( SizeType y = 0; y < resCb.height; y++, cb += resCb.stride, cr += resCr.stride )
153
20.8k
  {
154
260k
    for( SizeType x = 0; x < resCb.width; x++ )
155
239k
    {
156
239k
      if      ( signedMode ==  1 )  { cr[x] =  cb[x] >> 1;  }
157
239k
      else if ( signedMode == -1 )  { cr[x] = -cb[x] >> 1;  }
158
239k
      else if ( signedMode ==  2 )  { cr[x] =  cb[x]; }
159
239k
      else if ( signedMode == -2 )  { cr[x] = -cb[x]; }
160
239k
      else if ( signedMode ==  3 )  { cb[x] =  cr[x] >> 1; }
161
0
      else if ( signedMode == -3 )  { cb[x] = -cr[x] >> 1; }
162
239k
    }
163
20.8k
  }
164
1.79k
}
Unexecuted instantiation: void vvenc::invTransformCbCr<-3>(vvenc::AreaBuf<short>&, vvenc::AreaBuf<short>&)
165
166
void xFwdLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
167
1.25M
{
168
1.25M
  const int8_t *trMat  = (size > 4) ? g_lfnstFwd8x8[mode][index][0] : g_lfnstFwd4x4[mode][index][0];
169
1.25M
  const int     trSize = (size > 4) ? 48 : 16;
170
1.25M
  int           coef;
171
1.25M
  int *         out = dst;
172
173
19.5M
  for (int j = 0; j < zeroOutSize; j++)
174
18.2M
  {
175
18.2M
    int *         srcPtr   = src;
176
18.2M
    const int8_t *trMatTmp = trMat;
177
18.2M
    coef                   = 0;
178
695M
    for (int i = 0; i < trSize; i++)
179
677M
    {
180
677M
      coef += *srcPtr++ * *trMatTmp++;
181
677M
    }
182
18.2M
    *out++ = (coef + 64) >> 7;
183
18.2M
    trMat += trSize;
184
18.2M
  }
185
186
1.25M
  ::memset(out, 0, (trSize - zeroOutSize) * sizeof(int));
187
1.25M
}
188
189
190
void xInvLfnstNxNCore(int *src, int *dst, const uint32_t mode, const uint32_t index, const uint32_t size, int zeroOutSize)
191
534k
{
192
534k
  int           maxLog2TrDynamicRange = 15;
193
534k
  const TCoeff  outputMinimum         = -(1 << maxLog2TrDynamicRange);
194
534k
  const TCoeff  outputMaximum         = (1 << maxLog2TrDynamicRange) - 1;
195
534k
  const int8_t *trMat                 = (size > 4) ? g_lfnstInv8x8[mode][index][0] : g_lfnstInv4x4[mode][index][0];
196
534k
  const int     trSize                = (size > 4) ? 48 : 16;
197
534k
  int           resi;
198
534k
  int *         out                   = dst;
199
200
20.0M
  for( int j = 0; j < trSize; j++, trMat += 16 )
201
19.5M
  {
202
19.5M
    resi = 0;
203
19.5M
    const int8_t* trMatTmp = trMat;
204
19.5M
    int*          srcPtr   = src;
205
206
297M
    for( int i = 0; i < zeroOutSize; i++ )
207
277M
    {
208
277M
      resi += *srcPtr++ * *trMatTmp++;
209
277M
    }
210
211
19.5M
    *out++ = Clip3( outputMinimum, outputMaximum, ( int ) ( resi + 64 ) >> 7 );
212
19.5M
  }
213
534k
}
214
215
// ====================================================================================================================
216
// TrQuant class member functions
217
// ====================================================================================================================
218
20.7k
TrQuant::TrQuant() : m_scalingListEnabled(false), m_quant( nullptr )
219
20.7k
{
220
  // allocate temporary buffers
221
20.7k
  m_plTempCoeff = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
222
20.7k
  m_tmp         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
223
20.7k
  m_blk         = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
224
225
145k
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
226
124k
  {
227
124k
    m_mtsCoeffs[i] = ( TCoeff* ) xMalloc( TCoeff, MAX_TB_SIZEY * MAX_TB_SIZEY );
228
124k
  }
229
230
20.7k
  {
231
20.7k
    m_invICT      = m_invICTMem + maxAbsIctMode;
232
20.7k
    m_invICT[ 0]  = invTransformCbCr< 0>;
233
20.7k
    m_invICT[ 1]  = invTransformCbCr< 1>;
234
20.7k
    m_invICT[-1]  = invTransformCbCr<-1>;
235
20.7k
    m_invICT[ 2]  = invTransformCbCr< 2>;
236
20.7k
    m_invICT[-2]  = invTransformCbCr<-2>;
237
20.7k
    m_invICT[ 3]  = invTransformCbCr< 3>;
238
20.7k
    m_invICT[-3]  = invTransformCbCr<-3>;
239
20.7k
    m_fwdICT      = m_fwdICTMem + maxAbsIctMode;
240
20.7k
    m_fwdICT[ 0]  = fwdTransformCbCr< 0>;
241
20.7k
    m_fwdICT[ 1]  = fwdTransformCbCr< 1>;
242
20.7k
    m_fwdICT[-1]  = fwdTransformCbCr<-1>;
243
20.7k
    m_fwdICT[ 2]  = fwdTransformCbCr< 2>;
244
20.7k
    m_fwdICT[-2]  = fwdTransformCbCr<-2>;
245
20.7k
    m_fwdICT[ 3]  = fwdTransformCbCr< 3>;
246
20.7k
    m_fwdICT[-3]  = fwdTransformCbCr<-3>;
247
20.7k
  }
248
249
20.7k
  m_invLfnstNxN = xInvLfnstNxNCore;
250
20.7k
  m_fwdLfnstNxN = xFwdLfnstNxNCore;
251
252
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_TRAFO
253
  initTrQuantX86();
254
#endif
255
20.7k
}
256
257
TrQuant::~TrQuant()
258
20.7k
{
259
20.7k
  if( m_quant )
260
20.7k
  {
261
20.7k
    delete m_quant;
262
20.7k
    m_quant = nullptr;
263
20.7k
  }
264
265
  // delete temporary buffers
266
20.7k
  if( m_plTempCoeff )
267
20.7k
  {
268
20.7k
    xFree( m_plTempCoeff );
269
20.7k
    m_plTempCoeff = nullptr;
270
20.7k
  }
271
272
20.7k
  if( m_blk )
273
20.7k
  {
274
20.7k
    xFree( m_blk );
275
20.7k
    m_blk = nullptr;
276
20.7k
  }
277
278
20.7k
  if( m_tmp )
279
20.7k
  {
280
20.7k
    xFree( m_tmp );
281
20.7k
    m_tmp = nullptr;
282
20.7k
  }
283
284
145k
  for( int i = 0; i < NUM_TRAFO_MODES_MTS; i++ )
285
124k
  {
286
124k
     xFree( m_mtsCoeffs[i] );
287
124k
  }
288
20.7k
}
289
290
void TrQuant::xDeQuant(const TransformUnit& tu,
291
                             CoeffBuf      &dstCoeff,
292
                       const ComponentID   &compID,
293
                       const QpParam       &cQP)
294
875k
{
295
875k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_DEQUANT );
296
875k
  m_quant->dequant( tu, dstCoeff, compID, cQP );
297
875k
}
298
299
void TrQuant::init( const Quant* otherQuant,
300
                    const int  rdoq,
301
                    const bool bUseRDOQTS,
302
                    const bool scalingListsEnabled,
303
                    const bool bEnc,
304
                    const int  thrVal
305
)
306
20.7k
{
307
20.7k
  m_bEnc = bEnc;
308
309
20.7k
  delete m_quant;
310
20.7k
  m_quant = nullptr;
311
312
20.7k
  m_quant = new(std::nothrow) DepQuant( otherQuant, bEnc, scalingListsEnabled );
313
20.7k
  CHECK( !m_quant, "allocation failed" );
314
20.7k
  m_quant->init( rdoq, bUseRDOQTS, thrVal );
315
20.7k
}
316
317
318
void TrQuant::invTransformNxN( TransformUnit& tu, const ComponentID compID, PelBuf& pResi, const QpParam& cQP )
319
875k
{
320
875k
  const CompArea& area    = tu.blocks[compID];
321
875k
  const uint32_t uiWidth  = area.width;
322
875k
  const uint32_t uiHeight = area.height;
323
324
875k
  CHECK( uiWidth > tu.cs->sps->getMaxTbSize() || uiHeight > tu.cs->sps->getMaxTbSize(), "Maximal allowed transformation size exceeded!" );
325
326
875k
  {
327
875k
    CoeffBuf tempCoeff = CoeffBuf( m_plTempCoeff, area );
328
875k
    xDeQuant( tu, tempCoeff, compID, cQP );
329
330
875k
    DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );
331
332
875k
    if (tu.cs->sps->LFNST)
333
875k
    {
334
875k
      xInvLfnst(tu, compID);
335
875k
    }
336
875k
    if (tu.mtsIdx[compID] == MTS_SKIP)
337
51.0k
    {
338
51.0k
      xITransformSkip(tempCoeff, pResi, tu, compID);
339
51.0k
    }
340
824k
    else
341
824k
    {
342
824k
      xIT(tu, compID, tempCoeff, pResi);
343
824k
    }
344
875k
  }
345
346
  //DTRACE_BLOCK_COEFF(tu.getCoeffs(compID), tu, tu.cu->predMode, compID);
347
875k
  DTRACE_PEL_BUF( D_RESIDUALS, pResi, tu, tu.cu->predMode, compID);
348
875k
}
349
350
std::pair<int64_t,int64_t> TrQuant::fwdTransformICT( const TransformUnit& tu, const PelBuf& resCb, const PelBuf& resCr, PelBuf& resC1, PelBuf& resC2, int jointCbCr )
351
1.13M
{
352
1.13M
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
353
1.13M
  CHECK( Size(resCb) != Size(resC1), "resCb and resC1 have different sizes" );
354
1.13M
  CHECK( Size(resCb) != Size(resC2), "resCb and resC2 have different sizes" );
355
1.13M
  return (*m_fwdICT[ TU::getICTMode(tu, jointCbCr) ])( resCb, resCr, resC1, resC2 );
356
1.13M
}
357
358
void TrQuant::invTransformICT( const TransformUnit& tu, PelBuf& resCb, PelBuf& resCr )
359
279k
{
360
279k
  CHECK( Size(resCb) != Size(resCr), "resCb and resCr have different sizes" );
361
279k
  (*m_invICT[ TU::getICTMode(tu) ])( resCb, resCr );
362
279k
}
363
364
std::vector<int> TrQuant::selectICTCandidates( const TransformUnit& tu, CompStorage* resCb, CompStorage* resCr )
365
283k
{
366
283k
  CHECK( !resCb[0].valid() || !resCr[0].valid(), "standard components are not valid" );
367
368
283k
  if( !CU::isIntra( *tu.cu ) )
369
0
  {
370
0
    int cbfMask = 3;
371
0
    fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
372
0
    std::vector<int> cbfMasksToTest;
373
0
    cbfMasksToTest.push_back( cbfMask );
374
0
    return cbfMasksToTest;
375
0
  }
376
377
283k
  std::pair<int64_t,int64_t> pairDist[4];
378
1.41M
  for( int cbfMask = 0; cbfMask < 4; cbfMask++ )
379
1.13M
  {
380
1.13M
    pairDist[cbfMask] = fwdTransformICT( tu, resCb[0], resCr[0], resCb[cbfMask], resCr[cbfMask], cbfMask );
381
1.13M
  }
382
383
283k
  std::vector<int> cbfMasksToTest;
384
283k
  int64_t minDist1  = std::min<int64_t>( pairDist[0].first, pairDist[0].second );
385
283k
  int64_t minDist2  = std::numeric_limits<int64_t>::max();
386
283k
  int     cbfMask1  = 0;
387
283k
  int     cbfMask2  = 0;
388
283k
  for( int cbfMask : { 1, 2, 3 } )
389
850k
  {
390
850k
    if( pairDist[cbfMask].first < minDist1 )
391
561k
    {
392
561k
      cbfMask2  = cbfMask1; minDist2  = minDist1;
393
561k
      cbfMask1  = cbfMask;  minDist1  = pairDist[cbfMask1].first;
394
561k
    }
395
288k
    else if( pairDist[cbfMask].first < minDist2 )
396
283k
    {
397
283k
      cbfMask2  = cbfMask;  minDist2  = pairDist[cbfMask2].first;
398
283k
    }
399
850k
  }
400
283k
  if( cbfMask1 )
401
283k
  {
402
283k
    cbfMasksToTest.push_back( cbfMask1 );
403
283k
  }
404
283k
  if( cbfMask2 && ( ( minDist2 < (9*minDist1)/8 ) || ( !cbfMask1 && minDist2 < (3*minDist1)/2 ) ) )
405
0
  {
406
0
    cbfMasksToTest.push_back( cbfMask2 );
407
0
  }
408
409
283k
  return cbfMasksToTest;
410
283k
}
411
412
413
414
// ------------------------------------------------------------------------------------------------
415
// Logical transform
416
// ------------------------------------------------------------------------------------------------
417
void TrQuant::xSetTrTypes( const TransformUnit& tu, const ComponentID compID, const int width, const int height, int &trTypeHor, int &trTypeVer )
418
2.83M
{
419
2.83M
  const bool isISP = CU::isIntra(*tu.cu) && tu.cu->ispMode && isLuma(compID);
420
2.83M
  if (isISP && tu.cu->lfnstIdx)
421
19.7k
  {
422
19.7k
    return;
423
19.7k
  }
424
2.81M
  if (!tu.cs->sps->MTS)
425
0
  {
426
0
    return;
427
0
  }
428
2.81M
  if (CU::isIntra(*tu.cu) && isLuma(compID) && ((tu.cs->sps->getUseImplicitMTS() && tu.cu->lfnstIdx == 0 && tu.cu->mipFlag == 0) || tu.cu->ispMode))
429
88.8k
  {
430
88.8k
    if (width >= 4 && width <= 16)
431
39.6k
      trTypeHor = DST7;
432
88.8k
    if (height >= 4 && height <= 16)
433
38.2k
      trTypeVer = DST7;
434
88.8k
  }
435
2.73M
  else if( tu.cs->sps->MTS && tu.cu->sbtInfo && isLuma(compID)/*isSBT*/ )
436
0
  {
437
0
    const uint8_t sbtIdx = CU::getSbtIdx( tu.cu->sbtInfo );
438
0
    const uint8_t sbtPos = CU::getSbtPos( tu.cu->sbtInfo );
439
440
0
    if( sbtIdx == SBT_VER_HALF || sbtIdx == SBT_VER_QUAD )
441
0
    {
442
0
      assert( tu.lwidth() <= MTS_INTER_MAX_CU_SIZE );
443
0
      if( tu.lheight() > MTS_INTER_MAX_CU_SIZE )
444
0
      {
445
0
        trTypeHor = trTypeVer = DCT2;
446
0
      }
447
0
      else
448
0
      {
449
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DCT8;  trTypeVer = DST7; }
450
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
451
0
      }
452
0
    }
453
0
    else
454
0
    {
455
0
      assert( tu.lheight() <= MTS_INTER_MAX_CU_SIZE );
456
0
      if( tu.lwidth() > MTS_INTER_MAX_CU_SIZE )
457
0
      {
458
0
        trTypeHor = trTypeVer = DCT2;
459
0
      }
460
0
      else
461
0
      {
462
0
        if( sbtPos == SBT_POS0 )  { trTypeHor = DST7;  trTypeVer = DCT8; }
463
0
        else                      { trTypeHor = DST7;  trTypeVer = DST7; }
464
0
      }
465
0
    }
466
0
  }
467
2.81M
  const bool isExplicitMTS = (CU::isIntra(*tu.cu) ? tu.cs->sps->MTS : tu.cs->sps->MTSInter && CU::isInter(*tu.cu)) && isLuma(compID);
468
2.81M
  if (isExplicitMTS)
469
210k
  {
470
210k
    if (tu.mtsIdx[compID] > MTS_SKIP)
471
0
    {
472
0
      int indHor = (tu.mtsIdx[compID] - MTS_DST7_DST7) & 1;
473
0
      int indVer = (tu.mtsIdx[compID] - MTS_DST7_DST7) >> 1;
474
0
      trTypeHor  = indHor ? DCT8 : DST7;
475
0
      trTypeVer  = indVer ? DCT8 : DST7;
476
0
    }
477
210k
  }
478
2.81M
}
479
480
481
void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelBuf& resi, CoeffBuf& dstCoeff, const int width, const int height )
482
2.01M
{
483
2.01M
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
484
485
2.01M
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
486
2.01M
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
487
2.01M
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
488
2.01M
  const uint32_t transformWidthIndex    = Log2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
489
2.01M
  const uint32_t transformHeightIndex   = Log2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point
490
491
2.01M
  int trTypeHor = DCT2;
492
2.01M
  int trTypeVer = DCT2;
493
494
2.01M
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
495
496
2.01M
  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
497
2.01M
  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
498
499
2.01M
  if( tu.cu->lfnstIdx )
500
1.25M
  {
501
1.25M
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
502
350k
    {
503
350k
      skipWidth  = width - 4;
504
350k
      skipHeight = height - 4;
505
350k
    }
506
904k
    else if ((width >= 8 && height >= 8))
507
828k
    {
508
828k
      skipWidth  = width - 8;
509
828k
      skipHeight = height - 8;
510
828k
    }
511
1.25M
  }
512
513
2.01M
  TCoeff* block = m_blk;
514
2.01M
  TCoeff* tmp   = m_tmp;
515
516
2.01M
  const Pel* resiBuf    = resi.buf;
517
2.01M
  const int  resiStride = resi.stride;
518
519
2.01M
#if ENABLE_SIMD_TRAFO
520
2.01M
  if( width & 3 )
521
0
#endif
522
0
  {
523
0
    for( int y = 0; y < height; y++ )
524
0
    {
525
0
      for( int x = 0; x < width; x++ )
526
0
      {
527
0
        block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
528
0
      }
529
0
    }
530
0
  }
531
2.01M
#if ENABLE_SIMD_TRAFO
532
2.01M
  else if( width & 7 )
533
376k
  {
534
376k
    g_tCoeffOps.cpyCoeff4( resiBuf, resiStride, block, width, height );
535
376k
  }
536
1.63M
  else
537
1.63M
  {
538
1.63M
    g_tCoeffOps.cpyCoeff8( resiBuf, resiStride, block, width, height );
539
1.63M
  }
540
2.01M
#endif //ENABLE_SIMD_TRAFO
541
542
2.01M
  if (width > 1 && height > 1)
543
2.01M
  {
544
2.01M
    const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
545
2.01M
    const int shift_2nd =  (Log2(height))            + TRANSFORM_MATRIX_SHIFT;
546
2.01M
    CHECK( shift_1st < 0, "Negative shift" );
547
2.01M
    CHECK( shift_2nd < 0, "Negative shift" );
548
2.01M
    fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
549
2.01M
    fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
550
2.01M
  }
551
281
  else if (height == 1)   // 1-D horizontal transform
552
288
  {
553
288
    const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
554
288
    CHECK( shift < 0, "Negative shift" );
555
288
    fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
556
288
  }
557
18.4E
  else   // if (iWidth == 1) //1-D vertical transform
558
18.4E
  {
559
18.4E
    int shift = ((floorLog2(height)) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
560
18.4E
    CHECK(shift < 0, "Negative shift");
561
18.4E
    CHECKD((transformHeightIndex < 0), "There is a problem with the height.");
562
18.4E
    fastFwdTrans[trTypeVer][transformHeightIndex](block, dstCoeff.buf, shift, 1, 0, skipHeight);
563
18.4E
  }
564
2.01M
}
565
566
567
void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pCoeff, PelBuf& pResidual )
568
824k
{
569
824k
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );
570
571
824k
  const int      width                  = pCoeff.width;
572
824k
  const int      height                 = pCoeff.height;
573
824k
  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange();
574
824k
  const unsigned bitDepth               = tu.cs->sps->bitDepths[toChannelType( compID )];
575
824k
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
576
824k
  const TCoeff   clipMinimum            = -( 1 << maxLog2TrDynamicRange );
577
824k
  const TCoeff   clipMaximum            =  ( 1 << maxLog2TrDynamicRange ) - 1;
578
824k
  const uint32_t transformWidthIndex    = Log2(width )- 1;                                // nLog2WidthMinus1, since transform start from 2-point
579
824k
  const uint32_t transformHeightIndex   = Log2(height) - 1;                                // nLog2HeightMinus1, since transform start from 2-point
580
581
582
824k
  int trTypeHor = DCT2;
583
824k
  int trTypeVer = DCT2;
584
585
824k
  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );
586
587
824k
  int skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
588
824k
  int skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;
589
590
824k
  if (tu.cs->sps->LFNST && tu.cu->lfnstIdx)
591
534k
  {
592
534k
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
593
150k
    {
594
150k
      skipWidth = width - 4;
595
150k
      skipHeight = height - 4;
596
150k
    }
597
383k
    else if ((width >= 8 && height >= 8))
598
342k
    {
599
342k
      skipWidth = width - 8;
600
342k
      skipHeight = height - 8;
601
342k
    }
602
534k
  }
603
604
824k
  TCoeff *block = m_blk;
605
824k
  TCoeff *tmp   = m_tmp;
606
824k
  if (width > 1 && height > 1)   // 2-D transform
607
824k
  {
608
824k
    const int shift_1st =   TRANSFORM_MATRIX_SHIFT + 1; // 1 has been added to shift_1st at the expense of shift_2nd
609
824k
    const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth;
610
824k
    CHECK( shift_1st < 0, "Negative shift" );
611
824k
    CHECK( shift_2nd < 0, "Negative shift" );
612
824k
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
613
824k
    fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum);
614
824k
  }
615
72
  else if (width == 1)   // 1-D vertical transform
616
0
  {
617
0
    int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
618
0
    CHECK(shift < 0, "Negative shift");
619
0
    fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, block, shift + 1, 1, 0, skipHeight, clipMinimum, clipMaximum);
620
0
  }
621
72
  else   // if(iHeight == 1) //1-D horizontal transform
622
72
  {
623
72
    const int shift = (TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1) - bitDepth;
624
72
    CHECK(shift < 0, "Negative shift");
625
72
    fastInvTrans[trTypeHor][transformWidthIndex](pCoeff.buf, block, shift + 1, 1, 0, skipWidth, clipMinimum, clipMaximum);
626
72
  }
627
628
824k
#if ENABLE_SIMD_TRAFO
629
824k
  if( width & 3 )
630
0
#endif //ENABLE_SIMD_TRAFO
631
0
  {
632
0
    Pel       *dst    = pResidual.buf;
633
0
    ptrdiff_t  stride = pResidual.stride;
634
635
0
    for( int y = 0; y < height; y++ )
636
0
    {
637
0
      for( int x = 0; x < width; x++ )
638
0
      {
639
0
        dst[x] = ( Pel ) *block++;
640
0
      }
641
642
0
      dst += stride;
643
0
    }
644
0
  }
645
824k
#if ENABLE_SIMD_TRAFO
646
824k
  else if( width & 7 )
647
174k
  {
648
174k
    g_tCoeffOps.cpyResi4( block, pResidual.buf, pResidual.stride, width, height );
649
174k
  }
650
649k
  else
651
649k
  {
652
649k
    g_tCoeffOps.cpyResi8( block, pResidual.buf, pResidual.stride, width, height );
653
649k
  }
654
824k
#endif //ENABLE_SIMD_TRAFO
655
824k
}
656
657
/** Wrapper function between HM interface and core NxN transform skipping
658
 */
659
void TrQuant::xITransformSkip(const CCoeffBuf& pCoeff,
660
  PelBuf& pResidual,
661
  const TransformUnit& tu,
662
  const ComponentID compID)
663
51.0k
{
664
51.0k
  const CompArea& area = tu.blocks[compID];
665
51.0k
  const int width = area.width;
666
51.0k
  const int height = area.height;
667
668
515k
  for (uint32_t y = 0; y < height; y++)
669
464k
  {
670
5.16M
    for (uint32_t x = 0; x < width; x++)
671
4.70M
    {
672
4.70M
      pResidual.at(x, y) = Pel(pCoeff.at(x, y));
673
4.70M
    }
674
464k
  }
675
51.0k
}
676
677
/** Quantizes the coefficients of one component by forwarding to m_quant.
 *
 *  \param tu        transform unit being coded
 *  \param compID    colour component being quantized
 *  \param pSrc      transform coefficients to quantize
 *  \param uiAbsSum  passed through to the quantizer by reference
 *  \param cQP       quantization parameters
 *  \param ctx       context state handed to the quantizer
 */
void TrQuant::xQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_QUANT );

  // All of the actual work is delegated to the quantizer object.
  m_quant->quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
#if ENABLE_MEASURE_SEARCH_SPACE

  // Optional bookkeeping, only compiled in for search-space measurements.
  g_searchSpaceAcc.addQuant( tu, toChannelType( compID ) );
#endif
}
686
687
688
/** Forward transform, optional forward LFNST and quantization of one
 *  component of a transform unit, followed by setting its coded block flag.
 *
 *  \param tu        transform unit to process; its mtsIdx entry is forced to
 *                   MTS_SKIP when the CU uses BDPCM for this channel type
 *  \param compID    colour component to process
 *  \param cQP       quantization parameters
 *  \param uiAbsSum  reset to 0 here, then handed to xQuant by reference
 *  \param ctx       context state handed to the quantizer
 *  \param loadTr    when true the primary transform is not recomputed and the
 *                   coefficients are taken from m_mtsCoeffs[mtsIdx]; otherwise
 *                   they are computed into m_plTempCoeff
 */
void TrQuant::transformNxN(TransformUnit &tu, const ComponentID compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr)
{
  CodingStructure& cs     = *tu.cs;
  const CompArea&  area   = tu.blocks[compID];
  const uint32_t   width  = area.width;
  const uint32_t   height = area.height;

  const CPelBuf resiBuf = cs.getResiBuf(area);

  // Nothing to code: report an empty block and leave.
  if( tu.noResidual )
  {
    uiAbsSum = 0;
    TU::setCbfAtDepth( tu, compID, tu.depth, false );
    return;
  }

  // BDPCM always goes through the transform-skip path.
  if (tu.cu->bdpcmM[toChannelType(compID)])
  {
    tu.mtsIdx[compID] = MTS_SKIP;
  }

  uiAbsSum = 0;
  CHECK( cs.sps->getMaxTbSize() < width, "Unsupported transformation size" );

  // Must be set up after the BDPCM override above because the buffer choice
  // depends on tu.mtsIdx[compID].
  CoeffBuf tempCoeff(loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff, area);

  if (!loadTr)
  {
    DTRACE_PEL_BUF( D_RESIDUALS, resiBuf, tu, tu.cu->predMode, compID );

    const bool isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP;
    if (isTransformSkip)
    {
      xTransformSkip(tu, compID, resiBuf, tempCoeff.buf);
    }
    else
    {
      xT(tu, compID, resiBuf, tempCoeff, width, height);
    }
  }

  if (cs.sps->LFNST)
  {
    xFwdLfnst(tu, compID, loadTr);
  }

  DTRACE_COEFF_BUF( D_TCOEFF, tempCoeff, tu, tu.cu->predMode, compID );

  xQuant( tu, compID, tempCoeff, uiAbsSum, cQP, ctx );

  DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );

  // set coded block flag (CBF)
  TU::setCbfAtDepth (tu, compID, tu.depth, uiAbsSum > 0);
}
737
738
/** Pre-selects which transform candidates are worth a full test.
 *
 *  Every candidate in *trModes is applied to the residual (transform skip or
 *  xT), the coefficients are kept in m_mtsCoeffs[mode] and the sum of their
 *  absolute values, scaled for transform skip, is used as a cost. A candidate
 *  is enabled (its .second is set to true) when its cost does not exceed a
 *  threshold derived from the first candidate's cost and the number of enabled
 *  candidates so far is still <= maxCand.
 *
 *  \param tu       transform unit; tu.mtsIdx[compID] is overwritten with each
 *                  candidate in turn and keeps the last one on return
 *  \param trModes  candidate list (mode index, test flag); flags are written.
 *                  An empty list is left untouched.
 *  \param maxCand  limit applied to the running count of enabled candidates
 *  \param compID   colour component to evaluate
 */
void TrQuant::checktransformsNxN( TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, const ComponentID compID)
{
  CodingStructure &cs     = *tu.cs;
  const CompArea& rect    = tu.blocks[compID];
  const uint32_t   width  = rect.width;
  const uint32_t   height = rect.height;

  const CPelBuf resiBuf = cs.getResiBuf(rect);

  CHECK(cs.sps->getMaxTbSize() < width, "Unsupported transformation size");

  // Without candidates there is no reference cost; the threshold computation
  // below would dereference begin() of an empty vector.
  if (trModes->empty())
  {
    return;
  }

  const double   facBB[]   = { 1.2, 1.3, 1.3, 1.4, 1.5 };
  const uint32_t numCoeffs = width * height;

  std::vector<TrCost> trCosts;
  trCosts.reserve(trModes->size());

  int candIdx = 0;   // position of the candidate inside *trModes
  for (const TrMode& mode : *trModes)
  {
    tu.mtsIdx[compID] = mode.first;

    if (tu.noResidual)
    {
      // No residual: every candidate gets a zero cost.
      trCosts.push_back(TrCost(0, candIdx++));
      continue;
    }

    CoeffBuf   tempCoeff(m_mtsCoeffs[tu.mtsIdx[compID]], rect);
    const bool isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP;

    if (isTransformSkip)
    {
      xTransformSkip(tu, compID, resiBuf, tempCoeff.buf);
    }
    else
    {
      xT(tu, compID, resiBuf, tempCoeff, width, height);
    }

    int sumAbs = 0;
    for (uint32_t i = 0; i < numCoeffs; i++)
    {
      sumAbs += abs(tempCoeff.buf[i]);
    }

    double scaleSAD = 1.0;
    if (isTransformSkip)
    {
      if (((floorLog2(width) + floorLog2(height)) & 1) == 1)
      {
        scaleSAD = 1.0 / 1.414213562;   // compensate for not scaling transform skip coefficients by 1/sqrt(2)
      }
      const int trShift = getTransformShift(tu.cu->slice->sps->bitDepths[CH_L], rect.size(), tu.cu->slice->sps->getMaxLog2TrDynamicRange());
      scaleSAD *= pow(2, trShift);
    }

    // Saturate at INT_MAX before narrowing the scaled cost back to int.
    trCosts.push_back(TrCost(int(std::min<double>(sumAbs * scaleSAD, std::numeric_limits<int>::max())), candIdx++));
  }

  // Thresholds are relative to the cost of the first candidate.
  const double fac   = facBB[std::max(0, floorLog2(std::max(width, height)) - 2)];
  const double thr   = fac * trCosts.begin()->first;
  const double thrTS = trCosts.begin()->first;

  int numTests = 0;
  for (const TrCost& cost : trCosts)
  {
    // Mode index 1 gets the tighter, unscaled threshold
    // (NOTE(review): presumably 1 == MTS_SKIP - confirm against the enum).
    const bool testTr               = cost.first <= (trModes->at(cost.second).first == 1 ? thrTS : thr) && numTests <= maxCand;
    trModes->at(cost.second).second = testTr;
    numTests += testTr;
  }
}
805
806
uint32_t TrQuant::xGetLFNSTIntraMode( const Area& tuArea, const uint32_t dirMode )
807
1.78M
{
808
1.78M
  if (dirMode < 2)
809
1.00M
  {
810
1.00M
    return dirMode;
811
1.00M
  }
812
813
781k
  static const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
814
815
781k
  const int width  = int(tuArea.width);
816
781k
  const int height = int(tuArea.height);
817
818
781k
  if (width > height && dirMode < 2 + modeShift[floorLog2(width) - floorLog2(height)])
819
218
  {
820
218
    return dirMode + (VDIA_IDX - 1) + (NUM_EXT_LUMA_MODE >> 1);
821
218
  }
822
781k
  else if (height > width && dirMode > VDIA_IDX - modeShift[floorLog2(height) - floorLog2(width)])
823
73.1k
  {
824
73.1k
    return dirMode - (VDIA_IDX + 1) + (NUM_EXT_LUMA_MODE >> 1) + NUM_LUMA_MODE;
825
73.1k
  }
826
827
708k
  return dirMode;
828
781k
}
829
830
831
bool TrQuant::xGetTransposeFlag(uint32_t intraMode)
832
1.78M
{
833
1.78M
  return ((intraMode >= NUM_LUMA_MODE) && (intraMode >= (NUM_LUMA_MODE + (NUM_EXT_LUMA_MODE >> 1))))
834
1.78M
         || ((intraMode < NUM_LUMA_MODE) && (intraMode > DIA_IDX));
835
1.78M
}
836
837
838
void TrQuant::xInvLfnst(const TransformUnit &tu, const ComponentID compID)
839
875k
{
840
875k
  const CompArea &area     = tu.blocks[compID];
841
875k
  const uint32_t  width    = area.width;
842
875k
  const uint32_t  height   = area.height;
843
875k
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
844
875k
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
845
534k
  {
846
534k
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
847
534k
    const bool         whge3 = width >= 8 && height >= 8;
848
534k
    const ScanElement *scan =
849
534k
      whge3
850
534k
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
851
534k
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));
852
534k
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
853
854
534k
    if (CU::isLMCMode( cu.intraDir[toChannelType(compID)]))
855
88.2k
    {
856
88.2k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
857
88.2k
    }
858
534k
    if (CU::isMIP(cu, toChannelType(compID)))
859
5.22k
    {
860
5.22k
      intraMode = PLANAR_IDX;
861
5.22k
    }
862
534k
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
863
864
534k
    if (lfnstIdx < 3)
865
534k
    {
866
534k
      if (tu.cu->ispMode && isLuma(compID))
867
7.71k
      {
868
7.71k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
869
7.71k
      }
870
526k
      else
871
526k
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
872
534k
      bool      transposeFlag = xGetTransposeFlag(intraMode);
873
534k
      const int sbSize        = whge3 ? 8 : 4;
874
534k
      bool      tu4x4Flag     = (width == 4 && height == 4);
875
534k
      bool      tu8x8Flag     = (width == 8 && height == 8);
876
534k
      TCoeff *  lfnstTemp;
877
534k
      TCoeff *  coeffTemp;
878
534k
      int       y;
879
534k
      lfnstTemp                  = m_tempInMatrix;   // inverse spectral rearrangement
880
534k
      coeffTemp                  = m_plTempCoeff;
881
534k
      TCoeff *           dst     = lfnstTemp;
882
534k
      const ScanElement *scanPtr = scan;
883
9.07M
      for (y = 0; y < 16; y++)
884
8.54M
      {
885
8.54M
        *dst++ = coeffTemp[scanPtr->idx];
886
8.54M
        scanPtr++;
887
8.54M
      }
888
889
534k
      m_invLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
890
891
534k
      lfnstTemp = m_tempOutMatrix;   // inverse spectral rearrangement
892
893
534k
      if (transposeFlag)
894
48.8k
      {
895
48.8k
        if (sbSize == 4)
896
9.05k
        {
897
45.2k
          for (y = 0; y < 4; y++)
898
36.2k
          {
899
36.2k
            coeffTemp[0] = lfnstTemp[0];
900
36.2k
            coeffTemp[1] = lfnstTemp[4];
901
36.2k
            coeffTemp[2] = lfnstTemp[8];
902
36.2k
            coeffTemp[3] = lfnstTemp[12];
903
36.2k
            lfnstTemp++;
904
36.2k
            coeffTemp += width;
905
36.2k
          }
906
9.05k
        }
907
39.7k
        else   // ( sbSize == 8 )
908
39.7k
        {
909
358k
          for (y = 0; y < 8; y++)
910
318k
          {
911
318k
            coeffTemp[0] = lfnstTemp[0];
912
318k
            coeffTemp[1] = lfnstTemp[8];
913
318k
            coeffTemp[2] = lfnstTemp[16];
914
318k
            coeffTemp[3] = lfnstTemp[24];
915
318k
            if (y < 4)
916
159k
            {
917
159k
              coeffTemp[4] = lfnstTemp[32];
918
159k
              coeffTemp[5] = lfnstTemp[36];
919
159k
              coeffTemp[6] = lfnstTemp[40];
920
159k
              coeffTemp[7] = lfnstTemp[44];
921
159k
            }
922
318k
            lfnstTemp++;
923
318k
            coeffTemp += width;
924
318k
          }
925
39.7k
        }
926
48.8k
      }
927
485k
      else
928
485k
      {
929
3.63M
        for (y = 0; y < sbSize; y++)
930
3.15M
        {
931
3.15M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
932
3.15M
          ::memcpy(coeffTemp, lfnstTemp, uiStride * sizeof(TCoeff));
933
3.15M
          lfnstTemp += uiStride;
934
3.15M
          coeffTemp += width;
935
3.15M
        }
936
485k
      }
937
534k
    }
938
534k
  }
939
875k
}
940
941
942
void TrQuant::xFwdLfnst(const TransformUnit &tu, const ComponentID compID, const bool loadTr)
943
2.12M
{
944
2.12M
  const CompArea &area     = tu.blocks[compID];
945
2.12M
  const uint32_t  width    = area.width;
946
2.12M
  const uint32_t  height   = area.height;
947
2.12M
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;
948
18.4E
  if (lfnstIdx && tu.mtsIdx[compID] != MTS_SKIP && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)))
949
1.25M
  {
950
1.25M
    const CodingUnit& cu = *tu.cs->getCU(area.pos(), toChannelType(compID), TREE_D);
951
1.25M
    const bool         whge3 = width >= 8 && height >= 8;
952
1.25M
    const ScanElement *scan =
953
1.25M
      whge3
954
1.25M
        ? g_coefTopLeftDiagScan8x8[Log2(width)] 
955
1.25M
        : getScanOrder(SCAN_GROUPED_4x4, Log2(area.width), Log2(area.height));   
956
1.25M
    uint32_t intraMode = CU::getFinalIntraMode(cu, toChannelType(compID));
957
958
1.25M
    if (CU::isLMCMode(cu.intraDir[toChannelType(compID)]))
959
111k
    {
960
111k
      intraMode = CU::getCoLocatedIntraLumaMode(cu);
961
111k
    }
962
1.25M
    if (CU::isMIP(cu, toChannelType(compID)))
963
8.56k
    {
964
8.56k
      intraMode = PLANAR_IDX;
965
8.56k
    }
966
1.25M
    CHECK(intraMode >= NUM_INTRA_MODE - 1, "Invalid intra mode");
967
968
1.25M
    if (lfnstIdx < 3)
969
1.25M
    {
970
1.25M
      if (tu.cu->ispMode && isLuma(compID))
971
12.0k
      {
972
12.0k
        intraMode = xGetLFNSTIntraMode(tu.cu->blocks[compID], intraMode);
973
12.0k
      }
974
1.24M
      else
975
1.24M
      {
976
1.24M
        intraMode = xGetLFNSTIntraMode(tu.blocks[compID], intraMode);
977
1.24M
      }
978
1.25M
      bool      transposeFlag = xGetTransposeFlag(intraMode);
979
1.25M
      const int sbSize        = whge3 ? 8 : 4;
980
1.25M
      bool      tu4x4Flag     = (width == 4 && height == 4);
981
1.25M
      bool      tu8x8Flag     = (width == 8 && height == 8);
982
1.25M
      TCoeff*   lfnstTemp;
983
1.25M
      TCoeff*   coeffTemp;
984
1.25M
      TCoeff*   tempCoeff     = loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff;
985
986
1.25M
      int y;
987
1.25M
      lfnstTemp = m_tempInMatrix;   // forward low frequency non-separable transform
988
1.25M
      coeffTemp = tempCoeff;
989
990
1.25M
      if (transposeFlag)
991
267k
      {
992
267k
        if (sbSize == 4)
993
76.7k
        {
994
383k
          for (y = 0; y < 4; y++)
995
307k
          {
996
307k
            lfnstTemp[0]  = coeffTemp[0];
997
307k
            lfnstTemp[4]  = coeffTemp[1];
998
307k
            lfnstTemp[8]  = coeffTemp[2];
999
307k
            lfnstTemp[12] = coeffTemp[3];
1000
307k
            lfnstTemp++;
1001
307k
            coeffTemp += width;
1002
307k
          }
1003
76.7k
        }
1004
190k
        else   // ( sbSize == 8 )
1005
190k
        {
1006
1.71M
          for (y = 0; y < 8; y++)
1007
1.52M
          {
1008
1.52M
            lfnstTemp[0]  = coeffTemp[0];
1009
1.52M
            lfnstTemp[8]  = coeffTemp[1];
1010
1.52M
            lfnstTemp[16] = coeffTemp[2];
1011
1.52M
            lfnstTemp[24] = coeffTemp[3];
1012
1.52M
            if (y < 4)
1013
763k
            {
1014
763k
              lfnstTemp[32] = coeffTemp[4];
1015
763k
              lfnstTemp[36] = coeffTemp[5];
1016
763k
              lfnstTemp[40] = coeffTemp[6];
1017
763k
              lfnstTemp[44] = coeffTemp[7];
1018
763k
            }
1019
1.52M
            lfnstTemp++;
1020
1.52M
            coeffTemp += width;
1021
1.52M
          }
1022
190k
        }
1023
267k
      }
1024
987k
      else
1025
987k
      {
1026
7.48M
        for (y = 0; y < sbSize; y++)
1027
6.49M
        {
1028
6.49M
          uint32_t uiStride = (y < 4) ? sbSize : 4;
1029
6.49M
          ::memcpy(lfnstTemp, coeffTemp, uiStride * sizeof(TCoeff));
1030
6.49M
          lfnstTemp += uiStride;
1031
6.49M
          coeffTemp += width;
1032
6.49M
        }
1033
987k
      }
1034
1035
1.25M
      m_fwdLfnstNxN( m_tempInMatrix, m_tempOutMatrix, g_lfnstLut[intraMode], lfnstIdx - 1, sbSize, ( tu4x4Flag || tu8x8Flag ) ? 8 : 16 );
1036
1037
1.25M
      lfnstTemp                        = m_tempOutMatrix;   // forward spectral rearrangement
1038
1.25M
      coeffTemp                        = tempCoeff;
1039
1.25M
      const ScanElement *scanPtr       = scan;
1040
1.25M
      int                lfnstCoeffNum = (sbSize == 4) ? sbSize * sbSize : 48;
1041
47.8M
      for (y = 0; y < lfnstCoeffNum; y++)
1042
46.5M
      {
1043
46.5M
        coeffTemp[scanPtr->idx] = *lfnstTemp++;
1044
46.5M
        scanPtr++;
1045
46.5M
      }
1046
1.25M
    }
1047
1.25M
  }
1048
2.12M
}
1049
1050
void TrQuant::xTransformSkip(const TransformUnit& tu, const ComponentID& compID, const CPelBuf& resi, TCoeff* psCoeff)
1051
123k
{
1052
123k
  const CompArea& rect = tu.blocks[compID];
1053
123k
  const uint32_t width = rect.width;
1054
123k
  const uint32_t height = rect.height;
1055
1056
1.35M
  for (uint32_t y = 0, coefficientIndex = 0; y < height; y++)
1057
1.23M
  {
1058
14.7M
    for (uint32_t x = 0; x < width; x++, coefficientIndex++)
1059
13.5M
    {
1060
13.5M
      psCoeff[coefficientIndex] = TCoeff(resi.at(x, y));
1061
13.5M
    }
1062
1.23M
  }
1063
123k
}
1064
} // namespace vvenc
1065
1066
//! \}
1067