Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/EncoderLib/EncCu.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or
4
other Intellectual Property Rights other than the copyrights concerning
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     EncCu.cpp
45
    \brief    Coding Unit (CU) encoder class
46
*/
47
48
#include "EncCu.h"
49
#include "EncLib.h"
50
#include "Analyze.h"
51
#include "EncPicture.h"
52
#include "EncModeCtrl.h"
53
#include "BitAllocation.h"
54
#include "EncStage.h"
55
56
#include "CommonLib/dtrace_codingstruct.h"
57
#include "CommonLib/Picture.h"
58
#include "CommonLib/UnitTools.h"
59
#include "CommonLib/dtrace_buffer.h"
60
#include "CommonLib/TimeProfiler.h"
61
#include "CommonLib/SearchSpaceCounter.h"
62
63
#include <mutex>
64
#include <cmath>
65
#include <algorithm>
66
67
//! \ingroup EncoderLib
68
//! \{
69
70
namespace vvenc {
71
72
// Table of all 30 ordered pairs (a, b) with a != b and a, b in 0..5.
// The pairs are grouped by their larger index: first the pairs using indices up to 1, then up to 2, and so on up to 5.
// NOTE(review): presumably each pair names the two merge candidates combined for one GEO test - confirm against the users of m_GeoModeTest.
const MergeIdxPair EncCu::m_GeoModeTest[GEO_MAX_NUM_CANDS] = { MergeIdxPair{0, 1}, MergeIdxPair{1, 0}, MergeIdxPair{0, 2}, MergeIdxPair{1, 2}, MergeIdxPair{2, 0},
73
                                                               MergeIdxPair{2, 1}, MergeIdxPair{0, 3}, MergeIdxPair{1, 3}, MergeIdxPair{2, 3}, MergeIdxPair{3, 0},
74
                                                               MergeIdxPair{3, 1}, MergeIdxPair{3, 2}, MergeIdxPair{0, 4}, MergeIdxPair{1, 4}, MergeIdxPair{2, 4},
75
                                                               MergeIdxPair{3, 4}, MergeIdxPair{4, 0}, MergeIdxPair{4, 1}, MergeIdxPair{4, 2}, MergeIdxPair{4, 3},
76
                                                               MergeIdxPair{0, 5}, MergeIdxPair{1, 5}, MergeIdxPair{2, 5}, MergeIdxPair{3, 5}, MergeIdxPair{4, 5},
77
                                                               MergeIdxPair{5, 0}, MergeIdxPair{5, 1}, MergeIdxPair{5, 2}, MergeIdxPair{5, 3}, MergeIdxPair{5, 4} };
78
79
80
// Shape of coefSquareCUsFasterFastMedium is 2 x 5 x 2 x 2 x 2, indexed in this order:
// preset (faster and fast + medium) x cusize x nspred x sptype x numcoef.
// The innermost dimension holds a pair of coefficients; slots without a fitted pair hold -1.0 in both positions.
// NOTE(review): -1.0 is presumably a "no model for this configuration" marker - confirm against the code that reads this table.
81
82
const double EncCu::coefSquareCUsFasterFastMedium[2][5][2][2][2] = {
83
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
84
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
85
{{{-1.00000000, -1.00000000, }, {0.07848505, 0.00225808, }, },  {{-1.00000000, -1.00000000, }, {0.07509575, 0.00204789, }, },  },
86
{{{-1.00000000, -1.00000000, }, {0.10833051, 0.00053144, }, },  {{-1.00000000, -1.00000000, }, {0.08304352, 0.00142876, }, },  },
87
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
88
},
89
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
90
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
91
{{{0.06852235, 0.00388054, }, {0.09236045, 0.00084528, }, },  {{0.06955832, 0.00289679, }, {0.09598522, 0.00096187, }, },  },
92
{{{0.07268085, 0.00302796, }, {0.09323753, 0.00050996, }, },  {{0.06123618, 0.00471601, }, {0.09253389, 0.00046826, }, },  },
93
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, },  },
94
},
95
};
96
97
// Shape of coefSquareCUsSlowSlower is 2 x 5 x 2 x 5 x 2 x 2 x 2, indexed in this order: preset (Slow + Slower) x cusize x nspred x qtdepth x mtdepth x sptype x numcoef
98
99
const double EncCu::coefSquareCUsSlowSlower[2][5][2][5][2][2][2] = {
100
{{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
101
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
102
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
103
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
104
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
105
},
106
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
107
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
108
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
109
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
110
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
111
},
112
},
113
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
114
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
115
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
116
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
117
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
118
},
119
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
120
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
121
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
122
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
123
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
124
},
125
},
126
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
127
{{{0.06057349, 0.00447803, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
128
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.06948736, 0.00327774, }, {0.06396861, 0.00789923, }, }, },
129
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
130
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
131
},
132
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
133
{{{0.05526461, 0.00436703, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
134
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.05661641, 0.00546358, }, {0.06680048, 0.00670604, }, }, },
135
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
136
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
137
},
138
},
139
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
140
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.06556926, 0.00455697, }, {0.06803347, 0.00365396, }, }, },
141
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
142
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
143
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
144
},
145
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
146
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.06394328, 0.00403533, }, {0.06688063, 0.00272684, }, }, },
147
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
148
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
149
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
150
},
151
},
152
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.05533525, 0.00235007, }, {0.07184475, 0.00086154, }, }, },
153
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
154
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
155
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
156
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
157
},
158
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
159
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
160
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
161
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
162
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
163
},
164
},
165
},
166
{{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
167
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
168
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
169
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
170
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
171
},
172
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
173
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
174
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
175
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
176
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
177
},
178
},
179
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
180
{{{0.04770815, 0.00774597, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
181
{{{0.05509813, 0.00812349, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
182
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.04719064, 0.01023887, }, {0.04641434, 0.01279769, }, }, },
183
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
184
},
185
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
186
{{{0.02777251, 0.00811989, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
187
{{{0.04848973, 0.00628111, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
188
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.04924294, 0.00769725, }, {0.04491680, 0.01120645, }, }, },
189
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
190
},
191
},
192
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
193
{{{0.06856533, 0.00418949, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
194
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.07059085, 0.00348835, }, {0.06051544, 0.00821365, }, }, },
195
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
196
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
197
},
198
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
199
{{{0.07461087, 0.00229937, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
200
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.05893714, 0.00513168, }, {0.05809189, 0.00772994, }, }, },
201
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
202
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
203
},
204
},
205
{{{{{0.06922080, 0.00274816, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
206
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.06176300, 0.00374356, }, {0.06085891, 0.00361274, }, }, },
207
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
208
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
209
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
210
},
211
{{{{0.06611224, 0.00256748, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
212
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.06364765, 0.00282819, }, {0.05888407, 0.00320947, }, }, },
213
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
214
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
215
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
216
},
217
},
218
{{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{0.05637817, 0.00184637, }, {0.06937475, 0.00091860, }, }, },
219
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
220
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
221
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
222
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
223
},
224
{{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
225
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
226
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
227
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
228
{{{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, {{-1.00000000, -1.00000000, }, {-1.00000000, -1.00000000, }, }, },
229
},
230
},
231
},
232
};
233
234
// ====================================================================================================================
235
EncCu::EncCu()
236
0
  : m_CtxCache          ( nullptr )
237
0
  , m_globalCtuQpVector ( nullptr )
238
0
  , m_wppMutex          ( nullptr )
239
0
  , m_CABACEstimator    ( nullptr )
240
0
{
241
0
}
242
243
/// Per-picture setup.
/// Forwards the picture's reshaping weights to the RD cost, configures the
/// inter search range for the picture's slice and selects the WPP mutex.
/// \param pic  picture about to be encoded
void EncCu::initPic( Picture* pic )
{
  const ReshapeData& rsp = pic->reshapeData;

  m_cRdCost.setReshapeParams( rsp.getReshapeLumaLevelToWeightPLUT(), rsp.getChromaWeight() );
  m_cInterSearch.setSearchRange( pic->cs->slice, *m_pcEncCfg );

  // The picture's mutex is only referenced when worker threads are configured.
  if( m_pcEncCfg->m_numThreads > 0 )
  {
    m_wppMutex = &pic->wppMutex;
  }
  else
  {
    m_wppMutex = nullptr;
  }
}
251
252
void EncCu::initSlice( const Slice* slice )
253
0
{
254
0
  m_cTrQuant.setLambdas( slice->getLambdas() );
255
0
  m_cRdCost.setLambda( slice->getLambdas()[0], slice->sps->bitDepths );
256
0
}
257
258
/// Stores the CABAC estimator and context cache used for CTU encoding and
/// passes the resources on to the intra and inter search objects.
void EncCu::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache, ReuseUniMv* pReuseUniMv, BlkUniMvInfoBuffer* pBlkUniMvInfoBuffer, AffineProfList* pAffineProfList, IbcBvCand* pCachedBvs )
{
  // keep our own handles
  m_CABACEstimator = cabacEstimator;
  m_CtxCache       = ctxCache;

  // intra search only needs the entropy-coding resources
  m_cIntraSearch.setCtuEncRsrc( cabacEstimator, ctxCache );

  // inter search additionally receives the motion/affine/IBC helper buffers
  m_cInterSearch.setCtuEncRsrc( cabacEstimator,
                                ctxCache,
                                pReuseUniMv,
                                pBlkUniMvInfoBuffer,
                                pAffineProfList,
                                pCachedBvs );
}
265
266
/// Installs the lambda for RDO, RDOQ and (optionally) the slice.
/// \param slice           slice providing SPS/PPS and chroma QP deltas; receives the lambdas if requested
/// \param dLambda         luma lambda
/// \param iQP             QP used to derive the mapped chroma QPs
/// \param setSliceLambda  when true, the per-component lambdas are also stored in the slice (used for SAO, ALF)
/// \param saveUnadjusted  when true, the RD cost is asked to save its unadjusted lambda
void EncCu::setUpLambda (Slice& slice, const double dLambda, const int iQP, const bool setSliceLambda, const bool saveUnadjusted)
{
  // RDO: RdCost holds a single lambda because luma and chroma bits are not separated;
  // chroma is accounted for by weighting its distortion instead.
  m_cRdCost.setLambda( dLambda, slice.sps->bitDepths );

  double compLambdas[MAX_NUM_COMP] = { dLambda };

  for( uint32_t c = 1; c < MAX_NUM_COMP; ++c )
  {
    const ComponentID compID   = ComponentID( c );
    const int         qpOffset = slice.pps->chromaQpOffset[compID] + slice.sliceChromaQpDelta[ compID ];
    const int         chromaQp = slice.sps->chromaQpMappingTable.getMappedChromaQpValue(compID, iQP) + qpOffset;

    // weight reflects the chroma QP mapping and the chroma QP offsets
    double weight = pow( 2.0, ( iQP - chromaQp ) / 3.0 );

    if( m_pcEncCfg->m_DepQuantEnabled )
    {
      // raise the chroma weight for dependent quantization to reduce the bit-rate shift from chroma to luma
      if( m_pcEncCfg->m_GOPSize >= 8 )
      {
        weight *= pow( 2.0, 0.1/3.0 );
      }
      else
      {
        weight *= pow( 2.0, 0.2/3.0 );
      }
    }

    m_cRdCost.setDistortionWeight( compID, weight );
    compLambdas[c] = dLambda / weight;
  }

  // RDOQ
  m_cTrQuant.setLambdas( compLambdas );

  // SAO, ALF
  if( setSliceLambda )
  {
    slice.setLambdas( compLambdas );
  }

  if( saveUnadjusted )
  {
    m_cRdCost.saveUnadjustedLambda();
  }
}
301
302
void EncCu::updateLambda(const Slice& slice, const double ctuLambda, const int ctuQP, const int newQP, const bool saveUnadjusted)
303
0
{
304
0
  const double  corrFactor = pow (2.0, double (newQP - ctuQP) / 3.0);
305
0
  const double  newLambda  = ctuLambda * corrFactor;
306
0
  const double* oldLambdas = slice.getLambdas(); // assumes prior setUpLambda (slice, ctuLambda) call!
307
0
  const double  newLambdas[MAX_NUM_COMP] = { oldLambdas[COMP_Y] * corrFactor, oldLambdas[COMP_Cb] * corrFactor, oldLambdas[COMP_Cr] * corrFactor };
308
309
0
  m_cTrQuant.setLambdas ( newLambdas);
310
0
  m_cRdCost.setLambda   ( newLambda, slice.sps->bitDepths);
311
312
0
  if (saveUnadjusted)
313
0
  {
314
0
    m_cRdCost.saveUnadjustedLambda(); // TODO hlm: check if this actually improves the overall quality
315
0
  }
316
0
}
317
318
void EncCu::init( const VVEncCfg& encCfg, const SPS& sps, std::vector<int>* const globalCtuQpVector, Ctx* syncPicCtx, RateCtrl* pRateCtrl )
319
0
{
320
0
  DecCu::init( &m_cTrQuant, &m_cIntraSearch, &m_cInterSearch, encCfg.m_internChromaFormat );
321
0
  m_cRdCost.create     ();
322
0
  m_cRdCost.setCostMode( encCfg.m_costMode );
323
0
  if ( encCfg.m_lumaReshapeEnable || encCfg.m_lumaLevelToDeltaQPEnabled )
324
0
  {
325
0
    m_cRdCost.setReshapeInfo( encCfg.m_lumaReshapeEnable ? encCfg.m_reshapeSignalType : RESHAPE_SIGNAL_PQ, encCfg.m_internalBitDepth[ CH_L ], encCfg.m_internChromaFormat );
326
0
  }
327
328
0
  m_modeCtrl.init     ( encCfg, &m_cRdCost );
329
0
  m_cIntraSearch.init ( encCfg, &m_cTrQuant, &m_cRdCost, &m_SortedPelUnitBufs, m_unitCache );
330
0
  m_cInterSearch.init ( encCfg, &m_cTrQuant, &m_cRdCost, &m_modeCtrl, m_cIntraSearch.getSaveCSBuf() );
331
0
  m_cTrQuant.init     ( nullptr, encCfg.m_RDOQ, encCfg.m_useRDOQTS, false, true, encCfg.m_quantThresholdVal );
332
333
0
  m_syncPicCtx = syncPicCtx;                         ///< context storage for state of contexts at the wavefront/WPP/entropy-coding-sync second CTU of tile-row used for estimation
334
0
  m_pcRateCtrl = pRateCtrl;
335
336
  // Initialise scaling lists: The encoder will only use the SPS scaling lists. The PPS will never be marked present.
337
0
  const int maxLog2TrDynamicRange[ MAX_NUM_CH ] = { sps.getMaxLog2TrDynamicRange(), sps.getMaxLog2TrDynamicRange() };
338
0
  m_cTrQuant.getQuant()->setFlatScalingList( maxLog2TrDynamicRange, sps.bitDepths );
339
340
0
  m_pcEncCfg       = &encCfg;
341
342
0
  m_GeoCostList.init( encCfg.m_maxNumGeoCand );
343
344
0
  unsigned      uiMaxSize    = encCfg.m_CTUSize;
345
0
  ChromaFormat  chromaFormat = encCfg.m_internChromaFormat;
346
347
0
  Area ctuArea = Area( 0, 0, uiMaxSize, uiMaxSize );
348
349
0
  m_mergeItemList.init( encCfg.m_maxMergeRdCandNumTotal, m_pcEncCfg->m_Geo > 1 ? 3 : 1, chromaFormat, uiMaxSize, uiMaxSize );
350
351
0
  for( int i = 0; i < maxCuDepth; i++ )
352
0
  {
353
0
    Area area = Area( 0, 0, uiMaxSize >> ( i >> 1 ), uiMaxSize >> ( ( i + 1 ) >> 1 ) );
354
355
0
    if( area.width < (1 << MIN_CU_LOG2) || area.height < (1 << MIN_CU_LOG2) )
356
0
    {
357
0
      m_pTempCS[i] = m_pBestCS[i] = nullptr;
358
0
      continue;
359
0
    }
360
361
0
    m_pTempCS[i] = new CodingStructure( m_unitCache, nullptr );
362
0
    m_pBestCS[i] = new CodingStructure( m_unitCache, nullptr );
363
364
0
    m_pTempCS[i]->createForSearch( chromaFormat, area );
365
0
    m_pBestCS[i]->createForSearch( chromaFormat, area );
366
367
0
    m_pOrgBuffer[i].create( chromaFormat, area );
368
0
    m_pRspBuffer[i].create( CHROMA_400, area );
369
0
  }
370
371
0
  m_pTempCS2 = new CodingStructure( m_unitCache, nullptr );
372
0
  m_pBestCS2 = new CodingStructure( m_unitCache, nullptr );
373
374
0
  m_pTempCS2->createForSearch( chromaFormat, ctuArea );
375
0
  m_pBestCS2->createForSearch( chromaFormat, ctuArea );
376
377
0
  m_cuChromaQpOffsetIdxPlus1 = 0;
378
0
  m_tempQpDiff = 0;
379
0
  m_globalCtuQpVector = globalCtuQpVector;
380
381
0
  m_SortedPelUnitBufs.create( chromaFormat, uiMaxSize, uiMaxSize );
382
383
0
  for( uint8_t i = 0; i < MAX_TMP_BUFS; i++)
384
0
  {
385
0
    m_aTmpStorageLCU[i].create(chromaFormat, Area(0, 0, uiMaxSize, uiMaxSize));
386
0
  }
387
0
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
388
0
  {
389
0
    m_acMergeTmpBuffer[ui].create(chromaFormat, Area(0, 0, uiMaxSize, uiMaxSize));
390
0
  }
391
392
0
  const unsigned maxDepth = 2 * MAX_CU_SIZE_IDX;
393
0
  m_CtxBuffer.resize( maxDepth );
394
0
  m_CurrCtx = 0;
395
0
  if( encCfg.m_EDO )
396
0
    m_dbBuffer.create( chromaFormat, Area( 0, 0, uiMaxSize, uiMaxSize ), 0, 8 );
397
398
0
  m_MergeSimpleFlag = 0;
399
0
  m_tileIdx = 0;
400
0
}
401
402
403
void EncCu::destroy()
404
0
{
405
0
  for( int i = 0; i < maxCuDepth; i++ )
406
0
  {
407
0
    if( m_pTempCS[i] )
408
0
    {
409
0
      m_pTempCS[i]->destroy();
410
0
      delete m_pTempCS[i]; m_pTempCS[i] = nullptr;
411
0
    }
412
413
0
    if( m_pBestCS[i] )
414
0
    {
415
0
      m_pBestCS[i]->destroy();
416
0
      delete m_pBestCS[i]; m_pBestCS[i] = nullptr;
417
0
    }
418
419
0
    m_pOrgBuffer[i].destroy();
420
0
    m_pRspBuffer[i].destroy();
421
0
  }
422
423
0
  m_pTempCS2->destroy();
424
0
  m_pBestCS2->destroy();
425
426
0
  delete m_pTempCS2; m_pTempCS2 = nullptr;
427
0
  delete m_pBestCS2; m_pBestCS2 = nullptr;
428
429
0
  m_SortedPelUnitBufs.destroy();
430
431
0
  for( uint8_t i = 0; i < MAX_TMP_BUFS; i++)
432
0
  {
433
0
    m_aTmpStorageLCU[i].destroy();
434
0
  }
435
0
  for (unsigned ui = 0; ui < MRG_MAX_NUM_CANDS; ui++)
436
0
  {
437
0
    m_acMergeTmpBuffer[ui].destroy();
438
0
  }
439
440
441
0
  m_dbBuffer.destroy();
442
0
}
443
444
445
// Destructor: releases all resources by delegating to destroy().
// destroy() is called unconditionally here, so it also runs after an explicit destroy() call.
EncCu::~EncCu()
446
0
{
447
0
  destroy();
448
0
}
449
450
// ====================================================================================================================
451
// Public member functions
452
// ====================================================================================================================
453
454
/// Encodes one CTU of a picture.
///
/// Steps: reset the motion LUTs and the CABAC estimator state where the CTU
/// position requires it, run the mode decision (xCompressCtu), then code the
/// CTU with the estimator and, with entropy-coding sync enabled, store the
/// resulting context state for the CTU row below.
///
/// \param pic            picture being encoded
/// \param prevQP         previous QP per channel type; reset to the slice QP whenever the contexts are re-initialized
/// \param ctuXPosInCtus  horizontal CTU position in CTU units
/// \param ctuYPosInCtus  vertical CTU position in CTU units
void EncCu::encodeCtu( Picture* pic, int (&prevQP)[MAX_NUM_CH], uint32_t ctuXPosInCtus, uint32_t ctuYPosInCtus )
{
  CodingStructure&     cs    = *pic->cs;
  Slice*               slice = cs.slice;
  const PreCalcValues& pcv   = *cs.pcv;

#if ENABLE_MEASURE_SEARCH_SPACE
  if( ctuXPosInCtus == 0 && ctuYPosInCtus == 0 )
  {
    g_searchSpaceAcc.picW = pic->lwidth();
    g_searchSpaceAcc.picH = pic->lheight();
    g_searchSpaceAcc.addSlice( slice->isIntra(), slice->depth );
  }

#endif
  // raster-scan address and luma area of this CTU
  const int      ctuRsAddr = ctuYPosInCtus * pcv.widthInCtus + ctuXPosInCtus;
  const Position pos( ctuXPosInCtus * pcv.maxCUSize, ctuYPosInCtus * pcv.maxCUSize );
  const UnitArea ctuArea( cs.area.chromaFormat, Area( pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );

  // position of the containing tile's first CTU column / row
  const int tileXPosInCtus = cs.pps->tileColBd[cs.pps->ctuToTileCol[ctuXPosInCtus]];
  const int tileYPosInCtus = cs.pps->tileRowBd[cs.pps->ctuToTileRow[ctuYPosInCtus]];

  const bool atTileRowStart = ctuXPosInCtus == tileXPosInCtus;                    // first CTU of a CTU row inside the tile
  const bool atTileStart    = atTileRowStart && ctuYPosInCtus == tileYPosInCtus;  // very first CTU of the tile

  // The motion LUTs are emptied at the start of every CTU row of a tile (non-intra slices or IBC only).
  if( atTileRowStart && ( cs.slice->sliceType != VVENC_I_SLICE || cs.sps->IBC ) )
  {
    const int tileRowId = cs.pps->getTileLineId( ctuXPosInCtus, ctuYPosInCtus );
    cs.motionLutBuf[tileRowId].lut.resize( 0 );
    cs.motionLutBuf[tileRowId].lutIbc.resize( 0 );
  }

  // Contexts and prevQP are reset at every row start when WPP bit-equality or entropy-coding
  // sync is configured, and otherwise only at the first CTU of a tile.
  const bool resetEveryRow = atTileRowStart && ( m_pcEncCfg->m_ensureWppBitEqual || m_pcEncCfg->m_entropyCodingSyncEnabled );

  if( resetEveryRow || atTileStart )
  {
    m_CABACEstimator->initCtxModels( *slice );

    // With entropy-coding sync, rows below the first tile row start from the state stored for the row above.
    if( resetEveryRow && m_pcEncCfg->m_entropyCodingSyncEnabled && ( ctuYPosInCtus > tileYPosInCtus ) )
    {
      m_CABACEstimator->getCtx() = m_syncPicCtx[slice->pps->getTileLineId( ctuXPosInCtus, ctuYPosInCtus - 1 )];
    }

    // hlm: call CU::predictQP() here!
    prevQP[CH_C] = slice->sliceQp;
    prevQP[CH_L] = slice->sliceQp;
  }

  // mode decision
  xCompressCtu( cs, ctuArea, ctuRsAddr, prevQP );

  // code the CTU with the estimator
  m_CABACEstimator->resetBits();
  m_CABACEstimator->coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr, true, true );

  // Keep the context state reached after the first CTU of this row inside the tile;
  // it is picked up by the row below (only with entropy-coding sync).
  if( m_pcEncCfg->m_entropyCodingSyncEnabled && atTileRowStart )
  {
    m_syncPicCtx[slice->pps->getTileLineId( ctuXPosInCtus, ctuYPosInCtus )] = m_CABACEstimator->getCtx();
  }

  DTRACE_AREA_CRC( g_trace_ctx, D_CRC, cs, ctuArea );
}
515
516
// ====================================================================================================================
517
// Protected member functions
518
// ====================================================================================================================
519
520
// Runs the mode search for one CTU and copies the winning structure into the top-level coding structure.
//
// The search is first run with the partitioner initialised for CH_L. If the coding structure uses a
// dual I-tree and chroma is enabled, a second search is run with the partitioner initialised for CH_C.
//
//   cs         top-level coding structure that receives the result via useSubStructure()
//   area       area of the CTU
//   ctuRsAddr  CTU address (not read inside this function)
//   prevQP     previous QP per channel type, used to seed prevQP of the temp/best structures
void EncCu::xCompressCtu( CodingStructure& cs, const UnitArea& area, const unsigned ctuRsAddr, const int prevQP[] )
{
  m_tileIdx = cs.pps->getTileIdx( area.lumaPos() );

  m_modeCtrl.initCTUEncoding( *cs.slice, m_tileIdx );

  // init the partitioning manager
  Partitioner& part = m_partitioner;
  part.initCtu( area, CH_L, *cs.slice );

  // m_EDO follows the configuration, except with tile-parallel CTU encoding when the left or the
  // above luma neighbour position belongs to a different tile - then it is forced to 0
  const Position& ctuPos  = area.lumaPos();
  const bool leftInTile   = ctuPos.x == 0 || m_tileIdx == cs.pps->getTileIdx( ctuPos.offset(-1, 0) );
  const bool aboveInTile  = ctuPos.y == 0 || m_tileIdx == cs.pps->getTileIdx( ctuPos.offset( 0,-1) );
  const bool keepCfgEDO   = !m_pcEncCfg->m_tileParallelCtuEnc || ( leftInTile && aboveInTile );
  m_EDO = keepCfgEDO ? m_pcEncCfg->m_EDO : 0;

  if( m_pcEncCfg->m_IBCMode )
  {
    m_cInterSearch.resetCtuRecordIBC();
  }

  // init current context pointer
  m_CurrCtx = m_CtxBuffer.data();

  PelStorage* orgStore    = &m_pOrgBuffer[0];
  PelStorage* rspStore    = &m_pRspBuffer[0];
  CodingStructure* tempCS = m_pTempCS[0];
  CodingStructure* bestCS = m_pBestCS[0];
  cs.initSubStructure( *tempCS, part.chType, part.currArea(), false, orgStore, rspStore );
  cs.initSubStructure( *bestCS, part.chType, part.currArea(), false, orgStore, rspStore );
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_COMPRESS_CU, tempCS, CH_L );

  // Hands the current best structure over to the top-level CS; the hand-over is wrapped in
  // m_wppMutex when that mutex is set. bestCS and part are captured by reference on purpose:
  // xCompressCU() may swap the temp/best pointers and the partitioner is re-initialised for chroma.
  auto commitBestCS = [&]()
  {
    if( m_wppMutex ) m_wppMutex->lock();

    cs.useSubStructure( *bestCS, part.chType, TREE_D, CS::getArea( *bestCS, area, part.chType, part.treeType ) );

    if( m_wppMutex ) m_wppMutex->unlock();
  };

  // copy the relevant area: the source is the buffer returned by getRspOrigBuf() when the picture
  // has a valid filtered original buffer, otherwise the one returned by getOrigBuf()
  UnitArea clippedArea = clipArea( part.currArea(), cs.area );
  CPelUnitBuf org = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( clippedArea ) : cs.picture->getOrigBuf( clippedArea );
  tempCS->getOrgBuf( clippedArea ).copyFrom( org );

  const ReshapeData& reshapeData = cs.picture->reshapeData;
  if( cs.slice->lmcsEnabled && reshapeData.getCTUFlag() )
  {
    // additionally fill the reshaped luma original using the forward LUT
    tempCS->getRspOrgBuf( clippedArea.Y() ).rspSignal( org.get( COMP_Y ), reshapeData.getFwdLUT() );
  }

  // seed current/base QP with the slice QP and the previous QP with the caller-provided value
  tempCS->currQP[CH_L] = bestCS->currQP[CH_L] =
  tempCS->baseQP       = bestCS->baseQP       = cs.slice->sliceQp;
  tempCS->prevQP[CH_L] = bestCS->prevQP[CH_L] = prevQP[CH_L];

  xCompressCU( tempCS, bestCS, part );
  // all signals were already copied during compression if the CTU was split - at this point only the structures are copied to the top level CS

  // Ensure that a coding was found
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );

  commitBestCS();

  if( CS::isDualITree( cs ) && isChromaEnabled( cs.pcv->chrFormat ) )
  {
    // second pass for the chroma tree, starting again from the contexts stored in m_CurrCtx->start
    m_CABACEstimator->getCtx() = m_CurrCtx->start;

    part.initCtu( area, CH_C, *cs.slice );

    cs.initSubStructure( *tempCS, part.chType, part.currArea(), false, orgStore, rspStore );
    cs.initSubStructure( *bestCS, part.chType, part.currArea(), false, orgStore, rspStore );
    tempCS->currQP[CH_C] = bestCS->currQP[CH_C] =
    tempCS->baseQP       = bestCS->baseQP       = cs.slice->sliceQp;
    tempCS->prevQP[CH_C] = bestCS->prevQP[CH_C] = prevQP[CH_C];

    xCompressCU( tempCS, bestCS, part );

    // Ensure that a coding was found
    // Selected mode's RD-cost must be not MAX_DOUBLE.
    CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
    CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
    CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );

    commitBestCS();
  }

  // reset context states and uninit context pointer
  m_CABACEstimator->getCtx() = m_CurrCtx->start;
  m_CurrCtx                  = nullptr;
}
611
612
613
614
// Compares the candidate held in tempCS against bestCS.
//
// If tempCS contains at least one CU and m_modeCtrl.useModeResult() accepts it, the tempCS/bestCS
// pointers are swapped and the current CABAC contexts are stored in m_CurrCtx->best. In every case
// the CABAC contexts are restored to m_CurrCtx->start before returning.
//
// Returns true when bestCS was replaced.
bool EncCu::xCheckBestMode( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const bool useEDO )
{
  bool accepted = false;

  if( !tempCS->cus.empty() )
  {
    if( tempCS->cus.size() == 1 )
    {
      // sanity check on a single-CU candidate
      const CodingUnit& onlyCu = *tempCS->cus.front();
      CHECK( onlyCu.skip && !onlyCu.mergeFlag, "Skip flag without a merge flag is not allowed!" );
    }

    DTRACE_BEST_MODE( tempCS, bestCS, m_cRdCost.getLambda(true), useEDO );

    accepted = m_modeCtrl.useModeResult( encTestMode, tempCS, partitioner, useEDO );

    if( accepted )
    {
      // store temp best CI for next CU coding (must happen before the reset below)
      m_CurrCtx->best = m_CABACEstimator->getCtx();
      std::swap( tempCS, bestCS );
    }
  }

  // reset context states
  m_CABACEstimator->getCtx() = m_CurrCtx->start;

  return accepted;
}
642
643
// Fast chroma split decision: fills cuECtx.doHorChromaSplit / doVerChromaSplit / doQtChromaSplit.
//
// Nothing is written to cuECtx unless the partitioner reports a separate tree for tempCS, the
// current channel type is chroma, and the partitioner reports no implicit split.
//
// Two criteria are applied:
//  - if the luma CU found at the chroma block's luma position has (after chroma scaling) the same
//    width / height as the Cb original buffer and bestCS->cost is below FCBP_TH1 * width * height,
//    the vertical / horizontal split and the QT split are disabled;
//  - for square blocks, the two values returned by calcVarianceSplit() for Cb and Cr are compared
//    (scaled by FCBP_TH2 vs. 100) and may additionally disable the vertical or the horizontal split.
void xCheckFastCuChromaSplitting( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner&  partitioner, ComprCUCtx& cuECtx )
{
  const uint32_t refPelX = tempCS->area.Cb().lumaPos().x;
  const uint32_t refPelY = tempCS->area.Cb().lumaPos().y;

  if( !( partitioner.isSepTree( *tempCS ) && isChroma( partitioner.chType ) ) )
  {
    return;
  }

  // size of the luma CU covering the reference position; stays 0x0 when no CU is found there
  int colLumaW = 0;
  int colLumaH = 0;
  {
    Position lumaRefPos( refPelX, refPelY );
    CodingUnit* colLumaCu = bestCS->lumaCS->getCU( lumaRefPos, CH_L, TREE_D );

    if( colLumaCu )
    {
      colLumaH = colLumaCu->Y().height;
      colLumaW = colLumaCu->Y().width;
    }
  }

  if( partitioner.getImplicitSplit( *tempCS ) != CU_DONT_SPLIT )
  {
    return;
  }

  const CPelBuf orgCb = tempCS->getOrgBuf( COMP_Cb );
  const CPelBuf orgCr = tempCS->getOrgBuf( COMP_Cr );

  // cost criterion
  int th1 = FCBP_TH1;

  const bool costBelowThr = bestCS->cost < ( th1*orgCb.width*orgCb.height );
  const bool sameWidth    = ( colLumaW >> getChannelTypeScaleX( CH_C, tempCS->area.chromaFormat ) ) == orgCb.width;
  const bool sameHeight   = ( colLumaH >> getChannelTypeScaleY( CH_C, tempCS->area.chromaFormat ) ) == orgCb.height;

  cuECtx.doHorChromaSplit = !( sameHeight && costBelowThr );
  cuECtx.doVerChromaSplit = !( sameWidth  && costBelowThr );
  cuECtx.doQtChromaSplit  = !( ( sameWidth || sameHeight ) && costBelowThr );

  // variance criterion, square blocks only
  if( orgCb.width == orgCb.height )
  {
    int hVarCb, vVarCb;
    int hVarCr, vVarCr;

    orgCb.calcVarianceSplit( orgCb, orgCb.width, hVarCb, vVarCb );
    orgCr.calcVarianceSplit( orgCr, orgCr.width, hVarCr, vVarCr );

    const bool hMuchSmaller = ( hVarCr*FCBP_TH2 < vVarCr * 100 ) && ( hVarCb*FCBP_TH2 < vVarCb * 100 );

    if( hMuchSmaller )
    {
      cuECtx.doVerChromaSplit = false;
    }
    else if( ( vVarCr*FCBP_TH2 < hVarCr * 100 ) && ( vVarCb*FCBP_TH2 < hVarCb * 100 ) )
    {
      cuECtx.doHorChromaSplit = false;
    }
  }
}
716
717
// Recursive mode decision for the block currently addressed by the partitioner.
//
// Steps, in order:
//  1. Optional QP adaptation (perceptual QPA and/or block importance mapping) for luma when delta QP
//     is enabled and the partitioner reports an enabled quantization group.
//  2. Per-CU initialisation of mode control, CABAC start contexts and the chroma QP offset index.
//  3. If the block needs no implicit split: either reuse a cached result or test, in this order,
//     merge/skip, inter ME, inter IMV, IBC merge, IBC and intra - each only if m_modeCtrl.tryMode()
//     allows it.
//  4. Split modes (QT, BT-H, BT-V, TT-H, TT-V; QT first or last depending on cuECtx.qtBeforeBt), each
//     tested through xCheckModeSplit(), which recurses into this function.
//  5. Finishing: restore the best CABAC contexts, update prevQP, feed the motion LUT and run the
//     final consistency checks.
//
//   tempCS       working structure for the candidate under test (may be swapped with bestCS)
//   bestCS       structure holding the best candidate found so far
//   partitioner  partitioning state describing the current block
//
// If no candidate was found at all (bestCS->cus is empty) the function returns after
// finishCULevel() without running the final checks.
void EncCu::xCompressCU( CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner )
{
  const Area& lumaArea = tempCS->area.Y();

  Slice&   slice      = *tempCS->slice;
  const PPS &pps      = *tempCS->pps;
  const SPS &sps      = *tempCS->sps;
  const uint32_t uiLPelX  = tempCS->area.Y().lumaPos().x;
  const uint32_t uiTPelY  = tempCS->area.Y().lumaPos().y;
  const bool isBimEnabled = (m_pcEncCfg->m_blockImportanceMapping && !bestCS->picture->m_picShared->m_ctuBimQpOffset.empty());

  m_modeCtrl.initBlk( tempCS->area, slice.pic->poc );
  m_CABACEstimator->determineNeighborCus( *tempCS, partitioner.currArea(), partitioner.chType, partitioner.treeType );

  if ((m_pcEncCfg->m_usePerceptQPA || isBimEnabled) && pps.useDQP && isLuma (partitioner.chType) && partitioner.currQgEnable())
  {
    const PreCalcValues &pcv = *pps.pcv;
    Picture* const pic = bestCS->picture;
    const uint32_t ctuRsAddr = getCtuAddr (partitioner.currQgPos, pcv);

    if (partitioner.currSubdiv == 0) // CTU-level QP adaptation
    {
      if (m_pcEncCfg->m_usePerceptQPA)
      {
        if (m_pcEncCfg->m_internalUsePerceptQPATempFiltISlice == 2)
        {
          // difference between the stored CTU QP and the QP recomputed for this area; re-applied below
          m_tempQpDiff = pic->ctuAdaptedQP[ctuRsAddr] - BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, lumaArea, m_pcRateCtrl->getMinNoiseLevels());
        }

        if ((!slice.isIntra()) && (pcv.maxCUSize > 64) && // sub-CTU QPA behavior - Museum fix
            (uiLPelX + (pcv.maxCUSize >> 1) < (m_pcEncCfg->m_PadSourceWidth)) &&
            (uiTPelY + (pcv.maxCUSize >> 1) < (m_pcEncCfg->m_PadSourceHeight)))
        {
          // use the minimum of the QPs computed for the four quadrants of the luma area
          const uint32_t h = lumaArea.height >> 1;
          const uint32_t w = lumaArea.width  >> 1;
          const int adQPTL = BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, Area (uiLPelX + 0, uiTPelY + 0, w, h), m_pcRateCtrl->getMinNoiseLevels());
          const int adQPTR = BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, Area (uiLPelX + w, uiTPelY + 0, w, h), m_pcRateCtrl->getMinNoiseLevels());
          const int adQPBL = BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, Area (uiLPelX + 0, uiTPelY + h, w, h), m_pcRateCtrl->getMinNoiseLevels());
          const int adQPBR = BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, Area (uiLPelX + w, uiTPelY + h, w, h), m_pcRateCtrl->getMinNoiseLevels());

          tempCS->currQP[partitioner.chType] = tempCS->baseQP =
          bestCS->currQP[partitioner.chType] = bestCS->baseQP = std::min (std::min (adQPTL, adQPTR), std::min (adQPBL, adQPBR));

          if (m_pcEncCfg->m_internalUsePerceptQPATempFiltISlice == 2)
          {
            if ((m_globalCtuQpVector->size() > ctuRsAddr) && (slice.TLayer == 0) && // last CTU row of non-Intra key-frame
                (m_pcEncCfg->m_IntraPeriod == 2 * m_pcEncCfg->m_GOPSize) && (ctuRsAddr >= pcv.widthInCtus) && (uiTPelY + pcv.maxCUSize > m_pcEncCfg->m_PadSourceHeight))
            {
              m_globalCtuQpVector->at (ctuRsAddr) = m_globalCtuQpVector->at (ctuRsAddr - pcv.widthInCtus); // copy the pumping reducing QP offset from the top CTU neighbor
              tempCS->currQP[partitioner.chType] = tempCS->baseQP =
              bestCS->currQP[partitioner.chType] = bestCS->baseQP = tempCS->baseQP - m_globalCtuQpVector->at (ctuRsAddr);
            }
            tempCS->currQP[partitioner.chType] = tempCS->baseQP =
            bestCS->currQP[partitioner.chType] = bestCS->baseQP = Clip3 (0, MAX_QP, tempCS->baseQP + m_tempQpDiff);
          }
        }
        else
        {
          tempCS->currQP[partitioner.chType] = tempCS->baseQP =
          bestCS->currQP[partitioner.chType] = bestCS->baseQP = pic->ctuAdaptedQP[ctuRsAddr];
        }

        setUpLambda (slice, pic->ctuQpaLambda[ctuRsAddr], pic->ctuAdaptedQP[ctuRsAddr], false, true);
      }
      else // isBimEnabled without QPA
      {
        const int baseQp         = tempCS->baseQP;
        const unsigned bimQpSize = (unsigned) bestCS->picture->m_picShared->m_ctuBimQpOffset.size();
        uint32_t ctuAddr         = ctuRsAddr;

        if (bimQpSize != pcv.sizeInCtus) // re-calculate correct address of BIM CTU QP offset
        {
          const unsigned bimCtuSize  = m_pcEncCfg->m_bimCtuSize;
          const unsigned bimCtuWidth = (pcv.lumaWidth + bimCtuSize - 1) / bimCtuSize;

          ctuAddr = getCtuAddrFromCtuSize (partitioner.currQgPos, Log2 (bimCtuSize), bimCtuWidth);
          CHECK (ctuAddr >= bimQpSize, "ctuAddr exceeds size of m_ctuBimQpOffset");
        }
        tempCS->currQP[partitioner.chType] = tempCS->baseQP =
        bestCS->currQP[partitioner.chType] = bestCS->baseQP = Clip3 (-sps.qpBDOffset[CH_L], MAX_QP, tempCS->baseQP + pic->m_picShared->m_ctuBimQpOffset[ctuAddr]);

        updateLambda (slice, slice.getLambdas()[0], baseQp, tempCS->baseQP, true);
      }
    }
    else if (m_pcEncCfg->m_usePerceptQPA && slice.isIntra()) // currSubdiv 2 - use sub-CTU QPA
    {
      CHECK ((partitioner.currArea().lwidth() >= pcv.maxCUSize) || (partitioner.currArea().lheight() >= pcv.maxCUSize), "sub-CTU delta-QP error");
      tempCS->currQP[partitioner.chType] = tempCS->baseQP = BitAllocation::applyQPAdaptationSubCtu (&slice, m_pcEncCfg, lumaArea, m_pcRateCtrl->getMinNoiseLevels());

      if (m_pcEncCfg->m_internalUsePerceptQPATempFiltISlice == 2)
      {
        tempCS->currQP[partitioner.chType] = tempCS->baseQP = Clip3 (0, MAX_QP, tempCS->baseQP + m_tempQpDiff);
      }

      updateLambda (slice, pic->ctuQpaLambda[ctuRsAddr], pic->ctuAdaptedQP[ctuRsAddr], tempCS->baseQP, true);
    }
  }

  if (partitioner.currQtDepth == 0)
  {
    m_MergeSimpleFlag = 0;
  }
  m_modeCtrl.initCULevel( partitioner, *tempCS, m_MergeSimpleFlag );
  m_sbtCostSave[0] = m_sbtCostSave[1] = MAX_DOUBLE;

  // remember the contexts this CU starts from; every tested mode is reset to them
  m_CurrCtx->start = m_CABACEstimator->getCtx();

  m_cuChromaQpOffsetIdxPlus1 = 0;

  if( slice.chromaQpAdjEnabled && partitioner.currQgChromaEnable() )
  {
    // TODO M0133 : double check encoder decisions with respect to chroma QG detection and actual encode
    const int cuChromaQpOffsetSubdiv = slice.isIntra() ? slice.picHeader->cuChromaQpOffsetSubdivIntra : slice.picHeader->cuChromaQpOffsetSubdivInter;
    const int lgMinCuSize = sps.log2MinCodingBlockSize +
      std::max<int>(0, floorLog2(sps.CTUSize) - sps.log2MinCodingBlockSize - int((cuChromaQpOffsetSubdiv + 1) / 2));
    m_cuChromaQpOffsetIdxPlus1 = ( ( uiLPelX >> lgMinCuSize ) + ( uiTPelY >> lgMinCuSize ) ) % ( pps.chromaQpOffsetListLen + 1 );
  }

  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cux", uiLPelX ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuy", uiTPelY ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuw", tempCS->area.lwidth() ) );
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "cuh", tempCS->area.lheight() ) );
  DTRACE( g_trace_ctx, D_COMMON, "@(%4d,%4d) [%2dx%2d]\n", tempCS->area.lx(), tempCS->area.ly(), tempCS->area.lwidth(), tempCS->area.lheight() );

  if( tempCS->slice->checkLDC )
  {
    m_bestBcwCost[0] = m_bestBcwCost[1] = std::numeric_limits<double>::max();
    m_bestBcwIdx[0] = m_bestBcwIdx[1] = -1;
  }

  m_cInterSearch.resetSavedAffineMotion();
  {
    const ComprCUCtx &cuECtx      = *m_modeCtrl.comprCUCtx;
    const CodingStructure& cs     = *tempCS;
    const PartSplit implicitSplit = partitioner.getImplicitSplit( cs );
    const bool isBoundary         = implicitSplit != CU_DONT_SPLIT;
    const bool lossless           = false;
    int qp                        = cs.baseQP;

#if ENABLE_MEASURE_SEARCH_SPACE
    if( !isBoundary )
    {
      g_searchSpaceAcc.addPartition( partitioner.currArea(), partitioner.isSepTree( *tempCS ) ? partitioner.chType : MAX_NUM_CH );
    }

#endif
    if( ! isBoundary )
    {
      if (pps.useDQP && partitioner.isSepTree (*tempCS) && isChroma (partitioner.chType))
      {
        // separate-tree chroma: take the QP of the luma CU found at the centre of the chroma block
        const ChromaFormat chromaFm = tempCS->area.chromaFormat;
        const Position chromaCentral (tempCS->area.Cb().chromaPos().offset (tempCS->area.Cb().chromaSize().width >> 1, tempCS->area.Cb().chromaSize().height >> 1));
        const Position lumaRefPos (chromaCentral.x << getChannelTypeScaleX (CH_C, chromaFm), chromaCentral.y << getChannelTypeScaleY (CH_C, chromaFm));
        const CodingUnit* colLumaCu = bestCS->lumaCS->getCU (lumaRefPos, CH_L, TREE_D);
        // update qp - only if a luma CU was found (xCheckFastCuChromaSplitting() guards the same
        // lookup against a null result); otherwise the base QP is kept
        if (colLumaCu)
        {
          qp = colLumaCu->qp;
        }
      }

      m_cIntraSearch.reset();

      bool isReuseCU = m_modeCtrl.isReusingCuValid( cs, partitioner, qp );

      bool checkIbc = m_pcEncCfg->m_IBCMode && bestCS->picture->useIBC && (partitioner.chType == CH_L);
      if ((m_pcEncCfg->m_IBCFastMethod>3) && (cs.area.lwidth() * cs.area.lheight()) > (16 * 16))
      {
        checkIbc = false;
      }
      if( isReuseCU )
      {
        xReuseCachedResult( tempCS, bestCS, partitioner );
      }
      else
      {
        // add first pass modes
        if ( !slice.isIntra() && !slice.isIRAP() && !( cs.area.lwidth() == 4 && cs.area.lheight() == 4 ) && !partitioner.isConsIntra() )
        {
          // add inter modes
          EncTestMode encTestModeSkip = { ETM_MERGE_SKIP, ETO_STANDARD, qp, lossless };
          if (m_modeCtrl.tryMode(encTestModeSkip, cs, partitioner))
          {
            xCheckRDCostUnifiedMerge(tempCS, bestCS, partitioner, encTestModeSkip);

            // a best CU that is not coded as skip must not carry the mmvdSkip flag
            CodingUnit* cu = bestCS->getCU(partitioner.chType, partitioner.treeType);
            if (cu && !cu->skip)
            {
              cu->mmvdSkip = false;
            }
          }
          EncTestMode encTestMode = { ETM_INTER_ME, ETO_STANDARD, qp, lossless };
          if (m_modeCtrl.tryMode(encTestMode, cs, partitioner))
          {
            xCheckRDCostInter(tempCS, bestCS, partitioner, encTestMode);
          }

          if (m_pcEncCfg->m_AMVRspeed)
          {
            // bestIntPelCost is never updated in this function, so it stays MAX_DOUBLE
            double bestIntPelCost = MAX_DOUBLE;

            EncTestMode encTestModeIMV = {ETM_INTER_IMV, ETO_STANDARD, qp, lossless};
            if( m_modeCtrl.tryMode( encTestModeIMV, cs, partitioner ) )
            {
              const bool skipAltHpelIF = ( int( ( encTestModeIMV.opts & ETO_IMV ) >> ETO_IMV_SHIFT ) == 4 ) && ( bestIntPelCost > 1.25 * bestCS->cost );
              if (!skipAltHpelIF)
              {
                xCheckRDCostInterIMV(tempCS, bestCS, partitioner, encTestModeIMV );
              }
            }
          }
        }

        if (checkIbc && !partitioner.isConsInter())
        {
          EncTestMode encTestModeIBCMerge = { ETM_IBC_MERGE, ETO_STANDARD, qp, lossless };
          if ((m_pcEncCfg->m_IBCFastMethod < 4) && (partitioner.chType == CH_L) && m_modeCtrl.tryMode(encTestModeIBCMerge, cs, partitioner))
          {
            xCheckRDCostIBCModeMerge2Nx2N(tempCS, bestCS, partitioner, encTestModeIBCMerge);
          }

          EncTestMode encTestModeIBC = { ETM_IBC, ETO_STANDARD, qp, lossless };
          if (m_modeCtrl.tryMode(encTestModeIBC, cs, partitioner))
          {
            xCheckRDCostIBCMode(tempCS, bestCS, partitioner, encTestModeIBC);
          }
        }
        if( m_EDO && bestCS->cost != MAX_DOUBLE )
        {
          xCalDebCost(*bestCS, partitioner);
        }

        // add intra modes
        EncTestMode encTestMode( {ETM_INTRA, ETO_STANDARD, qp, lossless} );
        if( !partitioner.isConsInter() && m_modeCtrl.tryMode( encTestMode, cs, partitioner ) )
        {
          xCheckRDCostIntra( tempCS, bestCS, partitioner, encTestMode );
        }
      } // reusing cu

      m_modeCtrl.beforeSplit( partitioner );

      if (cuECtx.bestCS && ((cuECtx.bestCostNoImv == (MAX_DOUBLE * .5) || cuECtx.isReusingCu) && !slice.isIntra()) )
      {
        m_cInterSearch.loadGlobalUniMvs( lumaArea, *pps.pcv );
      }

      if (!cs.slice->isIntra() && (partitioner.chType == CH_L) && ( m_pcEncCfg->m_qtbttSpeedUpMode & 2) && (partitioner.currQtDepth < 3) && bestCS->cus.size())
      {
        // record, one bit per QT depth, whether the best unsplit CU is a plain merge CU
        // (merge without MMVD, ISP or GEO)
        int flagDbefore = (bestCS->cus[0]->mergeFlag && !bestCS->cus[0]->mmvdMergeFlag && !bestCS->cus[0]->ispMode && !bestCS->cus[0]->geo) ? 1 : 0;
        if (partitioner.currQtDepth == 0)
        {
          m_MergeSimpleFlag = flagDbefore;
        }
        else
        {
          int markFlag = (partitioner.currQtDepth == 1) ? 1 : 3;
          m_MergeSimpleFlag = (flagDbefore << partitioner.currQtDepth) | (m_MergeSimpleFlag & markFlag);
        }
      }
    } //boundary

    if( ( m_pcEncCfg->m_IntraPeriod == 1 ) && ( partitioner.chType == CH_C ) )
    {
      xCheckFastCuChromaSplitting( tempCS, bestCS, partitioner, *m_modeCtrl.comprCUCtx );
    }
    //////////////////////////////////////////////////////////////////////////
    // split modes
    EncTestMode lastTestMode;

    if( cuECtx.qtBeforeBt )
    {
      EncTestMode encTestMode( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }

    if( partitioner.canSplit( CU_HORZ_SPLIT, cs ) )
    {
      // add split modes
      EncTestMode encTestMode( { ETM_SPLIT_BT_H, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }

    if( partitioner.canSplit( CU_VERT_SPLIT, cs ) )
    {
      // add split modes
      EncTestMode encTestMode( { ETM_SPLIT_BT_V, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }

    if( partitioner.canSplit( CU_TRIH_SPLIT, cs ) )
    {
      // add split modes
      EncTestMode encTestMode( { ETM_SPLIT_TT_H, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }

    if( partitioner.canSplit( CU_TRIV_SPLIT, cs ) )
    {
      // add split modes
      EncTestMode encTestMode( { ETM_SPLIT_TT_V, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }

    if( !cuECtx.qtBeforeBt )
    {
      EncTestMode encTestMode( { ETM_SPLIT_QT, ETO_STANDARD, qp, false } );
      if( m_modeCtrl.trySplit( encTestMode, cs, partitioner, lastTestMode ) )
      {
        lastTestMode = encTestMode;
        xCheckModeSplit( tempCS, bestCS, partitioner, encTestMode );
      }
    }
  }

  // nothing could be coded for this block: leave without touching contexts, QPs or the motion LUT
  if( bestCS->cus.empty() )
  {
    m_modeCtrl.finishCULevel( partitioner );
    return;
  }

  //////////////////////////////////////////////////////////////////////////
  // Finishing CU
  // set context states
  m_CABACEstimator->getCtx() = m_CurrCtx->best;

  // QP from last processed CU for further processing
  //copy the qp of the last non-chroma CU
  int numCUInThisNode = (int)bestCS->cus.size();
  if( numCUInThisNode > 1 && bestCS->cus.back()->chType == CH_C && !CS::isDualITree( *bestCS ) )
  {
    CHECK( bestCS->cus[numCUInThisNode-2]->chType != CH_L, "wrong chType" );
    bestCS->prevQP[partitioner.chType] = bestCS->cus[numCUInThisNode-2]->qp;
  }
  else
  {
    bestCS->prevQP[partitioner.chType] = bestCS->cus.back()->qp;
  }

  // a single inter or IBC luma CU covering the whole area contributes its motion to the motion LUT
  if( ( !slice.isIntra() || slice.sps->IBC )
    && partitioner.chType == CH_L
    && bestCS->cus.size() == 1 && ( bestCS->cus.back()->predMode == MODE_INTER || bestCS->cus.back()->predMode == MODE_IBC )
    && bestCS->area.Y() == (*bestCS->cus.back()).Y() )
  {
    const CodingUnit& cu = *bestCS->cus.front();
    bool isIbcSmallBlk = CU::isIBC(cu) && (cu.lwidth() * cu.lheight() <= 16);
    if (!cu.affine && !cu.geo && !isIbcSmallBlk)
    {
      const MotionInfo &mi = cu.getMotionInfo();
      HPMVInfo hMi( mi, ( mi.interDir() == 3 ) ? cu.BcwIdx : BCW_DEFAULT, cu.imv == IMV_HPEL, CU::isIBC( cu ) );
      cu.cs->addMiToLut( CU::isIBC( cu ) ? cu.cs->motionLut.lutIbc : cu.cs->motionLut.lut, hMi );
    }
  }

  m_modeCtrl.finishCULevel( partitioner );
  if( m_cIntraSearch.getSaveCuCostInSCIPU() && bestCS->cus.size() == 1 )
  {
    m_cIntraSearch.saveCuAreaCostInSCIPU( Area( partitioner.currArea().lumaPos(), partitioner.currArea().lumaSize() ), bestCS->cost );
  }

  // Assert if Best prediction mode is NONE
  // Selected mode's RD-cost must be not MAX_DOUBLE.
  CHECK( bestCS->cus.empty()                                   , "No possible encoding found" );
  CHECK( bestCS->cus[0]->predMode == NUMBER_OF_PREDICTION_MODES, "No possible encoding found" );
  CHECK( bestCS->cost             == MAX_DOUBLE                , "No possible encoding found" );
}
1097
1098
1099
// Tests one split mode, running xCheckModeSplitInternal() once or twice depending on how the mode
// constraint of the children is obtained (value returned by CS::signalModeCons()):
//  - LDT_MODE_TYPE_SIGNAL : two passes, first with MODE_TYPE_INTER, then with MODE_TYPE_INTRA
//  - LDT_MODE_TYPE_INFER  : one pass with MODE_TYPE_INTRA
//  - LDT_MODE_TYPE_INHERIT: one pass with the parent's mode type
//
// The partitioner's modeType / treeType / chType are restored to their entry values after every
// pass. The loop ends early when xCheckModeSplitInternal() sets skipInterPass.
void EncCu::xCheckModeSplit(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  // partitioner state to put back after each pass
  const ModeType    parentModeType = partitioner.modeType;
  const TreeType    parentTreeType = partitioner.treeType;
  const ChannelType parentChType   = partitioner.chType;

  const int  modeConsSignal  = CS::signalModeCons( *tempCS, partitioner.currArea(), getPartSplit(encTestMode), parentModeType);
  const int  numPasses       = ( modeConsSignal == LDT_MODE_TYPE_SIGNAL ) ? 2 : 1;
  const bool parentIsModeAll = parentModeType == MODE_TYPE_ALL;
  bool skipInterPass         = false;

  for( int pass = 0; pass < numPasses && !skipInterPass; pass++ )
  {
    //change cons modes
    switch( modeConsSignal )
    {
    case LDT_MODE_TYPE_SIGNAL:
      CHECK( numPasses != 2, "numRoundRdo shall be 2 - [LDT_MODE_TYPE_SIGNAL]" );
      partitioner.modeType = ( pass == 0 ) ? MODE_TYPE_INTER : MODE_TYPE_INTRA;
      break;

    case LDT_MODE_TYPE_INFER:
      CHECK( numPasses != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INFER]" );
      partitioner.modeType = MODE_TYPE_INTRA;
      break;

    case LDT_MODE_TYPE_INHERIT:
      CHECK( numPasses != 1, "numRoundRdo shall be 1 - [LDT_MODE_TYPE_INHERIT]" );
      partitioner.modeType = parentModeType;
      break;

    default:
      break;
    }

    //for lite intra encoding fast algorithm, set the status to save inter coding info
    if( parentIsModeAll )
    {
      if( partitioner.modeType == MODE_TYPE_INTER )
      {
        m_cIntraSearch.setSaveCuCostInSCIPU( true );
        m_cIntraSearch.setNumCuInSCIPU( 0 );
      }
      else
      {
        m_cIntraSearch.setSaveCuCostInSCIPU( false );
        if( partitioner.modeType == MODE_TYPE_ALL )
        {
          m_cIntraSearch.setNumCuInSCIPU( 0 );
        }
      }
    }

    xCheckModeSplitInternal( tempCS, bestCS, partitioner, encTestMode, parentModeType, skipInterPass );

    //recover cons modes
    partitioner.modeType = parentModeType;
    partitioner.treeType = parentTreeType;
    partitioner.chType   = parentChType;

    if( parentIsModeAll )
    {
      m_cIntraSearch.setSaveCuCostInSCIPU( false );
      // NOTE(review): modeType was just restored to the parent's value, which is MODE_TYPE_ALL in
      // this branch, so the comparison with MODE_TYPE_INTRA looks like it can never hold - confirm
      // whether the child mode type of the finished pass was meant here.
      if( numPasses == 2 && partitioner.modeType == MODE_TYPE_INTRA )
      {
        m_cIntraSearch.initCuAreaCostInSCIPU();
      }
    }
  }
}
1161
1162
// Evaluates one split candidate (quad / binary / ternary) of the current partitioner area.
//
// The function estimates the bits needed to signal the split, may skip the split based on a
// predicted-cost heuristic, and otherwise compresses every child area with xCompressCU,
// collecting the results in tempCS. For a node whose parent mode type is MODE_TYPE_ALL and
// whose child mode type is MODE_TYPE_INTRA, the chroma of the whole node is coded afterwards
// as a single chroma tree. Finally tempCS is compared against bestCS via xCheckBestMode.
//
// tempCS         - working coding structure; re-initialised here with the QP of encTestMode
// bestCS         - best coding structure so far; used as cost reference and passed to xCheckBestMode
// partitioner    - current partitioning state; its modeType on entry is taken as the child mode type,
//                  and it is set to modeTypeParent on the regular exit path
// encTestMode    - supplies the QP and (through getPartSplit) the split type to test
// modeTypeParent - mode type of the node that is being split
// skipInterPass  - set to true (never cleared here) when, for parent MODE_TYPE_ALL and child
//                  MODE_TYPE_INTER, CUs without residual cover at least half of the luma area
void EncCu::xCheckModeSplitInternal(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, const ModeType modeTypeParent, bool& skipInterPass )
1163
0
{
1164
0
  const int qp                     = encTestMode.qp;
1165
0
  // values saved here are written back at the end of the function (and partly on early exits)
  const int oldPrevQp              = tempCS->prevQP[partitioner.chType];
1166
0
  const auto oldMotionLut          = tempCS->motionLut;
1167
0
  const ReshapeData& reshapeData   = tempCS->picture->reshapeData;
1168
                                   
1169
0
  const PartSplit split            = getPartSplit( encTestMode );
1170
0
  const ModeType  modeTypeChild    = partitioner.modeType;
1171
1172
0
  CHECK( !( split == CU_QUAD_SPLIT || split == CU_HORZ_SPLIT || split == CU_VERT_SPLIT
1173
0
         || split == CU_TRIH_SPLIT || split == CU_TRIV_SPLIT ), "invalid split type" );
1174
1175
0
  tempCS->initStructData( qp );
1176
1177
0
  // load the CABAC context state stored in the current entry of the context stack
  m_CABACEstimator->getCtx()       = m_CurrCtx->start;
1178
1179
0
  // keep a copy of the split-related contexts; they are written back if the split test is skipped
  const uint16_t split_ctx_size    = Ctx::SplitFlag.size() + Ctx::SplitQtFlag.size() + Ctx::SplitHvFlag.size() + Ctx::Split12Flag.size() + Ctx::ModeConsFlag.size();
1180
0
  const TempCtx  ctxSplitFlags     ( m_CtxCache, SubCtx( CtxSet( Ctx::SplitFlag(), split_ctx_size ), m_CABACEstimator->getCtx() ) );
1181
1182
0
  // estimate the bits for signalling the split mode and the mode constraint
  m_CABACEstimator->determineNeighborCus( *tempCS, partitioner.currArea(), partitioner.chType, partitioner.treeType );
1183
0
  m_CABACEstimator->resetBits           ();
1184
0
  m_CABACEstimator->split_cu_mode       ( split, *tempCS, partitioner );
1185
0
  // mode_constraint is called with the parent's mode type in the partitioner; the child's
  // type is passed explicitly and put back into the partitioner right afterwards
  partitioner     . modeType            = modeTypeParent;
1186
0
  m_CABACEstimator->mode_constraint     ( split, *tempCS, partitioner, modeTypeChild );
1187
0
  partitioner     . modeType            = modeTypeChild;
1188
1189
0
  const int64_t splitBits   = m_CABACEstimator->getEstFracBits();
1190
1191
0
  // chromaNotSplit: luma is split below this node while chroma is coded once for the whole node
  const bool chromaNotSplit = modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTRA;
1192
0
  const bool isChromaTooBig = isChromaEnabled( tempCS->pps->pcv->chrFormat ) && tempCS->area.Y().maxDim() > tempCS->sps->getMaxTbSize();
1193
0
  bool       skipSplitTest  = chromaNotSplit && isChromaTooBig;
1194
1195
0
  if( !skipSplitTest )
1196
0
  {
1197
0
    // model parameters for the split-cost prediction; -1 means that no table entry was selected
    double         a = -1, b = -1;
1198
0
    const unsigned w       = partitioner.currArea().lwidth();
1199
0
    const unsigned h       = partitioner.currArea().lheight();
1200
0
    // the table-based model is only used for square luma blocks in B slices with a parameter id in [0, 3]
    const bool contextCond = w == h && tempCS->slice->sliceType == VVENC_B_SLICE && isLuma( partitioner.chType ) && m_pcEncCfg->m_splitCostThrParamId >= 0 && m_pcEncCfg->m_splitCostThrParamId <= 3;
1201
1202
0
    if( contextCond )
1203
0
    {
1204
0
      uint8_t nsPredInd = m_modeCtrl.comprCUCtx->bestNsPredMode.type == ETM_INTRA;
1205
0
      // NOTE(review): assumes log2( w ) >= 3 so that the index does not wrap - confirm against the smallest splittable square block
      uint8_t szInd     = getLog2( w ) - 3;
1206
0
      uint8_t splitInd  = split == CU_QUAD_SPLIT ? 1 : 0;
1207
0
      if ( m_pcEncCfg->m_splitCostThrParamId <= 1 )
1208
0
      {
1209
0
        a = coefSquareCUsFasterFastMedium[m_pcEncCfg->m_splitCostThrParamId][szInd][nsPredInd][splitInd][0];
1210
0
        b = coefSquareCUsFasterFastMedium[m_pcEncCfg->m_splitCostThrParamId][szInd][nsPredInd][splitInd][1];
1211
0
      }
1212
0
      else
1213
0
      {
1214
0
        uint8_t mtInd = (partitioner.currMtDepth == 0);
1215
0
        a = coefSquareCUsSlowSlower[m_pcEncCfg->m_splitCostThrParamId - 2][szInd][nsPredInd][partitioner.currQtDepth][mtInd][splitInd][0];
1216
0
        b = coefSquareCUsSlowSlower[m_pcEncCfg->m_splitCostThrParamId - 2][szInd][nsPredInd][partitioner.currQtDepth][mtInd][splitInd][1];
1217
0
      }
1218
0
    }
1219
1220
0
    if( a > -1 && b > -1 )
1221
0
    {
1222
0
      // predicted split cost = best non-split cost / ( 1 + b * exp( a * qp ) ) + split bits;
      // the split is skipped when this prediction is not below the best non-split cost
      const double bestNsCost    = m_modeCtrl.comprCUCtx->bestCostBeforeSplit == MAX_DOUBLE ? -1 : m_modeCtrl.comprCUCtx->bestCostBeforeSplit;
1223
0
      const double factor        = 1.0 + b * exp( a * qp );
1224
0
      const double predSplitCost = bestNsCost / factor + splitBits;
1225
0
      skipSplitTest              = bestNsCost >= 0 && predSplitCost >= bestNsCost;
1226
0
    }
1227
0
    else
1228
0
    {
1229
0
      // fallback: fixed scaling factor applied to the cost of bestCS
      // numChild ends up as 2 for binary, 3 for ternary and 4 for quad splits
      int numChild = 3;
1230
0
      if( split == CU_VERT_SPLIT || split == CU_HORZ_SPLIT ) numChild--;
1231
0
      else if( split == CU_QUAD_SPLIT ) numChild++;
1232
1233
0
      int64_t approxBits = m_pcEncCfg->m_qtbttSpeedUp > 0 ? numChild << SCALE_BITS : 0;
1234
1235
0
      const double factor     = ( tempCS->currQP[partitioner.chType] > 30                              ? 1.1  : 1.075 ) +
1236
0
                                (   m_pcEncCfg->m_qtbttSpeedUp > 0                                     ? 0.01 : 0.0   ) +
1237
0
                                ( ( m_pcEncCfg->m_qtbttSpeedUp > 0 && isChroma( partitioner.chType ) ) ? 0.2  : 0.0   );
1238
       
1239
0
      const double baseCost   = bestCS->cost + bestCS->costDbOffset;
1240
0
      const double predCost   = baseCost / factor + splitBits + approxBits;
1241
0
      skipSplitTest           = predCost >= baseCost;
1242
0
    }
1243
0
  }
1244
1245
0
  if( skipSplitTest )
1246
0
  {
1247
0
    // split is not tested: write back the saved split contexts and leave;
    // partitioner.modeType still holds the child mode type on this path
    m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::SplitFlag(), split_ctx_size ), ctxSplitFlags );
1248
0
    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
1249
0
    return;
1250
0
  }
1251
1252
0
  if( partitioner.treeType == TREE_D )
1253
0
  {
1254
0
    if( chromaNotSplit )
1255
0
    {
1256
0
      // children are coded as luma-only tree; chroma follows after the luma children
      CHECK( partitioner.chType != CH_L, "chType must be luma" );
1257
0
      partitioner.treeType = TREE_L;
1258
0
    }
1259
0
    else
1260
0
    {
1261
0
      partitioner.treeType = TREE_D;
1262
0
    }
1263
0
  }
1264
1265
0
  partitioner.splitCurrArea( split, *tempCS );
1266
0
  bool qgEnableChildren = partitioner.currQgEnable(); // QG possible at children level
1267
1268
0
  // advance to the next context-stack entry for the children; every exit below steps back again
  m_CurrCtx++;
1269
1270
0
  // save affine / uni-directional MV info; it is added back after the final RD check
  AffineMVInfo tmpMVInfo;
1271
0
  bool isAffMVInfoSaved = m_cInterSearch.m_AffineProfList->savePrevAffMVInfo( 0, tmpMVInfo );
1272
1273
0
  BlkUniMvInfo tmpUniMvInfo;
1274
0
  bool         isUniMvInfoSaved = false;
1275
0
  if( !tempCS->slice->isIntra() )
1276
0
  {
1277
0
    m_cInterSearch.m_BlkUniMvInfoBuffer->savePrevUniMvInfo( tempCS->area.Y(), tmpUniMvInfo, isUniMvInfoSaved );
1278
0
  }
1279
1280
0
  // copy of the estimator's derive context, written back on every exit after this point
  DeriveCtx deriveCtx = m_CABACEstimator->getDeriveCtx();
1281
1282
0
  do
1283
0
  {
1284
0
    const auto &subCUArea  = partitioner.currArea();
1285
1286
0
    // only children whose luma position lies inside the picture are processed
    if( tempCS->picture->Y().contains( subCUArea.lumaPos() ) )
1287
0
    {
1288
0
      // per-depth buffers and coding structures used for the child
      PelStorage* orgBuffer =  &m_pOrgBuffer[partitioner.currDepth];
1289
0
      PelStorage* rspBuffer =  &m_pRspBuffer[partitioner.currDepth];
1290
0
      CodingStructure *tempSubCS = m_pTempCS[partitioner.currDepth];
1291
0
      CodingStructure *bestSubCS = m_pBestCS[partitioner.currDepth];
1292
1293
0
      tempCS->initSubStructure( *tempSubCS, partitioner.chType, subCUArea, false, orgBuffer, rspBuffer );
1294
0
      tempCS->initSubStructure( *bestSubCS, partitioner.chType, subCUArea, false, orgBuffer, rspBuffer );
1295
1296
      // copy org buffer, need to be done after initSubStructure because of reshaping!
1297
0
      orgBuffer->copyFrom( tempCS->getOrgBuf( subCUArea ) );
1298
0
      if( tempCS->slice->lmcsEnabled && reshapeData.getCTUFlag() )
1299
0
      {
1300
0
        rspBuffer->Y().copyFrom( tempCS->getRspOrgBuf( subCUArea.Y() ) );
1301
0
      }
1302
1303
0
      // bestParent points to bestCS only for the duration of the recursive call
      tempSubCS->bestParent = bestSubCS->bestParent = bestCS;
1304
1305
0
      xCompressCU(tempSubCS, bestSubCS, partitioner );
1306
1307
0
      tempSubCS->bestParent = bestSubCS->bestParent = nullptr;
1308
1309
0
      if( bestSubCS->cost == MAX_DOUBLE )
1310
0
      {
1311
0
        // no valid result for this child: mark the whole split as invalid and leave.
        // NOTE(review): unlike the regular exit, this path does not write back oldPrevQp, does not
        // re-add the saved affine / uni MV info and leaves partitioner.modeType at the child value - confirm intended
        CHECK( split == CU_QUAD_SPLIT, "Split decision reusing cannot skip quad split" );
1312
0
        tempCS->cost = MAX_DOUBLE;
1313
0
        tempCS->costDbOffset = 0;
1314
0
        m_CurrCtx--;
1315
0
        partitioner.exitCurrSplit();
1316
0
        xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
1317
0
        if( partitioner.chType == CH_L )
1318
0
        {
1319
0
          tempCS->motionLut = oldMotionLut;
1320
0
        }
1321
1322
0
        m_CABACEstimator->getDeriveCtx() = deriveCtx;
1323
0
        return;
1324
0
      }
1325
1326
0
      // merge the child's best result into tempCS
      tempCS->useSubStructure( *bestSubCS, partitioner.chType, TREE_D, CS::getArea( *tempCS, subCUArea, partitioner.chType, partitioner.treeType ), partitioner.hasNextPart() || chromaNotSplit );
1327
1328
0
      if( partitioner.currQgEnable() )
1329
0
      {
1330
0
        tempCS->prevQP[partitioner.chType] = bestSubCS->prevQP[partitioner.chType];
1331
0
      }
1332
0
      // sanity checks: prediction modes of the child CUs must match the mode constraint
      if( partitioner.isConsInter() )
1333
0
      {
1334
0
        for( int i = 0; i < bestSubCS->cus.size(); i++ )
1335
0
        {
1336
0
          CHECK( bestSubCS->cus[i]->predMode != MODE_INTER, "all CUs must be inter mode in an Inter coding region (SCIPU)" );
1337
0
        }
1338
0
      }
1339
0
      else if( partitioner.isConsIntra() )
1340
0
      {
1341
0
        for( int i = 0; i < bestSubCS->cus.size(); i++ )
1342
0
        {
1343
0
          CHECK( bestSubCS->cus[i]->predMode == MODE_INTER, "all CUs must not be inter mode in an Intra coding region (SCIPU)" );
1344
0
        }
1345
0
      }
1346
1347
0
      tempSubCS->releaseIntermediateData();
1348
0
      bestSubCS->releaseIntermediateData();
1349
0
      if( !tempCS->slice->isIntra() && partitioner.isConsIntra() )
1350
0
      {
1351
0
        // early termination: the cost accumulated so far already exceeds the cost of bestCS
        tempCS->cost = m_cRdCost.calcRdCost( tempCS->fracBits, tempCS->dist );
1352
0
        if( tempCS->cost > bestCS->cost )
1353
0
        {
1354
0
          // NOTE(review): this exit neither calls xCheckBestMode nor writes back oldPrevQp / saved MV info - confirm intended
          tempCS->cost = MAX_DOUBLE;
1355
0
          tempCS->costDbOffset = 0;
1356
0
          m_CurrCtx--;
1357
0
          partitioner.exitCurrSplit();
1358
0
          if( partitioner.chType == CH_L )
1359
0
          {
1360
0
            tempCS->motionLut = oldMotionLut;
1361
0
          }
1362
1363
0
          m_CABACEstimator->getDeriveCtx() = deriveCtx;
1364
0
          return;
1365
0
        }
1366
0
      }
1367
0
    }
1368
0
  } while( partitioner.nextPart( *tempCS ) );
1369
1370
0
  partitioner.exitCurrSplit();
1371
1372
0
  m_CurrCtx--;
1373
1374
0
  m_CABACEstimator->getDeriveCtx() = deriveCtx;
1375
1376
0
  if( chromaNotSplit )
1377
0
  {
1378
    //Note: In local dual tree region, the chroma CU refers to the central luma CU's QP.
1379
    //If the luma CU QP shall be predQP (no residual in it and before it in the QG), it must be revised to predQP before encoding the chroma CU
1380
    //Otherwise, the chroma CU uses predQP+deltaQP in encoding but is decoded as using predQP, thus causing encoder-decoder mismatch on chroma qp.
1381
0
    if( tempCS->pps->useDQP )
1382
0
    {
1383
      //find the parent CS that includes all coded CUs in the QG before this node
1384
0
      CodingStructure* qgCS = tempCS;
1385
0
      bool deltaQpCodedBeforeThisNode = false;
1386
0
      if( partitioner.currArea().lumaPos() != partitioner.currQgPos )
1387
0
      {
1388
0
        // walk up the parent chain until the CS starting at the QG position is reached
        int numParentNodeToQgCS = 0;
1389
0
        while( qgCS->area.lumaPos() != partitioner.currQgPos )
1390
0
        {
1391
0
          CHECK( qgCS->parent == nullptr, "parent of qgCS shall exsit" );
1392
0
          qgCS = qgCS->parent;
1393
0
          numParentNodeToQgCS++;
1394
0
        }
1395
1396
        //check whether deltaQP has been coded (in luma CU or luma&chroma CU) before this node
1397
0
        CodingStructure* parentCS = tempCS->parent;
1398
0
        for( int i = 0; i < numParentNodeToQgCS; i++ )
1399
0
        {
1400
          //checking each parent
1401
0
          CHECK( parentCS == nullptr, "parentCS shall exsit" );
1402
0
          for( const auto &cu : parentCS->cus )
1403
0
          {
1404
0
            if( cu->rootCbf && !isChroma( cu->chType ) )
1405
0
            {
1406
0
              // this break only leaves the inner loop; the remaining parents are still visited
              deltaQpCodedBeforeThisNode = true;
1407
0
              break;
1408
0
            }
1409
0
          }
1410
0
          parentCS = parentCS->parent;
1411
0
        }
1412
0
      }
1413
1414
      //revise luma CU qp before the first luma CU with residual in the SCIPU to predQP
1415
0
      if( !deltaQpCodedBeforeThisNode )
1416
0
      {
1417
        //get pred QP of the QG
1418
0
        const CodingUnit* cuFirst = qgCS->getCU( CH_L, TREE_D );
1419
0
        CHECK( cuFirst->lumaPos() != partitioner.currQgPos, "First cu of the Qg is wrong" );
1420
0
        int predQp = CU::predictQP( *cuFirst, qgCS->prevQP[CH_L] );
1421
1422
        //revise to predQP
1423
0
        // index of the first CU with residual; equals the number of CUs when none has residual
        int firstCuHasResidual = (int)tempCS->cus.size();
1424
0
        for( int i = 0; i < tempCS->cus.size(); i++ )
1425
0
        {
1426
0
          if( tempCS->cus[i]->rootCbf )
1427
0
          {
1428
0
            firstCuHasResidual = i;
1429
0
            break;
1430
0
          }
1431
0
        }
1432
1433
0
        for( int i = 0; i < firstCuHasResidual; i++ )
1434
0
        {
1435
0
          tempCS->cus[i]->qp = predQp;
1436
0
        }
1437
0
      }
1438
0
    }
1439
0
    // code the chroma of the whole node as one chroma tree, using the second pair of coding structures
    partitioner.chType   = CH_C;
1440
0
    partitioner.treeType = TREE_C;
1441
1442
0
    m_CurrCtx++;
1443
1444
0
    CodingStructure *tempCSChroma = m_pTempCS2;
1445
0
    CodingStructure *bestCSChroma = m_pBestCS2;
1446
1447
0
    tempCS->initSubStructure( *tempCSChroma, partitioner.chType, partitioner.currArea(), false );
1448
0
    tempCS->initSubStructure( *bestCSChroma, partitioner.chType, partitioner.currArea(), false );
1449
0
    tempCSChroma->lumaCS = tempCS;
1450
0
    bestCSChroma->lumaCS = tempCS;
1451
0
    xCompressCU( tempCSChroma, bestCSChroma, partitioner );
1452
1453
    //attach chromaCS to luma CS and update cost
1454
0
    tempCS->useSubStructure( *bestCSChroma, partitioner.chType, TREE_D, CS::getArea( *bestCSChroma, partitioner.currArea(), partitioner.chType, partitioner.treeType ), false );
1455
1456
    //release tmp resource
1457
0
    tempCSChroma->releaseIntermediateData();
1458
0
    bestCSChroma->releaseIntermediateData();
1459
1460
0
    m_CurrCtx--;
1461
    //recover luma tree status
1462
0
    partitioner.chType = CH_L;
1463
0
    partitioner.treeType = TREE_D;
1464
0
    partitioner.modeType = MODE_TYPE_ALL;
1465
0
  }
1466
1467
  // Finally, add split-signaling bits for RD-cost check
1468
0
  tempCS->fracBits += splitBits; // split bits
1469
0
  tempCS->cost      = m_cRdCost.calcRdCost( tempCS->fracBits, tempCS->dist );
1470
0
  partitioner.modeType = modeTypeParent;
1471
1472
  // Check Delta QP bits for splitted structure
1473
0
  if( !qgEnableChildren ) // check at deepest QG level only
1474
0
    xCheckDQP( *tempCS, partitioner, true );
1475
1476
  // If the configuration being tested exceeds the maximum number of bytes for a slice / slice-segment, then
1477
  // a proper RD evaluation cannot be performed. Therefore, termination of the
1478
  // slice/slice-segment must be made prior to this CTU.
1479
  // This can be achieved by forcing the decision to be that of the rpcTempCU.
1480
  // The exception is each slice / slice-segment must have at least one CTU.
1481
0
  if( bestCS->cost == MAX_DOUBLE )
1482
0
  {
1483
0
    bestCS->costDbOffset = 0;
1484
0
  }
1485
1486
0
  if( tempCS->cus.size() > 0 && modeTypeParent == MODE_TYPE_ALL && modeTypeChild == MODE_TYPE_INTER )
1487
0
  {
1488
0
    // request skipping of the following pass when CUs without residual cover at least half of the luma area
    int areaSizeNoResiCu = 0;
1489
0
    for( int k = 0; k < tempCS->cus.size(); k++ )
1490
0
    {
1491
0
      areaSizeNoResiCu += (tempCS->cus[k]->rootCbf == false) ? tempCS->cus[k]->lumaSize().area() : 0;
1492
0
    }
1493
0
    if( areaSizeNoResiCu >= (tempCS->area.lumaSize().area() >> 1) )
1494
0
    {
1495
0
      skipInterPass = true;
1496
0
    }
1497
0
  }
1498
1499
  // RD check for sub partitioned coding structure.
1500
0
  xCheckBestMode( tempCS, bestCS, partitioner, encTestMode, m_EDO );
1501
1502
0
  // add back the MV info that was saved before the children were processed
  if( isAffMVInfoSaved )
1503
0
  {
1504
0
    m_cInterSearch.m_AffineProfList->addAffMVInfo(tmpMVInfo);
1505
0
  }
1506
1507
0
  if( !tempCS->slice->isIntra() && isUniMvInfoSaved )
1508
0
  {
1509
0
    m_cInterSearch.m_BlkUniMvInfoBuffer->addUniMvInfo(tmpUniMvInfo);
1510
0
  }
1511
1512
0
  // write back the state saved at function entry
  tempCS->motionLut = oldMotionLut;
1513
0
  tempCS->releaseIntermediateData();
1514
0
  tempCS->prevQP[partitioner.chType] = oldPrevQp;
1515
0
}
1516
1517
1518
// Tests intra coding of the current area.
//
// Adds an intra CU to tempCS, runs the luma intra search (for luma channel type) and the chroma
// intra search (for non-4:0:0 content when coding chroma or a non-separate tree), estimates the
// bits of the resulting CU and finally compares tempCS against bestCS with xCheckBestMode.
// The function returns early, without the bit estimation and without xCheckBestMode, in the two
// cases marked below.
//
// tempCS      - working coding structure; re-initialised here with the QP of encTestMode
// bestCS      - best coding structure so far; its cost is used as a bound for the searches
// partitioner - current partitioning state (channel type, tree type, CU data)
// encTestMode - supplies the QP used for the CU
void EncCu::xCheckRDCostIntra( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
1519
0
{
1520
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA, tempCS, partitioner.chType );
1521
1522
0
  tempCS->initStructData( encTestMode.qp, false ); // clear motion buffer
1523
1524
0
  CodingUnit &cu      = tempCS->addCU( CS::getArea( *tempCS, tempCS->area, partitioner.chType, partitioner.treeType ), partitioner.chType );
1525
1526
0
  // initialise the new CU as a plain intra CU without ISP
  partitioner.setCUData( cu );
1527
0
  cu.slice            = tempCS->slice;
1528
0
  cu.tileIdx          = m_tileIdx;
1529
0
  cu.skip             = false;
1530
0
  cu.mmvdSkip         = false;
1531
0
  cu.predMode         = MODE_INTRA;
1532
0
  cu.chromaQpAdj      = m_cuChromaQpOffsetIdxPlus1;
1533
0
  cu.qp               = encTestMode.qp;
1534
0
  cu.ispMode          = NOT_INTRA_SUBPARTITIONS;
1535
0
  cu.initPuData();
1536
1537
0
  m_cIntraSearch.m_ispTestedModes[0].init(0, 0, 1);
1538
0
  if (m_pcEncCfg->m_FastIntraTools)
1539
0
  {
1540
0
    // collect hints for the fast intra tools from the best mode so far and from the neighbouring CUs
    m_modeCtrl.comprCUCtx->intraWasTested = false;
1541
0
    m_cIntraSearch.m_ispTestedModes[0].relatedCuIsValid = m_modeCtrl.comprCUCtx->relatedCuIsValid;
1542
0
    // NOTE(review): bestCS is dereferenced here without a null check, while the luma branch below tests it - confirm bestCS can never be null
    if (!bestCS->cus.empty())
1543
0
    {
1544
0
      if ((bestCS->cus[0]->mergeFlag || bestCS->cus[0]->imv || bestCS->cus[0]->affine) && (!bestCS->cus[0]->ciip))
1545
0
      {
1546
0
        m_cIntraSearch.m_ispTestedModes[0].bestBefore[0] = -1;
1547
0
      }
1548
0
    }
1549
0
    if (!bestCS->slice->isIntra())
1550
0
    {
1551
0
      const Position posBL = cu.Y().bottomLeft();
1552
0
      const Position posTR = cu.Y().topRight();
1553
0
      // i == 0: left neighbour of the bottom-left sample, i == 1: above neighbour of the top-right sample
      for (int i = 0; i < 2; i++)
1554
0
      {
1555
0
        const CodingUnit* neigh = i ? cu.cs->getCURestricted(posTR.offset(0, -1), cu, CH_L) :cu.cs->getCURestricted(posBL.offset(-1, 0), cu, CH_L);
1556
0
        m_cIntraSearch.m_ispTestedModes[0].bestBefore[i+1] = -1;
1557
0
        if (neigh != nullptr)
1558
0
        {
1559
0
          // bit mask of the tools used by the neighbour: 4 = MTS, 2 = LFNST, 1 = ISP
          int bestMode = neigh->firstTU->mtsIdx[0] ? 4 : 0;
1560
0
          bestMode |= neigh->lfnstIdx ? 2 : 0;
1561
0
          bestMode |= neigh->ispMode ? 1 : 0;
1562
0
          m_cIntraSearch.m_ispTestedModes[0].bestBefore[i+1] = bestMode;
1563
0
        }
1564
0
      }
1565
0
    }
1566
0
  }
1567
1568
0
  tempCS->interHad    = m_modeCtrl.comprCUCtx->interHad;
1569
0
  double maxCostAllowedForChroma = MAX_DOUBLE;
1570
0
  if( isLuma( partitioner.chType ) )
1571
0
  {
1572
0
    // in non-intra slices the cost of bestCS is handed to the luma search
    if (!tempCS->slice->isIntra() && bestCS)
1573
0
    {
1574
0
      m_cIntraSearch.estIntraPredLumaQT(cu, partitioner, bestCS->cost);
1575
0
    }
1576
0
    else
1577
0
    {
1578
0
      m_cIntraSearch.estIntraPredLumaQT(cu, partitioner);
1579
0
    }
1580
0
    if (m_pcEncCfg->m_FastIntraTools)
1581
0
    {
1582
0
      if (m_cIntraSearch.m_ispTestedModes[0].intraWasTested)
1583
0
      {
1584
0
        m_modeCtrl.comprCUCtx->intraWasTested = m_cIntraSearch.m_ispTestedModes[0].intraWasTested;
1585
0
      }
1586
0
    }
1587
1588
0
    if( !partitioner.isSepTree( *tempCS ) )
1589
0
    {
1590
0
      tempCS->lumaCost = m_cRdCost.calcRdCost( tempCS->fracBits, tempCS->dist );
1591
0
    }
1592
0
    // early return #1: no usable luma result (distortion left at MAX_DISTORTION) with fast PB-intra enabled
    if (m_pcEncCfg->m_usePbIntraFast && tempCS->dist == MAX_DISTORTION && tempCS->interHad == 0)
1593
0
    {
1594
      // JEM assumes only perfect reconstructions can from now on beat the inter mode
1595
0
      m_modeCtrl.comprCUCtx->interHad = 0;
1596
0
      return;
1597
0
    }
1598
0
  }
1599
1600
0
  if( tempCS->area.chromaFormat != CHROMA_400 && ( partitioner.chType == CH_C || !CU::isSepTree(cu) ) )
1601
0
  {
1602
0
    bool useIntraSubPartitions = cu.ispMode != NOT_INTRA_SUBPARTITIONS;
1603
0
    Partitioner subTuPartitioner = partitioner;
1604
0
    if ((m_pcEncCfg->m_ISP >= 3) && (!partitioner.isSepTree(*tempCS) && useIntraSubPartitions))
1605
0
    {
1606
0
      // bound for the chroma search: what is left of the best cost after subtracting the luma cost
      maxCostAllowedForChroma = bestCS->cost < MAX_DOUBLE ? bestCS->cost - tempCS->lumaCost : MAX_DOUBLE;
1607
0
    }
1608
0
    // NOTE(review): isLuma(CH_C) takes a constant argument, so this sub-condition does not depend on the CU - confirm intended
    m_cIntraSearch.estIntraPredChromaQT(
1609
0
      cu, (!useIntraSubPartitions || (CU::isSepTree(cu) && !isLuma(CH_C))) ? partitioner : subTuPartitioner,
1610
0
      maxCostAllowedForChroma);
1611
0
    // early return #2: ISP was selected before the chroma search but cu.ispMode is zero afterwards
    if ((m_pcEncCfg->m_ISP >= 3) && useIntraSubPartitions && !cu.ispMode)
1612
0
    {
1613
0
      return;
1614
0
    }
1615
0
  }
1616
1617
0
  // rootCbf is set when any valid transform block of the first TU has a coded block flag
  cu.rootCbf = false;
1618
1619
0
  for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++)
1620
0
  {
1621
0
    cu.rootCbf |= cu.firstTU->cbf[t] != 0;
1622
0
  }
1623
1624
  // Get total bits for current mode: encode CU
1625
0
  m_CABACEstimator->resetBits();
1626
1627
0
  if ((!cu.cs->slice->isIntra() || cu.cs->slice->sps->IBC) && cu.Y().valid())
1628
0
  {
1629
0
    m_CABACEstimator->cu_skip_flag(cu);
1630
0
  }
1631
0
  m_CABACEstimator->pred_mode(cu);
1632
0
  m_CABACEstimator->cu_pred_data(cu);
1633
1634
  // Encode Coefficients
1635
0
  CUCtx cuCtx;
1636
0
  // both flags are preset to true here; delta-QP bits are handled separately by xCheckDQP below
  cuCtx.isDQPCoded = true;
1637
0
  cuCtx.isChromaQpAdjCoded = true;
1638
0
  m_CABACEstimator->cu_residual(cu, partitioner, cuCtx);
1639
1640
0
  tempCS->fracBits = m_CABACEstimator->getEstFracBits();
1641
0
  tempCS->cost = m_cRdCost.calcRdCost(tempCS->fracBits, tempCS->dist);
1642
1643
0
  xEncodeDontSplit(*tempCS, partitioner);
1644
1645
0
  xCheckDQP(*tempCS, partitioner);
1646
1647
0
  if( m_EDO )
1648
0
  {
1649
0
    xCalDebCost(*tempCS, partitioner);
1650
0
  }
1651
1652
0
  DTRACE_MODE_COST(*tempCS, m_cRdCost.getLambda(true));
1653
0
  xCheckBestMode(tempCS, bestCS, partitioner, encTestMode, m_EDO);
1654
1655
0
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_MODES_TESTED][0][!tempCS->slice->isIntra() + tempCS->slice->depth] );
1656
0
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !tempCS->slice->isIntra(), g_cuCounters2D[CU_MODES_TESTED][Log2( tempCS->area.lheight() )][Log2( tempCS->area.lwidth() )] );
1657
0
}
1658
1659
// Accounts for delta-QP signalling in the coding structure cs.
//
// When at least one non-chroma CU of cs has a residual, the bits of cu_qp_delta for the first CU
// are added to cs.fracBits and cs.cost is recomputed. All CUs located before the first non-chroma
// CU with residual (all CUs, if there is none) get their QP set to the predicted QP.
//
// cs          - coding structure to update
// partitioner - current partitioning state (channel type, tree type, QG enable state)
// bKeepCtx    - if false, the DeltaQP contexts of the estimator are saved before and written back
//               after the bit estimation; the CHECKs below tie this flag to the split / non-split case
void EncCu::xCheckDQP( CodingStructure& cs, Partitioner& partitioner, bool bKeepCtx )
{
  // Nothing to do when delta QP is disabled, for the chroma channel of a separate tree, or when
  // the QG is not enabled (split or leaf/not leaf QG condition is checked by the caller).
  if( !cs.pps->useDQP
      || ( partitioner.isSepTree( cs ) && isChroma( partitioner.chType ) )
      || !partitioner.currQgEnable() )
  {
    return;
  }

  CodingUnit* cuFirst = cs.getCU( partitioner.chType, partitioner.treeType );

  CHECK( bKeepCtx && cs.cus.size() <= 1 && partitioner.getImplicitSplit( cs ) == CU_DONT_SPLIT, "bKeepCtx should only be set in split case" );
  CHECK( !bKeepCtx && cs.cus.size() > 1, "bKeepCtx should never be set for non-split case" );
  CHECK( !cuFirst, "No CU available" );

  // Locate the first CU that carries a residual. Chroma CUs are not considered, because a chroma CU
  // is decided based on the corresponding luma QP and deltaQP is not signaled at a chroma CU.
  const size_t numCus        = cs.cus.size();
  size_t       firstCodedIdx = 0;
  for( ; firstCodedIdx < numCus; ++firstCodedIdx )
  {
    const auto &candidate = cs.cus[firstCodedIdx];
    if( candidate->rootCbf && !isChroma( candidate->chType ) )
    {
      break;
    }
  }
  const bool anyResidual = firstCodedIdx < numCus;

  int predQP = CU::predictQP( *cuFirst, cs.prevQP[partitioner.chType] );

  if( anyResidual )
  {
    TempCtx savedDqpCtx( m_CtxCache );
    if( !bKeepCtx )
    {
      savedDqpCtx = SubCtx( Ctx::DeltaQP, m_CABACEstimator->getCtx() );
    }

    m_CABACEstimator->resetBits();
    m_CABACEstimator->cu_qp_delta( *cuFirst, predQP, cuFirst->qp );

    // add the dQP bits and refresh the RD cost
    cs.fracBits += m_CABACEstimator->getEstFracBits();
    cs.cost      = m_cRdCost.calcRdCost( cs.fracBits, cs.dist );

    if( !bKeepCtx )
    {
      m_CABACEstimator->getCtx() = SubCtx( Ctx::DeltaQP, savedDqpCtx );
    }
  }

  // Reset the QP of every CU in front of the first coded CU; without any residual this covers all CUs.
  for( size_t idx = 0; idx < firstCodedIdx; ++idx )
  {
    cs.cus[idx]->qp = predQP;
  }
}
1729
1730
// Returns the luma CU of cs prepared for an inter-prediction test.
//
// The CU obtained from cs->getCU( CH_L, TREE_D ) is reused when present; otherwise a new CU
// covering cs->area is added. The CU is then set to inter mode with the QP of encTestMode,
// and its skip / MMVD / GEO / affine / MIP / CIIP flags and multiRefIdx are cleared.
CodingUnit *EncCu::getCuForInterPrediction( CodingStructure *cs, const EncTestMode& encTestMode )
{
  CodingUnit *interCu = cs->getCU( CH_L, TREE_D );

  if( !interCu )
  {
    CHECK( cs->getCU( CH_L, TREE_D ) != nullptr, "Wrong CU/PU setting in CS" );
    interCu = &cs->addCU( cs->area, CH_L );
  }

  CodingUnit &cu = *interCu;

  // basic setup
  cu.slice         = cs->slice;
  cu.tileIdx       = m_tileIdx;
  cu.predMode      = MODE_INTER;
  cu.qp            = encTestMode.qp;
  cu.chromaQpAdj   = m_cuChromaQpOffsetIdxPlus1;

  // clear all tool flags
  cu.skip          = false;
  cu.mmvdSkip      = false;
  cu.mmvdMergeFlag = false;
  cu.geo           = false;
  cu.affine        = false;
  cu.mipFlag       = false;
  cu.ciip          = false;
  cu.multiRefIdx   = 0;

  return interCu;
}
1757
1758
int getDmvrMvdNum( const CodingUnit &cu )
1759
0
{
1760
0
  const int dx = std::max<int>( cu.lwidth()  >> DMVR_SUBCU_SIZE_LOG2, 1 );
1761
0
  const int dy = std::max<int>( cu.lheight() >> DMVR_SUBCU_SIZE_LOG2, 1 );
1762
0
  return dx * dy;
1763
0
}
1764
1765
// Tests the merge-family candidates (regular, CIIP, MMVD, affine/SbTMVP and GPM) of the current block in two passes.
// Pass 1 fills m_mergeItemList with candidates ranked by an estimated luma cost and prunes the list.
// Pass 2 runs xEncodeInterResidual on the remaining items, first with residual and then with forced no-residual.
// tempCS / bestCS are passed by reference to xEncodeInterResidual; encTestMode.qp may be modified by the QPA branch below.
void EncCu::xCheckRDCostUnifiedMerge( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, EncTestMode &encTestMode )
1766
0
{
1767
0
  const Slice &slice = *tempCS->slice;
1768
1769
0
  CHECK( slice.sliceType == VVENC_I_SLICE, "Merge modes not available for I-slices" );
1770
1771
0
  tempCS->initStructData( encTestMode.qp );
1772
1773
0
  MergeCtx          mergeCtx, gpmMergeCtx;
1774
0
  AffineMergeCtx    affineMergeCtx;
1775
0
  GeoComboCostList &comboList = m_comboList;
1776
0
  const SPS        &sps       = *tempCS->sps;
1777
1778
0
  // with SbTMVP enabled, wrap m_subPuMiBuf as a motion buffer sized from the scaled luma area
  if( sps.SbtMvp )
1779
0
  {
1780
0
    const Size bufSize           = g_miScaling.scale( tempCS->area.lumaSize() );
1781
0
    affineMergeCtx.subPuMvpMiBuf = MotionBuf        ( m_subPuMiBuf, bufSize );
1782
0
  }
1783
1784
0
  m_mergeBestSATDCost = MAX_DOUBLE;
1785
1786
0
  CodingUnit *cu = getCuForInterPrediction( tempCS, encTestMode );
1787
0
  partitioner.setCUData            ( *cu );
1788
0
  CU::getInterMergeCandidates      ( *cu, mergeCtx, 0 );
1789
0
  if( sps.MMVD )
1790
0
    CU::getInterMMVDMergeCandidates( *cu, mergeCtx );
1791
1792
0
  // sameMV[n] is set when candidate n has the same L0 and L1 motion fields as an earlier candidate;
  // only computed for m_useFastMrg >= 2, otherwise all entries stay false
  bool sameMV[MRG_MAX_NUM_CANDS] = { false, };
1793
0
  if( m_pcEncCfg->m_useFastMrg >= 2 )
1794
0
  {
1795
0
    for( int m = 0; m < mergeCtx.numValidMergeCand - 1; m++ )
1796
0
    {
1797
0
      if( !sameMV[m] )
1798
0
      {
1799
0
        for( int n = m + 1; n < mergeCtx.numValidMergeCand; n++ )
1800
0
        {
1801
0
          sameMV[n] |= mergeCtx.mvFieldNeighbours[m][0] == mergeCtx.mvFieldNeighbours[n][0]
1802
0
                    && mergeCtx.mvFieldNeighbours[m][1] == mergeCtx.mvFieldNeighbours[n][1];
1803
0
        }
1804
0
      }
1805
0
    }
1806
0
  }
1807
1808
0
  MergeBufVector mrgPredBufNoCiip;
1809
0
  MergeBufVector geoBuffer;
1810
0
  const double  sqrtLambdaForFirstPass = m_cRdCost.getMotionLambda() * FRAC_BITS_SCALE;
1811
1812
0
  // block-local area (origin 0,0) used to address the compact prediction buffers
  const UnitArea localUnitArea( cu->chromaFormat, Area( 0, 0, cu->Y().width, cu->Y().height ) );
1813
0
  for( int i = 0; i < mergeCtx.numValidMergeCand; i++ )
1814
0
  {
1815
0
    mrgPredBufNoCiip.push_back( m_acMergeTmpBuffer[i].getCompactBuf( localUnitArea ) );
1816
0
  }
1817
1818
0
  // candidate quota of the first pass: the regular quota depends on whether the luma area reaches 64 samples
  int numMergeSatdCand = std::min( bestCS->area.lumaSize().area() >= 64 ? m_pcEncCfg->m_mergeRdCandQuotaRegular : m_pcEncCfg->m_mergeRdCandQuotaRegularSmallBlk, mergeCtx.numValidMergeCand );
1819
1820
0
  bool isCiipEnabled  = sps.CIIP && bestCS->area.lumaSize().area() >= 64 && bestCS->area.lumaSize().maxDim() < MAX_CU_SIZE;
1821
0
       isCiipEnabled &= m_pcEncCfg->m_CIIP <= 1 || !m_modeCtrl.getBlkInfo( tempCS->area ).isSkip; //5
1822
1823
0
  if( isCiipEnabled )
1824
0
  {
1825
0
    numMergeSatdCand += std::min( m_pcEncCfg->m_mergeRdCandQuotaCiip, mergeCtx.numValidMergeCand );
1826
0
  }
1827
1828
0
  const bool affineMrgAvail = ( m_pcEncCfg->m_Affine <= 2 || slice.TLayer <= 3 || m_pcEncCfg->m_SbTMVP )
1829
0
                           && ( m_pcEncCfg->m_Affine || sps.SbtMvp ) && m_pcEncCfg->m_maxNumAffineMergeCand && bestCS->area.Y().minDim() >= 8;
1830
1831
0
  if( affineMrgAvail )
1832
0
  {
1833
0
    CU::getAffineMergeCand( *cu, affineMergeCtx );
1834
0
    numMergeSatdCand += std::min( m_pcEncCfg->m_mergeRdCandQuotaSubBlk, affineMergeCtx.numValidMergeCand );
1835
0
  }
1836
1837
0
  // budget before any GPM quota is added; the list is shrunk to at most this size before GPM items are inserted
  int numSatdCandPreGeo = std::min( numMergeSatdCand, m_pcEncCfg->m_maxMergeRdCandNumTotal );
1838
0
  bool toAddGpmCand     = false;
1839
0
  if( sps.GEO && slice.isInterB() // base checks
1840
0
      && cu->lumaSize().minDim() >= GEO_MIN_CU_SIZE  && cu->lumaSize().maxDim() <= GEO_MAX_CU_SIZE && cu->lumaSize().maxDim() < 8 * cu->lumaSize().minDim() // size checks
1841
0
      && !( m_pcEncCfg->m_Geo > 2 && slice.TLayer <= 1 ) ) // speedups
1842
0
  {
1843
0
    cu->mergeFlag            = true;
1844
0
    cu->geo                  = true;
1845
0
    CU::getGeoMergeCandidates( *cu, gpmMergeCtx );
1846
0
    toAddGpmCand             = prepareGpmComboList( gpmMergeCtx, localUnitArea, sqrtLambdaForFirstPass, comboList, geoBuffer, *cu );
1847
0
    numMergeSatdCand        += toAddGpmCand ? std::min( m_pcEncCfg->m_mergeRdCandQuotaGpm, ( int ) comboList.list.size() ) : 0;
1848
0
  }
1849
1850
0
  numMergeSatdCand  = std::min( numMergeSatdCand, m_pcEncCfg->m_maxMergeRdCandNumTotal );
1851
1852
  // 1. Pass: get SATD-cost for selected candidates and reduce their count
1853
0
  m_mergeItemList.resetList( numMergeSatdCand );
1854
0
  const TempCtx ctxStart   ( m_CtxCache, m_CABACEstimator->getCtx() );
1855
0
  // distortion measure of the first pass: SAD for lossless test modes, otherwise (fast) Hadamard
  const DFunc   dfunc      = encTestMode.lossless ? DF_SAD : ( m_pcEncCfg->m_fastHad ? DF_HAD_fast : DF_HAD );
1856
0
  DistParam     distParam  = m_cRdCost.setDistParam( tempCS->getOrgBuf().Y(), tempCS->getOrgBuf().Y(), sps.bitDepths[CH_L], dfunc );
1857
0
  m_uiSadBestForQPA        = MAX_DISTORTION;
1858
1859
0
  addRegularCandsToPruningList( mergeCtx, localUnitArea, sqrtLambdaForFirstPass, ctxStart, distParam, *cu, sameMV, mrgPredBufNoCiip );
1860
1861
  // add CIIP candidates directly after adding regular cands
1862
0
  if( isCiipEnabled )
1863
0
  {
1864
0
    addCiipCandsToPruningList( mergeCtx, localUnitArea, sqrtLambdaForFirstPass, ctxStart, distParam, *cu, sameMV );
1865
0
  }
1866
1867
0
  // MMVD is only tried on a non-empty list; with m_useFastMrg >= 2 the list must hold more than one item
  if( sps.MMVD && !!m_mergeItemList.size() && !( m_pcEncCfg->m_useFastMrg >= 2 && m_mergeItemList.size() <= 1 ) )
1868
0
  {
1869
0
    addMmvdCandsToPruningList( mergeCtx, localUnitArea, sqrtLambdaForFirstPass, ctxStart, distParam, *cu );
1870
0
  }
1871
1872
0
  if( affineMergeCtx.numValidMergeCand > 0 )
1873
0
  {
1874
0
    addAffineCandsToPruningList( affineMergeCtx, localUnitArea, sqrtLambdaForFirstPass, ctxStart, distParam, *cu );
1875
0
  }
1876
1877
0
  // shrink the non-GPM list: either by cost threshold relative to the best item (fast merge) or to the pre-GPM budget
  if( m_pcEncCfg->m_useFastMrg > 0 && m_mergeItemList.size() > 0 )
1878
0
  {
1879
0
    m_mergeBestSATDCost    = m_mergeItemList.getMergeItemInList( 0 )->cost;
1880
0
    const double threshold = m_mergeBestSATDCost * MRG_FAST_RATIO[tempCS->picture->useFastMrg];
1881
0
    const   int shrinkSize = std::min( numSatdCandPreGeo, ( int ) updateRdCheckingNum( m_mergeItemList, threshold, numMergeSatdCand ) );
1882
0
    m_mergeItemList        . shrinkList( shrinkSize );
1883
0
  }
1884
0
  else
1885
0
  {
1886
0
    m_mergeItemList        . shrinkList( numSatdCandPreGeo );
1887
0
  }
1888
1889
0
  if( toAddGpmCand )
1890
0
  {
1891
0
    addGpmCandsToPruningList( gpmMergeCtx, localUnitArea, sqrtLambdaForFirstPass, ctxStart, comboList, geoBuffer, distParam, *cu );
1892
0
  }
1893
1894
0
  if(    m_pcEncCfg->m_internalUsePerceptQPATempFiltISlice == 2 && m_uiSadBestForQPA < MAX_DISTORTION && slice.TLayer == 0 // non-Intra key-frame
1895
0
      && m_pcEncCfg->m_salienceBasedOpt
1896
0
      && m_pcEncCfg->m_usePerceptQPA && partitioner.currQgEnable() && partitioner.currSubdiv == 0 ) // CTU-level luma quantization group
1897
0
  {
1898
0
    CHECK( bestCS->cost < MAX_DOUBLE, "This has to be the first test performed!" );
1899
1900
0
    const Picture *pic         = slice.pic;
1901
0
    const bool     isBIM       = m_pcEncCfg->m_RCNumPasses != 2 && m_pcEncCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty();
1902
0
    const uint32_t rsAddr      = getCtuAddr( partitioner.currQgPos, *pic->cs->pcv );
1903
0
    const int      pumpReducQP = BitAllocation::getCtuPumpingReducingQP( &slice, tempCS->getOrgBuf( COMP_Y ), m_uiSadBestForQPA, *m_globalCtuQpVector, rsAddr,
1904
0
                                                                         m_pcEncCfg->m_QP, isBIM );
1905
1906
0
    if( pumpReducQP != 0 ) // subtract QP offset, reduces Intra-period pumping or overcoding
1907
0
    {
1908
0
      // the same offset is applied to the test-mode QP and to the base/current QP of both coding structures
      encTestMode.qp = Clip3( 0, MAX_QP, encTestMode.qp - pumpReducQP );
1909
0
      tempCS->currQP[partitioner.chType] = tempCS->baseQP =
1910
0
      bestCS->currQP[partitioner.chType] = bestCS->baseQP = Clip3( 0, MAX_QP, tempCS->baseQP - pumpReducQP );
1911
1912
0
      updateLambda( slice, pic->ctuQpaLambda[rsAddr], pic->ctuAdaptedQP[rsAddr], tempCS->baseQP, true );
1913
0
    }
1914
0
  }
1915
1916
  // Try to limit number of candidates using SATD-costs
1917
0
  if( m_pcEncCfg->m_useFastMrg > 0 && m_mergeItemList.size() > 0 )
1918
0
  {
1919
    // shrink GEO list as well
1920
0
    const double threshold = m_mergeItemList.getMergeItemInList( 0 )->cost * MRG_FAST_RATIO[0];
1921
0
    numMergeSatdCand       = updateRdCheckingNum( m_mergeItemList, threshold, numMergeSatdCand );
1922
0
    m_mergeBestSATDCost    = m_mergeItemList.size() != 0 ? m_mergeItemList.getMergeItemInList( 0 )->cost : MAX_DOUBLE;
1923
0
  }
1924
0
  else
1925
0
  {
1926
0
    numMergeSatdCand       = std::min<int>( numMergeSatdCand, ( int ) m_mergeItemList.size() );
1927
0
  }
1928
1929
  // 2. Pass: RD checking
1930
0
  tempCS->initStructData( encTestMode.qp );
1931
0
  m_CABACEstimator->getCtx() = ctxStart;
1932
1933
0
  double bestEndCost                            =   MAX_DOUBLE;
1934
0
  bool bestIsSkip                               =   false;
1935
0
  PelUnitBuf ciipBuf                            =   m_aTmpStorageLCU[1].getCompactBuf( *cu );
1936
0
  bool ciipChromaDone                           =   false;
1937
0
  bool isRegularTestedAsSkip[MRG_MAX_NUM_CANDS] = { false, };
1938
0
  bool geoWasTested                             =   false;
1939
0
  int  stopCand                                 =   numMergeSatdCand;
1940
1941
0
  CHECK( numMergeSatdCand > 0 && m_mergeItemList.size() == 0, "Empty merge item list is not expected" );
1942
1943
0
  // noResidualPass 0 tests the candidates with residual coding, noResidualPass 1 forces no residual
  for( uint32_t noResidualPass = 0; noResidualPass < 2; noResidualPass++ )
1944
0
  {
1945
0
    const bool forceNoResidual = noResidualPass == 1;
1946
0
    for( uint32_t mrgHadIdx = 0; mrgHadIdx < stopCand; mrgHadIdx++ )
1947
0
    {
1948
0
      auto mergeItem = m_mergeItemList.getMergeItemInList( mrgHadIdx );
1949
0
      CHECK( mergeItem == nullptr, "Wrong merge item" );
1950
1951
0
      const bool isCiip = mergeItem->mergeItemType == MergeItem::MergeItemType::CIIP;
1952
0
      const bool isGeo  = mergeItem->mergeItemType == MergeItem::MergeItemType::GPM;
1953
0
      const bool isRglr = mergeItem->mergeItemType == MergeItem::MergeItemType::REGULAR;
1954
0
      const bool isMmvd = mergeItem->mergeItemType == MergeItem::MergeItemType::MMVD;
1955
1956
0
      // in the no-residual pass a CIIP item is skipped when the entry of isRegularTestedAsSkip for its merge index is set
      if( noResidualPass != 0 && isCiip && isRegularTestedAsSkip[mergeItem->mergeIdx] )
1957
0
      {
1958
0
        continue;
1959
0
      }
1960
1961
0
      // no-residual pass: skip items already flagged noResidual; residual pass: skip everything once bestIsSkip is set
      if( noResidualPass ? mergeItem->noResidual : bestIsSkip )
1962
0
      {
1963
0
        continue;
1964
0
      }
1965
1966
0
      if( isGeo )
1967
0
      {
1968
0
        // for m_Geo > 2, further GPM items are skipped once one was tested and the current best CU is not a GPM CU
        if( m_pcEncCfg->m_Geo > 2 && geoWasTested && !bestCS->cus.empty() && !bestCS->getCU( partitioner.chType, partitioner.treeType )->geo )
1969
0
        {
1970
0
          continue;
1971
0
        }
1972
1973
0
        geoWasTested = true;
1974
0
      }
1975
1976
0
      cu = getCuForInterPrediction( tempCS, encTestMode );
1977
0
      partitioner.setCUData( *cu );
1978
0
      const bool resetCiip2Regular = mergeItem->exportMergeInfo( *cu, forceNoResidual );
1979
1980
0
      if( isRglr || resetCiip2Regular )
1981
0
      {
1982
0
        // copy back the sub-PU MV offsets that were stored per merge index in m_subPuMvOffset
        if( CU::checkDMVRCondition( *cu ) ) std::copy_n( m_subPuMvOffset[mergeItem->mergeIdx].data(), getDmvrMvdNum( *cu ), cu->mvdL0SubPu );
1983
0
      }
1984
1985
0
      if( isMmvd && mergeItem->noBdofRefine )
1986
0
      {
1987
        // no BDOF refinement was made for the luma prediction, need to have luma prediction again
1988
0
        mergeItem->lumaPredReady = false;
1989
0
      }
1990
1991
0
      PelUnitBuf *predBuf1   = nullptr, *predBuf2 = isCiip ? &ciipBuf : nullptr;
1992
0
      PelUnitBuf  dstPredBuf = tempCS->getPredBuf( *cu );
1993
1994
0
      if( isGeo )
1995
0
      {
1996
0
        predBuf1 = &geoBuffer[cu->geoMergeIdx[0]];
1997
0
        predBuf2 = &geoBuffer[cu->geoMergeIdx[1]];
1998
0
      }
1999
2000
0
      if( resetCiip2Regular )
2001
0
      {
2002
0
        // reuse the regular prediction buffered for this merge index
        dstPredBuf.copyFrom( mrgPredBufNoCiip[mergeItem->mergeIdx] );
2003
0
      }
2004
0
      else
2005
0
      {
2006
0
        if( isCiip && !resetCiip2Regular && isChromaEnabled( cu->chromaFormat ) && cu->chromaSize().width > 2 )
2007
0
        {
2008
0
          // the chroma intra part of ciipBuf is predicted only once and shared by all CIIP items
          if( !ciipChromaDone )
2009
0
          {
2010
0
            cu->intraDir[0] = PLANAR_IDX;
2011
0
            cu->intraDir[1] = DM_CHROMA_IDX;
2012
2013
0
            m_cIntraSearch  . initIntraPatternChType( *cu, cu->Cb() );
2014
0
            m_cIntraSearch  . predIntraAng          ( COMP_Cb, ciipBuf.Cb(), *cu );
2015
0
            m_cIntraSearch  . initIntraPatternChType( *cu, cu->Cr() );
2016
0
            m_cIntraSearch  . predIntraAng          ( COMP_Cr, ciipBuf.Cr(), *cu );
2017
2018
0
            ciipChromaDone  = true;
2019
0
          }
2020
0
        }
2021
2022
0
        // copy whatever part of the prediction is already buffered in the item, then generate the missing part
        if(  mergeItem->lumaPredReady ||  mergeItem->chromaPredReady )
2023
0
          dstPredBuf.copyFrom( mergeItem->getPredBuf( localUnitArea ), mergeItem->lumaPredReady, mergeItem->chromaPredReady );
2024
0
        if( !mergeItem->lumaPredReady || !mergeItem->chromaPredReady )
2025
0
          generateMergePrediction( localUnitArea, mergeItem, *cu, !mergeItem->lumaPredReady, !mergeItem->chromaPredReady, dstPredBuf, true, forceNoResidual, predBuf1, predBuf2 );
2026
0
      }
2027
2028
0
      if( !cu->mmvdSkip && !cu->ciip && !cu->affine && !cu->geo && noResidualPass != 0 )
2029
0
      {
2030
0
        CHECK( mergeItem->mergeIdx >= mergeCtx.numValidMergeCand, "out of normal merge" );
2031
0
        isRegularTestedAsSkip[mergeItem->mergeIdx] = true;
2032
0
      }
2033
2034
0
      xEncodeInterResidual( tempCS, bestCS, partitioner, encTestMode, noResidualPass, noResidualPass == 0 ? &mergeItem->noResidual : nullptr );
2035
2036
0
      if( m_pcEncCfg->m_useFastMrg >= 2 )
2037
0
      {
2038
0
        // NOTE(review): the candidate loop is bounded by stopCand, which was copied from numMergeSatdCand before the
        // loops started, so raising numMergeSatdCand here does not extend the loop as written - confirm intent
        if( cu->ciip && bestCS->cost == MAX_DOUBLE && mrgHadIdx + 1 == numMergeSatdCand )
2039
0
        {
2040
0
          numMergeSatdCand = ( unsigned ) m_mergeItemList.size();
2041
0
        }
2042
      
2043
0
        // cap the loop at the current index when tempCS->cost does not undercut bestEndCost
        // (only for non-CIIP, non-GPM items and never for the first item)
        if( mrgHadIdx > 0 && tempCS->cost >= bestEndCost && !cu->ciip && !isGeo )
2044
0
        {
2045
0
          stopCand = mrgHadIdx + 1;
2046
0
        }
2047
      
2048
0
        if( noResidualPass == 0 )
2049
0
        {
2050
0
          bestEndCost = std::min( bestEndCost, tempCS->cost );
2051
0
        }
2052
0
      }
2053
2054
0
      if( m_pcEncCfg->m_useFastDecisionForMerge && !bestIsSkip && !cu->ciip )
2055
0
      {
2056
0
        bestIsSkip = !bestCS->cus.empty() && bestCS->getCU( partitioner.chType, partitioner.treeType )->rootCbf == 0;
2057
0
      }
2058
2059
0
      tempCS->initStructData( encTestMode.qp );
2060
0
    }   // end loop mrgHadIdx
2061
0
  }
2062
0
}
2063
2064
// Returns how many leading items of `mergeItemList` are worth RD-checking: the index of the first item that is
// missing or whose cost exceeds `threshold`, or otherwise `numMergeSatdCand` limited to the list size.
unsigned int EncCu::updateRdCheckingNum( MergeItemList &mergeItemList, double threshold, unsigned int numMergeSatdCand )
{
  uint32_t pos = 0;

  while( pos < mergeItemList.size() )
  {
    const auto item = mergeItemList.getMergeItemInList( pos );

    if( !item || item->cost > threshold )
    {
      // pos is below the list size here, so it is already the clamped result
      return pos;
    }

    ++pos;
  }

  // no item was rejected: keep the requested count, limited to the list size
  const unsigned listSize = ( unsigned ) mergeItemList.size();
  return numMergeSatdCand < listSize ? numMergeSatdCand : listSize;
}
2077
2078
void EncCu::generateMergePrediction( const UnitArea &unitArea, MergeItem *mergeItem, CodingUnit &pu, bool luma, bool chroma,
2079
                                     PelUnitBuf &dstBuf, bool finalRd, bool forceNoResidual, PelUnitBuf *predBuf1, PelUnitBuf *predBuf2 )
2080
0
{
2081
0
  CHECK( ( luma && mergeItem->lumaPredReady ) || ( chroma && mergeItem->chromaPredReady ), "Prediction has been avaiable" );
2082
2083
0
  pu.mcControl = ( !luma ? 4 : 0 ) | ( !chroma ? 2 : 0 );
2084
2085
0
  switch( mergeItem->mergeItemType )
2086
0
  {
2087
0
  case MergeItem::MergeItemType::REGULAR:
2088
    // here predBuf1 is predBufNoCiip
2089
0
    pu.mvRefine = true;
2090
0
    m_cInterSearch.motionCompensation( pu, dstBuf, REF_PIC_LIST_X );
2091
0
    pu.mvRefine = false;
2092
0
    if( predBuf1 != nullptr )
2093
0
    {
2094
0
      predBuf1->copyFrom( dstBuf, luma, chroma );
2095
0
    }
2096
0
    break;
2097
2098
0
  case MergeItem::MergeItemType::CIIP:
2099
0
    m_cInterSearch.motionCompensation( pu, dstBuf, REF_PIC_LIST_X );
2100
2101
0
    if( luma )
2102
0
    {
2103
0
      const ReshapeData& reshapeData = pu.cs->picture->reshapeData;
2104
0
      if( pu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag() )
2105
0
      {
2106
0
        dstBuf.Y().rspSignal( reshapeData.getFwdLUT() );
2107
0
      }
2108
      // generate intrainter Y prediction
2109
0
      dstBuf.Y().weightCiip( predBuf2->Y(), mergeItem->numCiipIntra );
2110
0
    }
2111
2112
0
    if( chroma )
2113
0
    {
2114
0
      if( pu.chromaSize().width > 2 )
2115
0
      {
2116
0
        dstBuf.Cb().weightCiip( predBuf2->Cb(), mergeItem->numCiipIntra );
2117
0
        dstBuf.Cr().weightCiip( predBuf2->Cr(), mergeItem->numCiipIntra );
2118
0
      }
2119
0
    }
2120
2121
0
    break;
2122
2123
0
  case MergeItem::MergeItemType::MMVD:
2124
0
    pu.mcControl           |= finalRd ? 0 : ( pu.mmvdMergeIdx.pos.step > 2 || m_pcEncCfg->m_MMVD > 1 ) ? 1 : 0;
2125
0
    mergeItem->noBdofRefine = pu.mccNoBdof() && pu.cs->sps->BDOF && !pu.cs->picHeader->disBdofFlag;
2126
0
    m_cInterSearch.motionCompensation( pu, dstBuf, REF_PIC_LIST_X );
2127
0
    break;
2128
2129
0
  case MergeItem::MergeItemType::SBTMVP:
2130
0
    m_cInterSearch.motionCompensation( pu, dstBuf, REF_PIC_LIST_X );
2131
0
    break;
2132
2133
0
  case MergeItem::MergeItemType::AFFINE:
2134
0
    m_cInterSearch.motionCompensation( pu, dstBuf, REF_PIC_LIST_X );
2135
0
    break;
2136
2137
0
  case MergeItem::MergeItemType::GPM:
2138
    // here predBuf1 and predBuf2 point to geoBuffer[mergeCand0] and geoBuffer[mergeCand1], respectively
2139
0
    CHECK( predBuf1 == nullptr || predBuf2 == nullptr, "Invalid input buffer to GPM" );
2140
0
    m_cInterSearch.weightedGeoBlk( pu.slice->clpRngs, pu, pu.geoSplitDir, luma && chroma ? MAX_NUM_CH : luma ? CH_L : CH_C, dstBuf, *predBuf1, *predBuf2 );
2141
0
    break;
2142
2143
0
  default:
2144
0
    THROW("Wrong merge item type");
2145
0
  }
2146
2147
0
  auto mergeItemPredBuf = mergeItem->getPredBuf( unitArea );
2148
2149
0
  if( dstBuf.Y().buf == mergeItemPredBuf.Y().buf )
2150
0
  {
2151
    // dst is the internal buffer
2152
0
    mergeItem->lumaPredReady   |= luma;
2153
0
    mergeItem->chromaPredReady |= chroma;
2154
0
  }
2155
0
  else if( finalRd && !forceNoResidual )
2156
0
  {
2157
    // at final RD stage, with and without residuals are both checked
2158
    // it makes sense to buffer the prediction
2159
0
    mergeItemPredBuf.copyFrom( dstBuf, luma, chroma );
2160
0
    mergeItem->lumaPredReady   |= luma;
2161
0
    mergeItem->chromaPredReady |= chroma;
2162
0
  }
2163
0
}
2164
2165
void EncCu::addRegularCandsToPruningList( const MergeCtx &mergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPassIntra, const TempCtx &ctxStart,
2166
                                          DistParam& distParam, CodingUnit& pu, bool* sameMv, MergeBufVector& regularPred )
2167
0
{
2168
0
  pu.geo = pu.affine
2169
0
         = pu.mmvdMergeFlag = pu.mmvdSkip
2170
0
         = pu.ciip
2171
0
         = false;
2172
2173
0
  for( uint32_t uiMergeCand = 0; uiMergeCand < mergeCtx.numValidMergeCand; uiMergeCand++ )
2174
0
  {
2175
0
    if( sameMv[uiMergeCand] ) continue;
2176
2177
0
    mergeCtx.setMergeInfo   ( pu, uiMergeCand );
2178
2179
0
    if( m_pcEncCfg->m_ifpLines && // what about DMVR?
2180
0
        ( ( pu.refIdx[L0] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L0][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ||
2181
0
          ( pu.refIdx[L1] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L1][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ) )
2182
0
    {
2183
0
      continue;
2184
0
    }
2185
2186
0
    pu.interDir             = mergeCtx.interDirNeighbours[uiMergeCand];
2187
0
    pu.BcwIdx               = pu.interDir == 3 ? mergeCtx.BcwIdx[uiMergeCand] : BCW_DEFAULT;
2188
0
    pu.imv                  = mergeCtx.useAltHpelIf[uiMergeCand] ? IMV_HPEL : IMV_OFF;
2189
0
    CU::spanMotionInfo      ( pu );
2190
2191
0
    MergeItem *regularMerge = m_mergeItemList.allocateNewMergeItem();
2192
0
    regularMerge->importMergeInfo( mergeCtx, uiMergeCand, MergeItem::MergeItemType::REGULAR, pu );
2193
0
    auto dstBuf             = regularMerge->getPredBuf( localUnitArea );
2194
0
    generateMergePrediction ( localUnitArea, regularMerge, pu, true, true, dstBuf, false, false, &regularPred[uiMergeCand], nullptr );
2195
0
    regularMerge->cost      = calcLumaCost4MergePrediction( ctxStart, dstBuf, sqrtLambdaForFirstPassIntra, pu, distParam );
2196
0
    if( CU::checkDMVRCondition( pu ) ) std::copy_n( pu.mvdL0SubPu, getDmvrMvdNum( pu ), m_subPuMvOffset[uiMergeCand].data() );
2197
0
    m_mergeItemList         . insertMergeItemToList( regularMerge );
2198
0
  }
2199
0
}
2200
2201
void EncCu::addCiipCandsToPruningList( const MergeCtx &mergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPassIntra, const TempCtx &ctxStart, DistParam &distParam, CodingUnit &pu, bool* sameMv )
2202
0
{
2203
0
  const ReshapeData& reshapeData  = pu.cs->picture->reshapeData;
2204
0
  int                numCiipIntra = -1;
2205
0
  PelUnitBuf         rspBuffer    = m_aTmpStorageLCU[0].getCompactBuf( pu );
2206
0
  PelUnitBuf         ciipBuf      = m_aTmpStorageLCU[1].getCompactBuf( pu );
2207
2208
0
  pu.ciip        = true;
2209
0
  pu.intraDir[0] = PLANAR_IDX;
2210
0
  pu.geo         = pu.affine
2211
0
                 = pu.mmvdMergeFlag = pu.mmvdSkip
2212
0
                 = false;
2213
0
  m_cIntraSearch . initIntraPatternChType        ( pu, pu.Y() );
2214
0
  m_cIntraSearch . predIntraAng                  ( COMP_Y, ciipBuf.Y(), pu );
2215
0
  numCiipIntra   = m_cIntraSearch.getNumIntraCiip( pu );
2216
2217
0
  int nonCiipMrgCnds[MRG_MAX_NUM_CANDS] = { 0, };
2218
0
  int numNonCiipCnds                    =   0;
2219
0
  for( ; numNonCiipCnds < m_mergeItemList.size(); numNonCiipCnds++ ) nonCiipMrgCnds[numNonCiipCnds] = m_mergeItemList.getMergeItemInList( numNonCiipCnds )->mergeIdx;
2220
2221
0
  for( int i = 0; i < numNonCiipCnds; i++ )
2222
0
  {
2223
0
    const unsigned int uiMergeCand = nonCiipMrgCnds[i];
2224
2225
0
    if( sameMv[uiMergeCand] ) continue;
2226
2227
0
    mergeCtx.setMergeInfo     ( pu, uiMergeCand );
2228
2229
0
    if( m_pcEncCfg->m_ifpLines && 
2230
0
        ( ( pu.refIdx[L0] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L0][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ||
2231
0
          ( pu.refIdx[L1] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L1][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ) )
2232
0
    {
2233
0
      continue;
2234
0
    }
2235
2236
0
    pu.interDir               = mergeCtx.interDirNeighbours[uiMergeCand];
2237
0
    pu.BcwIdx                 = pu.interDir == 3 ? mergeCtx.BcwIdx[uiMergeCand] : BCW_DEFAULT;
2238
0
    pu.imv                    = mergeCtx.useAltHpelIf[uiMergeCand] ? IMV_HPEL : IMV_OFF;
2239
0
    CU::spanMotionInfo        ( pu );
2240
2241
0
    MergeItem* ciipMerge      = m_mergeItemList.allocateNewMergeItem();
2242
0
    ciipMerge->importMergeInfo( mergeCtx, uiMergeCand, MergeItem::MergeItemType::CIIP, pu );
2243
0
    ciipMerge->numCiipIntra   = numCiipIntra;
2244
0
    auto dstBuf               = ciipMerge->getPredBuf( localUnitArea );
2245
0
    generateMergePrediction   ( localUnitArea, ciipMerge, pu, true, false, dstBuf, false, false, nullptr, &ciipBuf );
2246
2247
0
    if( pu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag() )
2248
0
    {
2249
      // distortion is calculated in the original domain
2250
0
      rspBuffer.Y()           . rspSignal( dstBuf.Y(), reshapeData.getInvLUT() );
2251
0
      ciipMerge->cost         = calcLumaCost4MergePrediction( ctxStart, rspBuffer, sqrtLambdaForFirstPassIntra, pu, distParam );
2252
0
    }
2253
0
    else
2254
0
    {
2255
0
      ciipMerge->cost         = calcLumaCost4MergePrediction( ctxStart, dstBuf, sqrtLambdaForFirstPassIntra, pu, distParam );
2256
0
    }
2257
0
    if( !m_mergeItemList      . insertMergeItemToList( ciipMerge ) && m_pcEncCfg->m_CIIP > 1 )
2258
0
    {
2259
0
      break;
2260
0
    }
2261
0
  }
2262
0
}
2263
2264
void EncCu::addMmvdCandsToPruningList( const MergeCtx &mergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPassIntra, const TempCtx& ctxStart,
2265
                                       DistParam& distParam, CodingUnit& pu )
2266
0
{
2267
0
  pu.mmvdSkip              = true;
2268
0
  pu.affine                = pu.geo
2269
0
                           = pu.ciip
2270
0
                           = false;
2271
2272
0
  int       mmvdTestNum    = mergeCtx.numValidMergeCand > 1 ? MmvdIdx::ADD_NUM : MmvdIdx::ADD_NUM >> 1;
2273
0
  int       bestDir        = 0;
2274
0
  size_t    curListSize    = m_mergeItemList.size();
2275
0
  double    bestCostMerge  = m_mergeItemList.getMergeItemInList( curListSize - 1 )->cost;
2276
0
  double    bestCostOffset = MAX_DOUBLE;
2277
0
  int       shiftCandStart = 0;
2278
2279
0
  if( m_pcEncCfg->m_MMVD == 4 )
2280
0
  {
2281
0
    const int cnd1idx = m_mergeItemList.size() == 1 ? 0 : 1;
2282
0
    const int mrgCnd0 = m_mergeItemList.getMergeItemInList(       0 )->mergeIdx;
2283
0
    const int mrgCnd1 = m_mergeItemList.getMergeItemInList( cnd1idx )->mergeIdx;
2284
2285
0
    if( mrgCnd0 > 1 && mrgCnd1 > 1 )
2286
0
    {
2287
0
      mmvdTestNum = 0;
2288
0
    }
2289
0
    else if( mrgCnd0 > 1 || mrgCnd1 > 1 )
2290
0
    {
2291
0
      int shiftCand = mrgCnd0 < 2 ? mrgCnd0 : mrgCnd1;
2292
2293
0
      if( shiftCand )
2294
0
      {
2295
0
        shiftCandStart = MMVD_MAX_REFINE_NUM;
2296
0
      }
2297
0
      else
2298
0
      {
2299
0
        mmvdTestNum    = MMVD_MAX_REFINE_NUM;
2300
0
      }
2301
0
    }
2302
0
  }
2303
2304
0
  for( int mmvdMergeCand = shiftCandStart; mmvdMergeCand < mmvdTestNum; mmvdMergeCand++ )
2305
0
  {
2306
0
    MmvdIdx mmvdIdx;
2307
0
    mmvdIdx.val = mmvdMergeCand;
2308
2309
0
    if( mmvdIdx.pos.step >= m_pcEncCfg->m_MmvdDisNum )
2310
0
    {
2311
0
      continue;
2312
0
    }
2313
2314
0
    if( m_pcEncCfg->m_MMVD > 1 )
2315
0
    {
2316
0
      int checkMMVD = xCheckMMVDCand( mmvdIdx, bestDir, mmvdTestNum, bestCostOffset, bestCostMerge, m_mergeItemList.getMergeItemInList( curListSize - 1 )->cost );
2317
0
      mmvdMergeCand = mmvdIdx.val;
2318
2319
0
      if( checkMMVD )
2320
0
      {
2321
0
        if( checkMMVD == 2 )
2322
0
        {
2323
0
          break;
2324
0
        }
2325
0
        continue;
2326
0
      }
2327
0
    }
2328
2329
0
    mergeCtx.setMmvdMergeCandiInfo( pu, mmvdIdx );
2330
2331
0
    if( m_pcEncCfg->m_ifpLines &&
2332
0
        ( ( pu.refIdx[L0] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L0][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ||
2333
0
          ( pu.refIdx[L1] >= 0 && !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), pu.mv[L1][0].ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv ) ) ) )
2334
0
    {
2335
      // skip candidate
2336
0
      continue;
2337
0
    }
2338
2339
0
    pu.interDir               = mergeCtx.interDirNeighbours[mmvdIdx.pos.baseIdx];
2340
0
    pu.BcwIdx                 = pu.interDir == 3 ? mergeCtx.BcwIdx[mmvdIdx.pos.baseIdx] : BCW_DEFAULT;
2341
0
    pu.imv                    = mergeCtx.useAltHpelIf[mmvdIdx.pos.baseIdx] ? IMV_HPEL : IMV_OFF;
2342
0
    CU::spanMotionInfo        ( pu );
2343
2344
0
    MergeItem *mmvdMerge      = m_mergeItemList.allocateNewMergeItem();
2345
0
    mmvdMerge->importMergeInfo( mergeCtx, mmvdIdx.val, MergeItem::MergeItemType::MMVD, pu );
2346
0
    auto dstBuf               = mmvdMerge->getPredBuf( localUnitArea );
2347
0
    generateMergePrediction   ( localUnitArea, mmvdMerge, pu, true, false, dstBuf, false, false, nullptr, nullptr );
2348
0
    mmvdMerge->cost           = calcLumaCost4MergePrediction( ctxStart, dstBuf, sqrtLambdaForFirstPassIntra, pu, distParam );
2349
0
    m_mergeItemList           . insertMergeItemToList( mmvdMerge );
2350
2351
0
    if( m_pcEncCfg->m_MMVD > 1 && mmvdMerge->cost < bestCostOffset )
2352
0
    {
2353
0
      bestCostOffset          = mmvdMerge->cost;
2354
0
      int CandCur             = mmvdIdx.val - MMVD_MAX_REFINE_NUM * mmvdIdx.pos.baseIdx;
2355
0
      if( CandCur < 4 )
2356
0
        bestDir               = CandCur;
2357
0
    }
2358
0
  }
2359
2360
0
  if( m_pcEncCfg->m_useFastMrg >= 2 )
2361
0
  {
2362
0
    m_mergeItemList           . shrinkList( curListSize );
2363
0
  }
2364
0
}
2365
2366
// First-pass evaluation of the sub-block merge candidates (affine and SbTMVP) held in affineMergeCtx: candidates that
// are not pruned get a merge item that is predicted (luma only), costed and inserted into m_mergeItemList.
// For m_useFastMrg >= 2 the list is shrunk back to the size it had on entry.
void EncCu::addAffineCandsToPruningList( AffineMergeCtx &affineMergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPass,
                                         const TempCtx& ctxStart, DistParam& distParam, CodingUnit& pu)
{
  // one spare entry because the pruning loop below reads index m + 1
  bool         sameMV[AFFINE_MRG_MAX_NUM_CANDS + 1] = { false, };
  const size_t listSizeOnEntry                      = m_mergeItemList.size();

  pu.mergeFlag     = true;
  pu.affine        = true;
  pu.imv           = IMV_OFF;
  pu.ciip          = false;
  pu.mmvdSkip      = false;
  pu.mmvdMergeFlag = false;
  pu.geo           = false;

  if( m_pcEncCfg->m_Affine > 1 )
  {
    const bool highTLayer = pu.cs->slice->TLayer > 3;

    for( int m = 0; m < affineMergeCtx.numValidMergeCand; m++ )
    {
      if( highTLayer && affineMergeCtx.mergeType[m] != MRG_TYPE_SUBPU_ATMVP )
      {
        // at temporal layers above 3, non-SbTMVP candidates are pruned except the one at index 0
        sameMV[m] = m != 0;
      }
      // NOTE(review): this tests the flag of the NEXT candidate (m + 1), unlike the regular-merge pruning which
      // tests index m - kept exactly as in the original, confirm intent
      else if( !sameMV[m + 1] )
      {
        for( int n = m + 1; n < affineMergeCtx.numValidMergeCand; n++ )
        {
          // only the first entry of each list's MV field is compared
          if(    affineMergeCtx.mvFieldNeighbours[m][0][0] == affineMergeCtx.mvFieldNeighbours[n][0][0]
              && affineMergeCtx.mvFieldNeighbours[m][1][0] == affineMergeCtx.mvFieldNeighbours[n][1][0] )
          {
            sameMV[n] = true;
          }
        }
      }
    }
  }

  for( uint32_t candIdx = 0; candIdx < affineMergeCtx.numValidMergeCand; candIdx++ )
  {
    const bool isSbTmvp = affineMergeCtx.mergeType[candIdx] == MRG_TYPE_SUBPU_ATMVP;

    // with affine disabled only SbTMVP candidates are tested; pruned duplicates are always skipped
    if( ( !isSbTmvp && m_pcEncCfg->m_Affine == 0 ) || sameMV[candIdx] )
    {
      continue;
    }

    pu.mergeType  = affineMergeCtx.mergeType[candIdx];
    pu.affineType = affineMergeCtx.affineType[candIdx];
    pu.interDir   = affineMergeCtx.interDirNeighbours[candIdx];
    if( pu.interDir == 3 )
    {
      pu.BcwIdx = affineMergeCtx.BcwIdx[candIdx];
    }
    else
    {
      pu.BcwIdx = BCW_DEFAULT;
    }

    // generate motion buf for IFP
    if( isSbTmvp )
    {
      pu.refIdx[L0] = affineMergeCtx.mvFieldNeighbours[candIdx][L0][0].refIdx;
      pu.refIdx[L1] = affineMergeCtx.mvFieldNeighbours[candIdx][L1][0].refIdx;
      pu.mv[L0][0]  = affineMergeCtx.mvFieldNeighbours[candIdx][L0][0].mv;
      pu.mv[L1][0]  = affineMergeCtx.mvFieldNeighbours[candIdx][L1][0].mv;
      CU::spanMotionInfo( pu, &affineMergeCtx );
    }
    else
    {
      CU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[candIdx][L0], L0 );
      CU::setAllAffineMvField( pu, affineMergeCtx.mvFieldNeighbours[candIdx][L1], L1 );
      CU::spanMotionInfo( pu );
    }

    if( m_pcEncCfg->m_ifpLines && !CU::isMotionBufInRangeFPP( pu, m_pcEncCfg->m_ifpLines ) )
    {
      continue;
    }

    MergeItem *item = m_mergeItemList.allocateNewMergeItem();
    item->importMergeInfo( affineMergeCtx, candIdx, isSbTmvp ? MergeItem::MergeItemType::SBTMVP : MergeItem::MergeItemType::AFFINE, pu );

    // luma only in the first pass
    auto itemPred = item->getPredBuf( localUnitArea );
    generateMergePrediction( localUnitArea, item, pu, true, false, itemPred, false, false, nullptr, nullptr );
    item->cost = calcLumaCost4MergePrediction( ctxStart, itemPred, sqrtLambdaForFirstPass, pu, distParam );
    m_mergeItemList.insertMergeItemToList( item );
  }

  if( m_pcEncCfg->m_useFastMrg >= 2 )
  {
    m_mergeItemList.shrinkList( listSizeOnEntry );
  }
}
2444
2445
void EncCu::addGpmCandsToPruningList( const MergeCtx &mergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPass,
2446
                                      const TempCtx& ctxStart, const GeoComboCostList& comboList, MergeBufVector& geoBuffer, DistParam& distParam, CodingUnit& pu)
2447
0
{
2448
0
  int geoNumMrgSadCand    = std::min( GEO_MAX_TRY_WEIGHTED_SAD, ( int ) comboList.list.size() );
2449
0
  geoNumMrgSadCand        = std::min( geoNumMrgSadCand, m_pcEncCfg->m_Geo > 2 ? 10 : GEO_MAX_TRY_WEIGHTED_SAD );
2450
0
  double bestGeoCost      = MAX_DOUBLE / 2.0;
2451
0
  MergeItem* best2geo[2]  = { nullptr, nullptr };
2452
2453
0
  pu.mergeFlag = true;
2454
0
  pu.geo       = true;
2455
0
  pu.mergeType = MRG_TYPE_DEFAULT_N;
2456
0
  pu.BcwIdx    = BCW_DEFAULT;
2457
0
  pu.interDir  = 3;
2458
0
  pu.imv       = IMV_OFF;
2459
0
  pu.affine    = pu.mmvdMergeFlag = pu.mmvdSkip
2460
0
               = pu.ciip
2461
0
               = false;
2462
2463
0
  for( int candidateIdx = 0; candidateIdx < geoNumMrgSadCand; candidateIdx++ )
2464
0
  {
2465
0
    const int          splitDir     = comboList.list[candidateIdx].splitDir;
2466
0
    const MergeIdxPair mergeIdxPair { comboList.list[candidateIdx].mergeIdx0, comboList.list[candidateIdx].mergeIdx1 };
2467
0
    const int          gpmIndex     = MergeItem::getGpmUnfiedIndex( splitDir, mergeIdxPair );
2468
2469
0
    pu.mergeIdx            = gpmIndex;
2470
0
    pu.geoMergeIdx         = mergeIdxPair;
2471
0
    pu.geoSplitDir         = splitDir;
2472
0
    CU::spanGeoMotionInfo  ( pu, mergeCtx, pu.geoSplitDir, pu.geoMergeIdx[0], pu.geoMergeIdx[1] );
2473
2474
0
    MergeItem *mergeItem   = m_mergeItemList.allocateNewMergeItem();
2475
0
    mergeItem->importMergeInfo( mergeCtx, gpmIndex, MergeItem::MergeItemType::GPM, pu );
2476
0
    auto dstBuf            = mergeItem->getPredBuf( localUnitArea );
2477
0
    generateMergePrediction( localUnitArea, mergeItem, pu, true, false, dstBuf, false, false, &geoBuffer[mergeIdxPair[0]], &geoBuffer[mergeIdxPair[1]] );
2478
0
    mergeItem->cost        = calcLumaCost4MergePrediction( ctxStart, dstBuf, sqrtLambdaForFirstPass, pu, distParam );
2479
0
    bestGeoCost            = std::min( mergeItem->cost, bestGeoCost );
2480
2481
0
    if( mergeItem->cost > MRG_FAST_RATIO[0] * bestGeoCost || mergeItem->cost > m_mergeBestSATDCost )
2482
0
    {
2483
0
      m_mergeItemList      . giveBackMergeItem( mergeItem );
2484
2485
0
      if( m_pcEncCfg->m_Geo > 2 ) break;
2486
0
    }
2487
0
    else if( m_pcEncCfg->m_Geo < 2 )
2488
0
    {
2489
0
      m_mergeItemList      . insertMergeItemToList( mergeItem );
2490
0
    }
2491
0
    else
2492
0
    {
2493
0
      if( ( m_mergeItemList.size() > 0 && m_mergeItemList.getMergeItemInList( m_mergeItemList.size() - 1 )->cost <= mergeItem->cost ) ||
2494
0
        ( best2geo[1] && best2geo[1]->cost <= mergeItem->cost ) )
2495
0
      {
2496
0
        m_mergeItemList    . giveBackMergeItem( mergeItem );
2497
0
      }
2498
0
      else
2499
0
      {
2500
0
        if( !best2geo[0] || mergeItem->cost < best2geo[0]->cost )
2501
0
        {
2502
0
          if( best2geo[1] )
2503
0
            m_mergeItemList. giveBackMergeItem( best2geo[1] );
2504
2505
0
          best2geo[1] = best2geo[0]; best2geo[0] = mergeItem;
2506
0
        }
2507
0
        else
2508
0
        {
2509
0
          if( best2geo[1] ) 
2510
0
            m_mergeItemList. giveBackMergeItem( best2geo[1] );
2511
2512
0
          best2geo[1] = mergeItem;
2513
0
        }
2514
0
      }
2515
0
    }
2516
0
  }
2517
2518
0
  if( best2geo[0] )
2519
0
    m_mergeItemList        . insertMergeItemToList( best2geo[0] );
2520
0
  if( best2geo[1] )
2521
0
    m_mergeItemList        . insertMergeItemToList( best2geo[1] );
2522
0
}
2523
2524
bool EncCu::prepareGpmComboList( const MergeCtx &mergeCtx, const UnitArea &localUnitArea, double sqrtLambdaForFirstPass,
2525
                                 GeoComboCostList& comboList, MergeBufVector& geoBuffer, CodingUnit& pu )
2526
0
{
2527
0
          sqrtLambdaForFirstPass /= FRAC_BITS_SCALE;
2528
0
  const int bitsForPartitionIdx   = floorLog2(GEO_NUM_PARTITION_MODE);
2529
0
  const int maxNumMergeCandidates = std::min( ( int ) pu.cs->sps->maxNumGeoCand, MRG_MAX_NUM_CANDS );
2530
0
  DistParam distParam;
2531
  // the second arguments to setDistParam is dummy and will be updated before being used
2532
0
  DistParam  distParamWholeBlk     = m_cRdCost.setDistParam( pu.cs->getOrgBuf().Y(), pu.cs->getOrgBuf().Y(), pu.cs->sps->bitDepths[ CH_L ], DF_SAD );
2533
0
  Distortion bestWholeBlkSad       = MAX_UINT64;
2534
0
  double     bestWholeBlkCost      = MAX_DOUBLE;
2535
0
  const ClpRng&  lclpRng           = pu.slice->clpRngs[COMP_Y];
2536
0
  const unsigned rshift            = std::max<int>( 2, ( IF_INTERNAL_PREC - lclpRng.bd ) );
2537
0
  const int      offset            = ( 1 << ( rshift - 1 ) ) + IF_INTERNAL_OFFS;
2538
0
  const int      numSamples        = pu.Y().area();
2539
0
  Distortion sadWholeBlk            [GEO_MAX_NUM_UNI_CANDS];
2540
0
  int        pocMrg                 [GEO_MAX_NUM_UNI_CANDS];
2541
0
  Mv         mergeMv                [GEO_MAX_NUM_UNI_CANDS];
2542
0
  bool       isSkipThisCand         [GEO_MAX_NUM_UNI_CANDS]
2543
0
                                   = { false, };
2544
0
  bool       sameMV                 [MRG_MAX_NUM_CANDS]
2545
0
                                   = { false, };
2546
0
  MergeBufVector geoTempBuf;
2547
2548
0
  if( m_pcEncCfg->m_Geo > 2 )
2549
0
  {
2550
0
    for( int m = 0; m < maxNumMergeCandidates; m++ )
2551
0
    {
2552
0
      if( !sameMV[m] )
2553
0
      {
2554
0
        for( int n = m + 1; n < maxNumMergeCandidates; n++ )
2555
0
        {
2556
0
          sameMV[n] |= mergeCtx.mvFieldNeighbours[m][0] == mergeCtx.mvFieldNeighbours[n][0]
2557
0
                    && mergeCtx.mvFieldNeighbours[m][1] == mergeCtx.mvFieldNeighbours[n][1];
2558
0
        }
2559
0
      }
2560
0
    }
2561
0
  }
2562
2563
0
  for( uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++ )
2564
0
  {
2565
0
    geoBuffer .push_back ( m_aTmpStorageLCU[2                         + mergeCand].getCompactBuf( localUnitArea ) );
2566
0
    geoTempBuf.push_back ( m_aTmpStorageLCU[2 + GEO_MAX_NUM_UNI_CANDS + mergeCand].getCompactBuf( localUnitArea ) );
2567
2568
0
    const int  listIdx    = mergeCtx.mvFieldNeighbours[mergeCand][0]      .refIdx == -1 ? 1 : 0;
2569
0
    const auto refPicList = RefPicList(listIdx);
2570
0
    const int  refIdx     = mergeCtx.mvFieldNeighbours[mergeCand][listIdx].refIdx;
2571
2572
0
    pocMrg [mergeCand]    = pu.cs->slice->getRefPic( refPicList, refIdx )->poc;
2573
0
    mergeMv[mergeCand]    = mergeCtx.mvFieldNeighbours[mergeCand][listIdx].mv;
2574
2575
0
    for( int i = 0; i < mergeCand; i++ )
2576
0
    {
2577
0
      if( pocMrg[mergeCand] == pocMrg[i] && mergeMv[mergeCand] == mergeMv[i] )
2578
0
      {
2579
0
        isSkipThisCand[mergeCand] = true;
2580
0
        break;
2581
0
      }
2582
0
    }
2583
2584
0
    if( sameMV[mergeCand] )
2585
0
    {
2586
0
      continue;
2587
0
    }
2588
2589
0
    if( m_pcEncCfg->m_ifpLines ) 
2590
0
    {
2591
0
      bool isOutOfRange  = !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), mergeCtx.mvFieldNeighbours[mergeCand][0].mv.ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv );
2592
0
           isOutOfRange |= !CU::isMvInRangeFPP( pu.ly(), pu.lheight(), mergeCtx.mvFieldNeighbours[mergeCand][1].mv.ver, m_pcEncCfg->m_ifpLines, *pu.cs->pcv );
2593
2594
      // use sameMV to surpress processing of this cand later on...
2595
0
      sameMV[mergeCand] |= isOutOfRange;
2596
2597
0
      if( isOutOfRange )
2598
0
        continue;
2599
0
    }
2600
2601
0
    mergeCtx.setMergeInfo            ( pu, mergeCand );
2602
0
    CU::spanMotionInfo               ( pu );
2603
0
    m_cInterSearch.motionCompensation( pu, geoBuffer[mergeCand], REF_PIC_LIST_X );
2604
2605
0
    g_pelBufOP.roundGeo( geoBuffer[mergeCand].Y().buf, geoTempBuf[mergeCand].Y().buf, numSamples, rshift, offset, lclpRng );
2606
2607
0
    distParamWholeBlk.cur  = geoTempBuf[mergeCand].Y();
2608
0
    sadWholeBlk[mergeCand] = distParamWholeBlk.distFunc( distParamWholeBlk );
2609
2610
0
    if( sadWholeBlk[mergeCand] < bestWholeBlkSad )
2611
0
    {
2612
0
      bestWholeBlkSad  = sadWholeBlk[mergeCand];
2613
0
      int bitsCand     = mergeCand + 1;
2614
0
      bestWholeBlkCost = ( double ) bestWholeBlkSad + ( double ) bitsCand * sqrtLambdaForFirstPass;
2615
0
    }
2616
0
  }
2617
2618
0
  bool allCandsAreSame = true;
2619
0
  for( uint8_t mergeCand = 1; mergeCand < maxNumMergeCandidates; mergeCand++ )
2620
0
  {
2621
0
    allCandsAreSame &= isSkipThisCand[mergeCand];
2622
0
  }
2623
0
  if( allCandsAreSame )
2624
0
  {
2625
0
    return false;
2626
0
  }
2627
2628
0
  const int wIdx = floorLog2( pu.lwidth() )  - GEO_MIN_CU_LOG2;
2629
0
  const int hIdx = floorLog2( pu.lheight() ) - GEO_MIN_CU_LOG2;
2630
2631
0
  for( int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; )
2632
0
  {
2633
0
    int maskStride = 0, maskStride2 = 0;
2634
0
    int stepX = 1;
2635
0
    Pel *sadMask;
2636
0
    int16_t angle = g_GeoParams[splitDir][0];
2637
    
2638
0
    if( g_angle2mirror[angle] == 2 )
2639
0
    {
2640
0
      maskStride  = -GEO_WEIGHT_MASK_SIZE;
2641
0
      maskStride2 = -( int ) pu.lwidth();
2642
0
      sadMask     = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]]
2643
0
                      [( GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[hIdx][wIdx][splitDir][1] ) * GEO_WEIGHT_MASK_SIZE
2644
0
                                                  + g_weightOffset[hIdx][wIdx][splitDir][0]
2645
0
                      ];
2646
0
    }
2647
0
    else if( g_angle2mirror[angle] == 1 )
2648
0
    {
2649
0
      stepX       = -1;
2650
0
      maskStride2 = pu.lwidth();
2651
0
      maskStride  = GEO_WEIGHT_MASK_SIZE;
2652
0
      sadMask     = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]]
2653
0
                      [     GEO_WEIGHT_MASK_SIZE *     g_weightOffset[hIdx][wIdx][splitDir][1]
2654
0
                        + ( GEO_WEIGHT_MASK_SIZE - 1 - g_weightOffset[hIdx][wIdx][splitDir][0] )
2655
0
                      ];
2656
0
    }
2657
0
    else
2658
0
    {
2659
0
      maskStride  = GEO_WEIGHT_MASK_SIZE;
2660
0
      maskStride2 = -( int ) pu.lwidth();
2661
0
      sadMask     = &g_globalGeoEncSADmask[g_angle2mask[g_GeoParams[splitDir][0]]]
2662
0
                      [   g_weightOffset[hIdx][wIdx][splitDir][1] * GEO_WEIGHT_MASK_SIZE
2663
0
                        + g_weightOffset[hIdx][wIdx][splitDir][0]
2664
0
                      ];
2665
0
    }
2666
2667
0
    m_cRdCost.setDistParamGeo ( distParam, pu.cs->getOrgBuf().Y(),
2668
0
                                nullptr, 0,
2669
0
                                sadMask, maskStride, stepX, maskStride2,
2670
0
                                pu.cs->sps->bitDepths[CH_L], COMP_Y );
2671
2672
0
    for( uint8_t mergeCand = 0; mergeCand < maxNumMergeCandidates; mergeCand++ )
2673
0
    {
2674
0
      if( sameMV[mergeCand] )
2675
0
      {
2676
0
        continue;
2677
0
      }
2678
2679
0
      distParam.cur.buf         = geoTempBuf[mergeCand].Y().buf;
2680
0
      distParam.cur.stride      = geoTempBuf[mergeCand].Y().stride;
2681
0
      const Distortion sadLarge = distParam.distFunc( distParam );
2682
0
      const Distortion sadSmall = sadWholeBlk[mergeCand] - sadLarge;
2683
2684
0
      const int bitsCand        = mergeCand + 1;
2685
2686
0
      const double cost0        = ( double ) sadLarge + ( double ) bitsCand * sqrtLambdaForFirstPass;
2687
0
      const double cost1        = ( double ) sadSmall + ( double ) bitsCand * sqrtLambdaForFirstPass;
2688
2689
0
      m_GeoCostList.insert( splitDir, 0, mergeCand, cost0 );
2690
0
      m_GeoCostList.insert( splitDir, 1, mergeCand, cost1 );
2691
0
    }
2692
2693
0
    if( m_pcEncCfg->m_Geo == 4 )
2694
0
    {
2695
0
      if( splitDir == 1 )
2696
0
      {
2697
0
        splitDir += 7;
2698
0
      }
2699
0
      else if( splitDir == 35 || ( splitDir + 1 ) % 4 != 0 )
2700
0
      {
2701
0
        splitDir++;
2702
0
      }
2703
0
      else
2704
0
      {
2705
0
        splitDir += 5;
2706
0
      }
2707
0
    }
2708
0
    else
2709
0
    {
2710
0
      splitDir++;
2711
0
    }
2712
0
  }
2713
2714
0
  comboList.list.clear();
2715
2716
0
  for( int splitDir = 0; splitDir < GEO_NUM_PARTITION_MODE; )
2717
0
  {
2718
0
    for( int geoMotionIdx = 0; geoMotionIdx < maxNumMergeCandidates * ( maxNumMergeCandidates - 1 ); geoMotionIdx++ )
2719
0
    {
2720
0
      const MergeIdxPair mergeIdxPair = m_GeoModeTest[geoMotionIdx];
2721
2722
0
      if( sameMV[mergeIdxPair[0]] || sameMV[mergeIdxPair[1]] )
2723
0
      {
2724
0
        continue;
2725
0
      }
2726
2727
0
      double tempCost = m_GeoCostList.getCost( splitDir, mergeIdxPair[0], mergeIdxPair[1] );
2728
2729
0
      if( tempCost > bestWholeBlkCost )
2730
0
      {
2731
0
        continue;
2732
0
      }
2733
2734
0
      tempCost = tempCost + ( double ) bitsForPartitionIdx * sqrtLambdaForFirstPass;
2735
0
      comboList.list.push_back( GeoMergeCombo{ splitDir, mergeIdxPair[0], mergeIdxPair[1], tempCost } );
2736
0
    }
2737
2738
0
    if( m_pcEncCfg->m_Geo == 4 )
2739
0
    {
2740
0
      if( splitDir == 1 )
2741
0
      {
2742
0
        splitDir += 7;
2743
0
      }
2744
0
      else if( splitDir == 35 || ( splitDir + 1 ) % 4 != 0 )
2745
0
      {
2746
0
        splitDir++;
2747
0
      }
2748
0
      else
2749
0
      {
2750
0
        splitDir += 5;
2751
0
      }
2752
0
    }
2753
0
    else
2754
0
    {
2755
0
      splitDir++;
2756
0
    }
2757
0
  }
2758
2759
0
  if( comboList.list.empty() )
2760
0
  {
2761
0
    return false;
2762
0
  }
2763
2764
0
  comboList.sortByCost();
2765
0
  return true;
2766
0
}
2767
2768
double EncCu::calcLumaCost4MergePrediction( const TempCtx &ctxStart, const PelUnitBuf &predBuf, double lambda, CodingUnit &cu, DistParam &distParam )
2769
0
{
2770
0
  distParam.cur = predBuf.Y();
2771
0
  auto dist     = distParam.distFunc(distParam);
2772
2773
0
  m_CABACEstimator->getCtx() = ctxStart;
2774
0
  auto fracBits = xCalcPuMeBits( cu );
2775
2776
0
  double cost   = ( double ) dist + ( double ) fracBits * lambda;
2777
2778
0
  m_uiSadBestForQPA = std::min( dist, m_uiSadBestForQPA );
2779
2780
0
  return cost;
2781
0
}
2782
2783
//////////////////////////////////////////////////////////////////////////////////////////////
2784
// ibc merge/skip mode check
2785
// Tests the IBC merge candidates of the current block.
//
// Pass 1 computes a cost (DF_HAD distortion plus lambda-weighted candidate bits) for every merge
// candidate with a valid block vector and shortens the candidate list; pass 2 runs the full RD
// check on the remaining candidates, once per residual pass, and updates bestCS through
// xCheckBestMode.
//
//   tempCS      working coding structure; re-initialised several times
//   bestCS      best coding structure found so far
//   partitioner current partitioning state (channel type, tree type, CU data)
//   encTestMode test mode; its qp is used for the CUs and for initStructData
void EncCu::xCheckRDCostIBCModeMerge2Nx2N(CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner, const EncTestMode& encTestMode)
{
  assert(partitioner.chType != CH_C); // chroma IBC is derived

  // IBC is not tested for blocks with a side of 128 samples
  if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128)
  {
    return;
  }

  // fast method (> 1): outside intra slices, stop if the current best CU is a skip CU
  const bool checkBestForSkip = (m_pcEncCfg->m_IBCFastMethod > 1) && !bestCS->slice->isIntra() && (bestCS->cus.size() != 0);
  if (checkBestForSkip && bestCS->getCU(partitioner.chType, partitioner.treeType)->skip)
  {
    return;
  }

  const SPS& sps = *tempCS->sps;

  tempCS->initStructData(encTestMode.qp);

  // derive the merge candidates with a temporary CU that is not added to tempCS
  MergeCtx mergeCtx;
  {
    CodingUnit probeCu(tempCS->area);
    probeCu.cs            = tempCS;
    probeCu.predMode      = MODE_IBC;
    probeCu.slice         = tempCS->slice;
    probeCu.tileIdx       = m_tileIdx;
    probeCu.initPuData();
    probeCu.cs            = tempCS; // assigned again after initPuData(), as in the original sequence
    probeCu.mmvdSkip      = false;
    probeCu.mmvdMergeFlag = false;
    probeCu.geo           = false;
    CU::getIBCMergeCandidates(probeCu, mergeCtx);
  }

  // NOTE: nothing in this function ever writes a value other than 0 into this array
  int candHasNoResidual[MRG_MAX_NUM_CANDS];
  for (unsigned int cand = 0; cand < mergeCtx.numValidMergeCand; cand++)
  {
    candHasNoResidual[cand] = 0;
  }

  bool     bestIsSkip     = false;
  unsigned numMrgSATDCand = mergeCtx.numValidMergeCand;

  // candidate indices ordered by cost (identity order to start with) and their costs
  static_vector<unsigned, MRG_MAX_NUM_CANDS> RdModeList(MRG_MAX_NUM_CANDS);
  for (unsigned pos = 0; pos < MRG_MAX_NUM_CANDS; pos++)
  {
    RdModeList[pos] = pos;
  }
  static_vector<double, MRG_MAX_NUM_CANDS> candCostList(MRG_MAX_NUM_CANDS, MAX_DOUBLE);

  // 1. Pass: get SATD-cost for selected candidates and reduce their count
  {
    const double sqrtLambdaForFirstPass = m_cRdCost.getMotionLambda();

    CodingUnit& cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType, partitioner.treeType), partitioner.chType);

    partitioner.setCUData(cu);
    cu.slice         = tempCS->slice;
    cu.tileIdx       = m_tileIdx;
    cu.skip          = false;
    cu.predMode      = MODE_IBC;
    cu.chromaQpAdj   = m_cuChromaQpOffsetIdxPlus1;
    cu.qp            = encTestMode.qp;
    cu.mmvdSkip      = false;
    cu.geo           = false;
    cu.initPuData();
    cu.mmvdMergeFlag = false;

    // the reference is the reconstruction of the current picture at the block position
    Picture*        refPic = cu.slice->pic;
    const UnitArea  localUnitArea(tempCS->area.chromaFormat, Area(cu.blocks[COMP_Y].x, cu.blocks[COMP_Y].y, tempCS->area.Y().width, tempCS->area.Y().height));
    const CompArea& compArea  = localUnitArea.block(COMP_Y);
    const CPelBuf   refBuf    = refPic->getRecoBuf(compArea);
    const Pel*      piRefSrch = refBuf.buf;

    DistParam          distParam;
    const ReshapeData& reshapeData = cu.cs->picture->reshapeData;
    if (cu.cs->slice->lmcsEnabled && reshapeData.getCTUFlag())
    {
      // with LMCS, compare against the forward-mapped original
      PelBuf tmpLmcs = m_aTmpStorageLCU[0].getCompactBuf(cu.Y());
      tmpLmcs.rspSignal(tempCS->getOrgBuf().Y(), reshapeData.getFwdLUT());
      distParam = m_cRdCost.setDistParam( tmpLmcs, refBuf, sps.bitDepths[CH_L], DF_HAD);
    }
    else
    {
      distParam = m_cRdCost.setDistParam(tempCS->getOrgBuf(COMP_Y), refBuf, sps.bitDepths[CH_L], DF_HAD);
    }
    int refStride = refBuf.stride;

    int numValidBv = mergeCtx.numValidMergeCand;
    for (unsigned int mergeCand = 0; mergeCand < mergeCtx.numValidMergeCand; mergeCand++)
    {
      mergeCtx.setMergeInfo(cu, mergeCand); // set bv info in merge mode

      const int          cuPelX    = cu.Y().x;
      const int          cuPelY    = cu.Y().y;
      int                roiWidth  = cu.lwidth();
      int                roiHeight = cu.lheight();
      const int          picWidth  = cu.cs->slice->pps->picWidthInLumaSamples;
      const int          picHeight = cu.cs->slice->pps->picHeightInLumaSamples;
      const unsigned int lcuWidth  = cu.cs->slice->sps->CTUSize;

      // block vector in integer precision
      Mv bv = cu.mv[0][0];
      bv.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT);
      int xPred = bv.hor;
      int yPred = bv.ver;

      if( !m_cInterSearch.searchBvIBC( cu, cuPelX, cuPelY, roiWidth, roiHeight, picWidth, picHeight, xPred, yPred, lcuWidth ) ) // not valid bv derived
      {
        numValidBv--;
        continue;
      }
      CU::spanMotionInfo(cu);
      distParam.cur.buf = piRefSrch + refStride * yPred + xPred;

      const Distortion sad = distParam.distFunc(distParam);

      // candidate index + 1 bits, one bit less for the last possible candidate index
      const bool         isLastCandIdx = (mergeCand == tempCS->sps->maxNumIBCMergeCand - 1);
      const unsigned int bitsCand      = isLastCandIdx ? mergeCand : mergeCand + 1;
      const double       cost          = (double)sad + (double)bitsCand * sqrtLambdaForFirstPass;

      updateCandList( mergeCand, cost, RdModeList, candCostList, numMrgSATDCand );
    }

    // no candidate has a valid block vector: leave tempCS reset with maximum cost
    if (!numValidBv)
    {
      tempCS->dist         = 0;
      tempCS->fracBits     = 0;
      tempCS->cost         = MAX_DOUBLE;
      tempCS->costDbOffset = 0;
      tempCS->initStructData(encTestMode.qp);
      return;
    }

    // Try to limit number of candidates using SATD-costs
    numMrgSATDCand = numValidBv;
    for (unsigned int pos = 1; pos < static_cast<unsigned int>(numValidBv); pos++)
    {
      if (candCostList[pos] > MRG_FAST_RATIO[0] * candCostList[0])
      {
        numMrgSATDCand = pos;
        break;
      }
    }

    tempCS->initStructData(encTestMode.qp);
  }

  const unsigned int iteration = 2;
  // 2. Pass: check candidates using full RD test
  for (unsigned int numResidualPass = 0; numResidualPass < iteration; numResidualPass++)
  {
    for (unsigned int mrgHADIdx = 0; mrgHADIdx < numMrgSATDCand; mrgHADIdx++)
    {
      unsigned int mergeCand = RdModeList[mrgHADIdx];

      if (numResidualPass == 1 && candHasNoResidual[mergeCand] == 1)
      {
        continue;
      }
      // pass 0 is not run any more once the best mode has no residual
      if (bestIsSkip && (numResidualPass == 0))
      {
        continue;
      }

      {
        CodingUnit& cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType, partitioner.treeType), partitioner.chType);

        partitioner.setCUData(cu);
        cu.slice         = tempCS->slice;
        cu.tileIdx       = m_tileIdx;
        cu.skip          = false;
        cu.predMode      = MODE_IBC;
        cu.chromaQpAdj   = m_cuChromaQpOffsetIdxPlus1;
        cu.qp            = encTestMode.qp;
        cu.sbtInfo       = 0;
        cu.initPuData();
        cu.intraDir[0]   = DC_IDX;     // set intra pred for ibc block
        cu.intraDir[1]   = PLANAR_IDX; // set intra pred for ibc block
        cu.mmvdSkip      = false;
        cu.mmvdMergeFlag = false;
        cu.geo           = false;
        mergeCtx.setMergeInfo(cu, mergeCand);
        CU::spanMotionInfo(cu);

        assert(mergeCtx.mrgTypeNeighbours[mergeCand] == MRG_TYPE_IBC);
        const bool chroma = !CU::isSepTree(cu);

        //  MC
        cu.mcControl = chroma ? 0 : 2;
        m_cInterSearch.motionCompensationIBC(cu, tempCS->getPredBuf());
        m_CABACEstimator->getCtx() = m_CurrCtx->start;

        // the flag passed on is false in residual pass 0 and true in residual pass 1
        m_cInterSearch.encodeResAndCalcRdInterCU(*tempCS, partitioner, (numResidualPass != 0));
        cu.mcControl = 0;
        xEncodeDontSplit(*tempCS, partitioner);
        xCheckDQP(*tempCS, partitioner);
        xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);

        tempCS->initStructData(encTestMode.qp);
      }

      // fast decision: remember whether the best CU so far has no coded residual
      if (m_pcEncCfg->m_useFastDecisionForMerge && !bestIsSkip)
      {
        const CodingUnit* bestCu = bestCS->getCU(partitioner.chType, partitioner.treeType);
        bestIsSkip = bestCu ? (bestCu->rootCbf == 0) : false;
      }
    }
  }
}
2999
3000
void EncCu::xCheckRDCostIBCMode(CodingStructure*& tempCS, CodingStructure*& bestCS, Partitioner& partitioner,
3001
  const EncTestMode& encTestMode)
3002
0
{
3003
0
  if (tempCS->area.lwidth() == 128 || tempCS->area.lheight() == 128)   // disable IBC mode larger than 64x64
3004
0
  {
3005
0
    return;
3006
0
  }
3007
0
  if ((m_pcEncCfg->m_IBCFastMethod > 1) && !bestCS->slice->isIntra() && (bestCS->cus.size() != 0))
3008
0
  {
3009
0
    if (bestCS->getCU(partitioner.chType, partitioner.treeType)->skip)
3010
0
    {
3011
0
      return;
3012
0
    }
3013
0
  }
3014
3015
0
  tempCS->initStructData(encTestMode.qp);
3016
3017
0
  CodingUnit& cu = tempCS->addCU(CS::getArea(*tempCS, tempCS->area, partitioner.chType, partitioner.treeType), partitioner.chType);
3018
3019
0
  partitioner.setCUData(cu);
3020
0
  cu.slice = tempCS->slice;
3021
0
  cu.tileIdx = m_tileIdx;
3022
0
  cu.skip = false;
3023
0
  cu.predMode = MODE_IBC;
3024
0
  cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
3025
0
  cu.qp = encTestMode.qp;
3026
0
  cu.initPuData();
3027
0
  cu.imv = IMV_OFF;
3028
0
  cu.sbtInfo = 0;
3029
0
  cu.mmvdSkip = false;
3030
0
  cu.mmvdMergeFlag = false;
3031
3032
0
  cu.intraDir[0] = DC_IDX; // set intra pred for ibc block
3033
0
  cu.intraDir[1] = PLANAR_IDX; // set intra pred for ibc block
3034
3035
0
  cu.interDir = 1; // use list 0 for IBC mode
3036
0
  cu.refIdx[REF_PIC_LIST_0] = MAX_NUM_REF; // last idx in the list
3037
0
  bool bValid = m_cInterSearch.predIBCSearch(cu, partitioner);
3038
3039
0
  if (bValid)
3040
0
  {
3041
0
    CU::spanMotionInfo(cu);
3042
0
    const bool chroma = !CU::isSepTree(cu);
3043
    //  MC
3044
0
    cu.mcControl = chroma ? 0 : 2;
3045
0
    m_cInterSearch.motionCompensationIBC(cu, tempCS->getPredBuf());
3046
3047
0
    m_cInterSearch.encodeResAndCalcRdInterCU(*tempCS, partitioner, false);
3048
0
    cu.mcControl = 0;
3049
3050
0
    xEncodeDontSplit(*tempCS, partitioner);
3051
0
    xCheckDQP(*tempCS, partitioner);
3052
0
    xCheckBestMode(tempCS, bestCS, partitioner, encTestMode);
3053
0
  } // bValid
3054
0
  else
3055
0
  {
3056
0
    tempCS->dist = 0;
3057
0
    tempCS->fracBits = 0;
3058
0
    tempCS->cost = MAX_DOUBLE;
3059
0
    tempCS->costDbOffset = 0;
3060
0
  }
3061
0
}
3062
3063
// Tests regular inter prediction (motion search) for the current block, once per BCW index that
// is visited in g_BcwSearchOrder, and codes the residual of every search that is not stopped.
//
//   tempCS      working coding structure; re-initialised for every BCW index
//   bestCS      best coding structure found so far; updated through xEncodeInterResidual
//   partitioner current partitioning state (channel type, CU data)
//   encTestMode test mode; its qp is used for the CUs and for initStructData
void EncCu::xCheckRDCostInter( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD, tempCS, partitioner.chType );
  tempCS->initStructData( encTestMode.qp );

  m_cInterSearch.setAffineModeSelected( false );
  m_cInterSearch.resetBufferedUniMotions();

  // all BCW indices are only looped over for large enough blocks in B slices with BCW enabled
  const bool singleBcwPass = tempCS->area.Y().area() < BCW_SIZE_CONSTRAINT || !tempCS->slice->isInterB() || !tempCS->sps->BCW;
  const int  bcwLoopNum    = singleBcwPass ? 1 : BCW_NUM;

  const double curBestCost = bestCS->cost;
  double       equBcwCost  = MAX_DOUBLE;

  for( int searchPos = 0; searchPos < bcwLoopNum; searchPos++ )
  {
    // m_BCW == 2: if the stored best mode of this area is inter, only test the default index
    // and the stored best index
    if( m_pcEncCfg->m_BCW == 2 )
    {
      const auto&   bestBlkInfo = m_modeCtrl.getBlkInfo( bestCS->area );
      const bool    isBestInter = bestBlkInfo.isInter;
      const uint8_t bestBcwIdx  = bestBlkInfo.BcwIdx;

      if( isBestInter && g_BcwSearchOrder[searchPos] != BCW_DEFAULT && g_BcwSearchOrder[searchPos] != bestBcwIdx )
      {
        continue;
      }
    }

    // without checkLDC only search positions 0, 3 and 4 are tested
    if( !tempCS->slice->checkLDC && searchPos != 0 && searchPos != 3 && searchPos != 4 )
    {
      continue;
    }

    CodingUnit &cu = tempCS->addCU( tempCS->area, partitioner.chType );

    partitioner.setCUData( cu );
    cu.slice       = tempCS->slice;
    cu.tileIdx     = m_tileIdx;
    cu.skip        = false;
    cu.mmvdSkip    = false;
    cu.predMode    = MODE_INTER;
    cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
    cu.qp          = encTestMode.qp;
    cu.initPuData();

    cu.BcwIdx      = g_BcwSearchOrder[searchPos];
    uint8_t bcwIdx = cu.BcwIdx;
    bool testBcw   = (bcwIdx != BCW_DEFAULT);

    // bit 3 of m_FastInferMerge enables the early stop, its low three bits select how many of
    // the highest temporal layers it applies to
    const bool stopInterRes  = ( ( m_pcEncCfg->m_FastInferMerge >> 3 ) & 1 )
                            && ( bestCS->slice->TLayer > (m_pcEncCfg->m_maxTLayer - (m_pcEncCfg->m_FastInferMerge & 7)) );
    double     bestCostInter = stopInterRes ? m_mergeBestSATDCost : MAX_DOUBLE;

    bool stopTest = m_cInterSearch.predInterSearch(cu, partitioner, bestCostInter);

    // the search changed bestCostInter: stop if it is too far above the best merge SATD cost
    if( stopInterRes && bestCostInter != m_mergeBestSATDCost )
    {
      const int L = (cu.slice->TLayer <= 2) ? 0 : (cu.slice->TLayer - 2);
      if( bestCostInter > MRG_FAST_RATIOMYV[L] * m_mergeBestSATDCost )
      {
        stopTest = true;
      }
    }

    if( !stopTest )
    {
      // a non-default index that ends up as BCW_DEFAULT does not need to be coded
      bcwIdx   = CU::getValidBcwIdx(cu);
      stopTest = testBcw && bcwIdx == BCW_DEFAULT;
    }

    if( stopTest )
    {
      tempCS->initStructData(encTestMode.qp);
      continue;
    }

    CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");

    xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, 0, &equBcwCost);

    if( bcwIdx == BCW_DEFAULT )
    {
      m_cInterSearch.setAffineModeSelected( bestCS->cus.front()->affine && !bestCS->cus.front()->mergeFlag );
    }

    tempCS->initStructData(encTestMode.qp);

    // with m_BCW == 2 stop as soon as equBcwCost exceeds the initial best cost by more than 5%
    const double skipTH = (m_pcEncCfg->m_BCW == 2 ? 1.05 : MAX_DOUBLE);
    if( equBcwCost > curBestCost * skipTH )
    {
      break;
    }

    // NOTE(review): cu is still read here although tempCS->initStructData() has already run;
    // presumably the CU storage stays valid after the reset - confirm
    if(    m_pcEncCfg->m_BCW == 2
        && (    ( cu.interDir != 3 && testBcw == 0 && ! m_pcEncCfg->m_picReordering )
             || ( g_BcwSearchOrder[searchPos] == BCW_DEFAULT && xIsBcwSkip( cu ) ) ) )
    {
      break;
    }
  }
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_MODES_TESTED][0][!tempCS->slice->isIntra() + tempCS->slice->depth] );
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !tempCS->slice->isIntra(), g_cuCounters2D[CU_MODES_TESTED][Log2( tempCS->area.lheight() )][Log2( tempCS->area.lwidth() )] );
}
3176
3177
// Tests inter prediction with the non-default MVD resolutions (cu.imv = 1 .. IMV_HPEL)
// for the current block, optionally combined with the BCW weight candidates listed in
// g_BcwSearchOrder.
//
// Depending on m_pcEncCfg->m_AMVRspeed the function
//   - is skipped entirely (speed 0, or when the current best CU is skip / plain merge
//     for the respective speed ranges),
//   - re-uses the motion of the current best CU instead of running a new motion search
//     ("Do_Limit", speeds 4 and 6), or
//   - compares all candidates by xCalcDistortion() only and encodes the residual once
//     for the best one ("Do_OnceRes", speed 7).
//
// \param tempCS       working coding structure; re-initialised for every candidate
// \param bestCS       best coding structure so far; may be updated by xEncodeInterResidual()
// \param partitioner  partitioner describing the current block
// \param encTestMode  test mode providing the QP used for the candidates
void EncCu::xCheckRDCostInterIMV(CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode)
{
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MVD_IMV, tempCS, partitioner.chType );

  bool Test_AMVR = m_pcEncCfg->m_AMVRspeed != 0;
  if (m_pcEncCfg->m_AMVRspeed > 2 && m_pcEncCfg->m_AMVRspeed < 5 && !bestCS->cus.empty() && bestCS->getCU(partitioner.chType, partitioner.treeType)->skip)
  {
    Test_AMVR = false;
  }
  else if (m_pcEncCfg->m_AMVRspeed > 4 && !bestCS->cus.empty() && bestCS->getCU(partitioner.chType, partitioner.treeType)->mergeFlag && !bestCS->getCU(partitioner.chType, partitioner.treeType)->ciip)
  {
    Test_AMVR = false;
  }
  const bool Do_Limit   = !bestCS->cus.empty() && (m_pcEncCfg->m_AMVRspeed == 4 || m_pcEncCfg->m_AMVRspeed == 6);
  const bool Do_OnceRes = !bestCS->cus.empty() && (m_pcEncCfg->m_AMVRspeed == 7);

  if( Test_AMVR )
  {
    double Fpel_cost    = m_pcEncCfg->m_AMVRspeed == 1 ? MAX_DOUBLE*0.5 : MAX_DOUBLE;
    double costCurStart = m_pcEncCfg->m_AMVRspeed == 1 ? m_modeCtrl.comprCUCtx->bestCostNoImv : bestCS->cost;
    double costCur      = MAX_DOUBLE;
    double bestCostIMV  = MAX_DOUBLE;

    if (Do_OnceRes)
    {
      // In this mode the candidates are ranked by distortion, so the reference cost is
      // the distortion of the current best structure.
      costCurStart = xCalcDistortion(bestCS, partitioner.chType, bestCS->sps->bitDepths[CH_L], 0);
      Fpel_cost = costCurStart;
      tempCS->initSubStructure(*m_pTempCS2, partitioner.chType, partitioner.currArea(), false);
    }

    // Holds the best distortion-ranked candidate in Do_OnceRes mode.
    CodingStructure *tempCSbest = m_pTempCS2;

    m_cInterSearch.setAffineModeSelected( false );

    m_cInterSearch.resetBufferedUniMotions();

    int bcwLoopNum = (tempCS->slice->isInterB() ? BCW_NUM : 1);
    bcwLoopNum = (tempCS->sps->BCW ? bcwLoopNum : 1);

    if( tempCS->area.lwidth() * tempCS->area.lheight() < BCW_SIZE_CONSTRAINT )
    {
      bcwLoopNum = 1;
    }

    for (int i = 1; i <= IMV_HPEL; i++)
    {
      double curBestCost = bestCS->cost;
      double equBcwCost  = MAX_DOUBLE;

      for( int bcwLoopIdx = 0; bcwLoopIdx < bcwLoopNum; bcwLoopIdx++ )
      {
        if( m_pcEncCfg->m_BCW == 2 )
        {
          bool isBestInter   = m_modeCtrl.getBlkInfo( bestCS->area ).isInter;
          uint8_t bestBcwIdx = m_modeCtrl.getBlkInfo( bestCS->area).BcwIdx;

          // Only the default weight and the weight stored for this block are tested.
          if( isBestInter && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT && g_BcwSearchOrder[bcwLoopIdx] != bestBcwIdx )
          {
            continue;
          }

          // With checkLDC set, non-default weights are limited to the two stored in m_bestBcwIdx.
          if( tempCS->slice->checkLDC && g_BcwSearchOrder[bcwLoopIdx] != BCW_DEFAULT
            && (m_bestBcwIdx[0] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[0])
            && (m_bestBcwIdx[1] >= 0 && g_BcwSearchOrder[bcwLoopIdx] != m_bestBcwIdx[1]))
          {
            continue;
          }
        }

        if( !tempCS->slice->checkLDC )
        {
          // Without checkLDC only search-order positions 0, 3 and 4 are tested.
          if( bcwLoopIdx != 0 && bcwLoopIdx != 3 && bcwLoopIdx != 4 )
          {
            continue;
          }
        }

        // Only written and read on the Do_Search path below; initialised so that no
        // indeterminate value can ever be observed.
        bool    testBcw    = false;
        uint8_t bcwIdx     = BCW_DEFAULT;
        bool    isEqualUni = false;

        if (i > IMV_FPEL)
        {
          // Skip the coarser resolutions when the full-pel result was clearly worse
          // than the reference cost.
          bool nextimv = false;
          double stopCost = i == IMV_HPEL ? 1.25 : 1.06;
          if (Fpel_cost > stopCost * costCurStart)
          {
            nextimv = true;
          }
          if ( m_pcEncCfg->m_AMVRspeed == 1 )
          {
            costCurStart = bestCS->cost;
          }
          if (nextimv)
          {
            continue;
          }
        }

        bool Do_Search = !Do_OnceRes;

        if (Do_Limit)
        {
          Do_Search = (i == IMV_FPEL);

          if (i == IMV_HPEL)
          {
            if (bestCS->slice->TLayer > 3)
            {
              continue;
            }
            if (bestCS->getCU(partitioner.chType, partitioner.treeType)->imv != 0)
            {
              Do_Search = true; //do_est
            }
          }

          const CodingUnit* bestCU = bestCS->getCU(partitioner.chType, partitioner.treeType);
          if (bestCU->mmvdMergeFlag || bestCU->geo)
          {
            Do_Search = true;
          }
        }
        tempCS->initStructData(encTestMode.qp);

        if (!Do_Search)
        {
          // Start from the motion data of the current best structure.
          tempCS->copyStructure(*bestCS, partitioner.chType, TREE_D);
        }
        tempCS->dist = 0;
        tempCS->fracBits = 0;
        tempCS->cost = MAX_DOUBLE;
        CodingUnit &cu = (Do_Search) ? tempCS->addCU(tempCS->area, partitioner.chType) : *tempCS->getCU(partitioner.chType, partitioner.treeType);
        if (Do_Search)
        {
          partitioner.setCUData(cu);
          cu.slice = tempCS->slice;
          cu.tileIdx = m_tileIdx;
          cu.skip = false;
          cu.mmvdSkip = false;
          cu.predMode = MODE_INTER;
          cu.chromaQpAdj = m_cuChromaQpOffsetIdxPlus1;
          cu.qp = encTestMode.qp;

          cu.initPuData();

          cu.imv = i;

          cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];
          bcwIdx    = cu.BcwIdx;
          testBcw   = (bcwIdx != BCW_DEFAULT);

          // Marker value: the candidate is dropped below unless the search leaves interDir <= 3.
          cu.interDir = 10;

          double bestCostInter = MAX_DOUBLE;
          m_cInterSearch.predInterSearch(cu, partitioner, bestCostInter);

          if ( cu.interDir <= 3 )
          {
            bcwIdx = CU::getValidBcwIdx(cu);
          }
          else
          {
            continue;
          }

          if( testBcw && bcwIdx == BCW_DEFAULT ) // Enabled Bcw but the search results is uni.
          {
            continue;
          }
          CHECK(!(testBcw || (!testBcw && bcwIdx == BCW_DEFAULT)), " !( bTestBcw || (!bTestBcw && bcwIdx == BCW_DEFAULT ) )");

          if( m_pcEncCfg->m_BCW == 2 )
          {
            if( cu.interDir != 3 && !testBcw )
            {
              isEqualUni = true;
            }
          }

          if (!CU::hasSubCUNonZeroMVd(cu))
          {
            continue;
          }
        }
        else
        {
          // Re-use the copied motion: switch to resolution i, adapt MVD/MV to it and
          // redo the motion compensation.
          cu.smvdMode = 0;
          cu.affine = false;
          cu.imv = i ;
          CU::resetMVDandMV2Int(cu);
          if (!CU::hasSubCUNonZeroMVd(cu))
          {
            continue;
          }

          cu.BcwIdx = g_BcwSearchOrder[bcwLoopIdx];

          cu.mvRefine = true;
          m_cInterSearch.motionCompensation(cu, tempCS->getPredBuf() );
          cu.mvRefine = false;
        }

        if( Do_OnceRes )
        {
          // Rank by distortion only and remember the best candidate; the residual is
          // encoded once after both loops.
          costCur = xCalcDistortion(tempCS, partitioner.chType, tempCS->sps->bitDepths[CH_L], cu.imv );
          if (costCur < bestCostIMV)
          {
            bestCostIMV = costCur;
            tempCSbest->getPredBuf().copyFrom(tempCS->getPredBuf());
            tempCSbest->clearCUs();
            tempCSbest->clearTUs();
            tempCSbest->copyStructure(*tempCS, partitioner.chType, TREE_D);
          }
          if (i > IMV_FPEL)
          {
            costCurStart = costCurStart > costCur ? costCur : costCurStart;
          }
        }
        else
        {
          xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, 0, &equBcwCost);
          costCur = tempCS->cost;

          if (i > IMV_FPEL)
          {
            costCurStart = bestCS->cost;
          }
        }

        if (i == IMV_FPEL)
        {
           Fpel_cost = costCur;
        }

        // The threshold is only effective for m_BCW == 2.
        const double skipTH = (m_pcEncCfg->m_BCW == 2 ? 1.05 : MAX_DOUBLE);
        if( equBcwCost > curBestCost * skipTH )
        {
          break;
        }

        if( m_pcEncCfg->m_BCW == 2 )
        {
          if( isEqualUni && ! m_pcEncCfg->m_picReordering )
          {
            break;
          }
          if( g_BcwSearchOrder[bcwLoopIdx] == BCW_DEFAULT && xIsBcwSkip( cu ) )
          {
            break;
          }
        }
      }
    }

    if (Do_OnceRes && (bestCostIMV != MAX_DOUBLE))
    {
      // Encode the residual for the best distortion-ranked candidate.
      tempCS->initStructData(bestCS->currQP[partitioner.chType]);
      tempCS->copyStructure(*tempCSbest, partitioner.chType, TREE_D);
      tempCS->getPredBuf().copyFrom(tempCSbest->getPredBuf());
      tempCS->dist = 0;
      tempCS->fracBits = 0;
      tempCS->cost = MAX_DOUBLE;

      xEncodeInterResidual(tempCS, bestCS, partitioner, encTestMode, 0, 0, NULL);
    }

    tempCS->initStructData(encTestMode.qp);
  }
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_MODES_TESTED][0][!tempCS->slice->isIntra() + tempCS->slice->depth] );
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !tempCS->slice->isIntra(), g_cuCounters2D[CU_MODES_TESTED][Log2( tempCS->area.lheight() )][Log2( tempCS->area.lwidth() )] );
}
3448
3449
// Estimates how deblocking the top/left edges of the current CU changes the distortion
// and stores the resulting cost offset in cs.costDbOffset.
//
// The current CU's reconstruction and the adjacent strips of the already reconstructed
// picture are copied into m_dbBuffer (addressed relative to the CU position), the loop
// filter is run on that buffer, and the distortion before and after filtering is compared.
// Returns without touching cs.costDbOffset when deblocking is disabled for the slice or
// when no edge qualifies.
void EncCu::xCalDebCost( CodingStructure &cs, Partitioner &partitioner )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_DEBLOCK_FILTER, &cs, partitioner.chType );

  if( cs.slice->deblockingFilterDisable )
  {
    return;
  }

  const ChromaFormat chFmt = cs.area.chromaFormat;
  CodingUnit*        cu    = cs.getCU( partitioner.chType, partitioner.treeType );
  const Position  lumaPos  = cu->Y().valid() ? cu->Y().pos() : recalcPosition( chFmt, cu->chType, CH_L, cu->blocks[cu->chType].pos() );

  // An edge is considered only when it is not on the picture border and lies on the 4-sample grid.
  bool hasTopEdge  = lumaPos.y > 0 && ( ( lumaPos.y % 4 ) == 0 );
  bool hasLeftEdge = lumaPos.x > 0 && ( ( lumaPos.x % 4 ) == 0 );

  if( !hasTopEdge && !hasLeftEdge )
  {
    return;
  }

  ComponentID firstComp = COMP_Y;
  if( CU::isSepTree( *cu ) && !isLuma( partitioner.chType ) )
  {
    firstComp = COMP_Cb;
  }
  ComponentID lastComp = COMP_Cr;
  if( ( CU::isSepTree( *cu ) && isLuma( partitioner.chType ) ) || cu->chromaFormat == VVENC_CHROMA_400 )
  {
    lastComp = COMP_Y;
  }

  const UnitArea curCsArea = clipArea( CS::getArea( cs, cs.area, partitioner.chType, partitioner.treeType ), *cs.picture );

  PelStorage& dbBuf = m_dbBuffer; //th we could reduce the buffer size and do some relocate

  //deblock neighbour pixels
  const Size lumaSize = cu->Y().valid() ? cu->Y().size() : recalcSize( chFmt, cu->chType, CH_L, cu->blocks[cu->chType].size() );

  int verOffset = lumaPos.y > 7 ? 8 : 4;
  int horOffset = lumaPos.x > 7 ? 8 : 4;

  LoopFilter::calcFilterStrengths( *cu, true );

  if( m_EDO == 2 && CS::isDualITree( cs ) && isLuma( partitioner.chType ) )
  {
    m_cLoopFilter.getMaxFilterLength( *cu, verOffset, horOffset );

    if( ( verOffset + horOffset ) == 0 )
    {
      return;
    }

    hasTopEdge  = hasTopEdge  && ( verOffset != 0 );
    hasLeftEdge = hasLeftEdge && ( horOffset != 0 );
  }

  const UnitArea topStrip  = UnitArea( chFmt, Area( lumaPos.x,             lumaPos.y - verOffset, lumaSize.width, verOffset       ) );
  const UnitArea leftStrip = UnitArea( chFmt, Area( lumaPos.x - horOffset, lumaPos.y,             horOffset,      lumaSize.height ) );

  const ReshapeData& reshapeData = cs.picture->reshapeData;

  // Shifts a picture-relative component area by the CU's block position of that component.
  const auto toLocal = [cu]( const CompArea& picArea, const int compIdx )
  {
    CompArea local = picArea;
    local.x -= cu->blocks[compIdx].x;
    local.y -= cu->blocks[compIdx].y;
    return local;
  };

  // Copies one neighbouring strip of the picture reconstruction into the deblock buffer.
  const auto copyNeighbour = [&]( const CompArea& nbArea, const ComponentID id )
  {
    PelBuf dst = dbBuf.getBuf( toLocal( nbArea, id ) );
    if( cs.slice->lmcsEnabled && isLuma( id ) )
    {
      dst.rspSignal( cs.picture->getRecoBuf( nbArea ), reshapeData.getInvLUT() );
    }
    else
    {
      dst.copyFrom( cs.picture->getRecoBuf( nbArea ) );
    }
  };

  for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
  {
    const ComponentID compId = (ComponentID)compIdx;

    //Copy current CU's reco to Deblock Pic Buffer
    const CompArea& curArea = curCsArea.block( compId );
    PelBuf          curDst  = dbBuf.getBuf( toLocal( curArea, compIdx ) );

    if( cs.slice->lmcsEnabled && isLuma( compId ) )
    {
      if( ( !cs.sps->LFNST ) && ( !cs.sps->MTS ) && ( !cs.sps->ISP ) && reshapeData.getCTUFlag() )
      {
        PelBuf rspReco = cs.getRspRecoBuf();
        curDst.copyFrom( rspReco );
      }
      else
      {
        PelBuf reco = cs.getRecoBuf( compId );
        curDst.rspSignal( reco, reshapeData.getInvLUT() );
      }
    }
    else
    {
      PelBuf reco = cs.getRecoBuf( compId );
      curDst.copyFrom( reco );
    }

    //left neighbour
    if( hasLeftEdge )
    {
      copyNeighbour( leftStrip.block( compId ), compId );
    }
    //top neighbour
    if( hasTopEdge )
    {
      copyNeighbour( topStrip.block( compId ), compId );
    }
  }

  const ChannelType dbChType = CU::isSepTree( *cu ) ? partitioner.chType : MAX_NUM_CH;

  CHECK( CU::isSepTree(*cu) && !cu->Y().valid() && partitioner.chType == CH_L, "xxx" );

  if( cu->Y().valid() )
  {
    m_cLoopFilter.setOrigin( CH_L, cu->lumaPos() );
  }
  if( cu->chromaFormat != VVENC_CHROMA_400 && cu->Cb().valid() )
  {
    m_cLoopFilter.setOrigin( CH_C, cu->chromaPos() );
  }

  //deblock
  if( hasLeftEdge )
  {
    m_cLoopFilter.loopFilterCu( *cu, dbChType, EDGE_VER, m_dbBuffer );
  }

  if( hasTopEdge )
  {
    m_cLoopFilter.loopFilterCu( *cu, dbChType, EDGE_HOR, m_dbBuffer );
  }

  //calculate difference between DB_before_SSE and DB_after_SSE for neighbouring CUs
  Distortion distBeforeDb = 0;
  Distortion distAfterDb  = 0;
  Distortion distCur      = 0;

  // Adds the unfiltered / filtered distortion of one neighbouring strip to the accumulators.
  const auto addNeighbourDist = [&]( const CompArea& nbArea, const ComponentID id )
  {
    CPelBuf org = cs.picture->getOrigBuf( nbArea );
    if( cs.picture->getFilteredOrigBuffer().valid() )
    {
      org = cs.picture->getRspOrigBuf( nbArea );
    }
    CPelBuf reco   = cs.picture->getRecoBuf( nbArea );
    CPelBuf recoDb = dbBuf.getBuf( toLocal( nbArea, id ) );
    distBeforeDb  += xGetDistortionDb( cs, org, reco,   nbArea, true  );
    distAfterDb   += xGetDistortionDb( cs, org, recoDb, nbArea, false );
  };

  for( int compIdx = firstComp; compIdx <= lastComp; compIdx++ )
  {
    const ComponentID compId = (ComponentID)compIdx;

    {
      CompArea curArea = curCsArea.block( compId );
      CPelBuf  reco    = dbBuf.getBuf( toLocal( curArea, compIdx ) );
      CPelBuf  org     = cs.getOrgBuf( compId );
      distCur += xGetDistortionDb( cs, org, reco, curArea, false );
    }

    if( hasLeftEdge )
    {
      addNeighbourDist( leftStrip.block( compId ), compId );
    }

    if( hasTopEdge )
    {
      addNeighbourDist( topStrip.block( compId ), compId );
    }
  }

  //updated cost
  const int64_t distDelta = distCur - cs.dist + distAfterDb - distBeforeDb;
  if( distDelta < 0 )
  {
    cs.costDbOffset = -m_cRdCost.calcRdCost( 0, -distDelta );
  }
  else
  {
    cs.costDbOffset = m_cRdCost.calcRdCost( 0, distDelta );
  }
}
3635
3636
// Returns the distortion between org and reco for one component area, as used by the
// deblocking cost estimation.
//
// \param cs        coding structure supplying slice, SPS and picture data
// \param org       original samples
// \param reco      reconstructed samples
// \param compArea  component area covered by the two buffers
// \param beforeDb  true when reco is taken from the picture before deblocking
//
// A weighted SSE (DF_SSE_WTD) is used when LMCS is active for the CTU or when
// luma-level-to-delta-QP is enabled; otherwise plain SSE (DF_SSE) is used.
Distortion EncCu::xGetDistortionDb(CodingStructure &cs, CPelBuf& org, CPelBuf& reco, const CompArea& compArea, bool beforeDb)
{
  const ReshapeData& reshapeData = cs.picture->reshapeData;
  const ComponentID  compID      = compArea.compID;

  if( ( cs.slice->lmcsEnabled && reshapeData.getCTUFlag() ) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
  {
    if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
    {
      CPelBuf recoForDist = reco;
      if( beforeDb )
      {
        // Map the reconstruction through the inverse LUT into a temporary buffer first.
        PelBuf invMapped = m_aTmpStorageLCU[0].getCompactBuf( compArea );
        invMapped.rspSignal( reco, reshapeData.getInvLUT() );
        recoForDist = invMapped;
      }
      return m_cRdCost.getDistPart( org, recoForDist, cs.sps->bitDepths[CH_L], compID, DF_SSE_WTD, &org );
    }

    // use the correct luma area to scale chroma
    const int csx = getComponentScaleX( compID, cs.area.chromaFormat );
    const int csy = getComponentScaleY( compID, cs.area.chromaFormat );

    // With m_EDO == 2 the luma area is always derived from compArea; otherwise that is
    // done for non-zero component ids only and the CS luma block is used for the rest.
    const bool deriveFromComp = ( m_EDO == 2 ) || compArea.compID;
    CompArea lumaArea = deriveFromComp
                      ? CompArea( COMP_Y, cs.area.chromaFormat, Area( compArea.x << csx, compArea.y << csy, compArea.width << csx, compArea.height << csy), true)
                      : cs.area.blocks[COMP_Y];
    CPelBuf orgLuma = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( lumaArea ): cs.picture->getOrigBuf( lumaArea );
    return m_cRdCost.getDistPart( org, reco, cs.sps->bitDepths[toChannelType( compID )], compID, DF_SSE_WTD, &orgLuma );
  }

  if( cs.slice->lmcsEnabled && cs.slice->isIntra() && compID == COMP_Y && !beforeDb ) //intra slice
  {
    // Map the reconstruction through the forward LUT before comparing it with org.
    PelBuf fwdMapped = m_aTmpStorageLCU[0].getCompactBuf( compArea );
    fwdMapped.rspSignal( reco, reshapeData.getFwdLUT() );
    return m_cRdCost.getDistPart( org, fwdMapped, cs.sps->bitDepths[CH_L], compID, DF_SSE );
  }

  return m_cRdCost.getDistPart(org, reco, cs.sps->bitDepths[toChannelType(compID)], compID, DF_SSE);
}
3689
3690
bool checkValidMvs( const CodingUnit& cu)
3691
0
{
3692
  // clang-format off
3693
0
  const int affineShiftTab[3] =
3694
0
  {
3695
0
    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
3696
0
    MV_PRECISION_INTERNAL - MV_PRECISION_SIXTEENTH,
3697
0
    MV_PRECISION_INTERNAL - MV_PRECISION_INT
3698
0
  };
3699
3700
0
  const int normalShiftTab[NUM_IMV_MODES] =
3701
0
  {
3702
0
    MV_PRECISION_INTERNAL - MV_PRECISION_QUARTER,
3703
0
    MV_PRECISION_INTERNAL - MV_PRECISION_INT,
3704
0
    MV_PRECISION_INTERNAL - MV_PRECISION_4PEL,
3705
0
    MV_PRECISION_INTERNAL - MV_PRECISION_HALF,
3706
0
  };
3707
  // clang-format on
3708
3709
0
  int mvShift;
3710
3711
0
  for (int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
3712
0
  {
3713
0
    if (cu.refIdx[refList] >= 0)
3714
0
    {
3715
0
      if (!cu.affine)
3716
0
      {
3717
0
        mvShift = normalShiftTab[cu.imv];
3718
0
        Mv signaledmvd(cu.mvd[refList][0].hor >> mvShift, cu.mvd[refList][0].ver >> mvShift);
3719
0
        if (!((signaledmvd.hor >= MVD_MIN) && (signaledmvd.hor <= MVD_MAX)) || !((signaledmvd.ver >= MVD_MIN) && (signaledmvd.ver <= MVD_MAX)))
3720
0
          return false;
3721
0
      }
3722
0
      else
3723
0
      {
3724
0
        for (int ctrlP = 1 + (cu.affineType == AFFINEMODEL_6PARAM); ctrlP >= 0; ctrlP--)
3725
0
        {
3726
0
          mvShift = affineShiftTab[cu.imv];
3727
0
          Mv signaledmvd(cu.mvd[refList][ctrlP].hor >> mvShift, cu.mvd[refList][ctrlP].ver >> mvShift);
3728
0
          if (!((signaledmvd.hor >= MVD_MIN) && (signaledmvd.hor <= MVD_MAX)) || !((signaledmvd.ver >= MVD_MIN) && (signaledmvd.ver <= MVD_MAX)))
3729
0
            return false;;
3730
0
        }
3731
0
      }
3732
0
    }
3733
0
  }
3734
  // avoid MV exceeding 18-bit dynamic range
3735
0
  const int maxMv = 1 << 17;
3736
0
  if (!cu.affine && !cu.mergeFlag)
3737
0
  {
3738
0
    if(    ( cu.refIdx[ 0 ] >= 0 && ( cu.mv[ 0 ][ 0 ].getAbsHor() >= maxMv || cu.mv[ 0 ][ 0 ].getAbsVer() >= maxMv ) )
3739
0
        || ( cu.refIdx[ 1 ] >= 0 && ( cu.mv[ 1 ][ 0 ].getAbsHor() >= maxMv || cu.mv[ 1 ][ 0 ].getAbsVer() >= maxMv ) ) )
3740
0
    {
3741
0
      return false;
3742
0
    }
3743
0
  }
3744
0
  if( cu.affine && !cu.mergeFlag )
3745
0
  {
3746
0
    for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
3747
0
    {
3748
0
      if( cu.refIdx[ refList ] >= 0 )
3749
0
      {
3750
0
        for( int ctrlP = 1 + ( cu.affineType == AFFINEMODEL_6PARAM ); ctrlP >= 0; ctrlP-- )
3751
0
        {
3752
0
          if( cu.mv[ refList ][ ctrlP ].getAbsHor() >= maxMv || cu.mv[ refList ][ ctrlP ].getAbsVer() >= maxMv )
3753
0
          {
3754
0
            return false;
3755
0
          }
3756
0
        }
3757
0
      }
3758
0
    }
3759
0
  }
3760
0
  return true;
3761
0
}
3762
3763
3764
// Encodes the residual of the inter CU held in tempCS, optionally also trying SBT
// modes, and hands every tested variant to xCheckBestMode().
//
// \param tempCS          candidate coding structure containing the predicted CU
// \param bestCS          best coding structure so far (may be swapped by xCheckBestMode)
// \param partitioner     partitioner describing the current block
// \param encTestMode     test mode (QP, lossless flag, mode type)
// \param residualPass    0: code the residual, 1: skip the residual
// \param bestHasNonResi  optional out: set to !rootCbf of the cheapest non-CIIP variant tested
// \param equBcwCost      optional in/out: lowered to tempCS->cost for BCW_DEFAULT CUs in the
//                        ETM_INTER_ME bookkeeping below
//
// Returns early (leaving tempCS->cost untouched) for a lossless skip-residual pass and when
// checkValidMvs() rejects the motion data. On the regular exit tempCS->cost holds the best
// cost found among the variants tested here.
void EncCu::xEncodeInterResidual( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner, const EncTestMode& encTestMode, int residualPass, bool* bestHasNonResi, double* equBcwCost )
{
  if( residualPass == 1 && encTestMode.lossless )
  {
    return;
  }

  CodingUnit*            cu        = tempCS->getCU( partitioner.chType, partitioner.treeType );
  double   bestCostInternal        = MAX_DOUBLE;

  if( !checkValidMvs( *cu ) )
    return;

  double  currBestCost = MAX_DOUBLE;

  // For SBT
  double     bestCost          = bestCS->cost;
  double     bestCostBegin     = bestCS->cost;
  const CodingUnit* prevBestCU = bestCS->getCU( partitioner.chType, partitioner.treeType );
  uint8_t    prevBestSbt       = ( prevBestCU == nullptr ) ? 0 : prevBestCU->sbtInfo;
  Distortion sbtOffDist        = 0;
  bool       sbtOffRootCbf     = false;
  double     sbtOffCost        = MAX_DOUBLE;
  uint8_t    currBestSbt       = 0;
  uint8_t    histBestSbt       = MAX_UCHAR;
  Distortion curPuSse          = MAX_DISTORTION;
  uint8_t    numRDOTried       = 0;
  bool       doPreAnalyzeResi  = false;
  const bool mtsAllowed        =   tempCS->sps->MTSInter && cu->Y().maxDim() <= MTS_INTER_MAX_CU_SIZE;
  //SBT resolution-dependent fast algorithm: not try size-64 SBT in RDO for low-resolution sequences (now resolution below HD)
  const uint8_t sbtAllowed     = ( tempCS->pps->picWidthInLumaSamples < SBT_FAST64_WIDTH_THRESHOLD || m_pcEncCfg->m_SBT > 1 ) && cu->Y().maxDim() > 32 ? 0 : CU::checkAllowedSbt(*cu);

  if( sbtAllowed )
  {
    doPreAnalyzeResi = ( sbtAllowed || mtsAllowed ) && residualPass == 0;
    m_cInterSearch.getBestSbt( tempCS, cu, histBestSbt, curPuSse, sbtAllowed, doPreAnalyzeResi, mtsAllowed );
  }

  cu->skip    = false;
  cu->sbtInfo = 0;

  const bool skipResidual = residualPass == 1;

  // SBT-off pass: run unless the history says an SBT mode was best for this block.
  if( skipResidual || histBestSbt == MAX_UCHAR || !CU::isSbtMode( histBestSbt ) )
  {
    m_cInterSearch.encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
    xEncodeDontSplit( *tempCS, partitioner );
    xCheckDQP       ( *tempCS, partitioner );

    if( bestHasNonResi != nullptr && (bestCostInternal > tempCS->cost) )
    {
      bestCostInternal = tempCS->cost;
      if( !cu->ciip )
        *bestHasNonResi = !cu->rootCbf;
    }

    if( cu->rootCbf == false )
    {
      // A CIIP CU without residual is rejected.
      if( cu->ciip )
      {
        tempCS->cost = MAX_DOUBLE;
        tempCS->costDbOffset = 0;
        return;
      }
    }
    currBestCost = tempCS->cost;
    if( sbtAllowed )
    {
      sbtOffCost    = tempCS->cost;
      sbtOffDist    = tempCS->dist;
      sbtOffRootCbf = cu->rootCbf;
      currBestSbt   = cu->firstTU->mtsIdx[COMP_Y] > MTS_SKIP ? SBT_OFF_MTS : SBT_OFF_DCT;
      numRDOTried  += mtsAllowed ? 2 : 1;
    }

    DTRACE_MODE_COST( *tempCS, m_cRdCost.getLambda( true ) );
    xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );

    STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!tempCS->slice->isIntra() + tempCS->slice->depth] );
    STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !tempCS->slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( tempCS->area.lheight() )][Log2( tempCS->area.lwidth() )] );
  }

  if( sbtAllowed && ( m_pcEncCfg->m_SBT == 1 || sbtOffRootCbf ) )
  {
    bool swapped = false; // avoid unwanted data copy
    uint8_t numSbtRdo = CU::numSbtModeRdo( sbtAllowed );
    //early termination when the residual is skipped (sbtAllowed is known to be non-zero here)
    //normative
    if( skipResidual )
    {
      numSbtRdo = 0;
    }
    //fast algorithm
    if( ( histBestSbt != MAX_UCHAR && !CU::isSbtMode( histBestSbt ) ) || m_cInterSearch.getSkipSbtAll() )
    {
      numSbtRdo = 0;
    }
    if( bestCost != MAX_DOUBLE && sbtOffCost != MAX_DOUBLE )
    {
      // Skip SBT when the SBT-off cost is too far above the best cost; the threshold is
      // scaled by the cost ratio saved from an earlier call when the previous best used SBT.
      double th = 1.07;
      if( !( prevBestSbt == 0 || m_sbtCostSave[0] == MAX_DOUBLE ) )
      {
        assert( m_sbtCostSave[1] <= m_sbtCostSave[0] );
        th *= ( m_sbtCostSave[0] / m_sbtCostSave[1] );
      }
      if( sbtOffCost > bestCost * th )
      {
        numSbtRdo = 0;
      }
    }
    if( !sbtOffRootCbf && sbtOffCost != MAX_DOUBLE )
    {
      double th = Clip3( 0.05, 0.55, ( 27 - cu->qp ) * 0.02 + 0.35 );
      if( sbtOffCost < m_cRdCost.calcRdCost( ( cu->lwidth() * cu->lheight() ) << SCALE_BITS, 0 ) * th )
      {
        numSbtRdo = 0;
      }
    }

    if( histBestSbt != MAX_UCHAR && numSbtRdo != 0 )
    {
      // Only the mode from the history is tested.
      numSbtRdo = 1;
      m_cInterSearch.initSbtRdoOrder( CU::getSbtMode( CU::getSbtIdx( histBestSbt ), CU::getSbtPos( histBestSbt ) ) );
    }

    for( int sbtModeIdx = 0; sbtModeIdx < numSbtRdo; sbtModeIdx++ )
    {
      uint8_t sbtMode = m_cInterSearch.getSbtRdoOrder( sbtModeIdx );
      uint8_t sbtIdx = CU::getSbtIdxFromSbtMode( sbtMode );
      uint8_t sbtPos = CU::getSbtPosFromSbtMode( sbtMode );

      //fast algorithm (early skip, save & load)
      if( histBestSbt == MAX_UCHAR )
      {
        uint8_t skipCode = m_cInterSearch.skipSbtByRDCost( cu->lwidth(), cu->lheight(), cu->mtDepth, sbtIdx, sbtPos, bestCS->cost, sbtOffDist, sbtOffCost, sbtOffRootCbf );
        if( skipCode != MAX_UCHAR )
        {
          continue;
        }

        if( sbtModeIdx > 0 )
        {
          uint8_t prevSbtMode = m_cInterSearch.getSbtRdoOrder( sbtModeIdx - 1 );
          //make sure the prevSbtMode is the same size as the current SBT mode (otherwise the estimated dist may not be comparable)
          if( CU::isSameSbtSize( prevSbtMode, sbtMode ) )
          {
            Distortion currEstDist = m_cInterSearch.getEstDistSbt( sbtMode );
            Distortion prevEstDist = m_cInterSearch.getEstDistSbt( prevSbtMode );
            if( currEstDist > prevEstDist * 1.15 )
            {
              continue;
            }
          }
        }
      }

      //init tempCS and TU
      if( bestCost == bestCS->cost ) //The first EMT pass didn't become the bestCS, so we clear the TUs generated
      {
        tempCS->clearTUs();
      }
      else if( !swapped )
      {
        tempCS->initStructData( encTestMode.qp );
        tempCS->copyStructure( *bestCS, partitioner.chType, partitioner.treeType );
        tempCS->getPredBuf().copyFrom( bestCS->getPredBuf() );
        bestCost = bestCS->cost;
        cu = tempCS->getCU( partitioner.chType, partitioner.treeType );
        swapped = true;
      }
      else
      {
        tempCS->clearTUs();
        bestCost = bestCS->cost;
        cu = tempCS->getCU( partitioner.chType, partitioner.treeType );
      }

      //we need to restart the distortion for the new tempCS, the bit count and the cost
      tempCS->dist     = 0;
      tempCS->fracBits = 0;
      tempCS->cost     = MAX_DOUBLE;
      cu->skip         = false;

      //set SBT info
      cu->sbtInfo = (sbtPos << 4) + sbtIdx;

      //try residual coding
      m_cInterSearch.encodeResAndCalcRdInterCU( *tempCS, partitioner, skipResidual );
      numRDOTried++;

      xEncodeDontSplit( *tempCS, partitioner );
      xCheckDQP( *tempCS, partitioner );

      if( bestHasNonResi != nullptr && ( bestCostInternal > tempCS->cost ) )
      {
        bestCostInternal = tempCS->cost;
        if( !( cu->ciip ) )
          *bestHasNonResi = !cu->rootCbf;
      }

      if( tempCS->cost < currBestCost )
      {
        currBestSbt = cu->sbtInfo;
        currBestCost = tempCS->cost;
      }
      else if( m_pcEncCfg->m_SBT > 2 )
      {
        // No improvement: make this the last SBT mode tested.
        sbtModeIdx = numSbtRdo;
      }

      DTRACE_MODE_COST( *tempCS, m_cRdCost.getLambda( true ) );
      xCheckBestMode( tempCS, bestCS, partitioner, encTestMode );
      STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!tempCS->slice->isIntra() + tempCS->slice->depth] );
      STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !tempCS->slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( tempCS->area.lheight() )][Log2( tempCS->area.lwidth() )] );
    }

    if( bestCostBegin != bestCS->cost )
    {
      m_sbtCostSave[0] = sbtOffCost;
      m_sbtCostSave[1] = currBestCost;
    }

    if( histBestSbt == MAX_UCHAR && doPreAnalyzeResi && numRDOTried > 1 )
    {
      // Bind by reference: a plain `auto` would deduce a value type and copy the whole
      // controller object just to call saveBestSbt() on the copy.
      auto& slsSbt = static_cast<CacheBlkInfoCtrl&>( m_modeCtrl );
      int slShift = 4 + std::min( Log2( cu->lwidth() ) + Log2( cu->lheight() ), 9 );
      slsSbt.saveBestSbt( cu->cs->area, (uint32_t)( curPuSse >> slShift ), currBestSbt );
    }

    if( ETM_INTER_ME == encTestMode.type )
    {
      if( equBcwCost != nullptr )
      {
        if( tempCS->cost < ( *equBcwCost ) && cu->BcwIdx == BCW_DEFAULT )
        {
          ( *equBcwCost ) = tempCS->cost;
        }
      }
      else
      {
        // equBcwCost is required for ETM_INTER_ME; this check always fires on this path.
        CHECK( equBcwCost == NULL, "equBcwCost == NULL" );
      }
      // Keep the two cheapest non-default BCW indices seen (only with checkLDC and imv == 0).
      if( tempCS->slice->checkLDC && !cu->imv && cu->BcwIdx != BCW_DEFAULT && tempCS->cost < m_bestBcwCost[1] )
      {
        if( tempCS->cost < m_bestBcwCost[0] )
        {
          m_bestBcwCost[1] = m_bestBcwCost[0];
          m_bestBcwCost[0] = tempCS->cost;
          m_bestBcwIdx[1] = m_bestBcwIdx[0];
          m_bestBcwIdx[0] = cu->BcwIdx;
        }
        else
        {
          m_bestBcwCost[1] = tempCS->cost;
          m_bestBcwIdx[1] = cu->BcwIdx;
        }
      }
    }
  }

  tempCS->cost = currBestCost;
}
4026
4027
// Adds the estimated bits of signalling CU_DONT_SPLIT to the coding structure
// and recomputes its RD cost from the updated bit count and its distortion.
//   cs          - coding structure whose fracBits and cost are updated in place
//   partitioner - current partitioning state, forwarded to the split-mode estimation
void EncCu::xEncodeDontSplit( CodingStructure &cs, Partitioner &partitioner )
{
  auto &estimator = *m_CABACEstimator;

  // start counting from zero so only the split signalling is measured
  estimator.resetBits();
  estimator.split_cu_mode( CU_DONT_SPLIT, cs, partitioner );

  if( partitioner.treeType == TREE_C )
  {
    // for a chroma tree the "don't split" decision is expected to cost no bits
    CHECK( estimator.getEstFracBits() != 0, "must be 0 bit" );
  }

  // split bits
  cs.fracBits += estimator.getEstFracBits();
  cs.cost      = m_cRdCost.calcRdCost( cs.fracBits, cs.dist );
}
4038
4039
// Re-evaluates a result obtained from the mode controller instead of running a
// full mode search: the CU data is loaded into tempCS via setCsFrom(), the
// reconstruction is redone, the bits are re-estimated with the current context
// state, and the outcome is handed to xCheckBestMode().
//   tempCS      - working coding structure that receives the cached CU
//   bestCS      - best coding structure so far, possibly updated by xCheckBestMode()
//   partitioner - current partitioning state
// Throws (via THROW) if setCsFrom() reports failure.
void EncCu::xReuseCachedResult( CodingStructure *&tempCS, CodingStructure *&bestCS, Partitioner &partitioner )
{
  EncTestMode reusedMode;

  const bool restored = m_modeCtrl.setCsFrom( *tempCS, reusedMode, partitioner );
  if( !restored )
  {
    THROW( "Should never happen!" );
  }

  CodingUnit &cachedCu = *tempCS->cus.front();
  partitioner.setCUData( cachedCu );

  // redo the reconstruction for the restored CU
  if( !CU::isIntra( cachedCu ) )
  {
    xDeriveCUMV( cachedCu );
    xReconInter( cachedCu );
  }
  else
  {
    if( isLuma( cachedCu.chType ) )
    {
      // clear motion buf
      cachedCu.getMotionBuf().memset( -1 );
    }
    xReconIntraQT( cachedCu );
  }

  // estimate the CU bits starting from the context state stored for this level
  m_CABACEstimator->getCtx() = m_CurrCtx->start;
  m_CABACEstimator->resetBits();

  CUCtx cuCtx;
  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;
  m_CABACEstimator->coding_unit( cachedCu, partitioner, cuCtx );

  tempCS->fracBits = m_CABACEstimator->getEstFracBits();
  tempCS->cost     = m_cRdCost.calcRdCost( tempCS->fracBits, tempCS->dist );

  // add the "don't split" signalling, check delta QP and compare against the best result
  xEncodeDontSplit( *tempCS, partitioner );
  xCheckDQP( *tempCS, partitioner );
  xCheckBestMode( tempCS, bestCS, partitioner, reusedMode, m_EDO );
}
4080
4081
uint64_t EncCu::xCalcPuMeBits( const CodingUnit &cu )
4082
0
{
4083
0
  CHECK( !cu.mergeFlag, "Should only be used for merge!" );
4084
0
  CHECK( CU::isIBC( cu ), "Shound not be used for IBC" );
4085
4086
0
  m_CABACEstimator->resetBits();
4087
0
  m_CABACEstimator->merge_flag(cu);
4088
0
  if( cu.mergeFlag )
4089
0
  {
4090
0
    m_CABACEstimator->merge_data( cu );
4091
0
  }
4092
0
  return m_CABACEstimator->getEstFracBits();
4093
0
}
4094
4095
// Computes a cost made of the luma prediction distortion (Hadamard based,
// fast variant if m_fastHad is set) plus the cost of the MVD bits of the CU
// found in cur_CS for the given channel type.
//   cur_CS   - coding structure providing original/prediction buffers and the CU
//   chType   - channel type used to look up the CU
//   BitDepth - bit depth passed to the distortion function
//   imv      - IMV mode; determines the shift applied when counting MVD bits
double EncCu::xCalcDistortion(CodingStructure *&cur_CS, ChannelType chType, int BitDepth, int imv)
{
  const auto predDist = m_cRdCost.getDistPart( cur_CS->getOrgBuf( COMP_Y ), cur_CS->getPredBuf( COMP_Y ), BitDepth, COMP_Y, m_pcEncCfg->m_fastHad ? DF_HAD_fast : DF_HAD );

  const unsigned imvShift = imv == IMV_HPEL ? 1 : ( imv << 1 );
  const auto     mvdShift = imvShift + MV_FRACTIONAL_BITS_DIFF;

  const CodingUnit &cu = *cur_CS->getCU( chType, TREE_D );

  // list 0 is skipped when interDir == 2, list 1 is skipped when interDir == 1
  unsigned int mvdBits = 0;
  for( int list = 0; list < 2; ++list )
  {
    if( cu.interDir != 2 - list )
    {
      mvdBits += m_cRdCost.getBitsOfVectorWithPredictor( cu.mvd[list][0].hor, cu.mvd[list][0].ver, mvdShift );
    }
  }

  return static_cast<double>( predDist ) + static_cast<double>( m_cRdCost.getCost( mvdBits ) );
}
4111
4112
// Decides how the given MMVD candidate should be treated by the caller.
// Return value:
//   2 - returned when, at the start of a group of four refinement candidates
//       (only if m_MMVD > 2), the best offset cost did not beat the best merge
//       cost and either the candidate is beyond MMVD_MAX_REFINE_NUM or
//       tempNum equals the candidate value after it was set to MMVD_MAX_REFINE_NUM
//   1 - the candidate position is >= 4 and its direction (position % 4)
//       differs from bestDir
//   0 - otherwise
// Side effects: may set mmvdMergeCand.val to MMVD_MAX_REFINE_NUM, resets
// bestCostOffset / bestCostMerge at the start of each group of four, and
// resets bestDir to 0 when the candidate value equals MMVD_MAX_REFINE_NUM.
// NOTE(review): the meaning the caller attaches to 1 and 2 is not visible here.
int EncCu::xCheckMMVDCand(MmvdIdx& mmvdMergeCand, int& bestDir, int tempNum, double& bestCostOffset, double& bestCostMerge, double bestCostList )
{
  // position of the candidate inside its base candidate, taken before any modification below
  const int baseCand  = mmvdMergeCand.val / MMVD_MAX_REFINE_NUM;
  const int refinePos = mmvdMergeCand.val - MMVD_MAX_REFINE_NUM * baseCand;

  const bool startsGroupOfFour = m_pcEncCfg->m_MMVD > 2 && refinePos % 4 == 0;

  if( startsGroupOfFour )
  {
    if( refinePos >= 4 && bestCostOffset >= bestCostMerge )
    {
      if( mmvdMergeCand.val > MMVD_MAX_REFINE_NUM )
      {
        return 2;
      }

      mmvdMergeCand.val = MMVD_MAX_REFINE_NUM;
      if( tempNum == mmvdMergeCand.val )
      {
        return 2;
      }
    }

    // reset
    bestCostOffset = MAX_DOUBLE;
    bestCostMerge  = bestCostList;
  }

  if( mmvdMergeCand.val == MMVD_MAX_REFINE_NUM )
  {
    bestDir = 0;
  }

  const bool wrongDirection = refinePos >= 4 && refinePos % 4 != bestDir;
  return wrongDirection ? 1 : 0;
}
4155
4156
4157
// Nothing to set up here; the data members are reset by init() (called from create()).
MergeItem::MergeItem() = default;
4161
// No explicit cleanup; the members release their own storage.
MergeItem::~MergeItem() = default;
4165
4166
// Allocates the prediction sample storage and the motion storage on first use
// (i.e. while the sample storage has no buffers yet) and resets the item data.
//   chromaFormat - chroma format used for the sample storage
//   area         - area the storages are dimensioned for
void MergeItem::create( ChromaFormat chromaFormat, const Area &area )
{
  const bool storageMissing = m_pelStorage.bufs.empty();

  if( storageMissing )
  {
    m_pelStorage.create( chromaFormat, area );
    // one motion entry per block of (1 << MIN_CU_LOG2) x (1 << MIN_CU_LOG2) samples
    m_mvStorage.resize( area.area() >> ( MIN_CU_LOG2 << 1 ) );
  }

  init();
}
4176
4177
void MergeItem::init()
4178
0
{
4179
  // reset data
4180
0
  cost        = MAX_DOUBLE;
4181
0
  mergeIdx    = 0;
4182
0
  bcwIdx      = 0;
4183
0
  interDir    = 0;
4184
0
  useAltHpelIf  = false;
4185
0
  affineType    = AFFINEMODEL_4PARAM;
4186
0
  mergeItemType = MergeItemType::NUM;
4187
4188
0
  noBdofRefine  = false;
4189
0
  noResidual    = false;
4190
4191
0
  lumaPredReady   = false;
4192
0
  chromaPredReady = false;
4193
0
}
4194
4195
void MergeItem::importMergeInfo(const MergeCtx& mergeCtx, int _mergeIdx, MergeItemType _mergeItemType, CodingUnit& pu)
4196
0
{
4197
0
  mergeIdx      = _mergeIdx;
4198
0
  mergeItemType = _mergeItemType;
4199
4200
0
  if( mergeItemType != MergeItemType::GPM && mergeItemType != MergeItemType::MMVD )
4201
0
  {
4202
0
    mvField[REF_PIC_LIST_0][0] = mergeCtx.mvFieldNeighbours [mergeIdx][REF_PIC_LIST_0];
4203
0
    mvField[REF_PIC_LIST_1][0] = mergeCtx.mvFieldNeighbours [mergeIdx][REF_PIC_LIST_1];
4204
0
    interDir                   = mergeCtx.interDirNeighbours[mergeIdx];
4205
0
    bcwIdx                     = mergeCtx.BcwIdx            [mergeIdx];
4206
0
    useAltHpelIf               = mergeCtx.useAltHpelIf      [mergeIdx];
4207
0
  }
4208
4209
0
  switch( _mergeItemType )
4210
0
  {
4211
0
  case MergeItemType::REGULAR:
4212
0
  case MergeItemType::CIIP:
4213
0
    break;
4214
4215
0
  case MergeItemType::MMVD:
4216
0
  {
4217
0
    MmvdIdx candIdx;
4218
4219
0
    candIdx.val                = mergeIdx;
4220
0
    mvField[L0][0]             . setMvField( pu.mv[L0][0], pu.refIdx[0] );
4221
0
    mvField[L1][0]             . setMvField( pu.mv[L1][0], pu.refIdx[1] );
4222
0
    interDir                   = pu.interDir;
4223
0
    bcwIdx                     = pu.BcwIdx;
4224
0
    useAltHpelIf               = mergeCtx.useAltHpelIf[candIdx.pos.baseIdx];
4225
4226
0
    break;
4227
0
  }
4228
4229
0
  case MergeItemType::GPM:
4230
0
    mvField[L0][0]             . setMvField( Mv( 0, 0 ), -1 );
4231
0
    mvField[L1][0]             . setMvField( Mv( 0, 0 ), -1 );
4232
0
    bcwIdx                     = BCW_DEFAULT;
4233
0
    useAltHpelIf               = false;
4234
4235
0
    break;
4236
4237
0
  case MergeItemType::IBC:
4238
0
  default:
4239
0
    THROW( "Wrong merge item type" );
4240
0
  }
4241
4242
0
  getMvBuf( pu ).copyFrom( pu.getMotionBuf() );
4243
0
}
4244
4245
void MergeItem::importMergeInfo( const AffineMergeCtx &mergeCtx, int _mergeIdx, MergeItemType _mergeItemType, CodingUnit& pu )
4246
0
{
4247
0
  mergeIdx      = _mergeIdx;
4248
0
  mergeItemType = _mergeItemType;
4249
4250
0
  affineType    = mergeCtx.affineType         [mergeIdx];
4251
0
  interDir      = mergeCtx.interDirNeighbours [mergeIdx];
4252
0
  bcwIdx        = mergeCtx.BcwIdx             [mergeIdx];
4253
0
  useAltHpelIf  = false;
4254
4255
0
  switch( _mergeItemType )
4256
0
  {
4257
0
  case MergeItemType::SBTMVP:
4258
    // the pu motion was already generated preparing for IFP check (unconditional)
4259
0
    mvField[L0][0] . setMvField( pu.mv[L0][0], pu.refIdx[L0] );
4260
0
    mvField[L1][0] . setMvField( pu.mv[L1][0], pu.refIdx[L1] );
4261
4262
0
    break;
4263
4264
0
  case MergeItemType::AFFINE:
4265
    // the pu motion was already generated preparing for IFP check (unconditional)
4266
0
    mvField[L0][0] . setMvField( pu.mv[L0][0], pu.refIdx[L0] );
4267
0
    mvField[L0][1] . setMvField( pu.mv[L0][1], pu.refIdx[L0] );
4268
0
    mvField[L0][2] . setMvField( pu.mv[L0][2], pu.refIdx[L0] );
4269
0
    mvField[L1][0] . setMvField( pu.mv[L1][0], pu.refIdx[L1] );
4270
0
    mvField[L1][1] . setMvField( pu.mv[L1][1], pu.refIdx[L1] );
4271
0
    mvField[L1][2] . setMvField( pu.mv[L1][2], pu.refIdx[L1] );
4272
4273
0
    break;
4274
4275
0
  default:
4276
0
    THROW( "Wrong merge item type" );
4277
0
  }
4278
4279
  // the MI buf was already generated preparing for IFP check (unconditional)
4280
0
  getMvBuf( pu ).copyFrom( pu.getMotionBuf() );
4281
0
}
4282
4283
// Writes the merge candidate stored in this item into the given CU.
//   pu              - CU that receives merge flags, motion data and the motion buffer
//   forceNoResidual - when set, a CIIP item is exported as a regular merge
//                     candidate and an MMVD item is marked as MMVD skip
// Returns true if a CIIP item was exported as regular merge because of
// forceNoResidual, false otherwise.
// Throws (via THROW) for IBC or unknown item types.
bool MergeItem::exportMergeInfo( CodingUnit &pu, bool forceNoResidual ) const
{
  // common merge signalling
  pu.mergeFlag     = true;
  pu.mmvdMergeFlag = false;
  pu.mmvdSkip      = false;
  pu.mergeIdx      = mergeIdx;
  pu.mergeType     = MRG_TYPE_DEFAULT_N;
  pu.interDir      = interDir;

  // list 0 motion; merge mode carries no MVD / MVP information
  pu.mv    [REF_PIC_LIST_0][0] = mvField[REF_PIC_LIST_0][0].mv;
  pu.refIdx[REF_PIC_LIST_0]    = mvField[REF_PIC_LIST_0][0].refIdx;
  pu.mvd   [REF_PIC_LIST_0][0] = Mv();
  pu.mvpIdx[REF_PIC_LIST_0]    = NOT_VALID;
  pu.mvpNum[REF_PIC_LIST_0]    = NOT_VALID;

  // list 1 motion
  pu.mv    [REF_PIC_LIST_1][0] = mvField[REF_PIC_LIST_1][0].mv;
  pu.refIdx[REF_PIC_LIST_1]    = mvField[REF_PIC_LIST_1][0].refIdx;
  pu.mvd   [REF_PIC_LIST_1][0] = Mv();
  pu.mvpIdx[REF_PIC_LIST_1]    = NOT_VALID;
  pu.mvpNum[REF_PIC_LIST_1]    = NOT_VALID;

  // defaults of the remaining tools; the type specific part below overrides them
  pu.BcwIdx     = ( interDir == 3 ) ? bcwIdx : BCW_DEFAULT;
  pu.mcControl  = 0;
  pu.affine     = false;
  pu.affineType = AFFINEMODEL_4PARAM;
  pu.geo        = false;
  pu.mtsFlag    = false;
  pu.ciip       = false;
  pu.imv        = ( !pu.geo && useAltHpelIf ) ? IMV_HPEL : IMV_OFF;
  pu.mvRefine   = false;

  // CIIP cannot be coded without residual, so it falls back to regular merge
  const bool          resetCiip2Regular = forceNoResidual && mergeItemType == MergeItemType::CIIP;
  const MergeItemType exportType        = resetCiip2Regular ? MergeItemType::REGULAR : mergeItemType;

  switch( exportType )
  {
  case MergeItemType::REGULAR:
    CU::restrictBiPredMergeCandsOne( pu );
    break;

  case MergeItemType::MMVD:
    pu.mmvdMergeFlag    = true;
    pu.mmvdMergeIdx.val = mergeIdx;
    // mmvdSkip was cleared above, so it ends up set exactly when no residual is forced
    pu.mmvdSkip         = forceNoResidual;
    CU::restrictBiPredMergeCandsOne( pu );
    break;

  case MergeItemType::CIIP:
    CHECK( forceNoResidual, "Cannot force no residuals for CIIP" );
    pu.ciip           = true;
    pu.intraDir[CH_L] = PLANAR_IDX;
    pu.intraDir[CH_C] = DM_CHROMA_IDX;
    break;

  case MergeItemType::GPM:
    pu.mergeIdx = -1;
    pu.geo      = true;
    pu.BcwIdx   = BCW_DEFAULT;
    updateGpmIdx( mergeIdx, pu.geoSplitDir, pu.geoMergeIdx );
    pu.imv      = IMV_OFF;
    break;

  case MergeItemType::AFFINE:
    pu.affine     = true;
    pu.affineType = affineType;
    // restore all three control point MVs of both lists
    for( int cp = 0; cp < 3; cp++ )
    {
      pu.mv[L0][cp] = mvField[L0][cp].mv;
      pu.mv[L1][cp] = mvField[L1][cp].mv;
    }
    pu.refIdx[L0] = mvField[L0][0].refIdx;
    pu.refIdx[L1] = mvField[L1][0].refIdx;
    break;

  case MergeItemType::SBTMVP:
    pu.affine    = true;
    pu.mergeType = MRG_TYPE_SUBPU_ATMVP;
    break;

  case MergeItemType::IBC:
  default:
    THROW( "Wrong merge item type" );
  }

  // restore the motion buffer that was saved when the item was imported
  pu.getMotionBuf().copyFrom( getMvBuf( pu ) );

  return resetCiip2Regular;
}
4372
4373
// Nothing to set up here; the item pool is allocated by init().
MergeItemList::MergeItemList() = default;
4377
4378
// Frees every merge item still held by this object, both the ones currently
// in the tracked list and the ones waiting in the free pool.
MergeItemList::~MergeItemList()
{
  const auto destroyItem = []( MergeItem *item ) { delete item; };

  std::for_each( m_list.begin(), m_list.end(), destroyItem );
  m_list.clear();

  std::for_each( m_mergeItems.begin(), m_mergeItems.end(), destroyItem );
  m_mergeItems.clear();
}
4392
4393
void MergeItemList::init( size_t maxSize, size_t maxExtSize, ChromaFormat chromaFormat, SizeType ctuWidth, SizeType ctuHeight )
4394
0
{
4395
0
  CHECK( !m_mergeItems.empty() || !m_list.empty(), "MergeItemList already initialized" );
4396
4397
0
  m_list      . reserve( maxSize + 1 ); // to avoid reallocation when inserting a new item
4398
0
  m_mergeItems. reserve( maxSize + 1 );
4399
0
  m_maxSize   = maxSize;
4400
0
  m_maxExtSize= maxExtSize;
4401
0
  m_numExt    = 0;
4402
4403
0
  for( int i = 0; i < maxSize + m_maxExtSize; i++ )
4404
0
  {
4405
0
    MergeItem *p = new MergeItem;
4406
0
    p->create( chromaFormat, Area{ 0, 0, ctuWidth, ctuHeight } );
4407
0
    m_mergeItems.push_back( p );
4408
0
  }
4409
0
}
4410
4411
// Takes one item out of the free pool, resets it and hands it to the caller.
// The item is counted as "external" until it is returned through
// insertMergeItemToList() or giveBackMergeItem().
// Fails (via CHECK) if the pool is empty or more than m_maxExtSize items are
// handed out at the same time.
MergeItem *MergeItemList::allocateNewMergeItem()
{
  ++m_numExt;

  CHECK( m_mergeItems.empty(), "Missing merge items!" );
  CHECK( m_numExt > m_maxExtSize, "Taking out more external items than specified during list allocation!" );

  MergeItem *freshItem = m_mergeItems.back();
  m_mergeItems.pop_back();

  freshItem->init();
  return freshItem;
}
4421
4422
// Tries to insert an externally held item into the tracked list, which is kept
// ordered by ascending cost (a new item goes before the first item with a
// strictly higher cost).
//   p - item previously obtained from allocateNewMergeItem()
// Returns true if the item was inserted. Returns false if the list is already
// at the tracking limit and the item is not better than the current worst
// entry; in that case the item goes back to the free pool.
// When the list is full and the item is better, the worst entry is moved to
// the free pool to make room.
bool MergeItemList::insertMergeItemToList( MergeItem *p )
{
  CHECK( m_list.size() + m_mergeItems.size() + m_numExt != m_maxSize + m_maxExtSize, "Wrong number of items held" );

  // the item is no longer held externally, whatever happens below
  --m_numExt;

  // an empty list always accepts the item, so "full" is only evaluated for a non-empty list
  const bool listIsFull = !m_list.empty() && m_list.size() == m_maxTrackingNum;

  if( listIsFull )
  {
    if( p->cost >= m_list.back()->cost )
    {
      // not better than the worst tracked item: recycle it
      m_mergeItems.push_back( p );
      return false;
    }

    // drop the worst tracked item to make room
    m_mergeItems.push_back( m_list.back() );
    m_list      .pop_back();
  }

  const auto insertPos = std::find_if( m_list.begin(), m_list.end(), [p]( const MergeItem *tracked ) { return p->cost < tracked->cost; } );
  m_list.insert( insertPos, p );

  return true;
}
4450
4451
// Returns an externally held item to the free pool without inserting it into
// the tracked list.
//   p - item previously obtained from allocateNewMergeItem()
void MergeItemList::giveBackMergeItem( MergeItem *p )
{
  const size_t numHeld  = m_list.size() + m_mergeItems.size() + m_numExt;
  const size_t numTotal = m_maxSize + m_maxExtSize;
  CHECK( numHeld != numTotal, "Wrong number of items held" );

  --m_numExt;
  m_mergeItems.push_back( p );
}
4459
4460
// Returns the tracked item at the given position of the cost-ordered list, or
// nullptr if the position is outside the tracking limit or beyond the number
// of items currently in the list.
//   index - zero-based position in the list
MergeItem *MergeItemList::getMergeItemInList( size_t index )
{
  // The list may hold fewer items than the tracking limit, so the index has to
  // be checked against the actual list size as well to avoid reading past the end.
  const bool isValid = index < m_maxTrackingNum && index < m_list.size();
  return isValid ? m_list[index] : nullptr;
}
4464
4465
void MergeItemList::resetList( size_t maxTrackingNum )
4466
0
{
4467
0
  CHECK( maxTrackingNum > m_maxSize, "Not enough items allocated to track " << maxTrackingNum << " items" );
4468
4469
0
  for( auto p : m_list )
4470
0
  {
4471
0
    m_mergeItems.push_back( p );
4472
0
  }
4473
0
  m_list.clear  ();
4474
4475
0
  m_maxTrackingNum = maxTrackingNum;
4476
0
}
4477
4478
void MergeItemList::shrinkList( size_t reduceTo )
4479
0
{
4480
0
  CHECK( reduceTo > m_maxSize, "Not enough items allocated to track " << reduceTo << " items" );
4481
4482
0
  while( m_list.size() > reduceTo )
4483
0
  {
4484
0
    m_mergeItems.push_back( m_list.back() );
4485
0
    m_list      .pop_back();
4486
0
  }
4487
0
}
4488
4489
} // namespace vvenc
4490
4491
//! \}