/work/vvenc/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** |
45 | | \file EncSampleAdaptiveOffset.cpp |
46 | | \brief estimation part of sample adaptive offset class |
47 | | */ |
48 | | |
49 | | #include "EncSampleAdaptiveOffset.h" |
50 | | #include "CommonLib/UnitTools.h" |
51 | | #include "CommonLib/dtrace_codingstruct.h" |
52 | | #include "CommonLib/dtrace_buffer.h" |
53 | | #include "CommonLib/CodingStructure.h" |
54 | | #include <string.h> |
55 | | #include <stdlib.h> |
56 | | #include <math.h> |
57 | | #include "vvenc/vvencCfg.h" |
58 | | |
59 | | //! \ingroup EncoderLib |
60 | | //! \{ |
61 | | |
62 | | namespace vvenc { |
63 | | |
64 | | |
// Shorthand used throughout this file: expands to SubCtx( Ctx::Sao, c ) for the given context object c.
// NOTE(review): presumably this narrows save/restore operations to the SAO context models only - confirm against SubCtx.
65 | 92.9k | #define SAOCtx(c) SubCtx( Ctx::Sao, c ) |
66 | | |
67 | | |
//! rounding with IBDI
//! Adds 0.5 to non-negative inputs (subtracts 0.5 from negative ones) and truncates
//! to int, i.e. rounds to the nearest integer with halves away from zero.
//! The bitDepth argument is not used by this helper.
inline double xRoundIbdi2(int bitDepth, double x)
{
  const double biased = ( x >= 0 ) ? ( x + 0.5 ) : ( x - 0.5 );
  return static_cast<int>( biased );
}
73 | | |
74 | | inline double xRoundIbdi(int bitDepth, double x) |
75 | 14.7k | { |
76 | 14.7k | return (bitDepth > 8 ? xRoundIbdi2(bitDepth, (x)) : ((x)>=0 ? ((int)((x)+0.5)) : ((int)((x)-0.5)))) ; |
77 | 14.7k | } |
78 | | |
79 | | |
80 | | EncSampleAdaptiveOffset::EncSampleAdaptiveOffset() |
81 | 9.06k | : m_CABACEstimator( nullptr ) |
82 | 9.06k | , m_CtxCache ( nullptr ) |
83 | 9.06k | { |
84 | 9.06k | } |
85 | | |
86 | | EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset() |
87 | 9.06k | { |
88 | 9.06k | } |
89 | | |
90 | | void EncSampleAdaptiveOffset::init( const VVEncCfg& encCfg ) |
91 | 9.06k | { |
92 | 9.06k | m_EncCfg = &encCfg; |
93 | | |
94 | 9.06k | if ( encCfg.m_bUseSAO ) |
95 | 9.06k | { |
96 | 9.06k | SampleAdaptiveOffset::init( encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_CTUSize, encCfg.m_log2SaoOffsetScale[CH_L], encCfg.m_log2SaoOffsetScale[CH_C] ); |
97 | 9.06k | } |
98 | 9.06k | } |
99 | | |
100 | | void EncSampleAdaptiveOffset::initSlice( const Slice* slice ) |
101 | 2.26k | { |
102 | 2.26k | memcpy( m_lambda, slice->getLambdas(), sizeof( m_lambda ) ); |
103 | 2.26k | } |
104 | | |
105 | | void EncSampleAdaptiveOffset::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache ) |
106 | 4.05k | { |
107 | 4.05k | m_CABACEstimator = cabacEstimator; |
108 | 4.05k | m_CtxCache = ctxCache; |
109 | 4.05k | } |
110 | | |
111 | | void EncSampleAdaptiveOffset::disabledRate( CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat ) |
112 | 0 | { |
113 | 0 | if ( saoEncodingRate > 0.0 ) |
114 | 0 | { |
115 | 0 | const PreCalcValues& pcv = *cs.pcv; |
116 | 0 | const int numberOfComponents = getNumberValidComponents( chromaFormat ); |
117 | 0 | const int picTempLayer = cs.slice->TLayer; |
118 | 0 | int numCtusForSAOOff[MAX_NUM_COMP]; |
119 | |
|
120 | 0 | for (int compIdx = 0; compIdx < numberOfComponents; compIdx++) |
121 | 0 | { |
122 | 0 | numCtusForSAOOff[compIdx] = 0; |
123 | 0 | for( int ctuRsAddr=0; ctuRsAddr< pcv.sizeInCtus; ctuRsAddr++) |
124 | 0 | { |
125 | 0 | if( reconParams[ctuRsAddr][compIdx].modeIdc == SAO_MODE_OFF) |
126 | 0 | { |
127 | 0 | numCtusForSAOOff[compIdx]++; |
128 | 0 | } |
129 | 0 | } |
130 | 0 | } |
131 | 0 | if (saoEncodingRateChroma > 0.0) |
132 | 0 | { |
133 | 0 | for (int compIdx = 0; compIdx < numberOfComponents; compIdx++) |
134 | 0 | { |
135 | 0 | saoDisabledRate[compIdx][picTempLayer] = (double)numCtusForSAOOff[compIdx]/(double)pcv.sizeInCtus; |
136 | 0 | } |
137 | 0 | } |
138 | 0 | else if (picTempLayer == 0) |
139 | 0 | { |
140 | 0 | saoDisabledRate[COMP_Y][0] = (double)(numCtusForSAOOff[COMP_Y]+numCtusForSAOOff[COMP_Cb]+numCtusForSAOOff[COMP_Cr])/(double)(pcv.sizeInCtus *3); |
141 | 0 | } |
142 | 0 | } |
143 | 0 | } |
144 | | |
145 | | void EncSampleAdaptiveOffset::decidePicParams( const CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], bool saoEnabled[ MAX_NUM_COMP ], const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat ) |
146 | 1.29k | { |
147 | 1.29k | const Slice& slice = *cs.slice; |
148 | 1.29k | const int numberOfComponents = getNumberValidComponents( chromaFormat ); |
149 | | |
150 | | // reset |
151 | 1.29k | if( slice.pendingRasInit ) |
152 | 0 | { |
153 | 0 | for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ ) |
154 | 0 | { |
155 | 0 | for( int tempLayer = 1; tempLayer < VVENC_MAX_TLAYER; tempLayer++ ) |
156 | 0 | { |
157 | 0 | saoDisabledRate[ compIdx ][ tempLayer ] = 0.0; |
158 | 0 | } |
159 | 0 | } |
160 | 0 | } |
161 | | |
162 | 5.19k | for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ ) |
163 | 3.89k | { |
164 | 3.89k | saoEnabled[ compIdx ] = false; |
165 | 3.89k | } |
166 | | |
167 | 1.29k | const int picTempLayer = slice.TLayer; |
168 | 5.19k | for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ ) |
169 | 3.89k | { |
170 | | // enable per default |
171 | 3.89k | saoEnabled[ compIdx ] = true; |
172 | | |
173 | 3.89k | if( saoEncodingRate > 0.0 ) |
174 | 0 | { |
175 | 0 | if( saoEncodingRateChroma > 0.0 ) |
176 | 0 | { |
177 | | // decide slice-level on/off based on previous results |
178 | 0 | if( ( picTempLayer > 0 ) |
179 | 0 | && ( saoDisabledRate[ compIdx ][ picTempLayer - 1 ] > ( ( compIdx == COMP_Y ) ? saoEncodingRate : saoEncodingRateChroma ) ) ) |
180 | 0 | { |
181 | 0 | saoEnabled[ compIdx ] = false; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | else |
185 | 0 | { |
186 | | // decide slice-level on/off based on previous results |
187 | 0 | if( ( picTempLayer > 0 ) |
188 | 0 | && ( saoDisabledRate[ COMP_Y ][ 0 ] > saoEncodingRate ) ) |
189 | 0 | { |
190 | 0 | saoEnabled[ compIdx ] = false; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | } |
194 | 3.89k | } |
195 | 1.29k | } |
196 | | |
197 | | void EncSampleAdaptiveOffset::storeCtuReco( CodingStructure& cs, const UnitArea& ctuArea, const int ctuX, const int ctuY ) |
198 | 4.04k | { |
199 | 4.04k | const int STORE_CTU_INCREASE = 8; |
200 | 4.04k | Position lPos( ctuArea.lx() + STORE_CTU_INCREASE, ctuArea.ly() + STORE_CTU_INCREASE ); |
201 | 4.04k | Size lSize( ctuArea.lwidth(), ctuArea.lheight() ); |
202 | | |
203 | 4.04k | const bool tileBdryClip = cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled; |
204 | 4.04k | int startX = 0; |
205 | 4.04k | int startY = 0; |
206 | 4.04k | if( tileBdryClip ) |
207 | 0 | { |
208 | 0 | startX = cs.pps->tileColBd[cs.pps->ctuToTileCol[ctuX]] << cs.pcv->maxCUSizeLog2; |
209 | 0 | startY = cs.pps->tileRowBd[cs.pps->ctuToTileRow[ctuY]] << cs.pcv->maxCUSizeLog2; |
210 | 0 | } |
211 | | |
212 | 4.04k | if ( ctuArea.lx() == startX ) |
213 | 2.26k | { |
214 | 2.26k | lPos.x = ctuArea.lx(); |
215 | 2.26k | lSize.width += STORE_CTU_INCREASE; |
216 | 2.26k | } |
217 | 4.04k | if ( ctuArea.ly() == startY ) |
218 | 2.23k | { |
219 | 2.23k | lPos.y = ctuArea.ly(); |
220 | 2.23k | lSize.height += STORE_CTU_INCREASE; |
221 | 2.23k | } |
222 | | |
223 | 4.04k | int clipX = cs.pcv->lumaWidth - lPos.x; |
224 | 4.04k | int clipY = cs.pcv->lumaHeight - lPos.y; |
225 | 4.04k | if( tileBdryClip ) |
226 | 0 | { |
227 | 0 | clipX = cs.pps->tileColBdRgt[cs.pps->ctuToTileCol[ctuX]] - lPos.x; |
228 | 0 | clipY = cs.pps->tileRowBdBot[cs.pps->ctuToTileRow[ctuY]] - lPos.y; |
229 | 0 | } |
230 | 4.04k | lSize.clipSize( clipX, clipY ); |
231 | | |
232 | 4.04k | const UnitArea relocArea( ctuArea.chromaFormat, Area( lPos, lSize ) ); |
233 | 4.04k | Picture& pic = *cs.picture; |
234 | 4.04k | PelUnitBuf recoYuv = pic.getRecoBuf().subBuf( relocArea ); |
235 | 4.04k | PelUnitBuf tempYuv = pic.getSaoBuf().subBuf( relocArea ); |
236 | 4.04k | tempYuv.copyFrom( recoYuv ); |
237 | 4.04k | } |
238 | | |
239 | | void EncSampleAdaptiveOffset::getCtuStatistics( CodingStructure& cs, std::vector<SAOStatData**>& saoStatistics, const UnitArea& ctuArea, const int ctuRsAddr ) |
240 | 4.05k | { |
241 | 4.05k | const PreCalcValues& pcv = *cs.pcv; |
242 | 4.05k | const int numberOfComponents = getNumberValidComponents( pcv.chrFormat ); |
243 | 4.05k | bool isLeftAvail = false; |
244 | 4.05k | bool isRightAvail = false; |
245 | 4.05k | bool isAboveAvail = false; |
246 | 4.05k | bool isBelowAvail = false; |
247 | 4.05k | bool isAboveLeftAvail = false; |
248 | 4.05k | bool isAboveRightAvail = false; |
249 | | |
250 | 4.05k | deriveLoopFilterBoundaryAvailibility( cs, ctuArea.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail ); |
251 | | |
252 | | // NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities. |
253 | | // For simplicity, here only picture boundaries are considered. |
254 | | |
255 | 4.05k | isRightAvail = ( ctuArea.Y().x + pcv.maxCUSize < pcv.lumaWidth ); |
256 | 4.05k | isBelowAvail = ( ctuArea.Y().y + pcv.maxCUSize < pcv.lumaHeight ); |
257 | 4.05k | isAboveRightAvail = ( ( ctuArea.Y().y > 0 ) && ( isRightAvail ) ); |
258 | | |
259 | 4.05k | CHECK( !cs.pps->loopFilterAcrossSlicesEnabled, "Not implemented" ); |
260 | 4.05k | if( cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled ) |
261 | 0 | { |
262 | 0 | const int ctuX = ctuArea.lx() >> cs.pcv->maxCUSizeLog2; |
263 | 0 | const int ctuY = ctuArea.ly() >> cs.pcv->maxCUSizeLog2; |
264 | 0 | isRightAvail = isRightAvail && cs.pps->canFilterCtuBdry( ctuX, ctuY, 1, 0 ); |
265 | 0 | isBelowAvail = isBelowAvail && cs.pps->canFilterCtuBdry( ctuX, ctuY, 0, 1 ); |
266 | 0 | isAboveRightAvail = isAboveRightAvail && cs.pps->canFilterCtuBdry( ctuX, ctuY, 1,-1 ); |
267 | 0 | } |
268 | | |
269 | | //VirtualBoundaries vb; |
270 | | //bool isCtuCrossedByVirtualBoundaries = vb.isCrossedByVirtualBoundaries(xPos, yPos, width, height, cs.slice->pps); |
271 | | |
272 | 16.2k | for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ ) |
273 | 12.1k | { |
274 | 12.1k | const ComponentID compID = ComponentID( compIdx ); |
275 | 12.1k | const CompArea& compArea = ctuArea.block( compID ); |
276 | | |
277 | 12.1k | PelBuf srcBuf = cs.picture->getSaoBuf().get( compID ); |
278 | 12.1k | PelBuf orgBuf = cs.picture->getOrigBuf().get( compID ); |
279 | | |
280 | 12.1k | getBlkStats( compID, |
281 | 12.1k | cs.sps->bitDepths[ toChannelType( compID ) ], |
282 | 12.1k | saoStatistics[ ctuRsAddr ][ compID ], |
283 | 12.1k | srcBuf.bufAt( compArea ), |
284 | 12.1k | orgBuf.bufAt( compArea ), |
285 | 12.1k | srcBuf.stride, |
286 | 12.1k | orgBuf.stride, |
287 | 12.1k | compArea.width, |
288 | 12.1k | compArea.height, |
289 | 12.1k | isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail |
290 | 12.1k | ); |
291 | 12.1k | } |
292 | 4.05k | } |
293 | | |
294 | | void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats, PelUnitBuf& orgYuv, PelUnitBuf& srcYuv, CodingStructure& cs ) |
295 | 0 | { |
296 | 0 | bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail; |
297 | |
|
298 | 0 | const PreCalcValues& pcv = *cs.pcv; |
299 | 0 | const int numberOfComponents = getNumberValidComponents(pcv.chrFormat); |
300 | |
|
301 | 0 | size_t lineBufferSize = pcv.maxCUSize + 1; |
302 | 0 | if (m_signLineBuf1.size() != lineBufferSize) |
303 | 0 | { |
304 | 0 | m_signLineBuf1.resize(lineBufferSize); |
305 | 0 | m_signLineBuf2.resize(lineBufferSize); |
306 | 0 | } |
307 | |
|
308 | 0 | int ctuRsAddr = 0; |
309 | 0 | for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUSize ) |
310 | 0 | { |
311 | 0 | for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUSize ) |
312 | 0 | { |
313 | 0 | const uint32_t width = (xPos + pcv.maxCUSize > pcv.lumaWidth) ? (pcv.lumaWidth - xPos) : pcv.maxCUSize; |
314 | 0 | const uint32_t height = (yPos + pcv.maxCUSize > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUSize; |
315 | 0 | const UnitArea area( cs.area.chromaFormat, Area(xPos , yPos, width, height) ); |
316 | |
|
317 | 0 | deriveLoopFilterBoundaryAvailibility(cs, area.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail ); |
318 | | |
319 | | //NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities. |
320 | | //For simplicity, here only picture boundaries are considered. |
321 | |
|
322 | 0 | isRightAvail = (xPos + pcv.maxCUSize < pcv.lumaWidth ); |
323 | 0 | isBelowAvail = (yPos + pcv.maxCUSize < pcv.lumaHeight); |
324 | 0 | isAboveRightAvail = ((yPos > 0) && (isRightAvail)); |
325 | |
|
326 | 0 | for(int compIdx = 0; compIdx < numberOfComponents; compIdx++) |
327 | 0 | { |
328 | 0 | const ComponentID compID = ComponentID(compIdx); |
329 | 0 | const CompArea& compArea = area.block( compID ); |
330 | |
|
331 | 0 | int srcStride = srcYuv.get(compID).stride; |
332 | 0 | Pel* srcBlk = srcYuv.get(compID).bufAt( compArea ); |
333 | |
|
334 | 0 | int orgStride = orgYuv.get(compID).stride; |
335 | 0 | Pel* orgBlk = orgYuv.get(compID).bufAt( compArea ); |
336 | |
|
337 | 0 | getBlkStats(compID, cs.sps->bitDepths[toChannelType(compID)], blkStats[ctuRsAddr][compID] |
338 | 0 | , srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height |
339 | 0 | , isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail ); |
340 | 0 | } |
341 | 0 | ctuRsAddr++; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | } |
345 | | |
346 | | void EncSampleAdaptiveOffset::decideCtuParams( CodingStructure& cs, const std::vector<SAOStatData**>& saoStatistics, const bool saoEnabled[ MAX_NUM_COMP ], const bool allBlksDisabled, const UnitArea& ctuArea, const int ctuRsAddr, SAOBlkParam* reconParams, SAOBlkParam* codedParams ) |
347 | 4.05k | { |
348 | 4.05k | const PreCalcValues& pcv = *cs.pcv; |
349 | 4.05k | const Slice& slice = *cs.slice; |
350 | 4.05k | const int ctuPosX = ctuRsAddr % pcv.widthInCtus; |
351 | 4.05k | const int ctuPosY = ctuRsAddr / pcv.widthInCtus; |
352 | | |
353 | | // reset CABAC estimator |
354 | 4.05k | if( m_EncCfg->m_ensureWppBitEqual |
355 | 4.05k | && m_EncCfg->m_numThreads < 1 |
356 | 0 | && ctuPosX == 0 |
357 | 0 | && ctuPosY > 0 ) |
358 | 0 | { |
359 | 0 | m_CABACEstimator->initCtxModels( slice ); |
360 | 0 | } |
361 | | |
362 | | // check disabled |
363 | 4.05k | if( allBlksDisabled ) |
364 | 0 | { |
365 | 0 | codedParams[ ctuRsAddr ].reset(); |
366 | 0 | return; |
367 | 0 | } |
368 | | |
369 | | // get merge list |
370 | 4.05k | SAOBlkParam* mergeList[ NUM_SAO_MERGE_TYPES ] = { NULL }; |
371 | 4.05k | getMergeList( cs, ctuRsAddr, reconParams, mergeList ); |
372 | | |
373 | 4.05k | const TempCtx ctxStart( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) ); |
374 | 4.05k | TempCtx ctxBest ( m_CtxCache ); |
375 | | |
376 | 4.05k | SAOBlkParam modeParam; |
377 | 4.05k | double minCost = MAX_DOUBLE; |
378 | 4.05k | double modeCost = MAX_DOUBLE; |
379 | 12.1k | for( int mode = 1; mode < NUM_SAO_MODES; mode++ ) |
380 | 8.10k | { |
381 | 8.10k | if( mode > 1 ) |
382 | 4.05k | { |
383 | 4.05k | m_CABACEstimator->getCtx() = SAOCtx( ctxStart ); |
384 | 4.05k | } |
385 | 8.10k | switch( mode ) |
386 | 8.10k | { |
387 | 4.05k | case SAO_MODE_NEW: |
388 | 4.05k | { |
389 | 4.05k | deriveModeNewRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost ); |
390 | 4.05k | } |
391 | 4.05k | break; |
392 | 4.05k | case SAO_MODE_MERGE: |
393 | 4.05k | { |
394 | 4.05k | deriveModeMergeRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost ); |
395 | 4.05k | } |
396 | 4.05k | break; |
397 | 0 | default: |
398 | 0 | { |
399 | 0 | THROW( "Not a supported SAO mode." ); |
400 | 0 | } |
401 | 8.10k | } |
402 | | |
403 | 8.10k | if( modeCost < minCost ) |
404 | 6.80k | { |
405 | 6.80k | minCost = modeCost; |
406 | 6.80k | codedParams[ ctuRsAddr ] = modeParam; |
407 | 6.80k | ctxBest = SAOCtx( m_CABACEstimator->getCtx() ); |
408 | 6.80k | } |
409 | 8.10k | } |
410 | | |
411 | | // apply reconstructed offsets |
412 | 4.05k | m_CABACEstimator->getCtx() = SAOCtx( ctxBest ); |
413 | 4.05k | reconParams[ ctuRsAddr ] = codedParams[ ctuRsAddr ]; |
414 | | |
415 | 4.05k | reconstructBlkSAOParam( reconParams[ ctuRsAddr ], mergeList ); |
416 | | |
417 | 4.05k | Picture& pic = *cs.picture; |
418 | 4.05k | offsetCTU( ctuArea, pic.getSaoBuf(), cs.getRecoBuf(), reconParams[ ctuRsAddr ], cs ); |
419 | 4.05k | } |
420 | | |
421 | | int64_t EncSampleAdaptiveOffset::getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* invQuantOffset, SAOStatData& statData) |
422 | 60.7k | { |
423 | 60.7k | int64_t dist = 0; |
424 | 60.7k | int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth); |
425 | | |
426 | 60.7k | switch(typeIdc) |
427 | 60.7k | { |
428 | 12.1k | case SAO_TYPE_EO_0: |
429 | 24.3k | case SAO_TYPE_EO_90: |
430 | 36.4k | case SAO_TYPE_EO_135: |
431 | 48.6k | case SAO_TYPE_EO_45: |
432 | 48.6k | { |
433 | 291k | for (int offsetIdx=0; offsetIdx<NUM_SAO_EO_CLASSES; offsetIdx++) |
434 | 243k | { |
435 | 243k | dist += estSaoDist( statData.count[offsetIdx], invQuantOffset[offsetIdx], statData.diff[offsetIdx], shift); |
436 | 243k | } |
437 | 48.6k | } |
438 | 48.6k | break; |
439 | 12.1k | case SAO_TYPE_BO: |
440 | 12.1k | { |
441 | 60.8k | for (int offsetIdx=typeAuxInfo; offsetIdx<typeAuxInfo+4; offsetIdx++) |
442 | 48.6k | { |
443 | 48.6k | int bandIdx = offsetIdx % NUM_SAO_BO_CLASSES ; |
444 | 48.6k | dist += estSaoDist( statData.count[bandIdx], invQuantOffset[bandIdx], statData.diff[bandIdx], shift); |
445 | 48.6k | } |
446 | 12.1k | } |
447 | 12.1k | break; |
448 | 0 | default: |
449 | 0 | { |
450 | 0 | THROW("Not a supported type"); |
451 | 36.4k | } |
452 | 60.7k | } |
453 | | |
454 | 60.7k | return dist; |
455 | 60.7k | } |
456 | | |
457 | | inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift) |
458 | 293k | { |
459 | 293k | return (( count*offset*offset-diffSum*offset*2 ) >> shift); |
460 | 293k | } |
461 | | |
462 | | |
463 | | inline int EncSampleAdaptiveOffset::estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh ) |
464 | 1.55k | { |
465 | 1.55k | int iterOffset, tempOffset; |
466 | 1.55k | int64_t tempDist, tempRate; |
467 | 1.55k | double tempCost, tempMinCost; |
468 | 1.55k | int offsetOutput = 0; |
469 | 1.55k | iterOffset = offsetInput; |
470 | | // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here. |
471 | 1.55k | tempMinCost = lambda; |
472 | 3.12k | while (iterOffset != 0) |
473 | 1.56k | { |
474 | | // Calculate the bits required for signaling the offset |
475 | 1.56k | tempRate = (typeIdx == SAO_TYPE_BO) ? (abs((int)iterOffset)+2) : (abs((int)iterOffset)+1); |
476 | 1.56k | if (abs((int)iterOffset)==offsetTh) //inclusive |
477 | 0 | { |
478 | 0 | tempRate --; |
479 | 0 | } |
480 | | // Do the dequantization before distortion calculation |
481 | 1.56k | tempOffset = iterOffset * (1<< bitIncrease); |
482 | 1.56k | tempDist = estSaoDist( count, tempOffset, diffSum, shift); |
483 | 1.56k | tempCost = ((double)tempDist + lambda * (double) tempRate); |
484 | 1.56k | if(tempCost < tempMinCost) |
485 | 330 | { |
486 | 330 | tempMinCost = tempCost; |
487 | 330 | offsetOutput = iterOffset; |
488 | 330 | bestDist = tempDist; |
489 | 330 | bestCost = tempCost; |
490 | 330 | } |
491 | 1.56k | iterOffset = (iterOffset > 0) ? (iterOffset-1):(iterOffset+1); |
492 | 1.56k | } |
493 | 1.55k | return offsetOutput; |
494 | 1.55k | } |
495 | | |
496 | | void EncSampleAdaptiveOffset::deriveOffsets(ComponentID compIdx, const int channelBitDepth, int typeIdc, SAOStatData& statData, int* quantOffsets, int& typeAuxInfo) |
497 | 60.7k | { |
498 | 60.7k | int bitDepth = channelBitDepth; |
499 | 60.7k | int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(bitDepth); |
500 | 60.7k | int offsetTh = SampleAdaptiveOffset::getMaxOffsetQVal(channelBitDepth); //inclusive |
501 | | |
502 | 60.7k | ::memset(quantOffsets, 0, sizeof(int)*MAX_NUM_SAO_CLASSES); |
503 | | |
504 | | //derive initial offsets |
505 | 60.7k | int numClasses = (typeIdc == SAO_TYPE_BO)?((int)NUM_SAO_BO_CLASSES):((int)NUM_SAO_EO_CLASSES); |
506 | 692k | for(int classIdx=0; classIdx< numClasses; classIdx++) |
507 | 631k | { |
508 | 631k | if( (typeIdc != SAO_TYPE_BO) && (classIdx==SAO_CLASS_EO_PLAIN) ) |
509 | 48.6k | { |
510 | 48.6k | continue; //offset will be zero |
511 | 48.6k | } |
512 | | |
513 | 583k | if(statData.count[classIdx] == 0) |
514 | 568k | { |
515 | 568k | continue; //offset will be zero |
516 | 568k | } |
517 | 14.7k | #if ( DISTORTION_PRECISION_ADJUSTMENT(x) == 0 ) |
518 | 14.7k | quantOffsets[classIdx] = |
519 | 14.7k | (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] ) / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx])); |
520 | 14.7k | quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]); |
521 | | #else |
522 | | quantOffsets[classIdx] = |
523 | | (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth)) |
524 | | / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx])); |
525 | | quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]); |
526 | | #endif |
527 | 14.7k | } |
528 | | |
529 | | // adjust offsets |
530 | 60.7k | switch(typeIdc) |
531 | 60.7k | { |
532 | 12.1k | case SAO_TYPE_EO_0: |
533 | 24.3k | case SAO_TYPE_EO_90: |
534 | 36.4k | case SAO_TYPE_EO_135: |
535 | 48.6k | case SAO_TYPE_EO_45: |
536 | 48.6k | { |
537 | 48.6k | int64_t classDist; |
538 | 48.6k | double classCost; |
539 | 291k | for(int classIdx=0; classIdx<NUM_SAO_EO_CLASSES; classIdx++) |
540 | 243k | { |
541 | 243k | if(classIdx==SAO_CLASS_EO_FULL_VALLEY && quantOffsets[classIdx] < 0) |
542 | 0 | { |
543 | 0 | quantOffsets[classIdx] =0; |
544 | 0 | } |
545 | 243k | if(classIdx==SAO_CLASS_EO_HALF_VALLEY && quantOffsets[classIdx] < 0) |
546 | 0 | { |
547 | 0 | quantOffsets[classIdx] =0; |
548 | 0 | } |
549 | 243k | if(classIdx==SAO_CLASS_EO_HALF_PEAK && quantOffsets[classIdx] > 0) |
550 | 0 | { |
551 | 0 | quantOffsets[classIdx] =0; |
552 | 0 | } |
553 | 243k | if(classIdx==SAO_CLASS_EO_FULL_PEAK && quantOffsets[classIdx] > 0) |
554 | 0 | { |
555 | 0 | quantOffsets[classIdx] =0; |
556 | 0 | } |
557 | | |
558 | 243k | if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero |
559 | 1.19k | { |
560 | 1.19k | quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], classDist , classCost , offsetTh ); |
561 | 1.19k | } |
562 | 243k | } |
563 | | |
564 | 48.6k | typeAuxInfo =0; |
565 | 48.6k | } |
566 | 48.6k | break; |
567 | 12.1k | case SAO_TYPE_BO: |
568 | 12.1k | { |
569 | 12.1k | int64_t distBOClasses[NUM_SAO_BO_CLASSES]; |
570 | 12.1k | double costBOClasses[NUM_SAO_BO_CLASSES]; |
571 | 12.1k | ::memset(distBOClasses, 0, sizeof(int64_t)*NUM_SAO_BO_CLASSES); |
572 | 401k | for(int classIdx=0; classIdx< NUM_SAO_BO_CLASSES; classIdx++) |
573 | 388k | { |
574 | 388k | costBOClasses[classIdx]= m_lambda[compIdx]; |
575 | 388k | if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero |
576 | 360 | { |
577 | 360 | quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], distBOClasses[classIdx], costBOClasses[classIdx], offsetTh ); |
578 | 360 | } |
579 | 388k | } |
580 | | |
581 | | //decide the starting band index |
582 | 12.1k | double minCost = MAX_DOUBLE, cost; |
583 | 401k | for(int band=0; band< NUM_SAO_BO_CLASSES; band++) |
584 | 388k | { |
585 | 388k | cost = costBOClasses[(band )%NUM_SAO_BO_CLASSES]; |
586 | 388k | cost += costBOClasses[(band+1)%NUM_SAO_BO_CLASSES]; |
587 | 388k | cost += costBOClasses[(band+2)%NUM_SAO_BO_CLASSES]; |
588 | 388k | cost += costBOClasses[(band+3)%NUM_SAO_BO_CLASSES]; |
589 | | |
590 | 388k | if(cost < minCost) |
591 | 12.2k | { |
592 | 12.2k | minCost = cost; |
593 | 12.2k | typeAuxInfo = band; |
594 | 12.2k | } |
595 | 388k | } |
596 | | //clear those unused classes |
597 | 12.1k | int clearQuantOffset[NUM_SAO_BO_CLASSES]; |
598 | 12.1k | ::memset(clearQuantOffset, 0, sizeof(int)*NUM_SAO_BO_CLASSES); |
599 | 60.7k | for(int i=0; i< 4; i++) |
600 | 48.6k | { |
601 | 48.6k | int band = (typeAuxInfo+i)%NUM_SAO_BO_CLASSES; |
602 | 48.6k | clearQuantOffset[band] = quantOffsets[band]; |
603 | 48.6k | } |
604 | 12.1k | ::memcpy(quantOffsets, clearQuantOffset, sizeof(int)*NUM_SAO_BO_CLASSES); |
605 | 12.1k | } |
606 | 12.1k | break; |
607 | 0 | default: |
608 | 0 | { |
609 | 0 | THROW("Not a supported type"); |
610 | 36.4k | } |
611 | 60.7k | } |
612 | 60.7k | } |
613 | | |
614 | | void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost ) |
615 | 4.05k | { |
616 | 4.05k | double minCost, cost; |
617 | 4.05k | uint64_t previousFracBits; |
618 | 4.05k | const int numberOfComponents = m_numberOfComponents; |
619 | | |
620 | 4.05k | int64_t dist[MAX_NUM_COMP], modeDist[MAX_NUM_COMP]; |
621 | 4.05k | SAOOffset testOffset[MAX_NUM_COMP]; |
622 | 4.05k | int invQuantOffset[MAX_NUM_SAO_CLASSES]; |
623 | 16.2k | for(int comp=0; comp < MAX_NUM_COMP; comp++) |
624 | 12.1k | { |
625 | 12.1k | modeDist[comp] = 0; |
626 | 12.1k | } |
627 | | |
628 | | //pre-encode merge flags |
629 | 4.05k | modeParam[COMP_Y].modeIdc = SAO_MODE_OFF; |
630 | 4.05k | const TempCtx ctxStartBlk ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) ); |
631 | 4.05k | m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), true ); |
632 | 4.05k | const TempCtx ctxStartLuma ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) ); |
633 | 4.05k | TempCtx ctxBestLuma ( m_CtxCache ); |
634 | | |
635 | | //------ luma --------// |
636 | 4.05k | { |
637 | 4.05k | const ComponentID compIdx = COMP_Y; |
638 | | //"off" case as initial cost |
639 | 4.05k | modeParam[compIdx].modeIdc = SAO_MODE_OFF; |
640 | 4.05k | m_CABACEstimator->resetBits(); |
641 | 4.05k | m_CABACEstimator->sao_offset_pars( modeParam[compIdx], compIdx, sliceEnabled[compIdx], bitDepths[CH_L] ); |
642 | 4.05k | modeDist[compIdx] = 0; |
643 | 4.05k | minCost = m_lambda[compIdx] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits()); |
644 | 4.05k | ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() ); |
645 | 4.05k | if(sliceEnabled[compIdx]) |
646 | 4.05k | { |
647 | 24.3k | for(int typeIdc=0; typeIdc< NUM_SAO_NEW_TYPES; typeIdc++) |
648 | 20.2k | { |
649 | 20.2k | testOffset[compIdx].modeIdc = SAO_MODE_NEW; |
650 | 20.2k | testOffset[compIdx].typeIdc = typeIdc; |
651 | | |
652 | | //derive coded offset |
653 | 20.2k | deriveOffsets(compIdx, bitDepths[CH_L], typeIdc, blkStats[ctuRsAddr][compIdx][typeIdc], testOffset[compIdx].offset, testOffset[compIdx].typeAuxInfo); |
654 | | |
655 | | //inversed quantized offsets |
656 | 20.2k | invertQuantOffsets(compIdx, typeIdc, testOffset[compIdx].typeAuxInfo, invQuantOffset, testOffset[compIdx].offset); |
657 | | |
658 | | //get distortion |
659 | 20.2k | dist[compIdx] = getDistortion(bitDepths[CH_L], testOffset[compIdx].typeIdc, testOffset[compIdx].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][compIdx][typeIdc]); |
660 | | |
661 | | //get rate |
662 | 20.2k | m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma ); |
663 | 20.2k | m_CABACEstimator->resetBits(); |
664 | 20.2k | m_CABACEstimator->sao_offset_pars( testOffset[compIdx], compIdx, sliceEnabled[compIdx], bitDepths[CH_L] ); |
665 | 20.2k | double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); |
666 | 20.2k | cost = (double)dist[compIdx] + m_lambda[compIdx]*rate; |
667 | 20.2k | if(cost < minCost) |
668 | 29 | { |
669 | 29 | minCost = cost; |
670 | 29 | modeDist[compIdx] = dist[compIdx]; |
671 | 29 | modeParam[compIdx]= testOffset[compIdx]; |
672 | 29 | ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() ); |
673 | 29 | } |
674 | 20.2k | } |
675 | 4.05k | } |
676 | 4.05k | m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma ); |
677 | 4.05k | } |
678 | | |
679 | | //------ chroma --------// |
680 | | //"off" case as initial cost |
681 | 4.05k | cost = 0; |
682 | 4.05k | previousFracBits = 0; |
683 | 4.05k | m_CABACEstimator->resetBits(); |
684 | 12.1k | for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++) |
685 | 8.10k | { |
686 | 8.10k | const ComponentID component = ComponentID(componentIndex); |
687 | | |
688 | 8.10k | modeParam[component].modeIdc = SAO_MODE_OFF; |
689 | 8.10k | modeDist [component] = 0; |
690 | 8.10k | m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths[CH_C] ); |
691 | 8.10k | const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits(); |
692 | 8.10k | cost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits); |
693 | 8.10k | previousFracBits = currentFracBits; |
694 | 8.10k | } |
695 | | |
696 | 4.05k | minCost = cost; |
697 | | |
698 | | //doesn't need to store cabac status here since the whole CTU parameters will be re-encoded at the end of this function |
699 | | |
700 | 24.3k | for(int typeIdc=0; typeIdc< NUM_SAO_NEW_TYPES; typeIdc++) |
701 | 20.2k | { |
702 | 20.2k | m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma ); |
703 | 20.2k | m_CABACEstimator->resetBits(); |
704 | 20.2k | previousFracBits = 0; |
705 | 20.2k | cost = 0; |
706 | | |
707 | 60.7k | for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++) |
708 | 40.5k | { |
709 | 40.5k | const ComponentID component = ComponentID(componentIndex); |
710 | 40.5k | if(!sliceEnabled[component]) |
711 | 0 | { |
712 | 0 | testOffset[component].modeIdc = SAO_MODE_OFF; |
713 | 0 | dist[component]= 0; |
714 | 0 | continue; |
715 | 0 | } |
716 | 40.5k | testOffset[component].modeIdc = SAO_MODE_NEW; |
717 | 40.5k | testOffset[component].typeIdc = typeIdc; |
718 | | |
719 | | //derive offset & get distortion |
720 | 40.5k | deriveOffsets(component, bitDepths[CH_C], typeIdc, blkStats[ctuRsAddr][component][typeIdc], testOffset[component].offset, testOffset[component].typeAuxInfo); |
721 | 40.5k | invertQuantOffsets(component, typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, testOffset[component].offset); |
722 | 40.5k | dist[component] = getDistortion(bitDepths[CH_C], typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][component][typeIdc]); |
723 | 40.5k | m_CABACEstimator->sao_offset_pars( testOffset[component], component, sliceEnabled[component], bitDepths[CH_C] ); |
724 | 40.5k | const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits(); |
725 | 40.5k | cost += dist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits)); |
726 | 40.5k | previousFracBits = currentFracBits; |
727 | 40.5k | } |
728 | | |
729 | 20.2k | if(cost < minCost) |
730 | 1 | { |
731 | 1 | minCost = cost; |
732 | 3 | for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++) |
733 | 2 | { |
734 | 2 | modeDist[componentIndex] = dist[componentIndex]; |
735 | 2 | modeParam[componentIndex] = testOffset[componentIndex]; |
736 | 2 | } |
737 | 1 | } |
738 | | |
739 | 20.2k | } // SAO_TYPE loop |
740 | | |
741 | | //----- re-gen rate & normalized cost----// |
742 | 4.05k | modeNormCost = 0; |
743 | 16.2k | for(uint32_t componentIndex = COMP_Y; componentIndex < numberOfComponents; componentIndex++) |
744 | 12.1k | { |
745 | 12.1k | modeNormCost += (double)modeDist[componentIndex] / m_lambda[componentIndex]; |
746 | 12.1k | } |
747 | | |
748 | 4.05k | m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk ); |
749 | 4.05k | m_CABACEstimator->resetBits(); |
750 | 4.05k | m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false ); |
751 | 4.05k | modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); |
752 | 4.05k | } |
753 | | |
754 | | void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost ) |
755 | 4.05k | { |
756 | 4.05k | modeNormCost = MAX_DOUBLE; |
757 | | |
758 | 4.05k | double cost; |
759 | 4.05k | SAOBlkParam testBlkParam; |
760 | 4.05k | const int numberOfComponents = m_numberOfComponents; |
761 | | |
762 | 4.05k | const TempCtx ctxStart ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) ); |
763 | 4.05k | TempCtx ctxBest ( m_CtxCache ); |
764 | | |
765 | 12.1k | for(int mergeType=0; mergeType< NUM_SAO_MERGE_TYPES; mergeType++) |
766 | 8.10k | { |
767 | 8.10k | if(mergeList[mergeType] == NULL) |
768 | 4.50k | { |
769 | 4.50k | continue; |
770 | 4.50k | } |
771 | | |
772 | 3.60k | testBlkParam = *(mergeList[mergeType]); |
773 | | //normalized distortion |
774 | 3.60k | double normDist=0; |
775 | 14.4k | for(int compIdx = 0; compIdx < numberOfComponents; compIdx++) |
776 | 10.8k | { |
777 | 10.8k | testBlkParam[compIdx].modeIdc = SAO_MODE_MERGE; |
778 | 10.8k | testBlkParam[compIdx].typeIdc = mergeType; |
779 | | |
780 | 10.8k | SAOOffset& mergedOffsetParam = (*(mergeList[mergeType]))[compIdx]; |
781 | | |
782 | 10.8k | if( mergedOffsetParam.modeIdc != SAO_MODE_OFF) |
783 | 14 | { |
784 | | //offsets have been reconstructed. Don't call inversed quantization function. |
785 | 14 | normDist += (((double)getDistortion(bitDepths[toChannelType(ComponentID(compIdx))], mergedOffsetParam.typeIdc, mergedOffsetParam.typeAuxInfo, mergedOffsetParam.offset, blkStats[ctuRsAddr][compIdx][mergedOffsetParam.typeIdc])) |
786 | 14 | /m_lambda[compIdx] ); |
787 | 14 | } |
788 | 10.8k | } |
789 | | |
790 | | //rate |
791 | 3.60k | m_CABACEstimator->getCtx() = SAOCtx( ctxStart ); |
792 | 3.60k | m_CABACEstimator->resetBits(); |
793 | 3.60k | m_CABACEstimator->sao_block_pars( testBlkParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false ); |
794 | 3.60k | double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits(); |
795 | 3.60k | cost = normDist+rate; |
796 | | |
797 | 3.60k | if(cost < modeNormCost) |
798 | 2.75k | { |
799 | 2.75k | modeNormCost = cost; |
800 | 2.75k | modeParam = testBlkParam; |
801 | 2.75k | ctxBest = SAOCtx( m_CABACEstimator->getCtx() ); |
802 | 2.75k | } |
803 | 3.60k | } |
804 | 4.05k | if( modeNormCost < MAX_DOUBLE ) |
805 | 2.75k | { |
806 | 2.75k | m_CABACEstimator->getCtx() = SAOCtx( ctxBest ); |
807 | 2.75k | } |
808 | 4.05k | } |
809 | | |
810 | | void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes |
811 | | , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height |
812 | | , bool isLeftAvail, bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail ) |
813 | 12.1k | { |
814 | 12.1k | int x, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX; |
815 | 12.1k | int64_t *diff, *count; |
816 | 12.1k | Pel* srcLine, *orgLine; |
817 | 12.1k | const int skipLinesR = compIdx == COMP_Y ? 5 : 3; |
818 | 12.1k | const int skipLinesB = compIdx == COMP_Y ? 4 : 2; |
819 | | |
820 | 72.9k | for(int typeIdx=0; typeIdx< NUM_SAO_NEW_TYPES; typeIdx++) |
821 | 60.7k | { |
822 | 60.7k | SAOStatData& statsData= statsDataTypes[typeIdx]; |
823 | 60.7k | statsData.reset(); |
824 | 60.7k | srcLine = srcBlk; |
825 | 60.7k | orgLine = orgBlk; |
826 | 60.7k | diff = statsData.diff; |
827 | 60.7k | count = statsData.count; |
828 | 60.7k | switch(typeIdx) |
829 | 60.7k | { |
830 | 12.1k | case SAO_TYPE_EO_0: |
831 | 12.1k | { |
832 | 12.1k | endY = isBelowAvail ? (height - skipLinesB) : height; |
833 | 12.1k | startX = (isLeftAvail ? 0 : 1); |
834 | 12.1k | endX = (isRightAvail ? (width - skipLinesR) : (width - 1)); |
835 | 12.1k | calcSaoStatisticsEo0(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff); |
836 | 12.1k | } |
837 | 12.1k | break; |
838 | 12.1k | case SAO_TYPE_EO_90: |
839 | 12.1k | { |
840 | 12.1k | int8_t *signUpLine = &m_signLineBuf1[0]; |
841 | 12.1k | startX = 0; |
842 | 12.1k | startY = isAboveAvail ? 0 : 1; |
843 | 12.1k | endX = (isRightAvail ? (width - skipLinesR) : width); |
844 | 12.1k | endY = isBelowAvail ? (height - skipLinesB) : (height - 1); |
845 | 12.1k | if (!isAboveAvail) |
846 | 6.70k | { |
847 | 6.70k | srcLine += srcStride; |
848 | 6.70k | orgLine += orgStride; |
849 | 6.70k | } |
850 | 12.1k | calcSaoStatisticsEo90(width,endX,startY,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine); |
851 | 12.1k | } |
852 | 12.1k | break; |
853 | 12.1k | case SAO_TYPE_EO_135: |
854 | 12.1k | { |
855 | 12.1k | diff +=2; |
856 | 12.1k | count+=2; |
857 | 12.1k | int8_t *signUpLine, *signDownLine; |
858 | 12.1k | signUpLine = &m_signLineBuf1[0]; |
859 | 12.1k | signDownLine= &m_signLineBuf2[0]; |
860 | 12.1k | startX = isLeftAvail ? 0 : 1; |
861 | 12.1k | endX = isRightAvail ? (width - skipLinesR): (width - 1); |
862 | 12.1k | endY = isBelowAvail ? (height - skipLinesB) : (height - 1); |
863 | | //prepare 2nd line's upper sign |
864 | 12.1k | Pel* srcLineBelow = srcLine + srcStride; |
865 | 699k | for (x=startX; x<endX+1; x++) |
866 | 687k | { |
867 | 687k | signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x-1]); |
868 | 687k | } |
869 | | //1st line |
870 | 12.1k | Pel* srcLineAbove = srcLine - srcStride; |
871 | 12.1k | firstLineStartX = isAboveLeftAvail ? 0 : 1; |
872 | 12.1k | firstLineEndX = isAboveAvail ? endX : 1; |
873 | 319k | for(x=firstLineStartX; x<firstLineEndX; x++) |
874 | 307k | { |
875 | 307k | edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1]; |
876 | 307k | diff [edgeType] += (orgLine[x] - srcLine[x]); |
877 | 307k | count[edgeType] ++; |
878 | 307k | } |
879 | 12.1k | srcLine += srcStride; |
880 | 12.1k | orgLine += orgStride; |
881 | 12.1k | calcSaoStatisticsEo135(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine,signDownLine); |
882 | 12.1k | } |
883 | 12.1k | break; |
884 | 12.1k | case SAO_TYPE_EO_45: |
885 | 12.1k | { |
886 | 12.1k | diff +=2; |
887 | 12.1k | count+=2; |
888 | 12.1k | int8_t *signUpLine = &m_signLineBuf1[1]; |
889 | | |
890 | 12.1k | startX = isLeftAvail ? 0 : 1; |
891 | 12.1k | endX = isRightAvail ? (width - skipLinesR) : (width - 1); |
892 | 12.1k | endY = isBelowAvail ? (height - skipLinesB) : (height - 1); |
893 | | |
894 | | //prepare 2nd line upper sign |
895 | 12.1k | Pel* srcLineBelow = srcLine + srcStride; |
896 | 699k | for (x=startX-1; x<endX; x++) |
897 | 687k | { |
898 | 687k | signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x+1]); |
899 | 687k | } |
900 | | //first line |
901 | 12.1k | Pel* srcLineAbove = srcLine - srcStride; |
902 | 12.1k | firstLineStartX = isAboveAvail ? startX : endX; |
903 | 12.1k | firstLineEndX = (!isRightAvail && isAboveRightAvail) ? width : endX; |
904 | 319k | for(x=firstLineStartX; x<firstLineEndX; x++) |
905 | 307k | { |
906 | 307k | edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1]; |
907 | 307k | diff [edgeType] += (orgLine[x] - srcLine[x]); |
908 | 307k | count[edgeType] ++; |
909 | 307k | } |
910 | 12.1k | srcLine += srcStride; |
911 | 12.1k | orgLine += orgStride; |
912 | 12.1k | calcSaoStatisticsEo45(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine); |
913 | 12.1k | } |
914 | 12.1k | break; |
915 | 12.1k | case SAO_TYPE_BO: |
916 | 12.1k | { |
917 | 12.1k | startX = 0; |
918 | 12.1k | endX = isRightAvail ? (width - skipLinesR) : width; |
919 | 12.1k | endY = isBelowAvail ? (height- skipLinesB) : height; |
920 | 12.1k | calcSaoStatisticsBo(width,endX,endY,srcLine,orgLine,srcStride,orgStride,channelBitDepth,count,diff); |
921 | 12.1k | } |
922 | 12.1k | break; |
923 | 0 | default: |
924 | 0 | { |
925 | 0 | THROW("Not a supported SAO type"); |
926 | 0 | } |
927 | 60.7k | } |
928 | 60.7k | } |
929 | 12.1k | } |
930 | | |
931 | | void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position& pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const |
932 | 4.05k | { |
933 | 4.05k | const bool isLoopFiltAcrossSlicePPS = cs.pps->loopFilterAcrossSlicesEnabled; |
934 | 4.05k | const bool isLoopFiltAcrossTilePPS = cs.pps->loopFilterAcrossTilesEnabled; |
935 | | |
936 | 4.05k | const int width = cs.pcv->maxCUSize; |
937 | 4.05k | const int height = cs.pcv->maxCUSize; |
938 | 4.05k | const CodingUnit* cuCurr = cs.getCU(pos, CH_L, TREE_D); |
939 | 4.05k | const int ctuX = pos.x >> cs.pcv->maxCUSizeLog2; |
940 | 4.05k | const int ctuY = pos.y >> cs.pcv->maxCUSizeLog2; |
941 | 4.05k | const PPS* pps = cs.slice->pps; |
942 | 4.05k | const CodingUnit* cuLeft = ctuX > 0 && pps->canFilterCtuBdry( ctuX, ctuY, -1, 0 ) ? cs.getCU(pos.offset(-width, 0), CH_L, TREE_D): nullptr; |
943 | 4.05k | const CodingUnit* cuAbove = ctuY > 0 && pps->canFilterCtuBdry( ctuX, ctuY, 0, -1 ) ? cs.getCU(pos.offset(0, -height), CH_L, TREE_D): nullptr; |
944 | 4.05k | const CodingUnit* cuAboveLeft = ctuY > 0 && ctuX > 0 && pps->canFilterCtuBdry( ctuX, ctuY, -1,-1 ) ? cs.getCU(pos.offset(-width, -height), CH_L, TREE_D): nullptr; |
945 | | |
946 | 4.05k | if (!isLoopFiltAcrossSlicePPS) |
947 | 0 | { |
948 | 0 | isLeftAvail = (cuLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuLeft); |
949 | 0 | isAboveAvail = (cuAbove == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAbove); |
950 | 0 | isAboveLeftAvail = (cuAboveLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAboveLeft); |
951 | 0 | } |
952 | 4.05k | else |
953 | 4.05k | { |
954 | 4.05k | isLeftAvail = (cuLeft != NULL); |
955 | 4.05k | isAboveAvail = (cuAbove != NULL); |
956 | 4.05k | isAboveLeftAvail = (cuAboveLeft != NULL); |
957 | 4.05k | } |
958 | | |
959 | 4.05k | if (!isLoopFiltAcrossTilePPS) |
960 | 0 | { |
961 | 0 | isLeftAvail = (!isLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuLeft); |
962 | 0 | isAboveAvail = (!isAboveAvail) ? false : CU::isSameTile(*cuCurr, *cuAbove); |
963 | 0 | isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft); |
964 | 0 | } |
965 | | |
966 | | |
967 | 4.05k | SubPic curSubPic = cs.pps->getSubPicFromCU(*cuCurr); |
968 | 4.05k | if (!curSubPic.loopFilterAcrossSubPicEnabled ) |
969 | 0 | { |
970 | 0 | isLeftAvail = (!isLeftAvail) ? false : CU::isSameSubPic(*cuCurr, *cuLeft); |
971 | 0 | isAboveAvail = (!isAboveAvail) ? false : CU::isSameSubPic(*cuCurr, *cuAbove); |
972 | 0 | isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameSubPic(*cuCurr, *cuAboveLeft); |
973 | 0 | } |
974 | | |
975 | 4.05k | } |
976 | | |
977 | | } // namespace vvenc |
978 | | |
979 | | //! \} |
980 | | |