Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/**
45
 \file     EncSampleAdaptiveOffset.cpp
46
 \brief       estimation part of sample adaptive offset class
47
 */
48
49
#include "EncSampleAdaptiveOffset.h"
50
#include "CommonLib/UnitTools.h"
51
#include "CommonLib/dtrace_codingstruct.h"
52
#include "CommonLib/dtrace_buffer.h"
53
#include "CommonLib/CodingStructure.h"
54
#include <string.h>
55
#include <stdlib.h>
56
#include <math.h>
57
#include "vvenc/vvencCfg.h"
58
59
//! \ingroup EncoderLib
60
//! \{
61
62
namespace vvenc {
63
64
65
92.9k
// Shorthand: wraps the context object `c` in a SubCtx that selects the Ctx::Sao context set.
#define SAOCtx(c) SubCtx( Ctx::Sao, c )
66
67
68
//! rounding with IBDI
69
inline double xRoundIbdi2(int bitDepth, double x)
{
  // Round half away from zero by biasing with +/-0.5 and truncating to int.
  // bitDepth is part of the signature but does not influence the result.
  const double biased = ( x >= 0 ) ? ( x + 0.5 ) : ( x - 0.5 );
  return static_cast<int>( biased );
}
73
74
/**
 * Rounds \p x to the nearest integer, with halves rounded away from zero.
 *
 * The previous implementation branched on `bitDepth > 8`, but both arms performed
 * exactly this rounding, so the result never depended on the bit depth. The
 * redundant branch is removed; the parameter stays for interface compatibility.
 *
 * \param bitDepth  bit depth of the channel (not used by the computation)
 * \param x         value to round; the biased value must fit into an int
 * \return          the rounded value, converted back to double
 */
inline double xRoundIbdi(int bitDepth, double x)
{
  (void) bitDepth;
  return static_cast<int>( ( x >= 0 ) ? ( x + 0.5 ) : ( x - 0.5 ) );
}
78
79
80
EncSampleAdaptiveOffset::EncSampleAdaptiveOffset()
81
9.06k
  : m_CABACEstimator( nullptr )
82
9.06k
  , m_CtxCache      ( nullptr )
83
9.06k
{
84
9.06k
}
85
86
//! Nothing to release explicitly here; members clean up through their own destructors.
EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset() = default;
89
90
void EncSampleAdaptiveOffset::init( const VVEncCfg& encCfg )
91
9.06k
{
92
9.06k
  m_EncCfg = &encCfg;
93
94
9.06k
  if ( encCfg.m_bUseSAO )
95
9.06k
  {
96
9.06k
    SampleAdaptiveOffset::init( encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_CTUSize, encCfg.m_log2SaoOffsetScale[CH_L], encCfg.m_log2SaoOffsetScale[CH_C] );
97
9.06k
  }
98
9.06k
}
99
100
void EncSampleAdaptiveOffset::initSlice( const Slice* slice )
101
2.26k
{
102
2.26k
  memcpy( m_lambda, slice->getLambdas(), sizeof( m_lambda ) );
103
2.26k
}
104
105
/**
 * Attaches the CABAC estimator and the context cache used by the CTU-level decisions.
 * Only the pointers are stored.
 */
void EncSampleAdaptiveOffset::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache )
{
  m_CtxCache       = ctxCache;
  m_CABACEstimator = cabacEstimator;
}
110
111
/**
 * Updates the statistics on how many CTUs of the current picture have SAO switched off.
 *
 * Nothing is done unless \p saoEncodingRate is positive. Otherwise the CTUs whose
 * reconstructed SAO mode is SAO_MODE_OFF are counted per component, and
 *  - if \p saoEncodingRateChroma is positive, the per-component ratio is stored for the
 *    temporal layer of the current slice;
 *  - else, for temporal layer 0 only, one ratio averaged over all valid components is
 *    stored in saoDisabledRate[COMP_Y][0].
 *
 * \param cs                     coding structure providing pcv and the current slice
 * \param saoDisabledRate        in/out table of disabled ratios [component][temporal layer]
 * \param reconParams            reconstructed SAO parameters, indexed by CTU raster address
 * \param saoEncodingRate        luma threshold; values <= 0 disable the bookkeeping
 * \param saoEncodingRateChroma  chroma threshold; values <= 0 select the combined ratio
 * \param chromaFormat           chroma format determining the number of valid components
 */
void EncSampleAdaptiveOffset::disabledRate( CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
{
  if( !( saoEncodingRate > 0.0 ) )
  {
    return;
  }

  const PreCalcValues& pcv     = *cs.pcv;
  const int numberOfComponents = getNumberValidComponents( chromaFormat );
  const int picTempLayer       = cs.slice->TLayer;

  // Zero-initialise every entry: previously the counters of components that are not
  // present in the chroma format stayed uninitialised and were read in the combined branch.
  int numCtusForSAOOff[MAX_NUM_COMP] = { 0 };

  for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
  {
    for( int ctuRsAddr = 0; ctuRsAddr < pcv.sizeInCtus; ctuRsAddr++ )
    {
      if( reconParams[ctuRsAddr][compIdx].modeIdc == SAO_MODE_OFF )
      {
        numCtusForSAOOff[compIdx]++;
      }
    }
  }

  if( saoEncodingRateChroma > 0.0 )
  {
    for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
    {
      saoDisabledRate[compIdx][picTempLayer] = (double)numCtusForSAOOff[compIdx] / (double)pcv.sizeInCtus;
    }
  }
  else if( picTempLayer == 0 )
  {
    // Average over the components that actually exist. For three-component formats this
    // equals the former "(Y + Cb + Cr) / (sizeInCtus * 3)".
    int numCtusOffAllComps = 0;
    for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
    {
      numCtusOffAllComps += numCtusForSAOOff[compIdx];
    }
    saoDisabledRate[COMP_Y][0] = (double)numCtusOffAllComps / (double)( pcv.sizeInCtus * numberOfComponents );
  }
}
144
145
/**
 * Decides per component whether SAO is enabled for the current picture.
 *
 * Every valid component starts out enabled; components beyond the valid count are disabled.
 * When \p saoEncodingRate is positive and the slice is not in temporal layer 0, a component
 * is disabled if the stored disabled ratio exceeds its threshold:
 *  - with a positive \p saoEncodingRateChroma the ratio of the same component in the next
 *    lower temporal layer is compared against the luma or chroma threshold;
 *  - otherwise saoDisabledRate[COMP_Y][0] is compared against \p saoEncodingRate.
 * If the slice has pendingRasInit set, the ratios of all temporal layers >= 1 are cleared first.
 */
void EncSampleAdaptiveOffset::decidePicParams( const CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], bool saoEnabled[ MAX_NUM_COMP ], const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
{
  const Slice& slice     = *cs.slice;
  const int numValidComp = getNumberValidComponents( chromaFormat );

  // reset the history of the higher temporal layers
  if( slice.pendingRasInit )
  {
    for( int comp = 0; comp < MAX_NUM_COMP; comp++ )
    {
      for( int layer = 1; layer < VVENC_MAX_TLAYER; layer++ )
      {
        saoDisabledRate[ comp ][ layer ] = 0.0;
      }
    }
  }

  // default: on for valid components, off for the rest
  for( int comp = 0; comp < MAX_NUM_COMP; comp++ )
  {
    saoEnabled[ comp ] = ( comp < numValidComp );
  }

  const int picTempLayer = slice.TLayer;
  if( !( saoEncodingRate > 0.0 ) || !( picTempLayer > 0 ) )
  {
    return; // no rate-based switching requested, or lowest temporal layer
  }

  // decide slice-level on/off based on previous results
  const bool perComponent = ( saoEncodingRateChroma > 0.0 );
  for( int comp = 0; comp < numValidComp; comp++ )
  {
    bool tooOftenOff;
    if( perComponent )
    {
      const double threshold = ( comp == COMP_Y ) ? saoEncodingRate : saoEncodingRateChroma;
      tooOftenOff = saoDisabledRate[ comp ][ picTempLayer - 1 ] > threshold;
    }
    else
    {
      tooOftenOff = saoDisabledRate[ COMP_Y ][ 0 ] > saoEncodingRate;
    }

    if( tooOftenOff )
    {
      saoEnabled[ comp ] = false;
    }
  }
}
196
197
/**
 * Copies reconstructed samples belonging to a CTU from the picture's reco buffer into its
 * SAO buffer.
 *
 * The copied window has the CTU's luma size but is shifted by 8 luma samples to the right
 * and downwards. For a CTU lying on the left/top edge (of the picture, or of its tile when
 * loop filtering across tiles is disabled) the window instead starts at the CTU origin and
 * is enlarged by 8 in that direction. The window is finally clipped to the picture or tile extent.
 *
 * \param cs       coding structure (pps, pcv and picture are used)
 * \param ctuArea  area of the CTU
 * \param ctuX     CTU column, used for the tile lookup
 * \param ctuY     CTU row, used for the tile lookup
 */
void EncSampleAdaptiveOffset::storeCtuReco( CodingStructure& cs, const UnitArea& ctuArea, const int ctuX, const int ctuY )
{
  const int STORE_CTU_INCREASE = 8;
  const bool tileBdryClip      = cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled;

  // origin against which "first column/row" is judged: picture origin or tile origin
  int originX = 0;
  int originY = 0;
  if( tileBdryClip )
  {
    originX = cs.pps->tileColBd[cs.pps->ctuToTileCol[ctuX]] << cs.pcv->maxCUSizeLog2;
    originY = cs.pps->tileRowBd[cs.pps->ctuToTileRow[ctuY]] << cs.pcv->maxCUSizeLog2;
  }

  Position topLeft( ctuArea.lx() + STORE_CTU_INCREASE, ctuArea.ly() + STORE_CTU_INCREASE );
  Size     extent ( ctuArea.lwidth(), ctuArea.lheight() );

  if( ctuArea.lx() == originX )
  {
    topLeft.x     = ctuArea.lx();
    extent.width += STORE_CTU_INCREASE;
  }
  if( ctuArea.ly() == originY )
  {
    topLeft.y      = ctuArea.ly();
    extent.height += STORE_CTU_INCREASE;
  }

  // remaining room to the right/bottom limit
  int maxWidth;
  int maxHeight;
  if( tileBdryClip )
  {
    maxWidth  = cs.pps->tileColBdRgt[cs.pps->ctuToTileCol[ctuX]] - topLeft.x;
    maxHeight = cs.pps->tileRowBdBot[cs.pps->ctuToTileRow[ctuY]] - topLeft.y;
  }
  else
  {
    maxWidth  = cs.pcv->lumaWidth  - topLeft.x;
    maxHeight = cs.pcv->lumaHeight - topLeft.y;
  }
  extent.clipSize( maxWidth, maxHeight );

  const UnitArea relocArea( ctuArea.chromaFormat, Area( topLeft, extent ) );
  Picture& pic       = *cs.picture;
  PelUnitBuf recoYuv = pic.getRecoBuf().subBuf( relocArea );
  PelUnitBuf saoYuv  = pic.getSaoBuf().subBuf( relocArea );
  saoYuv.copyFrom( recoYuv );
}
238
239
/**
 * Gathers the SAO statistics of one CTU for every valid component.
 *
 * Left/above/above-left availability comes from deriveLoopFilterBoundaryAvailibility();
 * right/below/above-right availability is derived from the picture boundaries and, if the
 * picture has several tiles without loop filtering across them, additionally from
 * canFilterCtuBdry(). Slices without loop filtering across slice boundaries are rejected.
 *
 * \param cs             coding structure of the picture
 * \param saoStatistics  statistics storage; entry [ctuRsAddr][component] is handed to getBlkStats()
 * \param ctuArea        area of the CTU
 * \param ctuRsAddr      raster-scan address of the CTU
 */
void EncSampleAdaptiveOffset::getCtuStatistics( CodingStructure& cs, std::vector<SAOStatData**>& saoStatistics, const UnitArea& ctuArea, const int ctuRsAddr )
{
  const PreCalcValues& pcv = *cs.pcv;
  const int numValidComp   = getNumberValidComponents( pcv.chrFormat );

  bool leftAvail      = false;
  bool aboveAvail     = false;
  bool aboveLeftAvail = false;
  deriveLoopFilterBoundaryAvailibility( cs, ctuArea.Y(), leftAvail, aboveAvail, aboveLeftAvail );

  // NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
  // For simplicity, here only picture boundaries are considered.
  bool rightAvail      = ( ctuArea.Y().x + pcv.maxCUSize < pcv.lumaWidth  );
  bool belowAvail      = ( ctuArea.Y().y + pcv.maxCUSize < pcv.lumaHeight );
  bool aboveRightAvail = ( ( ctuArea.Y().y > 0 ) && ( rightAvail ) );

  CHECK( !cs.pps->loopFilterAcrossSlicesEnabled, "Not implemented" );
  if( cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled )
  {
    const int ctuX = ctuArea.lx() >> cs.pcv->maxCUSizeLog2;
    const int ctuY = ctuArea.ly() >> cs.pcv->maxCUSizeLog2;
    // the tile query is only made for neighbours that are still considered available
    if( rightAvail )
    {
      rightAvail = cs.pps->canFilterCtuBdry( ctuX, ctuY,  1, 0 );
    }
    if( belowAvail )
    {
      belowAvail = cs.pps->canFilterCtuBdry( ctuX, ctuY,  0, 1 );
    }
    if( aboveRightAvail )
    {
      aboveRightAvail = cs.pps->canFilterCtuBdry( ctuX, ctuY,  1,-1 );
    }
  }

  for( int comp = 0; comp < numValidComp; comp++ )
  {
    const ComponentID compID = ComponentID( comp );
    const CompArea& compArea = ctuArea.block( compID );

    PelBuf srcBuf = cs.picture->getSaoBuf().get( compID );
    PelBuf orgBuf = cs.picture->getOrigBuf().get( compID );

    getBlkStats( compID, cs.sps->bitDepths[ toChannelType( compID ) ], saoStatistics[ ctuRsAddr ][ compID ],
                 srcBuf.bufAt( compArea ), orgBuf.bufAt( compArea ), srcBuf.stride, orgBuf.stride,
                 compArea.width, compArea.height,
                 leftAvail, rightAvail, aboveAvail, belowAvail, aboveLeftAvail, aboveRightAvail );
  }
}
293
294
/**
 * Gathers SAO statistics for all CTUs of a picture, visiting them in raster-scan order.
 *
 * The two sign line buffers are (re)sized to maxCUSize + 1 entries if necessary. For right,
 * below and above-right neighbours only picture boundaries are taken into account.
 *
 * \param blkStats  statistics storage; entry [ctu raster address][component] is filled
 * \param orgYuv    original picture samples
 * \param srcYuv    samples the statistics are computed on
 * \param cs        coding structure of the picture
 */
void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats, PelUnitBuf& orgYuv, PelUnitBuf& srcYuv, CodingStructure& cs )
{
  const PreCalcValues& pcv = *cs.pcv;
  const int numValidComp   = getNumberValidComponents(pcv.chrFormat);

  const size_t lineBufferSize = pcv.maxCUSize + 1;
  if( m_signLineBuf1.size() != lineBufferSize )
  {
    m_signLineBuf1.resize( lineBufferSize );
    m_signLineBuf2.resize( lineBufferSize );
  }

  // filled in by deriveLoopFilterBoundaryAvailibility() / the boundary tests below
  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail;

  int ctuRsAddr = 0;
  for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUSize )
  {
    for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUSize, ctuRsAddr++ )
    {
      // CTUs in the last column/row may be cut off by the picture boundary
      const uint32_t width  = ( xPos + pcv.maxCUSize > pcv.lumaWidth  ) ? ( pcv.lumaWidth  - xPos ) : pcv.maxCUSize;
      const uint32_t height = ( yPos + pcv.maxCUSize > pcv.lumaHeight ) ? ( pcv.lumaHeight - yPos ) : pcv.maxCUSize;
      const UnitArea area( cs.area.chromaFormat, Area( xPos, yPos, width, height ) );

      deriveLoopFilterBoundaryAvailibility( cs, area.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );

      //NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
      //For simplicity, here only picture boundaries are considered.
      isRightAvail      = ( xPos + pcv.maxCUSize < pcv.lumaWidth  );
      isBelowAvail      = ( yPos + pcv.maxCUSize < pcv.lumaHeight );
      isAboveRightAvail = ( ( yPos > 0 ) && ( isRightAvail ) );

      for( int comp = 0; comp < numValidComp; comp++ )
      {
        const ComponentID compID = ComponentID( comp );
        const CompArea& compArea = area.block( compID );

        int  srcStride = srcYuv.get( compID ).stride;
        Pel* srcBlk    = srcYuv.get( compID ).bufAt( compArea );

        int  orgStride = orgYuv.get( compID ).stride;
        Pel* orgBlk    = orgYuv.get( compID ).bufAt( compArea );

        getBlkStats( compID, cs.sps->bitDepths[toChannelType(compID)], blkStats[ctuRsAddr][compID],
                     srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height,
                     isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail );
      }
    }
  }
}
345
346
/**
 * Chooses the SAO parameters of one CTU and applies the resulting offsets.
 *
 * Both the "new" and the "merge" mode are evaluated, each starting from the same CABAC
 * context state; the cheaper one is stored in codedParams[ctuRsAddr] and the estimator is
 * left in the state reached by that mode. The reconstructed parameters are then derived
 * and offsetCTU() is invoked with the SAO buffer as source and the reco buffer as destination.
 * If \p allBlksDisabled is set, the coded parameters are merely reset.
 *
 * \param cs               coding structure of the picture
 * \param saoStatistics    per-CTU, per-component SAO statistics
 * \param saoEnabled       per-component enable flags
 * \param allBlksDisabled  true if SAO is off for the whole picture
 * \param ctuArea          area of the CTU
 * \param ctuRsAddr        raster-scan address of the CTU
 * \param reconParams      reconstructed parameters of all CTUs (entry ctuRsAddr is written)
 * \param codedParams      coded parameters of all CTUs (entry ctuRsAddr is written)
 */
void EncSampleAdaptiveOffset::decideCtuParams( CodingStructure& cs, const std::vector<SAOStatData**>& saoStatistics, const bool saoEnabled[ MAX_NUM_COMP ], const bool allBlksDisabled, const UnitArea& ctuArea, const int ctuRsAddr, SAOBlkParam* reconParams, SAOBlkParam* codedParams )
{
  const PreCalcValues& pcv = *cs.pcv;
  const Slice& slice       = *cs.slice;
  const int ctuCol         = ctuRsAddr % pcv.widthInCtus;
  const int ctuRow         = ctuRsAddr / pcv.widthInCtus;

  // reset CABAC estimator at the start of every CTU row but the first
  const bool restartCtx = m_EncCfg->m_ensureWppBitEqual
                       && m_EncCfg->m_numThreads < 1
                       && ctuCol == 0
                       && ctuRow > 0;
  if( restartCtx )
  {
    m_CABACEstimator->initCtxModels( slice );
  }

  // check disabled
  if( allBlksDisabled )
  {
    codedParams[ ctuRsAddr ].reset();
    return;
  }

  // get merge list
  SAOBlkParam* mergeList[ NUM_SAO_MERGE_TYPES ] = { nullptr };
  getMergeList( cs, ctuRsAddr, reconParams, mergeList );

  const TempCtx ctxStart( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       ctxBest ( m_CtxCache );

  SAOBlkParam candidate;
  double bestCost = MAX_DOUBLE;
  double candCost = MAX_DOUBLE;
  for( int mode = 1; mode < NUM_SAO_MODES; mode++ )
  {
    // every mode after the first starts from the saved context state again
    if( mode > 1 )
    {
      m_CABACEstimator->getCtx() = SAOCtx( ctxStart );
    }

    if( mode == SAO_MODE_NEW )
    {
      deriveModeNewRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, candidate, candCost );
    }
    else if( mode == SAO_MODE_MERGE )
    {
      deriveModeMergeRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, candidate, candCost );
    }
    else
    {
      THROW( "Not a supported SAO mode." );
    }

    if( candCost < bestCost )
    {
      bestCost                 = candCost;
      codedParams[ ctuRsAddr ] = candidate;
      ctxBest                  = SAOCtx( m_CABACEstimator->getCtx() );
    }
  }

  // apply reconstructed offsets
  m_CABACEstimator->getCtx() = SAOCtx( ctxBest );
  reconParams[ ctuRsAddr ]   = codedParams[ ctuRsAddr ];
  reconstructBlkSAOParam( reconParams[ ctuRsAddr ], mergeList );

  Picture& pic = *cs.picture;
  offsetCTU( ctuArea, pic.getSaoBuf(), cs.getRecoBuf(), reconParams[ ctuRsAddr ], cs );
}
420
421
int64_t EncSampleAdaptiveOffset::getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* invQuantOffset, SAOStatData& statData)
422
60.7k
{
423
60.7k
  int64_t dist        = 0;
424
60.7k
  int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth);
425
426
60.7k
  switch(typeIdc)
427
60.7k
  {
428
12.1k
  case SAO_TYPE_EO_0:
429
24.3k
  case SAO_TYPE_EO_90:
430
36.4k
  case SAO_TYPE_EO_135:
431
48.6k
  case SAO_TYPE_EO_45:
432
48.6k
    {
433
291k
      for (int offsetIdx=0; offsetIdx<NUM_SAO_EO_CLASSES; offsetIdx++)
434
243k
      {
435
243k
        dist += estSaoDist( statData.count[offsetIdx], invQuantOffset[offsetIdx], statData.diff[offsetIdx], shift);
436
243k
      }
437
48.6k
    }
438
48.6k
    break;
439
12.1k
  case SAO_TYPE_BO:
440
12.1k
    {
441
60.8k
      for (int offsetIdx=typeAuxInfo; offsetIdx<typeAuxInfo+4; offsetIdx++)
442
48.6k
      {
443
48.6k
        int bandIdx = offsetIdx % NUM_SAO_BO_CLASSES ;
444
48.6k
        dist += estSaoDist( statData.count[bandIdx], invQuantOffset[bandIdx], statData.diff[bandIdx], shift);
445
48.6k
      }
446
12.1k
    }
447
12.1k
    break;
448
0
  default:
449
0
    {
450
0
      THROW("Not a supported type");
451
36.4k
    }
452
60.7k
  }
453
454
60.7k
  return dist;
455
60.7k
}
456
457
/**
 * Evaluates ( count * offset^2 - 2 * diffSum * offset ) >> shift.
 *
 * \param count    number of samples in the class
 * \param offset   offset applied to each of those samples
 * \param diffSum  accumulated difference statistic of the class (statData.diff at the call sites)
 * \param shift    right shift applied to the result
 */
inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift)
{
  const int64_t squareTerm = count * offset * offset;
  const int64_t crossTerm  = diffSum * offset * 2;
  return ( squareTerm - crossTerm ) >> shift;
}
461
462
463
inline int EncSampleAdaptiveOffset::estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh )
464
1.55k
{
465
1.55k
  int iterOffset, tempOffset;
466
1.55k
  int64_t tempDist, tempRate;
467
1.55k
  double tempCost, tempMinCost;
468
1.55k
  int offsetOutput = 0;
469
1.55k
  iterOffset = offsetInput;
470
  // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
471
1.55k
  tempMinCost = lambda;
472
3.12k
  while (iterOffset != 0)
473
1.56k
  {
474
    // Calculate the bits required for signaling the offset
475
1.56k
    tempRate = (typeIdx == SAO_TYPE_BO) ? (abs((int)iterOffset)+2) : (abs((int)iterOffset)+1);
476
1.56k
    if (abs((int)iterOffset)==offsetTh) //inclusive
477
0
    {
478
0
      tempRate --;
479
0
    }
480
    // Do the dequantization before distortion calculation
481
1.56k
    tempOffset  = iterOffset * (1<< bitIncrease);
482
1.56k
    tempDist    = estSaoDist( count, tempOffset, diffSum, shift);
483
1.56k
    tempCost    = ((double)tempDist + lambda * (double) tempRate);
484
1.56k
    if(tempCost < tempMinCost)
485
330
    {
486
330
      tempMinCost = tempCost;
487
330
      offsetOutput = iterOffset;
488
330
      bestDist = tempDist;
489
330
      bestCost = tempCost;
490
330
    }
491
1.56k
    iterOffset = (iterOffset > 0) ? (iterOffset-1):(iterOffset+1);
492
1.56k
  }
493
1.55k
  return offsetOutput;
494
1.55k
}
495
496
/**
 * Derives the quantized SAO offsets of one component for one SAO type.
 *
 * Step 1: every class with a non-zero sample count gets an initial offset: the rounded
 *         ratio of diff to (count << offset step), clipped to [-offsetTh, offsetTh].
 * Step 2: edge offset - offsets with the wrong sign for their class are zeroed, the others
 *         are refined with estIterOffset(); typeAuxInfo becomes 0.
 *         band offset - every non-zero offset is refined, the start band with the lowest
 *         summed cost over four consecutive bands is stored in typeAuxInfo, and offsets
 *         outside these four bands are cleared.
 *
 * \param compIdx          component, selects lambda and offset step
 * \param channelBitDepth  bit depth of the component's channel
 * \param typeIdc          SAO type (one of the EO types or SAO_TYPE_BO)
 * \param statData         count/diff statistics for this type
 * \param quantOffsets     out: MAX_NUM_SAO_CLASSES quantized offsets
 * \param typeAuxInfo      out: start band for band offset, 0 for edge offset
 */
void EncSampleAdaptiveOffset::deriveOffsets(ComponentID compIdx, const int channelBitDepth, int typeIdc, SAOStatData& statData, int* quantOffsets, int& typeAuxInfo)
{
  const int bitDepth = channelBitDepth;
  const int shift    = 2 * DISTORTION_PRECISION_ADJUSTMENT(bitDepth);
  const int offsetTh = SampleAdaptiveOffset::getMaxOffsetQVal(channelBitDepth);  //inclusive

  ::memset(quantOffsets, 0, sizeof(int)*MAX_NUM_SAO_CLASSES);

  const bool isBandOffset = ( typeIdc == SAO_TYPE_BO );
  const bool isEdgeOffset = ( typeIdc == SAO_TYPE_EO_0   ) || ( typeIdc == SAO_TYPE_EO_90 )
                         || ( typeIdc == SAO_TYPE_EO_135 ) || ( typeIdc == SAO_TYPE_EO_45 );

  //derive initial offsets
  const int numClasses = isBandOffset ? ((int)NUM_SAO_BO_CLASSES) : ((int)NUM_SAO_EO_CLASSES);
  for( int classIdx = 0; classIdx < numClasses; classIdx++ )
  {
    const bool plainEoClass = !isBandOffset && ( classIdx == SAO_CLASS_EO_PLAIN );
    if( plainEoClass || statData.count[classIdx] == 0 )
    {
      continue; //offset will be zero
    }

#if (  DISTORTION_PRECISION_ADJUSTMENT(x)  == 0 )
    const double meanDiff = (double)(statData.diff[classIdx] ) / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]);
#else
    const double meanDiff = (double)(statData.diff[classIdx] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
                              / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]);
#endif
    quantOffsets[classIdx] = (int) xRoundIbdi(bitDepth, meanDiff);
    quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]);
  }

  // adjust offsets
  if( isEdgeOffset )
  {
    int64_t classDist;
    double  classCost;
    for( int classIdx = 0; classIdx < NUM_SAO_EO_CLASSES; classIdx++ )
    {
      // valley classes only admit non-negative offsets, peak classes only non-positive ones
      const bool valleyClass = ( classIdx == SAO_CLASS_EO_FULL_VALLEY ) || ( classIdx == SAO_CLASS_EO_HALF_VALLEY );
      const bool peakClass   = ( classIdx == SAO_CLASS_EO_HALF_PEAK   ) || ( classIdx == SAO_CLASS_EO_FULL_PEAK   );
      if( ( valleyClass && quantOffsets[classIdx] < 0 ) || ( peakClass && quantOffsets[classIdx] > 0 ) )
      {
        quantOffsets[classIdx] = 0;
      }

      if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero
      {
        quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], classDist, classCost, offsetTh );
      }
    }

    typeAuxInfo = 0;
  }
  else if( isBandOffset )
  {
    int64_t bandDist[NUM_SAO_BO_CLASSES] = { 0 };
    double  bandCost[NUM_SAO_BO_CLASSES];
    for( int classIdx = 0; classIdx < NUM_SAO_BO_CLASSES; classIdx++ )
    {
      bandCost[classIdx] = m_lambda[compIdx];
      if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero
      {
        quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], bandDist[classIdx], bandCost[classIdx], offsetTh );
      }
    }

    //decide the starting band index
    double lowestWindowCost = MAX_DOUBLE;
    for( int band = 0; band < NUM_SAO_BO_CLASSES; band++ )
    {
      double windowCost = bandCost[band % NUM_SAO_BO_CLASSES];
      for( int k = 1; k < 4; k++ )
      {
        windowCost += bandCost[( band + k ) % NUM_SAO_BO_CLASSES];
      }

      if( windowCost < lowestWindowCost )
      {
        lowestWindowCost = windowCost;
        typeAuxInfo      = band;
      }
    }

    //clear those unused classes
    int keptOffsets[NUM_SAO_BO_CLASSES] = { 0 };
    for( int k = 0; k < 4; k++ )
    {
      const int band    = ( typeAuxInfo + k ) % NUM_SAO_BO_CLASSES;
      keptOffsets[band] = quantOffsets[band];
    }
    ::memcpy(quantOffsets, keptOffsets, sizeof(int)*NUM_SAO_BO_CLASSES);
  }
  else
  {
    THROW("Not a supported type");
  }
}
613
614
/**
 * Rate-distortion search for the "new parameters" SAO mode of one CTU.
 *
 * Luma is decided on its own: "off" is the starting point and every SAO type is tried
 * from the context state reached after coding the merge flags. The chroma components are
 * decided jointly: all of them are off, or all enabled ones use the same SAO type. Finally
 * the chosen parameters are coded once more from the initial context state to obtain the
 * rate, and the cost is returned normalized by lambda.
 *
 * \param bitDepths     bit depths per channel type
 * \param ctuRsAddr     raster-scan address of the CTU
 * \param mergeList     merge candidates; only their presence (non-NULL) is used here
 * \param sliceEnabled  per-component SAO enable flags
 * \param blkStats      statistics, indexed [ctuRsAddr][component][type]
 * \param modeParam     out: selected parameters
 * \param modeNormCost  out: sum of distortion/lambda over the components plus the rate
 */
void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
{
  const int  numberOfComponents = m_numberOfComponents;
  const bool leftMergeAvail     = ( mergeList[SAO_MERGE_LEFT]  != NULL );
  const bool aboveMergeAvail    = ( mergeList[SAO_MERGE_ABOVE] != NULL );

  int64_t   trialDist[MAX_NUM_COMP];
  int64_t   chosenDist[MAX_NUM_COMP] = { 0 };
  SAOOffset trialOffset[MAX_NUM_COMP];
  int       invQuantOffset[MAX_NUM_SAO_CLASSES];

  //pre-encode merge flags
  modeParam[COMP_Y].modeIdc = SAO_MODE_OFF;
  const TempCtx ctxStartBlk   ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, leftMergeAvail, aboveMergeAvail, true );
  const TempCtx ctxStartLuma  ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       ctxBestLuma   ( m_CtxCache );

  double lowestCost;
  double trialCost;

  //------ luma --------//
  {
    const ComponentID luma = COMP_Y;

    //"off" case as initial cost
    modeParam[luma].modeIdc = SAO_MODE_OFF;
    m_CABACEstimator->resetBits();
    m_CABACEstimator->sao_offset_pars( modeParam[luma], luma, sliceEnabled[luma], bitDepths[CH_L] );
    chosenDist[luma] = 0;
    lowestCost       = m_lambda[luma] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits());
    ctxBestLuma      = SAOCtx( m_CABACEstimator->getCtx() );

    if( sliceEnabled[luma] )
    {
      for( int typeIdc = 0; typeIdc < NUM_SAO_NEW_TYPES; typeIdc++ )
      {
        SAOOffset& cand = trialOffset[luma];
        cand.modeIdc    = SAO_MODE_NEW;
        cand.typeIdc    = typeIdc;

        //derive coded offset
        deriveOffsets( luma, bitDepths[CH_L], typeIdc, blkStats[ctuRsAddr][luma][typeIdc], cand.offset, cand.typeAuxInfo );

        //inverse-quantize the offsets
        invertQuantOffsets( luma, typeIdc, cand.typeAuxInfo, invQuantOffset, cand.offset );

        //get distortion
        trialDist[luma] = getDistortion( bitDepths[CH_L], cand.typeIdc, cand.typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][luma][typeIdc] );

        //get rate
        m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma );
        m_CABACEstimator->resetBits();
        m_CABACEstimator->sao_offset_pars( cand, luma, sliceEnabled[luma], bitDepths[CH_L] );
        double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
        trialCost   = (double)trialDist[luma] + m_lambda[luma]*rate;
        if( trialCost < lowestCost )
        {
          lowestCost       = trialCost;
          chosenDist[luma] = trialDist[luma];
          modeParam[luma]  = cand;
          ctxBestLuma      = SAOCtx( m_CABACEstimator->getCtx() );
        }
      }
    }
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
  }

  //------ chroma --------//
  //"off" case as initial cost
  trialCost = 0;
  uint64_t previousFracBits = 0;
  m_CABACEstimator->resetBits();
  for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
  {
    const ComponentID component = ComponentID(componentIndex);

    modeParam[component].modeIdc = SAO_MODE_OFF;
    chosenDist[component]        = 0;
    m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths[CH_C] );
    // bits are accumulated by the estimator, so charge only the increment of this component
    const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
    trialCost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits);
    previousFracBits = currentFracBits;
  }

  lowestCost = trialCost;

  //doesn't need to store cabac status here since the whole CTU parameters will be re-encoded at the end of this function

  for( int typeIdc = 0; typeIdc < NUM_SAO_NEW_TYPES; typeIdc++ )
  {
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
    m_CABACEstimator->resetBits();
    previousFracBits = 0;
    trialCost        = 0;

    for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
    {
      const ComponentID component = ComponentID(componentIndex);
      SAOOffset& cand             = trialOffset[component];

      if( !sliceEnabled[component] )
      {
        cand.modeIdc         = SAO_MODE_OFF;
        trialDist[component] = 0;
        continue;
      }
      cand.modeIdc = SAO_MODE_NEW;
      cand.typeIdc = typeIdc;

      //derive offset & get distortion
      deriveOffsets( component, bitDepths[CH_C], typeIdc, blkStats[ctuRsAddr][component][typeIdc], cand.offset, cand.typeAuxInfo );
      invertQuantOffsets( component, typeIdc, cand.typeAuxInfo, invQuantOffset, cand.offset );
      trialDist[component] = getDistortion( bitDepths[CH_C], typeIdc, cand.typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][component][typeIdc] );
      m_CABACEstimator->sao_offset_pars( cand, component, sliceEnabled[component], bitDepths[CH_C] );
      const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
      trialCost += trialDist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits));
      previousFracBits = currentFracBits;
    }

    if( trialCost < lowestCost )
    {
      lowestCost = trialCost;
      for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
      {
        chosenDist[componentIndex] = trialDist[componentIndex];
        modeParam[componentIndex]  = trialOffset[componentIndex];
      }
    }
  } // SAO_TYPE loop

  //----- re-gen rate & normalized cost----//
  modeNormCost = 0;
  for( uint32_t componentIndex = COMP_Y; componentIndex < numberOfComponents; componentIndex++ )
  {
    modeNormCost += (double)chosenDist[componentIndex] / m_lambda[componentIndex];
  }

  m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk );
  m_CABACEstimator->resetBits();
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, leftMergeAvail, aboveMergeAvail, false );
  modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
}
753
754
/** Rate-distortion search over the SAO merge candidates of one CTU.
 *
 * Every non-null entry of \p mergeList is tried. Its cost is the sum over all
 * components of (distortion / lambda) - counted only for components whose merged
 * parameters are not SAO_MODE_OFF - plus FRAC_BITS_SCALE times the fractional
 * bits the CABAC estimator reports for signalling the merge.
 *
 * \param bitDepths     bit depths, indexed by channel type
 * \param ctuRsAddr     index of the CTU into \p blkStats
 * \param mergeList     merge candidates, indexed by merge type; null entries are skipped
 * \param sliceEnabled  per-component flags forwarded to the rate estimation
 * \param blkStats      statistics, indexed as [ctu][component][SAO type]
 * \param modeParam     receives the cheapest candidate; untouched if no candidate exists
 * \param modeNormCost  receives the cheapest cost; stays MAX_DOUBLE if no candidate exists
 *
 * When a candidate was selected, the estimator context is left in the state
 * reached after coding that candidate.
 */
void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
{
  const int     compCount = m_numberOfComponents;
  const bool    hasLeft   = ( mergeList[SAO_MERGE_LEFT]  != nullptr );
  const bool    hasAbove  = ( mergeList[SAO_MERGE_ABOVE] != nullptr );

  // Every candidate is rated starting from the same context state.
  const TempCtx ctxInitial( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       ctxWinner ( m_CtxCache );

  SAOBlkParam   candidate;

  modeNormCost = MAX_DOUBLE;

  for( int mergeType = 0; mergeType < NUM_SAO_MERGE_TYPES; ++mergeType )
  {
    SAOBlkParam* const neighbour = mergeList[mergeType];
    if( neighbour == nullptr )
    {
      continue;
    }

    candidate = *neighbour;

    // distortion, normalized by the per-component lambda
    double normDist = 0;
    for( int comp = 0; comp < compCount; ++comp )
    {
      candidate[comp].modeIdc = SAO_MODE_MERGE;
      candidate[comp].typeIdc = mergeType;

      SAOOffset& merged = ( *neighbour )[comp];
      if( merged.modeIdc == SAO_MODE_OFF )
      {
        continue;
      }

      // The neighbour's offsets are already reconstructed, so no inverse quantization here.
      const double compDist = (double)getDistortion( bitDepths[toChannelType( ComponentID( comp ) )], merged.typeIdc, merged.typeAuxInfo, merged.offset, blkStats[ctuRsAddr][comp][merged.typeIdc] );
      normDist += ( compDist / m_lambda[comp] );
    }

    // rate of signalling this candidate
    m_CABACEstimator->getCtx() = SAOCtx( ctxInitial );
    m_CABACEstimator->resetBits();
    m_CABACEstimator->sao_block_pars( candidate, bitDepths, sliceEnabled, hasLeft, hasAbove, false );
    const double rate          = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
    const double candidateCost = normDist + rate;

    // Strict comparison: on equal cost the earlier merge type is kept.
    if( candidateCost < modeNormCost )
    {
      modeNormCost = candidateCost;
      modeParam    = candidate;
      ctxWinner    = SAOCtx( m_CABACEstimator->getCtx() );
    }
  }

  if( modeNormCost < MAX_DOUBLE )
  {
    m_CABACEstimator->getCtx() = SAOCtx( ctxWinner );
  }
}
809
810
void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes
811
                        , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height
812
                        , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail )
813
12.1k
{
814
12.1k
  int x, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX;
815
12.1k
  int64_t *diff, *count;
816
12.1k
  Pel* srcLine, *orgLine;
817
12.1k
  const int skipLinesR = compIdx == COMP_Y ? 5 : 3;
818
12.1k
  const int skipLinesB = compIdx == COMP_Y ? 4 : 2;
819
820
72.9k
  for(int typeIdx=0; typeIdx< NUM_SAO_NEW_TYPES; typeIdx++)
821
60.7k
  {
822
60.7k
    SAOStatData& statsData= statsDataTypes[typeIdx];
823
60.7k
    statsData.reset();
824
60.7k
    srcLine = srcBlk;
825
60.7k
    orgLine = orgBlk;
826
60.7k
    diff    = statsData.diff;
827
60.7k
    count   = statsData.count;
828
60.7k
    switch(typeIdx)
829
60.7k
    {
830
12.1k
    case SAO_TYPE_EO_0:
831
12.1k
      {
832
12.1k
        endY   =  isBelowAvail ? (height - skipLinesB) : height;
833
12.1k
        startX = (isLeftAvail  ? 0 : 1);
834
12.1k
        endX   = (isRightAvail ? (width - skipLinesR) : (width - 1));
835
12.1k
        calcSaoStatisticsEo0(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff);
836
12.1k
      }
837
12.1k
      break;
838
12.1k
    case SAO_TYPE_EO_90:
839
12.1k
      {
840
12.1k
        int8_t *signUpLine = &m_signLineBuf1[0];
841
12.1k
        startX = 0;
842
12.1k
        startY = isAboveAvail ? 0 : 1;
843
12.1k
        endX   = (isRightAvail ? (width - skipLinesR) : width);
844
12.1k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
845
12.1k
        if (!isAboveAvail)
846
6.70k
        {
847
6.70k
          srcLine += srcStride;
848
6.70k
          orgLine += orgStride;
849
6.70k
        }
850
12.1k
        calcSaoStatisticsEo90(width,endX,startY,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
851
12.1k
      }
852
12.1k
      break;
853
12.1k
    case SAO_TYPE_EO_135:
854
12.1k
      {
855
12.1k
        diff +=2;
856
12.1k
        count+=2;
857
12.1k
        int8_t *signUpLine, *signDownLine;
858
12.1k
        signUpLine  = &m_signLineBuf1[0];
859
12.1k
        signDownLine= &m_signLineBuf2[0];
860
12.1k
        startX = isLeftAvail  ? 0 : 1;
861
12.1k
        endX   = isRightAvail ? (width - skipLinesR): (width - 1);
862
12.1k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
863
        //prepare 2nd line's upper sign
864
12.1k
        Pel* srcLineBelow = srcLine + srcStride;
865
699k
        for (x=startX; x<endX+1; x++)
866
687k
        {
867
687k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x-1]);
868
687k
        }
869
        //1st line
870
12.1k
        Pel* srcLineAbove = srcLine - srcStride;
871
12.1k
        firstLineStartX = isAboveLeftAvail ? 0    : 1;
872
12.1k
        firstLineEndX   = isAboveAvail     ? endX : 1;
873
319k
        for(x=firstLineStartX; x<firstLineEndX; x++)
874
307k
        {
875
307k
          edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1];
876
307k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
877
307k
          count[edgeType] ++;
878
307k
        }
879
12.1k
        srcLine  += srcStride;
880
12.1k
        orgLine  += orgStride;
881
12.1k
        calcSaoStatisticsEo135(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine,signDownLine);
882
12.1k
      }
883
12.1k
      break;
884
12.1k
    case SAO_TYPE_EO_45:
885
12.1k
      {
886
12.1k
        diff +=2;
887
12.1k
        count+=2;
888
12.1k
        int8_t *signUpLine = &m_signLineBuf1[1];
889
890
12.1k
        startX = isLeftAvail  ? 0 : 1;
891
12.1k
        endX   = isRightAvail ? (width - skipLinesR) : (width - 1);
892
12.1k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
893
894
        //prepare 2nd line upper sign
895
12.1k
        Pel* srcLineBelow = srcLine + srcStride;
896
699k
        for (x=startX-1; x<endX; x++)
897
687k
        {
898
687k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x+1]);
899
687k
        }
900
        //first line
901
12.1k
        Pel* srcLineAbove = srcLine - srcStride;
902
12.1k
        firstLineStartX = isAboveAvail ? startX : endX;
903
12.1k
        firstLineEndX   = (!isRightAvail && isAboveRightAvail) ? width : endX;
904
319k
        for(x=firstLineStartX; x<firstLineEndX; x++)
905
307k
        {
906
307k
          edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1];
907
307k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
908
307k
          count[edgeType] ++;
909
307k
        }
910
12.1k
        srcLine += srcStride;
911
12.1k
        orgLine += orgStride;
912
12.1k
        calcSaoStatisticsEo45(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
913
12.1k
      }
914
12.1k
      break;
915
12.1k
    case SAO_TYPE_BO:
916
12.1k
      {
917
12.1k
        startX = 0;
918
12.1k
        endX   = isRightAvail ? (width - skipLinesR) : width;
919
12.1k
        endY   = isBelowAvail ? (height- skipLinesB) : height;
920
12.1k
        calcSaoStatisticsBo(width,endX,endY,srcLine,orgLine,srcStride,orgStride,channelBitDepth,count,diff);
921
12.1k
      }
922
12.1k
      break;
923
0
    default:
924
0
      {
925
0
        THROW("Not a supported SAO type");
926
0
      }
927
60.7k
    }
928
60.7k
  }
929
12.1k
}
930
931
/** Decide whether the left, above and above-left neighbours of a CTU are usable.
 *
 * A neighbour is a candidate only if it lies inside the picture (CTU column/row
 * greater than zero) and PPS::canFilterCtuBdry() permits the boundary. The result
 * is then narrowed step by step:
 *  - if loop filtering across slices is disabled, the neighbour must be in the same slice;
 *  - if loop filtering across tiles is disabled, it must be in the same tile;
 *  - if the current sub-picture disables loop filtering across sub-pictures,
 *    it must be in the same sub-picture.
 *
 * \param cs                coding structure providing PPS, pre-calculated values and CU lookup
 * \param pos               position used to look up the current CU; its coordinates shifted
 *                          right by maxCUSizeLog2 give the CTU column and row
 * \param isLeftAvail       set to the availability of the left neighbour
 * \param isAboveAvail      set to the availability of the above neighbour
 * \param isAboveLeftAvail  set to the availability of the above-left neighbour
 */
void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position& pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const
{
  const bool isLoopFiltAcrossSlicePPS = cs.pps->loopFilterAcrossSlicesEnabled;
  const bool isLoopFiltAcrossTilePPS = cs.pps->loopFilterAcrossTilesEnabled;

  // Neighbouring CUs are looked up one CTU size away from pos.
  const int width = cs.pcv->maxCUSize;
  const int height = cs.pcv->maxCUSize;
  const CodingUnit* cuCurr = cs.getCU(pos, CH_L, TREE_D);
  const int ctuX = pos.x >> cs.pcv->maxCUSizeLog2;
  const int ctuY = pos.y >> cs.pcv->maxCUSizeLog2;
  const PPS* pps = cs.slice->pps;
  const CodingUnit* cuLeft      = ctuX > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, -1, 0 ) ? cs.getCU(pos.offset(-width, 0), CH_L, TREE_D): nullptr;
  const CodingUnit* cuAbove     = ctuY > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, 0, -1 ) ? cs.getCU(pos.offset(0, -height), CH_L, TREE_D): nullptr;
  const CodingUnit* cuAboveLeft = ctuY > 0 && ctuX > 0 && pps->canFilterCtuBdry( ctuX, ctuY, -1,-1 ) ? cs.getCU(pos.offset(-width, -height), CH_L, TREE_D): nullptr;

  // Base availability: the neighbour CU exists.
  isLeftAvail      = (cuLeft      != nullptr);
  isAboveAvail     = (cuAbove     != nullptr);
  isAboveLeftAvail = (cuAboveLeft != nullptr);

  // From here on an availability flag that is still true implies a non-null
  // neighbour pointer; the short-circuit && keeps null pointers from being dereferenced.
  if (!isLoopFiltAcrossSlicePPS)
  {
    isLeftAvail      = isLeftAvail      && CU::isSameSlice(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameSlice(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameSlice(*cuCurr, *cuAboveLeft);
  }

  if (!isLoopFiltAcrossTilePPS)
  {
    isLeftAvail      = isLeftAvail      && CU::isSameTile(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameTile(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameTile(*cuCurr, *cuAboveLeft);
  }

  // Bind by const reference: only one flag is read, so copying the whole SubPic
  // on every call is unnecessary. This is valid whether getSubPicFromCU() returns
  // a reference or a value (a temporary's lifetime is extended by the binding).
  const SubPic& curSubPic = cs.pps->getSubPicFromCU(*cuCurr);
  if (!curSubPic.loopFilterAcrossSubPicEnabled )
  {
    isLeftAvail      = isLeftAvail      && CU::isSameSubPic(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameSubPic(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameSubPic(*cuCurr, *cuAboveLeft);
  }
}
976
977
} // namespace vvenc
978
979
//! \}
980