Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/**
45
 \file     EncSampleAdaptiveOffset.cpp
46
 \brief       estimation part of sample adaptive offset class
47
 */
48
49
#include "EncSampleAdaptiveOffset.h"
50
#include "CommonLib/UnitTools.h"
51
#include "CommonLib/dtrace_codingstruct.h"
52
#include "CommonLib/dtrace_buffer.h"
53
#include "CommonLib/CodingStructure.h"
54
#include <string.h>
55
#include <stdlib.h>
56
#include <math.h>
57
#include "vvenc/vvencCfg.h"
58
59
//! \ingroup EncoderLib
60
//! \{
61
62
namespace vvenc {
63
64
65
79.4k
#define SAOCtx(c) SubCtx( Ctx::Sao, c )
66
67
68
//! rounding with IBDI
69
inline double xRoundIbdi2(int bitDepth, double x)
70
0
{
71
0
  return ((x) >= 0 ? ((int)((x) + 0.5)) : ((int)((x) -0.5)));
72
0
}
73
74
inline double xRoundIbdi(int bitDepth, double x)
75
12.7k
{
76
12.7k
  return (bitDepth > 8 ? xRoundIbdi2(bitDepth, (x)) : ((x)>=0 ? ((int)((x)+0.5)) : ((int)((x)-0.5)))) ;
77
12.7k
}
78
79
80
EncSampleAdaptiveOffset::EncSampleAdaptiveOffset()
81
7.81k
  : m_CABACEstimator( nullptr )
82
7.81k
  , m_CtxCache      ( nullptr )
83
7.81k
{
84
7.81k
}
85
86
EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset()
87
7.81k
{
88
7.81k
}
89
90
void EncSampleAdaptiveOffset::init( const VVEncCfg& encCfg )
91
7.81k
{
92
7.81k
  m_EncCfg = &encCfg;
93
94
7.81k
  if ( encCfg.m_bUseSAO )
95
7.81k
  {
96
7.81k
    SampleAdaptiveOffset::init( encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_CTUSize, encCfg.m_log2SaoOffsetScale[CH_L], encCfg.m_log2SaoOffsetScale[CH_C] );
97
7.81k
  }
98
7.81k
}
99
100
void EncSampleAdaptiveOffset::initSlice( const Slice* slice )
101
1.95k
{
102
1.95k
  memcpy( m_lambda, slice->getLambdas(), sizeof( m_lambda ) );
103
1.95k
}
104
105
void EncSampleAdaptiveOffset::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache )
106
3.45k
{
107
3.45k
  m_CABACEstimator = cabacEstimator;
108
3.45k
  m_CtxCache       = ctxCache;
109
3.45k
}
110
111
void EncSampleAdaptiveOffset::disabledRate( CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
112
0
{
113
0
  if ( saoEncodingRate > 0.0 )
114
0
  {
115
0
    const PreCalcValues& pcv     = *cs.pcv;
116
0
    const int numberOfComponents = getNumberValidComponents( chromaFormat );
117
0
    const int picTempLayer       = cs.slice->TLayer;
118
0
    int numCtusForSAOOff[MAX_NUM_COMP];
119
120
0
    for (int compIdx = 0; compIdx < numberOfComponents; compIdx++)
121
0
    {
122
0
      numCtusForSAOOff[compIdx] = 0;
123
0
      for( int ctuRsAddr=0; ctuRsAddr< pcv.sizeInCtus; ctuRsAddr++)
124
0
      {
125
0
        if( reconParams[ctuRsAddr][compIdx].modeIdc == SAO_MODE_OFF)
126
0
        {
127
0
          numCtusForSAOOff[compIdx]++;
128
0
        }
129
0
      }
130
0
    }
131
0
    if (saoEncodingRateChroma > 0.0)
132
0
    {
133
0
      for (int compIdx = 0; compIdx < numberOfComponents; compIdx++)
134
0
      {
135
0
        saoDisabledRate[compIdx][picTempLayer] = (double)numCtusForSAOOff[compIdx]/(double)pcv.sizeInCtus;
136
0
      }
137
0
    }
138
0
    else if (picTempLayer == 0)
139
0
    {
140
0
      saoDisabledRate[COMP_Y][0] = (double)(numCtusForSAOOff[COMP_Y]+numCtusForSAOOff[COMP_Cb]+numCtusForSAOOff[COMP_Cr])/(double)(pcv.sizeInCtus *3);
141
0
    }
142
0
  }
143
0
}
144
145
void EncSampleAdaptiveOffset::decidePicParams( const CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], bool saoEnabled[ MAX_NUM_COMP ], const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
146
1.11k
{
147
1.11k
  const Slice& slice           = *cs.slice;
148
1.11k
  const int numberOfComponents = getNumberValidComponents( chromaFormat );
149
150
  // reset
151
1.11k
  if( slice.pendingRasInit )
152
0
  {
153
0
    for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
154
0
    {
155
0
      for( int tempLayer = 1; tempLayer < VVENC_MAX_TLAYER; tempLayer++ )
156
0
      {
157
0
        saoDisabledRate[ compIdx ][ tempLayer ] = 0.0;
158
0
      }
159
0
    }
160
0
  }
161
162
4.44k
  for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
163
3.33k
  {
164
3.33k
    saoEnabled[ compIdx ] = false;
165
3.33k
  }
166
167
1.11k
  const int picTempLayer = slice.TLayer;
168
4.44k
  for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
169
3.33k
  {
170
    // enable per default
171
3.33k
    saoEnabled[ compIdx ] = true;
172
173
3.33k
    if( saoEncodingRate > 0.0 )
174
0
    {
175
0
      if( saoEncodingRateChroma > 0.0 )
176
0
      {
177
        // decide slice-level on/off based on previous results
178
0
        if( ( picTempLayer > 0 )
179
0
          && ( saoDisabledRate[ compIdx ][ picTempLayer - 1 ] > ( ( compIdx == COMP_Y ) ? saoEncodingRate : saoEncodingRateChroma ) ) )
180
0
        {
181
0
          saoEnabled[ compIdx ] = false;
182
0
        }
183
0
      }
184
0
      else
185
0
      {
186
        // decide slice-level on/off based on previous results
187
0
        if( ( picTempLayer > 0 )
188
0
          && ( saoDisabledRate[ COMP_Y ][ 0 ] > saoEncodingRate ) )
189
0
        {
190
0
          saoEnabled[ compIdx ] = false;
191
0
        }
192
0
      }
193
0
    }
194
3.33k
  }
195
1.11k
}
196
197
void EncSampleAdaptiveOffset::storeCtuReco( CodingStructure& cs, const UnitArea& ctuArea, const int ctuX, const int ctuY )
198
3.45k
{
199
3.45k
  const int STORE_CTU_INCREASE = 8;
200
3.45k
  Position lPos( ctuArea.lx() + STORE_CTU_INCREASE, ctuArea.ly() + STORE_CTU_INCREASE );
201
3.45k
  Size    lSize( ctuArea.lwidth(), ctuArea.lheight() );
202
203
3.45k
  const bool tileBdryClip = cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled;
204
3.45k
  int startX = 0;
205
3.45k
  int startY = 0;
206
3.45k
  if( tileBdryClip )  
207
0
  {
208
0
    startX = cs.pps->tileColBd[cs.pps->ctuToTileCol[ctuX]] << cs.pcv->maxCUSizeLog2;
209
0
    startY = cs.pps->tileRowBd[cs.pps->ctuToTileRow[ctuY]] << cs.pcv->maxCUSizeLog2;
210
0
  }
211
212
3.45k
  if ( ctuArea.lx() == startX )
213
1.95k
  {
214
1.95k
    lPos.x       = ctuArea.lx();
215
1.95k
    lSize.width += STORE_CTU_INCREASE;
216
1.95k
  }
217
3.45k
  if ( ctuArea.ly() == startY )
218
1.89k
  {
219
1.89k
    lPos.y        = ctuArea.ly();
220
1.89k
    lSize.height += STORE_CTU_INCREASE;
221
1.89k
  }
222
223
3.45k
  int clipX = cs.pcv->lumaWidth  - lPos.x;
224
3.45k
  int clipY = cs.pcv->lumaHeight - lPos.y;
225
3.45k
  if( tileBdryClip )  
226
0
  {
227
0
    clipX  = cs.pps->tileColBdRgt[cs.pps->ctuToTileCol[ctuX]] - lPos.x;
228
0
    clipY  = cs.pps->tileRowBdBot[cs.pps->ctuToTileRow[ctuY]] - lPos.y;
229
0
  }
230
3.45k
  lSize.clipSize( clipX, clipY );
231
232
3.45k
  const UnitArea relocArea( ctuArea.chromaFormat, Area( lPos, lSize ) );
233
3.45k
  Picture& pic       = *cs.picture;
234
3.45k
  PelUnitBuf recoYuv = pic.getRecoBuf().subBuf( relocArea );
235
3.45k
  PelUnitBuf tempYuv = pic.getSaoBuf().subBuf( relocArea );
236
3.45k
  tempYuv.copyFrom( recoYuv );
237
3.45k
}
238
239
void EncSampleAdaptiveOffset::getCtuStatistics( CodingStructure& cs, std::vector<SAOStatData**>& saoStatistics, const UnitArea& ctuArea, const int ctuRsAddr )
240
3.46k
{
241
3.46k
  const PreCalcValues& pcv     = *cs.pcv;
242
3.46k
  const int numberOfComponents = getNumberValidComponents( pcv.chrFormat );
243
3.46k
  bool isLeftAvail             = false;
244
3.46k
  bool isRightAvail            = false;
245
3.46k
  bool isAboveAvail            = false;
246
3.46k
  bool isBelowAvail            = false;
247
3.46k
  bool isAboveLeftAvail        = false;
248
3.46k
  bool isAboveRightAvail       = false;
249
250
3.46k
  deriveLoopFilterBoundaryAvailibility( cs, ctuArea.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );
251
252
  // NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
253
  // For simplicity, here only picture boundaries are considered.
254
255
3.46k
  isRightAvail      = ( ctuArea.Y().x + pcv.maxCUSize < pcv.lumaWidth  );
256
3.46k
  isBelowAvail      = ( ctuArea.Y().y + pcv.maxCUSize < pcv.lumaHeight );
257
3.46k
  isAboveRightAvail = ( ( ctuArea.Y().y > 0 ) && ( isRightAvail ) );
258
259
3.46k
  CHECK( !cs.pps->loopFilterAcrossSlicesEnabled, "Not implemented" );
260
3.46k
  if( cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled )
261
0
  {
262
0
    const int ctuX    = ctuArea.lx() >> cs.pcv->maxCUSizeLog2;
263
0
    const int ctuY    = ctuArea.ly() >> cs.pcv->maxCUSizeLog2;
264
0
    isRightAvail      = isRightAvail      && cs.pps->canFilterCtuBdry( ctuX, ctuY,  1, 0 );
265
0
    isBelowAvail      = isBelowAvail      && cs.pps->canFilterCtuBdry( ctuX, ctuY,  0, 1 );
266
0
    isAboveRightAvail = isAboveRightAvail && cs.pps->canFilterCtuBdry( ctuX, ctuY,  1,-1 );
267
0
  }
268
269
  //VirtualBoundaries vb;
270
  //bool isCtuCrossedByVirtualBoundaries = vb.isCrossedByVirtualBoundaries(xPos, yPos, width, height, cs.slice->pps);
271
272
13.8k
  for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
273
10.3k
  {
274
10.3k
    const ComponentID compID = ComponentID( compIdx );
275
10.3k
    const CompArea& compArea = ctuArea.block( compID );
276
277
10.3k
    PelBuf srcBuf = cs.picture->getSaoBuf().get( compID );
278
10.3k
    PelBuf orgBuf = cs.picture->getOrigBuf().get( compID );
279
280
10.3k
    getBlkStats( compID,
281
10.3k
                 cs.sps->bitDepths[ toChannelType( compID ) ],
282
10.3k
                 saoStatistics[ ctuRsAddr ][ compID ],
283
10.3k
                 srcBuf.bufAt( compArea ),
284
10.3k
                 orgBuf.bufAt( compArea ),
285
10.3k
                 srcBuf.stride,
286
10.3k
                 orgBuf.stride,
287
10.3k
                 compArea.width,
288
10.3k
                 compArea.height,
289
10.3k
                 isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail
290
10.3k
               );
291
10.3k
  }
292
3.46k
}
293
294
void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats, PelUnitBuf& orgYuv, PelUnitBuf& srcYuv, CodingStructure& cs )
295
0
{
296
0
  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail;
297
298
0
  const PreCalcValues& pcv = *cs.pcv;
299
0
  const int numberOfComponents = getNumberValidComponents(pcv.chrFormat);
300
301
0
  size_t lineBufferSize = pcv.maxCUSize + 1;
302
0
  if (m_signLineBuf1.size() != lineBufferSize)
303
0
  {
304
0
    m_signLineBuf1.resize(lineBufferSize);
305
0
    m_signLineBuf2.resize(lineBufferSize);
306
0
  }
307
308
0
  int ctuRsAddr = 0;
309
0
  for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUSize )
310
0
  {
311
0
    for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUSize )
312
0
    {
313
0
      const uint32_t width  = (xPos + pcv.maxCUSize  > pcv.lumaWidth)  ? (pcv.lumaWidth - xPos)  : pcv.maxCUSize;
314
0
      const uint32_t height = (yPos + pcv.maxCUSize > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUSize;
315
0
      const UnitArea area( cs.area.chromaFormat, Area(xPos , yPos, width, height) );
316
317
0
      deriveLoopFilterBoundaryAvailibility(cs, area.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );
318
319
      //NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
320
      //For simplicity, here only picture boundaries are considered.
321
322
0
      isRightAvail      = (xPos + pcv.maxCUSize  < pcv.lumaWidth );
323
0
      isBelowAvail      = (yPos + pcv.maxCUSize < pcv.lumaHeight);
324
0
      isAboveRightAvail = ((yPos > 0) && (isRightAvail));
325
326
0
      for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
327
0
      {
328
0
        const ComponentID compID = ComponentID(compIdx);
329
0
        const CompArea& compArea = area.block( compID );
330
331
0
        int  srcStride  = srcYuv.get(compID).stride;
332
0
        Pel* srcBlk     = srcYuv.get(compID).bufAt( compArea );
333
334
0
        int  orgStride  = orgYuv.get(compID).stride;
335
0
        Pel* orgBlk     = orgYuv.get(compID).bufAt( compArea );
336
337
0
        getBlkStats(compID, cs.sps->bitDepths[toChannelType(compID)], blkStats[ctuRsAddr][compID]
338
0
                  , srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height
339
0
                  , isLeftAvail,  isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail );
340
0
      }
341
0
      ctuRsAddr++;
342
0
    }
343
0
  }
344
0
}
345
346
void EncSampleAdaptiveOffset::decideCtuParams( CodingStructure& cs, const std::vector<SAOStatData**>& saoStatistics, const bool saoEnabled[ MAX_NUM_COMP ], const bool allBlksDisabled, const UnitArea& ctuArea, const int ctuRsAddr, SAOBlkParam* reconParams, SAOBlkParam* codedParams )
347
3.46k
{
348
3.46k
  const PreCalcValues& pcv = *cs.pcv;
349
3.46k
  const Slice& slice       = *cs.slice;
350
3.46k
  const int  ctuPosX       = ctuRsAddr % pcv.widthInCtus;
351
3.46k
  const int  ctuPosY       = ctuRsAddr / pcv.widthInCtus;
352
353
  // reset CABAC estimator
354
3.46k
  if( m_EncCfg->m_ensureWppBitEqual
355
3.46k
      && m_EncCfg->m_numThreads < 1
356
0
      && ctuPosX == 0
357
0
      && ctuPosY > 0 )
358
0
  {
359
0
    m_CABACEstimator->initCtxModels( slice );
360
0
  }
361
362
  // check disabled
363
3.46k
  if( allBlksDisabled )
364
0
  {
365
0
    codedParams[ ctuRsAddr ].reset();
366
0
    return;
367
0
  }
368
369
  // get merge list
370
3.46k
  SAOBlkParam* mergeList[ NUM_SAO_MERGE_TYPES ] = { NULL };
371
3.46k
  getMergeList( cs, ctuRsAddr, reconParams, mergeList );
372
373
3.46k
  const TempCtx ctxStart( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
374
3.46k
  TempCtx       ctxBest ( m_CtxCache );
375
376
3.46k
  SAOBlkParam modeParam;
377
3.46k
  double minCost  = MAX_DOUBLE;
378
3.46k
  double modeCost = MAX_DOUBLE;
379
10.3k
  for( int mode = 1; mode < NUM_SAO_MODES; mode++ )
380
6.92k
  {
381
6.92k
    if( mode > 1 )
382
3.46k
    {
383
3.46k
      m_CABACEstimator->getCtx() = SAOCtx( ctxStart );
384
3.46k
    }
385
6.92k
    switch( mode )
386
6.92k
    {
387
3.46k
    case SAO_MODE_NEW:
388
3.46k
      {
389
3.46k
        deriveModeNewRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost );
390
3.46k
      }
391
3.46k
      break;
392
3.46k
    case SAO_MODE_MERGE:
393
3.46k
      {
394
3.46k
        deriveModeMergeRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost );
395
3.46k
      }
396
3.46k
      break;
397
0
    default:
398
0
      {
399
0
        THROW( "Not a supported SAO mode." );
400
0
      }
401
6.92k
    }
402
403
6.92k
    if( modeCost < minCost )
404
5.81k
    {
405
5.81k
      minCost                  = modeCost;
406
5.81k
      codedParams[ ctuRsAddr ] = modeParam;
407
5.81k
      ctxBest                  = SAOCtx( m_CABACEstimator->getCtx() );
408
5.81k
    }
409
6.92k
  }
410
411
  // apply reconstructed offsets
412
3.46k
  m_CABACEstimator->getCtx() = SAOCtx( ctxBest );
413
3.46k
  reconParams[ ctuRsAddr ] = codedParams[ ctuRsAddr ];
414
415
3.46k
  reconstructBlkSAOParam( reconParams[ ctuRsAddr ], mergeList );
416
417
3.46k
  Picture& pic = *cs.picture;
418
3.46k
  offsetCTU( ctuArea, pic.getSaoBuf(), cs.getRecoBuf(), reconParams[ ctuRsAddr ], cs );
419
3.46k
}
420
421
int64_t EncSampleAdaptiveOffset::getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* invQuantOffset, SAOStatData& statData)
422
51.9k
{
423
51.9k
  int64_t dist        = 0;
424
51.9k
  int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth);
425
426
51.9k
  switch(typeIdc)
427
51.9k
  {
428
10.3k
  case SAO_TYPE_EO_0:
429
20.7k
  case SAO_TYPE_EO_90:
430
31.1k
  case SAO_TYPE_EO_135:
431
41.5k
  case SAO_TYPE_EO_45:
432
41.5k
    {
433
249k
      for (int offsetIdx=0; offsetIdx<NUM_SAO_EO_CLASSES; offsetIdx++)
434
207k
      {
435
207k
        dist += estSaoDist( statData.count[offsetIdx], invQuantOffset[offsetIdx], statData.diff[offsetIdx], shift);
436
207k
      }
437
41.5k
    }
438
41.5k
    break;
439
10.4k
  case SAO_TYPE_BO:
440
10.4k
    {
441
52.0k
      for (int offsetIdx=typeAuxInfo; offsetIdx<typeAuxInfo+4; offsetIdx++)
442
41.6k
      {
443
41.6k
        int bandIdx = offsetIdx % NUM_SAO_BO_CLASSES ;
444
41.6k
        dist += estSaoDist( statData.count[bandIdx], invQuantOffset[bandIdx], statData.diff[bandIdx], shift);
445
41.6k
      }
446
10.4k
    }
447
10.4k
    break;
448
0
  default:
449
0
    {
450
0
      THROW("Not a supported type");
451
31.1k
    }
452
51.9k
  }
453
454
51.9k
  return dist;
455
51.9k
}
456
457
inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift)
458
250k
{
459
250k
  return (( count*offset*offset-diffSum*offset*2 ) >> shift);
460
250k
}
461
462
463
inline int EncSampleAdaptiveOffset::estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh )
464
1.41k
{
465
1.41k
  int iterOffset, tempOffset;
466
1.41k
  int64_t tempDist, tempRate;
467
1.41k
  double tempCost, tempMinCost;
468
1.41k
  int offsetOutput = 0;
469
1.41k
  iterOffset = offsetInput;
470
  // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
471
1.41k
  tempMinCost = lambda;
472
2.83k
  while (iterOffset != 0)
473
1.42k
  {
474
    // Calculate the bits required for signaling the offset
475
1.42k
    tempRate = (typeIdx == SAO_TYPE_BO) ? (abs((int)iterOffset)+2) : (abs((int)iterOffset)+1);
476
1.42k
    if (abs((int)iterOffset)==offsetTh) //inclusive
477
0
    {
478
0
      tempRate --;
479
0
    }
480
    // Do the dequantization before distortion calculation
481
1.42k
    tempOffset  = iterOffset * (1<< bitIncrease);
482
1.42k
    tempDist    = estSaoDist( count, tempOffset, diffSum, shift);
483
1.42k
    tempCost    = ((double)tempDist + lambda * (double) tempRate);
484
1.42k
    if(tempCost < tempMinCost)
485
298
    {
486
298
      tempMinCost = tempCost;
487
298
      offsetOutput = iterOffset;
488
298
      bestDist = tempDist;
489
298
      bestCost = tempCost;
490
298
    }
491
1.42k
    iterOffset = (iterOffset > 0) ? (iterOffset-1):(iterOffset+1);
492
1.42k
  }
493
1.41k
  return offsetOutput;
494
1.41k
}
495
496
void EncSampleAdaptiveOffset::deriveOffsets(ComponentID compIdx, const int channelBitDepth, int typeIdc, SAOStatData& statData, int* quantOffsets, int& typeAuxInfo)
497
51.9k
{
498
51.9k
  int bitDepth = channelBitDepth;
499
51.9k
  int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(bitDepth);
500
51.9k
  int offsetTh = SampleAdaptiveOffset::getMaxOffsetQVal(channelBitDepth);  //inclusive
501
502
51.9k
  ::memset(quantOffsets, 0, sizeof(int)*MAX_NUM_SAO_CLASSES);
503
504
  //derive initial offsets
505
51.9k
  int numClasses = (typeIdc == SAO_TYPE_BO)?((int)NUM_SAO_BO_CLASSES):((int)NUM_SAO_EO_CLASSES);
506
592k
  for(int classIdx=0; classIdx< numClasses; classIdx++)
507
540k
  {
508
540k
    if( (typeIdc != SAO_TYPE_BO) && (classIdx==SAO_CLASS_EO_PLAIN)  )
509
41.5k
    {
510
41.5k
      continue; //offset will be zero
511
41.5k
    }
512
513
498k
    if(statData.count[classIdx] == 0)
514
485k
    {
515
485k
      continue; //offset will be zero
516
485k
    }
517
12.7k
#if (  DISTORTION_PRECISION_ADJUSTMENT(x)  == 0 )
518
12.7k
    quantOffsets[classIdx] =
519
12.7k
       (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] ) / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]));
520
12.7k
     quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]);
521
#else
522
      quantOffsets[classIdx] =
523
        (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
524
                                     / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]));
525
      quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]);
526
#endif
527
12.7k
  }
528
529
  // adjust offsets
530
51.9k
  switch(typeIdc)
531
51.9k
  {
532
10.3k
  case SAO_TYPE_EO_0:
533
20.7k
  case SAO_TYPE_EO_90:
534
31.1k
  case SAO_TYPE_EO_135:
535
41.5k
  case SAO_TYPE_EO_45:
536
41.5k
    {
537
41.5k
      int64_t classDist;
538
41.5k
      double classCost;
539
249k
      for(int classIdx=0; classIdx<NUM_SAO_EO_CLASSES; classIdx++)
540
207k
      {
541
207k
        if(classIdx==SAO_CLASS_EO_FULL_VALLEY && quantOffsets[classIdx] < 0)
542
0
        {
543
0
          quantOffsets[classIdx] =0;
544
0
        }
545
207k
        if(classIdx==SAO_CLASS_EO_HALF_VALLEY && quantOffsets[classIdx] < 0)
546
0
        {
547
0
          quantOffsets[classIdx] =0;
548
0
        }
549
207k
        if(classIdx==SAO_CLASS_EO_HALF_PEAK   && quantOffsets[classIdx] > 0)
550
0
        {
551
0
          quantOffsets[classIdx] =0;
552
0
        }
553
207k
        if(classIdx==SAO_CLASS_EO_FULL_PEAK   && quantOffsets[classIdx] > 0)
554
0
        {
555
0
          quantOffsets[classIdx] =0;
556
0
        }
557
558
207k
        if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero
559
1.07k
        {
560
1.07k
          quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], classDist , classCost , offsetTh );
561
1.07k
        }
562
207k
      }
563
564
41.5k
      typeAuxInfo =0;
565
41.5k
    }
566
41.5k
    break;
567
10.3k
  case SAO_TYPE_BO:
568
10.3k
    {
569
10.3k
      int64_t  distBOClasses[NUM_SAO_BO_CLASSES];
570
10.3k
      double costBOClasses[NUM_SAO_BO_CLASSES];
571
10.3k
      ::memset(distBOClasses, 0, sizeof(int64_t)*NUM_SAO_BO_CLASSES);
572
343k
      for(int classIdx=0; classIdx< NUM_SAO_BO_CLASSES; classIdx++)
573
332k
      {
574
332k
        costBOClasses[classIdx]= m_lambda[compIdx];
575
332k
        if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero
576
338
        {
577
338
          quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], distBOClasses[classIdx], costBOClasses[classIdx], offsetTh );
578
338
        }
579
332k
      }
580
581
      //decide the starting band index
582
10.3k
      double minCost = MAX_DOUBLE, cost;
583
343k
      for(int band=0; band< NUM_SAO_BO_CLASSES; band++)
584
332k
      {
585
332k
        cost  = costBOClasses[(band  )%NUM_SAO_BO_CLASSES];
586
332k
        cost += costBOClasses[(band+1)%NUM_SAO_BO_CLASSES];
587
332k
        cost += costBOClasses[(band+2)%NUM_SAO_BO_CLASSES];
588
332k
        cost += costBOClasses[(band+3)%NUM_SAO_BO_CLASSES];
589
590
332k
        if(cost < minCost)
591
10.4k
        {
592
10.4k
          minCost = cost;
593
10.4k
          typeAuxInfo = band;
594
10.4k
        }
595
332k
      }
596
      //clear those unused classes
597
10.3k
      int clearQuantOffset[NUM_SAO_BO_CLASSES];
598
10.3k
      ::memset(clearQuantOffset, 0, sizeof(int)*NUM_SAO_BO_CLASSES);
599
51.9k
      for(int i=0; i< 4; i++)
600
41.5k
      {
601
41.5k
        int band = (typeAuxInfo+i)%NUM_SAO_BO_CLASSES;
602
41.5k
        clearQuantOffset[band] = quantOffsets[band];
603
41.5k
      }
604
10.3k
      ::memcpy(quantOffsets, clearQuantOffset, sizeof(int)*NUM_SAO_BO_CLASSES);
605
10.3k
    }
606
10.3k
    break;
607
0
  default:
608
0
    {
609
0
      THROW("Not a supported type");
610
31.1k
    }
611
51.9k
  }
612
51.9k
}
613
614
void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
615
3.46k
{
616
3.46k
  double minCost, cost;
617
3.46k
  uint64_t previousFracBits;
618
3.46k
  const int numberOfComponents = m_numberOfComponents;
619
620
3.46k
  int64_t dist[MAX_NUM_COMP], modeDist[MAX_NUM_COMP];
621
3.46k
  SAOOffset testOffset[MAX_NUM_COMP];
622
3.46k
  int invQuantOffset[MAX_NUM_SAO_CLASSES];
623
13.8k
  for(int comp=0; comp < MAX_NUM_COMP; comp++)
624
10.3k
  {
625
10.3k
    modeDist[comp] = 0;
626
10.3k
  }
627
628
  //pre-encode merge flags
629
3.46k
  modeParam[COMP_Y].modeIdc = SAO_MODE_OFF;
630
3.46k
  const TempCtx ctxStartBlk   ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
631
3.46k
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), true );
632
3.46k
  const TempCtx ctxStartLuma  ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
633
3.46k
  TempCtx       ctxBestLuma   ( m_CtxCache );
634
635
    //------ luma --------//
636
3.46k
  {
637
3.46k
    const ComponentID compIdx = COMP_Y;
638
    //"off" case as initial cost
639
3.46k
    modeParam[compIdx].modeIdc = SAO_MODE_OFF;
640
3.46k
    m_CABACEstimator->resetBits();
641
3.46k
    m_CABACEstimator->sao_offset_pars( modeParam[compIdx], compIdx, sliceEnabled[compIdx], bitDepths[CH_L] );
642
3.46k
    modeDist[compIdx] = 0;
643
3.46k
    minCost           = m_lambda[compIdx] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits());
644
3.46k
    ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() );
645
3.46k
    if(sliceEnabled[compIdx])
646
3.46k
    {
647
20.7k
      for(int typeIdc=0; typeIdc< NUM_SAO_NEW_TYPES; typeIdc++)
648
17.3k
      {
649
17.3k
        testOffset[compIdx].modeIdc = SAO_MODE_NEW;
650
17.3k
        testOffset[compIdx].typeIdc = typeIdc;
651
652
        //derive coded offset
653
17.3k
        deriveOffsets(compIdx, bitDepths[CH_L], typeIdc, blkStats[ctuRsAddr][compIdx][typeIdc], testOffset[compIdx].offset, testOffset[compIdx].typeAuxInfo);
654
655
        //inversed quantized offsets
656
17.3k
        invertQuantOffsets(compIdx, typeIdc, testOffset[compIdx].typeAuxInfo, invQuantOffset, testOffset[compIdx].offset);
657
658
        //get distortion
659
17.3k
        dist[compIdx] = getDistortion(bitDepths[CH_L], testOffset[compIdx].typeIdc, testOffset[compIdx].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][compIdx][typeIdc]);
660
661
        //get rate
662
17.3k
        m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma );
663
17.3k
        m_CABACEstimator->resetBits();
664
17.3k
        m_CABACEstimator->sao_offset_pars( testOffset[compIdx], compIdx, sliceEnabled[compIdx], bitDepths[CH_L] );
665
17.3k
        double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
666
17.3k
        cost = (double)dist[compIdx] + m_lambda[compIdx]*rate;
667
17.3k
        if(cost < minCost)
668
22
        {
669
22
          minCost = cost;
670
22
          modeDist[compIdx] = dist[compIdx];
671
22
          modeParam[compIdx]= testOffset[compIdx];
672
22
          ctxBestLuma = SAOCtx( m_CABACEstimator->getCtx() );
673
22
        }
674
17.3k
      }
675
3.46k
    }
676
3.46k
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
677
3.46k
  }
678
679
  //------ chroma --------//
680
//"off" case as initial cost
681
3.46k
  cost = 0;
682
3.46k
  previousFracBits = 0;
683
3.46k
  m_CABACEstimator->resetBits();
684
10.3k
  for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++)
685
6.92k
  {
686
6.92k
    const ComponentID component = ComponentID(componentIndex);
687
688
6.92k
    modeParam[component].modeIdc = SAO_MODE_OFF;
689
6.92k
    modeDist [component]         = 0;
690
6.92k
    m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths[CH_C] );
691
6.92k
    const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
692
6.92k
    cost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits);
693
6.92k
    previousFracBits = currentFracBits;
694
6.92k
  }
695
696
3.46k
  minCost = cost;
697
698
  //doesn't need to store cabac status here since the whole CTU parameters will be re-encoded at the end of this function
699
700
20.7k
  for(int typeIdc=0; typeIdc< NUM_SAO_NEW_TYPES; typeIdc++)
701
17.3k
  {
702
17.3k
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
703
17.3k
    m_CABACEstimator->resetBits();
704
17.3k
    previousFracBits = 0;
705
17.3k
    cost = 0;
706
707
51.9k
    for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++)
708
34.6k
    {
709
34.6k
      const ComponentID component = ComponentID(componentIndex);
710
34.6k
      if(!sliceEnabled[component])
711
0
      {
712
0
        testOffset[component].modeIdc = SAO_MODE_OFF;
713
0
        dist[component]= 0;
714
0
        continue;
715
0
      }
716
34.6k
      testOffset[component].modeIdc = SAO_MODE_NEW;
717
34.6k
      testOffset[component].typeIdc = typeIdc;
718
719
      //derive offset & get distortion
720
34.6k
      deriveOffsets(component, bitDepths[CH_C], typeIdc, blkStats[ctuRsAddr][component][typeIdc], testOffset[component].offset, testOffset[component].typeAuxInfo);
721
34.6k
      invertQuantOffsets(component, typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, testOffset[component].offset);
722
34.6k
      dist[component] = getDistortion(bitDepths[CH_C], typeIdc, testOffset[component].typeAuxInfo, invQuantOffset, blkStats[ctuRsAddr][component][typeIdc]);
723
34.6k
      m_CABACEstimator->sao_offset_pars( testOffset[component], component, sliceEnabled[component], bitDepths[CH_C] );
724
34.6k
      const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
725
34.6k
      cost += dist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits));
726
34.6k
      previousFracBits = currentFracBits;
727
34.6k
    }
728
729
17.3k
    if(cost < minCost)
730
1
    {
731
1
      minCost = cost;
732
3
      for(uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++)
733
2
      {
734
2
        modeDist[componentIndex]  = dist[componentIndex];
735
2
        modeParam[componentIndex] = testOffset[componentIndex];
736
2
      }
737
1
    }
738
739
17.3k
  } // SAO_TYPE loop
740
741
  //----- re-gen rate & normalized cost----//
742
3.46k
  modeNormCost = 0;
743
13.8k
  for(uint32_t componentIndex = COMP_Y; componentIndex < numberOfComponents; componentIndex++)
744
10.3k
  {
745
10.3k
    modeNormCost += (double)modeDist[componentIndex] / m_lambda[componentIndex];
746
10.3k
  }
747
748
3.46k
  m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk );
749
3.46k
  m_CABACEstimator->resetBits();
750
3.46k
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false );
751
3.46k
  modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
752
3.46k
}
753
754
void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
755
3.46k
{
756
3.46k
  modeNormCost = MAX_DOUBLE;
757
758
3.46k
  double cost;
759
3.46k
  SAOBlkParam testBlkParam;
760
3.46k
  const int numberOfComponents = m_numberOfComponents;
761
762
3.46k
  const TempCtx ctxStart  ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
763
3.46k
  TempCtx       ctxBest   ( m_CtxCache );
764
765
10.3k
  for(int mergeType=0; mergeType< NUM_SAO_MERGE_TYPES; mergeType++)
766
6.92k
  {
767
6.92k
    if(mergeList[mergeType] == NULL)
768
3.84k
    {
769
3.84k
      continue;
770
3.84k
    }
771
772
3.08k
    testBlkParam = *(mergeList[mergeType]);
773
    //normalized distortion
774
3.08k
    double normDist=0;
775
12.3k
    for(int compIdx = 0; compIdx < numberOfComponents; compIdx++)
776
9.24k
    {
777
9.24k
      testBlkParam[compIdx].modeIdc = SAO_MODE_MERGE;
778
9.24k
      testBlkParam[compIdx].typeIdc = mergeType;
779
780
9.24k
      SAOOffset& mergedOffsetParam = (*(mergeList[mergeType]))[compIdx];
781
782
9.24k
      if( mergedOffsetParam.modeIdc != SAO_MODE_OFF)
783
9
      {
784
        //offsets have been reconstructed. Don't call inversed quantization function.
785
9
        normDist += (((double)getDistortion(bitDepths[toChannelType(ComponentID(compIdx))], mergedOffsetParam.typeIdc, mergedOffsetParam.typeAuxInfo, mergedOffsetParam.offset, blkStats[ctuRsAddr][compIdx][mergedOffsetParam.typeIdc]))
786
9
                       /m_lambda[compIdx] );
787
9
      }
788
9.24k
    }
789
790
    //rate
791
3.08k
    m_CABACEstimator->getCtx() = SAOCtx( ctxStart );
792
3.08k
    m_CABACEstimator->resetBits();
793
3.08k
    m_CABACEstimator->sao_block_pars( testBlkParam, bitDepths, sliceEnabled, (mergeList[SAO_MERGE_LEFT]!= NULL), (mergeList[SAO_MERGE_ABOVE]!= NULL), false );
794
3.08k
    double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
795
3.08k
    cost = normDist+rate;
796
797
3.08k
    if(cost < modeNormCost)
798
2.35k
    {
799
2.35k
      modeNormCost = cost;
800
2.35k
      modeParam    = testBlkParam;
801
2.35k
      ctxBest      = SAOCtx( m_CABACEstimator->getCtx() );
802
2.35k
    }
803
3.08k
  }
804
3.46k
  if( modeNormCost < MAX_DOUBLE )
805
2.35k
  {
806
2.35k
    m_CABACEstimator->getCtx() = SAOCtx( ctxBest );
807
2.35k
  }
808
3.46k
}
809
810
void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes
811
                        , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height
812
                        , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail )
813
10.3k
{
814
10.3k
  int x, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX;
815
10.3k
  int64_t *diff, *count;
816
10.3k
  Pel* srcLine, *orgLine;
817
10.3k
  const int skipLinesR = compIdx == COMP_Y ? 5 : 3;
818
10.3k
  const int skipLinesB = compIdx == COMP_Y ? 4 : 2;
819
820
62.3k
  for(int typeIdx=0; typeIdx< NUM_SAO_NEW_TYPES; typeIdx++)
821
51.9k
  {
822
51.9k
    SAOStatData& statsData= statsDataTypes[typeIdx];
823
51.9k
    statsData.reset();
824
51.9k
    srcLine = srcBlk;
825
51.9k
    orgLine = orgBlk;
826
51.9k
    diff    = statsData.diff;
827
51.9k
    count   = statsData.count;
828
51.9k
    switch(typeIdx)
829
51.9k
    {
830
10.3k
    case SAO_TYPE_EO_0:
831
10.3k
      {
832
10.3k
        endY   =  isBelowAvail ? (height - skipLinesB) : height;
833
10.3k
        startX = (isLeftAvail  ? 0 : 1);
834
10.3k
        endX   = (isRightAvail ? (width - skipLinesR) : (width - 1));
835
10.3k
        calcSaoStatisticsEo0(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff);
836
10.3k
      }
837
10.3k
      break;
838
10.3k
    case SAO_TYPE_EO_90:
839
10.3k
      {
840
10.3k
        int8_t *signUpLine = &m_signLineBuf1[0];
841
10.3k
        startX = 0;
842
10.3k
        startY = isAboveAvail ? 0 : 1;
843
10.3k
        endX   = (isRightAvail ? (width - skipLinesR) : width);
844
10.3k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
845
10.3k
        if (!isAboveAvail)
846
5.67k
        {
847
5.67k
          srcLine += srcStride;
848
5.67k
          orgLine += orgStride;
849
5.67k
        }
850
10.3k
        calcSaoStatisticsEo90(width,endX,startY,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
851
10.3k
      }
852
10.3k
      break;
853
10.3k
    case SAO_TYPE_EO_135:
854
10.3k
      {
855
10.3k
        diff +=2;
856
10.3k
        count+=2;
857
10.3k
        int8_t *signUpLine, *signDownLine;
858
10.3k
        signUpLine  = &m_signLineBuf1[0];
859
10.3k
        signDownLine= &m_signLineBuf2[0];
860
10.3k
        startX = isLeftAvail  ? 0 : 1;
861
10.3k
        endX   = isRightAvail ? (width - skipLinesR): (width - 1);
862
10.3k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
863
        //prepare 2nd line's upper sign
864
10.3k
        Pel* srcLineBelow = srcLine + srcStride;
865
593k
        for (x=startX; x<endX+1; x++)
866
583k
        {
867
583k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x-1]);
868
583k
        }
869
        //1st line
870
10.3k
        Pel* srcLineAbove = srcLine - srcStride;
871
10.3k
        firstLineStartX = isAboveLeftAvail ? 0    : 1;
872
10.3k
        firstLineEndX   = isAboveAvail     ? endX : 1;
873
273k
        for(x=firstLineStartX; x<firstLineEndX; x++)
874
263k
        {
875
263k
          edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1];
876
263k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
877
263k
          count[edgeType] ++;
878
263k
        }
879
10.3k
        srcLine  += srcStride;
880
10.3k
        orgLine  += orgStride;
881
10.3k
        calcSaoStatisticsEo135(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine,signDownLine);
882
10.3k
      }
883
10.3k
      break;
884
10.3k
    case SAO_TYPE_EO_45:
885
10.3k
      {
886
10.3k
        diff +=2;
887
10.3k
        count+=2;
888
10.3k
        int8_t *signUpLine = &m_signLineBuf1[1];
889
890
10.3k
        startX = isLeftAvail  ? 0 : 1;
891
10.3k
        endX   = isRightAvail ? (width - skipLinesR) : (width - 1);
892
10.3k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
893
894
        //prepare 2nd line upper sign
895
10.3k
        Pel* srcLineBelow = srcLine + srcStride;
896
593k
        for (x=startX-1; x<endX; x++)
897
583k
        {
898
583k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x+1]);
899
583k
        }
900
        //first line
901
10.3k
        Pel* srcLineAbove = srcLine - srcStride;
902
10.3k
        firstLineStartX = isAboveAvail ? startX : endX;
903
10.3k
        firstLineEndX   = (!isRightAvail && isAboveRightAvail) ? width : endX;
904
273k
        for(x=firstLineStartX; x<firstLineEndX; x++)
905
263k
        {
906
263k
          edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1];
907
263k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
908
263k
          count[edgeType] ++;
909
263k
        }
910
10.3k
        srcLine += srcStride;
911
10.3k
        orgLine += orgStride;
912
10.3k
        calcSaoStatisticsEo45(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
913
10.3k
      }
914
10.3k
      break;
915
10.3k
    case SAO_TYPE_BO:
916
10.3k
      {
917
10.3k
        startX = 0;
918
10.3k
        endX   = isRightAvail ? (width - skipLinesR) : width;
919
10.3k
        endY   = isBelowAvail ? (height- skipLinesB) : height;
920
10.3k
        calcSaoStatisticsBo(width,endX,endY,srcLine,orgLine,srcStride,orgStride,channelBitDepth,count,diff);
921
10.3k
      }
922
10.3k
      break;
923
0
    default:
924
0
      {
925
0
        THROW("Not a supported SAO type");
926
0
      }
927
51.9k
    }
928
51.9k
  }
929
10.3k
}
930
931
void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position& pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const
932
3.46k
{
933
3.46k
  const bool isLoopFiltAcrossSlicePPS = cs.pps->loopFilterAcrossSlicesEnabled;
934
3.46k
  const bool isLoopFiltAcrossTilePPS = cs.pps->loopFilterAcrossTilesEnabled;
935
936
3.46k
  const int width = cs.pcv->maxCUSize;
937
3.46k
  const int height = cs.pcv->maxCUSize;
938
3.46k
  const CodingUnit* cuCurr = cs.getCU(pos, CH_L, TREE_D);
939
3.46k
  const int ctuX = pos.x >> cs.pcv->maxCUSizeLog2;
940
3.46k
  const int ctuY = pos.y >> cs.pcv->maxCUSizeLog2;
941
3.46k
  const PPS* pps = cs.slice->pps;
942
3.46k
  const CodingUnit* cuLeft      = ctuX > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, -1, 0 ) ? cs.getCU(pos.offset(-width, 0), CH_L, TREE_D): nullptr;
943
3.46k
  const CodingUnit* cuAbove     = ctuY > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, 0, -1 ) ? cs.getCU(pos.offset(0, -height), CH_L, TREE_D): nullptr;
944
3.46k
  const CodingUnit* cuAboveLeft = ctuY > 0 && ctuX > 0 && pps->canFilterCtuBdry( ctuX, ctuY, -1,-1 ) ? cs.getCU(pos.offset(-width, -height), CH_L, TREE_D): nullptr;
945
946
3.46k
  if (!isLoopFiltAcrossSlicePPS)
947
0
  {
948
0
    isLeftAvail      = (cuLeft == NULL)      ? false : CU::isSameSlice(*cuCurr, *cuLeft);
949
0
    isAboveAvail     = (cuAbove == NULL)     ? false : CU::isSameSlice(*cuCurr, *cuAbove);
950
0
    isAboveLeftAvail = (cuAboveLeft == NULL) ? false : CU::isSameSlice(*cuCurr, *cuAboveLeft);
951
0
  }
952
3.46k
  else
953
3.46k
  {
954
3.46k
    isLeftAvail      = (cuLeft != NULL);
955
3.46k
    isAboveAvail     = (cuAbove != NULL);
956
3.46k
    isAboveLeftAvail = (cuAboveLeft != NULL);
957
3.46k
  }
958
959
3.46k
  if (!isLoopFiltAcrossTilePPS)
960
0
  {
961
0
    isLeftAvail      = (!isLeftAvail)      ? false : CU::isSameTile(*cuCurr, *cuLeft);
962
0
    isAboveAvail     = (!isAboveAvail)     ? false : CU::isSameTile(*cuCurr, *cuAbove);
963
0
    isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameTile(*cuCurr, *cuAboveLeft);
964
0
  }
965
966
967
3.46k
  SubPic curSubPic = cs.pps->getSubPicFromCU(*cuCurr);
968
3.46k
  if (!curSubPic.loopFilterAcrossSubPicEnabled )
969
0
  {
970
0
    isLeftAvail      = (!isLeftAvail)      ? false : CU::isSameSubPic(*cuCurr, *cuLeft);
971
0
    isAboveAvail     = (!isAboveAvail)     ? false : CU::isSameSubPic(*cuCurr, *cuAbove);
972
0
    isAboveLeftAvail = (!isAboveLeftAvail) ? false : CU::isSameSubPic(*cuCurr, *cuAboveLeft);
973
0
  }
974
975
3.46k
}
976
977
} // namespace vvenc
978
979
//! \}
980