Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/EncSampleAdaptiveOffset.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/**
45
 \file     EncSampleAdaptiveOffset.cpp
46
 \brief       estimation part of sample adaptive offset class
47
 */
48
49
#include "EncSampleAdaptiveOffset.h"
50
#include "CommonLib/UnitTools.h"
51
#include "CommonLib/dtrace_codingstruct.h"
52
#include "CommonLib/dtrace_buffer.h"
53
#include "CommonLib/CodingStructure.h"
54
#include <string.h>
55
#include <stdlib.h>
56
#include <math.h>
57
#include "vvenc/vvencCfg.h"
58
59
//! \ingroup EncoderLib
60
//! \{
61
62
namespace vvenc {
63
64
65
76.3k
// Shorthand that wraps a context object `c` in SubCtx with the Ctx::Sao selector.
// It is used in this file whenever the CABAC estimator contexts are saved to or
// restored from a TempCtx.
#define SAOCtx(c) SubCtx( Ctx::Sao, c )
66
67
68
//! rounding with IBDI
69
//! Rounds \p x to the nearest integer, with halves rounded away from zero.
//! The first parameter (bit depth) does not influence the result; it is left
//! unnamed so that the signature stays compatible without an unused-parameter warning.
//! \return the rounded value, converted back to double
inline double xRoundIbdi2(int /*bitDepth*/, double x)
{
  // Truncation after adding/subtracting 0.5 yields round-half-away-from-zero.
  return ( x >= 0 ) ? static_cast<int>( x + 0.5 ) : static_cast<int>( x - 0.5 );
}
73
74
//! Rounds \p x to the nearest integer, with halves rounded away from zero.
//! The original implementation selected between two code paths depending on
//! whether the bit depth exceeded 8, but both paths performed exactly the same
//! rounding. The dead distinction is removed; results are unchanged for every input.
//! \return the rounded value, converted back to double
inline double xRoundIbdi(int /*bitDepth*/, double x)
{
  return ( x >= 0 ) ? static_cast<int>( x + 0.5 ) : static_cast<int>( x - 0.5 );
}
78
79
80
EncSampleAdaptiveOffset::EncSampleAdaptiveOffset()
81
7.60k
  : m_CABACEstimator( nullptr )
82
7.60k
  , m_CtxCache      ( nullptr )
83
7.60k
{
84
7.60k
}
85
86
// Nothing to release here: the destructor body is intentionally empty.
EncSampleAdaptiveOffset::~EncSampleAdaptiveOffset() = default;
89
90
void EncSampleAdaptiveOffset::init( const VVEncCfg& encCfg )
91
7.60k
{
92
7.60k
  m_EncCfg = &encCfg;
93
94
7.60k
  if ( encCfg.m_bUseSAO )
95
7.60k
  {
96
7.60k
    SampleAdaptiveOffset::init( encCfg.m_internChromaFormat, encCfg.m_CTUSize, encCfg.m_CTUSize, encCfg.m_log2SaoOffsetScale[CH_L], encCfg.m_log2SaoOffsetScale[CH_C] );
97
7.60k
  }
98
7.60k
}
99
100
void EncSampleAdaptiveOffset::initSlice( const Slice* slice )
101
1.90k
{
102
1.90k
  memcpy( m_lambda, slice->getLambdas(), sizeof( m_lambda ) );
103
1.90k
}
104
105
// Stores the CABAC estimator and the context cache used by the RD decisions in
// this class. Only the pointers are kept; this class's destructor does not
// release them.
void EncSampleAdaptiveOffset::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache* ctxCache )
{
  m_CtxCache       = ctxCache;
  m_CABACEstimator = cabacEstimator;
}
110
111
// Updates the statistics of how many CTUs ended up with SAO switched off.
//
// \param cs                     coding structure providing pcv (CTU count) and the slice (temporal layer)
// \param saoDisabledRate        [component][temporal layer] table that receives the fraction of SAO-off CTUs
// \param reconParams            per-CTU reconstructed SAO parameters, indexed by CTU raster address
// \param saoEncodingRate        nothing is done unless this is > 0
// \param saoEncodingRateChroma  if > 0, a separate rate is stored per component and temporal layer;
//                               otherwise one joint rate is stored in saoDisabledRate[COMP_Y][0],
//                               and only for pictures of temporal layer 0
// \param chromaFormat           determines the number of valid components
//
// Fix over the previous version: the per-component counters are zero-initialised
// and the joint rate is averaged over the valid components only. Previously the
// joint rate always read the Cb and Cr counters, which were never written when
// the chroma format has a single component. With three valid components the
// result is identical to before.
void EncSampleAdaptiveOffset::disabledRate( CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], SAOBlkParam* reconParams, const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
{
  if( !( saoEncodingRate > 0.0 ) )
  {
    return;
  }

  const PreCalcValues& pcv     = *cs.pcv;
  const int numberOfComponents = getNumberValidComponents( chromaFormat );
  const int picTempLayer       = cs.slice->TLayer;

  // count, per valid component, the CTUs for which SAO is off
  int numCtusForSAOOff[MAX_NUM_COMP] = { 0 };
  for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
  {
    for( int ctuRsAddr = 0; ctuRsAddr < pcv.sizeInCtus; ctuRsAddr++ )
    {
      if( reconParams[ctuRsAddr][compIdx].modeIdc == SAO_MODE_OFF )
      {
        numCtusForSAOOff[compIdx]++;
      }
    }
  }

  if( saoEncodingRateChroma > 0.0 )
  {
    // separate rate per component for the current temporal layer
    for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
    {
      saoDisabledRate[compIdx][picTempLayer] = (double)numCtusForSAOOff[compIdx] / (double)pcv.sizeInCtus;
    }
  }
  else if( picTempLayer == 0 )
  {
    // one joint rate over all valid components
    int numCtusOffTotal = 0;
    for( int compIdx = 0; compIdx < numberOfComponents; compIdx++ )
    {
      numCtusOffTotal += numCtusForSAOOff[compIdx];
    }
    saoDisabledRate[COMP_Y][0] = (double)numCtusOffTotal / (double)( pcv.sizeInCtus * numberOfComponents );
  }
}
144
145
// Decides per component whether SAO is enabled for the current picture.
//
// Components beyond the number of valid components are always disabled. Valid
// components are enabled unless saoEncodingRate > 0, the picture is not in
// temporal layer 0, and a previously recorded SAO-off rate exceeds its threshold:
//  - saoEncodingRateChroma > 0: the rate of the same component in the next lower
//    temporal layer is compared against saoEncodingRate (luma) or
//    saoEncodingRateChroma (chroma);
//  - otherwise: the joint rate in saoDisabledRate[COMP_Y][0] is compared against
//    saoEncodingRate.
// If the slice has pendingRasInit set, the recorded rates of all temporal layers
// >= 1 are cleared first.
void EncSampleAdaptiveOffset::decidePicParams( const CodingStructure& cs, double saoDisabledRate[ MAX_NUM_COMP ][ VVENC_MAX_TLAYER ], bool saoEnabled[ MAX_NUM_COMP ], const double saoEncodingRate, const double saoEncodingRateChroma, const ChromaFormat& chromaFormat )
{
  const Slice& slice      = *cs.slice;
  const int numValidComps = getNumberValidComponents( chromaFormat );
  const int picTempLayer  = slice.TLayer;

  // reset
  if( slice.pendingRasInit )
  {
    for( int tempLayer = 1; tempLayer < VVENC_MAX_TLAYER; tempLayer++ )
    {
      for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
      {
        saoDisabledRate[ compIdx ][ tempLayer ] = 0.0;
      }
    }
  }

  const bool useHistory      = ( saoEncodingRate > 0.0 ) && ( picTempLayer > 0 );
  const bool perCompDecision = ( saoEncodingRateChroma > 0.0 );

  for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
  {
    // valid components are enabled per default, all others stay off
    bool enable = ( compIdx < numValidComps );

    if( enable && useHistory )
    {
      // decide slice-level on/off based on previous results
      if( perCompDecision )
      {
        const double threshold = ( compIdx == COMP_Y ) ? saoEncodingRate : saoEncodingRateChroma;
        if( saoDisabledRate[ compIdx ][ picTempLayer - 1 ] > threshold )
        {
          enable = false;
        }
      }
      else if( saoDisabledRate[ COMP_Y ][ 0 ] > saoEncodingRate )
      {
        enable = false;
      }
    }

    saoEnabled[ compIdx ] = enable;
  }
}
196
197
// Copies reconstructed samples belonging to one CTU from the picture's
// reconstruction buffer into its SAO buffer.
//
// The copied luma window is the CTU area shifted by STORE_CTU_INCREASE samples to
// the right and downwards. At the first column / row (of the picture, or of the
// tile when filtering across tiles is disabled) the window instead starts at the
// CTU origin and is enlarged by the same amount. Finally the window is clipped to
// the picture size, or to the right/bottom tile boundary in the tile case.
void EncSampleAdaptiveOffset::storeCtuReco( CodingStructure& cs, const UnitArea& ctuArea, const int ctuX, const int ctuY )
{
  const int  STORE_CTU_INCREASE = 8;
  const bool tileBdryClip       = cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled;

  // origin of the region the CTU belongs to (picture or tile)
  int startX = 0;
  int startY = 0;
  if( tileBdryClip )
  {
    startX = cs.pps->tileColBd[cs.pps->ctuToTileCol[ctuX]] << cs.pcv->maxCUSizeLog2;
    startY = cs.pps->tileRowBd[cs.pps->ctuToTileRow[ctuY]] << cs.pcv->maxCUSizeLog2;
  }

  Position lPos( ctuArea.lx(), ctuArea.ly() );
  Size    lSize( ctuArea.lwidth(), ctuArea.lheight() );

  // horizontally: either grow the window (first column) or shift it
  if( ctuArea.lx() == startX )
  {
    lSize.width += STORE_CTU_INCREASE;
  }
  else
  {
    lPos.x += STORE_CTU_INCREASE;
  }

  // vertically: same rule for the first row
  if( ctuArea.ly() == startY )
  {
    lSize.height += STORE_CTU_INCREASE;
  }
  else
  {
    lPos.y += STORE_CTU_INCREASE;
  }

  // remaining extent up to the picture or tile border
  int clipX;
  int clipY;
  if( tileBdryClip )
  {
    clipX = cs.pps->tileColBdRgt[cs.pps->ctuToTileCol[ctuX]] - lPos.x;
    clipY = cs.pps->tileRowBdBot[cs.pps->ctuToTileRow[ctuY]] - lPos.y;
  }
  else
  {
    clipX = cs.pcv->lumaWidth  - lPos.x;
    clipY = cs.pcv->lumaHeight - lPos.y;
  }
  lSize.clipSize( clipX, clipY );

  const UnitArea relocArea( ctuArea.chromaFormat, Area( lPos, lSize ) );
  Picture& pic       = *cs.picture;
  PelUnitBuf srcReco = pic.getRecoBuf().subBuf( relocArea );
  PelUnitBuf dstSao  = pic.getSaoBuf().subBuf( relocArea );
  dstSao.copyFrom( srcReco );
}
238
239
// Gathers the SAO statistics of a single CTU for every valid component.
//
// Source samples are read from the picture's SAO buffer, original samples from
// the original buffer; results go to saoStatistics[ ctuRsAddr ][ component ].
// Left / above / above-left availability comes from
// deriveLoopFilterBoundaryAvailibility(); right / below / above-right are derived
// from the picture dimensions and, if filtering across tiles is disabled, from
// the tile boundaries as well.
void EncSampleAdaptiveOffset::getCtuStatistics( CodingStructure& cs, std::vector<SAOStatData**>& saoStatistics, const UnitArea& ctuArea, const int ctuRsAddr )
{
  const PreCalcValues& pcv = *cs.pcv;
  const int numValidComps  = getNumberValidComponents( pcv.chrFormat );

  bool isLeftAvail      = false;
  bool isAboveAvail     = false;
  bool isAboveLeftAvail = false;
  deriveLoopFilterBoundaryAvailibility( cs, ctuArea.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );

  // NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
  // For simplicity, here only picture boundaries are considered.
  bool isRightAvail      = ( ctuArea.Y().x + pcv.maxCUSize < pcv.lumaWidth  );
  bool isBelowAvail      = ( ctuArea.Y().y + pcv.maxCUSize < pcv.lumaHeight );
  bool isAboveRightAvail = ( ( ctuArea.Y().y > 0 ) && ( isRightAvail ) );

  CHECK( !cs.pps->loopFilterAcrossSlicesEnabled, "Not implemented" );

  const bool restrictToTile = cs.pps->getNumTiles() > 1 && !cs.pps->loopFilterAcrossTilesEnabled;
  if( restrictToTile )
  {
    const int ctuX    = ctuArea.lx() >> cs.pcv->maxCUSizeLog2;
    const int ctuY    = ctuArea.ly() >> cs.pcv->maxCUSizeLog2;
    isRightAvail      = isRightAvail      && cs.pps->canFilterCtuBdry( ctuX, ctuY,  1,  0 );
    isBelowAvail      = isBelowAvail      && cs.pps->canFilterCtuBdry( ctuX, ctuY,  0,  1 );
    isAboveRightAvail = isAboveRightAvail && cs.pps->canFilterCtuBdry( ctuX, ctuY,  1, -1 );
  }

  // virtual boundaries are not taken into account here

  for( int compIdx = 0; compIdx < numValidComps; compIdx++ )
  {
    const ComponentID compID = ComponentID( compIdx );
    const CompArea& compArea = ctuArea.block( compID );

    PelBuf srcBuf = cs.picture->getSaoBuf().get( compID );
    PelBuf orgBuf = cs.picture->getOrigBuf().get( compID );

    getBlkStats( compID, cs.sps->bitDepths[ toChannelType( compID ) ], saoStatistics[ ctuRsAddr ][ compID ],
                 srcBuf.bufAt( compArea ), orgBuf.bufAt( compArea ),
                 srcBuf.stride, orgBuf.stride,
                 compArea.width, compArea.height,
                 isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail );
  }
}
293
294
// Gathers SAO statistics for all CTUs of a picture in raster-scan order.
//
// \param blkStats  output, indexed as blkStats[ ctuRsAddr ][ component ]
// \param orgYuv    original samples
// \param srcYuv    samples the statistics are computed on
// \param cs        coding structure (picture dimensions, CTU size, bit depths)
//
// The two sign line buffers are resized to maxCUSize + 1 entries if they do not
// already have that size.
void EncSampleAdaptiveOffset::getStatistics(std::vector<SAOStatData**>& blkStats, PelUnitBuf& orgYuv, PelUnitBuf& srcYuv, CodingStructure& cs )
{
  const PreCalcValues& pcv = *cs.pcv;
  const int numValidComps  = getNumberValidComponents(pcv.chrFormat);

  const size_t lineBufferSize = pcv.maxCUSize + 1;
  if( m_signLineBuf1.size() != lineBufferSize )
  {
    m_signLineBuf1.resize( lineBufferSize );
    m_signLineBuf2.resize( lineBufferSize );
  }

  bool isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail;

  int ctuRsAddr = 0;
  for( uint32_t yPos = 0; yPos < pcv.lumaHeight; yPos += pcv.maxCUSize )
  {
    for( uint32_t xPos = 0; xPos < pcv.lumaWidth; xPos += pcv.maxCUSize, ctuRsAddr++ )
    {
      // CTUs at the right / bottom picture border may be smaller than maxCUSize
      const uint32_t width  = (xPos + pcv.maxCUSize > pcv.lumaWidth)  ? (pcv.lumaWidth - xPos)  : pcv.maxCUSize;
      const uint32_t height = (yPos + pcv.maxCUSize > pcv.lumaHeight) ? (pcv.lumaHeight - yPos) : pcv.maxCUSize;
      const UnitArea area( cs.area.chromaFormat, Area(xPos , yPos, width, height) );

      deriveLoopFilterBoundaryAvailibility( cs, area.Y(), isLeftAvail, isAboveAvail, isAboveLeftAvail );

      //NOTE: The number of skipped lines during gathering CTU statistics depends on the slice boundary availabilities.
      //For simplicity, here only picture boundaries are considered.
      isRightAvail      = (xPos + pcv.maxCUSize < pcv.lumaWidth );
      isBelowAvail      = (yPos + pcv.maxCUSize < pcv.lumaHeight);
      isAboveRightAvail = ((yPos > 0) && (isRightAvail));

      for( int compIdx = 0; compIdx < numValidComps; compIdx++ )
      {
        const ComponentID compID = ComponentID(compIdx);
        const CompArea& compArea = area.block( compID );

        int  srcStride = srcYuv.get(compID).stride;
        Pel* srcBlk    = srcYuv.get(compID).bufAt( compArea );

        int  orgStride = orgYuv.get(compID).stride;
        Pel* orgBlk    = orgYuv.get(compID).bufAt( compArea );

        getBlkStats( compID, cs.sps->bitDepths[toChannelType(compID)], blkStats[ctuRsAddr][compID],
                     srcBlk, orgBlk, srcStride, orgStride, compArea.width, compArea.height,
                     isLeftAvail, isRightAvail, isAboveAvail, isBelowAvail, isAboveLeftAvail, isAboveRightAvail );
      }
    }
  }
}
345
346
// Chooses the SAO parameters of one CTU by rate-distortion comparison of the
// "new" and the "merge" mode and applies the winning offsets.
//
// Steps:
//  1. optionally re-initialise the CABAC estimator contexts at the start of a
//     CTU row (only if m_ensureWppBitEqual is set and m_numThreads < 1);
//  2. if allBlksDisabled is set, reset codedParams[ ctuRsAddr ] and return;
//  3. build the merge candidate list;
//  4. evaluate every mode from the same starting contexts and keep the cheapest,
//     together with the contexts that resulted from coding it;
//  5. copy the coded parameters to reconParams, reconstruct them against the
//     merge list and apply them via offsetCTU() from the SAO buffer into the
//     reconstruction buffer.
void EncSampleAdaptiveOffset::decideCtuParams( CodingStructure& cs, const std::vector<SAOStatData**>& saoStatistics, const bool saoEnabled[ MAX_NUM_COMP ], const bool allBlksDisabled, const UnitArea& ctuArea, const int ctuRsAddr, SAOBlkParam* reconParams, SAOBlkParam* codedParams )
{
  const PreCalcValues& pcv = *cs.pcv;
  const Slice& slice       = *cs.slice;
  const int ctuPosX        = ctuRsAddr % pcv.widthInCtus;
  const int ctuPosY        = ctuRsAddr / pcv.widthInCtus;

  // reset CABAC estimator
  const bool resetAtRowStart = m_EncCfg->m_ensureWppBitEqual
                            && m_EncCfg->m_numThreads < 1
                            && ctuPosX == 0
                            && ctuPosY > 0;
  if( resetAtRowStart )
  {
    m_CABACEstimator->initCtxModels( slice );
  }

  // check disabled
  if( allBlksDisabled )
  {
    codedParams[ ctuRsAddr ].reset();
    return;
  }

  // get merge list
  SAOBlkParam* mergeList[ NUM_SAO_MERGE_TYPES ] = { NULL };
  getMergeList( cs, ctuRsAddr, reconParams, mergeList );

  const TempCtx ctxStart( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       ctxBest ( m_CtxCache );

  SAOBlkParam modeParam;
  double bestCost = MAX_DOUBLE;
  double modeCost = MAX_DOUBLE;

  for( int mode = 1; mode < NUM_SAO_MODES; mode++ )
  {
    // every mode after the first one starts again from the initial contexts
    if( mode != 1 )
    {
      m_CABACEstimator->getCtx() = SAOCtx( ctxStart );
    }

    if( mode == SAO_MODE_NEW )
    {
      deriveModeNewRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost );
    }
    else if( mode == SAO_MODE_MERGE )
    {
      deriveModeMergeRDO( cs.sps->bitDepths, ctuRsAddr, mergeList, saoEnabled, saoStatistics, modeParam, modeCost );
    }
    else
    {
      THROW( "Not a supported SAO mode." );
    }

    if( modeCost < bestCost )
    {
      bestCost                 = modeCost;
      codedParams[ ctuRsAddr ] = modeParam;
      ctxBest                  = SAOCtx( m_CABACEstimator->getCtx() );
    }
  }

  // apply reconstructed offsets
  m_CABACEstimator->getCtx() = SAOCtx( ctxBest );
  reconParams[ ctuRsAddr ]   = codedParams[ ctuRsAddr ];

  reconstructBlkSAOParam( reconParams[ ctuRsAddr ], mergeList );

  Picture& pic = *cs.picture;
  offsetCTU( ctuArea, pic.getSaoBuf(), cs.getRecoBuf(), reconParams[ ctuRsAddr ], cs );
}
420
421
int64_t EncSampleAdaptiveOffset::getDistortion(const int channelBitDepth, int typeIdc, int typeAuxInfo, int* invQuantOffset, SAOStatData& statData)
422
49.9k
{
423
49.9k
  int64_t dist        = 0;
424
49.9k
  int shift = 2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth);
425
426
49.9k
  switch(typeIdc)
427
49.9k
  {
428
9.99k
  case SAO_TYPE_EO_0:
429
19.9k
  case SAO_TYPE_EO_90:
430
29.9k
  case SAO_TYPE_EO_135:
431
39.9k
  case SAO_TYPE_EO_45:
432
39.9k
    {
433
239k
      for (int offsetIdx=0; offsetIdx<NUM_SAO_EO_CLASSES; offsetIdx++)
434
199k
      {
435
199k
        dist += estSaoDist( statData.count[offsetIdx], invQuantOffset[offsetIdx], statData.diff[offsetIdx], shift);
436
199k
      }
437
39.9k
    }
438
39.9k
    break;
439
10.0k
  case SAO_TYPE_BO:
440
10.0k
    {
441
50.0k
      for (int offsetIdx=typeAuxInfo; offsetIdx<typeAuxInfo+4; offsetIdx++)
442
40.0k
      {
443
40.0k
        int bandIdx = offsetIdx % NUM_SAO_BO_CLASSES ;
444
40.0k
        dist += estSaoDist( statData.count[bandIdx], invQuantOffset[bandIdx], statData.diff[bandIdx], shift);
445
40.0k
      }
446
10.0k
    }
447
10.0k
    break;
448
0
  default:
449
0
    {
450
0
      THROW("Not a supported type");
451
29.9k
    }
452
49.9k
  }
453
454
49.9k
  return dist;
455
49.9k
}
456
457
// Distortion change for one class when adding `offset` to `count` samples whose
// summed difference to the original is `diffSum`:
//   ( count * offset^2 - 2 * diffSum * offset ) >> shift
inline int64_t EncSampleAdaptiveOffset::estSaoDist(int64_t count, int64_t offset, int64_t diffSum, int shift)
{
  const int64_t squareTerm = count * offset * offset;
  const int64_t crossTerm  = diffSum * offset * 2;
  return ( squareTerm - crossTerm ) >> shift;
}
461
462
463
inline int EncSampleAdaptiveOffset::estIterOffset(int typeIdx, double lambda, int offsetInput, int64_t count, int64_t diffSum, int shift, int bitIncrease, int64_t& bestDist, double& bestCost, int offsetTh )
464
1.33k
{
465
1.33k
  int iterOffset, tempOffset;
466
1.33k
  int64_t tempDist, tempRate;
467
1.33k
  double tempCost, tempMinCost;
468
1.33k
  int offsetOutput = 0;
469
1.33k
  iterOffset = offsetInput;
470
  // Assuming sending quantized value 0 results in zero offset and sending the value zero needs 1 bit. entropy coder can be used to measure the exact rate here.
471
1.33k
  tempMinCost = lambda;
472
2.67k
  while (iterOffset != 0)
473
1.34k
  {
474
    // Calculate the bits required for signaling the offset
475
1.34k
    tempRate = (typeIdx == SAO_TYPE_BO) ? (abs((int)iterOffset)+2) : (abs((int)iterOffset)+1);
476
1.34k
    if (abs((int)iterOffset)==offsetTh) //inclusive
477
0
    {
478
0
      tempRate --;
479
0
    }
480
    // Do the dequantization before distortion calculation
481
1.34k
    tempOffset  = iterOffset * (1<< bitIncrease);
482
1.34k
    tempDist    = estSaoDist( count, tempOffset, diffSum, shift);
483
1.34k
    tempCost    = ((double)tempDist + lambda * (double) tempRate);
484
1.34k
    if(tempCost < tempMinCost)
485
281
    {
486
281
      tempMinCost = tempCost;
487
281
      offsetOutput = iterOffset;
488
281
      bestDist = tempDist;
489
281
      bestCost = tempCost;
490
281
    }
491
1.34k
    iterOffset = (iterOffset > 0) ? (iterOffset-1):(iterOffset+1);
492
1.34k
  }
493
1.33k
  return offsetOutput;
494
1.33k
}
495
496
// Derives the quantized SAO offsets of one component for the given SAO type.
//
// \param compIdx          component (selects lambda and offset step)
// \param channelBitDepth  bit depth of the component's channel
// \param typeIdc          SAO type (edge-offset types or band offset); other values throw
// \param statData         per-class sample counts and difference sums
// \param quantOffsets     output, MAX_NUM_SAO_CLASSES entries; fully overwritten
// \param typeAuxInfo      output: 0 for edge offset, starting band for band offset
//
// Procedure:
//  1. initial offset per class = rounded mean difference, scaled by the offset
//     step and clipped to [-offsetTh, offsetTh]; classes without samples and the
//     edge-offset "plain" class stay zero;
//  2. edge offset: offsets with the wrong sign for their class are zeroed, the
//     remaining non-zero offsets are refined with estIterOffset();
//  3. band offset: every non-zero offset is refined, the run of four consecutive
//     bands (with wrap-around) of lowest summed cost is selected, and the offsets
//     of all other bands are cleared.
void EncSampleAdaptiveOffset::deriveOffsets(ComponentID compIdx, const int channelBitDepth, int typeIdc, SAOStatData& statData, int* quantOffsets, int& typeAuxInfo)
{
  int bitDepth = channelBitDepth;
  int shift    = 2 * DISTORTION_PRECISION_ADJUSTMENT(bitDepth);
  int offsetTh = SampleAdaptiveOffset::getMaxOffsetQVal(channelBitDepth);  //inclusive

  for( int i = 0; i < MAX_NUM_SAO_CLASSES; i++ )
  {
    quantOffsets[i] = 0;
  }

  //derive initial offsets
  const bool isBandOffset = ( typeIdc == SAO_TYPE_BO );
  const int  numClasses   = isBandOffset ? ((int)NUM_SAO_BO_CLASSES) : ((int)NUM_SAO_EO_CLASSES);
  for( int classIdx = 0; classIdx < numClasses; classIdx++ )
  {
    const bool isPlainEoClass = !isBandOffset && ( classIdx == SAO_CLASS_EO_PLAIN );
    if( isPlainEoClass || statData.count[classIdx] == 0 )
    {
      continue; //offset will be zero
    }
#if (  DISTORTION_PRECISION_ADJUSTMENT(x)  == 0 )
    quantOffsets[classIdx] =
       (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] ) / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]));
     quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]);
#else
      quantOffsets[classIdx] =
        (int) xRoundIbdi(bitDepth, (double)(statData.diff[classIdx] << DISTORTION_PRECISION_ADJUSTMENT(bitDepth))
                                     / (double)(statData.count[classIdx] << m_offsetStepLog2[compIdx]));
      quantOffsets[classIdx] = Clip3(-offsetTh, offsetTh, quantOffsets[classIdx]);
#endif
  }

  // adjust offsets
  switch( typeIdc )
  {
  case SAO_TYPE_EO_0:
  case SAO_TYPE_EO_90:
  case SAO_TYPE_EO_135:
  case SAO_TYPE_EO_45:
    {
      // receive the outputs of estIterOffset(); not evaluated further here
      int64_t classDist;
      double  classCost;

      for( int classIdx = 0; classIdx < NUM_SAO_EO_CLASSES; classIdx++ )
      {
        int& offset = quantOffsets[classIdx];

        // valley classes only allow non-negative, peak classes only non-positive offsets
        const bool isValley = ( classIdx == SAO_CLASS_EO_FULL_VALLEY ) || ( classIdx == SAO_CLASS_EO_HALF_VALLEY );
        const bool isPeak   = ( classIdx == SAO_CLASS_EO_HALF_PEAK   ) || ( classIdx == SAO_CLASS_EO_FULL_PEAK   );
        if( ( isValley && offset < 0 ) || ( isPeak && offset > 0 ) )
        {
          offset = 0;
        }

        if( offset != 0 ) //iterative adjustment only when derived offset is not zero
        {
          offset = estIterOffset( typeIdc, m_lambda[compIdx], offset, statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], classDist, classCost, offsetTh );
        }
      }

      typeAuxInfo = 0;
    }
    break;
  case SAO_TYPE_BO:
    {
      int64_t distBOClasses[NUM_SAO_BO_CLASSES];
      double  costBOClasses[NUM_SAO_BO_CLASSES];
      for( int classIdx = 0; classIdx < NUM_SAO_BO_CLASSES; classIdx++ )
      {
        distBOClasses[classIdx] = 0;
        costBOClasses[classIdx] = m_lambda[compIdx];
        if( quantOffsets[classIdx] != 0 ) //iterative adjustment only when derived offset is not zero
        {
          quantOffsets[classIdx] = estIterOffset( typeIdc, m_lambda[compIdx], quantOffsets[classIdx], statData.count[classIdx], statData.diff[classIdx], shift, m_offsetStepLog2[compIdx], distBOClasses[classIdx], costBOClasses[classIdx], offsetTh );
        }
      }

      //decide the starting band index
      double minCost = MAX_DOUBLE;
      for( int band = 0; band < NUM_SAO_BO_CLASSES; band++ )
      {
        double cost = costBOClasses[(band  )%NUM_SAO_BO_CLASSES];
        cost       += costBOClasses[(band+1)%NUM_SAO_BO_CLASSES];
        cost       += costBOClasses[(band+2)%NUM_SAO_BO_CLASSES];
        cost       += costBOClasses[(band+3)%NUM_SAO_BO_CLASSES];

        if( cost < minCost )
        {
          minCost     = cost;
          typeAuxInfo = band;
        }
      }

      //clear those unused classes: keep only the four selected bands
      int keptOffsets[NUM_SAO_BO_CLASSES];
      for( int classIdx = 0; classIdx < NUM_SAO_BO_CLASSES; classIdx++ )
      {
        keptOffsets[classIdx] = 0;
      }
      for( int i = 0; i < 4; i++ )
      {
        const int band = (typeAuxInfo+i)%NUM_SAO_BO_CLASSES;
        keptOffsets[band] = quantOffsets[band];
      }
      for( int classIdx = 0; classIdx < NUM_SAO_BO_CLASSES; classIdx++ )
      {
        quantOffsets[classIdx] = keptOffsets[classIdx];
      }
    }
    break;
  default:
    {
      THROW("Not a supported type");
    }
  }
}
613
614
// Rate-distortion search for the SAO "new" mode of one CTU.
//
// Luma and chroma are searched separately. For luma every SAO type is compared
// against "off". For chroma all chroma components share one type, so the summed
// cost over the chroma components is compared against all of them being "off".
// Afterwards the chosen parameters are coded once more from the contexts the
// function was entered with to obtain the final rate.
//
// \param modeParam     output: selected parameters per component
// \param modeNormCost  output: sum over components of (distortion / lambda), plus
//                      the scaled estimated bits of the complete parameter set
void EncSampleAdaptiveOffset::deriveModeNewRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
{
  const int  numberOfComponents = m_numberOfComponents;
  const bool leftMergeAvail     = ( mergeList[SAO_MERGE_LEFT]  != NULL );
  const bool aboveMergeAvail    = ( mergeList[SAO_MERGE_ABOVE] != NULL );

  double    minCost, cost;
  uint64_t  previousFracBits;
  int64_t   dist[MAX_NUM_COMP];
  int64_t   modeDist[MAX_NUM_COMP] = { 0 };
  SAOOffset testOffset[MAX_NUM_COMP];
  int       invQuantOffset[MAX_NUM_SAO_CLASSES];

  //pre-encode merge flags
  modeParam[COMP_Y].modeIdc = SAO_MODE_OFF;
  const TempCtx ctxStartBlk   ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, leftMergeAvail, aboveMergeAvail, true );
  const TempCtx ctxStartLuma  ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       ctxBestLuma   ( m_CtxCache );

  //------ luma --------//
  {
    const ComponentID compIdx = COMP_Y;

    //"off" case as initial cost
    modeParam[compIdx].modeIdc = SAO_MODE_OFF;
    m_CABACEstimator->resetBits();
    m_CABACEstimator->sao_offset_pars( modeParam[compIdx], compIdx, sliceEnabled[compIdx], bitDepths[CH_L] );
    modeDist[compIdx] = 0;
    minCost           = m_lambda[compIdx] * (FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits());
    ctxBestLuma       = SAOCtx( m_CABACEstimator->getCtx() );

    // the types are only tested if SAO is enabled for luma
    const int numLumaTypes = sliceEnabled[compIdx] ? (int)NUM_SAO_NEW_TYPES : 0;
    for( int typeIdc = 0; typeIdc < numLumaTypes; typeIdc++ )
    {
      SAOOffset&   candidate = testOffset[compIdx];
      SAOStatData& stats     = blkStats[ctuRsAddr][compIdx][typeIdc];

      candidate.modeIdc = SAO_MODE_NEW;
      candidate.typeIdc = typeIdc;

      //derive coded offset
      deriveOffsets(compIdx, bitDepths[CH_L], typeIdc, stats, candidate.offset, candidate.typeAuxInfo);

      //inversed quantized offsets
      invertQuantOffsets(compIdx, typeIdc, candidate.typeAuxInfo, invQuantOffset, candidate.offset);

      //get distortion
      dist[compIdx] = getDistortion(bitDepths[CH_L], candidate.typeIdc, candidate.typeAuxInfo, invQuantOffset, stats);

      //get rate
      m_CABACEstimator->getCtx() = SAOCtx( ctxStartLuma );
      m_CABACEstimator->resetBits();
      m_CABACEstimator->sao_offset_pars( candidate, compIdx, sliceEnabled[compIdx], bitDepths[CH_L] );
      double rate = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
      cost = (double)dist[compIdx] + m_lambda[compIdx]*rate;
      if( cost < minCost )
      {
        minCost            = cost;
        modeDist[compIdx]  = dist[compIdx];
        modeParam[compIdx] = candidate;
        ctxBestLuma        = SAOCtx( m_CABACEstimator->getCtx() );
      }
    }
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
  }

  //------ chroma --------//
  //"off" case as initial cost
  cost             = 0;
  previousFracBits = 0;
  m_CABACEstimator->resetBits();
  for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
  {
    const ComponentID component = ComponentID(componentIndex);

    modeParam[component].modeIdc = SAO_MODE_OFF;
    modeDist [component]         = 0;
    m_CABACEstimator->sao_offset_pars( modeParam[component], component, sliceEnabled[component], bitDepths[CH_C] );
    // the bit counter accumulates, so each component's share is the increment
    const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
    cost += m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits);
    previousFracBits = currentFracBits;
  }
  minCost = cost;

  //doesn't need to store cabac status here since the whole CTU parameters will be re-encoded at the end of this function

  for( int typeIdc = 0; typeIdc < NUM_SAO_NEW_TYPES; typeIdc++ )
  {
    m_CABACEstimator->getCtx() = SAOCtx( ctxBestLuma );
    m_CABACEstimator->resetBits();
    previousFracBits = 0;
    cost             = 0;

    for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
    {
      const ComponentID component = ComponentID(componentIndex);
      SAOOffset& candidate        = testOffset[component];

      if( !sliceEnabled[component] )
      {
        candidate.modeIdc = SAO_MODE_OFF;
        dist[component]   = 0;
        continue;
      }
      candidate.modeIdc = SAO_MODE_NEW;
      candidate.typeIdc = typeIdc;

      //derive offset & get distortion
      SAOStatData& stats = blkStats[ctuRsAddr][component][typeIdc];
      deriveOffsets(component, bitDepths[CH_C], typeIdc, stats, candidate.offset, candidate.typeAuxInfo);
      invertQuantOffsets(component, typeIdc, candidate.typeAuxInfo, invQuantOffset, candidate.offset);
      dist[component] = getDistortion(bitDepths[CH_C], typeIdc, candidate.typeAuxInfo, invQuantOffset, stats);
      m_CABACEstimator->sao_offset_pars( candidate, component, sliceEnabled[component], bitDepths[CH_C] );
      const uint64_t currentFracBits = m_CABACEstimator->getEstFracBits();
      cost += dist[component] + (m_lambda[component] * FRAC_BITS_SCALE * (currentFracBits - previousFracBits));
      previousFracBits = currentFracBits;
    }

    if( cost < minCost )
    {
      minCost = cost;
      for( uint32_t componentIndex = COMP_Cb; componentIndex < numberOfComponents; componentIndex++ )
      {
        modeDist[componentIndex]  = dist[componentIndex];
        modeParam[componentIndex] = testOffset[componentIndex];
      }
    }
  } // SAO_TYPE loop

  //----- re-gen rate & normalized cost----//
  modeNormCost = 0;
  for( uint32_t componentIndex = COMP_Y; componentIndex < numberOfComponents; componentIndex++ )
  {
    modeNormCost += (double)modeDist[componentIndex] / m_lambda[componentIndex];
  }

  m_CABACEstimator->getCtx() = SAOCtx( ctxStartBlk );
  m_CABACEstimator->resetBits();
  m_CABACEstimator->sao_block_pars( modeParam, bitDepths, sliceEnabled, leftMergeAvail, aboveMergeAvail, false );
  modeNormCost += FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
}
753
754
/**
 * Evaluates every available SAO merge candidate for one CTU and keeps the cheapest one.
 *
 * For each non-null entry of mergeList, a copy of the candidate is marked as SAO_MODE_MERGE
 * (with typeIdc set to the merge direction), its lambda-normalised distortion is accumulated
 * over all components, and the signalling rate is estimated with the CABAC estimator starting
 * from the context state that was active on entry.
 *
 * \param bitDepths     bit depths, indexed by channel type
 * \param ctuRsAddr     index of the CTU into blkStats
 * \param mergeList     merge candidates; a null entry means that candidate is not available
 * \param sliceEnabled  per-component enable flags, forwarded to sao_block_pars
 * \param blkStats      statistics, indexed as [ctuRsAddr][component][typeIdc]
 * \param modeParam     out: best merge parameters; left untouched when no candidate exists
 * \param modeNormCost  out: normalised cost of the best candidate, MAX_DOUBLE when none exists
 *
 * On return the estimator context is the one reached after coding the winning candidate;
 * when no candidate was available it is whatever the function entered with.
 */
void EncSampleAdaptiveOffset::deriveModeMergeRDO(const BitDepths &bitDepths, int ctuRsAddr, SAOBlkParam* mergeList[NUM_SAO_MERGE_TYPES], const bool* sliceEnabled, const std::vector<SAOStatData**>& blkStats, SAOBlkParam& modeParam, double& modeNormCost )
{
  const int  compCount      = m_numberOfComponents;
  const bool leftMergeAvail  = ( mergeList[SAO_MERGE_LEFT]  != nullptr );
  const bool aboveMergeAvail = ( mergeList[SAO_MERGE_ABOVE] != nullptr );

  // Snapshot of the entry contexts (every candidate is rated from the same state)
  // and storage for the contexts produced by the current winner.
  const TempCtx entryCtx ( m_CtxCache, SAOCtx( m_CABACEstimator->getCtx() ) );
  TempCtx       winnerCtx( m_CtxCache );

  SAOBlkParam candidateParam;

  modeNormCost = MAX_DOUBLE;

  for( int mergeType = 0; mergeType < NUM_SAO_MERGE_TYPES; ++mergeType )
  {
    SAOBlkParam* const neighbour = mergeList[mergeType];
    if( neighbour == nullptr )
    {
      continue;
    }

    candidateParam = *neighbour;

    // ---- distortion, normalised by the per-component lambda ----
    double distTerm = 0;
    for( int comp = 0; comp < compCount; ++comp )
    {
      SAOOffset& signalled = candidateParam[comp];
      signalled.modeIdc    = SAO_MODE_MERGE;
      signalled.typeIdc    = mergeType;

      SAOOffset& inherited = ( *neighbour )[comp];
      if( inherited.modeIdc == SAO_MODE_OFF )
      {
        continue;
      }

      // The neighbour's offsets are already reconstructed, so no inverse quantisation here.
      const double compDist = static_cast<double>( getDistortion( bitDepths[toChannelType( ComponentID( comp ) )],
                                                                  inherited.typeIdc,
                                                                  inherited.typeAuxInfo,
                                                                  inherited.offset,
                                                                  blkStats[ctuRsAddr][comp][inherited.typeIdc] ) );
      distTerm += ( compDist / m_lambda[comp] );
    }

    // ---- rate ----
    m_CABACEstimator->getCtx() = SAOCtx( entryCtx );
    m_CABACEstimator->resetBits();
    m_CABACEstimator->sao_block_pars( candidateParam, bitDepths, sliceEnabled, leftMergeAvail, aboveMergeAvail, false );
    const double rateTerm      = FRAC_BITS_SCALE * m_CABACEstimator->getEstFracBits();
    const double candidateCost = distTerm + rateTerm;

    if( candidateCost < modeNormCost )
    {
      modeNormCost = candidateCost;
      modeParam    = candidateParam;
      winnerCtx    = SAOCtx( m_CABACEstimator->getCtx() );
    }
  }

  // Only restore when at least one candidate was actually evaluated.
  if( modeNormCost < MAX_DOUBLE )
  {
    m_CABACEstimator->getCtx() = SAOCtx( winnerCtx );
  }
}
809
810
void EncSampleAdaptiveOffset::getBlkStats(const ComponentID compIdx, const int channelBitDepth, SAOStatData* statsDataTypes
811
                        , Pel* srcBlk, Pel* orgBlk, int srcStride, int orgStride, int width, int height
812
                        , bool isLeftAvail,  bool isRightAvail, bool isAboveAvail, bool isBelowAvail, bool isAboveLeftAvail, bool isAboveRightAvail )
813
9.99k
{
814
9.99k
  int x, startX, startY, endX, endY, edgeType, firstLineStartX, firstLineEndX;
815
9.99k
  int64_t *diff, *count;
816
9.99k
  Pel* srcLine, *orgLine;
817
9.99k
  const int skipLinesR = compIdx == COMP_Y ? 5 : 3;
818
9.99k
  const int skipLinesB = compIdx == COMP_Y ? 4 : 2;
819
820
59.9k
  for(int typeIdx=0; typeIdx< NUM_SAO_NEW_TYPES; typeIdx++)
821
49.9k
  {
822
49.9k
    SAOStatData& statsData= statsDataTypes[typeIdx];
823
49.9k
    statsData.reset();
824
49.9k
    srcLine = srcBlk;
825
49.9k
    orgLine = orgBlk;
826
49.9k
    diff    = statsData.diff;
827
49.9k
    count   = statsData.count;
828
49.9k
    switch(typeIdx)
829
49.9k
    {
830
9.99k
    case SAO_TYPE_EO_0:
831
9.99k
      {
832
9.99k
        endY   =  isBelowAvail ? (height - skipLinesB) : height;
833
9.99k
        startX = (isLeftAvail  ? 0 : 1);
834
9.99k
        endX   = (isRightAvail ? (width - skipLinesR) : (width - 1));
835
9.99k
        calcSaoStatisticsEo0(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff);
836
9.99k
      }
837
9.99k
      break;
838
9.99k
    case SAO_TYPE_EO_90:
839
9.99k
      {
840
9.99k
        int8_t *signUpLine = &m_signLineBuf1[0];
841
9.99k
        startX = 0;
842
9.99k
        startY = isAboveAvail ? 0 : 1;
843
9.99k
        endX   = (isRightAvail ? (width - skipLinesR) : width);
844
9.99k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
845
9.99k
        if (!isAboveAvail)
846
5.47k
        {
847
5.47k
          srcLine += srcStride;
848
5.47k
          orgLine += orgStride;
849
5.47k
        }
850
9.99k
        calcSaoStatisticsEo90(width,endX,startY,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
851
9.99k
      }
852
9.99k
      break;
853
9.99k
    case SAO_TYPE_EO_135:
854
9.99k
      {
855
9.99k
        diff +=2;
856
9.99k
        count+=2;
857
9.99k
        int8_t *signUpLine, *signDownLine;
858
9.99k
        signUpLine  = &m_signLineBuf1[0];
859
9.99k
        signDownLine= &m_signLineBuf2[0];
860
9.99k
        startX = isLeftAvail  ? 0 : 1;
861
9.99k
        endX   = isRightAvail ? (width - skipLinesR): (width - 1);
862
9.99k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
863
        //prepare 2nd line's upper sign
864
9.99k
        Pel* srcLineBelow = srcLine + srcStride;
865
574k
        for (x=startX; x<endX+1; x++)
866
564k
        {
867
564k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x-1]);
868
564k
        }
869
        //1st line
870
9.99k
        Pel* srcLineAbove = srcLine - srcStride;
871
9.99k
        firstLineStartX = isAboveLeftAvail ? 0    : 1;
872
9.99k
        firstLineEndX   = isAboveAvail     ? endX : 1;
873
266k
        for(x=firstLineStartX; x<firstLineEndX; x++)
874
256k
        {
875
256k
          edgeType = sgn(srcLine[x] - srcLineAbove[x-1]) - signUpLine[x+1];
876
256k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
877
256k
          count[edgeType] ++;
878
256k
        }
879
9.99k
        srcLine  += srcStride;
880
9.99k
        orgLine  += orgStride;
881
9.99k
        calcSaoStatisticsEo135(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine,signDownLine);
882
9.99k
      }
883
9.99k
      break;
884
9.99k
    case SAO_TYPE_EO_45:
885
9.99k
      {
886
9.99k
        diff +=2;
887
9.99k
        count+=2;
888
9.99k
        int8_t *signUpLine = &m_signLineBuf1[1];
889
890
9.99k
        startX = isLeftAvail  ? 0 : 1;
891
9.99k
        endX   = isRightAvail ? (width - skipLinesR) : (width - 1);
892
9.99k
        endY   = isBelowAvail ? (height - skipLinesB) : (height - 1);
893
894
        //prepare 2nd line upper sign
895
9.99k
        Pel* srcLineBelow = srcLine + srcStride;
896
574k
        for (x=startX-1; x<endX; x++)
897
564k
        {
898
564k
          signUpLine[x] = (int8_t)sgn(srcLineBelow[x] - srcLine[x+1]);
899
564k
        }
900
        //first line
901
9.99k
        Pel* srcLineAbove = srcLine - srcStride;
902
9.99k
        firstLineStartX = isAboveAvail ? startX : endX;
903
9.99k
        firstLineEndX   = (!isRightAvail && isAboveRightAvail) ? width : endX;
904
266k
        for(x=firstLineStartX; x<firstLineEndX; x++)
905
256k
        {
906
256k
          edgeType = sgn(srcLine[x] - srcLineAbove[x+1]) - signUpLine[x-1];
907
256k
          diff [edgeType] += (orgLine[x] - srcLine[x]);
908
256k
          count[edgeType] ++;
909
256k
        }
910
9.99k
        srcLine += srcStride;
911
9.99k
        orgLine += orgStride;
912
9.99k
        calcSaoStatisticsEo45(width,startX,endX,endY,srcLine,orgLine,srcStride,orgStride,count,diff,signUpLine);
913
9.99k
      }
914
9.99k
      break;
915
9.99k
    case SAO_TYPE_BO:
916
9.99k
      {
917
9.99k
        startX = 0;
918
9.99k
        endX   = isRightAvail ? (width - skipLinesR) : width;
919
9.99k
        endY   = isBelowAvail ? (height- skipLinesB) : height;
920
9.99k
        calcSaoStatisticsBo(width,endX,endY,srcLine,orgLine,srcStride,orgStride,channelBitDepth,count,diff);
921
9.99k
      }
922
9.99k
      break;
923
0
    default:
924
0
      {
925
0
        THROW("Not a supported SAO type");
926
0
      }
927
49.9k
    }
928
49.9k
  }
929
9.99k
}
930
931
/**
 * Decides whether the left, above and above-left neighbours of the CTU at pos may be
 * accessed, based on slice, tile and sub-picture boundaries.
 *
 * A neighbour is looked up only when it lies inside the picture (ctuX / ctuY > 0) and
 * PPS::canFilterCtuBdry() allows the step; otherwise it counts as unavailable.
 * The result is then narrowed, in this order, by
 *   - the slice test, when the PPS disables loop filtering across slices,
 *   - the tile test, when the PPS disables loop filtering across tiles,
 *   - the sub-picture test, when the current sub-picture disables filtering across its borders.
 *
 * \param cs                coding structure providing PPS, slice and CU lookup
 * \param pos               luma position used to look up the current CTU
 * \param isLeftAvail       out: left neighbour usable
 * \param isAboveAvail      out: above neighbour usable
 * \param isAboveLeftAvail  out: above-left neighbour usable
 */
void EncSampleAdaptiveOffset::deriveLoopFilterBoundaryAvailibility(CodingStructure& cs, const Position& pos, bool& isLeftAvail, bool& isAboveAvail, bool& isAboveLeftAvail) const
{
  const bool isLoopFiltAcrossSlicePPS = cs.pps->loopFilterAcrossSlicesEnabled;
  const bool isLoopFiltAcrossTilePPS  = cs.pps->loopFilterAcrossTilesEnabled;

  // Neighbouring CTUs are one maximum CU size away in each direction.
  const int width  = cs.pcv->maxCUSize;
  const int height = cs.pcv->maxCUSize;
  const CodingUnit* cuCurr = cs.getCU(pos, CH_L, TREE_D);
  const int ctuX = pos.x >> cs.pcv->maxCUSizeLog2;
  const int ctuY = pos.y >> cs.pcv->maxCUSizeLog2;
  const PPS* pps = cs.slice->pps;
  const CodingUnit* cuLeft      = ctuX > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, -1, 0 ) ? cs.getCU(pos.offset(-width, 0), CH_L, TREE_D): nullptr;
  const CodingUnit* cuAbove     = ctuY > 0 &&             pps->canFilterCtuBdry( ctuX, ctuY, 0, -1 ) ? cs.getCU(pos.offset(0, -height), CH_L, TREE_D): nullptr;
  const CodingUnit* cuAboveLeft = ctuY > 0 && ctuX > 0 && pps->canFilterCtuBdry( ctuX, ctuY, -1,-1 ) ? cs.getCU(pos.offset(-width, -height), CH_L, TREE_D): nullptr;

  // Base availability: the neighbour exists.
  isLeftAvail      = (cuLeft      != nullptr);
  isAboveAvail     = (cuAbove     != nullptr);
  isAboveLeftAvail = (cuAboveLeft != nullptr);

  // Each flag guards the dereference of its neighbour pointer below:
  // a flag can only be true when the matching pointer is non-null.
  if (!isLoopFiltAcrossSlicePPS)
  {
    isLeftAvail      = isLeftAvail      && CU::isSameSlice(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameSlice(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameSlice(*cuCurr, *cuAboveLeft);
  }

  if (!isLoopFiltAcrossTilePPS)
  {
    isLeftAvail      = isLeftAvail      && CU::isSameTile(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameTile(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameTile(*cuCurr, *cuAboveLeft);
  }

  // Only one flag of the sub-picture is read, so bind it by const reference: this avoids
  // copying the SubPic if getSubPicFromCU() hands out a reference, and merely extends the
  // temporary's lifetime if it returns by value.
  const SubPic& curSubPic = cs.pps->getSubPicFromCU(*cuCurr);
  if (!curSubPic.loopFilterAcrossSubPicEnabled )
  {
    isLeftAvail      = isLeftAvail      && CU::isSameSubPic(*cuCurr, *cuLeft);
    isAboveAvail     = isAboveAvail     && CU::isSameSubPic(*cuCurr, *cuAbove);
    isAboveLeftAvail = isAboveLeftAvail && CU::isSameSubPic(*cuCurr, *cuAboveLeft);
  }
}
976
977
} // namespace vvenc
978
979
//! \}
980