Coverage Report

Created: 2026-05-24 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libde265/libde265/sao.cc
Line
Count
Source
1
/*
2
 * H.265 video codec.
3
 * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de>
4
 *
5
 * This file is part of libde265.
6
 *
7
 * libde265 is free software: you can redistribute it and/or modify
8
 * it under the terms of the GNU Lesser General Public License as
9
 * published by the Free Software Foundation, either version 3 of
10
 * the License, or (at your option) any later version.
11
 *
12
 * libde265 is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public License
18
 * along with libde265.  If not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "sao.h"
22
#include "util.h"
23
24
#include <stdlib.h>
25
#include <string.h>
26
27
28
template <class pixel_t>
29
void apply_sao_internal(de265_image* img, int xCtb,int yCtb,
30
                        const slice_segment_header* shdr, int cIdx, int nSW,int nSH,
31
                        const pixel_t* in_img,  int in_stride,
32
                        /* */ pixel_t* out_img, int out_stride)
33
1.64M
{
34
1.64M
  const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb);
35
36
1.64M
  int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3;
37
38
1.64M
  logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH);
39
40
1.64M
  if (SaoTypeIdx==0) {
41
970k
    return;
42
970k
  }
43
44
670k
  const seq_parameter_set* sps = &img->get_sps();
45
670k
  const pic_parameter_set* pps = &img->get_pps();
46
670k
  const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C);
47
670k
  const int maxPixelValue = (1<<bitDepth)-1;
48
49
  // top left position of CTB in pixels
50
670k
  const int xC = xCtb*nSW;
51
670k
  const int yC = yCtb*nSH;
52
53
670k
  const int width  = img->get_width(cIdx);
54
670k
  const int height = img->get_height(cIdx);
55
56
670k
  const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS;
57
58
670k
  const int picWidthInCtbs = sps->PicWidthInCtbsY;
59
670k
  const int chromashiftW = sps->get_chroma_shift_W(cIdx);
60
670k
  const int chromashiftH = sps->get_chroma_shift_H(cIdx);
61
670k
  const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW;
62
670k
  const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH;
63
64
65
4.02M
  for (int i=0;i<5;i++)
66
3.35M
    {
67
3.35M
      logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]);
68
3.35M
    }
69
70
71
  // actual size of CTB to be processed (can be smaller when partially outside of image)
72
670k
  const int ctbW = (xC+nSW>width)  ? width -xC : nSW;
73
670k
  const int ctbH = (yC+nSH>height) ? height-yC : nSH;
74
75
76
670k
  const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb);
77
78
670k
  if (SaoTypeIdx==2) {
79
289k
    int hPos[2], vPos[2];
80
289k
    int vPosStride[2]; // vPos[] multiplied by image stride
81
289k
    int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3;
82
83
289k
    switch (SaoEoClass) {
84
67.4k
    case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break;
85
82.4k
    case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break;
86
66.7k
    case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break;
87
73.2k
    case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break;
88
289k
    }
89
90
289k
    vPosStride[0] = vPos[0] * in_stride;
91
289k
    vPosStride[1] = vPos[1] * in_stride;
92
93
    /* Reorder sao_info.saoOffsetVal[] array, so that we can index it
94
       directly with the sum of the two pixel-difference signs. */
95
289k
    int8_t  saoOffsetVal[5]; // [2] unused
96
289k
    saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1];
97
289k
    saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1];
98
289k
    saoOffsetVal[2] = 0;
99
289k
    saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1];
100
289k
    saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1];
101
102
103
12.7M
    for (int j=0;j<ctbH;j++) {
104
12.4M
      const pixel_t* in_ptr  = &in_img [xC+(yC+j)*in_stride];
105
12.4M
      /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride];
106
107
633M
      for (int i=0;i<ctbW;i++) {
108
620M
        int edgeIdx = -1;
109
110
620M
        logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j);
111
112
620M
        if ((extendedTests &&
113
37.3M
             (sps->pcm_loop_filter_disable_flag &&
114
193k
              img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) ||
115
621M
            img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
116
20.7M
          continue;
117
20.7M
        }
118
119
        // do the expensive test for boundaries only at the boundaries
120
600M
        bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1);
121
122
600M
        if (testBoundary)
123
138M
          for (int k=0;k<2;k++) {
124
93.5M
            int xS = xC+i+hPos[k];
125
93.5M
            int yS = yC+j+vPos[k];
126
127
93.5M
            if (xS<0 || yS<0 || xS>=width || yS>=height) {
128
3.29M
              edgeIdx=0;
129
3.29M
              break;
130
3.29M
            }
131
132
133
            // This part seems inefficient with all the get_SliceHeaderIndex() calls,
134
            // but removing this part (because the input was known to have only a single
135
            // slice anyway) reduced computation time only by 1.3%.
136
            // TODO: however, this may still be a big part of SAO itself.
137
138
90.2M
            slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW,
139
90.2M
                                                                     yS<<chromashiftH);
140
90.2M
            if (sliceHeader==nullptr) { return; }
141
142
90.2M
            int sliceAddrRS = sliceHeader->SliceAddrRS;
143
90.2M
            if (sliceAddrRS <  ctbSliceAddrRS &&
144
10.6k
                img->get_SliceHeader((xC+i)<<chromashiftW,
145
10.6k
                                     (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
146
7.70k
              edgeIdx=0;
147
7.70k
              break;
148
7.70k
            }
149
150
90.2M
            if (sliceAddrRS >  ctbSliceAddrRS &&
151
5.82k
                img->get_SliceHeader(xS<<chromashiftW,
152
5.82k
                                     yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
153
5.23k
              edgeIdx=0;
154
5.23k
              break;
155
5.23k
            }
156
157
158
90.2M
            if (pps->loop_filter_across_tiles_enabled_flag==0 &&
159
90.2M
                pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] !=
160
90.2M
                pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) {
161
4.92k
              edgeIdx=0;
162
4.92k
              break;
163
4.92k
            }
164
90.2M
          }
165
166
600M
        if (edgeIdx != 0) {
167
168
597M
          edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) +
169
597M
                      Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]])   );
170
171
597M
          if (1) { // edgeIdx != 0) {   // seems to be faster without this check (zero in offset table)
172
597M
            int offset = saoOffsetVal[edgeIdx+2];
173
174
597M
            out_ptr[i] = Clip3(0,maxPixelValue,
175
597M
                               in_ptr[i] + offset);
176
597M
          }
177
597M
        }
178
600M
      }
179
12.4M
    }
180
289k
  }
181
380k
  else {
182
380k
    int bandShift = bitDepth-5;
183
380k
    int saoLeftClass = saoinfo->sao_band_position[cIdx];
184
380k
    logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass);
185
186
380k
    int bandTable[32];
187
380k
    memset(bandTable, 0, sizeof(int)*32);
188
189
1.90M
    for (int k=0;k<4;k++) {
190
1.52M
      bandTable[ (k+saoLeftClass)&31 ] = k+1;
191
1.52M
    }
192
193
194
    /* If PCM or transquant_bypass is used in this CTB, we have to
195
       run all checks (A).
196
       Otherwise, we run a simplified version of the code (B).
197
198
       NOTE: this whole part of SAO does not seem to be a significant part of the time spent
199
    */
200
201
380k
    if (extendedTests) {
202
203
      // (A) full version with all checks
204
205
1.13M
      for (int j=0;j<ctbH;j++)
206
54.4M
        for (int i=0;i<ctbW;i++) {
207
208
53.2M
          if ((sps->pcm_loop_filter_disable_flag &&
209
229k
               img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) ||
210
53.2M
              img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
211
39.7M
            continue;
212
39.7M
          }
213
214
          // Shifts are a strange thing. On x86, >>x actually computes >>(x%64).
215
          // But this should never happen, because the maximum bit-depth is 16.
216
13.5M
          int pixel = in_img[xC + i + (yC + j) * in_stride];
217
218
          // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
219
          // maybe when there was a decoding error and the pixels have not been filled in correctly.
220
          // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
221
13.5M
          pixel = Clip3(0, maxPixelValue, pixel);
222
223
13.5M
          int bandIdx = bandTable[pixel >> bandShift];
224
225
13.5M
          if (bandIdx>0) {
226
1.56M
            int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
227
228
1.56M
            logtrace(LogSAO,"%d %d (%d) offset %d  %x -> %x\n",xC+i,yC+j,bandIdx,
229
1.56M
                     offset,
230
1.56M
                     in_img[xC+i+(yC+j)*in_stride],
231
1.56M
                     in_img[xC+i+(yC+j)*in_stride]+offset);
232
233
1.56M
            out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
234
1.56M
                                                    in_img[xC+i+(yC+j)*in_stride] + offset);
235
1.56M
          }
236
13.5M
        }
237
35.6k
    }
238
344k
    else
239
344k
      {
240
        // (B) simplified version (only works if no PCM and transquant_bypass is active)
241
242
13.6M
        for (int j=0;j<ctbH;j++)
243
615M
          for (int i=0;i<ctbW;i++) {
244
245
602M
            int pixel = in_img[xC + i + (yC + j) * in_stride];
246
247
            // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
248
            // maybe when there was a decoding error and the pixels have not been filled in correctly.
249
            // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
250
602M
            pixel = Clip3(0, maxPixelValue, pixel);
251
252
602M
            int bandIdx = bandTable[pixel >> bandShift];
253
254
602M
            if (bandIdx>0) {
255
77.5M
              int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
256
257
77.5M
              out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
258
77.5M
                                                      in_img[xC+i+(yC+j)*in_stride] + offset);
259
77.5M
            }
260
602M
          }
261
344k
      }
262
380k
  }
263
670k
}
void apply_sao_internal<unsigned short>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned short const*, int, unsigned short*, int)
Line
Count
Source
33
259k
{
34
259k
  const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb);
35
36
259k
  int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3;
37
38
259k
  logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH);
39
40
259k
  if (SaoTypeIdx==0) {
41
159k
    return;
42
159k
  }
43
44
99.9k
  const seq_parameter_set* sps = &img->get_sps();
45
99.9k
  const pic_parameter_set* pps = &img->get_pps();
46
99.9k
  const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C);
47
99.9k
  const int maxPixelValue = (1<<bitDepth)-1;
48
49
  // top left position of CTB in pixels
50
99.9k
  const int xC = xCtb*nSW;
51
99.9k
  const int yC = yCtb*nSH;
52
53
99.9k
  const int width  = img->get_width(cIdx);
54
99.9k
  const int height = img->get_height(cIdx);
55
56
99.9k
  const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS;
57
58
99.9k
  const int picWidthInCtbs = sps->PicWidthInCtbsY;
59
99.9k
  const int chromashiftW = sps->get_chroma_shift_W(cIdx);
60
99.9k
  const int chromashiftH = sps->get_chroma_shift_H(cIdx);
61
99.9k
  const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW;
62
99.9k
  const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH;
63
64
65
599k
  for (int i=0;i<5;i++)
66
499k
    {
67
499k
      logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]);
68
499k
    }
69
70
71
  // actual size of CTB to be processed (can be smaller when partially outside of image)
72
99.9k
  const int ctbW = (xC+nSW>width)  ? width -xC : nSW;
73
99.9k
  const int ctbH = (yC+nSH>height) ? height-yC : nSH;
74
75
76
99.9k
  const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb);
77
78
99.9k
  if (SaoTypeIdx==2) {
79
14.7k
    int hPos[2], vPos[2];
80
14.7k
    int vPosStride[2]; // vPos[] multiplied by image stride
81
14.7k
    int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3;
82
83
14.7k
    switch (SaoEoClass) {
84
3.11k
    case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break;
85
2.29k
    case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break;
86
4.99k
    case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break;
87
4.34k
    case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break;
88
14.7k
    }
89
90
14.7k
    vPosStride[0] = vPos[0] * in_stride;
91
14.7k
    vPosStride[1] = vPos[1] * in_stride;
92
93
    /* Reorder sao_info.saoOffsetVal[] array, so that we can index it
94
       directly with the sum of the two pixel-difference signs. */
95
14.7k
    int8_t  saoOffsetVal[5]; // [2] unused
96
14.7k
    saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1];
97
14.7k
    saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1];
98
14.7k
    saoOffsetVal[2] = 0;
99
14.7k
    saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1];
100
14.7k
    saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1];
101
102
103
410k
    for (int j=0;j<ctbH;j++) {
104
395k
      const pixel_t* in_ptr  = &in_img [xC+(yC+j)*in_stride];
105
395k
      /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride];
106
107
18.3M
      for (int i=0;i<ctbW;i++) {
108
17.9M
        int edgeIdx = -1;
109
110
17.9M
        logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j);
111
112
17.9M
        if ((extendedTests &&
113
10.7M
             (sps->pcm_loop_filter_disable_flag &&
114
151k
              img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) ||
115
17.9M
            img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
116
5.92M
          continue;
117
5.92M
        }
118
119
        // do the expensive test for boundaries only at the boundaries
120
12.0M
        bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1);
121
122
12.0M
        if (testBoundary)
123
2.60M
          for (int k=0;k<2;k++) {
124
1.89M
            int xS = xC+i+hPos[k];
125
1.89M
            int yS = yC+j+vPos[k];
126
127
1.89M
            if (xS<0 || yS<0 || xS>=width || yS>=height) {
128
318k
              edgeIdx=0;
129
318k
              break;
130
318k
            }
131
132
133
            // This part seems inefficient with all the get_SliceHeaderIndex() calls,
134
            // but removing this part (because the input was known to have only a single
135
            // slice anyway) reduced computation time only by 1.3%.
136
            // TODO: however, this may still be a big part of SAO itself.
137
138
1.57M
            slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW,
139
1.57M
                                                                     yS<<chromashiftH);
140
1.57M
            if (sliceHeader==nullptr) { return; }
141
142
1.57M
            int sliceAddrRS = sliceHeader->SliceAddrRS;
143
1.57M
            if (sliceAddrRS <  ctbSliceAddrRS &&
144
3.01k
                img->get_SliceHeader((xC+i)<<chromashiftW,
145
3.01k
                                     (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
146
2.23k
              edgeIdx=0;
147
2.23k
              break;
148
2.23k
            }
149
150
1.57M
            if (sliceAddrRS >  ctbSliceAddrRS &&
151
1.55k
                img->get_SliceHeader(xS<<chromashiftW,
152
1.55k
                                     yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
153
1.44k
              edgeIdx=0;
154
1.44k
              break;
155
1.44k
            }
156
157
158
1.57M
            if (pps->loop_filter_across_tiles_enabled_flag==0 &&
159
1.56M
                pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] !=
160
1.56M
                pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) {
161
1.03k
              edgeIdx=0;
162
1.03k
              break;
163
1.03k
            }
164
1.57M
          }
165
166
12.0M
        if (edgeIdx != 0) {
167
168
11.7M
          edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) +
169
11.7M
                      Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]])   );
170
171
11.7M
          if (1) { // edgeIdx != 0) {   // seems to be faster without this check (zero in offset table)
172
11.7M
            int offset = saoOffsetVal[edgeIdx+2];
173
174
11.7M
            out_ptr[i] = Clip3(0,maxPixelValue,
175
11.7M
                               in_ptr[i] + offset);
176
11.7M
          }
177
11.7M
        }
178
12.0M
      }
179
395k
    }
180
14.7k
  }
181
85.1k
  else {
182
85.1k
    int bandShift = bitDepth-5;
183
85.1k
    int saoLeftClass = saoinfo->sao_band_position[cIdx];
184
85.1k
    logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass);
185
186
85.1k
    int bandTable[32];
187
85.1k
    memset(bandTable, 0, sizeof(int)*32);
188
189
425k
    for (int k=0;k<4;k++) {
190
340k
      bandTable[ (k+saoLeftClass)&31 ] = k+1;
191
340k
    }
192
193
194
    /* If PCM or transquant_bypass is used in this CTB, we have to
195
       run all checks (A).
196
       Otherwise, we run a simplified version of the code (B).
197
198
       NOTE: this whole part of SAO does not seem to be a significant part of the time spent
199
    */
200
201
85.1k
    if (extendedTests) {
202
203
      // (A) full version with all checks
204
205
468k
      for (int j=0;j<ctbH;j++)
206
21.9M
        for (int i=0;i<ctbW;i++) {
207
208
21.4M
          if ((sps->pcm_loop_filter_disable_flag &&
209
184k
               img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) ||
210
21.4M
              img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
211
16.4M
            continue;
212
16.4M
          }
213
214
          // Shifts are a strange thing. On x86, >>x actually computes >>(x%64).
215
          // But this should never happen, because the maximum bit-depth is 16.
216
5.07M
          int pixel = in_img[xC + i + (yC + j) * in_stride];
217
218
          // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
219
          // maybe when there was a decoding error and the pixels have not been filled in correctly.
220
          // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
221
5.07M
          pixel = Clip3(0, maxPixelValue, pixel);
222
223
5.07M
          int bandIdx = bandTable[pixel >> bandShift];
224
225
5.07M
          if (bandIdx>0) {
226
539k
            int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
227
228
539k
            logtrace(LogSAO,"%d %d (%d) offset %d  %x -> %x\n",xC+i,yC+j,bandIdx,
229
539k
                     offset,
230
539k
                     in_img[xC+i+(yC+j)*in_stride],
231
539k
                     in_img[xC+i+(yC+j)*in_stride]+offset);
232
233
539k
            out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
234
539k
                                                    in_img[xC+i+(yC+j)*in_stride] + offset);
235
539k
          }
236
5.07M
        }
237
15.3k
    }
238
69.7k
    else
239
69.7k
      {
240
        // (B) simplified version (only works if no PCM and transquant_bypass is active)
241
242
2.39M
        for (int j=0;j<ctbH;j++)
243
92.6M
          for (int i=0;i<ctbW;i++) {
244
245
90.3M
            int pixel = in_img[xC + i + (yC + j) * in_stride];
246
247
            // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
248
            // maybe when there was a decoding error and the pixels have not been filled in correctly.
249
            // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
250
90.3M
            pixel = Clip3(0, maxPixelValue, pixel);
251
252
90.3M
            int bandIdx = bandTable[pixel >> bandShift];
253
254
90.3M
            if (bandIdx>0) {
255
15.2M
              int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
256
257
15.2M
              out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
258
15.2M
                                                      in_img[xC+i+(yC+j)*in_stride] + offset);
259
15.2M
            }
260
90.3M
          }
261
69.7k
      }
262
85.1k
  }
263
99.9k
}
void apply_sao_internal<unsigned char>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned char const*, int, unsigned char*, int)
Line
Count
Source
33
1.38M
{
34
1.38M
  const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb);
35
36
1.38M
  int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3;
37
38
1.38M
  logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH);
39
40
1.38M
  if (SaoTypeIdx==0) {
41
810k
    return;
42
810k
  }
43
44
570k
  const seq_parameter_set* sps = &img->get_sps();
45
570k
  const pic_parameter_set* pps = &img->get_pps();
46
570k
  const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C);
47
570k
  const int maxPixelValue = (1<<bitDepth)-1;
48
49
  // top left position of CTB in pixels
50
570k
  const int xC = xCtb*nSW;
51
570k
  const int yC = yCtb*nSH;
52
53
570k
  const int width  = img->get_width(cIdx);
54
570k
  const int height = img->get_height(cIdx);
55
56
570k
  const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS;
57
58
570k
  const int picWidthInCtbs = sps->PicWidthInCtbsY;
59
570k
  const int chromashiftW = sps->get_chroma_shift_W(cIdx);
60
570k
  const int chromashiftH = sps->get_chroma_shift_H(cIdx);
61
570k
  const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW;
62
570k
  const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH;
63
64
65
3.42M
  for (int i=0;i<5;i++)
66
2.85M
    {
67
2.85M
      logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]);
68
2.85M
    }
69
70
71
  // actual size of CTB to be processed (can be smaller when partially outside of image)
72
570k
  const int ctbW = (xC+nSW>width)  ? width -xC : nSW;
73
570k
  const int ctbH = (yC+nSH>height) ? height-yC : nSH;
74
75
76
570k
  const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb);
77
78
570k
  if (SaoTypeIdx==2) {
79
274k
    int hPos[2], vPos[2];
80
274k
    int vPosStride[2]; // vPos[] multiplied by image stride
81
274k
    int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3;
82
83
274k
    switch (SaoEoClass) {
84
64.2k
    case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break;
85
80.1k
    case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break;
86
61.7k
    case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break;
87
68.8k
    case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break;
88
274k
    }
89
90
274k
    vPosStride[0] = vPos[0] * in_stride;
91
274k
    vPosStride[1] = vPos[1] * in_stride;
92
93
    /* Reorder sao_info.saoOffsetVal[] array, so that we can index it
94
       directly with the sum of the two pixel-difference signs. */
95
274k
    int8_t  saoOffsetVal[5]; // [2] unused
96
274k
    saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1];
97
274k
    saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1];
98
274k
    saoOffsetVal[2] = 0;
99
274k
    saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1];
100
274k
    saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1];
101
102
103
12.3M
    for (int j=0;j<ctbH;j++) {
104
12.0M
      const pixel_t* in_ptr  = &in_img [xC+(yC+j)*in_stride];
105
12.0M
      /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride];
106
107
615M
      for (int i=0;i<ctbW;i++) {
108
602M
        int edgeIdx = -1;
109
110
602M
        logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j);
111
112
602M
        if ((extendedTests &&
113
26.6M
             (sps->pcm_loop_filter_disable_flag &&
114
41.9k
              img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) ||
115
603M
            img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
116
14.7M
          continue;
117
14.7M
        }
118
119
        // do the expensive test for boundaries only at the boundaries
120
588M
        bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1);
121
122
588M
        if (testBoundary)
123
135M
          for (int k=0;k<2;k++) {
124
91.6M
            int xS = xC+i+hPos[k];
125
91.6M
            int yS = yC+j+vPos[k];
126
127
91.6M
            if (xS<0 || yS<0 || xS>=width || yS>=height) {
128
2.98M
              edgeIdx=0;
129
2.98M
              break;
130
2.98M
            }
131
132
133
            // This part seems inefficient with all the get_SliceHeaderIndex() calls,
134
            // but removing this part (because the input was known to have only a single
135
            // slice anyway) reduced computation time only by 1.3%.
136
            // TODO: however, this may still be a big part of SAO itself.
137
138
88.6M
            slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW,
139
88.6M
                                                                     yS<<chromashiftH);
140
88.6M
            if (sliceHeader==nullptr) { return; }
141
142
88.6M
            int sliceAddrRS = sliceHeader->SliceAddrRS;
143
88.6M
            if (sliceAddrRS <  ctbSliceAddrRS &&
144
7.63k
                img->get_SliceHeader((xC+i)<<chromashiftW,
145
7.63k
                                     (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
146
5.47k
              edgeIdx=0;
147
5.47k
              break;
148
5.47k
            }
149
150
88.6M
            if (sliceAddrRS >  ctbSliceAddrRS &&
151
4.27k
                img->get_SliceHeader(xS<<chromashiftW,
152
4.27k
                                     yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) {
153
3.79k
              edgeIdx=0;
154
3.79k
              break;
155
3.79k
            }
156
157
158
88.6M
            if (pps->loop_filter_across_tiles_enabled_flag==0 &&
159
88.6M
                pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] !=
160
88.6M
                pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) {
161
3.89k
              edgeIdx=0;
162
3.89k
              break;
163
3.89k
            }
164
88.6M
          }
165
166
588M
        if (edgeIdx != 0) {
167
168
585M
          edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) +
169
585M
                      Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]])   );
170
171
585M
          if (1) { // edgeIdx != 0) {   // seems to be faster without this check (zero in offset table)
172
585M
            int offset = saoOffsetVal[edgeIdx+2];
173
174
585M
            out_ptr[i] = Clip3(0,maxPixelValue,
175
585M
                               in_ptr[i] + offset);
176
585M
          }
177
585M
        }
178
588M
      }
179
12.0M
    }
180
274k
  }
181
295k
  else {
182
295k
    int bandShift = bitDepth-5;
183
295k
    int saoLeftClass = saoinfo->sao_band_position[cIdx];
184
295k
    logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass);
185
186
295k
    int bandTable[32];
187
295k
    memset(bandTable, 0, sizeof(int)*32);
188
189
1.47M
    for (int k=0;k<4;k++) {
190
1.18M
      bandTable[ (k+saoLeftClass)&31 ] = k+1;
191
1.18M
    }
192
193
194
    /* If PCM or transquant_bypass is used in this CTB, we have to
195
       run all checks (A).
196
       Otherwise, we run a simplified version of the code (B).
197
198
       NOTE: this whole part of SAO does not seem to be a significant part of the time spent
199
    */
200
201
295k
    if (extendedTests) {
202
203
      // (A) full version with all checks
204
205
671k
      for (int j=0;j<ctbH;j++)
206
32.4M
        for (int i=0;i<ctbW;i++) {
207
208
31.8M
          if ((sps->pcm_loop_filter_disable_flag &&
209
45.7k
               img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) ||
210
31.7M
              img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) {
211
23.3M
            continue;
212
23.3M
          }
213
214
          // Shifts are a strange thing. On x86, >>x actually computes >>(x%64).
215
          // But this should never happen, because the maximum bit-depth is 16.
216
8.49M
          int pixel = in_img[xC + i + (yC + j) * in_stride];
217
218
          // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
219
          // maybe when there was a decoding error and the pixels have not been filled in correctly.
220
          // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
221
8.49M
          pixel = Clip3(0, maxPixelValue, pixel);
222
223
8.49M
          int bandIdx = bandTable[pixel >> bandShift];
224
225
8.49M
          if (bandIdx>0) {
226
1.02M
            int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
227
228
1.02M
            logtrace(LogSAO,"%d %d (%d) offset %d  %x -> %x\n",xC+i,yC+j,bandIdx,
229
1.02M
                     offset,
230
1.02M
                     in_img[xC+i+(yC+j)*in_stride],
231
1.02M
                     in_img[xC+i+(yC+j)*in_stride]+offset);
232
233
1.02M
            out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
234
1.02M
                                                    in_img[xC+i+(yC+j)*in_stride] + offset);
235
1.02M
          }
236
8.49M
        }
237
20.2k
    }
238
275k
    else
239
275k
      {
240
        // (B) simplified version (only works if no PCM and transquant_bypass is active)
241
242
11.2M
        for (int j=0;j<ctbH;j++)
243
523M
          for (int i=0;i<ctbW;i++) {
244
245
512M
            int pixel = in_img[xC + i + (yC + j) * in_stride];
246
247
            // Note: the input pixel value should never exceed the valid range, but it seems that it still does,
248
            // maybe when there was a decoding error and the pixels have not been filled in correctly.
249
            // Thus, we have to limit the pixel range to ensure that we have no illegal table access.
250
512M
            pixel = Clip3(0, maxPixelValue, pixel);
251
252
512M
            int bandIdx = bandTable[pixel >> bandShift];
253
254
512M
            if (bandIdx>0) {
255
62.2M
              int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1];
256
257
62.2M
              out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue,
258
62.2M
                                                      in_img[xC+i+(yC+j)*in_stride] + offset);
259
62.2M
            }
260
512M
          }
261
275k
      }
262
295k
  }
263
570k
}
264
265
266
template <class pixel_t>
267
void apply_sao(de265_image* img, int xCtb,int yCtb,
268
               const slice_segment_header* shdr, int cIdx, int nSW,int nSH,
269
               const pixel_t* in_img,  int in_stride,
270
               /* */ pixel_t* out_img, int out_stride)
271
1.64M
{
272
1.64M
  if (img->high_bit_depth(cIdx)) {
273
259k
    apply_sao_internal<uint16_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH,
274
259k
                                 reinterpret_cast<const uint16_t*>(in_img), in_stride,
275
259k
                                 reinterpret_cast<uint16_t*>(out_img),out_stride);
276
259k
  }
277
1.38M
  else {
278
1.38M
    apply_sao_internal<uint8_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH,
279
1.38M
                                in_img, in_stride,
280
1.38M
                                out_img,out_stride);
281
1.38M
  }
282
1.64M
}
283
284
285
/* Apply sample adaptive offset (SAO) to all CTBs of 'img'.
 *
 * A copy of the whole image is created first and used as the filter input,
 * while the filtered samples are written into the planes of 'img'.
 *
 * The function returns without filtering when SAO is disabled in the SPS.
 * If the image copy cannot be created, the warning
 * DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY is added to the decoder context
 * and the image is left unmodified.
 */
void apply_sample_adaptive_offset(de265_image* img)
{
  const seq_parameter_set& sps = img->get_sps();

  if (sps.sample_adaptive_offset_enabled_flag==0) {
    return;
  }

  de265_image inputCopy;
  de265_error err = inputCopy.copy_image(img);
  if (err != DE265_OK) {
    img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false);
    return;
  }

  const int ctbSize = 1<<sps.Log2CtbSizeY;

  for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++)
    for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++)
      {
        const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb);

        // The slice header lookup can yield a null pointer (the sequential and
        // the threaded SAO code in this file both guard against it). Stop here
        // instead of dereferencing it.
        if (shdr==nullptr) {
          return;
        }

        if (shdr->slice_sao_luma_flag) {
          apply_sao(img, xCtb,yCtb, shdr, 0, ctbSize, ctbSize,
                    inputCopy.get_image_plane(0), inputCopy.get_image_stride(0),
                    img->get_image_plane(0), img->get_image_stride(0));
        }

        if (shdr->slice_sao_chroma_flag) {
          // CTB size in chroma samples
          int nSW = ctbSize / sps.SubWidthC;
          int nSH = ctbSize / sps.SubHeightC;

          apply_sao(img, xCtb,yCtb, shdr, 1, nSW,nSH,
                    inputCopy.get_image_plane(1), inputCopy.get_image_stride(1),
                    img->get_image_plane(1), img->get_image_stride(1));

          apply_sao(img, xCtb,yCtb, shdr, 2, nSW,nSH,
                    inputCopy.get_image_plane(2), inputCopy.get_image_stride(2),
                    img->get_image_plane(2), img->get_image_stride(2));
        }
      }
}
325
326
327
/* Apply sample adaptive offset (SAO) to 'img', one colour plane after the other.
 *
 * Instead of copying the whole image, only a single scratch buffer of the size
 * of the largest processed plane is allocated. For each plane, the plane is
 * copied into that buffer, which then serves as filter input while the result
 * is written into the plane of 'img'.
 *
 * The function returns without filtering when SAO is disabled in the SPS.
 * If the scratch buffer cannot be allocated, the warning
 * DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY is added to the decoder context.
 * Processing stops as soon as a CTB without slice header is encountered.
 */
void apply_sample_adaptive_offset_sequential(de265_image* img)
{
  const seq_parameter_set& sps = img->get_sps();

  if (sps.sample_adaptive_offset_enabled_flag==0) {
    return;
  }

  int nChannels = 3;
  if (sps.ChromaArrayType == CHROMA_MONO) { nChannels=1; }

  // Byte size of every plane that will be processed. The products are computed
  // in size_t so that large images cannot overflow an 'int'.
  size_t planeSize[3] = { 0, 0, 0 };
  size_t bufferSize = 0;

  for (int cIdx=0;cIdx<nChannels;cIdx++) {
    planeSize[cIdx] = (static_cast<size_t>(img->get_image_stride(cIdx))
                       * img->get_height(cIdx)
                       * img->get_bytes_per_pixel(cIdx));

    if (planeSize[cIdx] > bufferSize) {
      bufferSize = planeSize[cIdx];
    }
  }

  // Note: a plain 'new[]' throws on failure instead of returning a null pointer,
  // which would make the out-of-memory check below unreachable. malloc() reports
  // the failure through its return value. Request at least one byte, because
  // malloc(0) is allowed to return a null pointer even without an error.
  uint8_t* inputCopy = static_cast<uint8_t*>(malloc(bufferSize > 0 ? bufferSize : 1));
  if (inputCopy == nullptr) {
    img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false);
    return;
  }

  const int ctbSize = 1<<sps.Log2CtbSizeY;

  for (int cIdx=0;cIdx<nChannels;cIdx++) {

    int stride = img->get_image_stride(cIdx);

    // snapshot of the unfiltered plane, used as filter input
    memcpy(inputCopy, img->get_image_plane(cIdx), planeSize[cIdx]);

    for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++)
      for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++)
        {
          const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb);
          if (shdr==nullptr) {
            free(inputCopy);
            return;
          }

          if (cIdx==0 && shdr->slice_sao_luma_flag) {
            apply_sao(img, xCtb,yCtb, shdr, 0, ctbSize, ctbSize,
                      inputCopy, stride,
                      img->get_image_plane(0), img->get_image_stride(0));
          }

          if (cIdx!=0 && shdr->slice_sao_chroma_flag) {
            // CTB size in chroma samples
            int nSW = ctbSize / sps.SubWidthC;
            int nSH = ctbSize / sps.SubHeightC;

            apply_sao(img, xCtb,yCtb, shdr, cIdx, nSW,nSH,
                      inputCopy, stride,
                      img->get_image_plane(cIdx), img->get_image_stride(cIdx));
          }
        }
  }

  free(inputCopy);
}
383
384
385
386
387
class thread_task_sao : public thread_task
388
{
389
public:
390
  int  ctb_y;
391
  de265_image* img; /* this is where we get the SPS from
392
                       (either inputImg or outputImg can be a dummy image)
393
                    */
394
395
  de265_image* inputImg;
396
  de265_image* outputImg;
397
  int inputProgress;
398
399
  virtual void work();
400
0
  virtual std::string name() const {
401
0
    char buf[100];
402
0
    sprintf(buf,"sao-%d",ctb_y);
403
0
    return buf;
404
0
  }
405
};
406
407
408
void thread_task_sao::work()
409
49.2k
{
410
49.2k
  state = Running;
411
49.2k
  img->thread_run(this);
412
413
49.2k
  const seq_parameter_set& sps = img->get_sps();
414
415
49.2k
  const int rightCtb = sps.PicWidthInCtbsY-1;
416
49.2k
  const int ctbSize  = (1<<sps.Log2CtbSizeY);
417
418
419
  // wait until also the CTB-rows below and above are ready
420
421
49.2k
  img->wait_for_progress(this, rightCtb,ctb_y,  inputProgress);
422
423
49.2k
  if (ctb_y>0) {
424
37.5k
    img->wait_for_progress(this, rightCtb,ctb_y-1, inputProgress);
425
37.5k
  }
426
427
49.2k
  if (ctb_y+1<sps.PicHeightInCtbsY) {
428
37.5k
    img->wait_for_progress(this, rightCtb,ctb_y+1, inputProgress);
429
37.5k
  }
430
431
432
  // copy input image to output for this CTB-row
433
434
49.2k
  outputImg->copy_lines_from(inputImg, ctb_y * ctbSize, (ctb_y+1) * ctbSize);
435
436
437
  // process SAO in the CTB-row
438
439
656k
  for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++)
440
607k
    {
441
607k
      const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,ctb_y);
442
607k
      if (shdr==nullptr) {
443
2
        break;
444
2
      }
445
446
607k
      if (shdr->slice_sao_luma_flag) {
447
585k
        apply_sao(img, xCtb,ctb_y, shdr, 0, ctbSize, ctbSize,
448
585k
                  inputImg ->get_image_plane(0), inputImg ->get_image_stride(0),
449
585k
                  outputImg->get_image_plane(0), outputImg->get_image_stride(0));
450
585k
      }
451
452
607k
      if (shdr->slice_sao_chroma_flag) {
453
527k
        int nSW = ctbSize / sps.SubWidthC;
454
527k
        int nSH = ctbSize / sps.SubHeightC;
455
456
527k
        apply_sao(img, xCtb,ctb_y, shdr, 1, nSW,nSH,
457
527k
                  inputImg ->get_image_plane(1), inputImg ->get_image_stride(1),
458
527k
                  outputImg->get_image_plane(1), outputImg->get_image_stride(1));
459
460
527k
        apply_sao(img, xCtb,ctb_y, shdr, 2, nSW,nSH,
461
527k
                  inputImg ->get_image_plane(2), inputImg ->get_image_stride(2),
462
527k
                  outputImg->get_image_plane(2), outputImg->get_image_stride(2));
463
527k
      }
464
607k
    }
465
466
467
  // mark SAO progress
468
469
656k
  for (int x=0;x<=rightCtb;x++) {
470
607k
    const int CtbWidth = sps.PicWidthInCtbsY;
471
607k
    img->ctb_progress[x+ctb_y*CtbWidth].set_progress(CTB_PROGRESS_SAO);
472
607k
  }
473
474
475
49.2k
  state = Finished;
476
49.2k
  img->thread_finishes(this);
477
49.2k
}
478
479
480
/* Create one SAO task per CTB row of the image in 'imgunit', hand the tasks to
   the thread pool, and wait until all of them have finished.

   The filtered picture is built in imgunit->sao_output and its pixel data is
   exchanged with the main image at the end.

   'saoInputProgress' is stored in every task as its 'inputProgress'.

   Returns false when SAO is disabled in the SPS or when the output image cannot
   be allocated (a DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY warning is added
   in the latter case). Returns true otherwise.
 */
bool add_sao_tasks(image_unit* imgunit, int saoInputProgress)
{
  de265_image* const img = imgunit->img;
  const seq_parameter_set& sps = img->get_sps();

  if (!sps.sample_adaptive_offset_enabled_flag) {
    return false;
  }


  decoder_context* const ctx = img->decctx;

  // allocate the image that receives the filtered samples
  const de265_error allocResult =
    imgunit->sao_output.alloc_image(img->get_width(), img->get_height(),
                                    img->get_chroma_format(),
                                    img->get_shared_sps(),
                                    false,
                                    ctx, //img->encctx,
                                    img->pts, img->user_data, true);
  if (allocResult != DE265_OK) {
    ctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false);
    return false;
  }

  const int rowCount = sps.PicHeightInCtbsY;

  img->thread_start(rowCount);

  for (int row=0; row<rowCount; row++) {
    thread_task_sao* rowTask = new thread_task_sao;

    rowTask->ctb_y         = row;
    rowTask->img           = img;
    rowTask->inputImg      = img;
    rowTask->outputImg     = &imgunit->sao_output;
    rowTask->inputProgress = saoInputProgress;

    imgunit->tasks.push_back(rowTask);
    ctx->thread_pool_.add_task(rowTask);
  }

  /* A barrier is currently needed here: once all tasks are finished, the pixel
     data has to be swapped back into the main image. */
  img->wait_for_completion();

  img->exchange_pixel_data_with(imgunit->sao_output);

  return true;
}