/work/libde265/libde265/sao.cc
Line | Count | Source |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "sao.h" |
22 | | #include "util.h" |
23 | | |
24 | | #include <stdlib.h> |
25 | | #include <string.h> |
26 | | |
27 | | |
28 | | template <class pixel_t> |
29 | | void apply_sao_internal(de265_image* img, int xCtb,int yCtb, |
30 | | const slice_segment_header* shdr, int cIdx, int nSW,int nSH, |
31 | | const pixel_t* in_img, int in_stride, |
32 | | /* */ pixel_t* out_img, int out_stride) |
33 | 0 | { |
34 | 0 | const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb); |
35 | |
|
36 | 0 | int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3; |
37 | |
|
38 | 0 | logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH); |
39 | |
|
40 | 0 | if (SaoTypeIdx==0) { |
41 | 0 | return; |
42 | 0 | } |
43 | | |
44 | 0 | const seq_parameter_set* sps = &img->get_sps(); |
45 | 0 | const pic_parameter_set* pps = &img->get_pps(); |
46 | 0 | const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C); |
47 | 0 | const int maxPixelValue = (1<<bitDepth)-1; |
48 | | |
49 | | // top left position of CTB in pixels |
50 | 0 | const int xC = xCtb*nSW; |
51 | 0 | const int yC = yCtb*nSH; |
52 | |
|
53 | 0 | const int width = img->get_width(cIdx); |
54 | 0 | const int height = img->get_height(cIdx); |
55 | |
|
56 | 0 | const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS; |
57 | |
|
58 | 0 | const int picWidthInCtbs = sps->PicWidthInCtbsY; |
59 | 0 | const int chromashiftW = sps->get_chroma_shift_W(cIdx); |
60 | 0 | const int chromashiftH = sps->get_chroma_shift_H(cIdx); |
61 | 0 | const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW; |
62 | 0 | const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH; |
63 | | |
64 | |
|
65 | 0 | for (int i=0;i<5;i++) |
66 | 0 | { |
67 | 0 | logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]); |
68 | 0 | } |
69 | | |
70 | | |
71 | | // actual size of CTB to be processed (can be smaller when partially outside of image) |
72 | 0 | const int ctbW = (xC+nSW>width) ? width -xC : nSW; |
73 | 0 | const int ctbH = (yC+nSH>height) ? height-yC : nSH; |
74 | | |
75 | |
|
76 | 0 | const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb); |
77 | |
|
78 | 0 | if (SaoTypeIdx==2) { |
79 | 0 | int hPos[2], vPos[2]; |
80 | 0 | int vPosStride[2]; // vPos[] multiplied by image stride |
81 | 0 | int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3; |
82 | |
|
83 | 0 | switch (SaoEoClass) { |
84 | 0 | case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break; |
85 | 0 | case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break; |
86 | 0 | case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break; |
87 | 0 | case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break; |
88 | 0 | } |
89 | | |
90 | 0 | vPosStride[0] = vPos[0] * in_stride; |
91 | 0 | vPosStride[1] = vPos[1] * in_stride; |
92 | | |
93 | | /* Reorder sao_info.saoOffsetVal[] array, so that we can index it |
94 | | directly with the sum of the two pixel-difference signs. */ |
95 | 0 | int8_t saoOffsetVal[5]; // [2] unused |
96 | 0 | saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1]; |
97 | 0 | saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1]; |
98 | 0 | saoOffsetVal[2] = 0; |
99 | 0 | saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1]; |
100 | 0 | saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1]; |
101 | | |
102 | |
|
103 | 0 | for (int j=0;j<ctbH;j++) { |
104 | 0 | const pixel_t* in_ptr = &in_img [xC+(yC+j)*in_stride]; |
105 | 0 | /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride]; |
106 | |
|
107 | 0 | for (int i=0;i<ctbW;i++) { |
108 | 0 | int edgeIdx = -1; |
109 | |
|
110 | 0 | logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j); |
111 | |
|
112 | 0 | if ((extendedTests && |
113 | 0 | (sps->pcm_loop_filter_disable_flag && |
114 | 0 | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) || |
115 | 0 | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { |
116 | 0 | continue; |
117 | 0 | } |
118 | | |
119 | | // do the expensive test for boundaries only at the boundaries |
120 | 0 | bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1); |
121 | |
|
122 | 0 | if (testBoundary) |
123 | 0 | for (int k=0;k<2;k++) { |
124 | 0 | int xS = xC+i+hPos[k]; |
125 | 0 | int yS = yC+j+vPos[k]; |
126 | |
|
127 | 0 | if (xS<0 || yS<0 || xS>=width || yS>=height) { |
128 | 0 | edgeIdx=0; |
129 | 0 | break; |
130 | 0 | } |
131 | | |
132 | | |
133 | | // This part seems inefficient with all the get_SliceHeaderIndex() calls, |
134 | | // but removing this part (because the input was known to have only a single |
135 | | // slice anyway) reduced computation time only by 1.3%. |
136 | | // TODO: however, this may still be a big part of SAO itself. |
137 | | |
138 | 0 | slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW, |
139 | 0 | yS<<chromashiftH); |
140 | 0 | if (sliceHeader==NULL) { return; } |
141 | | |
142 | 0 | int sliceAddrRS = sliceHeader->SliceAddrRS; |
143 | 0 | if (sliceAddrRS < ctbSliceAddrRS && |
144 | 0 | img->get_SliceHeader((xC+i)<<chromashiftW, |
145 | 0 | (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { |
146 | 0 | edgeIdx=0; |
147 | 0 | break; |
148 | 0 | } |
149 | | |
150 | 0 | if (sliceAddrRS > ctbSliceAddrRS && |
151 | 0 | img->get_SliceHeader(xS<<chromashiftW, |
152 | 0 | yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { |
153 | 0 | edgeIdx=0; |
154 | 0 | break; |
155 | 0 | } |
156 | | |
157 | | |
158 | 0 | if (pps->loop_filter_across_tiles_enabled_flag==0 && |
159 | 0 | pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] != |
160 | 0 | pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) { |
161 | 0 | edgeIdx=0; |
162 | 0 | break; |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 0 | if (edgeIdx != 0) { |
167 | |
|
168 | 0 | edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) + |
169 | 0 | Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]]) ); |
170 | |
|
171 | 0 | if (1) { // edgeIdx != 0) { // seems to be faster without this check (zero in offset table) |
172 | 0 | int offset = saoOffsetVal[edgeIdx+2]; |
173 | |
|
174 | 0 | out_ptr[i] = Clip3(0,maxPixelValue, |
175 | 0 | in_ptr[i] + offset); |
176 | 0 | } |
177 | 0 | } |
178 | 0 | } |
179 | 0 | } |
180 | 0 | } |
181 | 0 | else { |
182 | 0 | int bandShift = bitDepth-5; |
183 | 0 | int saoLeftClass = saoinfo->sao_band_position[cIdx]; |
184 | 0 | logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass); |
185 | |
|
186 | 0 | int bandTable[32]; |
187 | 0 | memset(bandTable, 0, sizeof(int)*32); |
188 | |
|
189 | 0 | for (int k=0;k<4;k++) { |
190 | 0 | bandTable[ (k+saoLeftClass)&31 ] = k+1; |
191 | 0 | } |
192 | | |
193 | | |
194 | | /* If PCM or transquant_bypass is used in this CTB, we have to |
195 | | run all checks (A). |
196 | | Otherwise, we run a simplified version of the code (B). |
197 | | |
198 | | NOTE: this whole part of SAO does not seem to be a significant part of the time spent |
199 | | */ |
200 | |
|
201 | 0 | if (extendedTests) { |
202 | | |
203 | | // (A) full version with all checks |
204 | |
|
205 | 0 | for (int j=0;j<ctbH;j++) |
206 | 0 | for (int i=0;i<ctbW;i++) { |
207 | |
|
208 | 0 | if ((sps->pcm_loop_filter_disable_flag && |
209 | 0 | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) || |
210 | 0 | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { |
211 | 0 | continue; |
212 | 0 | } |
213 | | |
214 | | // Shifts are a strange thing. On x86, >>x actually computes >>(x%64). |
215 | | // But this should never happen, because the maximum bit-depth is 16. |
216 | 0 | int pixel = in_img[xC + i + (yC + j) * in_stride]; |
217 | | |
218 | | // Note: the input pixel value should never exceed the valid range, but it seems that it still does, |
219 | | // maybe when there was a decoding error and the pixels have not been filled in correctly. |
220 | | // Thus, we have to limit the pixel range to ensure that we have no illegal table access. |
221 | 0 | pixel = Clip3(0, maxPixelValue, pixel); |
222 | |
|
223 | 0 | int bandIdx = bandTable[pixel >> bandShift]; |
224 | |
|
225 | 0 | if (bandIdx>0) { |
226 | 0 | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; |
227 | |
|
228 | 0 | logtrace(LogSAO,"%d %d (%d) offset %d %x -> %x\n",xC+i,yC+j,bandIdx, |
229 | 0 | offset, |
230 | 0 | in_img[xC+i+(yC+j)*in_stride], |
231 | 0 | in_img[xC+i+(yC+j)*in_stride]+offset); |
232 | |
|
233 | 0 | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, |
234 | 0 | in_img[xC+i+(yC+j)*in_stride] + offset); |
235 | 0 | } |
236 | 0 | } |
237 | 0 | } |
238 | 0 | else |
239 | 0 | { |
240 | | // (B) simplified version (only works if no PCM and transquant_bypass is active) |
241 | |
|
242 | 0 | for (int j=0;j<ctbH;j++) |
243 | 0 | for (int i=0;i<ctbW;i++) { |
244 | |
|
245 | 0 | int pixel = in_img[xC + i + (yC + j) * in_stride]; |
246 | | |
247 | | // Note: the input pixel value should never exceed the valid range, but it seems that it still does, |
248 | | // maybe when there was a decoding error and the pixels have not been filled in correctly. |
249 | | // Thus, we have to limit the pixel range to ensure that we have no illegal table access. |
250 | 0 | pixel = Clip3(0, maxPixelValue, pixel); |
251 | |
|
252 | 0 | int bandIdx = bandTable[pixel >> bandShift]; |
253 | |
|
254 | 0 | if (bandIdx>0) { |
255 | 0 | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; |
256 | |
|
257 | 0 | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, |
258 | 0 | in_img[xC+i+(yC+j)*in_stride] + offset); |
259 | 0 | } |
260 | 0 | } |
261 | 0 | } |
262 | 0 | } |
263 | 0 | } Unexecuted instantiation: void apply_sao_internal<unsigned short>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned short const*, int, unsigned short*, int) Unexecuted instantiation: void apply_sao_internal<unsigned char>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned char const*, int, unsigned char*, int) |
264 | | |
265 | | |
266 | | template <class pixel_t> |
267 | | void apply_sao(de265_image* img, int xCtb,int yCtb, |
268 | | const slice_segment_header* shdr, int cIdx, int nSW,int nSH, |
269 | | const pixel_t* in_img, int in_stride, |
270 | | /* */ pixel_t* out_img, int out_stride) |
271 | 0 | { |
272 | 0 | if (img->high_bit_depth(cIdx)) { |
273 | 0 | apply_sao_internal<uint16_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH, |
274 | 0 | (uint16_t*)in_img, in_stride, |
275 | 0 | (uint16_t*)out_img,out_stride); |
276 | 0 | } |
277 | 0 | else { |
278 | 0 | apply_sao_internal<uint8_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH, |
279 | 0 | in_img, in_stride, |
280 | 0 | out_img,out_stride); |
281 | 0 | } |
282 | 0 | } |
283 | | |
284 | | |
285 | | void apply_sample_adaptive_offset(de265_image* img) |
286 | 0 | { |
287 | 0 | const seq_parameter_set& sps = img->get_sps(); |
288 | |
|
289 | 0 | if (sps.sample_adaptive_offset_enabled_flag==0) { |
290 | 0 | return; |
291 | 0 | } |
292 | | |
293 | 0 | de265_image inputCopy; |
294 | 0 | de265_error err = inputCopy.copy_image(img); |
295 | 0 | if (err != DE265_OK) { |
296 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
297 | 0 | return; |
298 | 0 | } |
299 | | |
300 | 0 | for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++) |
301 | 0 | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
302 | 0 | { |
303 | 0 | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb); |
304 | |
|
305 | 0 | if (shdr->slice_sao_luma_flag) { |
306 | 0 | apply_sao(img, xCtb,yCtb, shdr, 0, 1<<sps.Log2CtbSizeY, 1<<sps.Log2CtbSizeY, |
307 | 0 | inputCopy.get_image_plane(0), inputCopy.get_image_stride(0), |
308 | 0 | img->get_image_plane(0), img->get_image_stride(0)); |
309 | 0 | } |
310 | |
|
311 | 0 | if (shdr->slice_sao_chroma_flag) { |
312 | 0 | int nSW = (1<<sps.Log2CtbSizeY) / sps.SubWidthC; |
313 | 0 | int nSH = (1<<sps.Log2CtbSizeY) / sps.SubHeightC; |
314 | |
|
315 | 0 | apply_sao(img, xCtb,yCtb, shdr, 1, nSW,nSH, |
316 | 0 | inputCopy.get_image_plane(1), inputCopy.get_image_stride(1), |
317 | 0 | img->get_image_plane(1), img->get_image_stride(1)); |
318 | |
|
319 | 0 | apply_sao(img, xCtb,yCtb, shdr, 2, nSW,nSH, |
320 | 0 | inputCopy.get_image_plane(2), inputCopy.get_image_stride(2), |
321 | 0 | img->get_image_plane(2), img->get_image_stride(2)); |
322 | 0 | } |
323 | 0 | } |
324 | 0 | } |
325 | | |
326 | | |
327 | | void apply_sample_adaptive_offset_sequential(de265_image* img) |
328 | 0 | { |
329 | 0 | const seq_parameter_set& sps = img->get_sps(); |
330 | |
|
331 | 0 | if (sps.sample_adaptive_offset_enabled_flag==0) { |
332 | 0 | return; |
333 | 0 | } |
334 | | |
335 | 0 | int lumaImageSize = img->get_image_stride(0) * img->get_height(0) * img->get_bytes_per_pixel(0); |
336 | 0 | int chromaImageSize = img->get_image_stride(1) * img->get_height(1) * img->get_bytes_per_pixel(1); |
337 | |
|
338 | 0 | uint8_t* inputCopy = new uint8_t[ libde265_max(lumaImageSize, chromaImageSize) ]; |
339 | 0 | if (inputCopy == NULL) { |
340 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
341 | 0 | return; |
342 | 0 | } |
343 | | |
344 | | |
345 | 0 | int nChannels = 3; |
346 | 0 | if (sps.ChromaArrayType == CHROMA_MONO) { nChannels=1; } |
347 | |
|
348 | 0 | for (int cIdx=0;cIdx<nChannels;cIdx++) { |
349 | |
|
350 | 0 | int stride = img->get_image_stride(cIdx); |
351 | 0 | int height = img->get_height(cIdx); |
352 | |
|
353 | 0 | memcpy(inputCopy, img->get_image_plane(cIdx), stride * height * img->get_bytes_per_pixel(cIdx)); |
354 | |
|
355 | 0 | for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++) |
356 | 0 | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
357 | 0 | { |
358 | 0 | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb); |
359 | 0 | if (shdr==NULL) { |
360 | 0 | delete[] inputCopy; |
361 | 0 | return; |
362 | 0 | } |
363 | | |
364 | 0 | if (cIdx==0 && shdr->slice_sao_luma_flag) { |
365 | 0 | apply_sao(img, xCtb,yCtb, shdr, 0, 1<<sps.Log2CtbSizeY, 1<<sps.Log2CtbSizeY, |
366 | 0 | inputCopy, stride, |
367 | 0 | img->get_image_plane(0), img->get_image_stride(0)); |
368 | 0 | } |
369 | |
|
370 | 0 | if (cIdx!=0 && shdr->slice_sao_chroma_flag) { |
371 | 0 | int nSW = (1<<sps.Log2CtbSizeY) / sps.SubWidthC; |
372 | 0 | int nSH = (1<<sps.Log2CtbSizeY) / sps.SubHeightC; |
373 | |
|
374 | 0 | apply_sao(img, xCtb,yCtb, shdr, cIdx, nSW,nSH, |
375 | 0 | inputCopy, stride, |
376 | 0 | img->get_image_plane(cIdx), img->get_image_stride(cIdx)); |
377 | 0 | } |
378 | 0 | } |
379 | 0 | } |
380 | | |
381 | 0 | delete[] inputCopy; |
382 | 0 | } |
383 | | |
384 | | |
385 | | |
386 | | |
387 | | class thread_task_sao : public thread_task |
388 | | { |
389 | | public: |
390 | | int ctb_y; |
391 | | de265_image* img; /* this is where we get the SPS from |
392 | | (either inputImg or outputImg can be a dummy image) |
393 | | */ |
394 | | |
395 | | de265_image* inputImg; |
396 | | de265_image* outputImg; |
397 | | int inputProgress; |
398 | | |
399 | | virtual void work(); |
400 | 0 | virtual std::string name() const { |
401 | 0 | char buf[100]; |
402 | 0 | sprintf(buf,"sao-%d",ctb_y); |
403 | 0 | return buf; |
404 | 0 | } |
405 | | }; |
406 | | |
407 | | |
408 | | void thread_task_sao::work() |
409 | 0 | { |
410 | 0 | state = Running; |
411 | 0 | img->thread_run(this); |
412 | |
|
413 | 0 | const seq_parameter_set& sps = img->get_sps(); |
414 | |
|
415 | 0 | const int rightCtb = sps.PicWidthInCtbsY-1; |
416 | 0 | const int ctbSize = (1<<sps.Log2CtbSizeY); |
417 | | |
418 | | |
419 | | // wait until also the CTB-rows below and above are ready |
420 | |
|
421 | 0 | img->wait_for_progress(this, rightCtb,ctb_y, inputProgress); |
422 | |
|
423 | 0 | if (ctb_y>0) { |
424 | 0 | img->wait_for_progress(this, rightCtb,ctb_y-1, inputProgress); |
425 | 0 | } |
426 | |
|
427 | 0 | if (ctb_y+1<sps.PicHeightInCtbsY) { |
428 | 0 | img->wait_for_progress(this, rightCtb,ctb_y+1, inputProgress); |
429 | 0 | } |
430 | | |
431 | | |
432 | | // copy input image to output for this CTB-row |
433 | |
|
434 | 0 | outputImg->copy_lines_from(inputImg, ctb_y * ctbSize, (ctb_y+1) * ctbSize); |
435 | | |
436 | | |
437 | | // process SAO in the CTB-row |
438 | |
|
439 | 0 | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
440 | 0 | { |
441 | 0 | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,ctb_y); |
442 | 0 | if (shdr==NULL) { |
443 | 0 | break; |
444 | 0 | } |
445 | | |
446 | 0 | if (shdr->slice_sao_luma_flag) { |
447 | 0 | apply_sao(img, xCtb,ctb_y, shdr, 0, ctbSize, ctbSize, |
448 | 0 | inputImg ->get_image_plane(0), inputImg ->get_image_stride(0), |
449 | 0 | outputImg->get_image_plane(0), outputImg->get_image_stride(0)); |
450 | 0 | } |
451 | |
|
452 | 0 | if (shdr->slice_sao_chroma_flag) { |
453 | 0 | int nSW = ctbSize / sps.SubWidthC; |
454 | 0 | int nSH = ctbSize / sps.SubHeightC; |
455 | |
|
456 | 0 | apply_sao(img, xCtb,ctb_y, shdr, 1, nSW,nSH, |
457 | 0 | inputImg ->get_image_plane(1), inputImg ->get_image_stride(1), |
458 | 0 | outputImg->get_image_plane(1), outputImg->get_image_stride(1)); |
459 | |
|
460 | 0 | apply_sao(img, xCtb,ctb_y, shdr, 2, nSW,nSH, |
461 | 0 | inputImg ->get_image_plane(2), inputImg ->get_image_stride(2), |
462 | 0 | outputImg->get_image_plane(2), outputImg->get_image_stride(2)); |
463 | 0 | } |
464 | 0 | } |
465 | | |
466 | | |
467 | | // mark SAO progress |
468 | |
|
469 | 0 | for (int x=0;x<=rightCtb;x++) { |
470 | 0 | const int CtbWidth = sps.PicWidthInCtbsY; |
471 | 0 | img->ctb_progress[x+ctb_y*CtbWidth].set_progress(CTB_PROGRESS_SAO); |
472 | 0 | } |
473 | | |
474 | |
|
475 | 0 | state = Finished; |
476 | 0 | img->thread_finishes(this); |
477 | 0 | } |
478 | | |
479 | | |
480 | | bool add_sao_tasks(image_unit* imgunit, int saoInputProgress) |
481 | 0 | { |
482 | 0 | de265_image* img = imgunit->img; |
483 | 0 | const seq_parameter_set& sps = img->get_sps(); |
484 | |
|
485 | 0 | if (sps.sample_adaptive_offset_enabled_flag==0) { |
486 | 0 | return false; |
487 | 0 | } |
488 | | |
489 | | |
490 | 0 | decoder_context* ctx = img->decctx; |
491 | |
|
492 | 0 | de265_error err = imgunit->sao_output.alloc_image(img->get_width(), img->get_height(), |
493 | 0 | img->get_chroma_format(), |
494 | 0 | img->get_shared_sps(), |
495 | 0 | false, |
496 | 0 | img->decctx, //img->encctx, |
497 | 0 | img->pts, img->user_data, true); |
498 | 0 | if (err != DE265_OK) { |
499 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
500 | 0 | return false; |
501 | 0 | } |
502 | | |
503 | 0 | int nRows = sps.PicHeightInCtbsY; |
504 | |
|
505 | 0 | int n=0; |
506 | 0 | img->thread_start(nRows); |
507 | |
|
508 | 0 | for (int y=0;y<nRows;y++) |
509 | 0 | { |
510 | 0 | thread_task_sao* task = new thread_task_sao; |
511 | |
|
512 | 0 | task->inputImg = img; |
513 | 0 | task->outputImg = &imgunit->sao_output; |
514 | 0 | task->img = img; |
515 | 0 | task->ctb_y = y; |
516 | 0 | task->inputProgress = saoInputProgress; |
517 | |
|
518 | 0 | imgunit->tasks.push_back(task); |
519 | 0 | add_task(&ctx->thread_pool_, task); |
520 | 0 | n++; |
521 | 0 | } |
522 | | |
523 | | /* Currently need barrier here because when are finished, we have to swap the pixel |
524 | | data back into the main image. */ |
525 | 0 | img->wait_for_completion(); |
526 | |
|
527 | 0 | img->exchange_pixel_data_with(imgunit->sao_output); |
528 | |
|
529 | 0 | return true; |
530 | 0 | } |