/src/libde265/libde265/sao.cc
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * H.265 video codec. |
3 | | * Copyright (c) 2013-2014 struktur AG, Dirk Farin <farin@struktur.de> |
4 | | * |
5 | | * This file is part of libde265. |
6 | | * |
7 | | * libde265 is free software: you can redistribute it and/or modify |
8 | | * it under the terms of the GNU Lesser General Public License as |
9 | | * published by the Free Software Foundation, either version 3 of |
10 | | * the License, or (at your option) any later version. |
11 | | * |
12 | | * libde265 is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
15 | | * GNU Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with libde265. If not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "sao.h" |
22 | | #include "util.h" |
23 | | |
24 | | #include <stdlib.h> |
25 | | #include <string.h> |
26 | | |
27 | | |
28 | | template <class pixel_t> |
29 | | void apply_sao_internal(de265_image* img, int xCtb,int yCtb, |
30 | | const slice_segment_header* shdr, int cIdx, int nSW,int nSH, |
31 | | const pixel_t* in_img, int in_stride, |
32 | | /* */ pixel_t* out_img, int out_stride) |
33 | 3.11M | { |
34 | 3.11M | const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb); |
35 | | |
36 | 3.11M | int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3; |
37 | | |
38 | 3.11M | logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH); |
39 | | |
40 | 3.11M | if (SaoTypeIdx==0) { |
41 | 1.49M | return; |
42 | 1.49M | } |
43 | | |
44 | 1.61M | const seq_parameter_set* sps = &img->get_sps(); |
45 | 1.61M | const pic_parameter_set* pps = &img->get_pps(); |
46 | 1.61M | const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C); |
47 | 1.61M | const int maxPixelValue = (1<<bitDepth)-1; |
48 | | |
49 | | // top left position of CTB in pixels |
50 | 1.61M | const int xC = xCtb*nSW; |
51 | 1.61M | const int yC = yCtb*nSH; |
52 | | |
53 | 1.61M | const int width = img->get_width(cIdx); |
54 | 1.61M | const int height = img->get_height(cIdx); |
55 | | |
56 | 1.61M | const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS; |
57 | | |
58 | 1.61M | const int picWidthInCtbs = sps->PicWidthInCtbsY; |
59 | 1.61M | const int chromashiftW = sps->get_chroma_shift_W(cIdx); |
60 | 1.61M | const int chromashiftH = sps->get_chroma_shift_H(cIdx); |
61 | 1.61M | const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW; |
62 | 1.61M | const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH; |
63 | | |
64 | | |
65 | 9.65M | for (int i=0;i<5;i++) |
66 | 8.04M | { |
67 | 8.04M | logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]); |
68 | 8.04M | } |
69 | | |
70 | | |
71 | | // actual size of CTB to be processed (can be smaller when partially outside of image) |
72 | 1.61M | const int ctbW = (xC+nSW>width) ? width -xC : nSW; |
73 | 1.61M | const int ctbH = (yC+nSH>height) ? height-yC : nSH; |
74 | | |
75 | | |
76 | 1.61M | const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb); |
77 | | |
78 | 1.61M | if (SaoTypeIdx==2) { |
79 | 79.5k | int hPos[2], vPos[2]; |
80 | 79.5k | int vPosStride[2]; // vPos[] multiplied by image stride |
81 | 79.5k | int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3; |
82 | | |
83 | 79.5k | switch (SaoEoClass) { |
84 | 5.18k | case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break; |
85 | 10.8k | case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break; |
86 | 24.5k | case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break; |
87 | 38.9k | case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break; |
88 | 79.5k | } |
89 | | |
90 | 79.5k | vPosStride[0] = vPos[0] * in_stride; |
91 | 79.5k | vPosStride[1] = vPos[1] * in_stride; |
92 | | |
93 | | /* Reorder sao_info.saoOffsetVal[] array, so that we can index it |
94 | | directly with the sum of the two pixel-difference signs. */ |
95 | 79.5k | int8_t saoOffsetVal[5]; // [2] unused |
96 | 79.5k | saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1]; |
97 | 79.5k | saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1]; |
98 | 79.5k | saoOffsetVal[2] = 0; |
99 | 79.5k | saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1]; |
100 | 79.5k | saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1]; |
101 | | |
102 | | |
103 | 2.04M | for (int j=0;j<ctbH;j++) { |
104 | 1.96M | const pixel_t* in_ptr = &in_img [xC+(yC+j)*in_stride]; |
105 | 1.96M | /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride]; |
106 | | |
107 | 52.6M | for (int i=0;i<ctbW;i++) { |
108 | 50.6M | int edgeIdx = -1; |
109 | | |
110 | 50.6M | logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j); |
111 | | |
112 | 50.6M | if ((extendedTests && |
113 | 50.6M | (sps->pcm_loop_filter_disable_flag && |
114 | 899k | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) || |
115 | 50.6M | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { |
116 | 727k | continue; |
117 | 727k | } |
118 | | |
119 | | // do the expensive test for boundaries only at the boundaries |
120 | 49.9M | bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1); |
121 | | |
122 | 49.9M | if (testBoundary) |
123 | 21.4M | for (int k=0;k<2;k++) { |
124 | 14.3M | int xS = xC+i+hPos[k]; |
125 | 14.3M | int yS = yC+j+vPos[k]; |
126 | | |
127 | 14.3M | if (xS<0 || yS<0 || xS>=width || yS>=height) { |
128 | 318k | edgeIdx=0; |
129 | 318k | break; |
130 | 318k | } |
131 | | |
132 | | |
133 | | // This part seems inefficient with all the get_SliceHeaderIndex() calls, |
134 | | // but removing this part (because the input was known to have only a single |
135 | | // slice anyway) reduced computation time only by 1.3%. |
136 | | // TODO: however, this may still be a big part of SAO itself. |
137 | | |
138 | 14.0M | slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW, |
139 | 14.0M | yS<<chromashiftH); |
140 | 14.0M | if (sliceHeader==NULL) { return; } |
141 | | |
142 | 14.0M | int sliceAddrRS = sliceHeader->SliceAddrRS; |
143 | 14.0M | if (sliceAddrRS < ctbSliceAddrRS && |
144 | 14.0M | img->get_SliceHeader((xC+i)<<chromashiftW, |
145 | 0 | (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { |
146 | 0 | edgeIdx=0; |
147 | 0 | break; |
148 | 0 | } |
149 | | |
150 | 14.0M | if (sliceAddrRS > ctbSliceAddrRS && |
151 | 14.0M | img->get_SliceHeader(xS<<chromashiftW, |
152 | 0 | yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { |
153 | 0 | edgeIdx=0; |
154 | 0 | break; |
155 | 0 | } |
156 | | |
157 | | |
158 | 14.0M | if (pps->loop_filter_across_tiles_enabled_flag==0 && |
159 | 14.0M | pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] != |
160 | 14.0M | pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) { |
161 | 0 | edgeIdx=0; |
162 | 0 | break; |
163 | 0 | } |
164 | 14.0M | } |
165 | | |
166 | 49.9M | if (edgeIdx != 0) { |
167 | | |
168 | 49.6M | edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) + |
169 | 49.6M | Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]]) ); |
170 | | |
171 | 49.6M | if (1) { // edgeIdx != 0) { // seems to be faster without this check (zero in offset table) |
172 | 49.6M | int offset = saoOffsetVal[edgeIdx+2]; |
173 | | |
174 | 49.6M | out_ptr[i] = Clip3(0,maxPixelValue, |
175 | 49.6M | in_ptr[i] + offset); |
176 | 49.6M | } |
177 | 49.6M | } |
178 | 49.9M | } |
179 | 1.96M | } |
180 | 79.5k | } |
181 | 1.53M | else { |
182 | 1.53M | int bandShift = bitDepth-5; |
183 | 1.53M | int saoLeftClass = saoinfo->sao_band_position[cIdx]; |
184 | 1.53M | logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass); |
185 | | |
186 | 1.53M | int bandTable[32]; |
187 | 1.53M | memset(bandTable, 0, sizeof(int)*32); |
188 | | |
189 | 7.63M | for (int k=0;k<4;k++) { |
190 | 6.10M | bandTable[ (k+saoLeftClass)&31 ] = k+1; |
191 | 6.10M | } |
192 | | |
193 | | |
194 | | /* If PCM or transquant_bypass is used in this CTB, we have to |
195 | | run all checks (A). |
196 | | Otherwise, we run a simplified version of the code (B). |
197 | | |
198 | | NOTE: this whole part of SAO does not seem to be a significant part of the time spent |
199 | | */ |
200 | | |
201 | 1.53M | if (extendedTests) { |
202 | | |
203 | | // (A) full version with all checks |
204 | | |
205 | 3.88M | for (int j=0;j<ctbH;j++) |
206 | 87.0M | for (int i=0;i<ctbW;i++) { |
207 | | |
208 | 83.3M | if ((sps->pcm_loop_filter_disable_flag && |
209 | 83.3M | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) || |
210 | 83.3M | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { |
211 | 83.2M | continue; |
212 | 83.2M | } |
213 | | |
214 | | // Shifts are a strange thing. On x86, >>x actually computes >>(x%64). |
215 | | // So we have to take care of large bandShifts. |
216 | 88.7k | int bandIdx; |
217 | 88.7k | if (bandShift >= 8) { |
218 | 0 | bandIdx = 0; |
219 | 88.7k | } else { |
220 | 88.7k | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; |
221 | 88.7k | } |
222 | | |
223 | 88.7k | if (bandIdx>0) { |
224 | 8.80k | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; |
225 | | |
226 | 8.80k | logtrace(LogSAO,"%d %d (%d) offset %d %x -> %x\n",xC+i,yC+j,bandIdx, |
227 | 8.80k | offset, |
228 | 8.80k | in_img[xC+i+(yC+j)*in_stride], |
229 | 8.80k | in_img[xC+i+(yC+j)*in_stride]+offset); |
230 | | |
231 | 8.80k | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, |
232 | 8.80k | in_img[xC+i+(yC+j)*in_stride] + offset); |
233 | 8.80k | } |
234 | 88.7k | } |
235 | 183k | } |
236 | 1.35M | else |
237 | 1.35M | { |
238 | | // (B) simplified version (only works if no PCM and transquant_bypass is active) |
239 | | |
240 | 29.4M | for (int j=0;j<ctbH;j++) |
241 | 697M | for (int i=0;i<ctbW;i++) { |
242 | | |
243 | | // see above |
244 | 669M | int bandIdx; |
245 | 669M | if (bandShift >= 8) { |
246 | 29.6k | bandIdx = 0; |
247 | 669M | } else { |
248 | 669M | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; |
249 | 669M | } |
250 | | |
251 | 669M | if (bandIdx>0) { |
252 | 9.32M | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; |
253 | | |
254 | 9.32M | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, |
255 | 9.32M | in_img[xC+i+(yC+j)*in_stride] + offset); |
256 | 9.32M | } |
257 | 669M | } |
258 | 1.35M | } |
259 | 1.53M | } |
260 | 1.61M | } void apply_sao_internal<unsigned short>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned short const*, int, unsigned short*, int) Line | Count | Source | 33 | 44.2k | { | 34 | 44.2k | const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb); | 35 | | | 36 | 44.2k | int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3; | 37 | | | 38 | 44.2k | logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH); | 39 | | | 40 | 44.2k | if (SaoTypeIdx==0) { | 41 | 37.4k | return; | 42 | 37.4k | } | 43 | | | 44 | 6.82k | const seq_parameter_set* sps = &img->get_sps(); | 45 | 6.82k | const pic_parameter_set* pps = &img->get_pps(); | 46 | 6.82k | const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C); | 47 | 6.82k | const int maxPixelValue = (1<<bitDepth)-1; | 48 | | | 49 | | // top left position of CTB in pixels | 50 | 6.82k | const int xC = xCtb*nSW; | 51 | 6.82k | const int yC = yCtb*nSH; | 52 | | | 53 | 6.82k | const int width = img->get_width(cIdx); | 54 | 6.82k | const int height = img->get_height(cIdx); | 55 | | | 56 | 6.82k | const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS; | 57 | | | 58 | 6.82k | const int picWidthInCtbs = sps->PicWidthInCtbsY; | 59 | 6.82k | const int chromashiftW = sps->get_chroma_shift_W(cIdx); | 60 | 6.82k | const int chromashiftH = sps->get_chroma_shift_H(cIdx); | 61 | 6.82k | const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW; | 62 | 6.82k | const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH; | 63 | | | 64 | | | 65 | 40.9k | for (int i=0;i<5;i++) | 66 | 34.1k | { | 67 | 34.1k | logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]); | 68 | 34.1k | } | 69 | | | 70 | | | 71 | | // actual size of CTB to be processed (can be smaller when partially outside of image) | 72 | 6.82k | const int ctbW = (xC+nSW>width) ? width -xC : nSW; | 73 | 6.82k | const int ctbH = (yC+nSH>height) ? height-yC : nSH; | 74 | | | 75 | | | 76 | 6.82k | const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb); | 77 | | | 78 | 6.82k | if (SaoTypeIdx==2) { | 79 | 1.05k | int hPos[2], vPos[2]; | 80 | 1.05k | int vPosStride[2]; // vPos[] multiplied by image stride | 81 | 1.05k | int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3; | 82 | | | 83 | 1.05k | switch (SaoEoClass) { | 84 | 451 | case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break; | 85 | 114 | case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break; | 86 | 197 | case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break; | 87 | 297 | case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break; | 88 | 1.05k | } | 89 | | | 90 | 1.05k | vPosStride[0] = vPos[0] * in_stride; | 91 | 1.05k | vPosStride[1] = vPos[1] * in_stride; | 92 | | | 93 | | /* Reorder sao_info.saoOffsetVal[] array, so that we can index it | 94 | | directly with the sum of the two pixel-difference signs. */ | 95 | 1.05k | int8_t saoOffsetVal[5]; // [2] unused | 96 | 1.05k | saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1]; | 97 | 1.05k | saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1]; | 98 | 1.05k | saoOffsetVal[2] = 0; | 99 | 1.05k | saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1]; | 100 | 1.05k | saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1]; | 101 | | | 102 | | | 103 | 19.7k | for (int j=0;j<ctbH;j++) { | 104 | 18.7k | const pixel_t* in_ptr = &in_img [xC+(yC+j)*in_stride]; | 105 | 18.7k | /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride]; | 106 | | | 107 | 496k | for (int i=0;i<ctbW;i++) { | 108 | 478k | int edgeIdx = -1; | 109 | | | 110 | 478k | logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j); | 111 | | | 112 | 478k | if ((extendedTests && | 113 | 478k | (sps->pcm_loop_filter_disable_flag && | 114 | 191k | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) || | 115 | 478k | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { | 116 | 151k | continue; | 117 | 151k | } | 118 | | | 119 | | // do the expensive test for boundaries only at the boundaries | 120 | 326k | bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1); | 121 | | | 122 | 326k | if (testBoundary) | 123 | 165k | for (int k=0;k<2;k++) { | 124 | 112k | int xS = xC+i+hPos[k]; | 125 | 112k | int yS = yC+j+vPos[k]; | 126 | | | 127 | 112k | if (xS<0 || yS<0 || xS>=width || yS>=height) { | 128 | 4.97k | edgeIdx=0; | 129 | 4.97k | break; | 130 | 4.97k | } | 131 | | | 132 | | | 133 | | // This part seems inefficient with all the get_SliceHeaderIndex() calls, | 134 | | // but removing this part (because the input was known to have only a single | 135 | | // slice anyway) reduced computation time only by 1.3%. | 136 | | // TODO: however, this may still be a big part of SAO itself. | 137 | | | 138 | 107k | slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW, | 139 | 107k | yS<<chromashiftH); | 140 | 107k | if (sliceHeader==NULL) { return; } | 141 | | | 142 | 107k | int sliceAddrRS = sliceHeader->SliceAddrRS; | 143 | 107k | if (sliceAddrRS < ctbSliceAddrRS && | 144 | 107k | img->get_SliceHeader((xC+i)<<chromashiftW, | 145 | 0 | (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { | 146 | 0 | edgeIdx=0; | 147 | 0 | break; | 148 | 0 | } | 149 | | | 150 | 107k | if (sliceAddrRS > ctbSliceAddrRS && | 151 | 107k | img->get_SliceHeader(xS<<chromashiftW, | 152 | 0 | yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { | 153 | 0 | edgeIdx=0; | 154 | 0 | break; | 155 | 0 | } | 156 | | | 157 | | | 158 | 107k | if (pps->loop_filter_across_tiles_enabled_flag==0 && | 159 | 107k | pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] != | 160 | 61.8k | pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) { | 161 | 0 | edgeIdx=0; | 162 | 0 | break; | 163 | 0 | } | 164 | 107k | } | 165 | | | 166 | 326k | if (edgeIdx != 0) { | 167 | | | 168 | 321k | edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) + | 169 | 321k | Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]]) ); | 170 | | | 171 | 321k | if (1) { // edgeIdx != 0) { // seems to be faster without this check (zero in offset table) | 172 | 321k | int offset = saoOffsetVal[edgeIdx+2]; | 173 | | | 174 | 321k | out_ptr[i] = Clip3(0,maxPixelValue, | 175 | 321k | in_ptr[i] + offset); | 176 | 321k | } | 177 | 321k | } | 178 | 326k | } | 179 | 18.7k | } | 180 | 1.05k | } | 181 | 5.76k | else { | 182 | 5.76k | int bandShift = bitDepth-5; | 183 | 5.76k | int saoLeftClass = saoinfo->sao_band_position[cIdx]; | 184 | 5.76k | logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass); | 185 | | | 186 | 5.76k | int bandTable[32]; | 187 | 5.76k | memset(bandTable, 0, sizeof(int)*32); | 188 | | | 189 | 28.8k | for (int k=0;k<4;k++) { | 190 | 23.0k | bandTable[ (k+saoLeftClass)&31 ] = k+1; | 191 | 23.0k | } | 192 | | | 193 | | | 194 | | /* If PCM or transquant_bypass is used in this CTB, we have to | 195 | | run all checks (A). | 196 | | Otherwise, we run a simplified version of the code (B). | 197 | | | 198 | | NOTE: this whole part of SAO does not seem to be a significant part of the time spent | 199 | | */ | 200 | | | 201 | 5.76k | if (extendedTests) { | 202 | | | 203 | | // (A) full version with all checks | 204 | | | 205 | 199k | for (int j=0;j<ctbH;j++) | 206 | 9.50M | for (int i=0;i<ctbW;i++) { | 207 | | | 208 | 9.30M | if ((sps->pcm_loop_filter_disable_flag && | 209 | 9.30M | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) || | 210 | 9.30M | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { | 211 | 9.30M | continue; | 212 | 9.30M | } | 213 | | | 214 | | // Shifts are a strange thing. On x86, >>x actually computes >>(x%64). | 215 | | // So we have to take care of large bandShifts. | 216 | 4.89k | int bandIdx; | 217 | 4.89k | if (bandShift >= 8) { | 218 | 0 | bandIdx = 0; | 219 | 4.89k | } else { | 220 | 4.89k | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; | 221 | 4.89k | } | 222 | | | 223 | 4.89k | if (bandIdx>0) { | 224 | 132 | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; | 225 | | | 226 | 132 | logtrace(LogSAO,"%d %d (%d) offset %d %x -> %x\n",xC+i,yC+j,bandIdx, | 227 | 132 | offset, | 228 | 132 | in_img[xC+i+(yC+j)*in_stride], | 229 | 132 | in_img[xC+i+(yC+j)*in_stride]+offset); | 230 | | | 231 | 132 | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, | 232 | 132 | in_img[xC+i+(yC+j)*in_stride] + offset); | 233 | 132 | } | 234 | 4.89k | } | 235 | 4.68k | } | 236 | 1.08k | else | 237 | 1.08k | { | 238 | | // (B) simplified version (only works if no PCM and transquant_bypass is active) | 239 | | | 240 | 38.4k | for (int j=0;j<ctbH;j++) | 241 | 1.69M | for (int i=0;i<ctbW;i++) { | 242 | | | 243 | | // see above | 244 | 1.65M | int bandIdx; | 245 | 1.65M | if (bandShift >= 8) { | 246 | 29.6k | bandIdx = 0; | 247 | 1.62M | } else { | 248 | 1.62M | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; | 249 | 1.62M | } | 250 | | | 251 | 1.65M | if (bandIdx>0) { | 252 | 7.42k | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; | 253 | | | 254 | 7.42k | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, | 255 | 7.42k | in_img[xC+i+(yC+j)*in_stride] + offset); | 256 | 7.42k | } | 257 | 1.65M | } | 258 | 1.08k | } | 259 | 5.76k | } | 260 | 6.82k | } |
void apply_sao_internal<unsigned char>(de265_image*, int, int, slice_segment_header const*, int, int, int, unsigned char const*, int, unsigned char*, int) Line | Count | Source | 33 | 3.07M | { | 34 | 3.07M | const sao_info* saoinfo = img->get_sao_info(xCtb,yCtb); | 35 | | | 36 | 3.07M | int SaoTypeIdx = (saoinfo->SaoTypeIdx >> (2*cIdx)) & 0x3; | 37 | | | 38 | 3.07M | logtrace(LogSAO,"apply_sao CTB %d;%d cIdx:%d type=%d (%dx%d)\n",xCtb,yCtb,cIdx, SaoTypeIdx, nSW,nSH); | 39 | | | 40 | 3.07M | if (SaoTypeIdx==0) { | 41 | 1.46M | return; | 42 | 1.46M | } | 43 | | | 44 | 1.60M | const seq_parameter_set* sps = &img->get_sps(); | 45 | 1.60M | const pic_parameter_set* pps = &img->get_pps(); | 46 | 1.60M | const int bitDepth = (cIdx==0 ? sps->BitDepth_Y : sps->BitDepth_C); | 47 | 1.60M | const int maxPixelValue = (1<<bitDepth)-1; | 48 | | | 49 | | // top left position of CTB in pixels | 50 | 1.60M | const int xC = xCtb*nSW; | 51 | 1.60M | const int yC = yCtb*nSH; | 52 | | | 53 | 1.60M | const int width = img->get_width(cIdx); | 54 | 1.60M | const int height = img->get_height(cIdx); | 55 | | | 56 | 1.60M | const int ctbSliceAddrRS = img->get_SliceHeader(xC,yC)->SliceAddrRS; | 57 | | | 58 | 1.60M | const int picWidthInCtbs = sps->PicWidthInCtbsY; | 59 | 1.60M | const int chromashiftW = sps->get_chroma_shift_W(cIdx); | 60 | 1.60M | const int chromashiftH = sps->get_chroma_shift_H(cIdx); | 61 | 1.60M | const int ctbshiftW = sps->Log2CtbSizeY - chromashiftW; | 62 | 1.60M | const int ctbshiftH = sps->Log2CtbSizeY - chromashiftH; | 63 | | | 64 | | | 65 | 9.61M | for (int i=0;i<5;i++) | 66 | 8.00M | { | 67 | 8.00M | logtrace(LogSAO,"offset[%d] = %d\n", i, i==0 ? 0 : saoinfo->saoOffsetVal[cIdx][i-1]); | 68 | 8.00M | } | 69 | | | 70 | | | 71 | | // actual size of CTB to be processed (can be smaller when partially outside of image) | 72 | 1.60M | const int ctbW = (xC+nSW>width) ? width -xC : nSW; | 73 | 1.60M | const int ctbH = (yC+nSH>height) ? height-yC : nSH; | 74 | | | 75 | | | 76 | 1.60M | const bool extendedTests = img->get_CTB_has_pcm_or_cu_transquant_bypass(xCtb,yCtb); | 77 | | | 78 | 1.60M | if (SaoTypeIdx==2) { | 79 | 78.4k | int hPos[2], vPos[2]; | 80 | 78.4k | int vPosStride[2]; // vPos[] multiplied by image stride | 81 | 78.4k | int SaoEoClass = (saoinfo->SaoEoClass >> (2*cIdx)) & 0x3; | 82 | | | 83 | 78.4k | switch (SaoEoClass) { | 84 | 4.73k | case 0: hPos[0]=-1; hPos[1]= 1; vPos[0]= 0; vPos[1]=0; break; | 85 | 10.7k | case 1: hPos[0]= 0; hPos[1]= 0; vPos[0]=-1; vPos[1]=1; break; | 86 | 24.3k | case 2: hPos[0]=-1; hPos[1]= 1; vPos[0]=-1; vPos[1]=1; break; | 87 | 38.6k | case 3: hPos[0]= 1; hPos[1]=-1; vPos[0]=-1; vPos[1]=1; break; | 88 | 78.4k | } | 89 | | | 90 | 78.4k | vPosStride[0] = vPos[0] * in_stride; | 91 | 78.4k | vPosStride[1] = vPos[1] * in_stride; | 92 | | | 93 | | /* Reorder sao_info.saoOffsetVal[] array, so that we can index it | 94 | | directly with the sum of the two pixel-difference signs. */ | 95 | 78.4k | int8_t saoOffsetVal[5]; // [2] unused | 96 | 78.4k | saoOffsetVal[0] = saoinfo->saoOffsetVal[cIdx][1-1]; | 97 | 78.4k | saoOffsetVal[1] = saoinfo->saoOffsetVal[cIdx][2-1]; | 98 | 78.4k | saoOffsetVal[2] = 0; | 99 | 78.4k | saoOffsetVal[3] = saoinfo->saoOffsetVal[cIdx][3-1]; | 100 | 78.4k | saoOffsetVal[4] = saoinfo->saoOffsetVal[cIdx][4-1]; | 101 | | | 102 | | | 103 | 2.02M | for (int j=0;j<ctbH;j++) { | 104 | 1.94M | const pixel_t* in_ptr = &in_img [xC+(yC+j)*in_stride]; | 105 | 1.94M | /* */ pixel_t* out_ptr = &out_img[xC+(yC+j)*out_stride]; | 106 | | | 107 | 52.1M | for (int i=0;i<ctbW;i++) { | 108 | 50.1M | int edgeIdx = -1; | 109 | | | 110 | 50.1M | logtrace(LogSAO, "pos %d,%d\n",xC+i,yC+j); | 111 | | | 112 | 50.1M | if ((extendedTests && | 113 | 50.1M | (sps->pcm_loop_filter_disable_flag && | 114 | 708k | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH))) || | 115 | 50.2M | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { | 116 | 576k | continue; | 117 | 576k | } | 118 | | | 119 | | // do the expensive test for boundaries only at the boundaries | 120 | 49.6M | bool testBoundary = (i==0 || j==0 || i==ctbW-1 || j==ctbH-1); | 121 | | | 122 | 49.6M | if (testBoundary) | 123 | 21.2M | for (int k=0;k<2;k++) { | 124 | 14.2M | int xS = xC+i+hPos[k]; | 125 | 14.2M | int yS = yC+j+vPos[k]; | 126 | | | 127 | 14.2M | if (xS<0 || yS<0 || xS>=width || yS>=height) { | 128 | 313k | edgeIdx=0; | 129 | 313k | break; | 130 | 313k | } | 131 | | | 132 | | | 133 | | // This part seems inefficient with all the get_SliceHeaderIndex() calls, | 134 | | // but removing this part (because the input was known to have only a single | 135 | | // slice anyway) reduced computation time only by 1.3%. | 136 | | // TODO: however, this may still be a big part of SAO itself. | 137 | | | 138 | 13.9M | slice_segment_header* sliceHeader = img->get_SliceHeader(xS<<chromashiftW, | 139 | 13.9M | yS<<chromashiftH); | 140 | 13.9M | if (sliceHeader==NULL) { return; } | 141 | | | 142 | 13.9M | int sliceAddrRS = sliceHeader->SliceAddrRS; | 143 | 13.9M | if (sliceAddrRS < ctbSliceAddrRS && | 144 | 13.9M | img->get_SliceHeader((xC+i)<<chromashiftW, | 145 | 0 | (yC+j)<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { | 146 | 0 | edgeIdx=0; | 147 | 0 | break; | 148 | 0 | } | 149 | | | 150 | 13.9M | if (sliceAddrRS > ctbSliceAddrRS && | 151 | 13.9M | img->get_SliceHeader(xS<<chromashiftW, | 152 | 0 | yS<<chromashiftH)->slice_loop_filter_across_slices_enabled_flag==0) { | 153 | 0 | edgeIdx=0; | 154 | 0 | break; | 155 | 0 | } | 156 | | | 157 | | | 158 | 13.9M | if (pps->loop_filter_across_tiles_enabled_flag==0 && | 159 | 13.9M | pps->TileIdRS[(xS>>ctbshiftW) + (yS>>ctbshiftH)*picWidthInCtbs] != | 160 | 13.9M | pps->TileIdRS[(xC>>ctbshiftW) + (yC>>ctbshiftH)*picWidthInCtbs]) { | 161 | 0 | edgeIdx=0; | 162 | 0 | break; | 163 | 0 | } | 164 | 13.9M | } | 165 | | | 166 | 49.6M | if (edgeIdx != 0) { | 167 | | | 168 | 49.2M | edgeIdx = ( Sign(in_ptr[i] - in_ptr[i+hPos[0]+vPosStride[0]]) + | 169 | 49.2M | Sign(in_ptr[i] - in_ptr[i+hPos[1]+vPosStride[1]]) ); | 170 | | | 171 | 49.3M | if (1) { // edgeIdx != 0) { // seems to be faster without this check (zero in offset table) | 172 | 49.3M | int offset = saoOffsetVal[edgeIdx+2]; | 173 | | | 174 | 49.3M | out_ptr[i] = Clip3(0,maxPixelValue, | 175 | 49.3M | in_ptr[i] + offset); | 176 | 49.3M | } | 177 | 49.2M | } | 178 | 49.6M | } | 179 | 1.94M | } | 180 | 78.4k | } | 181 | 1.53M | else { | 182 | 1.53M | int bandShift = bitDepth-5; | 183 | 1.53M | int saoLeftClass = saoinfo->sao_band_position[cIdx]; | 184 | 1.53M | logtrace(LogSAO,"saoLeftClass: %d\n",saoLeftClass); | 185 | | | 186 | 1.53M | int bandTable[32]; | 187 | 1.53M | memset(bandTable, 0, sizeof(int)*32); | 188 | | | 189 | 7.61M | for (int k=0;k<4;k++) { | 190 | 6.07M | bandTable[ (k+saoLeftClass)&31 ] = k+1; | 191 | 6.07M | } | 192 | | | 193 | | | 194 | | /* If PCM or transquant_bypass is used in this CTB, we have to | 195 | | run all checks (A). | 196 | | Otherwise, we run a simplified version of the code (B). | 197 | | | 198 | | NOTE: this whole part of SAO does not seem to be a significant part of the time spent | 199 | | */ | 200 | | | 201 | 1.53M | if (extendedTests) { | 202 | | | 203 | | // (A) full version with all checks | 204 | | | 205 | 3.68M | for (int j=0;j<ctbH;j++) | 206 | 77.5M | for (int i=0;i<ctbW;i++) { | 207 | | | 208 | 74.0M | if ((sps->pcm_loop_filter_disable_flag && | 209 | 74.0M | img->get_pcm_flag((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) || | 210 | 74.0M | img->get_cu_transquant_bypass((xC+i)<<chromashiftW,(yC+j)<<chromashiftH)) { | 211 | 73.9M | continue; | 212 | 73.9M | } | 213 | | | 214 | | // Shifts are a strange thing. On x86, >>x actually computes >>(x%64). | 215 | | // So we have to take care of large bandShifts. | 216 | 83.8k | int bandIdx; | 217 | 83.8k | if (bandShift >= 8) { | 218 | 0 | bandIdx = 0; | 219 | 83.8k | } else { | 220 | 83.8k | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; | 221 | 83.8k | } | 222 | | | 223 | 83.8k | if (bandIdx>0) { | 224 | 8.66k | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; | 225 | | | 226 | 8.66k | logtrace(LogSAO,"%d %d (%d) offset %d %x -> %x\n",xC+i,yC+j,bandIdx, | 227 | 8.66k | offset, | 228 | 8.66k | in_img[xC+i+(yC+j)*in_stride], | 229 | 8.66k | in_img[xC+i+(yC+j)*in_stride]+offset); | 230 | | | 231 | 8.66k | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, | 232 | 8.66k | in_img[xC+i+(yC+j)*in_stride] + offset); | 233 | 8.66k | } | 234 | 83.8k | } | 235 | 178k | } | 236 | 1.35M | else | 237 | 1.35M | { | 238 | | // (B) simplified version (only works if no PCM and transquant_bypass is active) | 239 | | | 240 | 29.4M | for (int j=0;j<ctbH;j++) | 241 | 695M | for (int i=0;i<ctbW;i++) { | 242 | | | 243 | | // see above | 244 | 667M | int bandIdx; | 245 | 667M | if (bandShift >= 8) { | 246 | 0 | bandIdx = 0; | 247 | 667M | } else { | 248 | 667M | bandIdx = bandTable[ in_img[xC+i+(yC+j)*in_stride]>>bandShift ]; | 249 | 667M | } | 250 | | | 251 | 667M | if (bandIdx>0) { | 252 | 9.32M | int offset = saoinfo->saoOffsetVal[cIdx][bandIdx-1]; | 253 | | | 254 | 9.32M | out_img[xC+i+(yC+j)*out_stride] = Clip3(0,maxPixelValue, | 255 | 9.32M | in_img[xC+i+(yC+j)*in_stride] + offset); | 256 | 9.32M | } | 257 | 667M | } | 258 | 1.35M | } | 259 | 1.53M | } | 260 | 1.60M | } |
|
261 | | |
262 | | |
263 | | template <class pixel_t> |
264 | | void apply_sao(de265_image* img, int xCtb,int yCtb, |
265 | | const slice_segment_header* shdr, int cIdx, int nSW,int nSH, |
266 | | const pixel_t* in_img, int in_stride, |
267 | | /* */ pixel_t* out_img, int out_stride) |
268 | 3.11M | { |
269 | 3.11M | if (img->high_bit_depth(cIdx)) { |
270 | 44.2k | apply_sao_internal<uint16_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH, |
271 | 44.2k | (uint16_t*)in_img, in_stride, |
272 | 44.2k | (uint16_t*)out_img,out_stride); |
273 | 44.2k | } |
274 | 3.07M | else { |
275 | 3.07M | apply_sao_internal<uint8_t>(img,xCtb,yCtb, shdr,cIdx,nSW,nSH, |
276 | 3.07M | in_img, in_stride, |
277 | 3.07M | out_img,out_stride); |
278 | 3.07M | } |
279 | 3.11M | } |
280 | | |
281 | | |
282 | | void apply_sample_adaptive_offset(de265_image* img) |
283 | 0 | { |
284 | 0 | const seq_parameter_set& sps = img->get_sps(); |
285 | |
|
286 | 0 | if (sps.sample_adaptive_offset_enabled_flag==0) { |
287 | 0 | return; |
288 | 0 | } |
289 | | |
290 | 0 | de265_image inputCopy; |
291 | 0 | de265_error err = inputCopy.copy_image(img); |
292 | 0 | if (err != DE265_OK) { |
293 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
294 | 0 | return; |
295 | 0 | } |
296 | | |
297 | 0 | for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++) |
298 | 0 | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
299 | 0 | { |
300 | 0 | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb); |
301 | |
|
302 | 0 | if (shdr->slice_sao_luma_flag) { |
303 | 0 | apply_sao(img, xCtb,yCtb, shdr, 0, 1<<sps.Log2CtbSizeY, 1<<sps.Log2CtbSizeY, |
304 | 0 | inputCopy.get_image_plane(0), inputCopy.get_image_stride(0), |
305 | 0 | img->get_image_plane(0), img->get_image_stride(0)); |
306 | 0 | } |
307 | |
|
308 | 0 | if (shdr->slice_sao_chroma_flag) { |
309 | 0 | int nSW = (1<<sps.Log2CtbSizeY) / sps.SubWidthC; |
310 | 0 | int nSH = (1<<sps.Log2CtbSizeY) / sps.SubHeightC; |
311 | |
|
312 | 0 | apply_sao(img, xCtb,yCtb, shdr, 1, nSW,nSH, |
313 | 0 | inputCopy.get_image_plane(1), inputCopy.get_image_stride(1), |
314 | 0 | img->get_image_plane(1), img->get_image_stride(1)); |
315 | |
|
316 | 0 | apply_sao(img, xCtb,yCtb, shdr, 2, nSW,nSH, |
317 | 0 | inputCopy.get_image_plane(2), inputCopy.get_image_stride(2), |
318 | 0 | img->get_image_plane(2), img->get_image_stride(2)); |
319 | 0 | } |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | | |
324 | | void apply_sample_adaptive_offset_sequential(de265_image* img) |
325 | 0 | { |
326 | 0 | const seq_parameter_set& sps = img->get_sps(); |
327 | |
|
328 | 0 | if (sps.sample_adaptive_offset_enabled_flag==0) { |
329 | 0 | return; |
330 | 0 | } |
331 | | |
332 | 0 | int lumaImageSize = img->get_image_stride(0) * img->get_height(0) * img->get_bytes_per_pixel(0); |
333 | 0 | int chromaImageSize = img->get_image_stride(1) * img->get_height(1) * img->get_bytes_per_pixel(1); |
334 | |
|
335 | 0 | uint8_t* inputCopy = new uint8_t[ libde265_max(lumaImageSize, chromaImageSize) ]; |
336 | 0 | if (inputCopy == NULL) { |
337 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
338 | 0 | return; |
339 | 0 | } |
340 | | |
341 | | |
342 | 0 | int nChannels = 3; |
343 | 0 | if (sps.ChromaArrayType == CHROMA_MONO) { nChannels=1; } |
344 | |
|
345 | 0 | for (int cIdx=0;cIdx<nChannels;cIdx++) { |
346 | |
|
347 | 0 | int stride = img->get_image_stride(cIdx); |
348 | 0 | int height = img->get_height(cIdx); |
349 | |
|
350 | 0 | memcpy(inputCopy, img->get_image_plane(cIdx), stride * height * img->get_bytes_per_pixel(cIdx)); |
351 | |
|
352 | 0 | for (int yCtb=0; yCtb<sps.PicHeightInCtbsY; yCtb++) |
353 | 0 | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
354 | 0 | { |
355 | 0 | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,yCtb); |
356 | 0 | if (shdr==NULL) { |
357 | 0 | delete[] inputCopy; |
358 | 0 | return; |
359 | 0 | } |
360 | | |
361 | 0 | if (cIdx==0 && shdr->slice_sao_luma_flag) { |
362 | 0 | apply_sao(img, xCtb,yCtb, shdr, 0, 1<<sps.Log2CtbSizeY, 1<<sps.Log2CtbSizeY, |
363 | 0 | inputCopy, stride, |
364 | 0 | img->get_image_plane(0), img->get_image_stride(0)); |
365 | 0 | } |
366 | |
|
367 | 0 | if (cIdx!=0 && shdr->slice_sao_chroma_flag) { |
368 | 0 | int nSW = (1<<sps.Log2CtbSizeY) / sps.SubWidthC; |
369 | 0 | int nSH = (1<<sps.Log2CtbSizeY) / sps.SubHeightC; |
370 | |
|
371 | 0 | apply_sao(img, xCtb,yCtb, shdr, cIdx, nSW,nSH, |
372 | 0 | inputCopy, stride, |
373 | 0 | img->get_image_plane(cIdx), img->get_image_stride(cIdx)); |
374 | 0 | } |
375 | 0 | } |
376 | 0 | } |
377 | | |
378 | 0 | delete[] inputCopy; |
379 | 0 | } |
380 | | |
381 | | |
382 | | |
383 | | |
384 | | class thread_task_sao : public thread_task |
385 | | { |
386 | | public: |
387 | | int ctb_y; |
388 | | de265_image* img; /* this is where we get the SPS from |
389 | | (either inputImg or outputImg can be a dummy image) |
390 | | */ |
391 | | |
392 | | de265_image* inputImg; |
393 | | de265_image* outputImg; |
394 | | int inputProgress; |
395 | | |
396 | | virtual void work(); |
397 | 0 | virtual std::string name() const { |
398 | 0 | char buf[100]; |
399 | 0 | sprintf(buf,"sao-%d",ctb_y); |
400 | 0 | return buf; |
401 | 0 | } |
402 | | }; |
403 | | |
404 | | |
405 | | void thread_task_sao::work() |
406 | 56.0k | { |
407 | 56.0k | state = Running; |
408 | 56.0k | img->thread_run(this); |
409 | | |
410 | 56.0k | const seq_parameter_set& sps = img->get_sps(); |
411 | | |
412 | 56.0k | const int rightCtb = sps.PicWidthInCtbsY-1; |
413 | 56.0k | const int ctbSize = (1<<sps.Log2CtbSizeY); |
414 | | |
415 | | |
416 | | // wait until also the CTB-rows below and above are ready |
417 | | |
418 | 56.0k | img->wait_for_progress(this, rightCtb,ctb_y, inputProgress); |
419 | | |
420 | 56.0k | if (ctb_y>0) { |
421 | 52.5k | img->wait_for_progress(this, rightCtb,ctb_y-1, inputProgress); |
422 | 52.5k | } |
423 | | |
424 | 56.0k | if (ctb_y+1<sps.PicHeightInCtbsY) { |
425 | 52.5k | img->wait_for_progress(this, rightCtb,ctb_y+1, inputProgress); |
426 | 52.5k | } |
427 | | |
428 | | |
429 | | // copy input image to output for this CTB-row |
430 | | |
431 | 56.0k | outputImg->copy_lines_from(inputImg, ctb_y * ctbSize, (ctb_y+1) * ctbSize); |
432 | | |
433 | | |
434 | | // process SAO in the CTB-row |
435 | | |
436 | 1.10M | for (int xCtb=0; xCtb<sps.PicWidthInCtbsY; xCtb++) |
437 | 1.04M | { |
438 | 1.04M | const slice_segment_header* shdr = img->get_SliceHeaderCtb(xCtb,ctb_y); |
439 | 1.04M | if (shdr==NULL) { |
440 | 0 | break; |
441 | 0 | } |
442 | | |
443 | 1.04M | if (shdr->slice_sao_luma_flag) { |
444 | 1.04M | apply_sao(img, xCtb,ctb_y, shdr, 0, ctbSize, ctbSize, |
445 | 1.04M | inputImg ->get_image_plane(0), inputImg ->get_image_stride(0), |
446 | 1.04M | outputImg->get_image_plane(0), outputImg->get_image_stride(0)); |
447 | 1.04M | } |
448 | | |
449 | 1.04M | if (shdr->slice_sao_chroma_flag) { |
450 | 1.03M | int nSW = ctbSize / sps.SubWidthC; |
451 | 1.03M | int nSH = ctbSize / sps.SubHeightC; |
452 | | |
453 | 1.03M | apply_sao(img, xCtb,ctb_y, shdr, 1, nSW,nSH, |
454 | 1.03M | inputImg ->get_image_plane(1), inputImg ->get_image_stride(1), |
455 | 1.03M | outputImg->get_image_plane(1), outputImg->get_image_stride(1)); |
456 | | |
457 | 1.03M | apply_sao(img, xCtb,ctb_y, shdr, 2, nSW,nSH, |
458 | 1.03M | inputImg ->get_image_plane(2), inputImg ->get_image_stride(2), |
459 | 1.03M | outputImg->get_image_plane(2), outputImg->get_image_stride(2)); |
460 | 1.03M | } |
461 | 1.04M | } |
462 | | |
463 | | |
464 | | // mark SAO progress |
465 | | |
466 | 1.10M | for (int x=0;x<=rightCtb;x++) { |
467 | 1.04M | const int CtbWidth = sps.PicWidthInCtbsY; |
468 | 1.04M | img->ctb_progress[x+ctb_y*CtbWidth].set_progress(CTB_PROGRESS_SAO); |
469 | 1.04M | } |
470 | | |
471 | | |
472 | 56.0k | state = Finished; |
473 | 56.0k | img->thread_finishes(this); |
474 | 56.0k | } |
475 | | |
476 | | |
477 | | bool add_sao_tasks(image_unit* imgunit, int saoInputProgress) |
478 | 3.70k | { |
479 | 3.70k | de265_image* img = imgunit->img; |
480 | 3.70k | const seq_parameter_set& sps = img->get_sps(); |
481 | | |
482 | 3.70k | if (sps.sample_adaptive_offset_enabled_flag==0) { |
483 | 170 | return false; |
484 | 170 | } |
485 | | |
486 | | |
487 | 3.53k | decoder_context* ctx = img->decctx; |
488 | | |
489 | 3.53k | de265_error err = imgunit->sao_output.alloc_image(img->get_width(), img->get_height(), |
490 | 3.53k | img->get_chroma_format(), |
491 | 3.53k | img->get_shared_sps(), |
492 | 3.53k | false, |
493 | 3.53k | img->decctx, //img->encctx, |
494 | 3.53k | img->pts, img->user_data, true); |
495 | 3.53k | if (err != DE265_OK) { |
496 | 0 | img->decctx->add_warning(DE265_WARNING_CANNOT_APPLY_SAO_OUT_OF_MEMORY,false); |
497 | 0 | return false; |
498 | 0 | } |
499 | | |
500 | 3.53k | int nRows = sps.PicHeightInCtbsY; |
501 | | |
502 | 3.53k | int n=0; |
503 | 3.53k | img->thread_start(nRows); |
504 | | |
505 | 59.6k | for (int y=0;y<nRows;y++) |
506 | 56.0k | { |
507 | 56.0k | thread_task_sao* task = new thread_task_sao; |
508 | | |
509 | 56.0k | task->inputImg = img; |
510 | 56.0k | task->outputImg = &imgunit->sao_output; |
511 | 56.0k | task->img = img; |
512 | 56.0k | task->ctb_y = y; |
513 | 56.0k | task->inputProgress = saoInputProgress; |
514 | | |
515 | 56.0k | imgunit->tasks.push_back(task); |
516 | 56.0k | add_task(&ctx->thread_pool_, task); |
517 | 56.0k | n++; |
518 | 56.0k | } |
519 | | |
520 | | /* Currently need barrier here because when are finished, we have to swap the pixel |
521 | | data back into the main image. */ |
522 | 3.53k | img->wait_for_completion(); |
523 | | |
524 | 3.53k | img->exchange_pixel_data_with(imgunit->sao_output); |
525 | | |
526 | 3.53k | return true; |
527 | 3.53k | } |