/src/libhevc/encoder/ihevce_sao.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ihevce_sao.c |
24 | | * |
25 | | * @brief |
26 | | * Contains definition for the ctb level sao function |
27 | | * |
28 | | * @author |
29 | | * Ittiam |
30 | | * |
31 | | * @par List of Functions: |
32 | | * ihevce_sao_set_avilability() |
33 | | * ihevce_sao_ctb() |
34 | | * ihevce_sao_analyse() |
35 | | * |
36 | | * @remarks |
37 | | * None |
38 | | * |
39 | | ******************************************************************************* |
40 | | */ |
41 | | |
42 | | /*****************************************************************************/ |
43 | | /* File Includes */ |
44 | | /*****************************************************************************/ |
45 | | /* System include files */ |
46 | | #include <stdio.h> |
47 | | #include <string.h> |
48 | | #include <stdlib.h> |
49 | | #include <assert.h> |
50 | | #include <stdarg.h> |
51 | | #include <math.h> |
52 | | |
53 | | /* User include files */ |
54 | | #include "ihevc_typedefs.h" |
55 | | #include "itt_video_api.h" |
56 | | #include "ihevce_api.h" |
57 | | |
58 | | #include "rc_cntrl_param.h" |
59 | | #include "rc_frame_info_collector.h" |
60 | | #include "rc_look_ahead_params.h" |
61 | | |
62 | | #include "ihevc_defs.h" |
63 | | #include "ihevc_structs.h" |
64 | | #include "ihevc_platform_macros.h" |
65 | | #include "ihevc_deblk.h" |
66 | | #include "ihevc_itrans_recon.h" |
67 | | #include "ihevc_chroma_itrans_recon.h" |
68 | | #include "ihevc_chroma_intra_pred.h" |
69 | | #include "ihevc_intra_pred.h" |
70 | | #include "ihevc_inter_pred.h" |
71 | | #include "ihevc_mem_fns.h" |
72 | | #include "ihevc_padding.h" |
73 | | #include "ihevc_weighted_pred.h" |
74 | | #include "ihevc_sao.h" |
75 | | #include "ihevc_resi_trans.h" |
76 | | #include "ihevc_quant_iquant_ssd.h" |
77 | | #include "ihevc_cabac_tables.h" |
78 | | |
79 | | #include "ihevce_defs.h" |
80 | | #include "ihevce_lap_enc_structs.h" |
81 | | #include "ihevce_multi_thrd_structs.h" |
82 | | #include "ihevce_me_common_defs.h" |
83 | | #include "ihevce_had_satd.h" |
84 | | #include "ihevce_error_codes.h" |
85 | | #include "ihevce_bitstream.h" |
86 | | #include "ihevce_cabac.h" |
87 | | #include "ihevce_rdoq_macros.h" |
88 | | #include "ihevce_function_selector.h" |
89 | | #include "ihevce_enc_structs.h" |
90 | | #include "ihevce_entropy_structs.h" |
91 | | #include "ihevce_cmn_utils_instr_set_router.h" |
92 | | #include "ihevce_enc_loop_structs.h" |
93 | | #include "ihevce_cabac_rdo.h" |
94 | | #include "ihevce_sao.h" |
95 | | |
96 | | /*****************************************************************************/ |
97 | | /* Function Definitions */ |
98 | | /*****************************************************************************/ |
99 | | |
100 | | /** |
101 | | ******************************************************************************* |
102 | | * |
103 | | * @brief |
104 | | * ihevce_sao_set_avilability |
105 | | * |
106 | | * @par Description: |
107 | | * Fills the 8 SAO availability flags: all start at 255 and are cleared to 0 |
108 | | * where the CTB lies on the first/last CTB column or row of the current tile. |
109 | | * |
110 | | * @param[out] pu1_avail: array of 8 flags, fully overwritten |
111 | | * @param[in] ps_sao_ctxt: SAO context; only i4_ctb_x and i4_ctb_y are read |
112 | | * @param[in] ps_tile_params: first CTB x/y and width/height of the tile in CTBs |
113 | | * @returns None |
114 | | * @remarks |
115 | | * Cleared entries: first col 0/4/6, last col 1/5/7, first row 2/4/5, last row 3/6/7 |
116 | | ******************************************************************************* |
117 | | */ |
118 | | void ihevce_sao_set_avilability( |
119 | | UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) |
120 | 926k | { |
121 | 926k | WORD32 i; |
122 | | |
123 | 926k | WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x; |
124 | 926k | WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y; |
125 | | /* Default: mark all 8 entries as available (255) */ |
126 | 8.33M | for(i = 0; i < 8; i++) |
127 | 7.40M | { |
128 | 7.40M | pu1_avail[i] = 255; |
129 | 7.40M | } |
130 | | |
131 | | /* SAO_note_01: If the CTB lies on a tile or a slice boundary and |
132 | | in-loop filtering is enabled at tile and slice boundary, then SAO must |
133 | | be performed at tile/slice boundaries also. Hence the boundary checks use |
134 | | the CTB position rather than s_ctb_nbr_avail_flags.u1_left_avail flags. |
135 | | NOTE(review): the position is compared with the current tile's extents, |
136 | | not the frame's - confirm. Search for <SAO_note_01> in workspace to know more */ |
137 | | /* First CTB column of the tile: clear entries 0, 4 and 6 */ |
138 | 926k | if(ctb_x_pos == ps_tile_params->i4_first_ctb_x) |
139 | 470k | { |
140 | 470k | pu1_avail[0] = 0; |
141 | 470k | pu1_avail[4] = 0; |
142 | 470k | pu1_avail[6] = 0; |
143 | 470k | } |
144 | | |
145 | | /* Last CTB column of the tile: clear entries 1, 5 and 7 */ |
146 | 926k | if((ctb_x_pos + 1) == |
147 | 926k | (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit)) |
148 | 476k | { |
149 | 476k | pu1_avail[1] = 0; |
150 | 476k | pu1_avail[5] = 0; |
151 | 476k | pu1_avail[7] = 0; |
152 | 476k | } |
153 | | |
154 | | /* First CTB row of the tile: clear entries 2, 4 and 5 */ |
155 | 926k | if(ctb_y_pos == ps_tile_params->i4_first_ctb_y) |
156 | 546k | { |
157 | 546k | pu1_avail[2] = 0; |
158 | 546k | pu1_avail[4] = 0; |
159 | 546k | pu1_avail[5] = 0; |
160 | 546k | } |
161 | | |
162 | | /* Last CTB row of the tile: clear entries 3, 6 and 7 */ |
163 | 926k | if((ctb_y_pos + 1) == |
164 | 926k | (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit)) |
165 | 551k | { |
166 | 551k | pu1_avail[3] = 0; |
167 | 551k | pu1_avail[6] = 0; |
168 | 551k | pu1_avail[7] = 0; |
169 | 551k | } |
170 | 926k | } |
171 | | |
172 | | /** |
173 | | ******************************************************************************* |
174 | | * |
175 | | * @brief |
176 | | * Sao CTB level function. |
177 | | * |
178 | | * @par Description: |
179 | | * Applies SAO to one CTB: luma when i1_slice_sao_luma_flag is set, chroma (Cb and |
180 | | * Cr in one call) when i1_slice_sao_chroma_flag is set. For each, the type index |
181 | | * selects band offset or edge offset; for any other type value no filter is called. |
182 | | * @param[in,out] ps_sao_ctxt: SAO context; supplies ps_sao, buffers, strides, CTB position |
183 | | * @param[in] ps_tile_params: tile extents, forwarded to ihevce_sao_set_avilability() |
184 | | * |
185 | | * @returns |
186 | | * None |
187 | | * @remarks |
188 | | * Writes 0 to element 0 of the offset arrays in ps_sao for each component processed. |
189 | | * |
190 | | ******************************************************************************* |
191 | | */ |
192 | | void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params) |
193 | 985k | { |
194 | 985k | sao_enc_t *ps_sao; |
195 | 985k | UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2]; |
196 | 985k | UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf; |
197 | 985k | UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf; |
198 | 985k | UWORD8 *pu1_src_luma, *pu1_src_chroma; |
199 | 985k | WORD32 luma_src_stride, ctb_size; |
200 | 985k | WORD32 chroma_src_stride; |
201 | 985k | UWORD8 au1_avail_luma[8], au1_avail_chroma[8]; |
202 | 985k | WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma; |
203 | 985k | UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma; |
204 | 985k | UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma; |
205 | 985k | UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma; |
206 | 985k | UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); |
207 | | |
208 | 985k | ps_sao = ps_sao_ctxt->ps_sao; |
209 | | /* Sanity checks: |offset[1..4]| <= 7 and band position <= 28 for Y, Cb and Cr */ |
210 | 985k | ASSERT( |
211 | 985k | (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) && |
212 | 985k | (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7)); |
213 | 985k | ASSERT( |
214 | 985k | (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) && |
215 | 985k | (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7)); |
216 | 985k | ASSERT( |
217 | 985k | (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) && |
218 | 985k | (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7)); |
219 | 985k | ASSERT( |
220 | 985k | (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) && |
221 | 985k | (ps_sao->b5_cr_band_pos <= 28)); |
222 | | /* ------------------------------ Luma ------------------------------ */ |
223 | 985k | if(ps_sao_ctxt->i1_slice_sao_luma_flag) |
224 | 985k | { |
225 | | /* Stride of the current luma recon buffer */ |
226 | 985k | luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; |
227 | | |
228 | 985k | ctb_size = ps_sao_ctxt->i4_ctb_size; |
229 | | |
230 | | /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ |
231 | 985k | ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */ |
232 | 985k | sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd; |
233 | 985k | sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht; |
234 | | |
235 | 985k | pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; |
236 | | /* Pointer to the top luma buffer corresponding to the current ctb row*/ |
237 | 985k | pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma; |
238 | | |
239 | | /* Pointer to left luma buffer corresponding to the current ctb row*/ |
240 | 985k | pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch; |
241 | | |
242 | | /* Pointer to the top right luma buffer corresponding to the current ctb row*/ |
243 | 985k | pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd; |
244 | | |
245 | | /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/ |
246 | 985k | pu1_src_bot_left_luma = |
247 | 985k | ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride - |
248 | 985k | 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
249 | 985k | (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ |
250 | | |
251 | | /* Back up the last top-row sample (top left pixel for (x+1, y+1)th ctb); written back after filtering */ |
252 | 985k | u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1); |
253 | 985k | pu1_top_left_luma = pu1_src_top_luma_buf - 1; |
254 | | /* Band offset calls ihevc_sao_band_offset_luma directly; edge offset dispatches through apf_sao_luma[] */ |
255 | 985k | if(SAO_BAND == ps_sao->b3_y_type_idx) |
256 | 0 | { |
257 | 0 | ihevc_sao_band_offset_luma( |
258 | 0 | pu1_src_luma, |
259 | 0 | luma_src_stride, |
260 | 0 | pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
261 | 0 | pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
262 | 0 | pu1_src_top_luma_buf - 1, /* Top left*/ |
263 | 0 | ps_sao->b5_y_band_pos, |
264 | 0 | ps_sao->u1_y_offset, |
265 | 0 | sao_blk_wd, |
266 | 0 | sao_blk_ht); |
267 |
|
268 | 0 | if((ps_sao_ctxt->i4_ctb_y > 0)) |
269 | 0 | { |
270 | 0 | *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; |
271 | 0 | } |
272 | 0 | } |
273 | 985k | else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG) |
274 | 751k | { |
275 | | /* In case of edge offset, the 1st and 2nd offsets correspond to EO |
276 | | * categories 1 and 2 and must be non-negative; the 3rd and 4th offsets |
277 | | * correspond to EO categories 3 and 4 and must be non-positive, for all |
278 | | * the EO classes (EO type idx). The asserts below check exactly this. |
279 | | */ |
280 | | // clang-format off |
281 | 751k | ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0)); |
282 | 751k | ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0)); |
283 | | // clang-format on |
284 | | /* NOTE(review): the sign checks imply u1_y_offset elements are signed despite the u1_ prefix - confirm */ |
285 | 751k | ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params); |
286 | | /* Edge filter is looked up with (type idx - 2); presumably SAO_EDGE_0_DEG == 2 - confirm */ |
287 | 751k | ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2]( |
288 | 751k | pu1_src_luma, |
289 | 751k | luma_src_stride, |
290 | 751k | pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */ |
291 | 751k | pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */ |
292 | 751k | pu1_top_left_luma, /* Top left*/ |
293 | 751k | pu1_src_top_right_luma, /* Top right*/ |
294 | 751k | pu1_src_bot_left_luma, /* Bottom left*/ |
295 | 751k | au1_avail_luma, |
296 | 751k | ps_sao->u1_y_offset, |
297 | 751k | sao_blk_wd, |
298 | 751k | sao_blk_ht); |
299 | | /* Write back the top-row sample saved before filtering (skipped when i4_ctb_y is 0) */ |
300 | 751k | if((ps_sao_ctxt->i4_ctb_y > 0)) |
301 | 300k | { |
302 | 300k | *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma; |
303 | 300k | } |
304 | 751k | } |
305 | 985k | } |
306 | | /* ------------------------------ Chroma ------------------------------ */ |
307 | 985k | if(ps_sao_ctxt->i1_slice_sao_chroma_flag) |
308 | 409k | { |
309 | | /* Stride of the current chroma recon buffer */ |
310 | 409k | chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; |
311 | 409k | ctb_size = ps_sao_ctxt->i4_ctb_size; |
312 | | /* Note: chroma block height below is blk_ht / 2 unless u1_is_422 (chroma_format_idc == 2) */ |
313 | | /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/ |
314 | | //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2; |
315 | 409k | ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */ |
316 | 409k | ps_sao->u1_cr_offset[0] = 0; |
317 | 409k | sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd; |
318 | 409k | sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1); |
319 | | |
320 | 409k | pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; |
321 | | /* Pointer to the top chroma buffer corresponding to the current ctb row*/ |
322 | 409k | pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma; |
323 | | // clang-format off |
324 | | /* Pointer to left chroma buffer corresponding to the current ctb row*/ |
325 | 409k | pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma; |
326 | | // clang-format on |
327 | | /* Pointer to the top right chroma buffer corresponding to the current ctb row*/ |
328 | 409k | pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma; |
329 | | |
330 | | /* Pointer to the bottom left chroma buffer corresponding to the current ctb row*/ |
331 | 409k | pu1_src_bot_left_chroma = |
332 | 409k | ps_sao_ctxt->pu1_frm_chroma_recon_buf + |
333 | 409k | (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 + |
334 | 409k | (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * |
335 | 409k | (ctb_size >> !u1_is_422)) + |
336 | 409k | (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/ |
337 | | |
338 | | /* Back up the last two top-row bytes (top left for (x+1, y+1)th ctb); written back after filtering */ |
339 | 409k | u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2); |
340 | 409k | u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1); |
341 | 409k | pu1_top_left_chroma = pu1_src_top_chroma_buf - 2; |
342 | | /* Both chroma planes are dispatched on b3_cb_type_idx; b3_cr_type_idx is not read here */ |
343 | 409k | if(SAO_BAND == ps_sao->b3_cb_type_idx) |
344 | 0 | { |
345 | 0 | ihevc_sao_band_offset_chroma( |
346 | 0 | pu1_src_chroma, |
347 | 0 | chroma_src_stride, |
348 | 0 | pu1_src_left_chroma_buf, /* Pass the pointer to the left chroma buffer backed up in the (x-1,y)th ctb */ |
349 | 0 | pu1_src_top_chroma_buf, /* Pass the ptr to the top chroma buf backed up in the (x,y-1)th ctb */ |
350 | 0 | pu1_top_left_chroma, /* Top left*/ |
351 | 0 | ps_sao->b5_cb_band_pos, |
352 | 0 | ps_sao->b5_cr_band_pos, |
353 | 0 | ps_sao->u1_cb_offset, |
354 | 0 | ps_sao->u1_cr_offset, |
355 | 0 | sao_wd_chroma, |
356 | 0 | sao_ht_chroma); |
357 |
|
358 | 0 | if((ps_sao_ctxt->i4_ctb_y > 0)) |
359 | 0 | { |
360 | 0 | *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; |
361 | 0 | *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; |
362 | 0 | } |
363 | 0 | } |
364 | 409k | else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG) |
365 | 175k | { |
366 | | /* In case of edge offset, the 1st and 2nd offsets correspond to EO |
367 | | * categories 1 and 2 and must be non-negative; the 3rd and 4th offsets |
368 | | * correspond to EO categories 3 and 4 and must be non-positive, for all |
369 | | * the EO classes (EO type idx). The asserts below check this for Cb and Cr. |
370 | | */ |
371 | 175k | ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0)); |
372 | 175k | ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0)); |
373 | | |
374 | 175k | ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0)); |
375 | 175k | ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0)); |
376 | | |
377 | 175k | ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params); |
378 | | /* Edge filter is looked up with (cb type idx - 2) and handles Cb and Cr together */ |
379 | 175k | ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2]( |
380 | 175k | pu1_src_chroma, |
381 | 175k | chroma_src_stride, |
382 | 175k | pu1_src_left_chroma_buf, /* Pass the pointer to the left chroma buffer backed up in the (x-1,y)th ctb */ |
383 | 175k | pu1_src_top_chroma_buf, /* Pass the ptr to the top chroma buf backed up in the (x,y-1)th ctb */ |
384 | 175k | pu1_top_left_chroma, /* Top left*/ |
385 | 175k | pu1_src_top_right_chroma, /* Top right*/ |
386 | 175k | pu1_src_bot_left_chroma, /* Bottom left*/ |
387 | 175k | au1_avail_chroma, |
388 | 175k | ps_sao->u1_cb_offset, |
389 | 175k | ps_sao->u1_cr_offset, |
390 | 175k | sao_wd_chroma, |
391 | 175k | sao_ht_chroma); |
392 | | /* Write back the two top-row bytes saved before filtering (skipped when i4_ctb_y is 0) */ |
393 | 175k | if((ps_sao_ctxt->i4_ctb_y > 0)) |
394 | 79.3k | { |
395 | 79.3k | *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0]; |
396 | 79.3k | *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1]; |
397 | 79.3k | } |
398 | 175k | } |
399 | 409k | } |
400 | 985k | } |
401 | | |
402 | | /** |
403 | | ******************************************************************************* |
404 | | * |
405 | | * @brief |
406 | | * CTB level function to do SAO analysis. |
407 | | * |
408 | | * @par Description: |
409 | | * For a given CTB, sao analysis is done for both luma and chroma. |
410 | | * |
411 | | * |
412 | | * @param[in] |
413 | | * ps_sao_ctxt: Pointer to SAO context |
414 | | * ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop |
415 | | * |
416 | | * @returns |
417 | | * |
418 | | * @remarks |
419 | | * None |
420 | | * |
421 | | * @Assumptions: |
422 | | * 1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be |
423 | | * almost same as cabac state of (x,y)th ctb. |
424 | | * 2) Distortion is calculated in the spatial domain but the lambda used to calculate the cost is |
425 | | * in the frequency domain. |
426 | | ******************************************************************************* |
427 | | */ |
428 | | void ihevce_sao_analyse( |
429 | | sao_ctxt_t *ps_sao_ctxt, |
430 | | ctb_enc_loop_out_t *ps_ctb_enc_loop_out, |
431 | | UWORD32 *pu4_frame_rdopt_header_bits, |
432 | | ihevce_tile_params_t *ps_tile_params) |
433 | 143k | { |
434 | 143k | UWORD8 *pu1_luma_scratch_buf; |
435 | 143k | UWORD8 *pu1_chroma_scratch_buf; |
436 | 143k | UWORD8 *pu1_src_luma, *pu1_recon_luma; |
437 | 143k | UWORD8 *pu1_src_chroma, *pu1_recon_chroma; |
438 | 143k | WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht; |
439 | 143k | WORD32 chroma_src_stride, chroma_recon_stride; |
440 | 143k | WORD32 i4_luma_scratch_buf_stride; |
441 | 143k | WORD32 i4_chroma_scratch_buf_stride; |
442 | 143k | sao_ctxt_t s_sao_ctxt; |
443 | 143k | UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0; |
444 | 143k | LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf; |
445 | 143k | WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0; |
446 | 143k | WORD32 curr_buf_idx, best_buf_idx, best_cand_idx; |
447 | 143k | WORD32 row; |
448 | 143k | WORD32 edgeidx; |
449 | 143k | WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 }; |
450 | 143k | sao_enc_t s_best_luma_chroma_cand; |
451 | 143k | WORD32 best_ctb_sao_bits = 0; |
452 | 143k | #if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2) |
453 | 143k | UWORD8 u1_force_no_offset = |
454 | 143k | ps_sao_ctxt |
455 | 143k | ->ps_ctb_data |
456 | 143k | [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y] |
457 | 143k | .s_ctb_noise_params.i4_noise_present; |
458 | 143k | #endif |
459 | 143k | UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); |
460 | | |
461 | 143k | *pu4_frame_rdopt_header_bits = 0; |
462 | | |
463 | 143k | ctb_size = ps_sao_ctxt->i4_ctb_size; |
464 | 143k | ctb_wd = ps_sao_ctxt->i4_sao_blk_wd; |
465 | 143k | ctb_ht = ps_sao_ctxt->i4_sao_blk_ht; |
466 | | |
467 | 143k | s_sao_ctxt = ps_sao_ctxt[0]; |
468 | | |
469 | | /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/ |
470 | 143k | memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t)); |
471 | | |
472 | | /* Initialize the pointer and strides for luma buffers*/ |
473 | 143k | pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf; |
474 | 143k | luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride; |
475 | | |
476 | 143k | pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf; |
477 | 143k | luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride; |
478 | 143k | i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE; |
479 | | |
480 | | /* Initialize the pointer and strides for luma buffers*/ |
481 | 143k | pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf; |
482 | 143k | chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride; |
483 | | |
484 | 143k | pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf; |
485 | 143k | chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride; |
486 | 143k | i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE; |
487 | | |
488 | 143k | i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf; |
489 | 143k | i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf; |
490 | | |
491 | | /*****************************************************/ |
492 | | /********************RDO FOR LUMA CAND****************/ |
493 | | /*****************************************************/ |
494 | | |
495 | | #if !DISABLE_SAO_WHEN_NOISY |
496 | | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
497 | | #else |
498 | 143k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset) |
499 | 143k | #endif |
500 | 143k | { |
501 | | /* Candidate for Edge offset SAO*/ |
502 | | /* Following is the convention for curr pixel and |
503 | | * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */ |
504 | | /* |
505 | | * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a |
506 | | * c c c |
507 | | * b b b |
508 | | */ |
509 | | |
510 | | /* 0 deg SAO CAND*/ |
511 | | /* Reset the error and edge count*/ |
512 | 863k | for(edgeidx = 0; edgeidx < 5; edgeidx++) |
513 | 719k | { |
514 | 719k | acc_error_category[edgeidx] = 0; |
515 | 719k | category_count[edgeidx] = 0; |
516 | 719k | } |
517 | | |
518 | | /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/ |
519 | | // clang-format off |
520 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG, |
521 | 143k | acc_error_category, category_count); |
522 | | // clang-format on |
523 | | // clang-format off |
524 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG; |
525 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
526 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
527 | 143k | : 0; |
528 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
529 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
530 | 143k | : 0; |
531 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
532 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
533 | 143k | : 0; |
534 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4] |
535 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
536 | 143k | : 0; |
537 | | // clang-format on |
538 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
539 | | // clang-format off |
540 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
541 | | // clang-format on |
542 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
543 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
544 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
545 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
546 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
547 | | |
548 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
549 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
550 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
551 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
552 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
553 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
554 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
555 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
556 | | |
557 | 143k | num_luma_rdo_cand++; |
558 | | |
559 | | /* 90 degree SAO CAND*/ |
560 | 863k | for(edgeidx = 0; edgeidx < 5; edgeidx++) |
561 | 719k | { |
562 | 719k | acc_error_category[edgeidx] = 0; |
563 | 719k | category_count[edgeidx] = 0; |
564 | 719k | } |
565 | | |
566 | | /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/ |
567 | | // clang-format off |
568 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG, |
569 | 143k | acc_error_category, category_count); |
570 | | |
571 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG; |
572 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
573 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
574 | 143k | : 0; |
575 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
576 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
577 | 143k | : 0; |
578 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
579 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
580 | 143k | : 0; |
581 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
582 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
583 | 143k | : 0; |
584 | | // clang-format on |
585 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
586 | | |
587 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
588 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
589 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
590 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
591 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
592 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
593 | | |
594 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
595 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
596 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
597 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
598 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
599 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
600 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
601 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
602 | | |
603 | 143k | num_luma_rdo_cand++; |
604 | | |
605 | | /* 135 degree SAO CAND*/ |
606 | 863k | for(edgeidx = 0; edgeidx < 5; edgeidx++) |
607 | 719k | { |
608 | 719k | acc_error_category[edgeidx] = 0; |
609 | 719k | category_count[edgeidx] = 0; |
610 | 719k | } |
611 | | |
612 | | /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/ |
613 | | // clang-format off |
614 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG, |
615 | 143k | acc_error_category, category_count); |
616 | | |
617 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG; |
618 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
619 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
620 | 143k | : 0; |
621 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
622 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
623 | 143k | : 0; |
624 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
625 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
626 | 143k | : 0; |
627 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
628 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
629 | 143k | : 0; |
630 | | // clang-format on |
631 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
632 | | |
633 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
634 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
635 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
636 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
637 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
638 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
639 | | |
640 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
641 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
642 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
643 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
644 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
645 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
646 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
647 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
648 | | |
649 | 143k | num_luma_rdo_cand++; |
650 | | |
651 | | /* 45 degree SAO CAND*/ |
652 | 863k | for(edgeidx = 0; edgeidx < 5; edgeidx++) |
653 | 719k | { |
654 | 719k | acc_error_category[edgeidx] = 0; |
655 | 719k | category_count[edgeidx] = 0; |
656 | 719k | } |
657 | | |
658 | | /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/ |
659 | | // clang-format off |
660 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG, |
661 | 143k | acc_error_category, category_count); |
662 | | |
663 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG; |
664 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0] |
665 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
666 | 143k | : 0; |
667 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1] |
668 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
669 | 143k | : 0; |
670 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3] |
671 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
672 | 143k | : 0; |
673 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4] |
674 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
675 | 143k | : 0; |
676 | | // clang-format on |
677 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0; |
678 | | |
679 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE; |
680 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0; |
681 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0; |
682 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0; |
683 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0; |
684 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0; |
685 | | |
686 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE; |
687 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0; |
688 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0; |
689 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0; |
690 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0; |
691 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0; |
692 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0; |
693 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0; |
694 | | |
695 | 143k | num_luma_rdo_cand++; |
696 | | |
697 | | /* First cand will be best cand after 1st iteration*/ |
698 | 143k | curr_buf_idx = 0; |
699 | 143k | best_buf_idx = 1; |
700 | 143k | best_cost = 0xFFFFFFFF; |
701 | 143k | best_cand_idx = 0; |
702 | | |
703 | | /*Back up the top pixels for (x,y+1)th ctb*/ |
704 | 143k | if(!ps_sao_ctxt->i4_is_last_ctb_row) |
705 | 55.3k | { |
706 | 55.3k | memcpy( |
707 | 55.3k | ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, |
708 | 55.3k | pu1_recon_luma + luma_recon_stride * (ctb_size - 1), |
709 | 55.3k | ps_sao_ctxt->i4_sao_blk_wd); |
710 | 55.3k | } |
711 | | |
712 | 719k | for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++) |
713 | 575k | { |
714 | 575k | s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; |
715 | | |
716 | | /* This memcpy is required because cabac uses parameters from this structure |
717 | | * to evaluate bits and this structure ptr is sent to cabac through |
718 | | * "ihevce_cabac_rdo_encode_sao" function |
719 | | */ |
720 | 575k | memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); |
721 | | |
722 | | /* Copy the left pixels to the scratch buffer for every rdo cand because it is |
723 | | overwritten by the sao leaf level function for next ctb*/ |
724 | 575k | memcpy( |
725 | 575k | s_sao_ctxt.au1_left_luma_scratch, |
726 | 575k | ps_sao_ctxt->au1_sao_src_left_luma, |
727 | 575k | ps_sao_ctxt->i4_sao_blk_ht); |
728 | | |
729 | | /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is |
730 | | overwritten by the sao leaf level function for next ctb*/ |
731 | 575k | memcpy( |
732 | 575k | s_sao_ctxt.au1_top_luma_scratch, |
733 | 575k | ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, |
734 | 575k | ps_sao_ctxt->i4_sao_blk_wd + 2); |
735 | 575k | s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; |
736 | | |
737 | 575k | pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; |
738 | | |
739 | 575k | ASSERT( |
740 | 575k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && |
741 | 575k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && |
742 | 575k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && |
743 | 575k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); |
744 | 575k | ASSERT( |
745 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && |
746 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && |
747 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && |
748 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); |
749 | 575k | ASSERT( |
750 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && |
751 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && |
752 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && |
753 | 575k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); |
754 | 575k | ASSERT( |
755 | 575k | (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && |
756 | 575k | (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && |
757 | 575k | (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); |
758 | | |
759 | | /* Copy the deblocked recon data to scratch buffer to do sao*/ |
760 | | |
761 | 575k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
762 | 575k | pu1_luma_scratch_buf, |
763 | 575k | i4_luma_scratch_buf_stride, |
764 | 575k | pu1_recon_luma, |
765 | 575k | luma_recon_stride, |
766 | 575k | SCRATCH_BUF_STRIDE, |
767 | 575k | ctb_ht + 1); |
768 | | |
769 | 575k | s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; |
770 | 575k | s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; |
771 | | |
772 | 575k | s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; |
773 | 575k | s_sao_ctxt.i1_slice_sao_chroma_flag = 0; |
774 | | |
775 | 575k | ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); |
776 | | |
777 | | /* Calculate the distortion between sao'ed ctb and original src ctb*/ |
778 | | // clang-format off |
779 | 575k | distortion = |
780 | 575k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, |
781 | 575k | s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, |
782 | 575k | s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht, NULL_PLANE); |
783 | | // clang-format on |
784 | | |
785 | 575k | ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; |
786 | 575k | ctb_bits = ihevce_cabac_rdo_encode_sao( |
787 | 575k | ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); |
788 | | |
789 | | /* Calculate the cost as D+(lambda)*R */ |
790 | 575k | curr_cost = distortion + |
791 | 575k | COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
792 | | |
793 | 575k | if(curr_cost < best_cost) |
794 | 201k | { |
795 | 201k | best_cost = curr_cost; |
796 | 201k | best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; |
797 | 201k | best_cand_idx = rdo_cand; |
798 | 201k | curr_buf_idx = !curr_buf_idx; |
799 | 201k | } |
800 | 575k | } |
801 | | |
802 | | /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO |
803 | | * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand |
804 | | */ |
805 | 143k | s_best_luma_chroma_cand.b3_y_type_idx = |
806 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx; |
807 | 143k | s_best_luma_chroma_cand.u1_y_offset[1] = |
808 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1]; |
809 | 143k | s_best_luma_chroma_cand.u1_y_offset[2] = |
810 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2]; |
811 | 143k | s_best_luma_chroma_cand.u1_y_offset[3] = |
812 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3]; |
813 | 143k | s_best_luma_chroma_cand.u1_y_offset[4] = |
814 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4]; |
815 | 143k | s_best_luma_chroma_cand.b5_y_band_pos = |
816 | 143k | ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos; |
817 | 143k | } |
818 | 0 | else |
819 | 0 | { |
820 | | /*Back up the top pixels for (x,y+1)th ctb*/ |
821 | 0 | if(!ps_sao_ctxt->i4_is_last_ctb_row) |
822 | 0 | { |
823 | 0 | memcpy( |
824 | 0 | ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride, |
825 | 0 | pu1_recon_luma + luma_recon_stride * (ctb_size - 1), |
826 | 0 | ps_sao_ctxt->i4_sao_blk_wd); |
827 | 0 | } |
828 | |
|
829 | 0 | s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE; |
830 | 0 | s_best_luma_chroma_cand.u1_y_offset[1] = 0; |
831 | 0 | s_best_luma_chroma_cand.u1_y_offset[2] = 0; |
832 | 0 | s_best_luma_chroma_cand.u1_y_offset[3] = 0; |
833 | 0 | s_best_luma_chroma_cand.u1_y_offset[4] = 0; |
834 | 0 | s_best_luma_chroma_cand.b5_y_band_pos = 0; |
835 | 0 | s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
836 | 0 | s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
837 | |
|
838 | 0 | s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; |
839 | 0 | s_best_luma_chroma_cand.u1_cb_offset[1] = 0; |
840 | 0 | s_best_luma_chroma_cand.u1_cb_offset[2] = 0; |
841 | 0 | s_best_luma_chroma_cand.u1_cb_offset[3] = 0; |
842 | 0 | s_best_luma_chroma_cand.u1_cb_offset[4] = 0; |
843 | 0 | s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
844 | |
|
845 | 0 | s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; |
846 | 0 | s_best_luma_chroma_cand.u1_cr_offset[1] = 0; |
847 | 0 | s_best_luma_chroma_cand.u1_cr_offset[2] = 0; |
848 | 0 | s_best_luma_chroma_cand.u1_cr_offset[3] = 0; |
849 | 0 | s_best_luma_chroma_cand.u1_cr_offset[4] = 0; |
850 | 0 | s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
851 | 0 | } |
852 | | /*****************************************************/ |
853 | | /********************RDO FOR CHROMA CAND**************/ |
854 | | /*****************************************************/ |
855 | | #if !DISABLE_SAO_WHEN_NOISY |
856 | | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
857 | | #else |
858 | 143k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset) |
859 | 143k | #endif |
860 | 143k | { |
861 | | /*Back up the top pixels for (x,y+1)th ctb*/ |
862 | 143k | if(!ps_sao_ctxt->i4_is_last_ctb_row) |
863 | 55.3k | { |
864 | 55.3k | memcpy( |
865 | 55.3k | ps_sao_ctxt->pu1_curr_sao_src_top_chroma + |
866 | 55.3k | ps_sao_ctxt->i4_frm_top_chroma_buf_stride, |
867 | 55.3k | pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), |
868 | 55.3k | ps_sao_ctxt->i4_sao_blk_wd); |
869 | 55.3k | } |
870 | | |
871 | | /* Reset the error and edge count*/ |
872 | 863k | for(edgeidx = 0; edgeidx < 5; edgeidx++) |
873 | 719k | { |
874 | 719k | acc_error_category[edgeidx] = 0; |
875 | 719k | category_count[edgeidx] = 0; |
876 | 719k | } |
877 | | // clang-format off |
878 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt, |
879 | 143k | s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category, |
880 | 143k | category_count); |
881 | | // clang-format on |
882 | | |
883 | | /* Copy the sao parameters of the best luma cand into the luma_chroma cand structure for next stage of RDO |
884 | | * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand |
885 | | */ |
886 | | // clang-format off |
887 | 143k | s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; |
888 | 143k | s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0] |
889 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
890 | 143k | : 0; |
891 | 143k | s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1] |
892 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
893 | 143k | : 0; |
894 | 143k | s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3] |
895 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
896 | 143k | : 0; |
897 | 143k | s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4] |
898 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
899 | 143k | : 0; |
900 | 143k | s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
901 | | |
902 | 143k | s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx; |
903 | 143k | s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0] |
904 | 143k | ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7)) |
905 | 143k | : 0; |
906 | 143k | s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1] |
907 | 143k | ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7)) |
908 | 143k | : 0; |
909 | 143k | s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3] |
910 | 143k | ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0)) |
911 | 143k | : 0; |
912 | 143k | s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4] |
913 | 143k | ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0)) |
914 | 143k | : 0; |
915 | | // clang-format on |
916 | 143k | s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
917 | 143k | } |
918 | 0 | else |
919 | 0 | { |
920 | | /*Back up the top pixels for (x,y+1)th ctb*/ |
921 | 0 | if(!ps_sao_ctxt->i4_is_last_ctb_row) |
922 | 0 | { |
923 | 0 | memcpy( |
924 | 0 | ps_sao_ctxt->pu1_curr_sao_src_top_chroma + |
925 | 0 | ps_sao_ctxt->i4_frm_top_chroma_buf_stride, |
926 | 0 | pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1), |
927 | 0 | ps_sao_ctxt->i4_sao_blk_wd); |
928 | 0 | } |
929 | |
|
930 | 0 | s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE; |
931 | 0 | s_best_luma_chroma_cand.u1_cb_offset[1] = 0; |
932 | 0 | s_best_luma_chroma_cand.u1_cb_offset[2] = 0; |
933 | 0 | s_best_luma_chroma_cand.u1_cb_offset[3] = 0; |
934 | 0 | s_best_luma_chroma_cand.u1_cb_offset[4] = 0; |
935 | 0 | s_best_luma_chroma_cand.b5_cb_band_pos = 0; |
936 | |
|
937 | 0 | s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE; |
938 | 0 | s_best_luma_chroma_cand.u1_cr_offset[1] = 0; |
939 | 0 | s_best_luma_chroma_cand.u1_cr_offset[2] = 0; |
940 | 0 | s_best_luma_chroma_cand.u1_cr_offset[3] = 0; |
941 | 0 | s_best_luma_chroma_cand.u1_cr_offset[4] = 0; |
942 | 0 | s_best_luma_chroma_cand.b5_cr_band_pos = 0; |
943 | |
|
944 | 0 | s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
945 | 0 | s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
946 | 0 | } |
947 | | |
948 | 143k | s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0; |
949 | 143k | s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0; |
950 | | |
951 | | /*****************************************************/ |
952 | | /**RDO for Best Luma - Chroma combined, No SAO,*******/ |
953 | | /*************Left merge and Top merge****************/ |
954 | | /*****************************************************/ |
955 | | |
956 | | /* No SAO cand*/ |
957 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
958 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
959 | | |
960 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE; |
961 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0; |
962 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0; |
963 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0; |
964 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0; |
965 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0; |
966 | | |
967 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE; |
968 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0; |
969 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0; |
970 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0; |
971 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0; |
972 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0; |
973 | | |
974 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE; |
975 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0; |
976 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0; |
977 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0; |
978 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0; |
979 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0; |
980 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
981 | 143k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
982 | | |
983 | 143k | num_rdo_cand++; |
984 | | |
985 | | /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then |
986 | | the standard mandates that the merge candidates must be set to unavailable. |
987 | | Hence, check for tile boundary condition by reading |
988 | | s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB. |
989 | | A special case: Merge-candidates should be available at dependent-slices boundaries. |
990 | | Search for <SAO_note_01> in workspace to know more */ |
991 | | |
992 | | #if !DISABLE_SAO_WHEN_NOISY |
993 | | if(1) |
994 | | #else |
995 | 143k | if(!u1_force_no_offset) |
996 | 143k | #endif |
997 | 143k | { |
998 | | /* Merge left cand*/ |
999 | 143k | if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail) |
1000 | 66.6k | { |
1001 | 66.6k | memcpy( |
1002 | 66.6k | &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
1003 | 66.6k | &ps_sao_ctxt->s_left_ctb_sao, |
1004 | 66.6k | sizeof(sao_enc_t)); |
1005 | 66.6k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1; |
1006 | 66.6k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0; |
1007 | 66.6k | num_rdo_cand++; |
1008 | 66.6k | } |
1009 | | |
1010 | | /* Merge top cand*/ |
1011 | 143k | if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail) |
1012 | 55.3k | { |
1013 | 55.3k | memcpy( |
1014 | 55.3k | &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
1015 | 55.3k | (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz), |
1016 | 55.3k | sizeof(sao_enc_t)); |
1017 | 55.3k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0; |
1018 | 55.3k | ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1; |
1019 | 55.3k | num_rdo_cand++; |
1020 | 55.3k | } |
1021 | | |
1022 | | /* Best luma-chroma candidate*/ |
1023 | 143k | memcpy( |
1024 | 143k | &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand], |
1025 | 143k | &s_best_luma_chroma_cand, |
1026 | 143k | sizeof(sao_enc_t)); |
1027 | 143k | num_rdo_cand++; |
1028 | 143k | } |
1029 | | |
1030 | 143k | { |
1031 | 143k | UWORD32 luma_distortion = 0, chroma_distortion = 0; |
1032 | | /* First cand will be best cand after 1st iteration*/ |
1033 | 143k | curr_buf_idx = 0; |
1034 | 143k | best_buf_idx = 1; |
1035 | 143k | best_cost = 0xFFFFFFFF; |
1036 | 143k | best_cand_idx = 0; |
1037 | | |
1038 | 553k | for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++) |
1039 | 409k | { |
1040 | 409k | s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand]; |
1041 | | |
1042 | 409k | distortion = 0; |
1043 | | |
1044 | | /* This memcpy is required because cabac uses parameters from this structure |
1045 | | * to evaluate bits and this structure ptr is sent to cabac through |
1046 | | * "ihevce_cabac_rdo_encode_sao" function |
1047 | | */ |
1048 | 409k | memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t)); |
1049 | | |
1050 | 409k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
1051 | 409k | { |
1052 | | /* Copy the left pixels to the scratch buffer for every rdo cand because it is |
1053 | | overwritten by the sao leaf level function for next ctb*/ |
1054 | 409k | memcpy( |
1055 | 409k | s_sao_ctxt.au1_left_luma_scratch, |
1056 | 409k | ps_sao_ctxt->au1_sao_src_left_luma, |
1057 | 409k | ps_sao_ctxt->i4_sao_blk_ht); |
1058 | | |
1059 | | /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is |
1060 | | overwritten by the sao leaf level function for next ctb*/ |
1061 | 409k | memcpy( |
1062 | 409k | s_sao_ctxt.au1_top_luma_scratch, |
1063 | 409k | ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1, |
1064 | 409k | ps_sao_ctxt->i4_sao_blk_wd + 2); |
1065 | 409k | s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1; |
1066 | | |
1067 | 409k | pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx]; |
1068 | | |
1069 | | /* Copy the deblocked recon data to scratch buffer to do sao*/ |
1070 | | |
1071 | 409k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
1072 | 409k | pu1_luma_scratch_buf, |
1073 | 409k | i4_luma_scratch_buf_stride, |
1074 | 409k | pu1_recon_luma, |
1075 | 409k | luma_recon_stride, |
1076 | 409k | SCRATCH_BUF_STRIDE, |
1077 | 409k | ctb_ht + 1); |
1078 | 409k | s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf; |
1079 | 409k | s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride; |
1080 | | |
1081 | 409k | ASSERT( |
1082 | 409k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) && |
1083 | 409k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) && |
1084 | 409k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) && |
1085 | 409k | (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7)); |
1086 | 409k | } |
1087 | 409k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
1088 | 409k | { |
1089 | | /* Copy the left pixels to the scratch buffer for every rdo cand because it is |
1090 | | overwritten by the sao leaf level function for next ctb*/ |
1091 | 409k | memcpy( |
1092 | 409k | s_sao_ctxt.au1_left_chroma_scratch, |
1093 | 409k | ps_sao_ctxt->au1_sao_src_left_chroma, |
1094 | 409k | (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2); |
1095 | | |
1096 | | /* Copy the top and top left pixels to the scratch buffer for every rdo cand because it is |
1097 | | overwritten by the sao leaf level function for next ctb*/ |
1098 | 409k | memcpy( |
1099 | 409k | s_sao_ctxt.au1_top_chroma_scratch, |
1100 | 409k | ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2, |
1101 | 409k | ps_sao_ctxt->i4_sao_blk_wd + 4); |
1102 | | |
1103 | 409k | s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2; |
1104 | | |
1105 | 409k | pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx]; |
1106 | | |
1107 | | /* Copy the deblocked recon data to scratch buffer to do sao*/ |
1108 | | |
1109 | 409k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
1110 | 409k | pu1_chroma_scratch_buf, |
1111 | 409k | i4_chroma_scratch_buf_stride, |
1112 | 409k | pu1_recon_chroma, |
1113 | 409k | chroma_recon_stride, |
1114 | 409k | SCRATCH_BUF_STRIDE, |
1115 | 409k | (ctb_ht >> !u1_is_422) + 1); |
1116 | | |
1117 | 409k | s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf; |
1118 | 409k | s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride; |
1119 | | |
1120 | 409k | ASSERT( |
1121 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) && |
1122 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) && |
1123 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) && |
1124 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7)); |
1125 | 409k | ASSERT( |
1126 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) && |
1127 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) && |
1128 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) && |
1129 | 409k | (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7)); |
1130 | 409k | } |
1131 | | |
1132 | 409k | ASSERT( |
1133 | 409k | (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) && |
1134 | 409k | (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) && |
1135 | 409k | (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28)); |
1136 | | |
1137 | 409k | s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag; |
1138 | 409k | s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag; |
1139 | | |
1140 | 409k | ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params); |
1141 | | |
1142 | 409k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
1143 | 409k | { // clang-format off |
1144 | 409k | luma_distortion = |
1145 | 409k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma, |
1146 | 409k | s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride, |
1147 | 409k | s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, |
1148 | 409k | ctb_ht, |
1149 | 409k | NULL_PLANE); |
1150 | 409k | } // clang-format on |
1151 | | |
1152 | 409k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
1153 | 409k | { // clang-format off |
1154 | 409k | chroma_distortion = |
1155 | 409k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma, |
1156 | 409k | s_sao_ctxt.pu1_cur_chroma_recon_buf, |
1157 | 409k | chroma_src_stride, |
1158 | 409k | s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd, |
1159 | 409k | (ctb_ht >> !u1_is_422), |
1160 | 409k | NULL_PLANE); |
1161 | 409k | } // clang-format on |
1162 | | |
1163 | | /*chroma distortion is added after correction because of lambda difference*/ |
1164 | 409k | distortion = |
1165 | 409k | luma_distortion + |
1166 | 409k | (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf)); |
1167 | | |
1168 | 409k | ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx; |
1169 | 409k | ctb_bits = ihevce_cabac_rdo_encode_sao( |
1170 | 409k | ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out); |
1171 | | |
1172 | | /* Calculate the cost as D+(lambda)*R */ |
1173 | 409k | curr_cost = distortion + |
1174 | 409k | COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
1175 | | |
1176 | 409k | if(curr_cost < best_cost) |
1177 | 213k | { |
1178 | 213k | best_ctb_sao_bits = ctb_bits; |
1179 | 213k | best_cost = curr_cost; |
1180 | 213k | best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx; |
1181 | 213k | best_cand_idx = rdo_cand; |
1182 | 213k | curr_buf_idx = !curr_buf_idx; |
1183 | 213k | } |
1184 | 409k | } |
1185 | | /*Adding sao bits to header bits*/ |
1186 | 143k | *pu4_frame_rdopt_header_bits = best_ctb_sao_bits; |
1187 | | |
1188 | 143k | ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx); |
1189 | | |
1190 | | /* store the sao parameters of curr ctb for top merge and left merge*/ |
1191 | 143k | memcpy( |
1192 | 143k | ps_sao_ctxt->ps_top_ctb_sao, |
1193 | 143k | &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
1194 | 143k | sizeof(sao_enc_t)); |
1195 | 143k | memcpy( |
1196 | 143k | &ps_sao_ctxt->s_left_ctb_sao, |
1197 | 143k | &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
1198 | 143k | sizeof(sao_enc_t)); |
1199 | | |
1200 | | /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/ |
1201 | 143k | memcpy( |
1202 | 143k | &ps_ctb_enc_loop_out->s_sao, |
1203 | 143k | &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx], |
1204 | 143k | sizeof(sao_enc_t)); |
1205 | | |
1206 | 143k | if(!ps_sao_ctxt->i4_is_last_ctb_col) |
1207 | 66.6k | { |
1208 | | /* Update left luma buffer for next ctb */ |
1209 | 4.11M | for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++) |
1210 | 4.05M | { |
1211 | 4.05M | ps_sao_ctxt->au1_sao_src_left_luma[row] = |
1212 | 4.05M | ps_sao_ctxt->pu1_cur_luma_recon_buf |
1213 | 4.05M | [row * ps_sao_ctxt->i4_cur_luma_recon_stride + |
1214 | 4.05M | (ps_sao_ctxt->i4_sao_blk_wd - 1)]; |
1215 | 4.05M | } |
1216 | 66.6k | } |
1217 | | |
1218 | 143k | if(!ps_sao_ctxt->i4_is_last_ctb_col) |
1219 | 66.6k | { |
1220 | | /* Update left chroma buffer for next ctb */ |
1221 | 2.09M | for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++) |
1222 | 2.02M | { |
1223 | 2.02M | *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) = |
1224 | 2.02M | *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf + |
1225 | 2.02M | row * ps_sao_ctxt->i4_cur_chroma_recon_stride + |
1226 | 2.02M | (ps_sao_ctxt->i4_sao_blk_wd - 2)); |
1227 | 2.02M | } |
1228 | 66.6k | } |
1229 | | |
1230 | 143k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag) |
1231 | 143k | { |
1232 | | /* Copy the sao'ed output of the best candidate to the recon buffer*/ |
1233 | | |
1234 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
1235 | 143k | ps_sao_ctxt->pu1_cur_luma_recon_buf, |
1236 | 143k | ps_sao_ctxt->i4_cur_luma_recon_stride, |
1237 | 143k | ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx], |
1238 | 143k | i4_luma_scratch_buf_stride, |
1239 | 143k | ctb_wd, |
1240 | 143k | ctb_ht); |
1241 | 143k | } |
1242 | 143k | if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag) |
1243 | 143k | { |
1244 | | /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/ |
1245 | | |
1246 | 143k | ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d( |
1247 | 143k | ps_sao_ctxt->pu1_cur_chroma_recon_buf, |
1248 | 143k | ps_sao_ctxt->i4_cur_chroma_recon_stride, |
1249 | 143k | ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx], |
1250 | 143k | i4_chroma_scratch_buf_stride, |
1251 | 143k | ctb_wd, |
1252 | 143k | ctb_ht >> !u1_is_422); |
1253 | 143k | } |
1254 | 143k | } |
1255 | 143k | } |