/src/libhevc/encoder/ihevce_deblk.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ihevce_deblk.c |
24 | | * |
25 | | * @brief |
26 | | * Contains definition for the ctb level deblk function |
27 | | * |
28 | | * @author |
29 | | * ittiam |
30 | | * |
31 | | * @List of Functions: |
32 | | * ihevce_deblk_populate_qp_map() |
33 | | * ihevce_deblk_ctb() |
34 | | * ihevce_hbd_deblk_ctb() |
35 | | * |
36 | | * @remarks |
37 | | * None |
38 | | * |
39 | | ******************************************************************************* |
40 | | */ |
41 | | |
42 | | /*****************************************************************************/ |
43 | | /* File Includes */ |
44 | | /*****************************************************************************/ |
45 | | /* System include files */ |
46 | | #include <stdio.h> |
47 | | #include <string.h> |
48 | | #include <stdlib.h> |
49 | | #include <assert.h> |
50 | | #include <stdarg.h> |
51 | | #include <math.h> |
52 | | |
53 | | /* User include files */ |
54 | | #include "ihevc_typedefs.h" |
55 | | #include "itt_video_api.h" |
56 | | #include "ihevce_api.h" |
57 | | |
58 | | #include "rc_cntrl_param.h" |
59 | | #include "rc_frame_info_collector.h" |
60 | | #include "rc_look_ahead_params.h" |
61 | | |
62 | | #include "ihevc_defs.h" |
63 | | #include "ihevc_debug.h" |
64 | | #include "ihevc_structs.h" |
65 | | #include "ihevc_platform_macros.h" |
66 | | #include "ihevc_deblk.h" |
67 | | #include "ihevc_deblk_tables.h" |
68 | | #include "ihevc_common_tables.h" |
69 | | #include "ihevc_itrans_recon.h" |
70 | | #include "ihevc_chroma_itrans_recon.h" |
71 | | #include "ihevc_chroma_intra_pred.h" |
72 | | #include "ihevc_intra_pred.h" |
73 | | #include "ihevc_inter_pred.h" |
74 | | #include "ihevc_mem_fns.h" |
75 | | #include "ihevc_padding.h" |
76 | | #include "ihevc_weighted_pred.h" |
77 | | #include "ihevc_sao.h" |
78 | | #include "ihevc_resi_trans.h" |
79 | | #include "ihevc_quant_iquant_ssd.h" |
80 | | #include "ihevc_cabac_tables.h" |
81 | | |
82 | | #include "ihevce_defs.h" |
83 | | #include "ihevce_hle_interface.h" |
84 | | #include "ihevce_lap_enc_structs.h" |
85 | | #include "ihevce_multi_thrd_structs.h" |
86 | | #include "ihevce_me_common_defs.h" |
87 | | #include "ihevce_had_satd.h" |
88 | | #include "ihevce_error_codes.h" |
89 | | #include "ihevce_bitstream.h" |
90 | | #include "ihevce_cabac.h" |
91 | | #include "ihevce_rdoq_macros.h" |
92 | | #include "ihevce_function_selector.h" |
93 | | #include "ihevce_enc_structs.h" |
94 | | #include "ihevce_entropy_structs.h" |
95 | | #include "ihevce_cmn_utils_instr_set_router.h" |
96 | | #include "ihevce_enc_loop_structs.h" |
97 | | #include "ihevce_common_utils.h" |
98 | | #include "ihevce_global_tables.h" |
99 | | #include "ihevce_deblk.h" |
100 | | #include "ihevce_tile_interface.h" |
101 | | |
102 | | /*****************************************************************************/ |
103 | | /* Function Definitions */ |
104 | | /*****************************************************************************/ |
105 | | |
106 | | /*! |
107 | | ****************************************************************************** |
108 | | * \if Function name : ihevce_deblk_populate_qp_map \endif |
109 | | * |
110 | | * \brief |
111 | | * Populates the 4x4-level Qp map of the current CTB row (CTBs of the current tile) for deblocking, |
112 | | * reading the top 4x4 Qp row stored by the upper CTB row and storing the bottom row for the next one |
113 | | ***************************************************************************** |
114 | | */ |
115 | | void ihevce_deblk_populate_qp_map( |
116 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
117 | | deblk_ctbrow_prms_t *ps_deblk_ctb_row_params, |
118 | | ctb_enc_loop_out_t *ps_ctb_out_dblk, |
119 | | WORD32 vert_ctr, |
120 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
121 | | ihevce_tile_params_t *ps_col_tile_params) |
122 | 78.0k | { |
123 | 78.0k | ctb_enc_loop_out_t *ps_ctb_out; |
124 | 78.0k | WORD32 ctb_ctr, ctb_start, ctb_end; |
125 | 78.0k | WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp; |
126 | | /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/ |
127 | | /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/ |
128 | | /*Qp of the last coded CU (used as the predictor for skipped / no-residual CUs)*/ |
129 | 78.0k | WORD8 i1_last_cu_qp; |
130 | | /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/ |
131 | 78.0k | WORD8 *pi1_qp_top_4x4_ctb_row = |
132 | 78.0k | ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] + |
133 | 78.0k | (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num); |
134 | | |
135 | 78.0k | UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd; |
136 | | |
137 | | /*The Qp map which has to be populated*/ |
138 | 78.0k | UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; |
139 | 78.0k | WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
140 | | |
141 | | /*Temporary pointers to Qp map at CTB level*/ |
142 | 78.0k | WORD8 *pi1_ctb_qp_map_tile; |
143 | | |
144 | 78.0k | i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx]; |
145 | | /* total QPs to be copied for current row is : */ |
146 | 78.0k | tile_qp_size = i4_offset_for_last_cu_qp + 1; |
147 | | /*Pointing to the first CTB of current CTB row*/ |
148 | 78.0k | ps_ctb_out = ps_ctb_out_dblk; |
149 | | /* Offset req. for the row QP to the tile start */ |
150 | 78.0k | tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4); |
151 | | |
152 | 78.0k | ctb_start = ps_col_tile_params->i4_first_ctb_x; |
153 | 78.0k | ctb_end = |
154 | 78.0k | (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit); |
155 | | |
156 | 78.0k | if(vert_ctr) /*Not first CTB row of frame*/ |
157 | 10.1k | { |
158 | | /*copy from top4x4_array data stored by upper CTB-row to qp-map*/ |
159 | 10.1k | memcpy( |
160 | 10.1k | pi1_ctb_tile_qp, |
161 | 10.1k | (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset), |
162 | 10.1k | tile_qp_size); |
163 | 10.1k | } |
164 | | |
165 | | /*pi1_ctb_tile_qp points to top4x4 row in Qp-map. |
166 | | Now pointing pi1_ctb_qp_map_tile to cur 4x4 row*/ |
167 | 78.0k | pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride; |
168 | | |
169 | | /* This i1_last_cu_qp will be conditionally overwritten later */ |
170 | 78.0k | i1_last_cu_qp = ps_ctxt->i4_frame_qp; |
171 | | |
172 | | /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */ |
173 | 212k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
174 | 134k | { |
175 | 134k | WORD32 cu_ctr; |
176 | 134k | cu_enc_loop_out_t *ps_curr_cu; |
177 | | |
178 | | /* Update i1_last_cu_qp based on CTB's position in tile */ |
179 | 134k | update_last_coded_cu_qp( |
180 | 134k | (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp), |
181 | 134k | ps_ctxt->i1_entropy_coding_sync_enabled_flag, |
182 | 134k | ps_frm_ctb_prms, |
183 | 134k | ps_ctxt->i4_frame_qp, |
184 | 134k | vert_ctr, |
185 | 134k | ctb_ctr, |
186 | 134k | &i1_last_cu_qp); |
187 | | |
188 | | /* store the pointer of first cu of current ctb */ |
189 | 134k | ps_curr_cu = ps_ctb_out->ps_enc_cu; |
190 | | |
191 | | /* --------- loop over all the CUs in the CTB --------------- */ |
192 | 1.14M | for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++) |
193 | 1.00M | { |
194 | 1.00M | UWORD8 u1_vert_4x4, u1_horz_4x4; //for_loop counters |
195 | 1.00M | WORD8 *pi1_cu_qp_map; |
196 | | |
197 | 1.00M | WORD8 i1_qp, i1_qp_left, i1_qp_top; |
198 | | |
199 | 1.00M | pi1_cu_qp_map = pi1_ctb_qp_map_tile + |
200 | 1.00M | (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride + |
201 | 1.00M | (ps_curr_cu->b3_cu_pos_x * 2); |
202 | | |
203 | | /*If the current CU has the skip flag or the no-residual-syntax flag set, then for deblocking |
204 | | a predicted Qp is used: average of left and top Qp, or the last CU's Qp if not first CU in QG*/ |
205 | 1.00M | if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag) |
206 | 605k | { |
207 | 605k | if(0 == ps_curr_cu->b3_cu_pos_x) |
208 | 206k | i1_qp_left = i1_last_cu_qp; |
209 | 399k | else |
210 | 399k | i1_qp_left = *(pi1_cu_qp_map - 1); |
211 | | |
212 | 605k | if(0 == ps_curr_cu->b3_cu_pos_y) |
213 | 187k | i1_qp_top = i1_last_cu_qp; |
214 | 417k | else |
215 | 417k | i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride); |
216 | | |
217 | 605k | i1_qp = (i1_qp_left + i1_qp_top + 1) / 2; /* average of left and top Qp, +1 for rounding */ |
218 | | |
219 | 605k | if(0 == ps_curr_cu->b1_first_cu_in_qg) |
220 | 0 | { |
221 | 0 | i1_qp = i1_last_cu_qp; |
222 | 0 | } |
223 | 605k | } |
224 | 402k | else |
225 | 402k | { |
226 | 402k | i1_qp = ps_curr_cu->i1_cu_qp; |
227 | 402k | } |
228 | | |
229 | 1.00M | i1_last_cu_qp = i1_qp; |
230 | | |
231 | | /*---- Loop for populating Qp map for the current CU -------*/ |
232 | 5.55M | for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++) |
233 | 4.54M | { |
234 | 36.5M | for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++) |
235 | 32.0M | { |
236 | 32.0M | pi1_cu_qp_map[u1_horz_4x4] = i1_qp; |
237 | 32.0M | } |
238 | 4.54M | pi1_cu_qp_map += u4_qp_buffer_stride; |
239 | 4.54M | } |
240 | | /*Move to the next CU. Qp map is at 4x4 level but b4_cu_size is at 8x8 level (hence the *2 in the loops above)*/ |
241 | 1.00M | ps_curr_cu++; |
242 | 1.00M | } |
243 | 134k | pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4); //one qp per 4x4 block. |
244 | 134k | ps_ctb_out++; |
245 | | |
246 | 134k | } //for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
247 | | |
248 | | /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/ |
249 | 78.0k | memcpy( |
250 | 78.0k | (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset), |
251 | 78.0k | (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride), |
252 | 78.0k | tile_qp_size); |
253 | 78.0k | } |
254 | | |
255 | | /** |
256 | | ******************************************************************************* |
257 | | * |
258 | | * @brief |
259 | | * Deblock CTB level function. |
260 | | * |
261 | | * @par Description: |
262 | | * For a given CTB, deblocking on both vertical and |
263 | | * horizontal edges is done. Both the luma and chroma |
264 | | * blocks are processed |
265 | | * |
266 | | * @param[in] |
267 | | * ps_deblk: Pointer to the deblock context |
268 | | * last_col: if the CTB is the last CTB of current CTB-row value is 1 else 0 |
269 | | * ps_deblk_ctb_row_params: deblk ctb row params |
270 | | * |
271 | | * @returns |
272 | | * None |
273 | | * @remarks |
274 | | * None |
275 | | * |
276 | | ******************************************************************************* |
277 | | */ |
278 | | void ihevce_deblk_ctb( |
279 | | deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params) |
280 | 134k | { |
281 | 134k | WORD32 ctb_size; |
282 | 134k | UWORD32 u4_bs; |
283 | 134k | WORD32 bs_lz; /*Leading zeros in boundary strength*/ |
284 | 134k | WORD32 qp_p, qp_q; |
285 | 134k | UWORD8 *pu1_src; |
286 | 134k | UWORD8 *pu1_src_uv; |
287 | 134k | UWORD8 *pu1_curr_src; |
288 | 134k | WORD32 col_size; |
289 | 134k | WORD32 col, row, i4_edge_count; |
290 | 134k | WORD32 num_columns_for_vert_filt; |
291 | 134k | WORD32 num_blks_for_vert_filt; |
292 | 134k | WORD32 num_rows_for_horz_filt; |
293 | | |
294 | 134k | ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz; |
295 | 134k | ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert; /* NOTE(review): declared with the horz fn type; confirm it matches the vert fptr type */ |
296 | | |
297 | | /* Filter flags are packed along with the qp info. |
298 | | 6 out of the 8 bits correspond to qp and 1 to filter flag. */ |
299 | | /* filter_p and filter_q are initialized to 1. |
300 | | They are to be extracted along with the qp info. */ |
301 | 134k | WORD32 filter_p, filter_q; |
302 | 134k | WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp; |
303 | 134k | WORD8 *pi1_ctb_row_qp_q; |
304 | | |
305 | 134k | func_selector_t *ps_func_slector = ps_deblk->ps_func_selector; |
306 | | |
307 | 134k | WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; |
308 | 134k | WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; |
309 | 134k | WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge; |
310 | 134k | WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge; |
311 | 134k | UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert; |
312 | 134k | UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz; |
313 | 134k | UWORD32 *bs_vert_uv = bs_vert; |
314 | 134k | UWORD32 *bs_horz_uv = bs_horz; |
315 | 134k | UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride; |
316 | 134k | UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2); |
317 | | |
318 | 134k | if(u1_is_422) |
319 | 0 | { |
320 | 0 | pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr; |
321 | 0 | pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr; |
322 | 0 | } |
323 | 134k | else |
324 | 134k | { |
325 | 134k | pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr; |
326 | 134k | pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr; |
327 | 134k | } |
328 | | |
329 | 134k | ctb_size = ps_deblk->i4_ctb_size; |
330 | | |
331 | | /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */ |
332 | | /* Can be removed during optimization */ |
333 | 134k | filter_q = 1; |
334 | 134k | filter_p = 1; |
335 | | |
336 | | ////////////////////////////////////////////////////////////////////////////// |
337 | | /* Luma Vertical Edge */ |
338 | 134k | pu1_src = ps_deblk->pu1_ctb_y; |
339 | 134k | pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; |
340 | 134k | num_columns_for_vert_filt = ctb_size / 8; |
341 | 134k | num_blks_for_vert_filt = ctb_size / 4; |
342 | | |
343 | 1.21M | for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) |
344 | 1.07M | { |
345 | 1.07M | u4_bs = *bs_vert; |
346 | | /* get the current 4x4 vertical pointer */ |
347 | 1.07M | pu1_curr_src = pu1_src; |
348 | 1.07M | pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1); |
349 | | |
350 | | /* If the current edge is not the 1st edge of frame or slice */ |
351 | 1.07M | if(1 == left_luma_edge_filter_flag) |
352 | 1.00M | { |
353 | 5.67M | for(row = 0; row < num_blks_for_vert_filt;) |
354 | 4.67M | { |
355 | 4.67M | bs_lz = CLZ(u4_bs) >> 1; |
356 | | /* If BS = 0, skip the edge filtering */ |
357 | 4.67M | if(0 != bs_lz) |
358 | 1.57M | { |
359 | 1.57M | u4_bs = u4_bs << (bs_lz << 1); |
360 | 1.57M | pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride); |
361 | 1.57M | pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride); |
362 | 1.57M | row += bs_lz; |
363 | 1.57M | continue; |
364 | 1.57M | } |
365 | 3.09M | qp_p = *(pi1_ctb_row_qp_q - 1); |
366 | 3.09M | qp_q = *pi1_ctb_row_qp_q; |
367 | | |
368 | 3.09M | ps_func_slector->ihevc_deblk_luma_vert_fptr( |
369 | 3.09M | pu1_curr_src, |
370 | 3.09M | ps_deblk->i4_luma_pic_stride, |
371 | 3.09M | (u4_bs >> 30), /* bits 31 and 30 are extracted */ |
372 | 3.09M | qp_p, |
373 | 3.09M | qp_q, |
374 | 3.09M | ps_deblk->i4_beta_offset_div2, |
375 | 3.09M | ps_deblk->i4_tc_offset_div2, |
376 | 3.09M | filter_p, |
377 | 3.09M | filter_q); |
378 | | |
379 | 3.09M | u4_bs = u4_bs << 2; |
380 | 3.09M | pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2); |
381 | 3.09M | pi1_ctb_row_qp_q += u4_qp_buffer_stride; |
382 | 3.09M | row++; |
383 | 3.09M | } |
384 | 1.00M | } |
385 | | |
386 | | /* Increment the boundary strength and src pointer for the next column */ |
387 | 1.07M | bs_vert += 1; |
388 | 1.07M | pu1_src += 8; |
389 | | |
390 | | /* Enable for the next edges of ctb*/ |
391 | 1.07M | left_luma_edge_filter_flag = 1; |
392 | 1.07M | } |
393 | | |
394 | | ////////////////////////////////////////////////////////////////////////////// |
395 | | /* Chroma Vertical Edge */ |
396 | 134k | pu1_src_uv = ps_deblk->pu1_ctb_uv; |
397 | 134k | pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride; |
398 | | |
399 | | /* Column spacing is 4 for each chroma component */ |
400 | | /* and hence 8 when they are interleaved. */ |
401 | | /* But, only those columns with a x co-ordinate */ |
402 | | /* that is divisible by 8 are filtered */ |
403 | | /* Hence, denominator is 16 */ |
404 | 134k | num_columns_for_vert_filt = ctb_size / 16; |
405 | | /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */ |
406 | 134k | num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4; |
407 | | |
408 | 674k | for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++) |
409 | 539k | { |
410 | | /* Every alternate boundary strength value is used for 420 chroma */ |
411 | 539k | u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa); |
412 | 539k | pu1_curr_src = pu1_src_uv; |
413 | 539k | pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2); |
414 | | |
415 | | /* If the current edge is not the 1st edge of frame or slice */ |
416 | 539k | if(1 == left_chroma_edge_filter_flag) |
417 | 461k | { |
418 | | /* Each 'bs' is 2 bits long */ |
419 | | /* The divby4 in 420 is */ |
420 | | /* necessitated by the fact that */ |
421 | | /* chroma ctb_ht is half that of luma */ |
422 | 461k | WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1); |
423 | | /* i4_sub_heightC = 2 for 420 */ |
424 | | /* i4_sub_heightC = 1 for 422 */ |
425 | 461k | WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs; |
426 | | |
427 | 2.29M | for(row = 0; row < num_blks_for_vert_filt;) |
428 | 1.83M | { |
429 | 1.83M | bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs; |
430 | | |
431 | | /* If BS = 0, skip the edge filtering */ |
432 | 1.83M | if(0 != bs_lz) |
433 | 612k | { |
434 | 612k | row += bs_lz; |
435 | 612k | u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs); |
436 | | /* '<<2' because of blk_size being 4x4 */ |
437 | 612k | pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride); |
438 | | |
439 | | /* In 420, every alternate QP row is skipped, because chroma height is half that of luma */ |
440 | | /* In 422, no row is skipped */ |
441 | 612k | pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz); |
442 | | |
443 | 612k | continue; |
444 | 612k | } |
445 | | |
446 | 1.21M | qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC); |
447 | 1.21M | qp_q = *pi1_ctb_row_qp_q; |
448 | | |
449 | 1.21M | pf_deblk_chroma_vert( |
450 | 1.21M | pu1_curr_src, |
451 | 1.21M | ps_deblk->i4_chroma_pic_stride, |
452 | 1.21M | qp_p, |
453 | 1.21M | qp_q, |
454 | 1.21M | ps_deblk->i4_cb_qp_indx_offset, |
455 | 1.21M | ps_deblk->i4_cr_qp_indx_offset, |
456 | 1.21M | ps_deblk->i4_tc_offset_div2, |
457 | 1.21M | filter_p, |
458 | 1.21M | filter_q); |
459 | | |
460 | 1.21M | u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs); |
461 | 1.21M | pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2); |
462 | 1.21M | pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1)); |
463 | 1.21M | row++; |
464 | 1.21M | } |
465 | 461k | } |
466 | | /* Increment the boundary strength by 2 and src pointer for the next column */ |
467 | | /* As the edge filtering happens for alternate column */ |
468 | 539k | bs_vert_uv += 2; |
469 | 539k | pu1_src_uv += 16; |
470 | 539k | left_chroma_edge_filter_flag = 1; |
471 | 539k | } |
472 | | |
473 | | ////////////////////////////////////////////////////////////////////////////// |
474 | | |
475 | | /* Luma Horizontal Edge */ |
476 | 134k | pu1_src = ps_deblk->pu1_ctb_y; |
477 | 134k | col_size = ctb_size / 4; |
478 | | |
479 | | /* If the ctb is the 1st ctb of row, */ |
480 | | /* Decrement the loop count to exclude filtering of last 4 pixels */ |
481 | | /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */ |
482 | 134k | if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
483 | 56.7k | { |
484 | 56.7k | pu1_src -= 4; |
485 | | /*If the ctb is at the horizontal end of PIC*/ |
486 | | /* Increase the column size to filter last 4 pixels */ |
487 | 56.7k | col_size += last_col; |
488 | 56.7k | } |
489 | 78.0k | else if(!last_col) |
490 | 8.83k | { |
491 | 8.83k | col_size -= 1; |
492 | 8.83k | } |
493 | 134k | { |
494 | 134k | UWORD8 *pu1_src_temp = pu1_src; |
495 | | //pi1_ctb_row_qp_p and pi1_ctb_row_qp_q point to alternate rows |
496 | 134k | pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
497 | | |
498 | 134k | num_rows_for_horz_filt = ctb_size / 8; |
499 | | |
500 | 1.21M | for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) |
501 | 1.07M | { |
502 | 1.07M | WORD32 col_size_temp = col_size; |
503 | 1.07M | pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; |
504 | 1.07M | pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride); |
505 | | |
506 | 1.07M | if(1 == top_luma_edge_filter_flag) |
507 | 990k | { |
508 | | //Deblock the last vertical_4x4_column of previous CTB |
509 | 990k | if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
510 | 433k | { |
511 | 433k | u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3; |
512 | 433k | if(u4_bs != 0) |
513 | 117k | { |
514 | 117k | qp_p = *(pi1_ctb_row_qp_p - 1); |
515 | 117k | qp_q = *(pi1_ctb_row_qp_q - 1); |
516 | | |
517 | 117k | ps_func_slector->ihevc_deblk_luma_horz_fptr( |
518 | 117k | pu1_src, |
519 | 117k | ps_deblk->i4_luma_pic_stride, |
520 | 117k | u4_bs, |
521 | 117k | qp_p, |
522 | 117k | qp_q, |
523 | 117k | ps_deblk->i4_beta_offset_div2, |
524 | 117k | ps_deblk->i4_tc_offset_div2, |
525 | 117k | 1, |
526 | 117k | 1); |
527 | 117k | } |
528 | | |
529 | 433k | pu1_src += 4; |
530 | 433k | col_size_temp--; |
531 | 433k | } |
532 | | //Start deblocking current CTB |
533 | 990k | u4_bs = *(bs_horz); |
534 | | |
535 | 5.20M | for(col = 0; col < col_size_temp;) |
536 | 4.21M | { |
537 | 4.21M | bs_lz = CLZ(u4_bs) >> 1; |
538 | 4.21M | if(0 != bs_lz) |
539 | 1.25M | { |
540 | 1.25M | u4_bs = u4_bs << (bs_lz << 1); |
541 | 1.25M | pu1_src += 4 * bs_lz; |
542 | 1.25M | col += bs_lz; |
543 | 1.25M | continue; |
544 | 1.25M | } |
545 | 2.96M | qp_p = *(pi1_ctb_row_qp_p + col); |
546 | 2.96M | qp_q = *(pi1_ctb_row_qp_q + col); |
547 | | |
548 | 2.96M | ps_func_slector->ihevc_deblk_luma_horz_fptr( |
549 | 2.96M | pu1_src, |
550 | 2.96M | ps_deblk->i4_luma_pic_stride, |
551 | 2.96M | u4_bs >> (sizeof(u4_bs) * 8 - 2), |
552 | 2.96M | qp_p, |
553 | 2.96M | qp_q, |
554 | 2.96M | ps_deblk->i4_beta_offset_div2, |
555 | 2.96M | ps_deblk->i4_tc_offset_div2, |
556 | 2.96M | filter_p, |
557 | 2.96M | filter_q); |
558 | | |
559 | 2.96M | pu1_src += 4; |
560 | 2.96M | u4_bs = u4_bs << 2; |
561 | 2.96M | col++; |
562 | 2.96M | } |
563 | | //Store the last vertical_4x4 column of CTB's info for next CTB deblocking |
564 | 990k | u4_bs = *bs_horz; |
565 | 990k | ps_deblk->au1_prev_bs[i4_edge_count] = |
566 | 990k | (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30); |
567 | 990k | } |
568 | 1.07M | bs_horz += 1; |
569 | 1.07M | pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1); |
570 | 1.07M | top_luma_edge_filter_flag = 1; |
571 | 1.07M | } |
572 | 134k | } |
573 | | |
574 | | ////////////////////////////////////////////////////////////////////////////// |
575 | | /* Chroma Horizontal Edge */ |
576 | 134k | pu1_src_uv = ps_deblk->pu1_ctb_uv; |
577 | 134k | col_size = ctb_size / 8; |
578 | | |
579 | | /* If the ctb is the 1st ctb of row, */ |
580 | | /* Decrement the loop count to exclude filtering of last 4 pixels */ |
581 | | /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */ |
582 | 134k | if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
583 | 56.7k | { |
584 | 56.7k | pu1_src_uv -= 8; |
585 | | |
586 | | /*If the ctb is at the horizontal end of PIC*/ |
587 | | /* Increase the column size to filter last 8 (uv) pixels */ |
588 | 56.7k | col_size += last_col; |
589 | 56.7k | } |
590 | 78.0k | else if(!last_col) |
591 | 8.83k | { |
592 | 8.83k | col_size--; |
593 | 8.83k | } |
594 | | |
595 | 134k | { |
596 | 134k | UWORD8 *pu1_src_temp = pu1_src_uv; |
597 | | |
598 | | //pi1_ctb_row_qp_p and pi1_ctb_row_qp_q point to alternate rows |
599 | 134k | pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp; |
600 | 134k | num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8); |
601 | | |
602 | 674k | for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++) |
603 | 539k | { |
604 | 539k | WORD32 col_size_temp = col_size; |
605 | | |
606 | 539k | pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride; |
607 | 539k | pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride); |
608 | | |
609 | 539k | if(1 == top_chroma_edge_filter_flag) |
610 | 450k | { |
611 | | //Deblock the last vertical_4x4_column of previous CTB |
612 | 450k | if(ps_deblk->i4_deblock_left_ctb_edge == 1) |
613 | 206k | { |
614 | 206k | u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2; |
615 | | |
616 | 206k | if(u4_bs == 2) |
617 | 97.0k | { |
618 | 97.0k | qp_p = *(pi1_ctb_row_qp_p - 1); |
619 | 97.0k | qp_q = *(pi1_ctb_row_qp_q - 1); |
620 | | |
621 | 97.0k | pf_deblk_chroma_horz( |
622 | 97.0k | pu1_src_uv, |
623 | 97.0k | ps_deblk->i4_chroma_pic_stride, |
624 | 97.0k | qp_p, |
625 | 97.0k | qp_q, |
626 | 97.0k | ps_deblk->i4_cb_qp_indx_offset, |
627 | 97.0k | ps_deblk->i4_cr_qp_indx_offset, |
628 | 97.0k | ps_deblk->i4_tc_offset_div2, |
629 | 97.0k | 1, |
630 | 97.0k | 1); |
631 | 97.0k | } |
632 | | |
633 | 206k | pu1_src_uv += 8; |
634 | 206k | col_size_temp--; |
635 | 206k | } |
636 | | |
637 | | //Start deblocking current CTB |
638 | 450k | u4_bs = *(bs_horz_uv)&0x88888888; |
639 | | |
640 | 2.01M | for(col = 0; col < col_size_temp;) |
641 | 1.56M | { |
642 | 1.56M | bs_lz = CLZ(u4_bs) >> 2; |
643 | | |
644 | 1.56M | if(0 != bs_lz) |
645 | 498k | { |
646 | 498k | u4_bs = u4_bs << (bs_lz << 2); |
647 | 498k | pu1_src_uv += (8 * bs_lz); |
648 | | |
649 | 498k | col += bs_lz; |
650 | 498k | continue; |
651 | 498k | } |
652 | | |
653 | 1.06M | qp_p = *(pi1_ctb_row_qp_p + (col << 1)); |
654 | 1.06M | qp_q = *(pi1_ctb_row_qp_q + (col << 1)); |
655 | | |
656 | 1.06M | pf_deblk_chroma_horz( |
657 | 1.06M | pu1_src_uv, |
658 | 1.06M | ps_deblk->i4_chroma_pic_stride, |
659 | 1.06M | qp_p, |
660 | 1.06M | qp_q, |
661 | 1.06M | ps_deblk->i4_cb_qp_indx_offset, |
662 | 1.06M | ps_deblk->i4_cr_qp_indx_offset, |
663 | 1.06M | ps_deblk->i4_tc_offset_div2, |
664 | 1.06M | filter_p, |
665 | 1.06M | filter_q); |
666 | | |
667 | 1.06M | pu1_src_uv += 8; |
668 | 1.06M | u4_bs = u4_bs << 4; |
669 | 1.06M | col++; |
670 | 1.06M | } |
671 | | |
672 | | //Store the last vertical_4x4 column of CTB's info for next CTB deblocking |
673 | 450k | u4_bs = *bs_horz_uv; |
674 | 450k | ps_deblk->au1_prev_bs_uv[i4_edge_count] = |
675 | 450k | (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30); |
676 | 450k | } |
677 | | |
678 | 539k | bs_horz_uv += ((0 == u1_is_422) + 1); |
679 | 539k | pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1)); |
680 | 539k | top_chroma_edge_filter_flag = 1; |
681 | 539k | } |
682 | 134k | } |
683 | | |
684 | 134k | return; |
685 | 134k | } |