/src/libhevc/encoder/ihevce_enc_subpel_gen.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_enc_subpel_gen.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains Padding and Subpel plane generation functions |
27 | | * at CTB level |
28 | | * |
29 | | * \date |
30 | | * 29/12/2012 |
31 | | * |
32 | | * \author |
33 | | * Ittiam |
34 | | * |
35 | | * |
36 | | * List of Functions |
37 | | * - ihevce_subpel_padding() |
38 | | * - ihevce_pad_interp_recon_ctb() |
39 | | * |
40 | | * |
41 | | ****************************************************************************** |
42 | | */ |
43 | | |
44 | | /*****************************************************************************/ |
45 | | /* File Includes */ |
46 | | /*****************************************************************************/ |
47 | | /* System include files */ |
48 | | #include <stdio.h> |
49 | | #include <string.h> |
50 | | #include <stdlib.h> |
51 | | #include <assert.h> |
52 | | #include <stdarg.h> |
53 | | #include <math.h> |
54 | | |
55 | | /* User include files */ |
56 | | #include "ihevc_typedefs.h" |
57 | | #include "itt_video_api.h" |
58 | | #include "ihevce_api.h" |
59 | | |
60 | | #include "rc_cntrl_param.h" |
61 | | #include "rc_frame_info_collector.h" |
62 | | #include "rc_look_ahead_params.h" |
63 | | |
64 | | #include "ihevc_defs.h" |
65 | | #include "ihevc_debug.h" |
66 | | #include "ihevc_macros.h" |
67 | | #include "ihevc_structs.h" |
68 | | #include "ihevc_platform_macros.h" |
69 | | #include "ihevc_deblk.h" |
70 | | #include "ihevc_itrans_recon.h" |
71 | | #include "ihevc_chroma_itrans_recon.h" |
72 | | #include "ihevc_chroma_intra_pred.h" |
73 | | #include "ihevc_intra_pred.h" |
74 | | #include "ihevc_inter_pred.h" |
75 | | #include "ihevc_mem_fns.h" |
76 | | #include "ihevc_padding.h" |
77 | | #include "ihevc_weighted_pred.h" |
78 | | #include "ihevc_sao.h" |
79 | | #include "ihevc_resi_trans.h" |
80 | | #include "ihevc_quant_iquant_ssd.h" |
81 | | #include "ihevc_cabac_tables.h" |
82 | | #include "ihevc_trans_tables.h" |
83 | | #include "ihevc_trans_macros.h" |
84 | | |
85 | | #include "ihevce_defs.h" |
86 | | #include "ihevce_lap_enc_structs.h" |
87 | | #include "ihevce_multi_thrd_structs.h" |
88 | | #include "ihevce_multi_thrd_funcs.h" |
89 | | #include "ihevce_me_common_defs.h" |
90 | | #include "ihevce_had_satd.h" |
91 | | #include "ihevce_error_codes.h" |
92 | | #include "ihevce_bitstream.h" |
93 | | #include "ihevce_cabac.h" |
94 | | #include "ihevce_rdoq_macros.h" |
95 | | #include "ihevce_function_selector.h" |
96 | | #include "ihevce_enc_structs.h" |
97 | | #include "ihevce_global_tables.h" |
98 | | #include "ihevce_cmn_utils_instr_set_router.h" |
99 | | #include "ihevce_entropy_structs.h" |
100 | | #include "ihevce_enc_loop_structs.h" |
101 | | #include "ihevce_enc_loop_utils.h" |
102 | | #include "ihevce_inter_pred.h" |
103 | | #include "ihevce_common_utils.h" |
104 | | |
105 | | /*! |
106 | | ****************************************************************************** |
107 | | * \if Function name : ihevce_suppel_padding \endif |
108 | | * |
109 | | * \brief |
110 | | * Subpel Plane planes Padding Function |
111 | | * |
112 | | * \param[in] pu1_dst : pointer to subpel plane |
113 | | * stride : subpel plane stride same as recon stride |
114 | | * tot_wd : width of the block in subpel plane |
115 | | * tot_ht : hieght of the block in subpel plane |
116 | | * ctb_ctr : ctb horizontal position |
117 | | * vert_ctr : ctb vertical position |
118 | | * ps_frm_ctb_prms : CTB characteristics parameters |
119 | | * \return |
120 | | * None |
121 | | * |
122 | | * |
123 | | * \author |
124 | | * Ittiam |
125 | | * |
126 | | ***************************************************************************** |
127 | | */ |
128 | | void ihevce_subpel_padding( |
129 | | UWORD8 *pu1_dst, |
130 | | WORD32 stride, |
131 | | WORD32 tot_wd, |
132 | | WORD32 tot_ht, |
133 | | WORD32 pad_subpel_x, |
134 | | WORD32 pad_subpel_y, |
135 | | WORD32 ctb_ctr, |
136 | | WORD32 vert_ctr, |
137 | | WORD32 i4_num_ctbs_horz, |
138 | | WORD32 i4_num_ctbs_vert, |
139 | | func_selector_t *ps_func_selector) |
140 | 619k | { |
141 | 619k | ihevc_pad_top_ft *pf_pad_top = ps_func_selector->ihevc_pad_top_fptr; |
142 | 619k | ihevc_pad_bottom_ft *pf_pad_bottom = ps_func_selector->ihevc_pad_bottom_fptr; |
143 | 619k | ihevc_pad_left_luma_ft *pf_pad_left_luma = ps_func_selector->ihevc_pad_left_luma_fptr; |
144 | 619k | ihevc_pad_right_luma_ft *pf_pad_right_luma = ps_func_selector->ihevc_pad_right_luma_fptr; |
145 | | |
146 | 619k | UWORD8 *pu1_dst_tmp = pu1_dst; |
147 | 619k | WORD32 cpy_ht = tot_ht; |
148 | | |
149 | | /* Top padding*/ |
150 | 619k | if(vert_ctr == 0) |
151 | 399k | { |
152 | 399k | PAD_BUF_VER(pu1_dst, stride, tot_wd, pad_subpel_x, pad_subpel_y, pf_pad_top); |
153 | | /*if curr ctb is 1st ctb in ctb row, update dst pointer for Left padding*/ |
154 | 399k | pu1_dst_tmp = pu1_dst - pad_subpel_y * stride; |
155 | 399k | cpy_ht += pad_subpel_y; |
156 | 399k | } |
157 | | /*bottom padding*/ |
158 | 619k | if(vert_ctr == (i4_num_ctbs_vert - 1)) |
159 | 399k | { |
160 | 399k | PAD_BUF_VER( |
161 | 399k | (pu1_dst + (tot_ht * stride)), |
162 | 399k | stride, |
163 | 399k | tot_wd, |
164 | 399k | pad_subpel_x, |
165 | 399k | pad_subpel_y, |
166 | 399k | pf_pad_bottom); |
167 | | /*if curr ctb is 1st ctb in ctb row, update dst pointer for right padding*/ |
168 | 399k | cpy_ht += pad_subpel_y; |
169 | 399k | } |
170 | | |
171 | | /*left padding*/ |
172 | 619k | if(ctb_ctr == 0) |
173 | 353k | { |
174 | 353k | PAD_BUF_HOR(pu1_dst_tmp, stride, cpy_ht, pad_subpel_x, pad_subpel_y, pf_pad_left_luma); |
175 | 353k | } |
176 | | |
177 | | /*right padding*/ |
178 | 619k | if(ctb_ctr == (i4_num_ctbs_horz - 1)) |
179 | 353k | { |
180 | 353k | PAD_BUF_HOR( |
181 | 353k | pu1_dst_tmp + tot_wd, stride, cpy_ht, pad_subpel_x, pad_subpel_y, pf_pad_right_luma); |
182 | 353k | } |
183 | 619k | } |
184 | | |
185 | | /*! |
186 | | ****************************************************************************** |
187 | | * \if Function name : ihevce_pad_interp_recon_ctb \endif |
188 | | * |
189 | | * \brief |
190 | | * Ctb level Subpel Plane generation and padding function |
191 | | * |
192 | | * \param[in] |
193 | | * s_cu_prms : coding unit params structures (recon buffers) |
194 | | * ctb_ctr : ctb horizontal position |
195 | | * vert_ctr : ctb vertical position |
196 | | * ps_frm_ctb_prms : CTB characteristics parameters |
197 | | * i4_dist_nbr_mask : nbr-mask for distributed mode. Should be 0 for standalone |
198 | | * or distributed-single-client mode |
199 | | * |
200 | | * \return |
201 | | * None |
202 | | * |
203 | | * \author |
204 | | * Ittiam |
205 | | * |
206 | | ***************************************************************************** |
207 | | */ |
208 | | void ihevce_pad_interp_recon_ctb( |
209 | | pad_interp_recon_frm_t *ps_pad_interp_recon, |
210 | | WORD32 ctb_ctr, |
211 | | WORD32 vert_ctr, |
212 | | WORD32 quality_preset, |
213 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
214 | | WORD16 *pi2_hxhy_interm, |
215 | | WORD32 i4_bitrate_instance_id, |
216 | | func_selector_t *ps_func_selector) |
217 | 206k | { |
218 | 206k | UWORD8 *pu1_src, *pu1_src_uv; |
219 | 206k | WORD32 stride, stride_uv, wd, ht, wd_uv, ht_uv, pad_x, pad_y, pad_subpel_x, pad_subpel_y; |
220 | 206k | WORD32 tot_wd, tot_ht, offset, cpy_ht_y, cpy_ht_uv; |
221 | 206k | WORD32 i4_chroma_vert_pad_default; |
222 | | |
223 | 206k | WORD32 ctb_size = ps_frm_ctb_prms->i4_ctb_size; |
224 | 206k | UWORD8 *pu1_dst_hxfy = ps_pad_interp_recon->pu1_sbpel_hxfy + |
225 | 206k | (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) + |
226 | 206k | (ctb_ctr * ctb_size); |
227 | 206k | UWORD8 *pu1_dst_fxhy = ps_pad_interp_recon->pu1_sbpel_fxhy + |
228 | 206k | (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) + |
229 | 206k | (ctb_ctr * ctb_size); |
230 | 206k | UWORD8 *pu1_dst_hxhy = ps_pad_interp_recon->pu1_sbpel_hxhy + |
231 | 206k | (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) + |
232 | 206k | (ctb_ctr * ctb_size); |
233 | 206k | UWORD8 u1_is_422 = (ps_pad_interp_recon->u1_chroma_array_type == 2); |
234 | | |
235 | 206k | ihevc_inter_pred_ft *pf_inter_pred_luma_horz = |
236 | 206k | ps_func_selector->ihevc_inter_pred_luma_horz_fptr; |
237 | 206k | ihevc_inter_pred_ft *pf_inter_pred_luma_vert = |
238 | 206k | ps_func_selector->ihevc_inter_pred_luma_vert_fptr; |
239 | 206k | ihevc_inter_pred_w16out_ft *pf_inter_pred_luma_horz_w16out = |
240 | 206k | ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr; |
241 | 206k | ihevc_inter_pred_w16inp_ft *pf_inter_pred_luma_vert_w16inp = |
242 | 206k | ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr; |
243 | 206k | stride = ps_pad_interp_recon->i4_luma_recon_stride; |
244 | 206k | wd = ps_pad_interp_recon->i4_ctb_size; |
245 | 206k | ht = ps_pad_interp_recon->i4_ctb_size; |
246 | | |
247 | 206k | pu1_src = (UWORD8 *)ps_pad_interp_recon->pu1_luma_recon + (vert_ctr * ctb_size * stride) + |
248 | 206k | (ctb_ctr * ctb_size); |
249 | | |
250 | 206k | stride_uv = ps_pad_interp_recon->i4_chrm_recon_stride; |
251 | 206k | wd_uv = ps_pad_interp_recon->i4_ctb_size; |
252 | 206k | ht_uv = ps_pad_interp_recon->i4_ctb_size >> (0 == u1_is_422); |
253 | | |
254 | 206k | pu1_src_uv = (UWORD8 *)ps_pad_interp_recon->pu1_chrm_recon + |
255 | 206k | (vert_ctr * (ctb_size >> (0 == u1_is_422)) * stride_uv) + (ctb_ctr * ctb_size); |
256 | | |
257 | 206k | pad_x = ALIGN8(NTAPS_LUMA); |
258 | 206k | pad_y = ALIGN8(NTAPS_LUMA); |
259 | 206k | pad_subpel_x = PAD_HORZ - pad_x; |
260 | 206k | pad_subpel_y = PAD_VERT - pad_y; |
261 | | |
262 | 206k | offset = pad_x + (pad_y * stride); |
263 | | |
264 | 206k | tot_wd = wd + (pad_x << 1); |
265 | 206k | tot_ht = ht + (pad_y << 1); |
266 | | |
267 | 206k | i4_chroma_vert_pad_default = PAD_VERT >> (0 == u1_is_422); |
268 | | |
269 | 206k | if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
270 | 117k | { |
271 | 117k | WORD32 last_ctb_x = |
272 | 117k | ps_frm_ctb_prms->i4_cu_aligned_pic_wd - |
273 | 117k | ((ps_frm_ctb_prms->i4_num_ctbs_horz - 1) * ps_pad_interp_recon->i4_ctb_size); |
274 | 117k | wd = last_ctb_x; |
275 | 117k | wd_uv = last_ctb_x; |
276 | 117k | } |
277 | 206k | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
278 | 133k | { |
279 | 133k | WORD32 last_ctb_y = |
280 | 133k | ps_frm_ctb_prms->i4_cu_aligned_pic_ht - |
281 | 133k | ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ps_pad_interp_recon->i4_ctb_size); |
282 | 133k | ht = last_ctb_y; |
283 | 133k | ht_uv = last_ctb_y >> (0 == u1_is_422); |
284 | 133k | } |
285 | 206k | tot_ht = ht; |
286 | 206k | tot_wd = wd; |
287 | | |
288 | | /*top padding*/ |
289 | 206k | if(vert_ctr == 0) |
290 | 133k | { |
291 | 133k | tot_ht = pad_y + ht - 8; |
292 | 133k | } |
293 | | /*bottom padding*/ |
294 | 206k | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
295 | 133k | { |
296 | 133k | tot_ht = pad_y + ht + 8; |
297 | 133k | } |
298 | | |
299 | | /*Left padding*/ |
300 | 206k | if(ctb_ctr == 0) |
301 | 117k | { |
302 | 117k | tot_wd = pad_x + wd - 8; |
303 | 117k | } |
304 | | /*right padding*/ |
305 | 206k | if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
306 | 117k | { |
307 | 117k | tot_wd = pad_x + wd + 8; |
308 | 117k | } |
309 | | |
310 | 206k | pu1_src -= offset; |
311 | 206k | pu1_dst_hxhy -= offset; |
312 | 206k | pu1_dst_hxfy -= offset; |
313 | 206k | pu1_dst_fxhy -= offset; |
314 | | |
315 | 206k | { |
316 | 206k | tot_wd = ALIGN16(tot_wd); |
317 | 206k | if(0 == |
318 | 206k | i4_bitrate_instance_id) //do the following subpel calculations for reference bit-rate instance only |
319 | 206k | { |
320 | | /* HxFY plane */ |
321 | 206k | pf_inter_pred_luma_horz( |
322 | 206k | pu1_src, |
323 | 206k | pu1_dst_hxfy, |
324 | 206k | stride, |
325 | 206k | stride, |
326 | 206k | (WORD8 *)gai1_hevc_luma_filter_taps[2], |
327 | 206k | tot_ht, |
328 | 206k | tot_wd); |
329 | | |
330 | 206k | pf_inter_pred_luma_vert( |
331 | 206k | pu1_src, |
332 | 206k | pu1_dst_fxhy, |
333 | 206k | stride, |
334 | 206k | stride, |
335 | 206k | (WORD8 *)gai1_hevc_luma_filter_taps[2], |
336 | 206k | tot_ht, |
337 | 206k | tot_wd); |
338 | | |
339 | 206k | pf_inter_pred_luma_horz_w16out( |
340 | 206k | pu1_src - 3 * stride, |
341 | 206k | pi2_hxhy_interm, |
342 | 206k | stride, |
343 | 206k | tot_wd, |
344 | 206k | (WORD8 *)gai1_hevc_luma_filter_taps[2], |
345 | 206k | (tot_ht + 7), |
346 | 206k | tot_wd); |
347 | | |
348 | | /* "Stride" of intermediate buffer in pixels,equals tot_wd */ |
349 | 206k | pf_inter_pred_luma_vert_w16inp( |
350 | 206k | pi2_hxhy_interm + (3 * tot_wd), |
351 | 206k | pu1_dst_hxhy, |
352 | 206k | tot_wd, |
353 | 206k | stride, |
354 | 206k | (WORD8 *)gai1_hevc_luma_filter_taps[2], |
355 | 206k | tot_ht, |
356 | 206k | tot_wd); |
357 | | |
358 | 206k | ihevce_subpel_padding( |
359 | 206k | pu1_dst_fxhy, |
360 | 206k | stride, |
361 | 206k | tot_wd, |
362 | 206k | tot_ht, |
363 | 206k | pad_subpel_x, |
364 | 206k | pad_subpel_y, |
365 | 206k | ctb_ctr, |
366 | 206k | vert_ctr, |
367 | 206k | ps_frm_ctb_prms->i4_num_ctbs_horz, |
368 | 206k | ps_frm_ctb_prms->i4_num_ctbs_vert, |
369 | 206k | ps_func_selector); |
370 | | |
371 | 206k | ihevce_subpel_padding( |
372 | 206k | pu1_dst_hxfy, |
373 | 206k | stride, |
374 | 206k | tot_wd, |
375 | 206k | tot_ht, |
376 | 206k | pad_subpel_x, |
377 | 206k | pad_subpel_y, |
378 | 206k | ctb_ctr, |
379 | 206k | vert_ctr, |
380 | 206k | ps_frm_ctb_prms->i4_num_ctbs_horz, |
381 | 206k | ps_frm_ctb_prms->i4_num_ctbs_vert, |
382 | 206k | ps_func_selector); |
383 | | |
384 | 206k | ihevce_subpel_padding( |
385 | 206k | pu1_dst_hxhy, |
386 | 206k | stride, |
387 | 206k | tot_wd, |
388 | 206k | tot_ht, |
389 | 206k | pad_subpel_x, |
390 | 206k | pad_subpel_y, |
391 | 206k | ctb_ctr, |
392 | 206k | vert_ctr, |
393 | 206k | ps_frm_ctb_prms->i4_num_ctbs_horz, |
394 | 206k | ps_frm_ctb_prms->i4_num_ctbs_vert, |
395 | 206k | ps_func_selector); |
396 | 206k | } |
397 | 206k | } |
398 | 206k | } |
399 | | |
400 | | void ihevce_recon_padding( |
401 | | pad_interp_recon_frm_t *ps_pad_interp_recon, |
402 | | WORD32 ctb_ctr, |
403 | | WORD32 vert_ctr, |
404 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
405 | | func_selector_t *ps_func_selector) |
406 | 206k | { |
407 | 206k | UWORD8 *pu1_src, *pu1_src_uv, *pu1_buf_y, *pu1_buf_uv; |
408 | 206k | WORD32 stride, stride_uv, wd, ht, wd_uv, ht_uv; |
409 | 206k | WORD32 cpy_ht_y, cpy_ht_uv; |
410 | 206k | WORD32 i4_chroma_vert_pad_default; |
411 | | |
412 | 206k | WORD32 top_extra_pix = 0, left_extra_pix = 0; |
413 | 206k | WORD32 ctb_size = ps_frm_ctb_prms->i4_ctb_size; |
414 | 206k | UWORD8 u1_is_422 = (ps_pad_interp_recon->u1_chroma_array_type == 2); |
415 | | |
416 | 206k | ihevc_pad_top_ft *pf_pad_top = ps_func_selector->ihevc_pad_top_fptr; |
417 | 206k | ihevc_pad_bottom_ft *pf_pad_bottom = ps_func_selector->ihevc_pad_bottom_fptr; |
418 | 206k | ihevc_pad_left_luma_ft *pf_pad_left_luma = ps_func_selector->ihevc_pad_left_luma_fptr; |
419 | 206k | ihevc_pad_left_chroma_ft *pf_pad_left_chroma = ps_func_selector->ihevc_pad_left_chroma_fptr; |
420 | 206k | ihevc_pad_right_luma_ft *pf_pad_right_luma = ps_func_selector->ihevc_pad_right_luma_fptr; |
421 | 206k | ihevc_pad_right_chroma_ft *pf_pad_right_chroma = ps_func_selector->ihevc_pad_right_chroma_fptr; |
422 | | |
423 | 206k | stride = ps_pad_interp_recon->i4_luma_recon_stride; |
424 | 206k | wd = ps_pad_interp_recon->i4_ctb_size; |
425 | 206k | ht = ps_pad_interp_recon->i4_ctb_size; |
426 | | |
427 | 206k | pu1_src = (UWORD8 *)ps_pad_interp_recon->pu1_luma_recon + (vert_ctr * ctb_size * stride) + |
428 | 206k | (ctb_ctr * ctb_size); |
429 | | |
430 | 206k | stride_uv = ps_pad_interp_recon->i4_chrm_recon_stride; |
431 | 206k | wd_uv = ps_pad_interp_recon->i4_ctb_size; |
432 | 206k | ht_uv = ps_pad_interp_recon->i4_ctb_size >> (0 == u1_is_422); |
433 | | |
434 | 206k | pu1_src_uv = (UWORD8 *)ps_pad_interp_recon->pu1_chrm_recon + |
435 | 206k | (vert_ctr * (ctb_size >> (0 == u1_is_422)) * stride_uv) + (ctb_ctr * ctb_size); |
436 | | |
437 | 206k | i4_chroma_vert_pad_default = PAD_VERT >> (0 == u1_is_422); |
438 | | |
439 | 206k | if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
440 | 117k | { |
441 | 117k | WORD32 last_ctb_x = |
442 | 117k | ps_frm_ctb_prms->i4_cu_aligned_pic_wd - |
443 | 117k | ((ps_frm_ctb_prms->i4_num_ctbs_horz - 1) * ps_pad_interp_recon->i4_ctb_size); |
444 | 117k | wd = last_ctb_x; |
445 | 117k | wd_uv = last_ctb_x; |
446 | 117k | } |
447 | 206k | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
448 | 133k | { |
449 | 133k | WORD32 last_ctb_y = |
450 | 133k | ps_frm_ctb_prms->i4_cu_aligned_pic_ht - |
451 | 133k | ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ps_pad_interp_recon->i4_ctb_size); |
452 | 133k | ht = last_ctb_y; |
453 | 133k | ht_uv = last_ctb_y >> (0 == u1_is_422); |
454 | 133k | } |
455 | | |
456 | 206k | pu1_buf_y = pu1_src; |
457 | 206k | pu1_buf_uv = pu1_src_uv; |
458 | 206k | cpy_ht_y = ht; |
459 | 206k | cpy_ht_uv = ht_uv; |
460 | 206k | if(vert_ctr > 0) |
461 | 73.2k | { |
462 | 73.2k | top_extra_pix = 8; |
463 | 73.2k | } |
464 | 206k | if(ctb_ctr > 0) |
465 | 88.6k | { |
466 | 88.6k | left_extra_pix = 8; |
467 | 88.6k | } |
468 | | |
469 | | /*top padding*/ |
470 | 206k | if(vert_ctr == 0) |
471 | 133k | { |
472 | 133k | PAD_BUF_VER( |
473 | 133k | pu1_src - left_extra_pix, stride, wd + left_extra_pix, PAD_HORZ, PAD_VERT, pf_pad_top); |
474 | 133k | PAD_BUF_VER( |
475 | 133k | pu1_src_uv - left_extra_pix, |
476 | 133k | stride_uv, |
477 | 133k | wd_uv + left_extra_pix, |
478 | 133k | PAD_HORZ, |
479 | 133k | i4_chroma_vert_pad_default, |
480 | 133k | pf_pad_top); |
481 | | /*if curr ctb is 1st ctb in ctb row, update dst pointer for Left padding*/ |
482 | 133k | pu1_buf_y = pu1_src - PAD_VERT * stride; |
483 | 133k | pu1_buf_uv = pu1_src_uv - i4_chroma_vert_pad_default * stride_uv; |
484 | 133k | cpy_ht_y += PAD_VERT; |
485 | 133k | cpy_ht_uv += i4_chroma_vert_pad_default; |
486 | 133k | } |
487 | | |
488 | | /*bottom padding*/ |
489 | 206k | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
490 | 133k | { |
491 | 133k | PAD_BUF_VER( |
492 | 133k | ((pu1_src - left_extra_pix) + (ht * stride)), |
493 | 133k | stride, |
494 | 133k | wd + left_extra_pix, |
495 | 133k | PAD_HORZ, |
496 | 133k | PAD_VERT, |
497 | 133k | pf_pad_bottom); |
498 | 133k | PAD_BUF_VER( |
499 | 133k | ((pu1_src_uv - left_extra_pix) + (ht_uv * stride_uv)), |
500 | 133k | stride_uv, |
501 | 133k | wd_uv + left_extra_pix, |
502 | 133k | PAD_HORZ, |
503 | 133k | i4_chroma_vert_pad_default, |
504 | 133k | pf_pad_bottom); |
505 | | /*if curr ctb is 1st ctb in ctb row, update dst pointer for right padding*/ |
506 | 133k | cpy_ht_y += PAD_VERT; |
507 | 133k | cpy_ht_uv += i4_chroma_vert_pad_default; |
508 | 133k | } |
509 | | |
510 | | /*Left padding*/ |
511 | 206k | if(ctb_ctr == 0) |
512 | 117k | { |
513 | 117k | PAD_BUF_HOR( |
514 | 117k | (pu1_buf_y - top_extra_pix * stride), |
515 | 117k | stride, |
516 | 117k | cpy_ht_y + top_extra_pix, |
517 | 117k | PAD_HORZ, |
518 | 117k | PAD_VERT, |
519 | 117k | pf_pad_left_luma); |
520 | 117k | PAD_BUF_HOR( |
521 | 117k | pu1_buf_uv - (top_extra_pix >> 1) * (u1_is_422 + 1) * stride_uv, |
522 | 117k | stride_uv, |
523 | 117k | cpy_ht_uv + (top_extra_pix >> 1) * (u1_is_422 + 1), |
524 | 117k | PAD_HORZ, |
525 | 117k | i4_chroma_vert_pad_default, |
526 | 117k | pf_pad_left_chroma); |
527 | 117k | } |
528 | | |
529 | | /*right padding*/ |
530 | 206k | if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
531 | 117k | { |
532 | 117k | PAD_BUF_HOR( |
533 | 117k | ((pu1_buf_y - (top_extra_pix * stride)) + wd), |
534 | 117k | stride, |
535 | 117k | cpy_ht_y + top_extra_pix, |
536 | 117k | PAD_HORZ, |
537 | 117k | PAD_VERT, |
538 | 117k | pf_pad_right_luma); |
539 | 117k | PAD_BUF_HOR( |
540 | 117k | ((pu1_buf_uv - ((top_extra_pix >> 1) * (u1_is_422 + 1) * stride_uv)) + wd_uv), |
541 | 117k | stride_uv, |
542 | 117k | cpy_ht_uv + (top_extra_pix >> 1) * (u1_is_422 + 1), |
543 | 117k | PAD_HORZ, |
544 | 117k | i4_chroma_vert_pad_default, |
545 | 117k | pf_pad_right_chroma); |
546 | 117k | } |
547 | 206k | } |
548 | | |
549 | | void ihevce_pad_interp_recon_src_ctb( |
550 | | pad_interp_recon_frm_t *ps_pad_interp_recon, |
551 | | WORD32 ctb_ctr, |
552 | | WORD32 vert_ctr, |
553 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
554 | | WORD32 i4_bitrate_instance_id, |
555 | | func_selector_t *ps_func_selector, |
556 | | WORD32 is_chroma_needs_padding) |
557 | 0 | { |
558 | 0 | UWORD8 *pu1_src, *pu1_src_uv; |
559 | 0 | WORD32 stride, stride_uv, wd, ht, wd_uv, ht_uv, pad_x, pad_y; |
560 | 0 | WORD32 tot_wd, tot_ht; |
561 | 0 | WORD32 i4_chroma_vert_pad_default; |
562 | |
|
563 | 0 | WORD32 ctb_size = ps_frm_ctb_prms->i4_ctb_size; |
564 | 0 | UWORD8 u1_is_422 = (ps_pad_interp_recon->u1_chroma_array_type == 2); |
565 | |
|
566 | 0 | ihevc_pad_top_ft *pf_pad_top = ps_func_selector->ihevc_pad_top_fptr; |
567 | 0 | ihevc_pad_bottom_ft *pf_pad_bottom = ps_func_selector->ihevc_pad_bottom_fptr; |
568 | 0 | ihevc_pad_left_luma_ft *pf_pad_left_luma = ps_func_selector->ihevc_pad_left_luma_fptr; |
569 | 0 | ihevc_pad_left_chroma_ft *pf_pad_left_chroma = ps_func_selector->ihevc_pad_left_chroma_fptr; |
570 | 0 | ihevc_pad_right_luma_ft *pf_pad_right_luma = ps_func_selector->ihevc_pad_right_luma_fptr; |
571 | 0 | ihevc_pad_right_chroma_ft *pf_pad_right_chroma = ps_func_selector->ihevc_pad_right_chroma_fptr; |
572 | | |
573 | | /* Luma padding */ |
574 | 0 | pu1_src = (UWORD8 *)ps_pad_interp_recon->pu1_luma_recon_src + |
575 | 0 | (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) + |
576 | 0 | (ctb_ctr * ctb_size); |
577 | |
|
578 | 0 | stride = ps_pad_interp_recon->i4_luma_recon_stride; |
579 | 0 | wd = ps_pad_interp_recon->i4_ctb_size; |
580 | 0 | ht = ps_pad_interp_recon->i4_ctb_size; |
581 | |
|
582 | 0 | pu1_src_uv = |
583 | 0 | (UWORD8 *)ps_pad_interp_recon->pu1_chrm_recon_src + |
584 | 0 | (vert_ctr * (ctb_size >> (0 == u1_is_422)) * ps_pad_interp_recon->i4_chrm_recon_stride) + |
585 | 0 | (ctb_ctr * ctb_size); |
586 | |
|
587 | 0 | stride_uv = ps_pad_interp_recon->i4_chrm_recon_stride; |
588 | 0 | wd_uv = ps_pad_interp_recon->i4_ctb_size; |
589 | 0 | ht_uv = ps_pad_interp_recon->i4_ctb_size >> (0 == u1_is_422); |
590 | |
|
591 | 0 | pad_x = ALIGN8(NTAPS_LUMA); |
592 | 0 | pad_y = ALIGN8(NTAPS_LUMA); |
593 | |
|
594 | 0 | tot_wd = wd + (pad_x << 1); |
595 | 0 | tot_ht = ht + (pad_y << 1); |
596 | |
|
597 | 0 | i4_chroma_vert_pad_default = PAD_VERT >> (0 == u1_is_422); |
598 | |
|
599 | 0 | if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
600 | 0 | { |
601 | 0 | WORD32 last_ctb_x = |
602 | 0 | ps_frm_ctb_prms->i4_cu_aligned_pic_wd - |
603 | 0 | ((ps_frm_ctb_prms->i4_num_ctbs_horz - 1) * ps_pad_interp_recon->i4_ctb_size); |
604 | 0 | wd = last_ctb_x; |
605 | 0 | wd_uv = last_ctb_x; |
606 | 0 | } |
607 | 0 | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
608 | 0 | { |
609 | 0 | WORD32 last_ctb_y = |
610 | 0 | ps_frm_ctb_prms->i4_cu_aligned_pic_ht - |
611 | 0 | ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ps_pad_interp_recon->i4_ctb_size); |
612 | 0 | ht = last_ctb_y; |
613 | 0 | ht_uv = last_ctb_y >> (0 == u1_is_422); |
614 | 0 | } |
615 | |
|
616 | 0 | if(ctb_ctr == 0) |
617 | 0 | { |
618 | 0 | if(vert_ctr == 0) |
619 | 0 | { |
620 | 0 | PAD_BUF_HOR(pu1_src, stride, ht, PAD_HORZ, PAD_VERT, pf_pad_left_luma); |
621 | 0 | PAD_BUF_VER(pu1_src - PAD_HORZ, stride, PAD_HORZ + wd, PAD_HORZ, PAD_VERT, pf_pad_top); |
622 | 0 | if(is_chroma_needs_padding) |
623 | 0 | { |
624 | 0 | PAD_BUF_HOR( |
625 | 0 | pu1_src_uv, |
626 | 0 | stride_uv, |
627 | 0 | ht_uv, |
628 | 0 | PAD_HORZ, |
629 | 0 | i4_chroma_vert_pad_default, |
630 | 0 | pf_pad_left_chroma); |
631 | 0 | PAD_BUF_VER( |
632 | 0 | pu1_src_uv - PAD_HORZ, |
633 | 0 | stride_uv, |
634 | 0 | PAD_HORZ + wd_uv, |
635 | 0 | PAD_HORZ, |
636 | 0 | i4_chroma_vert_pad_default, |
637 | 0 | pf_pad_top); |
638 | 0 | } |
639 | 0 | } |
640 | 0 | else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
641 | 0 | { |
642 | 0 | PAD_BUF_HOR(pu1_src - 8 * stride, stride, ht + 8, PAD_HORZ, PAD_VERT, pf_pad_left_luma); |
643 | 0 | PAD_BUF_VER( |
644 | 0 | (pu1_src - PAD_HORZ + (ht * stride)), |
645 | 0 | stride, |
646 | 0 | PAD_HORZ + wd, |
647 | 0 | PAD_HORZ, |
648 | 0 | PAD_VERT, |
649 | 0 | pf_pad_bottom); |
650 | 0 | if(is_chroma_needs_padding) |
651 | 0 | { |
652 | 0 | PAD_BUF_HOR( |
653 | 0 | pu1_src_uv - 4 * (u1_is_422 + 1) * stride_uv, |
654 | 0 | stride_uv, |
655 | 0 | ht_uv + 4 * (u1_is_422 + 1), |
656 | 0 | PAD_HORZ, |
657 | 0 | i4_chroma_vert_pad_default, |
658 | 0 | pf_pad_left_chroma); |
659 | 0 | PAD_BUF_VER( |
660 | 0 | (pu1_src_uv - PAD_HORZ + (ht_uv * stride_uv)), |
661 | 0 | stride_uv, |
662 | 0 | PAD_HORZ + wd_uv, |
663 | 0 | PAD_HORZ, |
664 | 0 | i4_chroma_vert_pad_default, |
665 | 0 | pf_pad_bottom); |
666 | 0 | } |
667 | 0 | } |
668 | 0 | else |
669 | 0 | { |
670 | 0 | PAD_BUF_HOR(pu1_src - 8 * stride, stride, ht + 8, PAD_HORZ, PAD_VERT, pf_pad_left_luma); |
671 | 0 | if(is_chroma_needs_padding) |
672 | 0 | { |
673 | 0 | PAD_BUF_HOR( |
674 | 0 | pu1_src_uv - 4 * (u1_is_422 + 1) * stride_uv, |
675 | 0 | stride_uv, |
676 | 0 | ht_uv + 4 * (u1_is_422 + 1), |
677 | 0 | PAD_HORZ, |
678 | 0 | i4_chroma_vert_pad_default, |
679 | 0 | pf_pad_left_chroma); |
680 | 0 | } |
681 | 0 | } |
682 | 0 | } |
683 | 0 | else if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
684 | 0 | { |
685 | 0 | if(vert_ctr == 0) |
686 | 0 | { |
687 | 0 | PAD_BUF_HOR(pu1_src + wd, stride, ht, PAD_HORZ, PAD_VERT, pf_pad_right_luma); |
688 | 0 | PAD_BUF_VER(pu1_src - 8, stride, PAD_HORZ + (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_top); |
689 | 0 | if(is_chroma_needs_padding) |
690 | 0 | { |
691 | 0 | PAD_BUF_HOR( |
692 | 0 | pu1_src_uv + wd_uv, |
693 | 0 | stride_uv, |
694 | 0 | ht_uv, |
695 | 0 | PAD_HORZ, |
696 | 0 | i4_chroma_vert_pad_default, |
697 | 0 | pf_pad_right_chroma); |
698 | 0 | PAD_BUF_VER( |
699 | 0 | pu1_src_uv - 8, |
700 | 0 | stride_uv, |
701 | 0 | PAD_HORZ + (wd_uv + 8), |
702 | 0 | PAD_HORZ, |
703 | 0 | i4_chroma_vert_pad_default, |
704 | 0 | pf_pad_top); |
705 | 0 | } |
706 | 0 | } |
707 | 0 | else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
708 | 0 | { |
709 | 0 | PAD_BUF_HOR( |
710 | 0 | (pu1_src - (8 * stride) + wd), |
711 | 0 | stride, |
712 | 0 | ht + 8, |
713 | 0 | PAD_HORZ, |
714 | 0 | PAD_VERT, |
715 | 0 | pf_pad_right_luma); |
716 | 0 | PAD_BUF_VER( |
717 | 0 | (pu1_src - 8 + (ht * stride)), |
718 | 0 | stride, |
719 | 0 | PAD_HORZ + (wd + 8), |
720 | 0 | PAD_HORZ, |
721 | 0 | PAD_VERT, |
722 | 0 | pf_pad_bottom); |
723 | 0 | if(is_chroma_needs_padding) |
724 | 0 | { |
725 | 0 | PAD_BUF_HOR( |
726 | 0 | (pu1_src_uv - (4 * (u1_is_422 + 1) * stride_uv) + wd_uv), |
727 | 0 | stride_uv, |
728 | 0 | ht_uv + 4 * (u1_is_422 + 1), |
729 | 0 | PAD_HORZ, |
730 | 0 | i4_chroma_vert_pad_default, |
731 | 0 | pf_pad_right_chroma); |
732 | 0 | PAD_BUF_VER( |
733 | 0 | (pu1_src_uv - 8 + (ht_uv * stride_uv)), |
734 | 0 | stride_uv, |
735 | 0 | PAD_HORZ + (wd_uv + 8), |
736 | 0 | PAD_HORZ, |
737 | 0 | i4_chroma_vert_pad_default, |
738 | 0 | pf_pad_bottom); |
739 | 0 | } |
740 | 0 | } |
741 | 0 | else |
742 | 0 | { |
743 | 0 | PAD_BUF_HOR( |
744 | 0 | (pu1_src - (8 * stride) + wd), |
745 | 0 | stride, |
746 | 0 | ht + 8, |
747 | 0 | PAD_HORZ, |
748 | 0 | PAD_VERT, |
749 | 0 | pf_pad_right_luma); |
750 | 0 | if(is_chroma_needs_padding) |
751 | 0 | { |
752 | 0 | PAD_BUF_HOR( |
753 | 0 | (pu1_src_uv - (4 * (u1_is_422 + 1) * stride_uv) + wd_uv), |
754 | 0 | stride_uv, |
755 | 0 | ht_uv + 4 * (u1_is_422 + 1), |
756 | 0 | PAD_HORZ, |
757 | 0 | i4_chroma_vert_pad_default, |
758 | 0 | pf_pad_right_chroma); |
759 | 0 | } |
760 | 0 | } |
761 | 0 | } |
762 | 0 | else if(vert_ctr == 0) |
763 | 0 | { |
764 | 0 | PAD_BUF_VER(pu1_src - 8, stride, (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_top); |
765 | 0 | if(is_chroma_needs_padding) |
766 | 0 | { |
767 | 0 | PAD_BUF_VER( |
768 | 0 | pu1_src_uv - 8, |
769 | 0 | stride_uv, |
770 | 0 | (wd_uv + 8), |
771 | 0 | PAD_HORZ, |
772 | 0 | i4_chroma_vert_pad_default, |
773 | 0 | pf_pad_top); |
774 | 0 | } |
775 | 0 | } |
776 | 0 | else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
777 | 0 | { |
778 | 0 | PAD_BUF_VER( |
779 | 0 | (pu1_src - 8 + (ht * stride)), stride, (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_bottom); |
780 | 0 | if(is_chroma_needs_padding) |
781 | 0 | { |
782 | 0 | PAD_BUF_VER( |
783 | 0 | (pu1_src_uv - 8 + (ht_uv * stride_uv)), |
784 | 0 | stride_uv, |
785 | 0 | (wd_uv + 8), |
786 | 0 | PAD_HORZ, |
787 | 0 | i4_chroma_vert_pad_default, |
788 | 0 | pf_pad_bottom); |
789 | 0 | } |
790 | 0 | } |
791 | 0 | } |