/src/libhevc/encoder/ihevce_inter_pred.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * ihevce_inter_pred.c |
25 | | * |
26 | | * @brief |
27 | | * Contains functions for giving out prediction samples for a given pu |
28 | | * |
29 | | * @author |
30 | | * Ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - ihevc_inter_pred() |
34 | | * |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | /* System include files */ |
39 | | #include <stdio.h> |
40 | | #include <string.h> |
41 | | #include <stdlib.h> |
42 | | #include <assert.h> |
43 | | #include <stdarg.h> |
44 | | #include <math.h> |
45 | | |
46 | | /* User include files */ |
47 | | #include "ihevc_typedefs.h" |
48 | | #include "itt_video_api.h" |
49 | | #include "ihevce_api.h" |
50 | | |
51 | | #include "rc_cntrl_param.h" |
52 | | #include "rc_frame_info_collector.h" |
53 | | #include "rc_look_ahead_params.h" |
54 | | |
55 | | #include "ihevc_debug.h" |
56 | | #include "ihevc_defs.h" |
57 | | #include "ihevc_structs.h" |
58 | | #include "ihevc_platform_macros.h" |
59 | | #include "ihevc_deblk.h" |
60 | | #include "ihevc_itrans_recon.h" |
61 | | #include "ihevc_chroma_itrans_recon.h" |
62 | | #include "ihevc_chroma_intra_pred.h" |
63 | | #include "ihevc_intra_pred.h" |
64 | | #include "ihevc_inter_pred.h" |
65 | | #include "ihevc_mem_fns.h" |
66 | | #include "ihevc_padding.h" |
67 | | #include "ihevc_weighted_pred.h" |
68 | | #include "ihevc_sao.h" |
69 | | #include "ihevc_resi_trans.h" |
70 | | #include "ihevc_quant_iquant_ssd.h" |
71 | | #include "ihevc_cabac_tables.h" |
72 | | |
73 | | #include "ihevce_defs.h" |
74 | | #include "ihevce_lap_enc_structs.h" |
75 | | #include "ihevce_multi_thrd_structs.h" |
76 | | #include "ihevce_me_common_defs.h" |
77 | | #include "ihevce_had_satd.h" |
78 | | #include "ihevce_error_codes.h" |
79 | | #include "ihevce_bitstream.h" |
80 | | #include "ihevce_cabac.h" |
81 | | #include "ihevce_rdoq_macros.h" |
82 | | #include "ihevce_function_selector.h" |
83 | | #include "ihevce_enc_structs.h" |
84 | | #include "ihevce_entropy_structs.h" |
85 | | #include "ihevce_cmn_utils_instr_set_router.h" |
86 | | #include "ihevce_enc_loop_structs.h" |
87 | | #include "ihevce_inter_pred.h" |
88 | | #include "ihevc_weighted_pred.h" |
89 | | |
90 | | /*****************************************************************************/ |
91 | | /* Global tables */ |
92 | | /*****************************************************************************/ |
93 | | |
94 | | /** |
95 | | ****************************************************************************** |
96 | | * @brief Table of filter tap coefficients for HEVC luma inter prediction |
97 | | * input : sub pel mv position (dx/dy = 0:3) |
98 | | * output : filter coeffs to be used for that position |
99 | | * |
100 | | * @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC |
101 | | ****************************************************************************** |
102 | | */ |
103 | | WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 }, |
104 | | { -1, 4, -10, 58, 17, -5, 1, 0 }, |
105 | | { -1, 4, -11, 40, 40, -11, 4, -1 }, |
106 | | { 0, 1, -5, 17, 58, -10, 4, -1 } }; |
107 | | |
108 | | /** |
109 | | ****************************************************************************** |
110 | | * @brief Table of filter tap coefficients for HEVC chroma inter prediction |
111 | | * input : chroma sub pel mv position (dx/dy = 0:7) |
112 | | * output : filter coeffs to be used for that position |
113 | | * |
114 | | * @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC |
115 | | The filter uses only the first four elements in each array |
116 | | ****************************************************************************** |
117 | | */ |
118 | | WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 }, { -2, 58, 10, -2 }, |
119 | | { -4, 54, 16, -2 }, { -6, 46, 28, -4 }, |
120 | | { -4, 36, 36, -4 }, { -4, 28, 46, -6 }, |
121 | | { -2, 16, 54, -4 }, { -2, 10, 58, -2 } }; |
122 | | |
123 | | /*****************************************************************************/ |
124 | | /* Function Definitions */ |
125 | | /*****************************************************************************/ |
126 | | |
127 | | /** |
128 | | ******************************************************************************* |
129 | | * |
130 | | * @brief |
131 | | * Performs Luma inter pred based on sub pel position dxdy and store the result |
132 | | * in a 16 bit destination buffer |
133 | | * |
134 | | * @param[in] pu1_src |
135 | | * pointer to the source correspoding to integer pel position of a mv (left and |
136 | | * top justified integer position) |
137 | | * |
138 | | * @param[out] pi2_dst |
139 | | * WORD16 pointer to the destination |
140 | | * |
141 | | * @param[in] src_strd |
142 | | * source buffer stride |
143 | | * |
144 | | * @param[in] dst_strd |
145 | | * destination buffer stride |
146 | | * |
147 | | * @param[in] pi2_hdst_scratch |
148 | | * scratch buffer for intermediate storage of horizontal filter output; used as |
149 | | * input for vertical filtering when sub pel components (dx != 0) && (dy != 0) |
150 | | * |
151 | | * Max scratch buffer required is w * (h + 7) * sizeof(WORD16) |
152 | | * |
153 | | * @param[in] ht |
154 | | * width of the prediction unit |
155 | | * |
156 | | * @param[in] wd |
157 | | * width of the prediction unit |
158 | | * |
159 | | * @param[in] dx |
160 | | * qpel position[0:3] of mv in x direction |
161 | | * |
162 | | * @param[in] dy |
163 | | * qpel position[0:3] of mv in y direction |
164 | | * |
165 | | * @returns |
166 | | * none |
167 | | * |
168 | | * @remarks |
169 | | * |
170 | | ******************************************************************************* |
171 | | */ |
172 | | void ihevce_luma_interpolate_16bit_dxdy( |
173 | | UWORD8 *pu1_src, |
174 | | WORD16 *pi2_dst, |
175 | | WORD32 src_strd, |
176 | | WORD32 dst_strd, |
177 | | WORD16 *pi2_hdst_scratch, |
178 | | WORD32 ht, |
179 | | WORD32 wd, |
180 | | WORD32 dy, |
181 | | WORD32 dx, |
182 | | func_selector_t *ps_func_selector) |
183 | 3.67M | { |
184 | 3.67M | if((0 == dx) && (0 == dy)) |
185 | 2.87M | { |
186 | | /*--------- full pel position : copy input by upscaling-------*/ |
187 | | |
188 | 2.87M | ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr( |
189 | 2.87M | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd); |
190 | 2.87M | } |
191 | 804k | else if((0 != dx) && (0 != dy)) |
192 | 257k | { |
193 | | /*----------sub pel in both x and y direction---------*/ |
194 | | |
195 | 257k | UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd); |
196 | 257k | WORD32 hdst_buf_stride = wd; |
197 | 257k | WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride); |
198 | | |
199 | | /* horizontal filtering of source done in a scratch buffer first */ |
200 | 257k | ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr( |
201 | 257k | pu1_horz_src, |
202 | 257k | pi2_hdst_scratch, |
203 | 257k | src_strd, |
204 | 257k | hdst_buf_stride, |
205 | 257k | &gai1_hevc_luma_filter_taps[dx][0], |
206 | 257k | (ht + NTAPS_LUMA - 1), |
207 | 257k | wd); |
208 | | |
209 | | /* vertical filtering on scratch buffer and stored in desitnation */ |
210 | 257k | ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr( |
211 | 257k | pi2_vert_src, |
212 | 257k | pi2_dst, |
213 | 257k | hdst_buf_stride, |
214 | 257k | dst_strd, |
215 | 257k | &gai1_hevc_luma_filter_taps[dy][0], |
216 | 257k | ht, |
217 | 257k | wd); |
218 | 257k | } |
219 | 547k | else if(0 == dy) |
220 | 236k | { |
221 | | /*----------sub pel in x direction only ---------*/ |
222 | | |
223 | 236k | ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr( |
224 | 236k | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd); |
225 | 236k | } |
226 | 311k | else /* if (0 == dx) */ |
227 | 311k | { |
228 | | /*----------sub pel in y direction only ---------*/ |
229 | | |
230 | 311k | ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr( |
231 | 311k | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd); |
232 | 311k | } |
233 | 3.67M | } |
234 | | |
235 | | /** |
236 | | ******************************************************************************* |
237 | | * |
238 | | * @brief |
239 | | * Performs Luma inter pred based on sub pel position dxdy and store the result |
240 | | * in a 8 bit destination buffer |
241 | | * |
242 | | * @param[in] pu1_src |
243 | | * pointer to the source correspoding to integer pel position of a mv (left and |
244 | | * top justified integer position) |
245 | | * |
246 | | * @param[out] pu1_dst |
247 | | * UWORD8 pointer to the destination |
248 | | * |
249 | | * @param[in] src_strd |
250 | | * source buffer stride |
251 | | * |
252 | | * @param[in] dst_strd |
253 | | * destination buffer stride |
254 | | * |
255 | | * @param[in] pi2_hdst_scratch |
256 | | * scratch buffer for intermediate storage of horizontal filter output; used as |
257 | | * input for vertical filtering when sub pel components (dx != 0) && (dy != 0) |
258 | | * |
259 | | * Max scratch buffer required is w * (h + 7) * sizeof(WORD16) |
260 | | * |
261 | | * @param[in] ht |
262 | | * width of the prediction unit |
263 | | * |
264 | | * @param[in] wd |
265 | | * width of the prediction unit |
266 | | * |
267 | | * @param[in] dx |
268 | | * qpel position[0:3] of mv in x direction |
269 | | * |
270 | | * @param[in] dy |
271 | | * qpel position[0:3] of mv in y direction |
272 | | * |
273 | | * @returns |
274 | | * none |
275 | | * |
276 | | * @remarks |
277 | | * |
278 | | ******************************************************************************* |
279 | | */ |
280 | | void ihevce_luma_interpolate_8bit_dxdy( |
281 | | UWORD8 *pu1_src, |
282 | | UWORD8 *pu1_dst, |
283 | | WORD32 src_strd, |
284 | | WORD32 dst_strd, |
285 | | WORD16 *pi2_hdst_scratch, |
286 | | WORD32 ht, |
287 | | WORD32 wd, |
288 | | WORD32 dy, |
289 | | WORD32 dx, |
290 | | func_selector_t *ps_func_selector) |
291 | 6.43M | { |
292 | 6.43M | if((0 == dx) && (0 == dy)) |
293 | 5.43M | { |
294 | | /*--------- full pel position : copy input as is -------*/ |
295 | | |
296 | 5.43M | ps_func_selector->ihevc_inter_pred_luma_copy_fptr( |
297 | 5.43M | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd); |
298 | 5.43M | } |
299 | 993k | else if((0 != dx) && (0 != dy)) |
300 | 340k | { |
301 | | /*----------sub pel in both x and y direction---------*/ |
302 | | |
303 | 340k | UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd); |
304 | 340k | WORD32 hdst_buf_stride = wd; |
305 | 340k | WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride); |
306 | | |
307 | | /* horizontal filtering of source done in a scratch buffer first */ |
308 | 340k | ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr( |
309 | 340k | pu1_horz_src, |
310 | 340k | pi2_hdst_scratch, |
311 | 340k | src_strd, |
312 | 340k | hdst_buf_stride, |
313 | 340k | &gai1_hevc_luma_filter_taps[dx][0], |
314 | 340k | (ht + NTAPS_LUMA - 1), |
315 | 340k | wd); |
316 | | |
317 | | /* vertical filtering on scratch buffer and stored in desitnation */ |
318 | 340k | ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr( |
319 | 340k | pi2_vert_src, |
320 | 340k | pu1_dst, |
321 | 340k | hdst_buf_stride, |
322 | 340k | dst_strd, |
323 | 340k | &gai1_hevc_luma_filter_taps[dy][0], |
324 | 340k | ht, |
325 | 340k | wd); |
326 | 340k | } |
327 | 653k | else if(0 == dy) |
328 | 323k | { |
329 | | /*----------sub pel in x direction only ---------*/ |
330 | | |
331 | 323k | ps_func_selector->ihevc_inter_pred_luma_horz_fptr( |
332 | 323k | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd); |
333 | 323k | } |
334 | 329k | else /* if (0 == dx) */ |
335 | 329k | { |
336 | | /*----------sub pel in y direction only ---------*/ |
337 | | |
338 | 329k | ps_func_selector->ihevc_inter_pred_luma_vert_fptr( |
339 | 329k | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd); |
340 | 329k | } |
341 | 6.43M | } |
342 | | |
343 | | /** |
344 | | ******************************************************************************* |
345 | | * |
346 | | * @brief |
347 | | * Performs Luma prediction for a inter prediction unit(PU) |
348 | | * |
349 | | * @par Description: |
350 | | * For a given PU, Inter prediction followed by weighted prediction (if |
351 | | * required) |
352 | | * |
353 | | * @param[in] ps_inter_pred_ctxt |
354 | | * context for inter prediction; contains ref list, weight offsets, ctb offsets |
355 | | * |
356 | | * @param[in] ps_pu |
357 | | * pointer to PU structure whose inter prediction needs to be done |
358 | | * |
359 | | * @param[in] pu1_dst_buf |
360 | | * pointer to destination buffer where the inter prediction is done |
361 | | * |
362 | | * @param[in] dst_stride |
363 | | * pitch of the destination buffer |
364 | | * |
365 | | * @returns |
366 | | * IV_FAIL for mvs going outside ref frame padded limits |
367 | | * IV_SUCCESS after completing mc for given inter pu |
368 | | * |
369 | | * @remarks |
370 | | * |
371 | | ******************************************************************************* |
372 | | */ |
373 | | IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu( |
374 | | void *pv_inter_pred_ctxt, |
375 | | pu_t *ps_pu, |
376 | | void *pv_dst_buf, |
377 | | WORD32 dst_stride, |
378 | | WORD32 i4_flag_inter_pred_source) |
379 | 8.27M | { |
380 | 8.27M | inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt; |
381 | 8.27M | func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector; |
382 | | |
383 | 8.27M | WORD32 inter_pred_idc = ps_pu->b2_pred_mode; |
384 | 8.27M | UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf; |
385 | 8.27M | WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2; |
386 | 8.27M | WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2; |
387 | | |
388 | 8.27M | WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag || |
389 | 8.27M | ps_inter_pred_ctxt->i1_weighted_bipred_flag; |
390 | | |
391 | | /* 16bit dest required for interpolate if weighted pred is on or bipred */ |
392 | 8.27M | WORD32 store_16bit_output; |
393 | | |
394 | 8.27M | recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1; |
395 | 8.27M | UWORD8 *pu1_ref_pic, *pu1_ref_int_pel; |
396 | 8.27M | WORD32 ref_pic_stride; |
397 | | |
398 | | /* offset of reference block in integer pel units */ |
399 | 8.27M | WORD32 frm_x_ofst, frm_y_ofst; |
400 | 8.27M | WORD32 frm_x_pu, frm_y_pu; |
401 | | |
402 | | /* scratch 16 bit buffers for interpolation in l0 and l1 direction */ |
403 | 8.27M | WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0]; |
404 | 8.27M | WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0]; |
405 | | |
406 | | /* scratch buffer for horizontal interpolation destination */ |
407 | 8.27M | WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0]; |
408 | | |
409 | 8.27M | WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1; |
410 | | |
411 | | /* get PU's frm x and frm y offset */ |
412 | 8.27M | frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2); |
413 | 8.27M | frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2); |
414 | | |
415 | | /* sanity checks */ |
416 | 8.27M | ASSERT((wp_flag == 0) || (wp_flag == 1)); |
417 | 8.27M | ASSERT(dst_stride >= pu_wd); |
418 | 8.27M | ASSERT(ps_pu->b1_intra_flag == 0); |
419 | | |
420 | 8.27M | lvl_shift0 = 0; |
421 | 8.27M | lvl_shift1 = 0; |
422 | | |
423 | 8.27M | if(wp_flag) |
424 | 0 | { |
425 | 0 | UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1; |
426 | |
|
427 | 0 | if(inter_pred_idc != PRED_L1) |
428 | 0 | { |
429 | 0 | ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]; |
430 | 0 | u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag; |
431 | 0 | } |
432 | 0 | if(inter_pred_idc != PRED_L0) |
433 | 0 | { |
434 | 0 | ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]; |
435 | 0 | u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag; |
436 | 0 | } |
437 | 0 | if(inter_pred_idc == PRED_BI) |
438 | 0 | { |
439 | 0 | wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1); |
440 | 0 | } |
441 | 0 | else if(inter_pred_idc == PRED_L0) |
442 | 0 | { |
443 | 0 | wp_flag = u1_is_wgt_pred_L0; |
444 | 0 | } |
445 | 0 | else if(inter_pred_idc == PRED_L1) |
446 | 0 | { |
447 | 0 | wp_flag = u1_is_wgt_pred_L1; |
448 | 0 | } |
449 | 0 | else |
450 | 0 | { |
451 | | /*other values are not allowed*/ |
452 | 0 | assert(0); |
453 | 0 | } |
454 | 0 | } |
455 | 8.27M | store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag); |
456 | | |
457 | 8.27M | if(inter_pred_idc != PRED_L1) |
458 | 8.07M | { |
459 | | /*****************************************************/ |
460 | | /* L0 inter prediction */ |
461 | | /*****************************************************/ |
462 | | |
463 | | /* motion vecs in qpel precision */ |
464 | 8.07M | WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx; |
465 | 8.07M | WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy; |
466 | | |
467 | | /* sub pel offsets in x and y direction w.r.t integer pel */ |
468 | 8.07M | WORD32 dx = mv_x & 0x3; |
469 | 8.07M | WORD32 dy = mv_y & 0x3; |
470 | | |
471 | | /* ref idx is currently stored in the lower 4bits */ |
472 | 8.07M | WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx); |
473 | | |
474 | | /* x and y integer offsets w.r.t frame start */ |
475 | 8.07M | frm_x_ofst = (frm_x_pu + (mv_x >> 2)); |
476 | 8.07M | frm_y_ofst = (frm_y_pu + (mv_y >> 2)); |
477 | | |
478 | 8.07M | ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx]; |
479 | | |
480 | | /* picture buffer start and stride */ |
481 | 8.07M | if(i4_flag_inter_pred_source == 1) |
482 | 0 | { |
483 | 0 | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf; |
484 | 0 | } |
485 | 8.07M | else |
486 | 8.07M | { |
487 | 8.07M | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf; |
488 | 8.07M | } |
489 | 8.07M | ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd; |
490 | | |
491 | | /* Error check for mvs going out of ref frame padded limits */ |
492 | 8.07M | { |
493 | 8.07M | WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd; |
494 | 8.07M | WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht; |
495 | | |
496 | 8.07M | min_x = |
497 | 8.07M | -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] |
498 | 8.07M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4) |
499 | 8.07M | : (PAD_HORZ - 4)); |
500 | | |
501 | 8.07M | max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] |
502 | 8.07M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4) |
503 | 8.07M | : (PAD_HORZ - 4); |
504 | | |
505 | 8.07M | min_y = |
506 | 8.07M | -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] |
507 | 8.07M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4) |
508 | 8.07M | : (PAD_VERT - 4)); |
509 | | |
510 | 8.07M | max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] |
511 | 8.07M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4) |
512 | 8.07M | : (PAD_VERT - 4); |
513 | | |
514 | 8.07M | if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x) |
515 | | //ASSERT(0); |
516 | 1.65k | return (IV_FAIL); |
517 | | |
518 | 8.07M | if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y) |
519 | | //ASSERT(0); |
520 | 556 | return (IV_FAIL); |
521 | 8.07M | } |
522 | | |
523 | | /* point to reference start location in ref frame */ |
524 | | /* Assuming clipping of mv is not required here as ME would */ |
525 | | /* take care of mv access not going beyond padded data */ |
526 | 8.07M | pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst); |
527 | | |
528 | | /* level shifted for subpel with both x and y componenet being non 0 */ |
529 | | /* this is because the interpolate function subtract this to contain */ |
530 | | /* the resulting data in 16 bits */ |
531 | 8.07M | lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0; |
532 | | |
533 | 8.07M | if(store_16bit_output) |
534 | 1.83M | { |
535 | | /* do interpolation in 16bit L0 scratch buffer */ |
536 | 1.83M | ihevce_luma_interpolate_16bit_dxdy( |
537 | 1.83M | pu1_ref_int_pel, |
538 | 1.83M | pi2_scr_buf_l0, |
539 | 1.83M | ref_pic_stride, |
540 | 1.83M | pu_wd, |
541 | 1.83M | pi2_horz_scratch, |
542 | 1.83M | pu_ht, |
543 | 1.83M | pu_wd, |
544 | 1.83M | dy, |
545 | 1.83M | dx, |
546 | 1.83M | ps_func_selector); |
547 | 1.83M | } |
548 | 6.23M | else |
549 | 6.23M | { |
550 | | /* do interpolation in 8bit destination buffer and return */ |
551 | 6.23M | ihevce_luma_interpolate_8bit_dxdy( |
552 | 6.23M | pu1_ref_int_pel, |
553 | 6.23M | pu1_dst_buf, |
554 | 6.23M | ref_pic_stride, |
555 | 6.23M | dst_stride, |
556 | 6.23M | pi2_horz_scratch, |
557 | 6.23M | pu_ht, |
558 | 6.23M | pu_wd, |
559 | 6.23M | dy, |
560 | 6.23M | dx, |
561 | 6.23M | ps_func_selector); |
562 | | |
563 | 6.23M | return (IV_SUCCESS); |
564 | 6.23M | } |
565 | 8.07M | } |
566 | | |
567 | 2.04M | if(inter_pred_idc != PRED_L0) |
568 | 2.04M | { |
569 | | /*****************************************************/ |
570 | | /* L1 inter prediction */ |
571 | | /*****************************************************/ |
572 | | |
573 | | /* motion vecs in qpel precision */ |
574 | 2.04M | WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx; |
575 | 2.04M | WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy; |
576 | | |
577 | | /* sub pel offsets in x and y direction w.r.t integer pel */ |
578 | 2.04M | WORD32 dx = mv_x & 0x3; |
579 | 2.04M | WORD32 dy = mv_y & 0x3; |
580 | | |
581 | | /* ref idx is currently stored in the lower 4bits */ |
582 | 2.04M | WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx); |
583 | | |
584 | | /* x and y integer offsets w.r.t frame start */ |
585 | 2.04M | frm_x_ofst = (frm_x_pu + (mv_x >> 2)); |
586 | 2.04M | frm_y_ofst = (frm_y_pu + (mv_y >> 2)); |
587 | | |
588 | 2.04M | ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx]; |
589 | | |
590 | | /* picture buffer start and stride */ |
591 | | |
592 | 2.04M | if(i4_flag_inter_pred_source == 1) |
593 | 0 | { |
594 | 0 | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf; |
595 | 0 | } |
596 | 2.04M | else |
597 | 2.04M | { |
598 | 2.04M | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf; |
599 | 2.04M | } |
600 | 2.04M | ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd; |
601 | | |
602 | | /* Error check for mvs going out of ref frame padded limits */ |
603 | 2.04M | { |
604 | 2.04M | WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd; |
605 | 2.04M | WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht; |
606 | | |
607 | 2.04M | min_x = |
608 | 2.04M | -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] |
609 | 2.04M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4) |
610 | 2.04M | : (PAD_HORZ - 4)); |
611 | | |
612 | 2.04M | max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] |
613 | 2.04M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4) |
614 | 2.04M | : (PAD_HORZ - 4); |
615 | | |
616 | 2.04M | min_y = |
617 | 2.04M | -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] |
618 | 2.04M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4) |
619 | 2.04M | : (PAD_VERT - 4)); |
620 | | |
621 | 2.04M | max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] |
622 | 2.04M | ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4) |
623 | 2.04M | : (PAD_VERT - 4); |
624 | | |
625 | 2.04M | if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x) |
626 | | //ASSERT(0); |
627 | 363 | return (IV_FAIL); |
628 | | |
629 | 2.03M | if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y) |
630 | | //ASSERT(0); |
631 | 176 | return (IV_FAIL); |
632 | 2.03M | } |
633 | | |
634 | | /* point to reference start location in ref frame */ |
635 | | /* Assuming clipping of mv is not required here as ME would */ |
636 | | /* take care of mv access not going beyond padded data */ |
637 | 2.03M | pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst); |
638 | | |
639 | | /* level shifted for subpel with both x and y componenet being non 0 */ |
640 | | /* this is because the interpolate function subtract this to contain */ |
641 | | /* the resulting data in 16 bits */ |
642 | 2.03M | lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0; |
643 | | |
644 | 2.03M | if(store_16bit_output) |
645 | 1.83M | { |
646 | | /* do interpolation in 16bit L1 scratch buffer */ |
647 | 1.83M | ihevce_luma_interpolate_16bit_dxdy( |
648 | 1.83M | pu1_ref_int_pel, |
649 | 1.83M | pi2_scr_buf_l1, |
650 | 1.83M | ref_pic_stride, |
651 | 1.83M | pu_wd, |
652 | 1.83M | pi2_horz_scratch, |
653 | 1.83M | pu_ht, |
654 | 1.83M | pu_wd, |
655 | 1.83M | dy, |
656 | 1.83M | dx, |
657 | 1.83M | ps_func_selector); |
658 | 1.83M | } |
659 | 200k | else |
660 | 200k | { |
661 | | /* do interpolation in 8bit destination buffer and return */ |
662 | 200k | ihevce_luma_interpolate_8bit_dxdy( |
663 | 200k | pu1_ref_int_pel, |
664 | 200k | pu1_dst_buf, |
665 | 200k | ref_pic_stride, |
666 | 200k | dst_stride, |
667 | 200k | pi2_horz_scratch, |
668 | 200k | pu_ht, |
669 | 200k | pu_wd, |
670 | 200k | dy, |
671 | 200k | dx, |
672 | 200k | ps_func_selector); |
673 | | |
674 | 200k | return (IV_SUCCESS); |
675 | 200k | } |
676 | 2.03M | } |
677 | | |
678 | 1.83M | if((inter_pred_idc != PRED_BI) && wp_flag) |
679 | 0 | { |
680 | | /*****************************************************/ |
681 | | /* unidirection weighted prediction */ |
682 | | /*****************************************************/ |
683 | 0 | ihevce_wght_offst_t *ps_weight_offset; |
684 | 0 | WORD16 *pi2_src; |
685 | 0 | WORD32 lvl_shift; |
686 | | |
687 | | /* intialize the weight, offsets and ref based on l0/l1 mode */ |
688 | 0 | if(inter_pred_idc == PRED_L0) |
689 | 0 | { |
690 | 0 | pi2_src = pi2_scr_buf_l0; |
691 | 0 | ps_weight_offset = &ps_ref_pic_l0->s_weight_offset; |
692 | 0 | lvl_shift = lvl_shift0; |
693 | 0 | } |
694 | 0 | else |
695 | 0 | { |
696 | 0 | pi2_src = pi2_scr_buf_l1; |
697 | 0 | ps_weight_offset = &ps_ref_pic_l1->s_weight_offset; |
698 | 0 | lvl_shift = lvl_shift1; |
699 | 0 | } |
700 | |
|
701 | 0 | wgt0 = ps_weight_offset->i2_luma_weight; |
702 | 0 | off0 = ps_weight_offset->i2_luma_offset; |
703 | 0 | shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH; |
704 | | |
705 | | /* do the uni directional weighted prediction */ |
706 | 0 | ps_func_selector->ihevc_weighted_pred_uni_fptr( |
707 | 0 | pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd); |
708 | 0 | } |
709 | 1.83M | else |
710 | 1.83M | { |
711 | | /*****************************************************/ |
712 | | /* Bipred prediction */ |
713 | | /*****************************************************/ |
714 | | |
715 | 1.83M | if(wp_flag) |
716 | 0 | { |
717 | | /*****************************************************/ |
718 | | /* Bi pred weighted prediction */ |
719 | | /*****************************************************/ |
720 | 0 | wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight; |
721 | 0 | off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset; |
722 | |
|
723 | 0 | wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight; |
724 | 0 | off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset; |
725 | |
|
726 | 0 | shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1; |
727 | |
|
728 | 0 | ps_func_selector->ihevc_weighted_pred_bi_fptr( |
729 | 0 | pi2_scr_buf_l0, |
730 | 0 | pi2_scr_buf_l1, |
731 | 0 | pu1_dst_buf, |
732 | 0 | pu_wd, |
733 | 0 | pu_wd, |
734 | 0 | dst_stride, |
735 | 0 | wgt0, |
736 | 0 | off0, |
737 | 0 | wgt1, |
738 | 0 | off1, |
739 | 0 | shift, |
740 | 0 | lvl_shift0, |
741 | 0 | lvl_shift1, |
742 | 0 | pu_ht, |
743 | 0 | pu_wd); |
744 | 0 | } |
745 | 1.83M | else |
746 | 1.83M | { |
747 | | /*****************************************************/ |
748 | | /* Default Bi pred prediction */ |
749 | | /*****************************************************/ |
750 | 1.83M | ps_func_selector->ihevc_weighted_pred_bi_default_fptr( |
751 | 1.83M | pi2_scr_buf_l0, |
752 | 1.83M | pi2_scr_buf_l1, |
753 | 1.83M | pu1_dst_buf, |
754 | 1.83M | pu_wd, |
755 | 1.83M | pu_wd, |
756 | 1.83M | dst_stride, |
757 | 1.83M | lvl_shift0, |
758 | 1.83M | lvl_shift1, |
759 | 1.83M | pu_ht, |
760 | 1.83M | pu_wd); |
761 | 1.83M | } |
762 | 1.83M | } |
763 | | |
764 | 1.83M | return (IV_SUCCESS); |
765 | 2.04M | } |
766 | | |
767 | | /** |
768 | | ******************************************************************************* |
769 | | * |
770 | | * @brief |
771 | | * Performs Chroma inter pred based on sub pel position dxdy and store the |
772 | | * result in a 16 bit destination buffer |
773 | | * |
774 | | * @param[in] pu1_src |
775 | | * pointer to the source correspoding to integer pel position of a mv (left and |
776 | | * top justified integer position) |
777 | | * |
778 | | * @param[out] pi2_dst |
779 | | * WORD16 pointer to the destination |
780 | | * |
781 | | * @param[in] src_strd |
782 | | * source buffer stride |
783 | | * |
784 | | * @param[in] dst_strd |
785 | | * destination buffer stride |
786 | | * |
787 | | * @param[in] pi2_hdst_scratch |
788 | | * scratch buffer for intermediate storage of horizontal filter output; used as |
789 | | * input for vertical filtering when sub pel components (dx != 0) && (dy != 0) |
790 | | * |
791 | | * Max scratch buffer required is w * (h + 3) * sizeof(WORD16) |
792 | | * |
793 | | * @param[in] ht |
794 | | * width of the prediction unit |
795 | | * |
796 | | * @param[in] wd |
797 | | * width of the prediction unit |
798 | | * |
799 | | * @param[in] dx |
800 | | * 1/8th pel position[0:7] of mv in x direction |
801 | | * |
802 | | * @param[in] dy |
803 | | * 1/8th pel position[0:7] of mv in y direction |
804 | | * |
805 | | * @returns |
806 | | * none |
807 | | * |
808 | | * @remarks |
809 | | * |
810 | | ******************************************************************************* |
811 | | */ |
812 | | void ihevce_chroma_interpolate_16bit_dxdy( |
813 | | UWORD8 *pu1_src, |
814 | | WORD16 *pi2_dst, |
815 | | WORD32 src_strd, |
816 | | WORD32 dst_strd, |
817 | | WORD16 *pi2_hdst_scratch, |
818 | | WORD32 ht, |
819 | | WORD32 wd, |
820 | | WORD32 dy, |
821 | | WORD32 dx, |
822 | | func_selector_t *ps_func_selector) |
823 | 869k | { |
824 | 869k | if((0 == dx) && (0 == dy)) |
825 | 571k | { |
826 | | /*--------- full pel position : copy input by upscaling-------*/ |
827 | | |
828 | 571k | ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr( |
829 | 571k | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd); |
830 | 571k | } |
831 | 298k | else if((0 != dx) && (0 != dy)) |
832 | 125k | { |
833 | | /*----------sub pel in both x and y direction---------*/ |
834 | | |
835 | 125k | UWORD8 *pu1_horz_src = pu1_src - src_strd; |
836 | 125k | WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */ |
837 | 125k | WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride; |
838 | | |
839 | | /* horizontal filtering of source done in a scratch buffer first */ |
840 | 125k | ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr( |
841 | 125k | pu1_horz_src, |
842 | 125k | pi2_hdst_scratch, |
843 | 125k | src_strd, |
844 | 125k | hdst_buf_stride, |
845 | 125k | &gai1_hevc_chroma_filter_taps[dx][0], |
846 | 125k | (ht + NTAPS_CHROMA - 1), |
847 | 125k | wd); |
848 | | |
849 | | /* vertical filtering on scratch buffer and stored in desitnation */ |
850 | 125k | ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr( |
851 | 125k | pi2_vert_src, |
852 | 125k | pi2_dst, |
853 | 125k | hdst_buf_stride, |
854 | 125k | dst_strd, |
855 | 125k | &gai1_hevc_chroma_filter_taps[dy][0], |
856 | 125k | ht, |
857 | 125k | wd); |
858 | 125k | } |
859 | 172k | else if(0 == dy) |
860 | 74.9k | { |
861 | | /*----------sub pel in x direction only ---------*/ |
862 | | |
863 | 74.9k | ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr( |
864 | 74.9k | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd); |
865 | 74.9k | } |
866 | 97.9k | else /* if (0 == dx) */ |
867 | 97.9k | { |
868 | | /*----------sub pel in y direction only ---------*/ |
869 | | |
870 | 97.9k | ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr( |
871 | 97.9k | pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd); |
872 | 97.9k | } |
873 | 869k | } |
874 | | |
875 | | /** |
876 | | ******************************************************************************* |
877 | | * |
878 | | * @brief |
879 | | * Performs Chroma inter pred based on sub pel position dxdy and store the |
880 | | * result in a 8 bit destination buffer |
881 | | * |
882 | | * @param[in] pu1_src |
883 | | * pointer to the source correspoding to integer pel position of a mv (left and |
884 | | * top justified integer position) |
885 | | * |
886 | | * @param[out] pu1_dst |
887 | | * UWORD8 pointer to the destination |
888 | | * |
889 | | * @param[in] src_strd |
890 | | * source buffer stride |
891 | | * |
892 | | * @param[in] dst_strd |
893 | | * destination buffer stride |
894 | | * |
895 | | * @param[in] pi2_hdst_scratch |
896 | | * scratch buffer for intermediate storage of horizontal filter output; used as |
897 | | * input for vertical filtering when sub pel components (dx != 0) && (dy != 0) |
898 | | * |
899 | | * Max scratch buffer required is w * (h + 3) * sizeof(WORD16) |
900 | | * |
901 | | * @param[in] ht |
902 | | * width of the prediction unit |
903 | | * |
904 | | * @param[in] wd |
905 | | * width of the prediction unit |
906 | | * |
907 | | * @param[in] dx |
908 | | * 1/8th pel position[0:7] of mv in x direction |
909 | | * |
910 | | * @param[in] dy |
911 | | * 1/8th pel position[0:7] of mv in y direction |
912 | | * |
913 | | * @returns |
914 | | * none |
915 | | * |
916 | | * @remarks |
917 | | * |
918 | | ******************************************************************************* |
919 | | */ |
920 | | void ihevce_chroma_interpolate_8bit_dxdy( |
921 | | UWORD8 *pu1_src, |
922 | | UWORD8 *pu1_dst, |
923 | | WORD32 src_strd, |
924 | | WORD32 dst_strd, |
925 | | WORD16 *pi2_hdst_scratch, |
926 | | WORD32 ht, |
927 | | WORD32 wd, |
928 | | WORD32 dy, |
929 | | WORD32 dx, |
930 | | func_selector_t *ps_func_selector) |
931 | 3.41M | { |
932 | 3.41M | if((0 == dx) && (0 == dy)) |
933 | 2.35M | { |
934 | | /*--------- full pel position : copy input as is -------*/ |
935 | 2.35M | ps_func_selector->ihevc_inter_pred_chroma_copy_fptr( |
936 | 2.35M | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd); |
937 | 2.35M | } |
938 | 1.05M | else if((0 != dx) && (0 != dy)) |
939 | 468k | { |
940 | | /*----------sub pel in both x and y direction---------*/ |
941 | 468k | UWORD8 *pu1_horz_src = pu1_src - src_strd; |
942 | 468k | WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */ |
943 | 468k | WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride; |
944 | | |
945 | | /* horizontal filtering of source done in a scratch buffer first */ |
946 | 468k | ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr( |
947 | 468k | pu1_horz_src, |
948 | 468k | pi2_hdst_scratch, |
949 | 468k | src_strd, |
950 | 468k | hdst_buf_stride, |
951 | 468k | &gai1_hevc_chroma_filter_taps[dx][0], |
952 | 468k | (ht + NTAPS_CHROMA - 1), |
953 | 468k | wd); |
954 | | |
955 | | /* vertical filtering on scratch buffer and stored in desitnation */ |
956 | 468k | ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr( |
957 | 468k | pi2_vert_src, |
958 | 468k | pu1_dst, |
959 | 468k | hdst_buf_stride, |
960 | 468k | dst_strd, |
961 | 468k | &gai1_hevc_chroma_filter_taps[dy][0], |
962 | 468k | ht, |
963 | 468k | wd); |
964 | 468k | } |
965 | 586k | else if(0 == dy) |
966 | 289k | { |
967 | | /*----------sub pel in x direction only ---------*/ |
968 | 289k | ps_func_selector->ihevc_inter_pred_chroma_horz_fptr( |
969 | 289k | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd); |
970 | 289k | } |
971 | 296k | else /* if (0 == dx) */ |
972 | 296k | { |
973 | | /*----------sub pel in y direction only ---------*/ |
974 | 296k | ps_func_selector->ihevc_inter_pred_chroma_vert_fptr( |
975 | 296k | pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd); |
976 | 296k | } |
977 | 3.41M | } |
978 | | |
979 | | /** |
980 | | ******************************************************************************* |
981 | | * |
982 | | * @brief |
983 | | * Performs Chroma prediction for a inter prediction unit(PU) |
984 | | * |
985 | | * @par Description: |
986 | | * For a given PU, Inter prediction followed by weighted prediction (if |
987 | | * required). The reference and destination buffers are uv interleaved |
988 | | * |
989 | | * @param[in] ps_inter_pred_ctxt |
990 | | * context for inter prediction; contains ref list, weight offsets, ctb offsets |
991 | | * |
992 | | * @param[in] ps_pu |
993 | | * pointer to PU structure whose inter prediction needs to be done |
994 | | * |
995 | | * @param[in] pu1_dst_buf |
996 | | * pointer to destination buffer where the inter prediction is done |
997 | | * |
998 | | * @param[in] dst_stride |
999 | | * pitch of the destination buffer |
1000 | | * |
1001 | | * @returns |
1002 | | * none |
1003 | | * |
1004 | | * @remarks |
1005 | | * |
1006 | | ******************************************************************************* |
1007 | | */ |
1008 | | void ihevce_chroma_inter_pred_pu( |
1009 | | void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride) |
1010 | 3.84M | { |
1011 | 3.84M | inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt; |
1012 | 3.84M | func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector; |
1013 | | |
1014 | 3.84M | WORD32 inter_pred_idc = ps_pu->b2_pred_mode; |
1015 | 3.84M | UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2); |
1016 | | /* chroma width and height are half of luma width and height */ |
1017 | 3.84M | WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1; |
1018 | 3.84M | WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1); |
1019 | | |
1020 | 3.84M | WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag || |
1021 | 3.84M | ps_inter_pred_ctxt->i1_weighted_bipred_flag; |
1022 | | |
1023 | | /* 16bit dest required for interpolate if weighted pred is on or bipred */ |
1024 | 3.84M | WORD32 store_16bit_output; |
1025 | | |
1026 | 3.84M | recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1; |
1027 | 3.84M | UWORD8 *pu1_ref_pic, *pu1_ref_int_pel; |
1028 | 3.84M | WORD32 ref_pic_stride; |
1029 | | |
1030 | | /* offset of reference block in integer pel units */ |
1031 | 3.84M | WORD32 frm_x_ofst, frm_y_ofst; |
1032 | 3.84M | WORD32 frm_x_pu, frm_y_pu; |
1033 | | |
1034 | | /* scratch 16 bit buffers for interpolation in l0 and l1 direction */ |
1035 | 3.84M | WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0]; |
1036 | 3.84M | WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0]; |
1037 | | |
1038 | | /* scratch buffer for horizontal interpolation destination */ |
1039 | 3.84M | WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0]; |
1040 | | |
1041 | | /* get PU's frm x and frm y offset : Note uv is interleaved */ |
1042 | 3.84M | frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2); |
1043 | 3.84M | frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) + |
1044 | 3.84M | (ps_pu->b4_pos_y << (u1_is_422 + 1)); |
1045 | | |
1046 | | /* sanity checks */ |
1047 | 3.84M | ASSERT((wp_flag == 0) || (wp_flag == 1)); |
1048 | 3.84M | ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */ |
1049 | 3.84M | ASSERT(ps_pu->b1_intra_flag == 0); |
1050 | | |
1051 | 3.84M | if(wp_flag) |
1052 | 0 | { |
1053 | 0 | UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1; |
1054 | |
|
1055 | 0 | if(inter_pred_idc != PRED_L1) |
1056 | 0 | { |
1057 | 0 | ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]; |
1058 | 0 | u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag; |
1059 | 0 | } |
1060 | 0 | if(inter_pred_idc != PRED_L0) |
1061 | 0 | { |
1062 | 0 | ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]; |
1063 | 0 | u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag; |
1064 | 0 | } |
1065 | 0 | if(inter_pred_idc == PRED_BI) |
1066 | 0 | { |
1067 | 0 | wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1); |
1068 | 0 | } |
1069 | 0 | else if(inter_pred_idc == PRED_L0) |
1070 | 0 | { |
1071 | 0 | wp_flag = u1_is_wgt_pred_L0; |
1072 | 0 | } |
1073 | 0 | else if(inter_pred_idc == PRED_L1) |
1074 | 0 | { |
1075 | 0 | wp_flag = u1_is_wgt_pred_L1; |
1076 | 0 | } |
1077 | 0 | else |
1078 | 0 | { |
1079 | | /*other values are not allowed*/ |
1080 | 0 | assert(0); |
1081 | 0 | } |
1082 | 0 | } |
1083 | 3.84M | store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag); |
1084 | | |
1085 | 3.84M | if(inter_pred_idc != PRED_L1) |
1086 | 3.62M | { |
1087 | | /*****************************************************/ |
1088 | | /* L0 inter prediction(Chroma ) */ |
1089 | | /*****************************************************/ |
1090 | | |
1091 | | /* motion vecs in qpel precision */ |
1092 | 3.62M | WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx; |
1093 | 3.62M | WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy; |
1094 | | |
1095 | | /* sub pel offsets in x and y direction w.r.t integer pel */ |
1096 | 3.62M | WORD32 dx = mv_x & 0x7; |
1097 | 3.62M | WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422; |
1098 | | |
1099 | | /* ref idx is currently stored in the lower 4bits */ |
1100 | 3.62M | WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx); |
1101 | | |
1102 | | /* x and y integer offsets w.r.t frame start */ |
1103 | | |
1104 | 3.62M | frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */ |
1105 | 3.62M | frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422)))); |
1106 | | |
1107 | 3.62M | ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx]; |
1108 | | |
1109 | | /* picture buffer start and stride */ |
1110 | 3.62M | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf; |
1111 | 3.62M | ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd; |
1112 | | |
1113 | | /* point to reference start location in ref frame */ |
1114 | | /* Assuming clipping of mv is not required here as ME would */ |
1115 | | /* take care of mv access not going beyond padded data */ |
1116 | 3.62M | pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst); |
1117 | | |
1118 | 3.62M | if(store_16bit_output) |
1119 | 434k | { |
1120 | | /* do interpolation in 16bit L0 scratch buffer */ |
1121 | 434k | ihevce_chroma_interpolate_16bit_dxdy( |
1122 | 434k | pu1_ref_int_pel, |
1123 | 434k | pi2_scr_buf_l0, |
1124 | 434k | ref_pic_stride, |
1125 | 434k | (pu_wd_chroma << 1), |
1126 | 434k | pi2_horz_scratch, |
1127 | 434k | pu_ht_chroma, |
1128 | 434k | pu_wd_chroma, |
1129 | 434k | dy, |
1130 | 434k | dx, |
1131 | 434k | ps_func_selector); |
1132 | 434k | } |
1133 | 3.18M | else |
1134 | 3.18M | { |
1135 | | /* do interpolation in 8bit destination buffer and return */ |
1136 | 3.18M | ihevce_chroma_interpolate_8bit_dxdy( |
1137 | 3.18M | pu1_ref_int_pel, |
1138 | 3.18M | pu1_dst_buf, |
1139 | 3.18M | ref_pic_stride, |
1140 | 3.18M | dst_stride, |
1141 | 3.18M | pi2_horz_scratch, |
1142 | 3.18M | pu_ht_chroma, |
1143 | 3.18M | pu_wd_chroma, |
1144 | 3.18M | dy, |
1145 | 3.18M | dx, |
1146 | 3.18M | ps_func_selector); |
1147 | | |
1148 | 3.18M | return; |
1149 | 3.18M | } |
1150 | 3.62M | } |
1151 | | |
1152 | 661k | if(inter_pred_idc != PRED_L0) |
1153 | 661k | { |
1154 | | /*****************************************************/ |
1155 | | /* L1 inter prediction(Chroma) */ |
1156 | | /*****************************************************/ |
1157 | | |
1158 | | /* motion vecs in qpel precision */ |
1159 | 661k | WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx; |
1160 | 661k | WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy; |
1161 | | |
1162 | | /* sub pel offsets in x and y direction w.r.t integer pel */ |
1163 | 661k | WORD32 dx = mv_x & 0x7; |
1164 | 661k | WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422; |
1165 | | |
1166 | | /* ref idx is currently stored in the lower 4bits */ |
1167 | 661k | WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx); |
1168 | | |
1169 | | /* x and y integer offsets w.r.t frame start */ |
1170 | 661k | frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */ |
1171 | 661k | frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422)))); |
1172 | | |
1173 | 661k | ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx]; |
1174 | | |
1175 | | /* picture buffer start and stride */ |
1176 | 661k | pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf; |
1177 | 661k | ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd; |
1178 | | |
1179 | | /* point to reference start location in ref frame */ |
1180 | | /* Assuming clipping of mv is not required here as ME would */ |
1181 | | /* take care of mv access not going beyond padded data */ |
1182 | 661k | pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst); |
1183 | | |
1184 | 661k | if(store_16bit_output) |
1185 | 434k | { |
1186 | | /* do interpolation in 16bit L1 scratch buffer */ |
1187 | 434k | ihevce_chroma_interpolate_16bit_dxdy( |
1188 | 434k | pu1_ref_int_pel, |
1189 | 434k | pi2_scr_buf_l1, |
1190 | 434k | ref_pic_stride, |
1191 | 434k | (pu_wd_chroma << 1), |
1192 | 434k | pi2_horz_scratch, |
1193 | 434k | pu_ht_chroma, |
1194 | 434k | pu_wd_chroma, |
1195 | 434k | dy, |
1196 | 434k | dx, |
1197 | 434k | ps_func_selector); |
1198 | 434k | } |
1199 | 226k | else |
1200 | 226k | { |
1201 | | /* do interpolation in 8bit destination buffer and return */ |
1202 | 226k | ihevce_chroma_interpolate_8bit_dxdy( |
1203 | 226k | pu1_ref_int_pel, |
1204 | 226k | pu1_dst_buf, |
1205 | 226k | ref_pic_stride, |
1206 | 226k | dst_stride, |
1207 | 226k | pi2_horz_scratch, |
1208 | 226k | pu_ht_chroma, |
1209 | 226k | pu_wd_chroma, |
1210 | 226k | dy, |
1211 | 226k | dx, |
1212 | 226k | ps_func_selector); |
1213 | | |
1214 | 226k | return; |
1215 | 226k | } |
1216 | 661k | } |
1217 | | |
1218 | 434k | if((inter_pred_idc != PRED_BI) && wp_flag) |
1219 | 0 | { |
1220 | | /*****************************************************/ |
1221 | | /* unidirection weighted prediction(Chroma) */ |
1222 | | /*****************************************************/ |
1223 | 0 | ihevce_wght_offst_t *ps_weight_offset; |
1224 | 0 | WORD16 *pi2_src; |
1225 | 0 | WORD32 lvl_shift = 0; |
1226 | 0 | WORD32 wgt_cb, wgt_cr, off_cb, off_cr; |
1227 | 0 | WORD32 shift; |
1228 | | |
1229 | | /* intialize the weight, offsets and ref based on l0/l1 mode */ |
1230 | 0 | if(inter_pred_idc == PRED_L0) |
1231 | 0 | { |
1232 | 0 | pi2_src = pi2_scr_buf_l0; |
1233 | 0 | ps_weight_offset = &ps_ref_pic_l0->s_weight_offset; |
1234 | 0 | } |
1235 | 0 | else |
1236 | 0 | { |
1237 | 0 | pi2_src = pi2_scr_buf_l1; |
1238 | 0 | ps_weight_offset = &ps_ref_pic_l1->s_weight_offset; |
1239 | 0 | } |
1240 | |
|
1241 | 0 | wgt_cb = ps_weight_offset->i2_cb_weight; |
1242 | 0 | off_cb = ps_weight_offset->i2_cb_offset; |
1243 | 0 | wgt_cr = ps_weight_offset->i2_cr_weight; |
1244 | 0 | off_cr = ps_weight_offset->i2_cr_offset; |
1245 | |
|
1246 | 0 | shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH; |
1247 | | |
1248 | | /* do the uni directional weighted prediction */ |
1249 | 0 | ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr( |
1250 | 0 | pi2_src, |
1251 | 0 | pu1_dst_buf, |
1252 | 0 | (pu_wd_chroma << 1), |
1253 | 0 | dst_stride, |
1254 | 0 | wgt_cb, |
1255 | 0 | wgt_cr, |
1256 | 0 | off_cb, |
1257 | 0 | off_cr, |
1258 | 0 | shift, |
1259 | 0 | lvl_shift, |
1260 | 0 | pu_ht_chroma, |
1261 | 0 | pu_wd_chroma); |
1262 | 0 | } |
1263 | 434k | else |
1264 | 434k | { |
1265 | | /*****************************************************/ |
1266 | | /* Bipred prediction(Chroma) */ |
1267 | | /*****************************************************/ |
1268 | 434k | if(wp_flag) |
1269 | 0 | { |
1270 | 0 | WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr; |
1271 | 0 | WORD32 off0_cb, off1_cb, off0_cr, off1_cr; |
1272 | 0 | WORD32 shift; |
1273 | | |
1274 | | /*****************************************************/ |
1275 | | /* Bi pred weighted prediction (Chroma) */ |
1276 | | /*****************************************************/ |
1277 | 0 | wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight; |
1278 | 0 | off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset; |
1279 | |
|
1280 | 0 | wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight; |
1281 | 0 | off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset; |
1282 | |
|
1283 | 0 | wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight; |
1284 | 0 | off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset; |
1285 | |
|
1286 | 0 | wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight; |
1287 | 0 | off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset; |
1288 | |
|
1289 | 0 | shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1; |
1290 | |
|
1291 | 0 | ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr( |
1292 | 0 | pi2_scr_buf_l0, |
1293 | 0 | pi2_scr_buf_l1, |
1294 | 0 | pu1_dst_buf, |
1295 | 0 | (pu_wd_chroma << 1), |
1296 | 0 | (pu_wd_chroma << 1), |
1297 | 0 | dst_stride, |
1298 | 0 | wgt0_cb, |
1299 | 0 | wgt0_cr, |
1300 | 0 | off0_cb, |
1301 | 0 | off0_cr, |
1302 | 0 | wgt1_cb, |
1303 | 0 | wgt1_cr, |
1304 | 0 | off1_cb, |
1305 | 0 | off1_cr, |
1306 | 0 | shift, |
1307 | 0 | 0, |
1308 | 0 | 0, |
1309 | 0 | pu_ht_chroma, |
1310 | 0 | pu_wd_chroma); |
1311 | 0 | } |
1312 | 434k | else |
1313 | 434k | { |
1314 | | /*****************************************************/ |
1315 | | /* Default Bi pred prediction (Chroma) */ |
1316 | | /*****************************************************/ |
1317 | 434k | ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr( |
1318 | 434k | pi2_scr_buf_l0, |
1319 | 434k | pi2_scr_buf_l1, |
1320 | 434k | pu1_dst_buf, |
1321 | 434k | (pu_wd_chroma << 1), |
1322 | 434k | (pu_wd_chroma << 1), |
1323 | 434k | dst_stride, |
1324 | 434k | 0, |
1325 | 434k | 0, |
1326 | 434k | pu_ht_chroma, |
1327 | 434k | pu_wd_chroma); |
1328 | 434k | } |
1329 | 434k | } |
1330 | 434k | } |