/src/libhevc/decoder/ihevcd_inter_pred.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevcd_inter_pred.c |
22 | | * |
23 | | * @brief |
24 | | * Calculates the prediction samples for a given CTB |
25 | | * |
26 | | * @author |
27 | | * Srinivas T |
28 | | * |
29 | | * @par List of Functions: |
30 | | * - ihevcd_inter_pred_ctb() |
31 | | * |
32 | | * @remarks |
33 | | * None |
34 | | * |
35 | | ******************************************************************************* |
36 | | */ |
37 | | #include <stdio.h> |
38 | | #include <stddef.h> |
39 | | #include <stdlib.h> |
40 | | #include <string.h> |
41 | | #include <assert.h> |
42 | | |
43 | | #include "ihevc_typedefs.h" |
44 | | #include "iv.h" |
45 | | #include "ivd.h" |
46 | | #include "ihevcd_cxa.h" |
47 | | #include "ithread.h" |
48 | | |
49 | | #include "ihevc_defs.h" |
50 | | #include "ihevc_debug.h" |
51 | | #include "ihevc_structs.h" |
52 | | #include "ihevc_macros.h" |
53 | | #include "ihevc_platform_macros.h" |
54 | | #include "ihevc_cabac_tables.h" |
55 | | #include "ihevc_weighted_pred.h" |
56 | | |
57 | | #include "ihevc_error.h" |
58 | | #include "ihevc_common_tables.h" |
59 | | |
60 | | #include "ihevcd_trace.h" |
61 | | #include "ihevcd_defs.h" |
62 | | #include "ihevcd_function_selector.h" |
63 | | #include "ihevcd_structs.h" |
64 | | #include "ihevcd_error.h" |
65 | | #include "ihevcd_nal.h" |
66 | | #include "ihevcd_bitstream.h" |
67 | | #include "ihevcd_job_queue.h" |
68 | | #include "ihevcd_utils.h" |
69 | | |
70 | | #include "ihevc_inter_pred.h" |
71 | | #include "ihevcd_profile.h" |
72 | | |
73 | | /** |
74 | | ******************************************************************************* |
75 | | * |
76 | | * @brief |
77 | | * Inter prediction CTB level function |
78 | | * |
79 | | * @par Description: |
80 | | * For the current CTB, inter prediction followed by weighted / bi-pred averaging is |
81 | | * done for every non-intra PU present in the CTB (luma, and chroma unless monochrome) |
82 | | * |
83 | | * @param[in] ps_proc |
84 | | * Pointer to the process context (codec, slice header, SPS/PPS, PU array, CTB position, buffers) |
85 | | * |
86 | | * @returns |
87 | | * None; output is written through ps_proc->pu1_cur_pic_luma / pu1_cur_pic_chroma |
88 | | * @remarks |
89 | | * PUs with b1_intra_flag set are skipped. The slice is asserted to be a P or B slice. |
90 | | * |
91 | | ******************************************************************************* |
92 | | */ |
93 | | |
94 | | void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc) |
95 | 0 | { |
96 | 0 | UWORD8 *ref_pic_luma_l0, *ref_pic_chroma_l0; |
97 | 0 | UWORD8 *ref_pic_luma_l1, *ref_pic_chroma_l1; |
98 | |

99 | 0 | UWORD8 *ref_pic_l0 = NULL, *ref_pic_l1 = NULL; |
100 | |

101 | 0 | slice_header_t *ps_slice_hdr; |
102 | 0 | sps_t *ps_sps; |
103 | 0 | pps_t *ps_pps; |
104 | 0 | pu_t *ps_pu; |
105 | 0 | codec_t *ps_codec; |
106 | 0 | WORD32 pu_indx; |
107 | 0 | WORD32 pu_x, pu_y; |
108 | 0 | WORD32 pu_wd, pu_ht; |
109 | 0 | WORD32 i4_pu_cnt; |
110 | 0 | WORD32 cur_ctb_idx; |
111 | |

112 | 0 | WORD32 clr_indx; |
113 | 0 | WORD32 ntaps; |
114 | | |
115 | | |
116 | |

117 | 0 | WORD32 ai2_xint[2] = { 0, 0 }, ai2_yint[2] = { 0, 0 }; /* integer MV position, index 0 = L0, 1 = L1 */ |
118 | 0 | WORD32 ai2_xfrac[2] = { 0, 0 }, ai2_yfrac[2] = { 0, 0 }; /* fractional MV part, index 0 = L0, 1 = L1 */ |
119 | |

120 | 0 | WORD32 weighted_pred, bi_pred; |
121 | |

122 | 0 | WORD32 ref_strd; |
123 | 0 | UWORD8 *pu1_dst_luma, *pu1_dst_chroma; |
124 | |

125 | 0 | UWORD8 *pu1_dst; |
126 | |

127 | 0 | WORD16 *pi2_tmp1, *pi2_tmp2; |
128 | |

129 | 0 | WORD32 luma_weight_l0, luma_weight_l1; |
130 | 0 | WORD32 chroma_weight_l0_cb, chroma_weight_l1_cb, chroma_weight_l0_cr, chroma_weight_l1_cr; |
131 | 0 | WORD32 luma_offset_l0, luma_offset_l1; |
132 | 0 | WORD32 chroma_offset_l0_cb, chroma_offset_l1_cb, chroma_offset_l0_cr, chroma_offset_l1_cr; |
133 | 0 | WORD32 shift, lvl_shift1, lvl_shift2; |
134 | |

135 | 0 | pf_inter_pred func_ptr1, func_ptr2, func_ptr3, func_ptr4; |
136 | 0 | WORD32 func_indx1, func_indx2, func_indx3, func_indx4; |
137 | 0 | void *func_src; |
138 | 0 | void *func_dst; |
139 | 0 | WORD32 func_src_strd; |
140 | 0 | WORD32 func_dst_strd; |
141 | 0 | WORD8 *func_coeff; |
142 | 0 | WORD32 func_wd; |
143 | 0 | WORD32 func_ht; |
144 | 0 | WORD32 next_ctb_idx; |
145 | 0 | WORD8(*coeff)[8]; |
146 | 0 | WORD32 chroma_yuv420sp_vu; |
147 | 0 | WORD32 num_comp; |
148 | 0 | WORD32 h_samp_factor, v_samp_factor; |
149 | 0 | WORD32 chroma_pixel_strd = 2; /* so (chroma_pixel_strd / h_samp_factor) is 1 when h_samp_factor is 2 and 2 for 4:4:4 */ |
150 | 0 | WORD32 is_yuv420, is_yuv422, is_yuv444; |
151 | |

152 | 0 | PROFILE_DISABLE_INTER_PRED(); |
153 | 0 | ps_codec = ps_proc->ps_codec; |
154 | 0 | ps_slice_hdr = ps_proc->ps_slice_hdr; |
155 | 0 | ps_pps = ps_proc->ps_pps; |
156 | 0 | ps_sps = ps_proc->ps_sps; |
157 | 0 | cur_ctb_idx = ps_proc->i4_ctb_x |
158 | 0 | + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb); /* raster-scan index of the current CTB */ |
159 | | /* |
160 | | * In case of tiles, the next ctb belonging to the same tile must be used to get the PU index |
161 | | */ |
162 | |

163 | 0 | next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt; |
164 | 0 | i4_pu_cnt = ps_proc->pu4_pic_pu_idx[next_ctb_idx] - ps_proc->pu4_pic_pu_idx[cur_ctb_idx]; /* PU count of this CTB = difference of the two PU index entries */ |
165 | |

166 | 0 | ps_pu = ps_proc->ps_pu; |
167 | 0 | ref_strd = ps_codec->i4_strd; /* the same stride is used for reference reads and destination writes */ |
168 | 0 | pi2_tmp1 = ps_proc->pi2_inter_pred_tmp_buf1; /* 16-bit intermediate buffer used for L0 */ |
169 | 0 | pi2_tmp2 = ps_proc->pi2_inter_pred_tmp_buf2; /* 16-bit intermediate buffer used for L1 */ |
170 | 0 | pu1_dst_luma = ps_proc->pu1_cur_pic_luma; |
171 | 0 | pu1_dst_chroma = ps_proc->pu1_cur_pic_chroma; |
172 | |

173 | 0 | chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU); /* when set, Cb/Cr weight and offset arguments are passed in swapped order below */ |
174 | |

175 | 0 | ASSERT(PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type); |
176 | |

177 | 0 | ref_pic_luma_l0 = NULL; |
178 | 0 | ref_pic_chroma_l0 = NULL; |
179 | |

180 | 0 | luma_weight_l0 = 0; |
181 | 0 | chroma_weight_l0_cb = 0; |
182 | 0 | chroma_weight_l0_cr = 0; |
183 | |

184 | 0 | luma_offset_l0 = 0; |
185 | 0 | chroma_offset_l0_cb = 0; |
186 | 0 | chroma_offset_l0_cr = 0; |
187 | |

188 | 0 | ref_pic_luma_l1 = NULL; |
189 | 0 | ref_pic_chroma_l1 = NULL; |
190 | |

191 | 0 | luma_weight_l1 = 0; |
192 | 0 | chroma_weight_l1_cb = 0; |
193 | 0 | chroma_weight_l1_cr = 0; |
194 | |

195 | 0 | luma_offset_l1 = 0; |
196 | 0 | chroma_offset_l1_cb = 0; |
197 | 0 | chroma_offset_l1_cr = 0; |
198 | |

199 | 0 | num_comp = ps_sps->i1_chroma_format_idc == CHROMA_FMT_IDC_MONOCHROME ? 1 : 2; /* 1 = luma pass only, 2 = luma pass + one chroma pass */ |
200 | 0 | is_yuv420 = (CHROMA_FMT_IDC_YUV420 == ps_sps->i1_chroma_format_idc) ? 1 : 0; |
201 | 0 | is_yuv422 = (CHROMA_FMT_IDC_YUV422 == ps_sps->i1_chroma_format_idc) ? 1 : 0; |
202 | 0 | is_yuv444 = (CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc) ? 1 : 0; |
203 | 0 | h_samp_factor = is_yuv444 ? 1 : 2; /* horizontal chroma divisor: 1 for 4:4:4, otherwise 2 */ |
204 | 0 | v_samp_factor = is_yuv420 ? 2 : 1; /* vertical chroma divisor: 2 for 4:2:0, otherwise 1 */ |
205 | |

206 | 0 | for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++) |
207 | 0 | { |
208 | | /* If the PU is intra then proceed to the next */ |
209 | 0 | if(1 == ps_pu->b1_intra_flag) |
210 | 0 | continue; |
211 | 0 | pu_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_x << 2); /* CTB origin + PU offset, which is stored in units of 4 samples */ |
212 | 0 | pu_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_y << 2); |
213 | |

214 | 0 | pu_wd = (ps_pu->b4_wd + 1) << 2; /* b4_wd / b4_ht hold (size / 4) - 1 */ |
215 | 0 | pu_ht = (ps_pu->b4_ht + 1) << 2; |
216 | |

217 | 0 | weighted_pred = (ps_slice_hdr->i1_slice_type == PSLICE) ? ps_pps->i1_weighted_pred_flag : |
218 | 0 | ps_pps->i1_weighted_bipred_flag; /* P slices use the uni-pred flag, other slices the bi-pred flag */ |
219 | 0 | bi_pred = (ps_pu->b2_pred_mode == PRED_BI); |
220 | |

221 | 0 | if(ps_pu->b2_pred_mode != PRED_L1) /* list 0 is used: fetch its reference picture, weights and offsets */ |
222 | 0 | { |
223 | 0 | pic_buf_t *ps_pic_buf_l0; |
224 | |

225 | 0 | ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf)); |
226 | |

227 | 0 | ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma; |
228 | |

229 | 0 | luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx]; |
230 | |

231 | 0 | luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx]; |
232 | |

233 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
234 | 0 | { |
235 | 0 | ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma; |
236 | 0 | chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx]; |
237 | 0 | chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx]; |
238 | |

239 | 0 | chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx]; |
240 | 0 | chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx]; |
241 | 0 | } |
242 | 0 | } |
243 | |

244 | 0 | if(ps_pu->b2_pred_mode != PRED_L0) /* list 1 is used: fetch its reference picture, weights and offsets */ |
245 | 0 | { |
246 | 0 | pic_buf_t *ps_pic_buf_l1; |
247 | 0 | ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf)); |
248 | 0 | ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma; |
249 | |

250 | 0 | luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx]; |
251 | |

252 | 0 | luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx]; |
253 | |

254 | 0 | if(CHROMA_FMT_IDC_MONOCHROME != ps_sps->i1_chroma_format_idc) |
255 | 0 | { |
256 | 0 | ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma; |
257 | 0 | chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx]; |
258 | 0 | chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx]; |
259 | |

260 | 0 | chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx]; |
261 | 0 | chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx]; |
262 | 0 | } |
263 | 0 | } |
264 | | |
265 | | /* luma (clr_indx 0) and chroma (clr_indx 1) components */ |
266 | 0 | for(clr_indx = 0; clr_indx < num_comp; clr_indx++) |
267 | 0 | { |
268 | 0 | PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx); |
269 | 0 | PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx); |
270 | |

271 | 0 | if(clr_indx == 0) |
272 | 0 | { |
273 | 0 | WORD32 mv; |
274 | 0 | if(ps_pu->b2_pred_mode != PRED_L1) |
275 | 0 | { |
276 | 0 | mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); /* clamp the MV before it is used for addressing */ |
277 | 0 | ai2_xint[0] = pu_x + (mv >> 2); /* luma MV: low 2 bits are the fraction, the rest is the integer offset */ |
278 | 0 | ai2_xfrac[0] = mv & 3; |
279 | |

280 | 0 | mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); |
281 | 0 | ai2_yint[0] = pu_y + (mv >> 2); |
282 | 0 | ai2_yfrac[0] = mv & 3; |
283 | |

284 | 0 | ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask; /* codec-level mask on the fractional bits; presumably lets the codec force integer-pel - TODO confirm */ |
285 | 0 | ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask; |
286 | | |
287 | |

288 | 0 | ref_pic_l0 = ref_pic_luma_l0 + ai2_yint[0] * ref_strd |
289 | 0 | + ai2_xint[0]; |
290 | 0 | } |
291 | |

292 | 0 | if(ps_pu->b2_pred_mode != PRED_L0) |
293 | 0 | { |
294 | |

295 | 0 | mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); |
296 | 0 | ai2_xint[1] = pu_x + (mv >> 2); |
297 | 0 | ai2_xfrac[1] = mv & 3; |
298 | |

299 | 0 | mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); |
300 | 0 | ai2_yint[1] = pu_y + (mv >> 2); |
301 | 0 | ai2_yfrac[1] = mv & 3; |
302 | |

303 | 0 | ref_pic_l1 = ref_pic_luma_l1 + ai2_yint[1] * ref_strd |
304 | 0 | + ai2_xint[1]; |
305 | 0 | ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask; |
306 | 0 | ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask; |
307 | |

308 | 0 | } |
309 | |

310 | 0 | pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x; |
311 | |

312 | 0 | ntaps = NTAPS_LUMA; |
313 | 0 | coeff = gai1_ihevc_luma_filter; |
314 | 0 | } |
315 | | |
316 | 0 | else |
317 | 0 | { |
318 | 0 | WORD32 mv; |
319 | | /* xint is scaled by chroma_pixel_strd because the chroma components are */ |
320 | | /* interleaved which is not the assumption made by standard */ |
321 | 0 | if(ps_pu->b2_pred_mode != PRED_L1) |
322 | 0 | { |
323 | 0 | mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); |
324 | 0 | ai2_xint[0] = (pu_x * chroma_pixel_strd / h_samp_factor) + (mv >> (2 + h_samp_factor - 1)) * chroma_pixel_strd; /* shift is 3 when h_samp_factor is 2, 2 for 4:4:4 */ |
325 | 0 | ai2_xfrac[0] = mv & (is_yuv444 ? 3 : 7); |
326 | |

327 | 0 | mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); |
328 | 0 | ai2_yint[0] = ((pu_y / v_samp_factor) + (mv >> (2 + v_samp_factor - 1))); /* shift is 3 for 4:2:0, 2 otherwise */ |
329 | 0 | ai2_yfrac[0] = mv & (is_yuv420 ? 7 : 3); |
330 | |

331 | 0 | ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * (ref_strd * chroma_pixel_strd / h_samp_factor) + ai2_xint[0]; |
332 | |

333 | 0 | ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask; |
334 | 0 | ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask; |
335 | |

336 | 0 | } |
337 | |

338 | 0 | if(ps_pu->b2_pred_mode != PRED_L0) |
339 | 0 | { |
340 | 0 | mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2)); |
341 | 0 | ai2_xint[1] = (pu_x * chroma_pixel_strd / h_samp_factor) + (mv >> (2 + h_samp_factor - 1)) * chroma_pixel_strd; |
342 | 0 | ai2_xfrac[1] = mv & (is_yuv444 ? 3 : 7); |
343 | |

344 | 0 | mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2)); |
345 | 0 | ai2_yint[1] = ((pu_y / v_samp_factor) + (mv >> (2 + v_samp_factor - 1))); |
346 | 0 | ai2_yfrac[1] = mv & (is_yuv420 ? 7 : 3); |
347 | |

348 | 0 | ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * (ref_strd * chroma_pixel_strd / h_samp_factor) + ai2_xint[1]; |
349 | |

350 | 0 | ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask; |
351 | 0 | ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask; |
352 | |

353 | 0 | } |
354 | |

355 | 0 | pu1_dst = pu1_dst_chroma + (pu_y / v_samp_factor) * (ref_strd * chroma_pixel_strd / h_samp_factor) + |
356 | 0 | (pu_x * chroma_pixel_strd / h_samp_factor); |
357 | |

358 | 0 | ntaps = NTAPS_CHROMA; |
359 | 0 | coeff = gai1_ihevc_chroma_filter; |
360 | 0 | } |
361 | |

362 | 0 | if(ps_pu->b2_pred_mode != PRED_L1) |
363 | 0 | { |
364 | 0 | func_indx1 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx; /* table index: +11 for chroma, +4 when output goes to the 16-bit temp buffer */ |
365 | 0 | func_indx1 += ai2_xfrac[0] ? 2 : 0; /* +2 when the horizontal fraction is nonzero */ |
366 | 0 | func_indx1 += ai2_yfrac[0] ? 1 : 0; /* +1 when the vertical fraction is nonzero */ |
367 | |

368 | 0 | func_indx2 = (ai2_xfrac[0] && ai2_yfrac[0]) |
369 | 0 | * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx; /* offset 9 or 10 only when both fractions are nonzero; otherwise offset 0, presumably a NULL entry - TODO confirm */ |
370 | |

371 | 0 | func_ptr1 = ps_codec->apf_inter_pred[func_indx1]; |
372 | 0 | func_ptr2 = ps_codec->apf_inter_pred[func_indx2]; |
373 | 0 | } |
374 | 0 | else |
375 | 0 | { |
376 | 0 | func_ptr1 = NULL; |
377 | 0 | func_ptr2 = NULL; |
378 | 0 | } |
379 | 0 | if(ps_pu->b2_pred_mode != PRED_L0) |
380 | 0 | { |
381 | 0 | func_indx3 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx; /* same indexing scheme as func_indx1, using the L1 fractions */ |
382 | 0 | func_indx3 += ai2_xfrac[1] ? 2 : 0; |
383 | 0 | func_indx3 += ai2_yfrac[1] ? 1 : 0; |
384 | |

385 | 0 | func_indx4 = (ai2_xfrac[1] && ai2_yfrac[1]) |
386 | 0 | * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx; |
387 | |

388 | 0 | func_ptr3 = ps_codec->apf_inter_pred[func_indx3]; |
389 | 0 | func_ptr4 = ps_codec->apf_inter_pred[func_indx4]; |
390 | 0 | } |
391 | 0 | else |
392 | 0 | { |
393 | 0 | func_ptr3 = NULL; |
394 | 0 | func_ptr4 = NULL; |
395 | 0 | } |
396 | | |
397 | | /* Function 1: first (or only) pass for list 0 */ |
398 | 0 | if(func_ptr1 != NULL) |
399 | 0 | { |
400 | 0 | func_src_strd = ref_strd; |
401 | 0 | if (clr_indx != 0) |
402 | 0 | { |
403 | 0 | func_src_strd *= (chroma_pixel_strd / h_samp_factor); |
404 | 0 | } |
405 | 0 | func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ? |
406 | 0 | ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd : |
407 | 0 | ref_pic_l0; /* with both fractions nonzero, start (ntaps / 2 - 1) rows above the block */ |
408 | 0 | func_dst = (weighted_pred || bi_pred) ? |
409 | 0 | (void *)pi2_tmp1 : (void *)pu1_dst; |
410 | 0 | if(ai2_xfrac[0] && ai2_yfrac[0]) |
411 | 0 | { |
412 | 0 | func_dst = pi2_tmp1; /* two-pass case: the first pass always writes to the temp buffer */ |
413 | 0 | } |
414 | |

415 | 0 | func_dst_strd = (weighted_pred || bi_pred |
416 | 0 | || (ai2_xfrac[0] && ai2_yfrac[0])) ? |
417 | 0 | pu_wd : ref_strd; /* temp buffer rows are pu_wd wide, picture rows are ref_strd wide */ |
418 | 0 | if (clr_indx != 0) |
419 | 0 | { |
420 | 0 | func_dst_strd *= (chroma_pixel_strd / h_samp_factor); |
421 | 0 | } |
422 | 0 | func_coeff = ai2_xfrac[0] ? coeff[ai2_xfrac[0] << (is_yuv444 ? clr_indx : 0)] |
423 | 0 | : coeff[ai2_yfrac[0] << (is_yuv420 ? 0 : clr_indx)]; /* chroma fractions that are only 2 bits wide are doubled to index the chroma table */ |
424 | 0 | func_wd = pu_wd >> (is_yuv444 ? 0 : clr_indx); |
425 | 0 | func_ht = pu_ht >> (is_yuv420 ? clr_indx : 0); |
426 | 0 | func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? ntaps - 1 : 0; /* ntaps - 1 extra rows are produced for the second pass */ |
427 | 0 | func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd, |
428 | 0 | func_coeff, func_ht, func_wd); |
429 | 0 | } |
430 | | |
431 | | /* Function 2: second pass for list 0, reading the temp buffer written by Function 1 */ |
432 | 0 | if(func_ptr2 != NULL) |
433 | 0 | { |
434 | 0 | func_src_strd = pu_wd; |
435 | 0 | if (clr_indx != 0) |
436 | 0 | { |
437 | 0 | func_src_strd *= (chroma_pixel_strd / h_samp_factor); |
438 | 0 | } |
439 | 0 | func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd; /* skip the (ntaps / 2 - 1) leading rows added by the first pass */ |
440 | 0 | func_dst = (weighted_pred || bi_pred) ? |
441 | 0 | (void *)pi2_tmp1 : (void *)pu1_dst; |
442 | |

443 | 0 | func_dst_strd = (weighted_pred || bi_pred) ? |
444 | 0 | pu_wd : ref_strd; |
445 | 0 | if (clr_indx != 0) |
446 | 0 | { |
447 | 0 | func_dst_strd *= (chroma_pixel_strd / h_samp_factor); |
448 | 0 | } |
449 | 0 | func_coeff = coeff[ai2_yfrac[0] << (is_yuv420 ? 0 : clr_indx)]; |
450 | 0 | func_wd = pu_wd >> (is_yuv444 ? 0 : clr_indx); |
451 | 0 | func_ht = pu_ht >> (is_yuv420 ? clr_indx : 0); |
452 | 0 | func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd, |
453 | 0 | func_coeff, func_ht, func_wd); |
454 | 0 | } |
455 | |

456 | 0 | if(func_ptr3 != NULL) /* first (or only) pass for list 1, mirrors Function 1 using pi2_tmp2 */ |
457 | 0 | { |
458 | 0 | func_src_strd = ref_strd; |
459 | 0 | if (clr_indx != 0) |
460 | 0 | { |
461 | 0 | func_src_strd *= (chroma_pixel_strd / h_samp_factor); |
462 | 0 | } |
463 | 0 | func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ? |
464 | 0 | ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd : |
465 | 0 | ref_pic_l1; |
466 | |

467 | 0 | func_dst = (weighted_pred || bi_pred) ? |
468 | 0 | (void *)pi2_tmp2 : (void *)pu1_dst; |
469 | 0 | if(ai2_xfrac[1] && ai2_yfrac[1]) |
470 | 0 | { |
471 | 0 | func_dst = pi2_tmp2; |
472 | 0 | } |
473 | 0 | func_dst_strd = (weighted_pred || bi_pred |
474 | 0 | || (ai2_xfrac[1] && ai2_yfrac[1])) ? |
475 | 0 | pu_wd : ref_strd; |
476 | 0 | if (clr_indx != 0) |
477 | 0 | { |
478 | 0 | func_dst_strd *= (chroma_pixel_strd / h_samp_factor); |
479 | 0 | } |
480 | 0 | func_coeff = ai2_xfrac[1] ? coeff[ai2_xfrac[1] << (is_yuv444 ? clr_indx : 0)] |
481 | 0 | : coeff[ai2_yfrac[1] << (is_yuv420 ? 0 : clr_indx)]; |
482 | 0 | func_wd = pu_wd >> (is_yuv444 ? 0 : clr_indx); |
483 | 0 | func_ht = pu_ht >> (is_yuv420 ? clr_indx : 0); |
484 | |

485 | 0 | func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? ntaps - 1 : 0; |
486 | 0 | func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd, |
487 | 0 | func_coeff, func_ht, func_wd); |
488 | | |
489 | |

490 | 0 | } |
491 | |

492 | 0 | if(func_ptr4 != NULL) /* second pass for list 1, mirrors Function 2 using pi2_tmp2 */ |
493 | 0 | { |
494 | 0 | func_src_strd = pu_wd; |
495 | 0 | if (clr_indx != 0) |
496 | 0 | { |
497 | 0 | func_src_strd *= (chroma_pixel_strd / h_samp_factor); |
498 | 0 | } |
499 | 0 | func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd; |
500 | |

501 | 0 | func_dst = (weighted_pred || bi_pred) ? |
502 | 0 | (void *)pi2_tmp2 : (void *)pu1_dst; |
503 | 0 | func_dst_strd = (weighted_pred || bi_pred) ? |
504 | 0 | pu_wd : ref_strd; |
505 | 0 | if (clr_indx != 0) |
506 | 0 | { |
507 | 0 | func_dst_strd *= (chroma_pixel_strd / h_samp_factor); |
508 | 0 | } |
509 | 0 | func_coeff = coeff[ai2_yfrac[1] << (is_yuv420 ? 0 : clr_indx)]; |
510 | 0 | func_wd = pu_wd >> (is_yuv444 ? 0 : clr_indx); |
511 | 0 | func_ht = pu_ht >> (is_yuv420 ? clr_indx : 0); |
512 | 0 | func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd, |
513 | 0 | func_coeff, func_ht, func_wd); |
514 | |

515 | 0 | } |
516 | |

517 | 0 | PROFILE_DISABLE_INTER_PRED_LUMA_AVERAGING(clr_indx); |
518 | 0 | PROFILE_DISABLE_INTER_PRED_CHROMA_AVERAGING(clr_indx); |
519 | | |
520 | |

521 | 0 | if((weighted_pred != 0) && (bi_pred != 0)) /* case 1: explicit weighted bi-prediction from both temp buffers */ |
522 | 0 | { |
523 | 0 | lvl_shift1 = 0; |
524 | 0 | lvl_shift2 = 0; |
525 | 0 | if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0])) |
526 | 0 | lvl_shift1 = (1 << 13); /* nonzero only for luma that took the two-pass path; presumably undoes an offset left by that path - TODO confirm */ |
527 | |

528 | 0 | if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1])) |
529 | 0 | lvl_shift2 = (1 << 13); |
530 | | |
531 | |

532 | 0 | if(0 == clr_indx) |
533 | 0 | { |
534 | 0 | shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom |
535 | 0 | + SHIFT_14_MINUS_BIT_DEPTH + 1; |
536 | |

537 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr(pi2_tmp1, |
538 | 0 | pi2_tmp2, |
539 | 0 | pu1_dst, |
540 | 0 | pu_wd, |
541 | 0 | pu_wd, |
542 | 0 | ref_strd, |
543 | 0 | luma_weight_l0, |
544 | 0 | luma_offset_l0, |
545 | 0 | luma_weight_l1, |
546 | 0 | luma_offset_l1, |
547 | 0 | shift, |
548 | 0 | lvl_shift1, |
549 | 0 | lvl_shift2, |
550 | 0 | pu_ht, |
551 | 0 | pu_wd); |
552 | 0 | } |
553 | 0 | else |
554 | 0 | { |
555 | 0 | shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom |
556 | 0 | + SHIFT_14_MINUS_BIT_DEPTH + 1; |
557 | 0 | func_src_strd = pu_wd * (chroma_pixel_strd / h_samp_factor); |
558 | 0 | func_dst_strd = ref_strd * (chroma_pixel_strd / h_samp_factor); |
559 | |

560 | 0 | if(chroma_yuv420sp_vu) /* VU reference layout: pass Cr values in the first slot of each weight/offset pair */ |
561 | 0 | { |
562 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1, |
563 | 0 | pi2_tmp2, |
564 | 0 | pu1_dst, |
565 | 0 | func_src_strd, |
566 | 0 | func_src_strd, |
567 | 0 | func_dst_strd, |
568 | 0 | chroma_weight_l0_cr, |
569 | 0 | chroma_weight_l0_cb, |
570 | 0 | chroma_offset_l0_cr, |
571 | 0 | chroma_offset_l0_cb, |
572 | 0 | chroma_weight_l1_cr, |
573 | 0 | chroma_weight_l1_cb, |
574 | 0 | chroma_offset_l1_cr, |
575 | 0 | chroma_offset_l1_cb, |
576 | 0 | shift, |
577 | 0 | lvl_shift1, |
578 | 0 | lvl_shift2, |
579 | 0 | pu_ht >> (is_yuv420 ? clr_indx : 0), |
580 | 0 | pu_wd >> (is_yuv444 ? 0 : clr_indx)); |
581 | 0 | } |
582 | 0 | else |
583 | 0 | { |
584 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1, |
585 | 0 | pi2_tmp2, |
586 | 0 | pu1_dst, |
587 | 0 | func_src_strd, |
588 | 0 | func_src_strd, |
589 | 0 | func_dst_strd, |
590 | 0 | chroma_weight_l0_cb, |
591 | 0 | chroma_weight_l0_cr, |
592 | 0 | chroma_offset_l0_cb, |
593 | 0 | chroma_offset_l0_cr, |
594 | 0 | chroma_weight_l1_cb, |
595 | 0 | chroma_weight_l1_cr, |
596 | 0 | chroma_offset_l1_cb, |
597 | 0 | chroma_offset_l1_cr, |
598 | 0 | shift, |
599 | 0 | lvl_shift1, |
600 | 0 | lvl_shift2, |
601 | 0 | pu_ht >> (is_yuv420 ? clr_indx : 0), |
602 | 0 | pu_wd >> (is_yuv444 ? 0 : clr_indx)); |
603 | 0 | } |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | 0 | else if((weighted_pred != 0) && (bi_pred == 0)) /* case 2: explicit weighted uni-prediction from the temp buffer of the list in use */ |
608 | 0 | { |
609 | 0 | lvl_shift1 = 0; |
610 | 0 | if(ps_pu->b2_pred_mode == PRED_L0) |
611 | 0 | { |
612 | 0 | if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0])) |
613 | 0 | lvl_shift1 = (1 << 13); |
614 | 0 | } |
615 | 0 | else |
616 | 0 | { |
617 | 0 | if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1])) |
618 | 0 | lvl_shift1 = (1 << 13); |
619 | 0 | } |
620 | |

621 | 0 | if(0 == clr_indx) |
622 | 0 | { |
623 | 0 | shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom |
624 | 0 | + SHIFT_14_MINUS_BIT_DEPTH; |
625 | |

626 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2, |
627 | 0 | pu1_dst, |
628 | 0 | pu_wd, |
629 | 0 | ref_strd, |
630 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? luma_weight_l0 : luma_weight_l1, |
631 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? luma_offset_l0 : luma_offset_l1, |
632 | 0 | shift, |
633 | 0 | lvl_shift1, |
634 | 0 | pu_ht, |
635 | 0 | pu_wd); |
636 | 0 | } |
637 | 0 | else |
638 | 0 | { |
639 | 0 | shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom |
640 | 0 | + SHIFT_14_MINUS_BIT_DEPTH; |
641 | 0 | func_src_strd = pu_wd * (chroma_pixel_strd / h_samp_factor); |
642 | 0 | func_dst_strd = ref_strd * (chroma_pixel_strd / h_samp_factor); |
643 | |

644 | 0 | if(chroma_yuv420sp_vu) |
645 | 0 | { |
646 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2, |
647 | 0 | pu1_dst, |
648 | 0 | func_src_strd, |
649 | 0 | func_dst_strd, |
650 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr, |
651 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb, |
652 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr, |
653 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb, |
654 | 0 | shift, |
655 | 0 | lvl_shift1, |
656 | 0 | pu_ht >> (is_yuv420 ? clr_indx : 0), |
657 | 0 | pu_wd >> (is_yuv444 ? 0 : clr_indx)); |
658 | 0 | } |
659 | 0 | else |
660 | 0 | { |
661 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2, |
662 | 0 | pu1_dst, |
663 | 0 | func_src_strd, |
664 | 0 | func_dst_strd, |
665 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb, |
666 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr, |
667 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb, |
668 | 0 | ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr, |
669 | 0 | shift, |
670 | 0 | lvl_shift1, |
671 | 0 | pu_ht >> (is_yuv420 ? clr_indx : 0), |
672 | 0 | pu_wd >> (is_yuv444 ? 0 : clr_indx)); |
673 | 0 | } |
674 | 0 | } |
675 | 0 | } |
676 | | |
677 | 0 | else if((weighted_pred == 0) && (bi_pred != 0)) /* case 3: default bi-prediction; one routine is used for both luma and chroma */ |
678 | 0 | { |
679 | 0 | lvl_shift1 = 0; |
680 | 0 | lvl_shift2 = 0; |
681 | |

682 | 0 | if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0])) |
683 | 0 | lvl_shift1 = (1 << 13); |
684 | |

685 | 0 | if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1])) |
686 | 0 | lvl_shift2 = (1 << 13); |
687 | |

688 | 0 | func_src_strd = pu_wd; |
689 | 0 | func_dst_strd = ref_strd; |
690 | 0 | if (clr_indx != 0) |
691 | 0 | { |
692 | 0 | func_src_strd *= (chroma_pixel_strd / h_samp_factor); |
693 | 0 | func_dst_strd *= (chroma_pixel_strd / h_samp_factor); |
694 | 0 | } |
695 | 0 | func_ht = pu_ht >> (is_yuv420 ? clr_indx : 0); |
696 | 0 | func_wd = pu_wd << (is_yuv444 ? clr_indx : 0); /* NOTE: a left shift here, unlike the chroma calls above: width is pu_wd, doubled for 4:4:4 chroma */ |
697 | |

698 | 0 | ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1, |
699 | 0 | pi2_tmp2, |
700 | 0 | pu1_dst, |
701 | 0 | func_src_strd, |
702 | 0 | func_src_strd, |
703 | 0 | func_dst_strd, |
704 | 0 | lvl_shift1, |
705 | 0 | lvl_shift2, |
706 | 0 | func_ht, |
707 | 0 | func_wd); |
708 | |

709 | 0 | } |
710 | 0 | } |
711 | 0 | } |
712 | 0 | } |
712 | 0 | } |