/src/libavc/encoder/ih264e_mc.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * ih264e_mc.c |
25 | | * |
26 | | * @brief |
27 | | * Contains definition of functions for motion compensation |
28 | | * |
29 | | * @author |
30 | | * ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - ih264e_motion_comp_luma |
34 | | * - ih264e_motion_comp_chroma |
35 | | * |
36 | | * @remarks |
37 | | * none |
38 | | * |
39 | | ******************************************************************************* |
40 | | */ |
41 | | |
42 | | /*****************************************************************************/ |
43 | | /* File Includes */ |
44 | | /*****************************************************************************/ |
45 | | |
46 | | /* System Include Files */ |
47 | | #include <stdio.h> |
48 | | |
49 | | /* User Include Files */ |
50 | | #include "ih264_typedefs.h" |
51 | | #include "iv2.h" |
52 | | #include "ive2.h" |
53 | | |
54 | | #include "ih264_defs.h" |
55 | | #include "ih264_mem_fns.h" |
56 | | #include "ih264_padding.h" |
57 | | #include "ih264_structs.h" |
58 | | #include "ih264_trans_quant_itrans_iquant.h" |
59 | | #include "ih264_inter_pred_filters.h" |
60 | | #include "ih264_intra_pred_filters.h" |
61 | | #include "ih264_deblk_edge_filters.h" |
62 | | #include "ih264_cabac_tables.h" |
63 | | |
64 | | #include "ime_defs.h" |
65 | | #include "ime_distortion_metrics.h" |
66 | | #include "ime_structs.h" |
67 | | |
68 | | #include "irc_cntrl_param.h" |
69 | | #include "irc_frame_info_collector.h" |
70 | | |
71 | | #include "ih264e_error.h" |
72 | | #include "ih264e_defs.h" |
73 | | #include "ih264e_rate_control.h" |
74 | | #include "ih264e_bitstream.h" |
75 | | #include "ih264e_cabac_structs.h" |
76 | | #include "ih264e_structs.h" |
77 | | #include "ih264e_mc.h" |
78 | | #include "ih264e_half_pel.h" |
79 | | |
80 | | |
81 | | /*****************************************************************************/ |
82 | | /* Function Definitions */ |
83 | | /*****************************************************************************/ |
84 | | |
85 | | /** |
86 | | ****************************************************************************** |
87 | | * |
88 | | * @brief |
89 | | * performs motion compensation for a luma mb for the given mv. |
90 | | * |
91 | | * @par Description |
92 | | * This routine performs motion compensation of an inter mb. When the inter |
93 | | * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer |
94 | | * to pred buffer. In this case the function returns pointer and stride of the |
95 | | * ref. buffer and this info is used in place of pred buffer else where. |
96 | | * In other cases, the pred buffer is populated via copy / filtering + copy |
97 | | * (q pel cases) and returned. |
98 | | * |
99 | | * @param[in] ps_proc |
100 | | * pointer to current proc ctxt |
101 | | * |
102 | | * @param[out] pu1_pseudo_pred |
103 | | * pseudo prediction buffer |
104 | | * |
105 | | * @param[out] u4_pseudo_pred_strd |
106 | | * pseudo pred buffer stride |
107 | | * |
108 | | * @return none |
109 | | * |
110 | | * @remarks Assumes half pel buffers for the entire frame are populated. |
111 | | * |
112 | | ****************************************************************************** |
113 | | */ |
114 | | void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred, |
115 | | WORD32 *pi4_pseudo_pred_strd) |
116 | 0 | { |
117 | | /* codec context */ |
118 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
119 | | |
120 | | /* me ctxt */ |
121 | 0 | me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; |
122 | | |
123 | | /* Pointer to the structure having motion vectors, size and position of curr partitions */ |
124 | 0 | enc_pu_t *ps_curr_pu; |
125 | | |
126 | | /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */ |
127 | 0 | UWORD8 *pu1_ref[4]; |
128 | | |
129 | | /* pred buffer ptr */ |
130 | 0 | UWORD8 *pu1_pred; |
131 | | |
132 | | /* strides of full pel, half pel x, half pel y, half pel xy reference buffer */ |
133 | 0 | WORD32 i4_ref_strd[4]; |
134 | | |
135 | | /* pred buffer stride */ |
136 | 0 | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
137 | | |
138 | | /* full pel motion vectors */ |
139 | 0 | WORD32 u4_mv_x_full, u4_mv_y_full; |
140 | | |
141 | | /* half pel motion vectors */ |
142 | 0 | WORD32 u4_mv_x_hpel, u4_mv_y_hpel; |
143 | | |
144 | | /* quarter pel motion vectors */ |
145 | 0 | WORD32 u4_mv_x_qpel, u4_mv_y_qpel; |
146 | | |
147 | | /* width & height of the partition */ |
148 | 0 | UWORD32 wd, ht; |
149 | | |
150 | | /* partition idx */ |
151 | 0 | UWORD32 u4_num_prtn; |
152 | | |
153 | | /* half / qpel coefficient */ |
154 | 0 | UWORD32 u4_subpel_factor; |
155 | | |
156 | | /* BIPRED Flag */ |
157 | 0 | WORD32 i4_bipred_flag; |
158 | | |
159 | | /* temp var */ |
160 | 0 | UWORD32 u4_lkup_idx1; |
161 | | |
162 | | /* Init */ |
163 | 0 | i4_ref_strd[0] = ps_proc->i4_rec_strd; |
164 | |
|
165 | 0 | i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = |
166 | 0 | ps_me_ctxt->u4_subpel_buf_strd; |
167 | |
|
168 | 0 | for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; |
169 | 0 | u4_num_prtn++) |
170 | 0 | { |
171 | 0 | mv_t *ps_curr_mv; |
172 | | |
173 | | /* update ptr to curr partition */ |
174 | 0 | ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; |
175 | | |
176 | | /* Set no no bipred */ |
177 | 0 | i4_bipred_flag = 0; |
178 | |
|
179 | 0 | switch (ps_curr_pu->b2_pred_mode) |
180 | 0 | { |
181 | 0 | case PRED_L0: |
182 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; |
183 | 0 | pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; |
184 | 0 | break; |
185 | | |
186 | 0 | case PRED_L1: |
187 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv; |
188 | 0 | pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1]; |
189 | 0 | break; |
190 | | |
191 | 0 | case PRED_BI: |
192 | | /* |
193 | | * In case of PRED_BI, we only need to ensure that |
194 | | * the reference buffer that gets selected is |
195 | | * ps_proc->pu1_best_subpel_buf |
196 | | */ |
197 | | |
198 | | /* Dummy */ |
199 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; |
200 | 0 | pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; |
201 | |
|
202 | 0 | i4_bipred_flag = 1; |
203 | 0 | break; |
204 | | |
205 | 0 | default: |
206 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; |
207 | 0 | pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; |
208 | 0 | break; |
209 | |
|
210 | 0 | } |
211 | | |
212 | | /* get full pel mv's (full pel units) */ |
213 | 0 | u4_mv_x_full = ps_curr_mv->i2_mvx >> 2; |
214 | 0 | u4_mv_y_full = ps_curr_mv->i2_mvy >> 2; |
215 | | |
216 | | /* get half pel mv's */ |
217 | 0 | u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; |
218 | 0 | u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; |
219 | | |
220 | | /* get quarter pel mv's */ |
221 | 0 | u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); |
222 | 0 | u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); |
223 | | |
224 | | /* width and height of partition */ |
225 | 0 | wd = (ps_curr_pu->b4_wd + 1) << 2; |
226 | 0 | ht = (ps_curr_pu->b4_ht + 1) << 2; |
227 | | |
228 | | /* decision ? qpel/hpel, fpel */ |
229 | 0 | u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) |
230 | 0 | + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel); |
231 | | |
232 | | /* Move ref to position given by MV */ |
233 | 0 | pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full); |
234 | | |
235 | | /* Sub pel ptrs/ Biperd pointers init */ |
236 | 0 | pu1_ref[1] = ps_proc->pu1_best_subpel_buf; |
237 | 0 | i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd; |
238 | | |
239 | | /* update pred buff ptr */ |
240 | 0 | pu1_pred = ps_proc->pu1_pred_mb |
241 | 0 | + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd |
242 | 0 | + 4 * ps_curr_pu->b4_pos_x; |
243 | | |
244 | | /* u4_lkup_idx1 will be non zero for half pel and bipred */ |
245 | 0 | u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag; |
246 | |
|
247 | 0 | { |
248 | | /********************************************************************/ |
249 | | /* if the block is P16x16 MB and mv are not quarter pel motion */ |
250 | | /* vectors, there is no need to copy 16x16 unit from reference frame*/ |
251 | | /* to pred buffer. We might as well send the reference frame buffer */ |
252 | | /* pointer as pred buffer (ofc with updated stride) to fwd transform*/ |
253 | | /* and inverse transform unit. */ |
254 | | /********************************************************************/ |
255 | 0 | if (ps_proc->u4_num_sub_partitions == 1) |
256 | 0 | { |
257 | 0 | *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1]; |
258 | 0 | *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1]; |
259 | |
|
260 | 0 | } |
261 | | /* |
262 | | * Copying half pel or full pel to prediction buffer |
263 | | * Currently ps_proc->u4_num_sub_partitions will always be 1 as we |
264 | | * only support 16x16 in P mbs |
265 | | */ |
266 | 0 | else |
267 | 0 | { |
268 | 0 | ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], |
269 | 0 | pu1_pred, |
270 | 0 | i4_ref_strd[u4_lkup_idx1], |
271 | 0 | i4_pred_strd, ht, wd, NULL, |
272 | 0 | 0); |
273 | 0 | } |
274 | 0 | } |
275 | 0 | } |
276 | 0 | } |
277 | | |
278 | | /** |
279 | | ****************************************************************************** |
280 | | * |
281 | | * @brief |
282 | | * performs motion compensation for chroma mb |
283 | | * |
284 | | * @par Description |
285 | | * Copies a MB of data from the reference buffer (Full pel, half pel or q pel) |
286 | | * according to the motion vectors given |
287 | | * |
288 | | * @param[in] ps_proc |
289 | | * pointer to current proc ctxt |
290 | | * |
291 | | * @return none |
292 | | * |
293 | | * @remarks Assumes half pel and quarter pel buffers for the entire frame are |
294 | | * populated. |
295 | | ****************************************************************************** |
296 | | */ |
297 | | void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc) |
298 | 0 | { |
299 | | /* codec context */ |
300 | 0 | codec_t *ps_codec = ps_proc->ps_codec; |
301 | | |
302 | | /* Pointer to the structure having motion vectors, size and position of curr partitions */ |
303 | 0 | enc_pu_t *ps_curr_pu; |
304 | | |
305 | | /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */ |
306 | 0 | UWORD8 *pu1_ref; |
307 | | |
308 | | /* pred buffer ptr */ |
309 | 0 | UWORD8 *pu1_pred; |
310 | | |
311 | | /* strides of full pel reference buffer */ |
312 | 0 | WORD32 i4_ref_strd = ps_proc->i4_rec_strd; |
313 | | |
314 | | /* pred buffer stride */ |
315 | 0 | WORD32 i4_pred_strd = ps_proc->i4_pred_strd; |
316 | | |
317 | | /* full pel motion vectors */ |
318 | 0 | WORD32 u4_mv_x_full, u4_mv_y_full; |
319 | | |
320 | | /* half pel motion vectors */ |
321 | 0 | WORD32 u4_mv_x_hpel, u4_mv_y_hpel; |
322 | | |
323 | | /* quarter pel motion vectors */ |
324 | 0 | WORD32 u4_mv_x_qpel, u4_mv_y_qpel; |
325 | | |
326 | | /* width & height of the partition */ |
327 | 0 | UWORD32 wd, ht; |
328 | | |
329 | | /* partition idx */ |
330 | 0 | UWORD32 u4_num_prtn; |
331 | |
|
332 | 0 | WORD32 u4_mv_x; |
333 | 0 | WORD32 u4_mv_y; |
334 | 0 | UWORD8 u1_dx, u1_dy; |
335 | |
|
336 | 0 | for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; |
337 | 0 | u4_num_prtn++) |
338 | 0 | { |
339 | 0 | mv_t *ps_curr_mv; |
340 | |
|
341 | 0 | ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; |
342 | |
|
343 | 0 | if (ps_curr_pu->b2_pred_mode != PRED_BI) |
344 | 0 | { |
345 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv; |
346 | 0 | pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode]; |
347 | |
|
348 | 0 | u4_mv_x = ps_curr_mv->i2_mvx >> 3; |
349 | 0 | u4_mv_y = ps_curr_mv->i2_mvy >> 3; |
350 | | |
351 | | /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */ |
352 | 0 | u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; |
353 | 0 | u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; |
354 | | |
355 | | /* get half pel mv's */ |
356 | 0 | u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; |
357 | 0 | u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; |
358 | | |
359 | | /* get quarter pel mv's */ |
360 | 0 | u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); |
361 | 0 | u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); |
362 | | |
363 | | /* width and height of sub macro block */ |
364 | 0 | wd = (ps_curr_pu->b4_wd + 1) << 1; |
365 | 0 | ht = (ps_curr_pu->b4_ht + 1) << 1; |
366 | | |
367 | | /* move the pointers so that they point to the motion compensated locations */ |
368 | 0 | pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); |
369 | |
|
370 | 0 | pu1_pred = ps_proc->pu1_pred_mb |
371 | 0 | + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd |
372 | 0 | + 2 * ps_curr_pu->b4_pos_x; |
373 | |
|
374 | 0 | u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel); |
375 | 0 | u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel); |
376 | | |
377 | | /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with |
378 | | * separate functions for better performance |
379 | | * |
380 | | * ih264_inter_pred_chroma_dx_zero_a9q |
381 | | * and |
382 | | * ih264_inter_pred_chroma_dy_zero_a9q |
383 | | */ |
384 | |
|
385 | 0 | ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, |
386 | 0 | i4_pred_strd, u1_dx, u1_dy, ht, wd); |
387 | 0 | } |
388 | 0 | else /* If the pred mode is PRED_BI */ |
389 | 0 | { |
390 | | /* |
391 | | * We need to interpolate the L0 and L1 ref pics with the chorma MV |
392 | | * then use them to average for bilinrar interpred |
393 | | */ |
394 | 0 | WORD32 i4_predmode; |
395 | 0 | UWORD8 *pu1_ref_buf[2]; |
396 | | |
397 | | /* Temporary buffers to store the interpolated value from L0 and L1 */ |
398 | 0 | pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0]; |
399 | 0 | pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1]; |
400 | | |
401 | |
|
402 | 0 | for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++) |
403 | 0 | { |
404 | 0 | ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv; |
405 | 0 | pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode]; |
406 | |
|
407 | 0 | u4_mv_x = ps_curr_mv->i2_mvx >> 3; |
408 | 0 | u4_mv_y = ps_curr_mv->i2_mvy >> 3; |
409 | | |
410 | | /* |
411 | | * corresponds to full pel motion vector in luma, but in chroma |
412 | | * corresponds to pel formed wiith dx, dy =4 |
413 | | */ |
414 | 0 | u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; |
415 | 0 | u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; |
416 | | |
417 | | /* get half pel mv's */ |
418 | 0 | u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; |
419 | 0 | u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; |
420 | | |
421 | | /* get quarter pel mv's */ |
422 | 0 | u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); |
423 | 0 | u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); |
424 | | |
425 | | /* width and height of sub macro block */ |
426 | 0 | wd = (ps_curr_pu->b4_wd + 1) << 1; |
427 | 0 | ht = (ps_curr_pu->b4_ht + 1) << 1; |
428 | | |
429 | | /* move the pointers so that they point to the motion compensated locations */ |
430 | 0 | pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); |
431 | |
|
432 | 0 | pu1_pred = ps_proc->pu1_pred_mb |
433 | 0 | + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd |
434 | 0 | + 2 * ps_curr_pu->b4_pos_x; |
435 | |
|
436 | 0 | u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) |
437 | 0 | + (u4_mv_x_qpel); |
438 | 0 | u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) |
439 | 0 | + (u4_mv_y_qpel); |
440 | |
|
441 | 0 | ps_codec->pf_inter_pred_chroma(pu1_ref, |
442 | 0 | pu1_ref_buf[i4_predmode], |
443 | 0 | i4_ref_strd, MB_SIZE, u1_dx, |
444 | 0 | u1_dy, ht, wd); |
445 | 0 | } |
446 | |
|
447 | 0 | ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0], |
448 | 0 | pu1_ref_buf[PRED_L1], pu1_pred, |
449 | 0 | MB_SIZE, MB_SIZE, |
450 | 0 | i4_pred_strd, MB_SIZE >> 1, |
451 | 0 | MB_SIZE); |
452 | 0 | } |
453 | 0 | } |
454 | 0 | } |