/src/libavc/decoder/ih264d_inter_pred.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ************************************************************************** |
22 | | * \file ih264d_inter_pred.c |
23 | | * |
24 | | * \brief |
25 | | * This file contains routines to perform MotionCompensation tasks |
26 | | * |
27 | | * Detailed_description |
28 | | * |
29 | | * \date |
30 | | * 20/11/2002 |
31 | | * |
32 | | * \author Arvind Raman |
33 | | ************************************************************************** |
34 | | */ |
35 | | |
36 | | #include <string.h> |
37 | | #include "ih264d_defs.h" |
38 | | #include "ih264d_mvpred.h" |
39 | | #include "ih264d_error_handler.h" |
40 | | #include "ih264d_structs.h" |
41 | | #include "ih264d_defs.h" |
42 | | #include "ih264d_inter_pred.h" |
43 | | #include "ih264_typedefs.h" |
44 | | #include "ih264_macros.h" |
45 | | #include "ih264_platform_macros.h" |
46 | | #include "ih264d_debug.h" |
47 | | #include "ih264d_tables.h" |
48 | | #include "ih264d_mb_utils.h" |
49 | | |
50 | | |
51 | | void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk); |
52 | | |
53 | | |
54 | | |
55 | | void ih264d_copy_multiplex_data(UWORD8 *puc_Source, /* first byte of the source block */ |
56 | | UWORD8 *puc_To, /* first byte of the destination block */ |
57 | | UWORD32 uc_w, /* number of bytes copied per row */ |
58 | | UWORD32 uc_h, /* number of rows copied */ |
59 | | UWORD32 ui16_sourceWidth, /* source stride: added to puc_Source after every row */ |
60 | | UWORD32 ui16_toWidth) /* destination stride: added to puc_To after every row */ |
61 | 0 | { /* Copies a uc_w x uc_h byte block row by row between two strided buffers; returns nothing. */ |
62 | 0 | UWORD8 uc_i, uc_j; /* uc_j is never used. NOTE(review): uc_i is declared UWORD8 but compared with the UWORD32 uc_h; if UWORD8 is an 8-bit type, uc_h > 255 would never terminate the loop - confirm caller ranges. */ |
63 | |
|
64 | 0 | for(uc_i = 0; uc_i < uc_h; uc_i++) /* one iteration per row */ |
65 | 0 | { |
66 | 0 | memcpy(puc_To, puc_Source, uc_w); /* memcpy: source and destination rows must not overlap */ |
67 | 0 | puc_To += ui16_toWidth; /* step to the next destination row */ |
68 | 0 | puc_Source += ui16_sourceWidth; /* step to the next source row */ |
69 | 0 | } |
70 | 0 | } |
71 | | |
72 | | |
73 | | /*! |
74 | | ************************************************************************** |
75 | | * \if Function name : ih264d_copy_2d1d \endif |
76 | | * |
77 | | * \brief |
78 | | * 2D -> 1D linear copy (one memcpy per row) of a strided block into a tightly packed destination, e.g. the reference buffers |
79 | | * |
80 | | * \return |
81 | | * None |
82 | | ************************************************************************** |
83 | | */ |
84 | | void ih264d_copy_2d1d(UWORD8 *puc_src, /* first byte of the strided (2D) source block */ |
85 | | UWORD8 *puc_dest, /* start of the packed (1D) destination */ |
86 | | UWORD16 ui16_srcWidth, /* source stride: added to puc_src after every row */ |
87 | | UWORD16 ui16_widthToFill, /* bytes copied per row; also the destination advance per row */ |
88 | | UWORD16 ui16_heightToFill) /* number of rows copied */ |
89 | 12.9k | { /* Packs ui16_heightToFill rows of ui16_widthToFill bytes back-to-back into puc_dest; returns nothing. */ |
90 | 12.9k | UWORD32 uc_w, uc_h; /* uc_h counts the remaining rows; uc_w is never used */ |
91 | 122k | for(uc_h = ui16_heightToFill; uc_h != 0; uc_h--) /* counts down, so a height of 0 copies nothing */ |
92 | 109k | { |
93 | 109k | memcpy(puc_dest, puc_src, ui16_widthToFill); /* memcpy: source and destination rows must not overlap */ |
94 | 109k | puc_dest += ui16_widthToFill; /* destination rows are contiguous (no padding between rows) */ |
95 | 109k | puc_src += ui16_srcWidth; /* source advances by its own stride */ |
96 | 109k | } |
97 | 12.9k | } |
98 | | |
99 | | /*! |
100 | | ************************************************************************** |
101 | | * \if Function name : ih264d_fill_pred_info \endif |
102 | | * |
103 | | * \brief |
104 | | * Fills inter prediction related info |
105 | | * |
106 | | * \return |
107 | | * None |
108 | | ************************************************************************** |
109 | | */ |
110 | | void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num, /* pi2_mv[0], pi2_mv[1]: the two motion-vector components that are copied; part_width/part_height/sub_mb_num are packed into i1_size_pos_info */ |
111 | | WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id, /* ps_pred_pkd: output record, every field written below; pred_dir goes into the top bits of i1_ref_idx_info */ |
112 | | WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type) /* stored unchanged in the packed record */ |
113 | 44.3M | { /* Packs one partition's prediction parameters into *ps_pred_pkd; returns nothing. */ |
114 | 44.3M | WORD32 insert_bits; /* scratch value for the bit-field currently being inserted */ |
115 | | |
116 | 44.3M | ps_pred_pkd->i2_mv[0] = pi2_mv[0]; |
117 | 44.3M | ps_pred_pkd->i2_mv[1] = pi2_mv[1]; |
118 | | |
119 | 44.3M | insert_bits = sub_mb_num & 3; /* sub mb x: low two bits of sub_mb_num, stored at bits 0-1 */ |
120 | 44.3M | ps_pred_pkd->i1_size_pos_info = insert_bits; |
121 | 44.3M | insert_bits = sub_mb_num >> 2;/* sub mb y: remaining bits of sub_mb_num (not masked here), stored from bit 2 */ |
122 | 44.3M | ps_pred_pkd->i1_size_pos_info |= insert_bits << 2; |
123 | 44.3M | insert_bits = part_width >> 1; /* half the partition width, stored from bit 4; NOTE(review): no masking, so the fields only stay disjoint if callers keep the inputs small - confirm ranges */ |
124 | 44.3M | ps_pred_pkd->i1_size_pos_info |= insert_bits << 4; |
125 | 44.3M | insert_bits = part_height >> 1; /* half the partition height, stored from bit 6 */ |
126 | 44.3M | ps_pred_pkd->i1_size_pos_info |= insert_bits << 6; |
127 | | |
128 | 44.3M | ps_pred_pkd->i1_ref_idx_info = i1_ref_idx; /* reference index occupies the low bits ... */ |
129 | 44.3M | ps_pred_pkd->i1_ref_idx_info |= (pred_dir << 6); /* ... and the prediction direction is OR-ed in from bit 6 */ |
130 | 44.3M | ps_pred_pkd->i1_buf_id = i1_buf_id; |
131 | 44.3M | ps_pred_pkd->pu4_wt_offst = pu4_wt_offset; /* pointer is stored as-is, not copied through */ |
132 | 44.3M | ps_pred_pkd->u1_pic_type = u1_pic_type; |
133 | | |
134 | | |
135 | 44.3M | } |
136 | | |
137 | | |
138 | | |
139 | | |
140 | | |
141 | | |
142 | | |
143 | | /*****************************************************************************/ |
144 | | /* \if Function name : ih264d_form_mb_part_info_bp \endif */ |
145 | | /* */ |
146 | | /* \brief */ |
147 | | /* Form the Mb partition information structure, to be used by the MC */ |
148 | | /* routine */ |
149 | | /* */ |
150 | | /* \return */ |
151 | | /* OK */ |
152 | | /* \note */ |
153 | | /* c_bufx is used to select PredBuffer, */ |
154 | | /* if it's only Forward/Backward prediction always buffer used is */ |
155 | | /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */ |
156 | | /* pu1_mb_cr_pred_buffer[0 to X1] */ |
157 | | /* */ |
158 | | /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */ |
159 | | /* ..PredBuffer[X2 to X3] for backward prediction. and */ |
160 | | /* */ |
161 | | /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/ |
162 | | /* and ..PredBuffer[X2 to X3] */ |
163 | | /* */ |
164 | | /* X1 is 255 for Luma and 63 for Chroma */ |
165 | | /* X2 is 256 for Luma and 64 for Chroma */ |
166 | | /* X3 is 511 for Luma and 127 for Chroma */ |
167 | | /* */ |
168 | | /* DD MM YYYY Author(s) Changes (Describe the changes made) */ |
169 | | /* 11 05 2005 SWRN Modified to handle pod */ |
170 | | /*****************************************************************************/ |
171 | | |
172 | | WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd, /* packed partition record: position/size bits, motion vector and buffer id are read from it */ |
173 | | dec_struct_t * ps_dec, /* decoder context: supplies picture sizes, strides, the pred_info array and the recon destination pointers */ |
174 | | UWORD16 u2_mb_x, /* macroblock column, scaled by 16 (luma) / 8 (chroma) below */ |
175 | | UWORD16 u2_mb_y, /* macroblock row, scaled by 16 (luma) / 8 (chroma) below */ |
176 | | WORD32 mb_index, /* added, scaled by 16 (luma) / 8 (chroma), to the x offset in the recon destination */ |
177 | | dec_mb_info_t *ps_cur_mb_info) /* not used by this function (see UNUSED below) */ |
178 | 11.0M | { /* Fills two consecutive pred_info_t entries (luma, then chroma) for one partition, advances ps_dec->u4_pred_info_idx by 2 and returns OK. */ |
179 | | /* Frame / recon coordinates and motion-vector temporaries */ |
180 | 11.0M | WORD32 i2_frm_x, i2_frm_y; |
181 | 11.0M | WORD32 i2_tmp_mv_x, i2_tmp_mv_y; |
182 | 11.0M | WORD32 i2_rec_x, i2_rec_y; |
183 | | |
184 | 11.0M | WORD32 u2_pic_ht; |
185 | 11.0M | WORD32 u2_frm_wd; |
186 | 11.0M | WORD32 u2_rec_wd; |
187 | 11.0M | UWORD8 u1_sub_x = 0,u1_sub_y=0 ; |
188 | 11.0M | UWORD8 u1_part_wd = 0,u1_part_ht = 0; |
189 | 11.0M | WORD16 i2_mv_x,i2_mv_y; |
190 | | |
191 | | /********************************************/ |
192 | | /* i1_mc_wd width reqd for mcomp */ |
193 | | /* u1_dma_ht height reqd for mcomp */ |
194 | | /* u1_dma_wd width aligned to 4 bytes */ |
195 | | /* u1_dx fractional part of width */ |
196 | | /* u1_dy fractional part of height */ |
197 | | /********************************************/ |
198 | 11.0M | UWORD32 i1_mc_wd; |
199 | | |
200 | 11.0M | WORD32 u1_dma_ht; |
201 | | |
202 | 11.0M | UWORD32 u1_dma_wd; |
203 | 11.0M | UWORD32 u1_dx; |
204 | 11.0M | UWORD32 u1_dy; |
205 | 11.0M | pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; /* next free entry; no capacity check is made in this function */ |
206 | 11.0M | dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; /* initialised but never referenced afterwards */ |
207 | 11.0M | tfr_ctxt_t *ps_frame_buf; |
208 | 11.0M | struct pic_buffer_t *ps_ref_frm; |
209 | 11.0M | UWORD8 u1_scale_ref,u1_mbaff,u1_field; /* all three are never used */ |
210 | 11.0M | pic_buffer_t **pps_ref_frame; /* never used */ |
211 | 11.0M | WORD8 i1_size_pos_info,i1_buf_id; |
212 | | |
213 | 11.0M | PROFILE_DISABLE_MB_PART_INFO() |
214 | | |
215 | 11.0M | UNUSED(ps_cur_mb_info); |
216 | 11.0M | i1_size_pos_info = ps_pred_pkd->i1_size_pos_info; |
217 | 11.0M | GET_XPOS_PRED(u1_sub_x,i1_size_pos_info); /* GET_*_PRED macros are defined elsewhere; presumably they unpack the fields packed by ih264d_fill_pred_info - confirm */ |
218 | 11.0M | GET_YPOS_PRED(u1_sub_y,i1_size_pos_info); |
219 | 11.0M | GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info); |
220 | 11.0M | GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info); |
221 | 11.0M | i2_mv_x = ps_pred_pkd->i2_mv[0]; |
222 | 11.0M | i2_mv_y = ps_pred_pkd->i2_mv[1]; |
223 | 11.0M | i1_buf_id = ps_pred_pkd->i1_buf_id; |
224 | | |
225 | | |
226 | 11.0M | ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; /* NOTE(review): the signed 8-bit id is used as an array index without a range or NULL check here - presumably validated by the caller */ |
227 | | |
228 | | |
229 | 11.0M | { |
230 | 11.0M | ps_frame_buf = &ps_dec->s_tran_addrecon; /* holds the pu1_dest_y / pu1_dest_u / pu1_dest_v recon pointers used below */ |
231 | 11.0M | } |
232 | | |
233 | | |
234 | | /* Transfer Setup Y */ |
235 | 11.0M | { |
236 | 11.0M | UWORD8 *pu1_pred, *pu1_rec; |
237 | | |
238 | | /* calculating rounded motion vectors and fractional components */ |
239 | 11.0M | i2_tmp_mv_x = i2_mv_x; |
240 | 11.0M | i2_tmp_mv_y = i2_mv_y; |
241 | 11.0M | u1_dx = i2_tmp_mv_x & 0x3; /* low two bits: fractional part of the luma MV */ |
242 | 11.0M | u1_dy = i2_tmp_mv_y & 0x3; |
243 | 11.0M | i2_tmp_mv_x >>= 2; /* remaining bits: integer part of the luma MV */ |
244 | 11.0M | i2_tmp_mv_y >>= 2; |
245 | 11.0M | i1_mc_wd = u1_part_wd << 2; |
246 | 11.0M | u1_dma_ht = u1_part_ht << 2; |
247 | 11.0M | if(u1_dx) /* fractional x: start 2 samples further left and fetch 5 extra columns (presumably the interpolation filter support) */ |
248 | 192k | { |
249 | 192k | i2_tmp_mv_x -= 2; |
250 | 192k | i1_mc_wd += 5; |
251 | 192k | } |
252 | 11.0M | if(u1_dy) /* fractional y: same adjustment for rows */ |
253 | 183k | { |
254 | 183k | i2_tmp_mv_y -= 2; |
255 | 183k | u1_dma_ht += 5; |
256 | 183k | } |
257 | | |
258 | | /********************************************************************/ |
259 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
260 | | /* edge of the reference frame, and subsequent clipping */ |
261 | | /********************************************************************/ |
262 | 11.0M | u2_pic_ht = ps_dec->u2_pic_ht; |
263 | 11.0M | u2_frm_wd = ps_dec->u2_frm_wd_y; |
264 | 11.0M | i2_rec_x = u1_sub_x << 2; |
265 | 11.0M | i2_rec_y = u1_sub_y << 2; |
266 | | |
267 | 11.0M | i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x; |
268 | 11.0M | i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y; |
269 | | |
270 | 11.0M | i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1), /* CLIP3 is defined elsewhere; presumably clamps the third argument into [first, second] - confirm */ |
271 | 11.0M | i2_frm_x); |
272 | 11.0M | i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y); /* the lower bound is negative, so the fetch may start above row 0; presumably relies on padding around the reference picture - confirm */ |
273 | | |
274 | 11.0M | pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x; |
275 | | |
276 | 11.0M | u1_dma_wd = (i1_mc_wd + 3) & 0xFC; /* round the fetch width up to a multiple of 4 */ |
277 | | |
278 | | /********************************************************************/ |
279 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
280 | | /* edge of the recon buffer */ |
281 | | /********************************************************************/ |
282 | 11.0M | u2_rec_wd = MB_SIZE; /* dead store: overwritten two lines below */ |
283 | 11.0M | { |
284 | 11.0M | u2_rec_wd = ps_dec->u2_frm_wd_y; |
285 | 11.0M | i2_rec_x += (mb_index << 4); |
286 | 11.0M | pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd |
287 | 11.0M | + i2_rec_x; |
288 | 11.0M | } |
289 | | |
290 | | /* filling the pred and dma structures for Y */ |
291 | 11.0M | u2_frm_wd = ps_dec->u2_frm_wd_y; /* same value as already loaded above */ |
292 | | |
293 | 11.0M | ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; |
294 | 11.0M | ps_pred->i1_dma_ht = u1_dma_ht; |
295 | 11.0M | ps_pred->i1_mc_wd = i1_mc_wd; |
296 | 11.0M | ps_pred->u2_frm_wd = u2_frm_wd; |
297 | 11.0M | ps_pred->pu1_rec_y_u = pu1_rec; /* luma entry: destination is the Y recon pointer */ |
298 | 11.0M | ps_pred->u2_dst_stride = u2_rec_wd; |
299 | | |
300 | 11.0M | ps_pred->i1_mb_partwidth = u1_part_wd << 2; |
301 | 11.0M | ps_pred->i1_mb_partheight = u1_part_ht << 2; |
302 | 11.0M | ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; /* fractional offsets combined as dy * 4 + dx */ |
303 | | |
304 | 11.0M | ps_pred->pu1_y_ref = pu1_pred; |
305 | | |
306 | 11.0M | } |
307 | | |
308 | | /* Move to the next pred_info entry, used for chroma */ |
309 | 11.0M | ps_pred++; |
310 | | |
311 | | /* Transfer Setup U & V */ |
312 | 11.0M | { |
313 | 11.0M | WORD32 i4_ref_offset, i4_rec_offset; |
314 | 11.0M | UWORD8 *pu1_pred_u, *pu1_pred_v; |
315 | | |
316 | | |
317 | | /* calculating rounded motion vectors and fractional components */ |
318 | 11.0M | i2_tmp_mv_x = i2_mv_x; |
319 | 11.0M | i2_tmp_mv_y = i2_mv_y; |
320 | | |
321 | | /************************************************************************/ |
322 | | /* Table 8-9: Derivation of the vertical component of the chroma vector */ |
323 | | /* in field coding mode */ |
324 | | /* (no field adjustment is applied in this function) */ |
325 | | |
326 | | /* Eighth sample of the chroma MV */ |
327 | 11.0M | u1_dx = i2_tmp_mv_x & 0x7; |
328 | 11.0M | u1_dy = i2_tmp_mv_y & 0x7; |
329 | | |
330 | | /********************************************************************/ |
331 | | /* Calculating the full pel MV for chroma which is 1/2 of the Luma */ |
332 | | /* MV in full pel units */ |
333 | | /********************************************************************/ |
334 | 11.0M | i2_mv_x = i2_tmp_mv_x; /* copies back the value taken from i2_mv_x above, so it is unchanged */ |
335 | 11.0M | i2_mv_y = i2_tmp_mv_y; |
336 | 11.0M | i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3); /* SIGN_POW2_DIV is defined elsewhere; presumably a signed divide by 2^3 - confirm */ |
337 | 11.0M | i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3); |
338 | 11.0M | i1_mc_wd = u1_part_wd << 1; |
339 | 11.0M | u1_dma_ht = u1_part_ht << 1; |
340 | 11.0M | if(u1_dx) /* fractional x: one extra column is fetched, and negative MVs step one further sample left */ |
341 | 209k | { |
342 | 209k | i2_tmp_mv_x -= (i2_mv_x < 0); |
343 | 209k | i1_mc_wd++; |
344 | 209k | } |
345 | 11.0M | if(u1_dy != 0) /* fractional y: same adjustment for rows */ |
346 | 201k | { |
347 | 201k | i2_tmp_mv_y -= (i2_mv_y < 0); |
348 | 201k | u1_dma_ht++; |
349 | 201k | } |
350 | | |
351 | | /********************************************************************/ |
352 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
353 | | /* edge of the reference frame, and subsequent clipping */ |
354 | | /********************************************************************/ |
355 | 11.0M | u2_pic_ht >>= 1; /* chroma height is half the luma height loaded in the Y section */ |
356 | 11.0M | u2_frm_wd = ps_dec->u2_frm_wd_uv; |
357 | 11.0M | i2_rec_x = u1_sub_x << 1; |
358 | 11.0M | i2_rec_y = u1_sub_y << 1; |
359 | | |
360 | 11.0M | i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x; |
361 | 11.0M | i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y; |
362 | | |
363 | 11.0M | i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM, |
364 | 11.0M | ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x); |
365 | 11.0M | i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y); |
366 | | |
367 | 11.0M | i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR; /* YUV420SP_FACTOR is defined elsewhere; it scales the x offset - presumably the bytes per chroma sample position in a semi-planar layout, confirm */ |
368 | 11.0M | u1_dma_wd = (i1_mc_wd + 3) & 0xFC; /* round the fetch width up to a multiple of 4 */ |
369 | | |
370 | | /********************************************************************/ |
371 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
372 | | /* edge of the recon buffer */ |
373 | | /********************************************************************/ |
374 | | /* CHANGED CODE */ |
375 | 11.0M | u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR; /* dead store: overwritten inside the block below */ |
376 | 11.0M | i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; /* dead store: recomputed inside the block below */ |
377 | | |
378 | 11.0M | { |
379 | 11.0M | u2_rec_wd = ps_dec->u2_frm_wd_uv; |
380 | 11.0M | i2_rec_x += (mb_index << 3); |
381 | 11.0M | i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; |
382 | 11.0M | ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset; /* chroma entry: this field carries the U destination ... */ |
383 | 11.0M | ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v /* ... and this field carries the V destination */ |
384 | 11.0M | + i4_rec_offset; |
385 | 11.0M | } |
386 | | |
387 | | /* CHANGED CODE */ |
388 | | |
389 | | /* filling the common pred structures for U */ |
390 | 11.0M | u2_frm_wd = ps_dec->u2_frm_wd_uv; /* same value as already loaded above */ |
391 | | |
392 | 11.0M | ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; |
393 | 11.0M | ps_pred->i1_dma_ht = u1_dma_ht; |
394 | 11.0M | ps_pred->i1_mc_wd = i1_mc_wd; |
395 | | |
396 | 11.0M | ps_pred->u2_frm_wd = u2_frm_wd; |
397 | 11.0M | ps_pred->u2_dst_stride = u2_rec_wd; |
398 | | |
399 | 11.0M | ps_pred->i1_mb_partwidth = u1_part_wd << 1; |
400 | 11.0M | ps_pred->i1_mb_partheight = u1_part_ht << 1; |
401 | 11.0M | ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; /* fractional offsets combined as dy * 8 + dx */ |
402 | | |
403 | 11.0M | pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset; /* U and V references share the same offset into their respective buffers */ |
404 | 11.0M | pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset; |
405 | | |
406 | | /* Store the U reference pointer */ |
407 | 11.0M | ps_pred->pu1_u_ref = pu1_pred_u; |
408 | | |
409 | | /* Store the V reference pointer */ |
410 | 11.0M | ps_pred->pu1_v_ref = pu1_pred_v; |
411 | 11.0M | } |
412 | | |
413 | | /* Two entries (luma + chroma) were consumed: advance the shared pred_info index */ |
414 | 11.0M | ps_dec->u4_pred_info_idx += 2; |
415 | | |
416 | 11.0M | return OK; /* the only return path in this function */ |
417 | | |
418 | 11.0M | } |
419 | | |
420 | | |
421 | | /*****************************************************************************/ |
422 | | /* \if Function name : ih264d_form_mb_part_info_mp \endif */ |
423 | | /* */ |
424 | | /* \brief */ |
425 | | /* Form the Mb partition information structure, to be used by the MC */ |
426 | | /* routine */ |
427 | | /* */ |
428 | | /* \return */ |
429 | | /* None */ |
430 | | /* \note */ |
431 | | /* c_bufx is used to select PredBuffer, */ |
432 | | /* if it's only Forward/Backward prediction always buffer used is */ |
433 | | /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */ |
434 | | /* pu1_mb_cr_pred_buffer[0 to X1] */ |
435 | | /* */ |
436 | | /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */ |
437 | | /* ..PredBuffer[X2 to X3] for backward prediction. and */ |
438 | | /* */ |
439 | | /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/ |
440 | | /* and ..PredBuffer[X2 to X3] */ |
441 | | /* */ |
442 | | /* X1 is 255 for Luma and 63 for Chroma */ |
443 | | /* X2 is 256 for Luma and 64 for Chroma */ |
444 | | /* X3 is 511 for Luma and 127 for Chroma */ |
445 | | /* */ |
446 | | /* DD MM YYYY Author(s) Changes (Describe the changes made) */ |
447 | | /* 11 05 2005 SWRN Modified to handle pod */ |
448 | | /*****************************************************************************/ |
449 | | WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd, |
450 | | dec_struct_t * ps_dec, |
451 | | UWORD16 u2_mb_x, |
452 | | UWORD16 u2_mb_y, |
453 | | WORD32 mb_index, |
454 | | dec_mb_info_t *ps_cur_mb_info) |
455 | 31.9M | { |
456 | | /* The reference buffer pointer */ |
457 | 31.9M | UWORD8 *pu1_ref_buf; |
458 | 31.9M | WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht; |
459 | 31.9M | WORD16 i2_rec_x, i2_rec_y; |
460 | 31.9M | UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd; |
461 | 31.9M | UWORD8 u1_wght_pred_type, u1_wted_bipred_idc; |
462 | 31.9M | UWORD16 u2_tot_ref_scratch_size; |
463 | 31.9M | UWORD8 u1_sub_x = 0; |
464 | 31.9M | UWORD8 u1_sub_y = 0; |
465 | 31.9M | UWORD8 u1_is_bi_dir = 0; |
466 | | |
467 | | /********************************************/ |
468 | | /* i1_mc_wd width reqd for mcomp */ |
469 | | /* u1_dma_ht height reqd for mcomp */ |
470 | | /* u1_dma_wd width aligned to 4 bytes */ |
471 | | /* u1_dx fractional part of width */ |
472 | | /* u1_dy fractional part of height */ |
473 | | /********************************************/ |
474 | 31.9M | UWORD8 i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy; |
475 | 31.9M | pred_info_t * ps_pred ; |
476 | 31.9M | dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; |
477 | 31.9M | const UWORD8 u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type; |
478 | 31.9M | UWORD8 u1_pod_bot, u1_pod_top; |
479 | | |
480 | | /* load the pictype for pod u4_flag & chroma motion vector derivation */ |
481 | 31.9M | UWORD8 u1_ref_pic_type ; |
482 | | |
483 | | /* set default value to flags specifying field nature of picture & mb */ |
484 | 31.9M | UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld; |
485 | 31.9M | UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot; |
486 | 31.9M | tfr_ctxt_t *ps_frame_buf; |
487 | | /* calculate flags specifying field nature of picture & mb */ |
488 | 31.9M | const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag; |
489 | 31.9M | WORD8 i1_pred; |
490 | 31.9M | WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx; |
491 | 31.9M | UWORD8 u1_part_wd,u1_part_ht; |
492 | 31.9M | WORD16 i2_mv_x,i2_mv_y; |
493 | 31.9M | struct pic_buffer_t *ps_ref_frm; |
494 | 31.9M | UWORD32 *pu4_wt_offset; |
495 | 31.9M | UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3; |
496 | | |
497 | | |
498 | 31.9M | PROFILE_DISABLE_MB_PART_INFO() |
499 | | |
500 | 31.9M | ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx; |
501 | | |
502 | | |
503 | 31.9M | i1_size_pos_info = ps_pred_pkd->i1_size_pos_info; |
504 | 31.9M | GET_XPOS_PRED(u1_sub_x,i1_size_pos_info); |
505 | 31.9M | GET_YPOS_PRED(u1_sub_y,i1_size_pos_info); |
506 | 31.9M | GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info); |
507 | 31.9M | GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info); |
508 | 31.9M | i2_mv_x = ps_pred_pkd->i2_mv[0]; |
509 | 31.9M | i2_mv_y = ps_pred_pkd->i2_mv[1]; |
510 | 31.9M | i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f; |
511 | 31.9M | i1_buf_id = ps_pred_pkd->i1_buf_id; |
512 | 31.9M | ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id]; |
513 | | |
514 | 31.9M | i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6; |
515 | 31.9M | u1_is_bi_dir = (i1_pred == BI_PRED); |
516 | | |
517 | | |
518 | 31.9M | u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK; |
519 | | |
520 | 31.9M | pu1_buf1 = ps_ref_frm->pu1_buf1; |
521 | 31.9M | pu1_buf2 = ps_ref_frm->pu1_buf2; |
522 | 31.9M | pu1_buf3 = ps_ref_frm->pu1_buf3; |
523 | | |
524 | 31.9M | if(u1_ref_pic_type == BOT_FLD) |
525 | 272k | { |
526 | 272k | pu1_buf1 += ps_ref_frm->u2_frm_wd_y; |
527 | 272k | pu1_buf2 += ps_ref_frm->u2_frm_wd_uv; |
528 | 272k | pu1_buf3 += ps_ref_frm->u2_frm_wd_uv; |
529 | | |
530 | 272k | } |
531 | | |
532 | | |
533 | | |
534 | 31.9M | if(ps_dec->ps_cur_pps->u1_wted_pred_flag) |
535 | 19.0M | { |
536 | 19.0M | pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2 |
537 | 19.0M | * X3(i1_ref_idx)]; |
538 | 19.0M | } |
539 | | |
540 | | |
541 | 31.9M | pu4_wt_offset = ps_pred_pkd->pu4_wt_offst; |
542 | | |
543 | | |
544 | | /* Pointer to the frame buffer */ |
545 | 31.9M | { |
546 | 31.9M | ps_frame_buf = &ps_dec->s_tran_addrecon; |
547 | | /* CHANGED CODE */ |
548 | 31.9M | } |
549 | | |
550 | 31.9M | if(!u1_pic_fld) |
551 | 30.4M | { |
552 | 30.4M | u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; |
553 | 30.4M | u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb; |
554 | 30.4M | } |
555 | 1.52M | else |
556 | 1.52M | u1_pic_bot = ps_cur_slice->u1_bottom_field_flag; |
557 | | |
558 | | /****************************************************************/ |
559 | | /* calculating the flags that tell whether to use frame-padding */ |
560 | | /* or use software pad-on-demand */ |
561 | | /****************************************************************/ |
562 | 31.9M | u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot; |
563 | 31.9M | u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld; |
564 | 31.9M | u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD); |
565 | 31.9M | u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD); |
566 | | |
567 | | /* Weighted Pred additions */ |
568 | 31.9M | u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc; |
569 | | |
570 | 31.9M | if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE)) |
571 | 15.9M | { |
572 | | /* P Slice only */ |
573 | 15.9M | u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag; |
574 | | |
575 | 15.9M | } |
576 | 15.9M | else |
577 | 15.9M | { |
578 | | /* B Slice only */ |
579 | 15.9M | u1_wght_pred_type = 1 + u1_is_bi_dir; |
580 | 15.9M | if(u1_wted_bipred_idc == 0) |
581 | 3.08M | u1_wght_pred_type = 0; |
582 | 15.9M | if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir)) |
583 | 3.03M | u1_wght_pred_type = 0; |
584 | 15.9M | } |
585 | | /* load the scratch reference buffer index */ |
586 | 31.9M | pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx; |
587 | 31.9M | u2_tot_ref_scratch_size = 0; |
588 | | |
589 | | |
590 | | /* Transfer Setup Y */ |
591 | 31.9M | { |
592 | 31.9M | UWORD8 *pu1_pred, *pu1_rec; |
593 | | /* calculating rounded motion vectors and fractional components */ |
594 | 31.9M | i2_tmp_mv_x = i2_mv_x; |
595 | 31.9M | i2_tmp_mv_y = i2_mv_y; |
596 | | |
597 | 31.9M | u1_dx = i2_tmp_mv_x & 0x3; |
598 | 31.9M | u1_dy = i2_tmp_mv_y & 0x3; |
599 | 31.9M | i2_tmp_mv_x >>= 2; |
600 | 31.9M | i2_tmp_mv_y >>= 2; |
601 | 31.9M | i1_mc_wd = u1_part_wd << 2; |
602 | 31.9M | u1_dma_ht = u1_part_ht << 2; |
603 | 31.9M | if(u1_dx) |
604 | 1.17M | { |
605 | 1.17M | i2_tmp_mv_x -= 2; |
606 | 1.17M | i1_mc_wd += 5; |
607 | 1.17M | } |
608 | 31.9M | if(u1_dy) |
609 | 848k | { |
610 | 848k | i2_tmp_mv_y -= 2; |
611 | 848k | u1_dma_ht += 5; |
612 | 848k | } |
613 | | |
614 | | /********************************************************************/ |
615 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
616 | | /* edge of the reference frame, and subsequent clipping */ |
617 | | /********************************************************************/ |
618 | 31.9M | u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld; |
619 | 31.9M | u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld; |
620 | 31.9M | i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x; |
621 | 31.9M | i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4) |
622 | 31.9M | + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld); |
623 | | |
624 | 31.9M | i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1), |
625 | 31.9M | i2_frm_x); |
626 | 31.9M | i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld), |
627 | 31.9M | (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y); |
628 | | |
629 | 31.9M | pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x; |
630 | 31.9M | u1_dma_wd = (i1_mc_wd + 3) & 0xFC; |
631 | | /********************************************************************/ |
632 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
633 | | /* edge of the recon buffer */ |
634 | | /********************************************************************/ |
635 | | /* CHANGED CODE */ |
636 | 31.9M | u2_rec_wd = MB_SIZE; |
637 | 31.9M | i2_rec_x = u1_sub_x << 2; |
638 | 31.9M | i2_rec_y = u1_sub_y << 2; |
639 | 31.9M | { |
640 | 31.9M | u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; |
641 | 31.9M | i2_rec_x += (mb_index << 4); |
642 | 31.9M | pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd |
643 | 31.9M | + i2_rec_x; |
644 | 31.9M | if(u1_mb_bot) |
645 | 801k | pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 0 : 4); |
646 | 31.9M | } |
647 | | |
648 | | /* CHANGED CODE */ |
649 | | |
650 | | /* filling the pred and dma structures for Y */ |
651 | 31.9M | u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; |
652 | | |
653 | 31.9M | ps_pred->pu1_dma_dest_addr = pu1_ref_buf; |
654 | 31.9M | ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; |
655 | 31.9M | ps_pred->u2_frm_wd = u2_frm_wd; |
656 | 31.9M | ps_pred->i1_dma_ht = u1_dma_ht; |
657 | 31.9M | ps_pred->i1_mc_wd = i1_mc_wd; |
658 | 31.9M | ps_pred->pu1_rec_y_u = pu1_rec; |
659 | 31.9M | ps_pred->u2_dst_stride = u2_rec_wd; |
660 | | |
661 | 31.9M | ps_pred->i1_mb_partwidth = u1_part_wd << 2; |
662 | 31.9M | ps_pred->i1_mb_partheight = u1_part_ht << 2; |
663 | 31.9M | ps_pred->u1_dydx = (u1_dy << 2) + u1_dx; |
664 | 31.9M | ps_pred->u1_is_bi_direct = u1_is_bi_dir; |
665 | 31.9M | ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset; |
666 | 31.9M | ps_pred->u1_wght_pred_type = u1_wght_pred_type; |
667 | 31.9M | ps_pred->i1_pod_ht = 0; |
668 | | |
669 | | /* Increment the Reference buffer Indices */ |
670 | 31.9M | pu1_ref_buf += u1_dma_wd * u1_dma_ht; |
671 | 31.9M | u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht; |
672 | | |
673 | | /* unrestricted field motion comp for top region outside frame */ |
674 | 31.9M | i2_pod_ht = (-i2_frm_y) >> u1_mb_fld; |
675 | 31.9M | if((i2_pod_ht > 0) && u1_pod_top) |
676 | 1.55k | { |
677 | 1.55k | ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); |
678 | 1.55k | u1_dma_ht -= i2_pod_ht; |
679 | 1.55k | pu1_pred += i2_pod_ht * u2_frm_wd; |
680 | 1.55k | } |
681 | | /* unrestricted field motion comp for bottom region outside frame */ |
682 | 31.9M | else if(u1_pod_bot) |
683 | 724k | { |
684 | 724k | i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld); |
685 | 724k | if(i2_pod_ht > 0) |
686 | 234 | { |
687 | 234 | u1_dma_ht -= i2_pod_ht; |
688 | 234 | ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; |
689 | 234 | } |
690 | 724k | } |
691 | | |
692 | | /* Copy Y partition */ |
693 | | |
694 | | /* |
695 | | * ps_pred->i1_pod_ht is non-zero when MBAFF is present. In case of MBAFF the reference data |
696 | | * is copied into the scratch buffer so that the padding_on_demand does not corrupt the frame data |
697 | | */ |
698 | 31.9M | if(ps_pred->i1_pod_ht) |
699 | 1.79k | { |
700 | 1.79k | ps_pred->pu1_pred = pu1_pred; |
701 | 1.79k | ps_pred->u1_dma_ht_y = u1_dma_ht; |
702 | 1.79k | ps_pred->u1_dma_wd_y = u1_dma_wd; |
703 | 1.79k | } |
704 | 31.9M | ps_pred->pu1_y_ref = pu1_pred; |
705 | 31.9M | } |
706 | | |
707 | | |
708 | | |
709 | | /* Increment ps_pred index */ |
710 | 31.9M | ps_pred++; |
711 | | |
712 | | /* Transfer Setup U & V */ |
713 | 31.9M | { |
714 | 31.9M | WORD32 i4_ref_offset, i4_rec_offset; |
715 | 31.9M | UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht; |
716 | | /* CHANGED CODE */ |
717 | 31.9M | UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4); |
718 | | /* CHANGED CODE */ |
719 | | |
720 | | /* calculating rounded motion vectors and fractional components */ |
721 | 31.9M | i2_tmp_mv_x = i2_mv_x; |
722 | 31.9M | i2_tmp_mv_y = i2_mv_y; |
723 | | |
724 | | /************************************************************************/ |
725 | | /* Table 8-9: Derivation of the vertical component of the chroma vector */ |
726 | | /* in field coding mode */ |
727 | | /************************************************************************/ |
728 | 31.9M | if(u1_pod_bot && u1_mb_or_pic_bot) |
729 | 68.5k | i2_tmp_mv_y += 2; |
730 | 31.9M | if(u1_pod_top && !u1_mb_or_pic_bot) |
731 | 55.2k | i2_tmp_mv_y -= 2; |
732 | | |
733 | | /* Eighth sample of the chroma MV */ |
734 | 31.9M | u1_dx = i2_tmp_mv_x & 0x7; |
735 | 31.9M | u1_dy = i2_tmp_mv_y & 0x7; |
736 | | |
737 | | /********************************************************************/ |
738 | | /* Calculating the full pel MV for chroma which is 1/2 of the Luma */ |
739 | | /* MV in full pel units */ |
740 | | /********************************************************************/ |
741 | 31.9M | i2_mv_x = i2_tmp_mv_x; |
742 | 31.9M | i2_mv_y = i2_tmp_mv_y; |
743 | 31.9M | i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3); |
744 | 31.9M | i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3); |
745 | 31.9M | i1_mc_wd = u1_part_wd << 1; |
746 | 31.9M | u1_dma_ht = u1_part_ht << 1; |
747 | 31.9M | if(u1_dx) |
748 | 1.27M | { |
749 | 1.27M | if(i2_mv_x < 0) |
750 | 450k | i2_tmp_mv_x -= 1; |
751 | 1.27M | i1_mc_wd++; |
752 | 1.27M | } |
753 | 31.9M | if(u1_dy != 0) |
754 | 1.06M | { |
755 | 1.06M | if(i2_mv_y < 0) |
756 | 456k | i2_tmp_mv_y -= 1; |
757 | 1.06M | u1_dma_ht++; |
758 | 1.06M | } |
759 | | |
760 | | /********************************************************************/ |
761 | | /* Calculating the horizontal and the vertical u4_ofst from top left */ |
762 | | /* edge of the reference frame, and subsequent clipping */ |
763 | | /********************************************************************/ |
764 | 31.9M | u2_pic_ht >>= 1; |
765 | 31.9M | u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld; |
766 | 31.9M | i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x; |
767 | 31.9M | i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3) |
768 | 31.9M | + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld); |
769 | | |
770 | 31.9M | i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM, |
771 | 31.9M | ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x); |
772 | 31.9M | i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld), |
773 | 31.9M | (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y); |
774 | | |
775 | 31.9M | i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR; |
776 | 31.9M | u1_dma_wd = (i1_mc_wd + 3) & 0xFC; |
777 | | |
778 | | /********************************************************************/ |
779 | | /* Calulating the horizontal and the vertical u4_ofst from top left */ |
780 | | /* edge of the recon buffer */ |
781 | | /********************************************************************/ |
782 | | /* CHANGED CODE */ |
783 | 31.9M | u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR; |
784 | 31.9M | i2_rec_x = u1_sub_x << 1; |
785 | 31.9M | i2_rec_y = u1_sub_y << 1; |
786 | 31.9M | i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; |
787 | 31.9M | { |
788 | 31.9M | u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; |
789 | | |
790 | 31.9M | i2_rec_x += (mb_index << 3); |
791 | 31.9M | i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR; |
792 | 31.9M | if(u1_mb_bot) |
793 | 801k | i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 0 : 3); |
794 | 31.9M | ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset; |
795 | 31.9M | ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v |
796 | 31.9M | + i4_rec_offset; |
797 | | |
798 | 31.9M | } |
799 | | |
800 | | /* CHANGED CODE */ |
801 | | |
802 | | /* filling the common pred structures for U */ |
803 | 31.9M | u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; |
804 | 31.9M | u1_tmp_dma_ht = u1_dma_ht; |
805 | 31.9M | ps_pred->u2_u1_ref_buf_wd = u1_dma_wd; |
806 | 31.9M | ps_pred->u2_frm_wd = u2_frm_wd; |
807 | 31.9M | ps_pred->i1_dma_ht = u1_dma_ht; |
808 | 31.9M | ps_pred->i1_mc_wd = i1_mc_wd; |
809 | 31.9M | ps_pred->u2_dst_stride = u2_rec_wd; |
810 | | |
811 | 31.9M | ps_pred->i1_mb_partwidth = u1_part_wd << 1; |
812 | 31.9M | ps_pred->i1_mb_partheight = u1_part_ht << 1; |
813 | 31.9M | ps_pred->u1_dydx = (u1_dy << 3) + u1_dx; |
814 | 31.9M | ps_pred->u1_is_bi_direct = u1_is_bi_dir; |
815 | 31.9M | ps_pred->u1_wght_pred_type = u1_wght_pred_type; |
816 | 31.9M | ps_pred->i1_pod_ht = 0; |
817 | | |
818 | 31.9M | ps_pred->pu1_dma_dest_addr = pu1_ref_buf; |
819 | | |
820 | | /* unrestricted field motion comp for top region outside frame */ |
821 | 31.9M | i2_pod_ht = (-i2_frm_y) >> u1_mb_fld; |
822 | 31.9M | if((i2_pod_ht > 0) && u1_pod_top) |
823 | 7.11k | { |
824 | 7.11k | i4_ref_offset += i2_pod_ht * u2_frm_wd; |
825 | 7.11k | u1_dma_ht -= i2_pod_ht; |
826 | 7.11k | ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht); |
827 | 7.11k | } |
828 | | /* unrestricted field motion comp for bottom region outside frame */ |
829 | 31.9M | else if(u1_pod_bot) |
830 | 724k | { |
831 | 724k | i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld); |
832 | 724k | if(i2_pod_ht > 0) |
833 | 4.01k | { |
834 | 4.01k | u1_dma_ht -= i2_pod_ht; |
835 | 4.01k | ps_pred->i1_pod_ht = (WORD8)i2_pod_ht; |
836 | 4.01k | } |
837 | 724k | } |
838 | | |
839 | 31.9M | pu1_pred_u = pu1_buf2 + i4_ref_offset; |
840 | 31.9M | pu1_pred_v = pu1_buf3 + i4_ref_offset; |
841 | | |
842 | | /* Copy U & V partitions */ |
843 | 31.9M | if(ps_pred->i1_pod_ht) |
844 | 11.1k | { |
845 | 11.1k | ps_pred->pu1_pred_u = pu1_pred_u; |
846 | 11.1k | ps_pred->u1_dma_ht_uv = u1_dma_ht; |
847 | 11.1k | ps_pred->u1_dma_wd_uv = u1_dma_wd; |
848 | | |
849 | 11.1k | } |
850 | 31.9M | ps_pred->pu1_u_ref = pu1_pred_u; |
851 | | |
852 | | /* Increment the reference buffer Index */ |
853 | 31.9M | u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1; |
854 | | |
855 | 31.9M | if(ps_pred->i1_pod_ht) |
856 | 11.1k | { |
857 | 11.1k | ps_pred->pu1_pred_v = pu1_pred_v; |
858 | 11.1k | ps_pred->u1_dma_ht_uv = u1_dma_ht; |
859 | 11.1k | ps_pred->u1_dma_wd_uv = u1_dma_wd; |
860 | 11.1k | } |
861 | | |
862 | 31.9M | ps_pred->pu1_v_ref = pu1_pred_v; |
863 | 31.9M | } |
864 | | |
865 | | /* Increment ps_pred index */ |
866 | 31.9M | ps_dec->u4_pred_info_idx += 2; |
867 | | |
868 | | |
869 | | /* Increment the reference buffer Index */ |
870 | 31.9M | ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size; |
871 | | |
872 | 31.9M | if(ps_dec->u4_dma_buf_idx > MAX_REF_BUF_SIZE) |
873 | 0 | return ERROR_NUM_MV; |
874 | | |
875 | 31.9M | return OK; |
876 | | |
877 | | |
878 | | |
879 | 31.9M | } |
880 | | |
881 | | |
882 | | /*! |
883 | | ************************************************************************** |
884 | | * \if Function name : MotionCompensate \endif |
885 | | * |
886 | | * \brief |
887 | | * The routine forms predictor blocks for the entire MB and stores it in |
888 | | * predictor buffers.This function works only for BASELINE profile |
889 | | * |
890 | | * \param ps_dec: Pointer to the structure decStruct. This is used to get |
891 | | * pointers to the current and the reference frame and to the MbParams |
892 | | * structure. |
893 | | * |
894 | | * \return |
895 | | * None |
896 | | * |
897 | | * \note |
898 | | * The routine forms predictors for all the luma and the chroma MB |
899 | | * partitions. |
900 | | ************************************************************************** |
901 | | */ |
902 | | |
903 | | void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info) |
904 | 10.9M | { |
905 | 10.9M | pred_info_t *ps_pred ; |
906 | 10.9M | UWORD8 *puc_ref, *pu1_dest_y; |
907 | 10.9M | UWORD8 *pu1_dest_u; |
908 | 10.9M | UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd; |
909 | | |
910 | 10.9M | UWORD32 u4_wd_y, u4_ht_y, u4_wd_uv; |
911 | 10.9M | UWORD32 u4_ht_uv; |
912 | 10.9M | UWORD8 *puc_pred0 = (UWORD8 *)(ps_dec->pi2_pred1); |
913 | | |
914 | | |
915 | 10.9M | PROFILE_DISABLE_INTER_PRED() |
916 | 10.9M | UNUSED(ps_cur_mb_info); |
917 | 10.9M | ps_pred = ps_dec->ps_pred ; |
918 | | |
919 | 21.9M | for(u2_num_pels = 0; u2_num_pels < 256;) |
920 | 11.0M | { |
921 | 11.0M | UWORD32 uc_dx, uc_dy; |
922 | | /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in |
923 | | the MB partition are zero then it would be better to copy the |
924 | | predictor valus directly to the current frame buffer */ |
925 | | /* |
926 | | * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data |
927 | | * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data |
928 | | */ |
929 | | |
930 | 11.0M | u2_ref_wd_y = ps_pred->u2_frm_wd; |
931 | 11.0M | puc_ref = ps_pred->pu1_y_ref; |
932 | 11.0M | if(ps_pred->u1_dydx & 0x3) |
933 | 192k | puc_ref += 2; |
934 | 11.0M | if(ps_pred->u1_dydx >> 2) |
935 | 183k | puc_ref += 2 * u2_ref_wd_y; |
936 | | |
937 | 11.0M | u4_wd_y = ps_pred->i1_mb_partwidth; |
938 | 11.0M | u4_ht_y = ps_pred->i1_mb_partheight; |
939 | 11.0M | uc_dx = ps_pred->u1_dydx; |
940 | 11.0M | uc_dy = uc_dx >> 2; |
941 | 11.0M | uc_dx &= 0x3; |
942 | | |
943 | 11.0M | pu1_dest_y = ps_pred->pu1_rec_y_u; |
944 | 11.0M | u2_dst_wd = ps_pred->u2_dst_stride; |
945 | | |
946 | 11.0M | ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y, |
947 | 11.0M | u2_ref_wd_y, |
948 | 11.0M | u2_dst_wd, |
949 | 11.0M | u4_ht_y, |
950 | 11.0M | u4_wd_y, puc_pred0, |
951 | 11.0M | ps_pred->u1_dydx); |
952 | | |
953 | 11.0M | ps_pred++; |
954 | | |
955 | | /* Interpolate samples for the chroma components */ |
956 | 11.0M | { |
957 | 11.0M | UWORD8 *pu1_ref_u; |
958 | | |
959 | 11.0M | u2_ref_wd_uv = ps_pred->u2_frm_wd; |
960 | 11.0M | pu1_ref_u = ps_pred->pu1_u_ref; |
961 | | |
962 | 11.0M | u4_wd_uv = ps_pred->i1_mb_partwidth; |
963 | 11.0M | u4_ht_uv = ps_pred->i1_mb_partheight; |
964 | 11.0M | uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */ |
965 | 11.0M | uc_dy = uc_dx >> 3; |
966 | 11.0M | uc_dx &= 0x7; |
967 | | |
968 | 11.0M | pu1_dest_u = ps_pred->pu1_rec_y_u; |
969 | 11.0M | u2_dst_wd = ps_pred->u2_dst_stride; |
970 | | |
971 | 11.0M | ps_pred++; |
972 | 11.0M | ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, u2_ref_wd_uv, |
973 | 11.0M | u2_dst_wd, uc_dx, uc_dy, |
974 | 11.0M | u4_ht_uv, u4_wd_uv); |
975 | | |
976 | 11.0M | } |
977 | | |
978 | 11.0M | u2_num_pels += (UWORD8)u4_wd_y * (UWORD8)u4_ht_y; |
979 | | |
980 | 11.0M | } |
981 | 10.9M | } |
982 | | |
983 | | |
984 | | /* |
985 | | ************************************************************************** |
986 | | * \if Function name : MotionCompensateB \endif |
987 | | * |
988 | | * \brief |
989 | | * The routine forms predictor blocks for the entire MB and stores it in |
990 | | * predictor buffers. |
991 | | * |
992 | | * \param ps_dec: Pointer to the structure decStruct. This is used to get |
993 | | * pointers to the current and the reference frame and to the MbParams |
994 | | * structure. |
995 | | * |
996 | | * \return |
997 | | * None |
998 | | * |
999 | | * \note |
1000 | | * The routine forms predictors for all the luma and the chroma MB |
1001 | | * partitions. |
1002 | | ************************************************************************** |
1003 | | */ |
1004 | | |
1005 | | void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info) |
1006 | 25.7M | { |
1007 | 25.7M | pred_info_t *ps_pred ; |
1008 | 25.7M | pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw; |
1009 | 25.7M | UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1; |
1010 | 25.7M | UWORD8 *pu1_dest_u, *pu1_dest_v; |
1011 | 25.7M | WORD16 *pi16_intm; |
1012 | 25.7M | UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd; |
1013 | 25.7M | UWORD32 u2_dest_wd_y, u2_dest_wd_uv; |
1014 | 25.7M | UWORD32 u2_row_buf_wd_y = 0; |
1015 | 25.7M | UWORD32 u2_row_buf_wd_uv = 0; |
1016 | 25.7M | UWORD32 u2_log2Y_crwd; |
1017 | 25.7M | UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv; |
1018 | 25.7M | UWORD32 u4_ht_uv; |
1019 | 25.7M | UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer; |
1020 | 25.7M | WORD32 i2_pod_ht; |
1021 | 25.7M | UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd; |
1022 | 25.7M | UWORD32 u1_pod_bot, u1_pod_top; |
1023 | 25.7M | UWORD8 *pu1_pred, *pu1_dma_dst; |
1024 | 25.7M | UWORD32 u1_dma_wd, u1_dma_ht; |
1025 | | |
1026 | 25.7M | dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice; |
1027 | | |
1028 | | /* set default value to flags specifying field nature of picture & mb */ |
1029 | 25.7M | UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld; |
1030 | 25.7M | UWORD32 u1_mb_or_pic_bot; |
1031 | | /* calculate flags specifying field nature of picture & mb */ |
1032 | 25.7M | const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag; |
1033 | | |
1034 | 25.7M | PROFILE_DISABLE_INTER_PRED() |
1035 | 25.7M | ps_pred = ps_dec->ps_pred ; |
1036 | | /* Initialize both ps_pred_y_forw, ps_pred_cr_forw and ps_pred_y_back |
1037 | | * to avoid static analysis warnings */ |
1038 | 25.7M | ps_pred_y_forw = ps_pred; |
1039 | 25.7M | ps_pred_y_back = ps_pred; |
1040 | 25.7M | ps_pred_cr_forw = ps_pred; |
1041 | | |
1042 | 25.7M | u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd; |
1043 | | |
1044 | 25.7M | if(!u1_pic_fld) |
1045 | 24.2M | { |
1046 | 24.2M | u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag; |
1047 | 24.2M | } |
1048 | | |
1049 | 25.7M | u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld; |
1050 | | |
1051 | 25.7M | pi16_intm = ps_dec->pi2_pred1; |
1052 | 25.7M | puc_pred0 = (UWORD8 *)pi16_intm; |
1053 | 25.7M | puc_pred1 = puc_pred0 + PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * sizeof(WORD16); |
1054 | | |
1055 | 52.0M | for(u2_num_pels = 0; u2_num_pels < 256;) |
1056 | 26.3M | { |
1057 | 26.3M | UWORD8 uc_dx, uc_dy; |
1058 | 26.3M | const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct; |
1059 | 58.2M | for(u1_dir = 0; u1_dir <= u1_is_bi_direct; u1_dir++) |
1060 | 31.9M | { |
1061 | | /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in |
1062 | | the MB partition are zero then it would be better to copy the |
1063 | | predictor valus directly to the current frame buffer */ |
1064 | | /* |
1065 | | * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data |
1066 | | * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data |
1067 | | */ |
1068 | | |
1069 | 31.9M | if(ps_pred->i1_pod_ht) |
1070 | 1.79k | { |
1071 | 1.79k | u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd; |
1072 | 1.79k | puc_ref = ps_pred->pu1_dma_dest_addr; |
1073 | 1.79k | } |
1074 | 31.9M | else |
1075 | 31.9M | { |
1076 | 31.9M | u2_ref_wd_y = ps_pred->u2_frm_wd; |
1077 | 31.9M | puc_ref = ps_pred->pu1_y_ref; |
1078 | | |
1079 | 31.9M | } |
1080 | | |
1081 | 31.9M | if(ps_pred->u1_dydx & 0x3) |
1082 | 1.17M | puc_ref += 2; |
1083 | 31.9M | if(ps_pred->u1_dydx >> 2) |
1084 | 848k | puc_ref += 2 * u2_ref_wd_y; |
1085 | 31.9M | u4_wd_y = ps_pred->i1_mb_partwidth; |
1086 | 31.9M | u4_ht_y = ps_pred->i1_mb_partheight; |
1087 | | |
1088 | 31.9M | uc_dx = ps_pred->u1_dydx; |
1089 | 31.9M | uc_dy = uc_dx >> 2; |
1090 | 31.9M | uc_dx &= 0x3; |
1091 | 31.9M | if(u1_dir == 0) |
1092 | 26.3M | { |
1093 | 26.3M | pu1_dest_y = ps_pred->pu1_rec_y_u; |
1094 | 26.3M | u2_row_buf_wd_y = ps_pred->u2_dst_stride; |
1095 | 26.3M | u2_dst_wd = ps_pred->u2_dst_stride; |
1096 | 26.3M | u2_dest_wd_y = u2_dst_wd; |
1097 | 26.3M | ps_pred_y_forw = ps_pred; |
1098 | 26.3M | } |
1099 | 5.61M | else |
1100 | 5.61M | { |
1101 | 5.61M | pu1_dest_y = pu1_temp_mc_buffer; |
1102 | 5.61M | u2_dst_wd = MB_SIZE; |
1103 | 5.61M | u2_dest_wd_y = u2_dst_wd; |
1104 | 5.61M | ps_pred_y_back = ps_pred; |
1105 | 5.61M | ps_pred_y_back->pu1_rec_y_u = pu1_dest_y; |
1106 | 5.61M | } |
1107 | | |
1108 | | /* padding on demand (POD) for y done here */ |
1109 | | |
1110 | 31.9M | if(ps_pred->i1_pod_ht) |
1111 | 1.79k | { |
1112 | 1.79k | pu1_pred = ps_pred->pu1_pred; |
1113 | 1.79k | pu1_dma_dst = ps_pred->pu1_dma_dest_addr; |
1114 | 1.79k | u1_dma_wd = ps_pred->u1_dma_wd_y; |
1115 | 1.79k | u1_dma_ht = ps_pred->u1_dma_ht_y; |
1116 | 1.79k | u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld; |
1117 | 1.79k | if(ps_pred->i1_pod_ht < 0) |
1118 | 1.55k | { |
1119 | 1.55k | pu1_dma_dst = pu1_dma_dst - (ps_pred->i1_pod_ht * ps_pred->u2_u1_ref_buf_wd); |
1120 | 1.55k | } |
1121 | 1.79k | ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd, |
1122 | 1.79k | u1_dma_ht); |
1123 | 1.79k | ih264d_pad_on_demand(ps_pred, LUM_BLK); |
1124 | 1.79k | } |
1125 | 31.9M | ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y, |
1126 | 31.9M | u2_ref_wd_y, |
1127 | 31.9M | u2_dst_wd, |
1128 | 31.9M | u4_ht_y, |
1129 | 31.9M | u4_wd_y, |
1130 | 31.9M | puc_pred0, |
1131 | 31.9M | ps_pred->u1_dydx); |
1132 | 31.9M | ps_pred++; |
1133 | | |
1134 | | /* Interpolate samples for the chroma components */ |
1135 | 31.9M | { |
1136 | 31.9M | UWORD8 *pu1_ref_u; |
1137 | 31.9M | UWORD32 u1_dma_ht; |
1138 | | |
1139 | | /* padding on demand (POD) for U and V done here */ |
1140 | 31.9M | u1_dma_ht = ps_pred->i1_dma_ht; |
1141 | | |
1142 | 31.9M | if(ps_pred->i1_pod_ht) |
1143 | 11.1k | { |
1144 | 11.1k | pu1_pred = ps_pred->pu1_pred_u; |
1145 | 11.1k | pu1_dma_dst = ps_pred->pu1_dma_dest_addr; |
1146 | 11.1k | u1_dma_ht = ps_pred->u1_dma_ht_uv; |
1147 | 11.1k | u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR; |
1148 | 11.1k | u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; |
1149 | 11.1k | if(ps_pred->i1_pod_ht < 0) |
1150 | 7.11k | { |
1151 | | /*Top POD*/ |
1152 | 7.11k | pu1_dma_dst -= (ps_pred->i1_pod_ht |
1153 | 7.11k | * ps_pred->u2_u1_ref_buf_wd |
1154 | 7.11k | * YUV420SP_FACTOR); |
1155 | 7.11k | } |
1156 | | |
1157 | 11.1k | ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, |
1158 | 11.1k | u1_dma_wd, u1_dma_ht); |
1159 | | |
1160 | 11.1k | pu1_dma_dst += (ps_pred->i1_dma_ht |
1161 | 11.1k | * ps_pred->u2_u1_ref_buf_wd); |
1162 | 11.1k | pu1_pred = ps_pred->pu1_pred_v; |
1163 | | |
1164 | 11.1k | ih264d_pad_on_demand(ps_pred, CHROM_BLK); |
1165 | 11.1k | } |
1166 | | |
1167 | 31.9M | if(ps_pred->i1_pod_ht) |
1168 | 11.1k | { |
1169 | 11.1k | pu1_ref_u = ps_pred->pu1_dma_dest_addr; |
1170 | | |
1171 | 11.1k | u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd |
1172 | 11.1k | * YUV420SP_FACTOR; |
1173 | 11.1k | } |
1174 | 31.9M | else |
1175 | 31.9M | { |
1176 | 31.9M | u2_ref_wd_uv = ps_pred->u2_frm_wd; |
1177 | 31.9M | pu1_ref_u = ps_pred->pu1_u_ref; |
1178 | | |
1179 | 31.9M | } |
1180 | | |
1181 | 31.9M | u4_wd_uv = ps_pred->i1_mb_partwidth; |
1182 | 31.9M | u4_ht_uv = ps_pred->i1_mb_partheight; |
1183 | 31.9M | uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */ |
1184 | 31.9M | uc_dy = uc_dx >> 3; |
1185 | 31.9M | uc_dx &= 0x7; |
1186 | 31.9M | if(u1_dir == 0) |
1187 | 26.3M | { |
1188 | 26.3M | pu1_dest_u = ps_pred->pu1_rec_y_u; |
1189 | | |
1190 | 26.3M | pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v; |
1191 | 26.3M | u2_row_buf_wd_uv = ps_pred->u2_dst_stride; |
1192 | 26.3M | u2_dst_wd = ps_pred->u2_dst_stride; |
1193 | 26.3M | u2_dest_wd_uv = u2_dst_wd; |
1194 | 26.3M | ps_pred_cr_forw = ps_pred; |
1195 | 26.3M | } |
1196 | 5.61M | else |
1197 | 5.61M | { |
1198 | 5.61M | pu1_dest_u = puc_pred0; |
1199 | | |
1200 | 5.61M | pu1_dest_v = puc_pred1; |
1201 | 5.61M | u2_dest_wd_uv = BUFFER_WIDTH; |
1202 | 5.61M | u2_dst_wd = BUFFER_WIDTH; |
1203 | 5.61M | ps_pred->pu1_rec_y_u = pu1_dest_u; |
1204 | 5.61M | ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v; |
1205 | 5.61M | } |
1206 | | |
1207 | 31.9M | ps_pred++; |
1208 | 31.9M | ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, |
1209 | 31.9M | u2_ref_wd_uv, u2_dst_wd, |
1210 | 31.9M | uc_dx, uc_dy, u4_ht_uv, |
1211 | 31.9M | u4_wd_uv); |
1212 | | |
1213 | 31.9M | if(ps_cur_mb_info->u1_Mux == 1) |
1214 | 12.1k | { |
1215 | | /******************************************************************/ |
1216 | | /* padding on demand (POD) for U and V done here */ |
1217 | | /* ps_pred now points to the Y entry of the 0,0 component */ |
1218 | | /* Y need not be checked for POD because Y lies within */ |
1219 | | /* the picture((0,0) mv for Y doesnot get changed. But (0,0) for */ |
1220 | | /* U and V can need POD beacause of cross-field mv adjustments */ |
1221 | | /* (Table 8-9 of standard) */ |
1222 | | /******************************************************************/ |
1223 | 12.1k | if((ps_pred + 1)->i1_pod_ht) |
1224 | 0 | { |
1225 | 0 | pu1_pred = (ps_pred + 1)->pu1_pred_u; |
1226 | 0 | pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr; |
1227 | 0 | u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv; |
1228 | 0 | u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv |
1229 | 0 | * YUV420SP_FACTOR; |
1230 | 0 | u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld; |
1231 | 0 | if((ps_pred + 1)->i1_pod_ht < 0) |
1232 | 0 | { |
1233 | | /*Top POD*/ |
1234 | 0 | pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht |
1235 | 0 | * (ps_pred + 1)->u2_u1_ref_buf_wd |
1236 | 0 | * YUV420SP_FACTOR); |
1237 | 0 | } |
1238 | 0 | ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, |
1239 | 0 | u1_dma_wd, u1_dma_ht); |
1240 | 0 | pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht |
1241 | 0 | * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);// |
1242 | 0 | pu1_pred = (ps_pred + 1)->pu1_pred_v; |
1243 | 0 | ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK); |
1244 | |
|
1245 | 0 | } |
1246 | | |
1247 | 12.1k | ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y, |
1248 | 12.1k | pu1_dest_u, ps_cur_mb_info, |
1249 | 12.1k | u2_dest_wd_y, u2_dest_wd_uv, |
1250 | 12.1k | u1_dir); |
1251 | 12.1k | ps_pred += 2; |
1252 | 12.1k | } |
1253 | 31.9M | } |
1254 | 31.9M | } |
1255 | 26.3M | if(u1_dir != 0) |
1256 | 26.3M | u2_ref_wd_y = MB_SIZE; |
1257 | | |
1258 | 26.3M | u2_num_pels += u4_wd_y * u4_ht_y; |
1259 | | /* if BI_DIRECT, average the two pred's, and put in ..PredBuffer[0] */ |
1260 | 26.3M | if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0)) |
1261 | 19.8M | { |
1262 | | |
1263 | 19.8M | switch(ps_pred_y_forw->u1_wght_pred_type) |
1264 | 19.8M | { |
1265 | 1.09M | case 0: |
1266 | 1.09M | ps_dec->pf_default_weighted_pred_luma( |
1267 | 1.09M | ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y, |
1268 | 1.09M | ps_pred_y_forw->pu1_rec_y_u, |
1269 | 1.09M | u2_row_buf_wd_y, u2_ref_wd_y, |
1270 | 1.09M | u2_row_buf_wd_y, u4_ht_uv * 2, |
1271 | 1.09M | u4_wd_uv * 2); |
1272 | | |
1273 | 1.09M | ps_dec->pf_default_weighted_pred_chroma( |
1274 | 1.09M | ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u, |
1275 | 1.09M | ps_pred_cr_forw->pu1_rec_y_u, |
1276 | 1.09M | u2_row_buf_wd_uv, u2_dst_wd, |
1277 | 1.09M | u2_row_buf_wd_uv, u4_ht_uv, |
1278 | 1.09M | u4_wd_uv); |
1279 | | |
1280 | 1.09M | break; |
1281 | 14.2M | case 1: |
1282 | 14.2M | { |
1283 | 14.2M | UWORD32 *pu4_weight_ofst = |
1284 | 14.2M | (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v; |
1285 | 14.2M | UWORD32 u4_wt_ofst_u, u4_wt_ofst_v; |
1286 | 14.2M | UWORD32 u4_wt_ofst_y = |
1287 | 14.2M | (UWORD32)(pu4_weight_ofst[0]); |
1288 | 14.2M | WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff); |
1289 | 14.2M | WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16); |
1290 | | |
1291 | 14.2M | ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u, |
1292 | 14.2M | ps_pred_y_forw->pu1_rec_y_u, |
1293 | 14.2M | u2_row_buf_wd_y, |
1294 | 14.2M | u2_row_buf_wd_y, |
1295 | 14.2M | (u2_log2Y_crwd & 0x0ff), |
1296 | 14.2M | weight, ofst, u4_ht_y, |
1297 | 14.2M | u4_wd_y); |
1298 | | |
1299 | 14.2M | u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]); |
1300 | 14.2M | u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]); |
1301 | 14.2M | weight = ((u4_wt_ofst_v & 0xffff) << 16) |
1302 | 14.2M | | (u4_wt_ofst_u & 0xffff); |
1303 | 14.2M | ofst = ((u4_wt_ofst_v >> 16) << 8) |
1304 | 14.2M | | ((u4_wt_ofst_u >> 16) & 0xFF); |
1305 | | |
1306 | 14.2M | ps_dec->pf_weighted_pred_chroma( |
1307 | 14.2M | ps_pred_cr_forw->pu1_rec_y_u, |
1308 | 14.2M | ps_pred_cr_forw->pu1_rec_y_u, |
1309 | 14.2M | u2_row_buf_wd_uv, u2_row_buf_wd_uv, |
1310 | 14.2M | (u2_log2Y_crwd >> 8), weight, ofst, |
1311 | 14.2M | u4_ht_y >> 1, u4_wd_y >> 1); |
1312 | 14.2M | } |
1313 | | |
1314 | 14.2M | break; |
1315 | 4.51M | case 2: |
1316 | 4.51M | { |
1317 | 4.51M | UWORD32 *pu4_weight_ofst = |
1318 | 4.51M | (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v; |
1319 | 4.51M | UWORD32 u4_wt_ofst_u, u4_wt_ofst_v; |
1320 | 4.51M | UWORD32 u4_wt_ofst_y; |
1321 | 4.51M | WORD32 weight1, weight2; |
1322 | 4.51M | WORD32 ofst1, ofst2; |
1323 | | |
1324 | 4.51M | u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]); |
1325 | | |
1326 | 4.51M | weight1 = (WORD16)(u4_wt_ofst_y & 0xffff); |
1327 | 4.51M | ofst1 = (WORD8)(u4_wt_ofst_y >> 16); |
1328 | | |
1329 | 4.51M | u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]); |
1330 | 4.51M | weight2 = (WORD16)(u4_wt_ofst_y & 0xffff); |
1331 | 4.51M | ofst2 = (WORD8)(u4_wt_ofst_y >> 16); |
1332 | | |
1333 | 4.51M | ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u, |
1334 | 4.51M | ps_pred_y_back->pu1_rec_y_u, |
1335 | 4.51M | ps_pred_y_forw->pu1_rec_y_u, |
1336 | 4.51M | u2_row_buf_wd_y, |
1337 | 4.51M | u2_ref_wd_y, |
1338 | 4.51M | u2_row_buf_wd_y, |
1339 | 4.51M | (u2_log2Y_crwd & 0x0ff), |
1340 | 4.51M | weight1, weight2, ofst1, |
1341 | 4.51M | ofst2, u4_ht_y, |
1342 | 4.51M | u4_wd_y); |
1343 | | |
1344 | 4.51M | u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]); |
1345 | 4.51M | u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]); |
1346 | 4.51M | weight1 = ((u4_wt_ofst_v & 0xffff) << 16) |
1347 | 4.51M | | (u4_wt_ofst_u & 0xffff); |
1348 | 4.51M | ofst1 = ((u4_wt_ofst_v >> 16) << 8) |
1349 | 4.51M | | ((u4_wt_ofst_u >> 16) & 0xFF); |
1350 | | |
1351 | 4.51M | u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]); |
1352 | 4.51M | u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]); |
1353 | 4.51M | weight2 = ((u4_wt_ofst_v & 0xffff) << 16) |
1354 | 4.51M | | (u4_wt_ofst_u & 0xffff); |
1355 | 4.51M | ofst2 = ((u4_wt_ofst_v >> 16) << 8) |
1356 | 4.51M | | ((u4_wt_ofst_u >> 16) & 0xFF); |
1357 | | |
1358 | 4.51M | ps_dec->pf_weighted_bi_pred_chroma( |
1359 | 4.51M | (ps_pred_y_forw + 1)->pu1_rec_y_u, |
1360 | 4.51M | (ps_pred_y_back + 1)->pu1_rec_y_u, |
1361 | 4.51M | (ps_pred_y_forw + 1)->pu1_rec_y_u, |
1362 | 4.51M | u2_row_buf_wd_uv, u2_dst_wd, |
1363 | 4.51M | u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8), |
1364 | 4.51M | weight1, weight2, ofst1, ofst2, |
1365 | 4.51M | u4_ht_y >> 1, u4_wd_y >> 1); |
1366 | 4.51M | } |
1367 | | |
1368 | 4.51M | break; |
1369 | 19.8M | } |
1370 | | |
1371 | 19.8M | } |
1372 | 26.3M | } |
1373 | 25.7M | } |
1374 | | |
1375 | | |
1376 | | /*! |
1377 | | ************************************************************************** |
1378 | | * \if Function name : ih264d_multiplex_ref_data \endif |
1379 | | * |
1380 | | * \brief |
1381 | | * Initializes forward and backward refernce lists for B slice decoding. |
1382 | | * |
1383 | | * |
1384 | | * \return |
1385 | | * 0 on Success and Error code otherwise |
1386 | | ************************************************************************** |
1387 | | */ |
1388 | | |
1389 | | void ih264d_multiplex_ref_data(dec_struct_t * ps_dec, |
1390 | | pred_info_t *ps_pred, |
1391 | | UWORD8* pu1_dest_y, |
1392 | | UWORD8* pu1_dest_u, |
1393 | | dec_mb_info_t *ps_cur_mb_info, |
1394 | | UWORD16 u2_dest_wd_y, |
1395 | | UWORD16 u2_dest_wd_uv, |
1396 | | UWORD8 u1_dir) |
1397 | 12.1k | { |
1398 | 12.1k | UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir]; |
1399 | 12.1k | UWORD8 *pu1_ref_y, *pu1_ref_u; |
1400 | 12.1k | UWORD8 uc_cond, i, j, u1_dydx; |
1401 | 12.1k | UWORD16 u2_ref_wd_y, u2_ref_wd_uv; |
1402 | | |
1403 | 12.1k | PROFILE_DISABLE_INTER_PRED() |
1404 | | |
1405 | 12.1k | if(ps_pred->i1_pod_ht) |
1406 | 0 | { |
1407 | 0 | pu1_ref_y = ps_pred->pu1_dma_dest_addr; |
1408 | |
|
1409 | 0 | u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd; |
1410 | 0 | } |
1411 | 12.1k | else |
1412 | 12.1k | { |
1413 | 12.1k | pu1_ref_y = ps_pred->pu1_y_ref; |
1414 | 12.1k | u2_ref_wd_y = ps_pred->u2_frm_wd; |
1415 | 12.1k | } |
1416 | | |
1417 | 12.1k | ps_pred++; |
1418 | 12.1k | if(ps_pred->i1_pod_ht) |
1419 | 0 | { |
1420 | 0 | pu1_ref_u = ps_pred->pu1_dma_dest_addr; |
1421 | 0 | u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR; |
1422 | |
|
1423 | 0 | } |
1424 | 12.1k | else |
1425 | 12.1k | { |
1426 | 12.1k | pu1_ref_u = ps_pred->pu1_u_ref; |
1427 | 12.1k | u2_ref_wd_uv = ps_pred->u2_frm_wd; |
1428 | | |
1429 | 12.1k | } |
1430 | | |
1431 | 12.1k | u1_dydx = ps_pred->u1_dydx; |
1432 | | |
1433 | 12.1k | { |
1434 | 12.1k | UWORD8 uc_dx, uc_dy; |
1435 | 12.1k | UWORD8 *pu1_scratch_u; |
1436 | | |
1437 | 12.1k | uc_dx = u1_dydx & 0x3; |
1438 | 12.1k | uc_dy = u1_dydx >> 3; |
1439 | 12.1k | if(u1_dydx != 0) |
1440 | 0 | { |
1441 | 0 | pred_info_t * ps_prv_pred = ps_pred - 2; |
1442 | 0 | pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr; |
1443 | 0 | ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u, |
1444 | 0 | u2_ref_wd_uv, 16, uc_dx, uc_dy, 8, |
1445 | 0 | 8); |
1446 | | |
1447 | | /* Modify ref pointer and refWidth to point to scratch */ |
1448 | | /* buffer to be used below in ih264d_copy_multiplex_data functions */ |
1449 | | /* CHANGED CODE */ |
1450 | 0 | pu1_ref_u = pu1_scratch_u; |
1451 | 0 | u2_ref_wd_uv = 8 * YUV420SP_FACTOR; |
1452 | 0 | } |
1453 | 12.1k | } |
1454 | 12.1k | { |
1455 | 60.8k | for(i = 0; i < 4; i++) |
1456 | 48.7k | { |
1457 | 243k | for(j = 0; j < 4; j++) |
1458 | 194k | { |
1459 | 194k | uc_cond = u2_mask & 1; |
1460 | 194k | u2_mask >>= 1; |
1461 | 194k | if(uc_cond) |
1462 | 54.6k | { |
1463 | 54.6k | *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) = |
1464 | 54.6k | *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y); |
1465 | 54.6k | *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) = |
1466 | 54.6k | *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y); |
1467 | 54.6k | *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) = |
1468 | 54.6k | *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y); |
1469 | 54.6k | { |
1470 | 54.6k | UWORD32 *dst, *src; |
1471 | 54.6k | dst = (UWORD32 *)pu1_dest_y; |
1472 | 54.6k | src = (UWORD32 *)pu1_ref_y; |
1473 | 54.6k | *dst = *src; |
1474 | 54.6k | dst++; |
1475 | 54.6k | src++; |
1476 | 54.6k | pu1_dest_y = (UWORD8 *)dst; |
1477 | 54.6k | pu1_ref_y = (UWORD8 *)src; |
1478 | 54.6k | } |
1479 | 54.6k | *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) = |
1480 | 54.6k | *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv); |
1481 | 54.6k | { |
1482 | 54.6k | UWORD32 *dst, *src; |
1483 | 54.6k | dst = (UWORD32 *)pu1_dest_u; |
1484 | 54.6k | src = (UWORD32 *)pu1_ref_u; |
1485 | 54.6k | *dst = *src; |
1486 | 54.6k | dst++; |
1487 | 54.6k | src++; |
1488 | 54.6k | pu1_dest_u = (UWORD8 *)dst; |
1489 | 54.6k | pu1_ref_u = (UWORD8 *)src; |
1490 | 54.6k | } |
1491 | | |
1492 | 54.6k | } |
1493 | 140k | else |
1494 | 140k | { |
1495 | 140k | pu1_dest_y += 4; |
1496 | 140k | pu1_ref_y += 4; |
1497 | 140k | pu1_dest_u += 2 * YUV420SP_FACTOR; |
1498 | 140k | pu1_ref_u += 2 * YUV420SP_FACTOR; |
1499 | 140k | } |
1500 | 194k | } |
1501 | 48.7k | pu1_ref_y += 4 * (u2_ref_wd_y - 4); |
1502 | 48.7k | pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR); |
1503 | 48.7k | pu1_dest_y += 4 * (u2_dest_wd_y - 4); |
1504 | 48.7k | pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR); |
1505 | 48.7k | } |
1506 | 12.1k | } |
1507 | 12.1k | } |
1508 | | |
1509 | | void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk) |
1510 | 12.9k | { |
1511 | 12.9k | if(CHROM_BLK == lum_chrom_blk) |
1512 | 11.1k | { |
1513 | 11.1k | UWORD32 *pu4_pod_src_u, *pu4_pod_dst_u; |
1514 | 11.1k | UWORD32 *pu4_pod_src_v, *pu4_pod_dst_v; |
1515 | 11.1k | WORD32 j, u1_wd_stride; |
1516 | 11.1k | WORD32 i, u1_dma_ht, i1_ht; |
1517 | 11.1k | UWORD32 u2_dma_size; |
1518 | 11.1k | u1_wd_stride = (ps_pred->u2_u1_ref_buf_wd >> 2) * YUV420SP_FACTOR; |
1519 | 11.1k | u1_dma_ht = ps_pred->i1_dma_ht; |
1520 | 11.1k | u2_dma_size = u1_wd_stride * u1_dma_ht; |
1521 | 11.1k | pu4_pod_src_u = (UWORD32 *)ps_pred->pu1_dma_dest_addr; |
1522 | 11.1k | pu4_pod_dst_u = pu4_pod_src_u; |
1523 | | |
1524 | 11.1k | pu4_pod_src_v = pu4_pod_src_u + u2_dma_size; |
1525 | 11.1k | pu4_pod_dst_v = pu4_pod_src_v; |
1526 | | |
1527 | 11.1k | i1_ht = ps_pred->i1_pod_ht; |
1528 | 11.1k | pu4_pod_src_u -= u1_wd_stride * i1_ht; |
1529 | 11.1k | pu4_pod_src_v -= u1_wd_stride * i1_ht; |
1530 | 11.1k | if(i1_ht < 0) |
1531 | | /* Top POD */ |
1532 | 7.11k | i1_ht = -i1_ht; |
1533 | 4.01k | else |
1534 | 4.01k | { |
1535 | | /* Bottom POD */ |
1536 | 4.01k | pu4_pod_src_u += (u1_dma_ht - 1) * u1_wd_stride; |
1537 | 4.01k | pu4_pod_dst_u += (u1_dma_ht - i1_ht) * u1_wd_stride; |
1538 | 4.01k | pu4_pod_src_v += (u1_dma_ht - 1) * u1_wd_stride; |
1539 | 4.01k | pu4_pod_dst_v += (u1_dma_ht - i1_ht) * u1_wd_stride; |
1540 | 4.01k | } |
1541 | | |
1542 | 23.7k | for(i = 0; i < i1_ht; i++) |
1543 | 66.6k | for(j = 0; j < u1_wd_stride; j++) |
1544 | 54.0k | { |
1545 | 54.0k | *pu4_pod_dst_u++ = *(pu4_pod_src_u + j); |
1546 | | |
1547 | 54.0k | } |
1548 | 11.1k | } |
1549 | 1.79k | else |
1550 | 1.79k | { |
1551 | 1.79k | UWORD32 *pu4_pod_src, *pu4_pod_dst; |
1552 | 1.79k | WORD32 j, u1_wd_stride; |
1553 | 1.79k | WORD32 i, i1_ht; |
1554 | 1.79k | pu4_pod_src = (UWORD32 *)ps_pred->pu1_dma_dest_addr; |
1555 | 1.79k | pu4_pod_dst = pu4_pod_src; |
1556 | 1.79k | u1_wd_stride = ps_pred->u2_u1_ref_buf_wd >> 2; |
1557 | 1.79k | i1_ht = ps_pred->i1_pod_ht; |
1558 | 1.79k | pu4_pod_src -= u1_wd_stride * i1_ht; |
1559 | 1.79k | if(i1_ht < 0) |
1560 | | /* Top POD */ |
1561 | 1.55k | i1_ht = -i1_ht; |
1562 | 234 | else |
1563 | 234 | { |
1564 | | /* Bottom POD */ |
1565 | 234 | pu4_pod_src += (ps_pred->i1_dma_ht - 1) * u1_wd_stride; |
1566 | 234 | pu4_pod_dst += (ps_pred->i1_dma_ht - i1_ht) * u1_wd_stride; |
1567 | 234 | } |
1568 | | |
1569 | 9.90k | for(i = 0; i < i1_ht; i++) |
1570 | 46.4k | for(j = 0; j < u1_wd_stride; j++) |
1571 | 38.3k | *pu4_pod_dst++ = *(pu4_pod_src + j); |
1572 | 1.79k | } |
1573 | 12.9k | } |
1574 | | |