/src/libavc/decoder/ih264d_thread_parse_decode.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ************************************************************************** |
22 | | * \file ih264d_thread_parse_decode.c |
23 | | * |
24 | | * \brief |
25 | | * Contains routines for the multi-threaded decoder |
26 | | * |
27 | | * Detailed_description |
28 | | * |
29 | | * \date |
30 | | * 20/02/2012 |
31 | | * |
32 | | * \author ZR |
33 | | ************************************************************************** |
34 | | */ |
35 | | |
36 | | #include "ih264d_error_handler.h" |
37 | | #include "ih264d_debug.h" |
38 | | #include "ithread.h" |
39 | | #include <string.h> |
40 | | #include "ih264d_defs.h" |
41 | | #include "ih264d_debug.h" |
42 | | #include "ih264d_tables.h" |
43 | | #include "ih264d_structs.h" |
44 | | #include "ih264d_defs.h" |
45 | | #include "ih264d_mb_utils.h" |
46 | | #include "ih264d_thread_parse_decode.h" |
47 | | #include "ih264d_inter_pred.h" |
48 | | |
49 | | #include "ih264d_process_pslice.h" |
50 | | #include "ih264d_process_intra_mb.h" |
51 | | #include "ih264d_deblocking.h" |
52 | | #include "ih264d_format_conv.h" |
53 | | |
54 | | void ih264d_deblock_mb_level(dec_struct_t *ps_dec, |
55 | | dec_mb_info_t *ps_cur_mb_info, |
56 | | UWORD32 nmb_index); |
57 | | |
58 | | void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec, |
59 | | dec_mb_info_t *ps_cur_mb_info, |
60 | | UWORD32 nmb_index); |
61 | | |
62 | | void ih264d_parse_tfr_nmb(dec_struct_t * ps_dec, |
63 | | UWORD32 u4_mb_idx, |
64 | | UWORD32 u4_num_mbs, |
65 | | UWORD32 u4_num_mbs_next, |
66 | | UWORD32 u4_tfr_n_mb, |
67 | | UWORD32 u4_end_of_row) |
68 | 1.31M | { |
69 | 1.31M | WORD32 i, u4_mb_num; |
70 | | |
71 | 1.31M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
72 | 1.31M | UWORD32 u4_n_mb_start; |
73 | | |
74 | 1.31M | UNUSED(u4_mb_idx); |
75 | 1.31M | UNUSED(u4_num_mbs_next); |
76 | 1.31M | if(u4_tfr_n_mb) |
77 | 1.31M | { |
78 | | |
79 | | |
80 | 1.31M | u4_n_mb_start = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs; |
81 | | |
82 | | // copy into s_frmMbInfo |
83 | | |
84 | 1.31M | u4_mb_num = u4_n_mb_start; |
85 | 1.31M | u4_mb_num = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs; |
86 | | |
87 | 27.9M | for(i = 0; i < u4_num_mbs; i++) |
88 | 26.6M | { |
89 | 26.6M | UPDATE_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u4_mb_num, |
90 | 26.6M | ps_dec->u2_cur_slice_num); |
91 | 26.6M | DATA_SYNC(); |
92 | 26.6M | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_dec_mb_map, u4_mb_num); |
93 | | |
94 | 26.6M | u4_mb_num++; |
95 | 26.6M | } |
96 | | |
97 | | /****************************************************************/ |
98 | | /* Check for End Of Row in Next iteration */ |
99 | | /****************************************************************/ |
100 | | |
101 | | /****************************************************************/ |
102 | | /* Transfer the Following things */ |
103 | | /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ |
104 | | /* N-Mb Recon Data ( To Ext Frame Buffer ) */ |
105 | | /* N-Mb Intrapredline Data ( Updated Internally) */ |
106 | | /* N-Mb MV Data ( To Ext MV Buffer ) */ |
107 | | /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ |
108 | | /****************************************************************/ |
109 | | |
110 | | /* Swap top and current pointers */ |
111 | | |
112 | 1.31M | ps_dec->s_tran_addrecon_parse.pu1_dest_y += |
113 | 1.31M | ps_dec->s_tran_addrecon_parse.u4_inc_y[u4_end_of_row]; |
114 | 1.31M | ps_dec->s_tran_addrecon_parse.pu1_dest_u += |
115 | 1.31M | ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row]; |
116 | 1.31M | ps_dec->s_tran_addrecon_parse.pu1_dest_v += |
117 | 1.31M | ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row]; |
118 | | |
119 | 1.31M | if(u4_end_of_row) |
120 | 1.29M | { |
121 | 1.29M | UWORD16 u2_mb_y; |
122 | 1.29M | UWORD32 u4_frame_stride, y_offset; |
123 | | |
124 | 1.29M | ps_dec->ps_top_mb_row = ps_dec->ps_cur_mb_row; |
125 | 1.29M | ps_dec->ps_cur_mb_row += ((ps_dec->u2_frm_wd_in_mbs) << u1_mbaff); |
126 | | |
127 | 1.29M | u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff); |
128 | 1.29M | u4_frame_stride = ps_dec->u2_frm_wd_y |
129 | 1.29M | << ps_dec->ps_cur_slice->u1_field_pic_flag; |
130 | 1.29M | y_offset = (u2_mb_y * u4_frame_stride) << 4; |
131 | 1.29M | ps_dec->s_tran_addrecon_parse.pu1_dest_y = |
132 | 1.29M | ps_dec->s_cur_pic.pu1_buf1 + y_offset; |
133 | | |
134 | 1.29M | u4_frame_stride = ps_dec->u2_frm_wd_uv |
135 | 1.29M | << ps_dec->ps_cur_slice->u1_field_pic_flag; |
136 | 1.29M | y_offset = (u2_mb_y * u4_frame_stride) << 3; |
137 | 1.29M | ps_dec->s_tran_addrecon_parse.pu1_dest_u = |
138 | 1.29M | ps_dec->s_cur_pic.pu1_buf2 + y_offset; |
139 | 1.29M | ps_dec->s_tran_addrecon_parse.pu1_dest_v = |
140 | 1.29M | ps_dec->s_cur_pic.pu1_buf3 + y_offset; |
141 | | |
142 | 1.29M | } |
143 | | |
144 | 1.31M | ps_dec->ps_deblk_mbn += u4_num_mbs; |
145 | | |
146 | | /* |
147 | | * The Slice boundary is also a valid condition to transfer. So recalculate |
148 | | * the Left increment, in case the number of MBs is lesser than the |
149 | | * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is |
150 | | * decoded. |
151 | | */ |
152 | 1.31M | ps_dec->s_tran_addrecon.u2_mv_left_inc = ((u4_num_mbs >> u1_mbaff) - 1) |
153 | 1.31M | << (4 + u1_mbaff); |
154 | 1.31M | ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (u4_num_mbs << 2) - 1 |
155 | 1.31M | - (u1_mbaff << 2); |
156 | | |
157 | | /* reassign left MV and cur MV pointers */ |
158 | 1.31M | ps_dec->ps_mv_left = ps_dec->ps_mv_cur |
159 | 1.31M | + ps_dec->s_tran_addrecon.u2_mv_left_inc; |
160 | | |
161 | 1.31M | ps_dec->ps_mv_cur += (u4_num_mbs << 4); |
162 | 1.31M | ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs; |
163 | | |
164 | 1.31M | } |
165 | 1.31M | } |
166 | | |
167 | | void ih264d_decode_tfr_nmb(dec_struct_t * ps_dec, |
168 | | UWORD32 u4_num_mbs, |
169 | | UWORD32 u4_num_mbs_next, |
170 | | UWORD32 u4_end_of_row) |
171 | 1.30M | { |
172 | | |
173 | 1.30M | UWORD32 u1_end_of_row_next; |
174 | | |
175 | 1.30M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
176 | | |
177 | | /****************************************************************/ |
178 | | /* Check for End Of Row in Next iteration */ |
179 | | /****************************************************************/ |
180 | 1.30M | u1_end_of_row_next = u4_num_mbs_next && |
181 | 28.3k | ((u4_num_mbs_next) <= (ps_dec->u4_recon_mb_grp >> u1_mbaff)); |
182 | | |
183 | | /****************************************************************/ |
184 | | /* Transfer the Following things */ |
185 | | /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ |
186 | | /* N-Mb Recon Data ( To Ext Frame Buffer ) */ |
187 | | /* N-Mb Intrapredline Data ( Updated Internally) */ |
188 | | /* N-Mb MV Data ( To Ext MV Buffer ) */ |
189 | | /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ |
190 | | /****************************************************************/ |
191 | 1.30M | if(u4_end_of_row) |
192 | 1.27M | { |
193 | 1.27M | ps_dec->i2_dec_thread_mb_y += (1 << u1_mbaff); |
194 | 1.27M | } |
195 | 1.30M | ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row, |
196 | 1.30M | u1_end_of_row_next); |
197 | | |
198 | 1.30M | } |
199 | | |
200 | | WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t * ps_dec, |
201 | | UWORD32 u4_num_mbs, |
202 | | UWORD32 u4_num_mbs_next, |
203 | | UWORD32 u4_end_of_row) |
204 | 1.14M | { |
205 | 1.14M | WORD32 i,j; |
206 | 1.14M | dec_mb_info_t * ps_cur_mb_info; |
207 | 1.14M | UWORD32 u4_update_mbaff = 0; |
208 | 1.14M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
209 | 1.14M | UWORD32 u1_slice_type, u1_B; |
210 | 1.14M | WORD32 u1_skip_th; |
211 | 1.14M | UWORD32 u1_ipcm_th; |
212 | 1.14M | UWORD32 u4_cond; |
213 | 1.14M | UWORD16 u2_slice_num,u2_cur_dec_mb_num; |
214 | 1.14M | WORD32 ret; |
215 | 1.14M | UWORD32 u4_mb_num; |
216 | 1.14M | WORD32 nop_cnt = 8*128; |
217 | 1.14M | u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type; |
218 | | |
219 | 1.14M | u1_B = (u1_slice_type == B_SLICE); |
220 | | |
221 | 1.14M | u1_skip_th = ((u1_slice_type != I_SLICE) ? |
222 | 1.09M | (u1_B ? B_8x8 : PRED_8x8R0) : -1); |
223 | | |
224 | 1.14M | u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0); |
225 | | |
226 | 1.14M | u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num; |
227 | | |
228 | 16.0M | while(1) |
229 | 16.0M | { |
230 | | |
231 | 16.0M | UWORD32 u4_max_mb = (UWORD32)(ps_dec->i2_dec_thread_mb_y + (1 << u1_mbaff)) * ps_dec->u2_frm_wd_in_mbs - 1; |
232 | 16.0M | u4_mb_num = u2_cur_dec_mb_num; |
233 | | /*introducing 1 MB delay*/ |
234 | 16.0M | u4_mb_num = MIN(u4_mb_num + u4_num_mbs + 1, u4_max_mb); |
235 | | |
236 | 16.0M | CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); |
237 | 16.0M | if(u4_cond) |
238 | 1.14M | { |
239 | 1.14M | break; |
240 | 1.14M | } |
241 | 14.8M | else |
242 | 14.8M | { |
243 | 14.8M | if(nop_cnt > 0) |
244 | 13.2M | { |
245 | 13.2M | nop_cnt -= 128; |
246 | 13.2M | NOP(128); |
247 | 13.2M | } |
248 | 1.64M | else |
249 | 1.64M | { |
250 | 1.64M | if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
251 | 267k | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
252 | 81.6k | { |
253 | 81.6k | ps_dec->u4_fmt_conv_num_rows = |
254 | 81.6k | MIN(FMT_CONV_NUM_ROWS, |
255 | 81.6k | (ps_dec->s_disp_frame_info.u4_y_ht |
256 | 81.6k | - ps_dec->u4_fmt_conv_cur_row)); |
257 | 81.6k | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
258 | 81.6k | ps_dec->u4_fmt_conv_cur_row, |
259 | 81.6k | ps_dec->u4_fmt_conv_num_rows); |
260 | 81.6k | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
261 | 81.6k | } |
262 | 1.55M | else |
263 | 1.55M | { |
264 | 1.55M | nop_cnt = 8*128; |
265 | 1.55M | ithread_yield(); |
266 | 1.55M | } |
267 | 1.64M | } |
268 | 14.8M | } |
269 | 16.0M | } |
270 | | /* N Mb MC Loop */ |
271 | 27.2M | for(i = 0; i < u4_num_mbs; i++) |
272 | 26.1M | { |
273 | 26.1M | u4_mb_num = u2_cur_dec_mb_num; |
274 | | |
275 | 26.1M | GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num, |
276 | 26.1M | u2_slice_num); |
277 | | |
278 | 26.1M | if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread) |
279 | 21.9k | { |
280 | 21.9k | ps_dec->u4_cur_slice_decode_done = 1; |
281 | 21.9k | break; |
282 | 21.9k | } |
283 | | |
284 | 26.0M | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num]; |
285 | | |
286 | 26.0M | ps_dec->u4_dma_buf_idx = 0; |
287 | 26.0M | ps_dec->u4_pred_info_idx = 0; |
288 | | |
289 | 26.0M | if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) |
290 | 773k | { |
291 | 773k | WORD32 pred_cnt = 0; |
292 | 773k | pred_info_pkd_t *ps_pred_pkd; |
293 | 773k | UWORD32 u4_pred_info_pkd_idx; |
294 | 773k | WORD8 i1_pred; |
295 | | |
296 | 773k | u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; |
297 | | |
298 | 2.29M | while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) |
299 | 1.51M | { |
300 | 1.51M | ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; |
301 | | |
302 | 1.51M | ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, |
303 | 1.51M | ps_cur_mb_info->u2_mbx, |
304 | 1.51M | ps_cur_mb_info->u2_mby, |
305 | 1.51M | (i >> u1_mbaff), |
306 | 1.51M | ps_cur_mb_info); |
307 | | |
308 | 1.51M | u4_pred_info_pkd_idx++; |
309 | 1.51M | pred_cnt++; |
310 | 1.51M | } |
311 | 773k | ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); |
312 | 773k | } |
313 | 25.3M | else if(ps_cur_mb_info->u1_mb_type == MB_SKIP) |
314 | 24.5M | { |
315 | 24.5M | WORD32 pred_cnt = 0; |
316 | 24.5M | pred_info_pkd_t *ps_pred_pkd; |
317 | 24.5M | UWORD32 u4_pred_info_pkd_idx; |
318 | 24.5M | WORD8 i1_pred; |
319 | | |
320 | 24.5M | u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; |
321 | | |
322 | 54.6M | while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) |
323 | 30.1M | { |
324 | 30.1M | ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; |
325 | | |
326 | 30.1M | ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, |
327 | 30.1M | ps_cur_mb_info->u2_mbx, |
328 | 30.1M | ps_cur_mb_info->u2_mby, |
329 | 30.1M | (i >> u1_mbaff), |
330 | 30.1M | ps_cur_mb_info); |
331 | | |
332 | 30.1M | u4_pred_info_pkd_idx++; |
333 | 30.1M | pred_cnt++; |
334 | 30.1M | } |
335 | | /* Decode MB skip */ |
336 | 24.5M | ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); |
337 | 24.5M | } |
338 | | |
339 | 26.0M | u2_cur_dec_mb_num++; |
340 | 26.0M | } |
341 | | |
342 | | /* N Mb IQ IT RECON Loop */ |
343 | 27.2M | for(j = 0; j < i; j++) |
344 | 26.0M | { |
345 | 26.0M | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num]; |
346 | | |
347 | 26.0M | if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag) |
348 | 8.51M | { |
349 | 8.51M | if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) |
350 | 292k | { |
351 | 292k | ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j); |
352 | 292k | } |
353 | 8.22M | else if(ps_cur_mb_info->u1_mb_type != MB_SKIP) |
354 | 177k | { |
355 | 177k | if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type) |
356 | 176k | { |
357 | 176k | ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1); |
358 | 176k | ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j); |
359 | 176k | } |
360 | 177k | } |
361 | | |
362 | | |
363 | 8.51M | if(ps_dec->u4_use_intrapred_line_copy == 1) |
364 | 6.19M | ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j); |
365 | 8.51M | } |
366 | | |
367 | 26.0M | DATA_SYNC(); |
368 | | |
369 | 26.0M | if(u1_mbaff) |
370 | 1.95M | { |
371 | 1.95M | if(u4_update_mbaff) |
372 | 978k | { |
373 | 978k | UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx |
374 | 978k | + ps_dec->u2_frm_wd_in_mbs |
375 | 978k | * (ps_cur_mb_info->u2_mby >> 1); |
376 | 978k | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); |
377 | 978k | u4_update_mbaff = 0; |
378 | 978k | } |
379 | 978k | else |
380 | 978k | { |
381 | 978k | u4_update_mbaff = 1; |
382 | 978k | } |
383 | 1.95M | } |
384 | 24.1M | else |
385 | 24.1M | { |
386 | 24.1M | UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx |
387 | 24.1M | + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby; |
388 | 24.1M | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); |
389 | 24.1M | } |
390 | 26.0M | ps_dec->cur_dec_mb_num++; |
391 | 26.0M | } |
392 | | |
393 | | /*N MB deblocking*/ |
394 | 1.14M | if(ps_dec->u4_nmb_deblk == 1) |
395 | 0 | { |
396 | 0 | UWORD32 u4_wd_y, u4_wd_uv; |
397 | 0 | tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon); |
398 | 0 | UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; |
399 | 0 | const WORD32 i4_cb_qp_idx_ofst = |
400 | 0 | ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; |
401 | 0 | const WORD32 i4_cr_qp_idx_ofst = |
402 | 0 | ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; |
403 | |
|
404 | 0 | u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; |
405 | 0 | u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; |
406 | |
|
407 | 0 | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_deblk_mb_num]; |
408 | |
|
409 | 0 | ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx; |
410 | 0 | ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby; |
411 | | |
412 | |
|
413 | 0 | for(j = 0; j < i; j++) |
414 | 0 | { |
415 | 0 | ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, |
416 | 0 | i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst, |
417 | 0 | u4_wd_y, u4_wd_uv); |
418 | |
|
419 | 0 | } |
420 | 0 | } |
421 | | |
422 | | /*handle the last mb in picture case*/ |
423 | 1.14M | if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr) |
424 | 75.6k | ps_dec->u4_cur_slice_decode_done = 1; |
425 | | |
426 | 1.14M | if(i != u4_num_mbs) |
427 | 21.9k | { |
428 | 21.9k | u4_end_of_row = 0; |
429 | | /*Number of MB's left in row*/ |
430 | 21.9k | u4_num_mbs_next = u4_num_mbs_next + ((u4_num_mbs - i) >> u1_mbaff); |
431 | 21.9k | } |
432 | | |
433 | 1.14M | ih264d_decode_tfr_nmb(ps_dec, (i), u4_num_mbs_next, u4_end_of_row); |
434 | | |
435 | 1.14M | return OK; |
436 | 1.14M | } |
437 | | |
438 | | WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec) |
439 | 97.6k | { |
440 | 97.6k | UWORD32 u4_num_mbs_next, u4_num_mbsleft, u4_end_of_row = 0; |
441 | 97.6k | const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs; |
442 | 97.6k | UWORD32 u4_mbaff, u4_num_mbs; |
443 | | |
444 | 97.6k | UWORD16 u2_first_mb_in_slice; |
445 | 97.6k | UWORD16 i16_mb_x, i16_mb_y; |
446 | 97.6k | UWORD8 u1_field_pic; |
447 | 97.6k | UWORD32 u4_frame_stride, x_offset, y_offset; |
448 | 97.6k | WORD32 ret; |
449 | | |
450 | 97.6k | tfr_ctxt_t *ps_trns_addr; |
451 | | |
452 | | /*check for mb map of first mb in slice to ensure slice header is parsed*/ |
453 | 12.7M | while(1) |
454 | 12.7M | { |
455 | 12.7M | UWORD32 u4_mb_num = ps_dec->cur_dec_mb_num; |
456 | 12.7M | UWORD32 u4_cond = 0; |
457 | 12.7M | WORD32 nop_cnt = 8 * 128; |
458 | 12.7M | CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); |
459 | 12.7M | if(u4_cond) |
460 | 97.6k | { |
461 | 97.6k | break; |
462 | 97.6k | } |
463 | 12.6M | else |
464 | 12.6M | { |
465 | 12.6M | if(nop_cnt > 0) |
466 | 12.6M | { |
467 | 12.6M | nop_cnt -= 128; |
468 | 12.6M | NOP(128); |
469 | 12.6M | } |
470 | 0 | else if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
471 | 0 | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
472 | 0 | { |
473 | 0 | ps_dec->u4_fmt_conv_num_rows = |
474 | 0 | MIN(FMT_CONV_NUM_ROWS, |
475 | 0 | (ps_dec->s_disp_frame_info.u4_y_ht |
476 | 0 | - ps_dec->u4_fmt_conv_cur_row)); |
477 | 0 | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
478 | 0 | ps_dec->u4_fmt_conv_cur_row, |
479 | 0 | ps_dec->u4_fmt_conv_num_rows); |
480 | 0 | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
481 | 0 | } |
482 | 0 | else |
483 | 0 | { |
484 | 0 | nop_cnt = 8*128; |
485 | 0 | ithread_yield(); |
486 | 0 | } |
487 | 12.6M | DEBUG_THREADS_PRINTF("waiting for mb mapcur_dec_mb_num = %d,ps_dec->u4_cur_mb_addr = %d\n",u2_cur_dec_mb_num, |
488 | 12.6M | ps_dec->u4_cur_mb_addr); |
489 | | |
490 | 12.6M | } |
491 | 12.7M | } |
492 | | |
493 | | |
494 | | |
495 | 97.6k | u4_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
496 | | |
497 | 97.6k | u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice; |
498 | | |
499 | 97.6k | i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs); |
500 | 97.6k | i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs); |
501 | 97.6k | i16_mb_y <<= u4_mbaff; |
502 | 97.6k | ps_dec->i2_dec_thread_mb_y = i16_mb_y; |
503 | | |
504 | | |
505 | 97.6k | ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u4_mbaff; |
506 | | |
507 | 97.6k | if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag) |
508 | 58.2k | { |
509 | 58.2k | ps_dec->pv_proc_tu_coeff_data = |
510 | 58.2k | (void *) ps_dec->ps_decode_cur_slice->pv_tu_coeff_data_start; |
511 | 58.2k | } |
512 | | |
513 | | // recalculate recon pointers |
514 | 97.6k | u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag; |
515 | 97.6k | u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic; |
516 | 97.6k | x_offset = i16_mb_x << 4; |
517 | 97.6k | y_offset = (i16_mb_y * u4_frame_stride) << 4; |
518 | | |
519 | 97.6k | ps_trns_addr = &(ps_dec->s_tran_addrecon); |
520 | | |
521 | 97.6k | ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset; |
522 | | |
523 | 97.6k | u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic; |
524 | 97.6k | x_offset >>= 1; |
525 | 97.6k | y_offset = (i16_mb_y * u4_frame_stride) << 3; |
526 | | |
527 | 97.6k | x_offset *= YUV420SP_FACTOR; |
528 | | |
529 | 97.6k | ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset; |
530 | 97.6k | ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset; |
531 | | |
532 | 97.6k | ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y; |
533 | 97.6k | ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u; |
534 | 97.6k | ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v; |
535 | | |
536 | | |
537 | | /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */ |
538 | 97.6k | { |
539 | 97.6k | ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp; |
540 | 97.6k | ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp; |
541 | 97.6k | } |
542 | 97.6k | { |
543 | 97.6k | UWORD8 uc_nofield_nombaff; |
544 | 97.6k | uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) |
545 | 93.1k | && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) |
546 | 90.5k | && (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE) |
547 | 67.5k | && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0)); |
548 | | |
549 | 97.6k | if(uc_nofield_nombaff == 0) |
550 | 61.6k | { |
551 | 61.6k | ps_dec->p_mc_dec_thread = ih264d_motion_compensate_mp; |
552 | 61.6k | ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp; |
553 | 61.6k | } |
554 | | |
555 | 97.6k | } |
556 | | |
557 | 97.6k | ps_dec->u4_cur_slice_decode_done = 0; |
558 | | |
559 | | |
560 | 1.24M | while(ps_dec->u4_cur_slice_decode_done != 1) |
561 | 1.14M | { |
562 | | |
563 | 1.14M | u4_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u4_mbaff); |
564 | | |
565 | 1.14M | if(u4_num_mbsleft <= ps_dec->u4_recon_mb_grp) |
566 | 1.14M | { |
567 | 1.14M | u4_num_mbs = u4_num_mbsleft; |
568 | | |
569 | | /*Indicate number of mb's left in a row*/ |
570 | 1.14M | u4_num_mbs_next = 0; |
571 | 1.14M | u4_end_of_row = 1; |
572 | 1.14M | i16_mb_x = 0; |
573 | 1.14M | } |
574 | 0 | else |
575 | 0 | { |
576 | 0 | u4_num_mbs = ps_dec->u4_recon_mb_grp; |
577 | | |
578 | | /*Indicate number of mb's left in a row*/ |
579 | 0 | u4_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x |
580 | 0 | - (ps_dec->u4_recon_mb_grp >> u4_mbaff); |
581 | 0 | i16_mb_x += (u4_num_mbs >> u4_mbaff); |
582 | 0 | u4_end_of_row = 0; |
583 | |
|
584 | 0 | } |
585 | 1.14M | ret = ih264d_decode_recon_tfr_nmb_thread(ps_dec, u4_num_mbs, u4_num_mbs_next, |
586 | 1.14M | u4_end_of_row); |
587 | 1.14M | if(ret != OK) |
588 | 0 | return ret; |
589 | 1.14M | } |
590 | 97.6k | return OK; |
591 | 97.6k | } |
592 | | |
593 | | void ih264d_decode_picture_thread(dec_struct_t *ps_dec ) |
594 | 68.1k | { |
595 | 68.1k | ithread_set_name("ih264d_decode_picture_thread"); |
596 | | |
597 | 76.0k | while(1) |
598 | 76.0k | { |
599 | 76.0k | WORD32 ret; |
600 | 76.0k | if(ps_dec->i4_threads_active) |
601 | 8.22k | { |
602 | 8.22k | ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]); |
603 | 8.22k | if(OK != ret) |
604 | 0 | break; |
605 | | |
606 | 16.1k | while(ps_dec->ai4_process_start[0] != PROC_START) |
607 | 7.90k | { |
608 | 7.90k | ithread_cond_wait(ps_dec->apv_proc_start_condition[0], |
609 | 7.90k | ps_dec->apv_proc_start_mutex[0]); |
610 | 7.90k | } |
611 | 8.22k | ps_dec->ai4_process_start[0] = PROC_IN_PROGRESS; |
612 | | |
613 | 8.22k | ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]); |
614 | 8.22k | if(OK != ret || ps_dec->i4_break_threads == 1) |
615 | 314 | break; |
616 | 8.22k | } |
617 | 97.6k | while(1) |
618 | 97.6k | { |
619 | | /*Complete all writes before processing next slice*/ |
620 | | |
621 | 97.6k | DEBUG_THREADS_PRINTF(" Entering decode slice\n"); |
622 | | |
623 | 97.6k | ih264d_decode_slice_thread(ps_dec); |
624 | 97.6k | DEBUG_THREADS_PRINTF(" Exit ih264d_decode_slice_thread \n"); |
625 | | |
626 | | |
627 | 97.6k | if(ps_dec->cur_dec_mb_num |
628 | 97.6k | > ps_dec->ps_cur_sps->u4_max_mb_addr) |
629 | 75.6k | { |
630 | | /*Last slice in frame*/ |
631 | 75.6k | break; |
632 | 75.6k | } |
633 | 21.9k | else |
634 | 21.9k | { |
635 | 21.9k | ps_dec->ps_decode_cur_slice++; |
636 | 21.9k | ps_dec->u2_cur_slice_num_dec_thread++; |
637 | 21.9k | } |
638 | | |
639 | 97.6k | } |
640 | 75.6k | if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
641 | 22.6k | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
642 | 4.57k | { |
643 | 4.57k | ps_dec->u4_fmt_conv_num_rows = |
644 | 4.57k | (ps_dec->s_disp_frame_info.u4_y_ht |
645 | 4.57k | - ps_dec->u4_fmt_conv_cur_row); |
646 | 4.57k | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
647 | 4.57k | ps_dec->u4_fmt_conv_cur_row, |
648 | 4.57k | ps_dec->u4_fmt_conv_num_rows); |
649 | 4.57k | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
650 | 4.57k | } |
651 | | |
652 | 75.6k | if(ps_dec->i4_threads_active) |
653 | 7.90k | { |
654 | 7.90k | ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]); |
655 | 7.90k | if(OK != ret) |
656 | 0 | break; |
657 | | |
658 | 7.90k | ps_dec->ai4_process_done[0] = PROC_DONE; |
659 | 7.90k | ithread_cond_signal(ps_dec->apv_proc_done_condition[0]); |
660 | | |
661 | 7.90k | ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]); |
662 | 7.90k | if(OK != ret) |
663 | 0 | break; |
664 | 7.90k | } |
665 | 67.7k | else |
666 | 67.7k | { |
667 | 67.7k | break; |
668 | 67.7k | } |
669 | 75.6k | } |
670 | 68.1k | } |
671 | | |
672 | | void ih264d_signal_decode_thread(dec_struct_t *ps_dec) |
673 | 131k | { |
674 | 131k | if(ps_dec->u4_dec_thread_created == 1) |
675 | 95.1k | { |
676 | 95.1k | if(ps_dec->i4_threads_active) |
677 | 8.92k | { |
678 | 8.92k | proc_state_t i4_process_state; |
679 | 8.92k | ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]); |
680 | 8.92k | i4_process_state = ps_dec->ai4_process_start[0]; |
681 | 8.92k | ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]); |
682 | | |
683 | | // only wait if the thread has started decoding |
684 | 8.92k | if(i4_process_state != PROC_INIT) |
685 | 7.90k | { |
686 | 7.90k | ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]); |
687 | | |
688 | 15.4k | while(ps_dec->ai4_process_done[0] != PROC_DONE) |
689 | 7.56k | { |
690 | 7.56k | ithread_cond_wait(ps_dec->apv_proc_done_condition[0], |
691 | 7.56k | ps_dec->apv_proc_done_mutex[0]); |
692 | 7.56k | } |
693 | 7.90k | ps_dec->ai4_process_done[0] = PROC_INIT; |
694 | 7.90k | ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]); |
695 | 7.90k | } |
696 | 8.92k | } |
697 | 86.2k | else |
698 | 86.2k | { |
699 | 86.2k | ithread_join(ps_dec->pv_dec_thread_handle, NULL); |
700 | 86.2k | ps_dec->u4_dec_thread_created = 0; |
701 | 86.2k | } |
702 | 95.1k | } |
703 | 131k | } |
704 | | void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec) |
705 | 57.1k | { |
706 | 57.1k | if(ps_dec->u4_bs_deblk_thread_created) |
707 | 29.2k | { |
708 | 29.2k | if(ps_dec->i4_threads_active) |
709 | 5.37k | { |
710 | 5.37k | proc_state_t i4_process_state; |
711 | 5.37k | ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]); |
712 | 5.37k | i4_process_state = ps_dec->ai4_process_start[1]; |
713 | 5.37k | ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]); |
714 | | |
715 | | // only wait if the thread has started deblking |
716 | 5.37k | if(i4_process_state != PROC_INIT) |
717 | 4.97k | { |
718 | 4.97k | ithread_mutex_lock(ps_dec->apv_proc_done_mutex[1]); |
719 | | |
720 | 6.40k | while(ps_dec->ai4_process_done[1] != PROC_DONE) |
721 | 1.43k | { |
722 | 1.43k | ithread_cond_wait(ps_dec->apv_proc_done_condition[1], |
723 | 1.43k | ps_dec->apv_proc_done_mutex[1]); |
724 | 1.43k | } |
725 | 4.97k | ps_dec->ai4_process_done[1] = PROC_INIT; |
726 | 4.97k | ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[1]); |
727 | 4.97k | } |
728 | 5.37k | } |
729 | 23.8k | else |
730 | 23.8k | { |
731 | 23.8k | ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL); |
732 | 23.8k | ps_dec->u4_bs_deblk_thread_created = 0; |
733 | 23.8k | } |
734 | 29.2k | } |
735 | | |
736 | 57.1k | } |