/src/libavc/decoder/ih264d_thread_parse_decode.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ************************************************************************** |
22 | | * \file ih264d_thread_parse_decode.c |
23 | | * |
24 | | * \brief |
25 | | * Contains routines for the multi-threaded decoder |
26 | | * |
27 | | * Detailed_description |
28 | | * |
29 | | * \date |
30 | | * 20/02/2012 |
31 | | * |
32 | | * \author ZR |
33 | | ************************************************************************** |
34 | | */ |
35 | | |
36 | | #include "ih264d_error_handler.h" |
37 | | #include "ih264d_debug.h" |
38 | | #include "ithread.h" |
39 | | #include <string.h> |
40 | | #include "ih264d_defs.h" |
41 | | #include "ih264d_debug.h" |
42 | | #include "ih264d_tables.h" |
43 | | #include "ih264d_structs.h" |
44 | | #include "ih264d_defs.h" |
45 | | #include "ih264d_mb_utils.h" |
46 | | #include "ih264d_thread_parse_decode.h" |
47 | | #include "ih264d_inter_pred.h" |
48 | | |
49 | | #include "ih264d_process_pslice.h" |
50 | | #include "ih264d_process_intra_mb.h" |
51 | | #include "ih264d_deblocking.h" |
52 | | #include "ih264d_format_conv.h" |
53 | | |
/*
 * Forward declaration only; the definition is not part of this file's
 * visible contents.
 *
 * ps_dec         : decoder context
 * ps_cur_mb_info : info of the macroblock to operate on
 * nmb_index      : presumably the MB's index inside the current N-MB
 *                  group -- TODO confirm against the definition
 */
void ih264d_deblock_mb_level(dec_struct_t *ps_dec,
                             dec_mb_info_t *ps_cur_mb_info,
                             UWORD32 nmb_index);
57 | | |
/*
 * Forward declaration only; the definition is not part of this file's
 * visible contents. Called from ih264d_decode_recon_tfr_nmb_thread() for
 * every reconstructed MB when ps_dec->u4_use_intrapred_line_copy == 1,
 * with the MB's position in the current group as nmb_index.
 *
 * ps_dec         : decoder context
 * ps_cur_mb_info : info of the macroblock that was just reconstructed
 * nmb_index      : index of that MB inside the current N-MB group
 */
void ih264d_copy_intra_pred_line(dec_struct_t *ps_dec,
                                 dec_mb_info_t *ps_cur_mb_info,
                                 UWORD32 nmb_index);
61 | | |
62 | | void ih264d_parse_tfr_nmb(dec_struct_t * ps_dec, |
63 | | UWORD32 u4_mb_idx, |
64 | | UWORD32 u4_num_mbs, |
65 | | UWORD32 u4_num_mbs_next, |
66 | | UWORD32 u4_tfr_n_mb, |
67 | | UWORD32 u4_end_of_row) |
68 | 1.43M | { |
69 | 1.43M | WORD32 i, u4_mb_num; |
70 | | |
71 | 1.43M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
72 | 1.43M | UWORD32 u4_n_mb_start; |
73 | | |
74 | 1.43M | UNUSED(u4_mb_idx); |
75 | 1.43M | UNUSED(u4_num_mbs_next); |
76 | 1.43M | if(u4_tfr_n_mb) |
77 | 1.43M | { |
78 | | |
79 | | |
80 | 1.43M | u4_n_mb_start = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs; |
81 | | |
82 | | // copy into s_frmMbInfo |
83 | | |
84 | 1.43M | u4_mb_num = u4_n_mb_start; |
85 | 1.43M | u4_mb_num = (ps_dec->u4_cur_mb_addr + 1) - u4_num_mbs; |
86 | | |
87 | 19.2M | for(i = 0; i < u4_num_mbs; i++) |
88 | 17.7M | { |
89 | 17.7M | UPDATE_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u4_mb_num, |
90 | 17.7M | ps_dec->u2_cur_slice_num); |
91 | 17.7M | DATA_SYNC(); |
92 | 17.7M | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_dec_mb_map, u4_mb_num); |
93 | | |
94 | 17.7M | u4_mb_num++; |
95 | 17.7M | } |
96 | | |
97 | | /****************************************************************/ |
98 | | /* Check for End Of Row in Next iteration */ |
99 | | /****************************************************************/ |
100 | | |
101 | | /****************************************************************/ |
102 | | /* Transfer the Following things */ |
103 | | /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ |
104 | | /* N-Mb Recon Data ( To Ext Frame Buffer ) */ |
105 | | /* N-Mb Intrapredline Data ( Updated Internally) */ |
106 | | /* N-Mb MV Data ( To Ext MV Buffer ) */ |
107 | | /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ |
108 | | /****************************************************************/ |
109 | | |
110 | | /* Swap top and current pointers */ |
111 | | |
112 | 1.43M | ps_dec->s_tran_addrecon_parse.pu1_dest_y += |
113 | 1.43M | ps_dec->s_tran_addrecon_parse.u4_inc_y[u4_end_of_row]; |
114 | 1.43M | ps_dec->s_tran_addrecon_parse.pu1_dest_u += |
115 | 1.43M | ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row]; |
116 | 1.43M | ps_dec->s_tran_addrecon_parse.pu1_dest_v += |
117 | 1.43M | ps_dec->s_tran_addrecon_parse.u4_inc_uv[u4_end_of_row]; |
118 | | |
119 | 1.43M | if(u4_end_of_row) |
120 | 1.41M | { |
121 | 1.41M | UWORD16 u2_mb_y; |
122 | 1.41M | UWORD32 u4_frame_stride, y_offset; |
123 | | |
124 | 1.41M | ps_dec->ps_top_mb_row = ps_dec->ps_cur_mb_row; |
125 | 1.41M | ps_dec->ps_cur_mb_row += ((ps_dec->u2_frm_wd_in_mbs) << u1_mbaff); |
126 | | |
127 | 1.41M | u2_mb_y = ps_dec->u2_mby + (1 + u1_mbaff); |
128 | 1.41M | u4_frame_stride = ps_dec->u2_frm_wd_y |
129 | 1.41M | << ps_dec->ps_cur_slice->u1_field_pic_flag; |
130 | 1.41M | y_offset = (u2_mb_y * u4_frame_stride) << 4; |
131 | 1.41M | ps_dec->s_tran_addrecon_parse.pu1_dest_y = |
132 | 1.41M | ps_dec->s_cur_pic.pu1_buf1 + y_offset; |
133 | | |
134 | 1.41M | u4_frame_stride = ps_dec->u2_frm_wd_uv |
135 | 1.41M | << ps_dec->ps_cur_slice->u1_field_pic_flag; |
136 | 1.41M | y_offset = (u2_mb_y * u4_frame_stride) << 3; |
137 | 1.41M | ps_dec->s_tran_addrecon_parse.pu1_dest_u = |
138 | 1.41M | ps_dec->s_cur_pic.pu1_buf2 + y_offset; |
139 | 1.41M | ps_dec->s_tran_addrecon_parse.pu1_dest_v = |
140 | 1.41M | ps_dec->s_cur_pic.pu1_buf3 + y_offset; |
141 | | |
142 | 1.41M | } |
143 | | |
144 | 1.43M | ps_dec->ps_deblk_mbn += u4_num_mbs; |
145 | | |
146 | | /* |
147 | | * The Slice boundary is also a valid condition to transfer. So recalculate |
148 | | * the Left increment, in case the number of MBs is lesser than the |
149 | | * N MB value. c_numMbs will be equal to N of N MB if the entire N Mb is |
150 | | * decoded. |
151 | | */ |
152 | 1.43M | ps_dec->s_tran_addrecon.u2_mv_left_inc = ((u4_num_mbs >> u1_mbaff) - 1) |
153 | 1.43M | << (4 + u1_mbaff); |
154 | 1.43M | ps_dec->s_tran_addrecon.u2_mv_top_left_inc = (u4_num_mbs << 2) - 1 |
155 | 1.43M | - (u1_mbaff << 2); |
156 | | |
157 | | /* reassign left MV and cur MV pointers */ |
158 | 1.43M | ps_dec->ps_mv_left = ps_dec->ps_mv_cur |
159 | 1.43M | + ps_dec->s_tran_addrecon.u2_mv_left_inc; |
160 | | |
161 | 1.43M | ps_dec->ps_mv_cur += (u4_num_mbs << 4); |
162 | 1.43M | ps_dec->u4_num_mbs_prev_nmb = u4_num_mbs; |
163 | | |
164 | 1.43M | } |
165 | 1.43M | } |
166 | | |
167 | | void ih264d_decode_tfr_nmb(dec_struct_t * ps_dec, |
168 | | UWORD32 u4_num_mbs, |
169 | | UWORD32 u4_num_mbs_next, |
170 | | UWORD32 u4_end_of_row) |
171 | 1.42M | { |
172 | | |
173 | 1.42M | UWORD32 u1_end_of_row_next; |
174 | | |
175 | 1.42M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
176 | | |
177 | | /****************************************************************/ |
178 | | /* Check for End Of Row in Next iteration */ |
179 | | /****************************************************************/ |
180 | 1.42M | u1_end_of_row_next = u4_num_mbs_next && |
181 | 25.4k | ((u4_num_mbs_next) <= (ps_dec->u4_recon_mb_grp >> u1_mbaff)); |
182 | | |
183 | | /****************************************************************/ |
184 | | /* Transfer the Following things */ |
185 | | /* N-Mb DeblkParams Data ( To Ext DeblkParams Buffer ) */ |
186 | | /* N-Mb Recon Data ( To Ext Frame Buffer ) */ |
187 | | /* N-Mb Intrapredline Data ( Updated Internally) */ |
188 | | /* N-Mb MV Data ( To Ext MV Buffer ) */ |
189 | | /* N-Mb MVTop/TopRight Data ( To Int MV Top Scratch Buffers) */ |
190 | | /****************************************************************/ |
191 | 1.42M | if(u4_end_of_row) |
192 | 1.40M | { |
193 | 1.40M | ps_dec->i2_dec_thread_mb_y += (1 << u1_mbaff); |
194 | 1.40M | } |
195 | 1.42M | ih264d_transfer_mb_group_data(ps_dec, u4_num_mbs, u4_end_of_row, |
196 | 1.42M | u1_end_of_row_next); |
197 | | |
198 | 1.42M | } |
199 | | |
200 | | WORD32 ih264d_decode_recon_tfr_nmb_thread(dec_struct_t * ps_dec, |
201 | | UWORD32 u4_num_mbs, |
202 | | UWORD32 u4_num_mbs_next, |
203 | | UWORD32 u4_end_of_row) |
204 | 1.23M | { |
205 | 1.23M | WORD32 i,j; |
206 | 1.23M | dec_mb_info_t * ps_cur_mb_info; |
207 | 1.23M | UWORD32 u4_update_mbaff = 0; |
208 | 1.23M | const UWORD32 u1_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
209 | 1.23M | UWORD32 u1_slice_type, u1_B; |
210 | 1.23M | WORD32 u1_skip_th; |
211 | 1.23M | UWORD32 u1_ipcm_th; |
212 | 1.23M | UWORD32 u4_cond; |
213 | 1.23M | UWORD16 u2_slice_num,u2_cur_dec_mb_num; |
214 | 1.23M | WORD32 ret; |
215 | 1.23M | UWORD32 u4_mb_num; |
216 | 1.23M | WORD32 nop_cnt = 8*128; |
217 | 1.23M | u1_slice_type = ps_dec->ps_decode_cur_slice->slice_type; |
218 | | |
219 | 1.23M | u1_B = (u1_slice_type == B_SLICE); |
220 | | |
221 | 1.23M | u1_skip_th = ((u1_slice_type != I_SLICE) ? |
222 | 1.16M | (u1_B ? B_8x8 : PRED_8x8R0) : -1); |
223 | | |
224 | 1.23M | u1_ipcm_th = ((u1_slice_type != I_SLICE) ? (u1_B ? 23 : 5) : 0); |
225 | | |
226 | 1.23M | u2_cur_dec_mb_num = ps_dec->cur_dec_mb_num; |
227 | | |
228 | 10.3M | while(1) |
229 | 10.3M | { |
230 | | |
231 | 10.3M | UWORD32 u4_max_mb = (UWORD32)(ps_dec->i2_dec_thread_mb_y + (1 << u1_mbaff)) * ps_dec->u2_frm_wd_in_mbs - 1; |
232 | 10.3M | u4_mb_num = u2_cur_dec_mb_num; |
233 | | /*introducing 1 MB delay*/ |
234 | 10.3M | u4_mb_num = MIN(u4_mb_num + u4_num_mbs + 1, u4_max_mb); |
235 | | |
236 | 10.3M | CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); |
237 | 10.3M | if(u4_cond) |
238 | 1.23M | { |
239 | 1.23M | break; |
240 | 1.23M | } |
241 | 9.13M | else |
242 | 9.13M | { |
243 | 9.13M | if(nop_cnt > 0) |
244 | 8.10M | { |
245 | 8.10M | nop_cnt -= 128; |
246 | 8.10M | NOP(128); |
247 | 8.10M | } |
248 | 1.02M | else |
249 | 1.02M | { |
250 | 1.02M | if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
251 | 266k | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
252 | 98.2k | { |
253 | 98.2k | ps_dec->u4_fmt_conv_num_rows = |
254 | 98.2k | MIN(FMT_CONV_NUM_ROWS, |
255 | 98.2k | (ps_dec->s_disp_frame_info.u4_y_ht |
256 | 98.2k | - ps_dec->u4_fmt_conv_cur_row)); |
257 | 98.2k | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
258 | 98.2k | ps_dec->u4_fmt_conv_cur_row, |
259 | 98.2k | ps_dec->u4_fmt_conv_num_rows); |
260 | 98.2k | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
261 | 98.2k | } |
262 | 927k | else |
263 | 927k | { |
264 | 927k | nop_cnt = 8*128; |
265 | 927k | ithread_yield(); |
266 | 927k | } |
267 | 1.02M | } |
268 | 9.13M | } |
269 | 10.3M | } |
270 | | /* N Mb MC Loop */ |
271 | 18.3M | for(i = 0; i < u4_num_mbs; i++) |
272 | 17.1M | { |
273 | 17.1M | u4_mb_num = u2_cur_dec_mb_num; |
274 | | |
275 | 17.1M | GET_SLICE_NUM_MAP(ps_dec->pu2_slice_num_map, u2_cur_dec_mb_num, |
276 | 17.1M | u2_slice_num); |
277 | | |
278 | 17.1M | if(u2_slice_num != ps_dec->u2_cur_slice_num_dec_thread) |
279 | 18.1k | { |
280 | 18.1k | ps_dec->u4_cur_slice_decode_done = 1; |
281 | 18.1k | break; |
282 | 18.1k | } |
283 | | |
284 | 17.1M | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[u2_cur_dec_mb_num]; |
285 | | |
286 | 17.1M | ps_dec->u4_dma_buf_idx = 0; |
287 | 17.1M | ps_dec->u4_pred_info_idx = 0; |
288 | | |
289 | 17.1M | if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) |
290 | 264k | { |
291 | 264k | WORD32 pred_cnt = 0; |
292 | 264k | pred_info_pkd_t *ps_pred_pkd; |
293 | 264k | UWORD32 u4_pred_info_pkd_idx; |
294 | 264k | WORD8 i1_pred; |
295 | | |
296 | 264k | u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; |
297 | | |
298 | 965k | while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) |
299 | 700k | { |
300 | 700k | ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; |
301 | | |
302 | 700k | ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, |
303 | 700k | ps_cur_mb_info->u2_mbx, |
304 | 700k | ps_cur_mb_info->u2_mby, |
305 | 700k | (i >> u1_mbaff), |
306 | 700k | ps_cur_mb_info); |
307 | | |
308 | 700k | u4_pred_info_pkd_idx++; |
309 | 700k | pred_cnt++; |
310 | 700k | } |
311 | 264k | ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); |
312 | 264k | } |
313 | 16.8M | else if(ps_cur_mb_info->u1_mb_type == MB_SKIP) |
314 | 16.1M | { |
315 | 16.1M | WORD32 pred_cnt = 0; |
316 | 16.1M | pred_info_pkd_t *ps_pred_pkd; |
317 | 16.1M | UWORD32 u4_pred_info_pkd_idx; |
318 | 16.1M | WORD8 i1_pred; |
319 | | |
320 | 16.1M | u4_pred_info_pkd_idx = ps_cur_mb_info->u4_pred_info_pkd_idx; |
321 | | |
322 | 33.0M | while(pred_cnt < ps_cur_mb_info->u1_num_pred_parts) |
323 | 16.8M | { |
324 | 16.8M | ps_pred_pkd = ps_dec->ps_pred_pkd + u4_pred_info_pkd_idx; |
325 | | |
326 | 16.8M | ps_dec->p_form_mb_part_info_thread(ps_pred_pkd,ps_dec, |
327 | 16.8M | ps_cur_mb_info->u2_mbx, |
328 | 16.8M | ps_cur_mb_info->u2_mby, |
329 | 16.8M | (i >> u1_mbaff), |
330 | 16.8M | ps_cur_mb_info); |
331 | | |
332 | 16.8M | u4_pred_info_pkd_idx++; |
333 | 16.8M | pred_cnt++; |
334 | 16.8M | } |
335 | | /* Decode MB skip */ |
336 | 16.1M | ps_dec->p_mc_dec_thread(ps_dec, ps_cur_mb_info); |
337 | 16.1M | } |
338 | | |
339 | 17.1M | u2_cur_dec_mb_num++; |
340 | 17.1M | } |
341 | | |
342 | | /* N Mb IQ IT RECON Loop */ |
343 | 18.3M | for(j = 0; j < i; j++) |
344 | 17.1M | { |
345 | 17.1M | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->cur_dec_mb_num]; |
346 | | |
347 | 17.1M | if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag) |
348 | 9.54M | { |
349 | 9.54M | if(ps_cur_mb_info->u1_mb_type <= u1_skip_th) |
350 | 202k | { |
351 | 202k | ih264d_process_inter_mb(ps_dec, ps_cur_mb_info, j); |
352 | 202k | } |
353 | 9.34M | else if(ps_cur_mb_info->u1_mb_type != MB_SKIP) |
354 | 360k | { |
355 | 360k | if((u1_ipcm_th + 25) != ps_cur_mb_info->u1_mb_type) |
356 | 358k | { |
357 | 358k | ps_cur_mb_info->u1_mb_type -= (u1_skip_th + 1); |
358 | 358k | ih264d_process_intra_mb(ps_dec, ps_cur_mb_info, j); |
359 | 358k | } |
360 | 360k | } |
361 | | |
362 | | |
363 | 9.54M | if(ps_dec->u4_use_intrapred_line_copy == 1) |
364 | 8.61M | ih264d_copy_intra_pred_line(ps_dec, ps_cur_mb_info, j); |
365 | 9.54M | } |
366 | | |
367 | 17.1M | DATA_SYNC(); |
368 | | |
369 | 17.1M | if(u1_mbaff) |
370 | 383k | { |
371 | 383k | if(u4_update_mbaff) |
372 | 191k | { |
373 | 191k | UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx |
374 | 191k | + ps_dec->u2_frm_wd_in_mbs |
375 | 191k | * (ps_cur_mb_info->u2_mby >> 1); |
376 | 191k | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); |
377 | 191k | u4_update_mbaff = 0; |
378 | 191k | } |
379 | 191k | else |
380 | 191k | { |
381 | 191k | u4_update_mbaff = 1; |
382 | 191k | } |
383 | 383k | } |
384 | 16.7M | else |
385 | 16.7M | { |
386 | 16.7M | UWORD32 u4_mb_num = ps_cur_mb_info->u2_mbx |
387 | 16.7M | + ps_dec->u2_frm_wd_in_mbs * ps_cur_mb_info->u2_mby; |
388 | 16.7M | UPDATE_MB_MAP_MBNUM_BYTE(ps_dec->pu1_recon_mb_map, u4_mb_num); |
389 | 16.7M | } |
390 | 17.1M | ps_dec->cur_dec_mb_num++; |
391 | 17.1M | } |
392 | | |
393 | | /*N MB deblocking*/ |
394 | 1.23M | if(ps_dec->u4_nmb_deblk == 1) |
395 | 0 | { |
396 | 0 | UWORD32 u4_wd_y, u4_wd_uv; |
397 | 0 | tfr_ctxt_t *ps_tfr_cxt = &(ps_dec->s_tran_addrecon); |
398 | 0 | UWORD8 u1_field_pic_flag = ps_dec->ps_cur_slice->u1_field_pic_flag; |
399 | 0 | const WORD32 i4_cb_qp_idx_ofst = |
400 | 0 | ps_dec->ps_cur_pps->i1_chroma_qp_index_offset; |
401 | 0 | const WORD32 i4_cr_qp_idx_ofst = |
402 | 0 | ps_dec->ps_cur_pps->i1_second_chroma_qp_index_offset; |
403 | |
|
404 | 0 | u4_wd_y = ps_dec->u2_frm_wd_y << u1_field_pic_flag; |
405 | 0 | u4_wd_uv = ps_dec->u2_frm_wd_uv << u1_field_pic_flag; |
406 | |
|
407 | 0 | ps_cur_mb_info = &ps_dec->ps_frm_mb_info[ps_dec->u4_cur_deblk_mb_num]; |
408 | |
|
409 | 0 | ps_dec->u4_deblk_mb_x = ps_cur_mb_info->u2_mbx; |
410 | 0 | ps_dec->u4_deblk_mb_y = ps_cur_mb_info->u2_mby; |
411 | | |
412 | |
|
413 | 0 | for(j = 0; j < i; j++) |
414 | 0 | { |
415 | 0 | ih264d_deblock_mb_nonmbaff(ps_dec, ps_tfr_cxt, |
416 | 0 | i4_cb_qp_idx_ofst, i4_cr_qp_idx_ofst, |
417 | 0 | u4_wd_y, u4_wd_uv); |
418 | |
|
419 | 0 | } |
420 | 0 | } |
421 | | |
422 | | /*handle the last mb in picture case*/ |
423 | 1.23M | if(ps_dec->cur_dec_mb_num > ps_dec->ps_cur_sps->u4_max_mb_addr) |
424 | 104k | ps_dec->u4_cur_slice_decode_done = 1; |
425 | | |
426 | 1.23M | if(i != u4_num_mbs) |
427 | 18.1k | { |
428 | 18.1k | u4_end_of_row = 0; |
429 | | /*Number of MB's left in row*/ |
430 | 18.1k | u4_num_mbs_next = u4_num_mbs_next + ((u4_num_mbs - i) >> u1_mbaff); |
431 | 18.1k | } |
432 | | |
433 | 1.23M | ih264d_decode_tfr_nmb(ps_dec, (i), u4_num_mbs_next, u4_end_of_row); |
434 | | |
435 | 1.23M | return OK; |
436 | 1.23M | } |
437 | | |
438 | | WORD32 ih264d_decode_slice_thread(dec_struct_t *ps_dec) |
439 | 122k | { |
440 | 122k | UWORD32 u4_num_mbs_next, u4_num_mbsleft, u4_end_of_row = 0; |
441 | 122k | const UWORD32 i2_pic_wdin_mbs = ps_dec->u2_frm_wd_in_mbs; |
442 | 122k | UWORD32 u4_mbaff, u4_num_mbs; |
443 | | |
444 | 122k | UWORD16 u2_first_mb_in_slice; |
445 | 122k | UWORD16 i16_mb_x, i16_mb_y; |
446 | 122k | UWORD8 u1_field_pic; |
447 | 122k | UWORD32 u4_frame_stride, x_offset, y_offset; |
448 | 122k | WORD32 ret; |
449 | | |
450 | 122k | tfr_ctxt_t *ps_trns_addr; |
451 | | |
452 | | /*check for mb map of first mb in slice to ensure slice header is parsed*/ |
453 | 11.5M | while(1) |
454 | 11.5M | { |
455 | 11.5M | UWORD32 u4_mb_num = ps_dec->cur_dec_mb_num; |
456 | 11.5M | UWORD32 u4_cond = 0; |
457 | 11.5M | WORD32 nop_cnt = 8 * 128; |
458 | 11.5M | CHECK_MB_MAP_BYTE(u4_mb_num, ps_dec->pu1_dec_mb_map, u4_cond); |
459 | 11.5M | if(u4_cond) |
460 | 122k | { |
461 | 122k | break; |
462 | 122k | } |
463 | 11.3M | else |
464 | 11.3M | { |
465 | 11.3M | if(nop_cnt > 0) |
466 | 11.3M | { |
467 | 11.3M | nop_cnt -= 128; |
468 | 11.3M | NOP(128); |
469 | 11.3M | } |
470 | 0 | else if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
471 | 0 | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
472 | 0 | { |
473 | 0 | ps_dec->u4_fmt_conv_num_rows = |
474 | 0 | MIN(FMT_CONV_NUM_ROWS, |
475 | 0 | (ps_dec->s_disp_frame_info.u4_y_ht |
476 | 0 | - ps_dec->u4_fmt_conv_cur_row)); |
477 | 0 | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
478 | 0 | ps_dec->u4_fmt_conv_cur_row, |
479 | 0 | ps_dec->u4_fmt_conv_num_rows); |
480 | 0 | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
481 | 0 | } |
482 | 0 | else |
483 | 0 | { |
484 | 0 | nop_cnt = 8*128; |
485 | 0 | ithread_yield(); |
486 | 0 | } |
487 | 11.3M | DEBUG_THREADS_PRINTF("waiting for mb mapcur_dec_mb_num = %d,ps_dec->u4_cur_mb_addr = %d\n",u2_cur_dec_mb_num, |
488 | 11.3M | ps_dec->u4_cur_mb_addr); |
489 | | |
490 | 11.3M | } |
491 | 11.5M | } |
492 | | |
493 | | |
494 | | |
495 | 122k | u4_mbaff = ps_dec->ps_cur_slice->u1_mbaff_frame_flag; |
496 | | |
497 | 122k | u2_first_mb_in_slice = ps_dec->ps_decode_cur_slice->u4_first_mb_in_slice; |
498 | | |
499 | 122k | i16_mb_x = MOD(u2_first_mb_in_slice, i2_pic_wdin_mbs); |
500 | 122k | i16_mb_y = DIV(u2_first_mb_in_slice, i2_pic_wdin_mbs); |
501 | 122k | i16_mb_y <<= u4_mbaff; |
502 | 122k | ps_dec->i2_dec_thread_mb_y = i16_mb_y; |
503 | | |
504 | | |
505 | 122k | ps_dec->cur_dec_mb_num = u2_first_mb_in_slice << u4_mbaff; |
506 | | |
507 | 122k | if((ps_dec->u4_num_cores == 2) || !ps_dec->i1_recon_in_thread3_flag) |
508 | 79.3k | { |
509 | 79.3k | ps_dec->pv_proc_tu_coeff_data = |
510 | 79.3k | (void *) ps_dec->ps_decode_cur_slice->pv_tu_coeff_data_start; |
511 | 79.3k | } |
512 | | |
513 | | // recalculate recon pointers |
514 | 122k | u1_field_pic = ps_dec->ps_cur_slice->u1_field_pic_flag; |
515 | 122k | u4_frame_stride = ps_dec->u2_frm_wd_y << u1_field_pic; |
516 | 122k | x_offset = i16_mb_x << 4; |
517 | 122k | y_offset = (i16_mb_y * u4_frame_stride) << 4; |
518 | | |
519 | 122k | ps_trns_addr = &(ps_dec->s_tran_addrecon); |
520 | | |
521 | 122k | ps_trns_addr->pu1_dest_y = ps_dec->s_cur_pic.pu1_buf1 + x_offset + y_offset; |
522 | | |
523 | 122k | u4_frame_stride = ps_dec->u2_frm_wd_uv << u1_field_pic; |
524 | 122k | x_offset >>= 1; |
525 | 122k | y_offset = (i16_mb_y * u4_frame_stride) << 3; |
526 | | |
527 | 122k | x_offset *= YUV420SP_FACTOR; |
528 | | |
529 | 122k | ps_trns_addr->pu1_dest_u = ps_dec->s_cur_pic.pu1_buf2 + x_offset + y_offset; |
530 | 122k | ps_trns_addr->pu1_dest_v = ps_dec->s_cur_pic.pu1_buf3 + x_offset + y_offset; |
531 | | |
532 | 122k | ps_trns_addr->pu1_mb_y = ps_trns_addr->pu1_dest_y; |
533 | 122k | ps_trns_addr->pu1_mb_u = ps_trns_addr->pu1_dest_u; |
534 | 122k | ps_trns_addr->pu1_mb_v = ps_trns_addr->pu1_dest_v; |
535 | | |
536 | | |
537 | | /* Initialise MC and formMbPartInfo fn ptrs one time based on profile_idc */ |
538 | 122k | { |
539 | 122k | ps_dec->p_mc_dec_thread = ih264d_motion_compensate_bp; |
540 | 122k | ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_bp; |
541 | 122k | } |
542 | 122k | { |
543 | 122k | UWORD8 uc_nofield_nombaff; |
544 | 122k | uc_nofield_nombaff = ((ps_dec->ps_cur_slice->u1_field_pic_flag == 0) |
545 | 118k | && (ps_dec->ps_cur_slice->u1_mbaff_frame_flag == 0) |
546 | 117k | && (ps_dec->ps_decode_cur_slice->slice_type != B_SLICE) |
547 | 91.4k | && (ps_dec->ps_cur_pps->u1_wted_pred_flag == 0)); |
548 | | |
549 | 122k | if(uc_nofield_nombaff == 0) |
550 | 75.8k | { |
551 | 75.8k | ps_dec->p_mc_dec_thread = ih264d_motion_compensate_mp; |
552 | 75.8k | ps_dec->p_form_mb_part_info_thread = ih264d_form_mb_part_info_mp; |
553 | 75.8k | } |
554 | | |
555 | 122k | } |
556 | | |
557 | 122k | ps_dec->u4_cur_slice_decode_done = 0; |
558 | | |
559 | | |
560 | 1.35M | while(ps_dec->u4_cur_slice_decode_done != 1) |
561 | 1.23M | { |
562 | | |
563 | 1.23M | u4_num_mbsleft = ((i2_pic_wdin_mbs - i16_mb_x) << u4_mbaff); |
564 | | |
565 | 1.23M | if(u4_num_mbsleft <= ps_dec->u4_recon_mb_grp) |
566 | 1.23M | { |
567 | 1.23M | u4_num_mbs = u4_num_mbsleft; |
568 | | |
569 | | /*Indicate number of mb's left in a row*/ |
570 | 1.23M | u4_num_mbs_next = 0; |
571 | 1.23M | u4_end_of_row = 1; |
572 | 1.23M | i16_mb_x = 0; |
573 | 1.23M | } |
574 | 0 | else |
575 | 0 | { |
576 | 0 | u4_num_mbs = ps_dec->u4_recon_mb_grp; |
577 | | |
578 | | /*Indicate number of mb's left in a row*/ |
579 | 0 | u4_num_mbs_next = i2_pic_wdin_mbs - i16_mb_x |
580 | 0 | - (ps_dec->u4_recon_mb_grp >> u4_mbaff); |
581 | 0 | i16_mb_x += (u4_num_mbs >> u4_mbaff); |
582 | 0 | u4_end_of_row = 0; |
583 | |
|
584 | 0 | } |
585 | 1.23M | ret = ih264d_decode_recon_tfr_nmb_thread(ps_dec, u4_num_mbs, u4_num_mbs_next, |
586 | 1.23M | u4_end_of_row); |
587 | 1.23M | if(ret != OK) |
588 | 0 | return ret; |
589 | 1.23M | } |
590 | 122k | return OK; |
591 | 122k | } |
592 | | |
593 | | void ih264d_decode_picture_thread(dec_struct_t *ps_dec ) |
594 | 100k | { |
595 | 100k | ithread_set_name("ih264d_decode_picture_thread"); |
596 | | |
597 | 104k | while(1) |
598 | 104k | { |
599 | 104k | WORD32 ret; |
600 | 104k | if(ps_dec->i4_threads_active) |
601 | 4.41k | { |
602 | 4.41k | ret = ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]); |
603 | 4.41k | if(OK != ret) |
604 | 0 | break; |
605 | | |
606 | 8.67k | while(ps_dec->ai4_process_start[0] != PROC_START) |
607 | 4.25k | { |
608 | 4.25k | ithread_cond_wait(ps_dec->apv_proc_start_condition[0], |
609 | 4.25k | ps_dec->apv_proc_start_mutex[0]); |
610 | 4.25k | } |
611 | 4.41k | ps_dec->ai4_process_start[0] = PROC_IN_PROGRESS; |
612 | | |
613 | 4.41k | ret = ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]); |
614 | 4.41k | if(OK != ret || ps_dec->i4_break_threads == 1) |
615 | 162 | break; |
616 | 4.41k | } |
617 | 122k | while(1) |
618 | 122k | { |
619 | | /*Complete all writes before processing next slice*/ |
620 | | |
621 | 122k | DEBUG_THREADS_PRINTF(" Entering decode slice\n"); |
622 | | |
623 | 122k | ih264d_decode_slice_thread(ps_dec); |
624 | 122k | DEBUG_THREADS_PRINTF(" Exit ih264d_decode_slice_thread \n"); |
625 | | |
626 | | |
627 | 122k | if(ps_dec->cur_dec_mb_num |
628 | 122k | > ps_dec->ps_cur_sps->u4_max_mb_addr) |
629 | 104k | { |
630 | | /*Last slice in frame*/ |
631 | 104k | break; |
632 | 104k | } |
633 | 18.1k | else |
634 | 18.1k | { |
635 | 18.1k | ps_dec->ps_decode_cur_slice++; |
636 | 18.1k | ps_dec->u2_cur_slice_num_dec_thread++; |
637 | 18.1k | } |
638 | | |
639 | 122k | } |
640 | 104k | if(ps_dec->u4_output_present && (2 == ps_dec->u4_num_cores) && |
641 | 29.4k | (ps_dec->u4_fmt_conv_cur_row < ps_dec->s_disp_frame_info.u4_y_ht)) |
642 | 4.85k | { |
643 | 4.85k | ps_dec->u4_fmt_conv_num_rows = |
644 | 4.85k | (ps_dec->s_disp_frame_info.u4_y_ht |
645 | 4.85k | - ps_dec->u4_fmt_conv_cur_row); |
646 | 4.85k | ih264d_format_convert(ps_dec, &(ps_dec->s_disp_op), |
647 | 4.85k | ps_dec->u4_fmt_conv_cur_row, |
648 | 4.85k | ps_dec->u4_fmt_conv_num_rows); |
649 | 4.85k | ps_dec->u4_fmt_conv_cur_row += ps_dec->u4_fmt_conv_num_rows; |
650 | 4.85k | } |
651 | | |
652 | 104k | if(ps_dec->i4_threads_active) |
653 | 4.25k | { |
654 | 4.25k | ret = ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]); |
655 | 4.25k | if(OK != ret) |
656 | 0 | break; |
657 | | |
658 | 4.25k | ps_dec->ai4_process_done[0] = PROC_DONE; |
659 | 4.25k | ithread_cond_signal(ps_dec->apv_proc_done_condition[0]); |
660 | | |
661 | 4.25k | ret = ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]); |
662 | 4.25k | if(OK != ret) |
663 | 0 | break; |
664 | 4.25k | } |
665 | 100k | else |
666 | 100k | { |
667 | 100k | break; |
668 | 100k | } |
669 | 104k | } |
670 | 100k | } |
671 | | |
672 | | void ih264d_signal_decode_thread(dec_struct_t *ps_dec) |
673 | 184k | { |
674 | 184k | if(ps_dec->u4_dec_thread_created == 1) |
675 | 127k | { |
676 | 127k | if(ps_dec->i4_threads_active) |
677 | 5.22k | { |
678 | 5.22k | proc_state_t i4_process_state; |
679 | 5.22k | ithread_mutex_lock(ps_dec->apv_proc_start_mutex[0]); |
680 | 5.22k | i4_process_state = ps_dec->ai4_process_start[0]; |
681 | 5.22k | ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[0]); |
682 | | |
683 | | // only wait if the thread has started decoding |
684 | 5.22k | if(i4_process_state != PROC_INIT) |
685 | 4.25k | { |
686 | 4.25k | ithread_mutex_lock(ps_dec->apv_proc_done_mutex[0]); |
687 | | |
688 | 8.16k | while(ps_dec->ai4_process_done[0] != PROC_DONE) |
689 | 3.90k | { |
690 | 3.90k | ithread_cond_wait(ps_dec->apv_proc_done_condition[0], |
691 | 3.90k | ps_dec->apv_proc_done_mutex[0]); |
692 | 3.90k | } |
693 | 4.25k | ps_dec->ai4_process_done[0] = PROC_INIT; |
694 | 4.25k | ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[0]); |
695 | 4.25k | } |
696 | 5.22k | } |
697 | 121k | else |
698 | 121k | { |
699 | 121k | ithread_join(ps_dec->pv_dec_thread_handle, NULL); |
700 | 121k | ps_dec->u4_dec_thread_created = 0; |
701 | 121k | } |
702 | 127k | } |
703 | 184k | } |
704 | | void ih264d_signal_bs_deblk_thread(dec_struct_t *ps_dec) |
705 | 95.4k | { |
706 | 95.4k | if(ps_dec->u4_bs_deblk_thread_created) |
707 | 39.3k | { |
708 | 39.3k | if(ps_dec->i4_threads_active) |
709 | 2.41k | { |
710 | 2.41k | proc_state_t i4_process_state; |
711 | 2.41k | ithread_mutex_lock(ps_dec->apv_proc_start_mutex[1]); |
712 | 2.41k | i4_process_state = ps_dec->ai4_process_start[1]; |
713 | 2.41k | ithread_mutex_unlock(ps_dec->apv_proc_start_mutex[1]); |
714 | | |
715 | | // only wait if the thread has started deblking |
716 | 2.41k | if(i4_process_state != PROC_INIT) |
717 | 1.94k | { |
718 | 1.94k | ithread_mutex_lock(ps_dec->apv_proc_done_mutex[1]); |
719 | | |
720 | 2.25k | while(ps_dec->ai4_process_done[1] != PROC_DONE) |
721 | 311 | { |
722 | 311 | ithread_cond_wait(ps_dec->apv_proc_done_condition[1], |
723 | 311 | ps_dec->apv_proc_done_mutex[1]); |
724 | 311 | } |
725 | 1.94k | ps_dec->ai4_process_done[1] = PROC_INIT; |
726 | 1.94k | ithread_mutex_unlock(ps_dec->apv_proc_done_mutex[1]); |
727 | 1.94k | } |
728 | 2.41k | } |
729 | 36.9k | else |
730 | 36.9k | { |
731 | 36.9k | ithread_join(ps_dec->pv_bs_deblk_thread_handle, NULL); |
732 | 36.9k | ps_dec->u4_bs_deblk_thread_created = 0; |
733 | 36.9k | } |
734 | 39.3k | } |
735 | | |
736 | 95.4k | } |