/src/libhevc/encoder/ihevce_multi_thrd_funcs.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ihevce_multi_thrd_funcs.c |
24 | | * |
25 | | * @brief |
26 | | * Contains functions for the job queues, and related helpers, required for multi-threading |
27 | | * |
28 | | * @author |
29 | | * Ittiam |
30 | | * |
31 | | * @par List of Functions: |
32 | | * <TODO: TO BE ADDED> |
33 | | * |
34 | | * @remarks |
35 | | * None |
36 | | * |
37 | | ******************************************************************************* |
38 | | */ |
39 | | /*****************************************************************************/ |
40 | | /* File Includes */ |
41 | | /*****************************************************************************/ |
42 | | /* System include files */ |
43 | | #include <stdio.h> |
44 | | #include <string.h> |
45 | | #include <stdlib.h> |
46 | | #include <assert.h> |
47 | | #include <stdarg.h> |
48 | | #include <math.h> |
49 | | |
50 | | /* User include files */ |
51 | | #include "ihevc_typedefs.h" |
52 | | #include "itt_video_api.h" |
53 | | #include "ihevce_api.h" |
54 | | |
55 | | #include "rc_cntrl_param.h" |
56 | | #include "rc_frame_info_collector.h" |
57 | | #include "rc_look_ahead_params.h" |
58 | | |
59 | | #include "ihevc_defs.h" |
60 | | #include "ihevc_structs.h" |
61 | | #include "ihevc_platform_macros.h" |
62 | | #include "ihevc_deblk.h" |
63 | | #include "ihevc_itrans_recon.h" |
64 | | #include "ihevc_chroma_itrans_recon.h" |
65 | | #include "ihevc_chroma_intra_pred.h" |
66 | | #include "ihevc_intra_pred.h" |
67 | | #include "ihevc_inter_pred.h" |
68 | | #include "ihevc_mem_fns.h" |
69 | | #include "ihevc_padding.h" |
70 | | #include "ihevc_weighted_pred.h" |
71 | | #include "ihevc_sao.h" |
72 | | #include "ihevc_resi_trans.h" |
73 | | #include "ihevc_quant_iquant_ssd.h" |
74 | | #include "ihevc_cabac_tables.h" |
75 | | |
76 | | #include "ihevce_defs.h" |
77 | | #include "ihevce_lap_enc_structs.h" |
78 | | #include "ihevce_multi_thrd_structs.h" |
79 | | #include "ihevce_multi_thrd_funcs.h" |
80 | | #include "ihevce_me_common_defs.h" |
81 | | #include "ihevce_had_satd.h" |
82 | | #include "ihevce_error_codes.h" |
83 | | #include "ihevce_bitstream.h" |
84 | | #include "ihevce_cabac.h" |
85 | | #include "ihevce_rdoq_macros.h" |
86 | | #include "ihevce_function_selector.h" |
87 | | #include "ihevce_enc_structs.h" |
88 | | #include "ihevce_entropy_structs.h" |
89 | | #include "ihevce_cmn_utils_instr_set_router.h" |
90 | | #include "ihevce_enc_loop_structs.h" |
91 | | #include "ihevce_bs_compute_ctb.h" |
92 | | #include "ihevce_global_tables.h" |
93 | | #include "ihevce_dep_mngr_interface.h" |
94 | | #include "hme_datatype.h" |
95 | | #include "hme_interface.h" |
96 | | #include "hme_common_defs.h" |
97 | | #include "hme_defs.h" |
98 | | #include "ihevce_me_instr_set_router.h" |
99 | | #include "ihevce_ipe_instr_set_router.h" |
100 | | #include "ihevce_ipe_structs.h" |
101 | | #include "ihevce_coarse_me_pass.h" |
102 | | |
103 | | #include "cast_types.h" |
104 | | #include "osal.h" |
105 | | #include "osal_defaults.h" |
106 | | |
107 | | /********************************************************************/ |
108 | | /*Macros */ |
109 | | /********************************************************************/ |
110 | | #define MULT_FACT 100 /* NOTE(review): not referenced in the portion of the file reviewed here; purpose to be confirmed */ |
111 | | |
112 | | /*****************************************************************************/ |
113 | | /* Function Definitions */ |
114 | | /*****************************************************************************/ |
115 | | static inline WORD32 ihevce_is_nonzero(volatile UWORD8 *buf, WORD32 size) /* returns 1 if any of the first size bytes of buf is nonzero, otherwise 0 (also 0 when size <= 0) */ |
116 | 3.88M | { |
117 | 3.88M | WORD32 i; |
118 | 314M | for (i = 0; i < size; i++) /* buf is volatile-qualified: every byte is re-read on each call, which the polling loops in this file rely on */ |
119 | 310M | { |
120 | 310M | if (buf[i]) |
121 | 0 | return 1; /* stop at the first nonzero byte */ |
122 | 310M | } |
123 | 3.88M | return 0; |
124 | 3.88M | } |
125 | | /** |
126 | | ******************************************************************************* |
127 | | * |
128 | | * @brief Pops the next job from the requested pre-encode job queue |
129 | | * |
130 | | * @par Description: The queue handle is read and advanced under the mutex of the stage the job type belongs to |
131 | | * |
132 | | * @param[inout] pv_multi_thrd_ctxt |
133 | | * Pointer to Multi thread context |
134 | | * |
135 | | * @param[in] i4_job_type |
136 | | * Job type from which a job needs to be popped out |
137 | | * |
138 | | * @param[in] i4_blocking_mode |
139 | | * 1: spin until the job's input dependencies are cleared; any other value: return NULL if a dependency is pending |
140 | | * |
141 | | * @returns |
142 | | * Pointer to the popped job, or NULL if the queue is empty (or, in non-blocking mode, the job is not ready yet) |
143 | | * |
144 | | * @remarks |
145 | | * i4_ping_pong selects which set of pre-encode job queue handles is used |
146 | | ******************************************************************************* |
147 | | */ |
148 | | void *ihevce_pre_enc_grp_get_next_job( |
149 | | void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_ping_pong) |
150 | 4.60M | { |
151 | | /* Local variables */ |
152 | 4.60M | multi_thrd_ctxt_t *ps_multi_thrd; |
153 | 4.60M | job_queue_handle_t *ps_job_queue_hdl; |
154 | 4.60M | void *pv_next = NULL; |
155 | 4.60M | void *pv_job_q_mutex_hdl_pre_enc = NULL; |
156 | | |
157 | | /* Derive local variables: the handle is indexed by [ping-pong instance][job type] */ |
158 | 4.60M | ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
159 | 4.60M | ps_job_queue_hdl = |
160 | 4.60M | (job_queue_handle_t *)&ps_multi_thrd->as_job_que_preenc_hdls[i4_ping_pong][i4_job_type]; |
161 | | |
162 | | /* lock the mutex for Q access */ |
163 | | /* As the design must allow parallelism in each stage, |
164 | | a separate mutex is used per stage: decomp, HME and L0 IPE */ |
165 | 4.60M | if(i4_job_type < ME_JOB_LYR4) |
166 | 2.17M | { |
167 | 2.17M | pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_decomp; |
168 | 2.17M | } |
169 | 2.43M | else if(i4_job_type < IPE_JOB_LYR0) |
170 | 2.08M | { |
171 | 2.08M | pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_hme; |
172 | 2.08M | } |
173 | 347k | else |
174 | 347k | { |
175 | 347k | pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_l0ipe; |
176 | 347k | } |
177 | | |
178 | 4.60M | osal_mutex_lock(pv_job_q_mutex_hdl_pre_enc); |
179 | | /* Get the next job; NULL means this queue has no job left */ |
180 | 4.60M | pv_next = ps_job_queue_hdl->pv_next; |
181 | | |
182 | | /* Update the next by checking input dependency */ |
183 | 4.60M | if(NULL != pv_next) |
184 | 3.55M | { |
185 | 3.55M | job_queue_t *ps_job_queue = (job_queue_t *)pv_next; |
186 | | |
187 | | /* check for input dependencies to be resolved */ |
188 | | /* this can be blocking or non blocking based on use case */ |
189 | | /* if non blocking then the function returns NULL while a dependency is pending */ |
190 | | |
191 | 3.55M | if(1 == i4_blocking_mode) |
192 | 3.55M | { |
193 | 3.55M | while(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP)); /* busy-wait until all MAX_IN_DEP bytes read zero; the stage mutex stays locked while spinning */ |
194 | | |
195 | | /* update the next job in the queue */ |
196 | 3.55M | ps_job_queue_hdl->pv_next = ps_job_queue->pv_next; |
197 | 3.55M | } |
198 | 0 | else |
199 | 0 | { |
200 | | /* check for input dependency resolved */ |
201 | 0 | if(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP)) |
202 | 0 | { |
203 | | /* return null and leave the handle untouched so the same job is offered again on the next call */ |
204 | 0 | pv_next = NULL; |
205 | 0 | } |
206 | 0 | else |
207 | 0 | { |
208 | | /* update the next job in the queue */ |
209 | 0 | ps_job_queue_hdl->pv_next = ps_job_queue->pv_next; |
210 | 0 | } |
211 | 0 | } |
212 | 3.55M | } |
213 | | |
214 | | /* unlock the mutex */ |
215 | 4.60M | osal_mutex_unlock(pv_job_q_mutex_hdl_pre_enc); |
216 | | |
217 | | /* Return the popped job (or NULL) */ |
218 | 4.60M | return (pv_next); |
219 | | |
220 | 4.60M | } /* End of get_next_job */ |
221 | | |
222 | | /** |
223 | | ******************************************************************************* |
224 | | * |
225 | | * @brief Pops the next job from the requested encode-group job queue |
226 | | * |
227 | | * @par Description: The queue handle is read and advanced under the ME or the enc-loop queue mutex |
228 | | * |
229 | | * @param[inout] pv_multi_thrd_ctxt |
230 | | * Pointer to Multi thread context |
231 | | * |
232 | | * @param[in] i4_job_type |
233 | | * Job type from which a job needs to be popped out |
234 | | * |
235 | | * @param[in] i4_blocking_mode |
236 | | * 1: spin until the job's input dependencies are cleared; any other value: return NULL if a dependency is pending |
237 | | * |
238 | | * @returns |
239 | | * Pointer to the popped job, or NULL if the queue is empty (or, in non-blocking mode, the job is not ready yet) |
240 | | * |
241 | | * @remarks |
242 | | * i4_curr_frm_id indexes aps_cur_out_me_prms[] / aps_cur_inp_enc_prms[] to find the frame's queue handles |
243 | | ******************************************************************************* |
244 | | */ |
245 | | void *ihevce_enc_grp_get_next_job( |
246 | | void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_curr_frm_id) |
247 | 638k | { |
248 | | /* Local variables */ |
249 | 638k | multi_thrd_ctxt_t *ps_multi_thrd; |
250 | 638k | job_queue_handle_t *ps_job_queue_hdl; |
251 | 638k | void *pv_next = NULL; |
252 | 638k | void *pv_job_q_mutex_hdl_enc_grp; |
253 | | |
254 | | /* Derive local variables */ |
255 | 638k | ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
256 | | |
257 | 638k | if(ME_JOB_ENC_LYR == i4_job_type) /* ME jobs: handle taken from the ME output params, guarded by the ME mutex */ |
258 | 265k | { |
259 | 265k | pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_me; |
260 | | |
261 | 265k | ps_job_queue_hdl = (job_queue_handle_t *)&ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id] |
262 | 265k | ->as_job_que_enc_hdls[i4_job_type]; |
263 | 265k | } |
264 | 372k | else /* every other job type: handle taken from the enc input params, guarded by the enc-loop mutex */ |
265 | 372k | { |
266 | 372k | pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_enc_loop; |
267 | 372k | ps_job_queue_hdl = |
268 | 372k | (job_queue_handle_t *)&ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id] |
269 | 372k | ->as_job_que_enc_hdls[i4_job_type]; |
270 | 372k | } |
271 | | |
272 | | /* lock the mutex for Q access */ |
273 | 638k | osal_mutex_lock(pv_job_q_mutex_hdl_enc_grp); |
274 | | |
275 | | /* Get the next job; NULL means this queue has no job left */ |
276 | 638k | pv_next = ps_job_queue_hdl->pv_next; |
277 | | |
278 | | /* Update the next by checking input dependency */ |
279 | 638k | if(NULL != pv_next) |
280 | 331k | { |
281 | 331k | job_queue_t *ps_job_queue = (job_queue_t *)pv_next; |
282 | | |
283 | | /* check for input dependencies to be resolved */ |
284 | | /* this can be blocking or non blocking based on use case */ |
285 | | /* if non blocking then the function returns NULL while a dependency is pending */ |
286 | | |
287 | 331k | if(1 == i4_blocking_mode) |
288 | 331k | { |
289 | 331k | while(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP)); /* busy-wait until all MAX_IN_DEP bytes read zero; the queue mutex stays locked while spinning */ |
290 | | |
291 | | /* update the next job in the queue */ |
292 | 331k | ps_job_queue_hdl->pv_next = ps_job_queue->pv_next; |
293 | 331k | } |
294 | 0 | else |
295 | 0 | { |
296 | | /* check for input dependency resolved */ |
297 | 0 | if(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP)) |
298 | 0 | { |
299 | | /* return null and leave the handle untouched so the same job is offered again on the next call */ |
300 | 0 | pv_next = NULL; |
301 | 0 | } |
302 | 0 | else |
303 | 0 | { |
304 | | /* update the next job in the queue */ |
305 | 0 | ps_job_queue_hdl->pv_next = ps_job_queue->pv_next; |
306 | 0 | } |
307 | 0 | } |
308 | 331k | } |
309 | | |
310 | | /* unlock the mutex */ |
311 | 638k | osal_mutex_unlock(pv_job_q_mutex_hdl_enc_grp); |
312 | | |
313 | | /* Return the popped job (or NULL) */ |
314 | 638k | return (pv_next); |
315 | | |
316 | 638k | } /* End of get_next_job */ |
317 | | |
318 | | /** |
319 | | ******************************************************************************* |
320 | | * |
321 | | * @brief Marks the output dependencies of a finished pre-encode job as done |
322 | | * |
323 | | * @par Description: Writes 0 to one byte per output dependency, located at a stored byte offset in the job queue buffer |
324 | | * |
325 | | * @param[inout] pv_multi_thrd_ctxt |
326 | | * Pointer to Multi thread context |
327 | | * |
328 | | * @param[in] ps_curr_job |
329 | | * Current finished Job pointer |
330 | | * |
331 | | * @returns |
332 | | * None |
333 | | * |
334 | | * @remarks |
335 | | * i4_ping_pong selects the pre-encode job queue buffer (aps_job_q_pre_enc[]) the offsets are applied to |
336 | | ******************************************************************************* |
337 | | */ |
338 | | void ihevce_pre_enc_grp_job_set_out_dep( |
339 | | void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_ping_pong) |
340 | 3.55M | { |
341 | | /* local variables */ |
342 | 3.55M | WORD32 ctr; |
343 | 3.55M | multi_thrd_ctxt_t *ps_multi_thrd; |
344 | | |
345 | 3.55M | ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
346 | | |
347 | | /* loop over number output dependencies */ |
348 | 13.4M | for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++) |
349 | 9.90M | { |
350 | 9.90M | UWORD8 *pu1_ptr; |
351 | | |
352 | 9.90M | pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_job_q_pre_enc[i4_ping_pong]; /* base of the job queue buffer, addressed in bytes */ |
353 | 9.90M | pu1_ptr += ps_curr_job->au4_out_ofsts[ctr]; /* au4_out_ofsts[] holds byte offsets from that base */ |
354 | 9.90M | *pu1_ptr = 0; /* presumably an au1_in_dep[] byte of the dependent job, as polled by the get_next_job functions - confirm against the queue setup code */ |
355 | 9.90M | } |
356 | | |
357 | 3.55M | return; |
358 | 3.55M | } |
359 | | |
360 | | /** |
361 | | ******************************************************************************* |
362 | | * |
363 | | * @brief Marks the output dependencies of a finished encode-group job as done |
364 | | * |
365 | | * @par Description: Writes 0 to one byte per output dependency, located at a stored byte offset in the frame's job queue buffer |
366 | | * |
367 | | * @param[inout] pv_multi_thrd_ctxt |
368 | | * Pointer to Multi thread context |
369 | | * |
370 | | * @param[in] ps_curr_job |
371 | | * Current finished Job pointer |
372 | | * |
373 | | * @returns |
374 | | * None |
375 | | * |
376 | | * @remarks |
377 | | * i4_curr_frm_id indexes aps_cur_out_me_prms[] / aps_cur_inp_enc_prms[] to find the frame's job queue buffer |
378 | | ******************************************************************************* |
379 | | */ |
380 | | void ihevce_enc_grp_job_set_out_dep( |
381 | | void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_curr_frm_id) |
382 | 331k | { |
383 | | /* local variables */ |
384 | 331k | WORD32 ctr; |
385 | 331k | UWORD8 *pu1_ptr; |
386 | 331k | multi_thrd_ctxt_t *ps_multi_thrd; |
387 | | |
388 | 331k | ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
389 | | |
390 | 331k | if(ME_JOB_ENC_LYR == ps_curr_job->i4_task_type) /* the base buffer is looked up through the ME output params for ME jobs ... */ |
391 | 136k | { |
392 | 136k | pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc; |
393 | 136k | } |
394 | 195k | else /* ... and through the enc input params for all other task types */ |
395 | 195k | { |
396 | 195k | pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]->ps_job_q_enc; |
397 | 195k | } |
398 | | |
399 | | /* loop over number output dependencies */ |
400 | 467k | for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++) |
401 | 136k | { |
402 | 136k | WORD32 i4_off; |
403 | 136k | i4_off = ps_curr_job->au4_out_ofsts[ctr]; /* byte offset from the start of the job queue buffer */ |
404 | 136k | pu1_ptr[i4_off] = 0; /* presumably an au1_in_dep[] byte of the dependent job - confirm against job_queue_t layout */ |
405 | 136k | } |
406 | | |
407 | 331k | return; |
408 | 331k | } |
409 | | |
410 | | /** |
411 | | ******************************************************************************* |
412 | | * |
413 | | * @brief Builds the ME and enc-loop job queues of one frame (one job per CTB row per tile column) |
414 | | * |
415 | | * @par Description: Based on picture type sets the input and output dependency of every job |
416 | | * |
417 | | * @param[inout] pv_enc_ctxt |
418 | | * Pointer to encoder context |
419 | | * |
420 | | * @param[in] ps_curr_inp |
421 | | * Current Input buffer pointer; only s_lap_out.i4_pic_type is read here |
422 | | * |
423 | | * @returns |
424 | | * None |
425 | | * |
426 | | * @remarks |
427 | | * i4_curr_frm_id indexes aps_cur_out_me_prms[] to select the job queue buffer and handles that get filled |
428 | | ******************************************************************************* |
429 | | */ |
430 | | void ihevce_prepare_job_queue( |
431 | | void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_curr_frm_id) |
432 | 177k | { |
433 | | /* local variables */ |
434 | 177k | enc_ctxt_t *ps_ctxt; |
435 | 177k | job_queue_t *ps_me_job_queue_lyr0; |
436 | 177k | job_queue_t *ps_enc_loop_job_queue; |
437 | 177k | WORD32 pass; |
438 | 177k | WORD32 num_jobs, col_tile_ctr; |
439 | 177k | WORD32 num_ctb_vert_rows; |
440 | 177k | WORD32 i4_pic_type; |
441 | 177k | WORD32 i; //counter for bitrate instances |
442 | 177k | WORD32 i4_num_bitrate_instances; |
443 | 177k | WORD32 i4_num_tile_col; |
444 | | |
445 | | /* derive local variables */ |
446 | 177k | ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt; |
447 | 177k | num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert; |
448 | 177k | i4_num_bitrate_instances = ps_ctxt->i4_num_bitrates; |
449 | | |
450 | 177k | i4_num_tile_col = 1; /* a single tile column unless tiles are enabled */ |
451 | 177k | if(1 == ps_ctxt->ps_tile_params_base->i4_tiles_enabled_flag) |
452 | 0 | { |
453 | 0 | i4_num_tile_col = ps_ctxt->ps_tile_params_base->i4_num_tile_cols; |
454 | 0 | } |
455 | | /* memset the entire job que buffer to zero */ |
456 | 177k | memset( |
457 | 177k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc, |
458 | 177k | 0, |
459 | 177k | MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES * i4_num_tile_col * sizeof(job_queue_t)); |
460 | | |
461 | | /* get the start address of Job queues: the ME queue comes first, each further queue starts (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM) entries later */ |
462 | 177k | ps_me_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc; |
463 | 177k | ps_enc_loop_job_queue = ps_me_job_queue_lyr0 + (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM); |
464 | | |
465 | | /* store the JOB queue in the Job handle */ |
466 | 177k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
467 | 177k | ->as_job_que_enc_hdls[ME_JOB_ENC_LYR] |
468 | 177k | .pv_next = (void *)ps_me_job_queue_lyr0; |
469 | | /* store the JOB queue in the Job handle for reenc */ |
470 | 177k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
471 | 177k | ->as_job_que_enc_hdls_reenc[ME_JOB_ENC_LYR] |
472 | 177k | .pv_next = (void *)ps_me_job_queue_lyr0; |
473 | | |
474 | 355k | for(i = 0; i < i4_num_bitrate_instances; i++) /* one enc-loop queue (and handle) per bitrate instance */ |
475 | 177k | { |
476 | 177k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
477 | 177k | ->as_job_que_enc_hdls[ENC_LOOP_JOB + i] |
478 | 177k | .pv_next = (void *)ps_enc_loop_job_queue; |
479 | 177k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
480 | 177k | ->as_job_que_enc_hdls_reenc[ENC_LOOP_JOB + i] |
481 | 177k | .pv_next = (void *)ps_enc_loop_job_queue; |
482 | 177k | ps_enc_loop_job_queue += (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM); |
483 | 177k | } |
484 | | |
485 | 177k | i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type; |
486 | | |
487 | | //prepare ME JOB queue first |
488 | | //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++) |
489 | 177k | { |
490 | 177k | job_queue_t *ps_job_queue_curr; |
491 | 177k | job_queue_t *ps_job_queue_next; |
492 | 177k | WORD32 ctr; |
493 | 177k | WORD32 inp_dep; |
494 | 177k | WORD32 out_dep; |
495 | 177k | WORD32 num_vert_units; |
496 | 177k | HEVCE_ENC_JOB_TYPES_T task_type; |
497 | | |
498 | 177k | pass = 0; //compared against ME_JOB_ENC_LYR below (the earlier note here said "= ENC_LOOP_JOB") |
499 | | |
500 | 177k | { |
501 | | /* one ME job row per CTB row; each ME job releases the matching job of the first (reference) enc-loop queue */ |
502 | 177k | num_vert_units = num_ctb_vert_rows; |
503 | 177k | task_type = ME_JOB_ENC_LYR; |
504 | 177k | ps_job_queue_curr = ps_me_job_queue_lyr0; |
505 | 177k | ps_job_queue_next = |
506 | 177k | (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
507 | 177k | ->as_job_que_enc_hdls[ENC_LOOP_JOB] |
508 | 177k | .pv_next; |
509 | 177k | inp_dep = 0; |
510 | 177k | out_dep = 1; //set reference bit-rate's input dependency |
511 | 177k | } |
512 | | |
513 | 177k | if((ME_JOB_ENC_LYR == pass) && |
514 | 177k | ((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) && !L0ME_IN_OPENLOOP_MODE) |
515 | 48.0k | { |
516 | | //I/IDR picture with L0ME_IN_OPENLOOP_MODE off: no ME jobs are queued. NOTE(review): the ME handle set above still points at the zeroed first entry, not NULL - confirm callers never pop ME jobs for such pictures |
517 | 48.0k | } |
518 | 129k | else |
519 | 129k | { |
520 | | /* loop over all the vertical rows */ |
521 | 265k | for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++) |
522 | 136k | { |
523 | | /* loop over all the column tiles */ |
524 | 272k | for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++) |
525 | 136k | { |
526 | 136k | ULWORD64 u8_temp; |
527 | | |
528 | 136k | { |
529 | 136k | ps_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs; |
530 | 136k | ps_job_queue_curr->s_job_info.s_me_job_info.i4_tile_col_idx = col_tile_ctr; |
531 | 136k | } |
532 | | |
533 | 136k | ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1); /* jobs are chained in array order; the last link is reset to NULL after the loops */ |
534 | | |
535 | 136k | ps_job_queue_curr->i4_task_type = task_type; |
536 | | |
537 | 136k | ps_job_queue_curr->i4_num_input_dep = inp_dep; |
538 | | |
539 | | /* set the entire input dep buffer to default value 0 */ |
540 | 136k | memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP); |
541 | | |
542 | | /* set the input dep buffer to 1 for num inp dep */ |
543 | 136k | if(0 != inp_dep) |
544 | 0 | { |
545 | 0 | memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep); |
546 | 0 | } |
547 | | |
548 | 136k | ps_job_queue_curr->i4_num_output_dep = out_dep; |
549 | | |
550 | | /* set the entire offset buffer to default value (every byte 0xFF) */ |
551 | 136k | memset( |
552 | 136k | &ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP); |
553 | | |
554 | 272k | for(ctr = 0; ctr < out_dep; ctr++) |
555 | 136k | { |
556 | | /* col tile level dependency b/w ME & EncLoop: element distance from the buffer start to the enc-loop job of the same row and tile column */ |
557 | 136k | u8_temp = (ULWORD64)( |
558 | 136k | &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] - |
559 | 136k | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc); |
560 | | |
561 | 136k | u8_temp *= sizeof(job_queue_t); /* convert the element distance into a byte offset */ |
562 | | |
563 | | /* store the offset to the array; ihevce_enc_grp_job_set_out_dep() zeroes the byte at this offset */ |
564 | 136k | ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp; |
565 | 136k | } |
566 | | |
567 | 136k | ps_job_queue_curr++; |
568 | 136k | } |
569 | 136k | } //for ends |
570 | | |
571 | | /* set the last pointer to NULL; NOTE(review): stepping back assumes at least one job was written above */ |
572 | 129k | ps_job_queue_curr--; |
573 | 129k | ps_job_queue_curr->pv_next = (void *)NULL; |
574 | 129k | } //else ends |
575 | 177k | } |
576 | | |
577 | | //prepare Enc_loop JOB queue for all bitrate instances |
578 | | //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++) |
579 | 355k | for(i = 0; i < i4_num_bitrate_instances; i++) |
580 | 177k | { |
581 | 177k | job_queue_t *ps_job_queue_curr; |
582 | 177k | job_queue_t *ps_job_queue_next; |
583 | 177k | WORD32 ctr; |
584 | 177k | WORD32 inp_dep; |
585 | 177k | WORD32 out_dep; |
586 | 177k | WORD32 num_vert_units; |
587 | 177k | HEVCE_ENC_JOB_TYPES_T task_type; |
588 | | |
589 | | /* In case of I or IDR pictures ME will not perform any processing, so enc-loop jobs start with no pending input dependency */ |
590 | | //if(ENC_LOOP_JOB == pass) |
591 | 177k | { |
592 | 177k | if(((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) && |
593 | 177k | !L0ME_IN_OPENLOOP_MODE) |
594 | 48.0k | { |
595 | 48.0k | inp_dep = 0; |
596 | 48.0k | } |
597 | 129k | else |
598 | 129k | { |
599 | 129k | inp_dep = 1; |
600 | 129k | } |
601 | | |
602 | 177k | task_type = (HEVCE_ENC_JOB_TYPES_T)(ENC_LOOP_JOB + i); |
603 | 177k | ps_job_queue_curr = |
604 | 177k | (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
605 | 177k | ->as_job_que_enc_hdls[ENC_LOOP_JOB + i] |
606 | 177k | .pv_next; |
607 | 177k | ps_job_queue_next = |
608 | 177k | (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id] |
609 | 177k | ->as_job_que_enc_hdls[ENC_LOOP_JOB + i + 1] |
610 | 177k | .pv_next; /* NOTE(review): index ENC_LOOP_JOB + i + 1 is read even for the last instance (the value is discarded just below) - confirm it stays inside as_job_que_enc_hdls[] */ |
611 | 177k | out_dep = 1; //output dependency is the next bit-rate instance's input dependency |
612 | 177k | num_vert_units = num_ctb_vert_rows; |
613 | | |
614 | 177k | if(i == i4_num_bitrate_instances - 1) //for last bit-rate instance |
615 | 177k | { |
616 | | //clear output dependency |
617 | 177k | ps_job_queue_next = NULL; |
618 | 177k | out_dep = 0; |
619 | 177k | } |
620 | 177k | } |
621 | | |
622 | | /* loop over all the vertical rows */ |
623 | 372k | for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++) |
624 | 195k | { |
625 | | /* loop over all the column tiles */ |
626 | 390k | for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++) |
627 | 195k | { |
628 | 195k | ULWORD64 u8_temp; |
629 | | |
630 | 195k | { |
631 | 195k | ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_ctb_row_no = num_jobs; |
632 | 195k | ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_tile_col_idx = |
633 | 195k | col_tile_ctr; |
634 | 195k | ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_bitrate_instance_no = i; |
635 | 195k | } |
636 | | |
637 | 195k | ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1); /* jobs are chained in array order; the last link is reset to NULL after the loops */ |
638 | | |
639 | 195k | ps_job_queue_curr->i4_task_type = task_type; |
640 | | |
641 | 195k | ps_job_queue_curr->i4_num_input_dep = inp_dep; |
642 | | |
643 | | /* set the entire input dep buffer to default value 0 */ |
644 | 195k | memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP); |
645 | | |
646 | | /* set the input dep buffer to 1 for num inp dep */ |
647 | 195k | if(0 != inp_dep) |
648 | 136k | { |
649 | 136k | memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep); |
650 | 136k | } |
651 | | |
652 | 195k | ps_job_queue_curr->i4_num_output_dep = out_dep; |
653 | | |
654 | | /* set the entire offset buffer to default value (every byte 0xFF) */ |
655 | 195k | memset(&ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP); |
656 | | |
657 | 195k | for(ctr = 0; ctr < out_dep; ctr++) |
658 | 0 | { |
659 | | /* col tile level dependency b/w EncLoops of MBR: element distance from the buffer start to the same row and tile column in the next instance's queue */ |
660 | 0 | u8_temp = (ULWORD64)( |
661 | 0 | &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] - |
662 | 0 | ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc); |
663 |

664 | 0 | u8_temp *= sizeof(job_queue_t); /* convert the element distance into a byte offset */ |
665 | | |
666 | | /* store the offset to the array; ihevce_enc_grp_job_set_out_dep() zeroes the byte at this offset */ |
667 | 0 | ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp; |
668 | 0 | } |
669 | | |
670 | 195k | ps_job_queue_curr++; |
671 | 195k | } |
672 | 195k | } |
673 | | |
674 | | /* set the last pointer to NULL; NOTE(review): stepping back assumes at least one job was written above */ |
675 | 177k | ps_job_queue_curr--; |
676 | 177k | ps_job_queue_curr->pv_next = (void *)NULL; |
677 | 177k | } |
678 | | |
679 | 177k | return; |
680 | | |
681 | 177k | } /* End of ihevce_prepare_job_queue */ |
682 | | |
683 | | /** |
684 | | ******************************************************************************* |
685 | | * |
686 | | * @brief Function prepares the Job Queues for all the passes of pre enc |
687 | | * |
688 | | * @par Description: Based on picture type sets the input and output dependency |
689 | | * |
690 | | * @param[inout] pv_enc_ctxt |
691 | | * Pointer to encoder context |
692 | | * |
693 | | * @param[in] ps_curr_inp |
694 | | * Current Input buffer pointer |
695 | | * |
696 | | * @returns |
697 | | * None |
698 | | * |
699 | | * @remarks |
700 | | * |
701 | | ******************************************************************************* |
702 | | */ |
703 | | void ihevce_prepare_pre_enc_job_queue( |
704 | | void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_ping_pong) |
705 | 177k | { |
706 | | /* local variables */ |
707 | 177k | enc_ctxt_t *ps_ctxt; |
708 | 177k | job_queue_t *ps_decomp_job_queue_lyr0; |
709 | 177k | job_queue_t *ps_decomp_job_queue_lyr1; |
710 | 177k | job_queue_t *ps_decomp_job_queue_lyr2; |
711 | 177k | job_queue_t *ps_decomp_job_queue_lyr3; |
712 | 177k | job_queue_t *ps_me_job_queue_lyr1; |
713 | 177k | job_queue_t *ps_me_job_queue_lyr2; |
714 | 177k | job_queue_t *ps_me_job_queue_lyr3; |
715 | 177k | job_queue_t *ps_me_job_queue_lyr4; |
716 | 177k | job_queue_t *ps_ipe_job_queue; |
717 | 177k | job_queue_t *aps_me_job_queues[MAX_NUM_HME_LAYERS]; |
718 | 177k | multi_thrd_me_job_q_prms_t *ps_me_job_q_prms; |
719 | 177k | WORD32 ai4_decomp_num_vert_units_lyr[MAX_NUM_HME_LAYERS]; |
720 | 177k | WORD32 a14_decomp_lyr_unit_size[MAX_NUM_HME_LAYERS]; |
721 | 177k | WORD32 layer_no; |
722 | 177k | WORD32 decomp_lyr_cnt; |
723 | 177k | WORD32 num_jobs; |
724 | 177k | WORD32 n_tot_layers; |
725 | 177k | WORD32 a_wd[MAX_NUM_HME_LAYERS]; |
726 | 177k | WORD32 a_ht[MAX_NUM_HME_LAYERS]; |
727 | 177k | WORD32 a_disp_wd[MAX_NUM_HME_LAYERS]; |
728 | 177k | WORD32 a_disp_ht[MAX_NUM_HME_LAYERS]; |
729 | 177k | WORD32 u4_log_ctb_size; |
730 | 177k | WORD32 num_ctb_vert_rows; |
731 | 177k | WORD32 pass; |
732 | 177k | WORD32 me_lyr_cnt; |
733 | 177k | WORD32 num_hme_lyrs; |
734 | 177k | WORD32 ai4_me_num_vert_units_lyr[MAX_NUM_HME_LAYERS]; |
735 | 177k | WORD32 me_start_lyr_pass; |
736 | 177k | WORD32 ctb_size; |
737 | 177k | WORD32 me_coarsest_lyr_inp_dep = -1; |
738 | | |
739 | 177k | (void)ps_curr_inp; |
740 | | /* derive local varaibles */ |
741 | 177k | ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt; |
742 | 177k | num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert; |
743 | | |
744 | | /* CHANGE REQUIRED: change the pointer to the job queue buffer */ |
745 | | /* memset the entire job que buffer to zero */ |
746 | 177k | memset( |
747 | 177k | ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong], |
748 | 177k | 0, |
749 | 177k | MAX_NUM_VERT_UNITS_FRM * NUM_PRE_ENC_JOBS_QUES * sizeof(job_queue_t)); |
750 | | |
751 | | /* Get the number of vertical units in a layer from the resolution of the layer */ |
752 | 177k | a_wd[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd; |
753 | 177k | a_ht[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht; |
754 | 177k | n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht); |
755 | 177k | GETRANGE(u4_log_ctb_size, ps_ctxt->s_frm_ctb_prms.i4_ctb_size); |
756 | | |
757 | 177k | ASSERT(n_tot_layers >= 3); |
758 | | |
759 | | /* |
760 | | * Always force minimum layers as 4 so that we would have both l1 and l2 |
761 | | * pre intra analysis |
762 | | */ |
763 | 177k | if(n_tot_layers == 3) |
764 | 176k | { |
765 | 176k | n_tot_layers = 4; |
766 | 176k | a_wd[3] = CEIL16(a_wd[2] >> 1); |
767 | 176k | a_ht[3] = CEIL16(a_ht[2] >> 1); |
768 | 176k | } |
769 | | |
770 | 888k | for(layer_no = 0; layer_no < n_tot_layers; layer_no++) |
771 | 710k | { |
772 | 710k | ctb_size = 1 << (u4_log_ctb_size - 1 - layer_no); |
773 | 710k | ai4_decomp_num_vert_units_lyr[layer_no] = ((a_ht[layer_no] + ctb_size) & ~(ctb_size - 1)) >> |
774 | 710k | (u4_log_ctb_size - 1 - layer_no); |
775 | 710k | a14_decomp_lyr_unit_size[layer_no] = 1 << (u4_log_ctb_size - 1 - layer_no); |
776 | 710k | } |
777 | | |
778 | | /* get the start address of Job queues */ |
779 | 177k | ps_decomp_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]; |
780 | 177k | ps_decomp_job_queue_lyr1 = ps_decomp_job_queue_lyr0 + MAX_NUM_VERT_UNITS_FRM; |
781 | 177k | ps_decomp_job_queue_lyr2 = ps_decomp_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM; |
782 | 177k | ps_decomp_job_queue_lyr3 = ps_decomp_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM; |
783 | 177k | ps_me_job_queue_lyr4 = ps_decomp_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM; |
784 | 177k | ps_me_job_queue_lyr3 = ps_me_job_queue_lyr4 + MAX_NUM_VERT_UNITS_FRM; |
785 | 177k | ps_me_job_queue_lyr2 = ps_me_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM; |
786 | 177k | ps_me_job_queue_lyr1 = ps_me_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM; |
787 | | |
788 | 177k | ps_ipe_job_queue = ps_me_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM; |
789 | | |
790 | | /* store the JOB queue in the Job handle */ |
791 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR0].pv_next = |
792 | 177k | (void *)ps_decomp_job_queue_lyr0; |
793 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR1].pv_next = |
794 | 177k | (void *)ps_decomp_job_queue_lyr1; |
795 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR2].pv_next = |
796 | 177k | (void *)ps_decomp_job_queue_lyr2; |
797 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR3].pv_next = |
798 | 177k | (void *)ps_decomp_job_queue_lyr3; |
799 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR4].pv_next = |
800 | 177k | (void *)ps_me_job_queue_lyr4; |
801 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR3].pv_next = |
802 | 177k | (void *)ps_me_job_queue_lyr3; |
803 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR2].pv_next = |
804 | 177k | (void *)ps_me_job_queue_lyr2; |
805 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR1].pv_next = |
806 | 177k | (void *)ps_me_job_queue_lyr1; |
807 | 177k | ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][IPE_JOB_LYR0].pv_next = |
808 | 177k | (void *)ps_ipe_job_queue; |
809 | | |
810 | | /* store the ME Jobs que into array */ |
811 | 177k | aps_me_job_queues[0] = NULL; |
812 | 177k | aps_me_job_queues[1] = ps_me_job_queue_lyr1; |
813 | 177k | aps_me_job_queues[2] = ps_me_job_queue_lyr2; |
814 | 177k | aps_me_job_queues[3] = ps_me_job_queue_lyr3; |
815 | 177k | aps_me_job_queues[4] = ps_me_job_queue_lyr4; |
816 | 177k | decomp_lyr_cnt = 0; |
817 | | /* Set the me_lyr_cnt to 0 */ |
818 | 177k | me_lyr_cnt = 0; |
819 | | |
820 | | /* call the ME function which returns the layer properties */ |
821 | 177k | ihevce_coarse_me_get_lyr_prms_job_que( |
822 | 177k | ps_ctxt->s_module_ctxt.pv_coarse_me_ctxt, |
823 | 177k | ps_curr_inp, |
824 | 177k | &num_hme_lyrs, |
825 | 177k | &ai4_me_num_vert_units_lyr[0], |
826 | 177k | &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0]); |
827 | | |
828 | 177k | ps_me_job_q_prms = &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0]; |
829 | | |
830 | | /* derive ME coarsest layer task type */ |
831 | 177k | me_start_lyr_pass = ME_JOB_LYR4 + (MAX_NUM_HME_LAYERS - num_hme_lyrs); |
832 | | |
833 | 177k | ps_ctxt->s_multi_thrd.i4_me_coarsest_lyr_type = me_start_lyr_pass; |
834 | | |
835 | | /* coarsest HME layer number of units should be less than or equal to max in dep in Job queue */ |
836 | | /* this constraint is to take care of Coarsest layer requiring entire layer to do FULL search */ |
837 | 177k | ASSERT(ai4_me_num_vert_units_lyr[0] <= MAX_IN_DEP); |
838 | | /* loop over all the passes in the encoder */ |
839 | 1.77M | for(pass = 0; pass < NUM_PRE_ENC_JOBS_QUES; pass++) |
840 | 1.59M | { |
841 | 1.59M | job_queue_t *ps_pre_enc_job_queue_curr; |
842 | 1.59M | job_queue_t *ps_pre_enc_job_queue_next; |
843 | 1.59M | WORD32 inp_dep_pass; |
844 | 1.59M | WORD32 out_dep_pass; |
845 | 1.59M | WORD32 num_vert_units; |
846 | 1.59M | HEVCE_PRE_ENC_JOB_TYPES_T pre_enc_task_type; |
847 | 1.59M | HEVCE_ENC_JOB_TYPES_T enc_task_type; |
848 | 1.59M | WORD32 proc_valid_flag = 0; |
849 | | |
850 | | // num_vert_units = ai4_decomp_num_vert_units_lyr[decomp_lyr_cnt]; |
851 | | /* Initializing the job queues for max no of rows among all the layers. And max would be for last layer*/ |
852 | 1.59M | num_vert_units = ai4_decomp_num_vert_units_lyr[n_tot_layers - 1]; |
853 | | |
854 | 1.59M | if(DECOMP_JOB_LYR0 == pass) |
855 | 177k | { |
856 | 177k | proc_valid_flag = 1; |
857 | 177k | pre_enc_task_type = DECOMP_JOB_LYR0; |
858 | 177k | enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1; |
859 | 177k | ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr0; |
860 | | |
861 | 177k | inp_dep_pass = 0; |
862 | 177k | decomp_lyr_cnt++; |
863 | | |
864 | | /* If all the decomp layers are done next job queue will be ME job queue */ |
865 | 177k | if(decomp_lyr_cnt == (n_tot_layers - 1)) |
866 | 0 | { |
867 | | /* Assumption : num_hme_lyrs > 1*/ |
868 | 0 | ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1]; |
869 | | |
870 | | /* ME coarsest layer is currently made dependent on entire decomp layer */ |
871 | 0 | out_dep_pass = ai4_me_num_vert_units_lyr[0]; |
872 | 0 | me_coarsest_lyr_inp_dep = num_vert_units; |
873 | 0 | } |
874 | 177k | else |
875 | 177k | { |
876 | 177k | ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr1; |
877 | 177k | out_dep_pass = 3; |
878 | 177k | } |
879 | 177k | } |
880 | 1.42M | else if((DECOMP_JOB_LYR1 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1))) |
881 | 177k | { |
882 | 177k | proc_valid_flag = 1; |
883 | 177k | pre_enc_task_type = DECOMP_JOB_LYR1; |
884 | 177k | enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1; |
885 | 177k | ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr1; |
886 | | |
887 | 177k | inp_dep_pass = 3; |
888 | 177k | decomp_lyr_cnt++; |
889 | | |
890 | | /* If all the decomp layers are done next job queue will be ME job queue */ |
891 | 177k | if(decomp_lyr_cnt == (n_tot_layers - 1)) |
892 | 0 | { |
893 | | /* Assumption : num_hme_lyrs > 1*/ |
894 | 0 | ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1]; |
895 | | |
896 | | /* ME coarsest layer is currently made dependent on entire decomp layer */ |
897 | 0 | out_dep_pass = ai4_me_num_vert_units_lyr[0]; |
898 | 0 | me_coarsest_lyr_inp_dep = num_vert_units; |
899 | 0 | } |
900 | 177k | else |
901 | 177k | { |
902 | 177k | ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr2; |
903 | 177k | out_dep_pass = 3; |
904 | 177k | } |
905 | 177k | } |
906 | 1.24M | else if((DECOMP_JOB_LYR2 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1))) |
907 | 177k | { |
908 | 177k | proc_valid_flag = 1; |
909 | 177k | pre_enc_task_type = DECOMP_JOB_LYR2; |
910 | 177k | enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1; |
911 | 177k | ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr2; |
912 | | |
913 | 177k | inp_dep_pass = 3; |
914 | 177k | decomp_lyr_cnt++; |
915 | | |
916 | | /* If all the decomp layers are done next job queue will be ME job queue */ |
917 | 177k | if(decomp_lyr_cnt == (n_tot_layers - 1)) |
918 | 177k | { |
919 | | /* Assumption : num_hme_lyrs > 1*/ |
920 | 177k | ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1]; |
921 | | |
922 | | /* ME coarsest layer is currently made dependent on entire decomp layer */ |
923 | 177k | out_dep_pass = ai4_me_num_vert_units_lyr[0]; |
924 | 177k | me_coarsest_lyr_inp_dep = num_vert_units; |
925 | 177k | } |
926 | 0 | else |
927 | 0 | { |
928 | | /* right now MAX 4 layers worth of JOB queues are prepared */ |
929 | 0 | ASSERT(0); |
930 | 0 | } |
931 | 177k | } |
932 | | |
933 | 1.06M | else if(IPE_JOB_LYR0 == pass) |
934 | 177k | { |
935 | 177k | proc_valid_flag = 1; |
936 | 177k | pre_enc_task_type = IPE_JOB_LYR0; |
937 | 177k | enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1; |
938 | 177k | ps_pre_enc_job_queue_curr = ps_ipe_job_queue; |
939 | 177k | ps_pre_enc_job_queue_next = NULL; |
940 | 177k | num_vert_units = num_ctb_vert_rows; |
941 | 177k | } |
942 | 888k | else if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) && (pass >= me_start_lyr_pass)) |
943 | 356k | { |
944 | | /* num_ver_units of coarsest layer is stored at 0th index */ |
945 | 356k | num_vert_units = ai4_me_num_vert_units_lyr[me_lyr_cnt]; |
946 | 356k | proc_valid_flag = 1; |
947 | | |
948 | 356k | pre_enc_task_type = |
949 | 356k | (HEVCE_PRE_ENC_JOB_TYPES_T)((WORD32)ME_JOB_LYR1 - (num_hme_lyrs - me_lyr_cnt - 2)); |
950 | | |
951 | 356k | enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1; |
952 | | |
953 | | /* Assumption : num_hme_lyrs > 1*/ |
954 | 356k | ps_pre_enc_job_queue_curr = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 1]; |
955 | | |
956 | 356k | if(me_lyr_cnt == (num_hme_lyrs - 2)) |
957 | 177k | { |
958 | 177k | ps_pre_enc_job_queue_next = ps_ipe_job_queue; |
959 | 177k | } |
960 | 179k | else |
961 | 179k | { |
962 | 179k | ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 2]; |
963 | 179k | } |
964 | 356k | me_lyr_cnt++; |
965 | 356k | } |
966 | | |
967 | | /* check for valid processing flag */ |
968 | 1.59M | if(0 == proc_valid_flag) |
969 | 531k | { |
970 | 531k | continue; |
971 | 531k | } |
972 | | |
973 | | /* in the loop ps_me_job_q_prms get incremented for every row */ |
974 | | /* so at the end of one layer the pointer will be correctly */ |
975 | | /* pointing to the start of next layer */ |
976 | | |
977 | | /* loop over all the vertical rows */ |
978 | 4.63M | for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++) |
979 | 3.56M | { |
980 | 3.56M | ULWORD64 u8_temp; |
981 | 3.56M | WORD32 inp_dep = 0; |
982 | 3.56M | WORD32 out_dep = 0; |
983 | 3.56M | WORD32 ctr; |
984 | 3.56M | WORD32 job_off_ipe; |
985 | | |
986 | 3.56M | if(IPE_JOB_LYR0 == pass) |
987 | 195k | { |
988 | 195k | ps_pre_enc_job_queue_curr->s_job_info.s_ipe_job_info.i4_ctb_row_no = num_jobs; |
989 | 195k | inp_dep = ps_me_job_q_prms->i4_num_inp_dep; |
990 | 195k | out_dep = 0; |
991 | 195k | } |
992 | 3.36M | else if((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3)) |
993 | 1.63M | { |
994 | 1.63M | ps_pre_enc_job_queue_curr->s_job_info.s_decomp_job_info.i4_vert_unit_row_no = |
995 | 1.63M | num_jobs; |
996 | | |
997 | | /* Input and output dependencies of 1st row and last row is 1 less than other rows*/ |
998 | 1.63M | inp_dep = inp_dep_pass; |
999 | 1.63M | out_dep = out_dep_pass; |
1000 | | |
1001 | 1.63M | if(pass != DECOMP_JOB_LYR0) |
1002 | 1.08M | { |
1003 | 1.08M | if(((num_jobs == 0) || (num_jobs == num_vert_units - 1))) |
1004 | 710k | { |
1005 | 710k | inp_dep = inp_dep_pass - 1; |
1006 | 710k | } |
1007 | 1.08M | } |
1008 | | |
1009 | 1.63M | if(pass != (DECOMP_JOB_LYR0 + n_tot_layers - 2)) |
1010 | 1.08M | { |
1011 | 1.08M | if(((num_jobs == 0) || (num_jobs == num_vert_units - 1))) |
1012 | 710k | { |
1013 | 710k | out_dep = out_dep_pass - 1; |
1014 | 710k | } |
1015 | 1.08M | } |
1016 | 1.63M | } |
1017 | 1.73M | else /* remaining all are ME JOBS */ |
1018 | 1.73M | { |
1019 | 1.73M | ps_pre_enc_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs; |
1020 | | |
1021 | 1.73M | if(pass == me_start_lyr_pass) |
1022 | 943k | { |
1023 | 943k | ASSERT(me_coarsest_lyr_inp_dep != -1); |
1024 | 943k | inp_dep = me_coarsest_lyr_inp_dep; |
1025 | 943k | } |
1026 | 789k | else |
1027 | 789k | { |
1028 | 789k | inp_dep = ps_me_job_q_prms->i4_num_inp_dep; |
1029 | 789k | } |
1030 | 1.73M | out_dep = ps_me_job_q_prms->i4_num_output_dep; |
1031 | 1.73M | } |
1032 | 3.56M | ps_pre_enc_job_queue_curr->pv_next = (void *)(ps_pre_enc_job_queue_curr + 1); |
1033 | | |
1034 | 3.56M | ps_pre_enc_job_queue_curr->i4_pre_enc_task_type = pre_enc_task_type; |
1035 | 3.56M | ps_pre_enc_job_queue_curr->i4_task_type = enc_task_type; |
1036 | | |
1037 | | /* Set the input dependencies */ |
1038 | 3.56M | ps_pre_enc_job_queue_curr->i4_num_input_dep = inp_dep; |
1039 | | |
1040 | | /* set the entire input dep buffer to default value 0 */ |
1041 | 3.56M | memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP); |
1042 | | |
1043 | | /* set the input dep buffer to 1 for num inp dep */ |
1044 | 3.56M | if(0 != inp_dep) |
1045 | 3.01M | { |
1046 | 3.01M | memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep); |
1047 | 3.01M | } |
1048 | | |
1049 | | /* If decomposition layer ends at this pass the no of out dependencies |
1050 | | * will be based on number of vertical units in the coarsest layer of HME |
1051 | | * This is because the search range in coarsest layer will be almost |
1052 | | * entire frame (search range of +-128 in vert direction is max supported) |
1053 | | */ |
1054 | 3.56M | if(pass == (DECOMP_JOB_LYR0 + n_tot_layers - 2)) |
1055 | 544k | { |
1056 | 544k | job_off_ipe = 0; |
1057 | 544k | } |
1058 | 3.01M | else |
1059 | 3.01M | { |
1060 | 3.01M | if(num_jobs == 0) |
1061 | 889k | job_off_ipe = num_jobs; |
1062 | | |
1063 | 2.12M | else |
1064 | 2.12M | job_off_ipe = num_jobs - 1; |
1065 | 3.01M | } |
1066 | | |
1067 | | /* Set the offsets of output dependencies */ |
1068 | 3.56M | ps_pre_enc_job_queue_curr->i4_num_output_dep = out_dep; |
1069 | | |
1070 | | /* set the entire offset buffer to default value */ |
1071 | 3.56M | memset( |
1072 | 3.56M | &ps_pre_enc_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP); |
1073 | | |
1074 | 13.4M | for(ctr = 0; ctr < out_dep; ctr++) |
1075 | 9.90M | { |
1076 | | /* if IPE or DECOMP loop the dep is 1 to 1*/ |
1077 | 9.90M | if(((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3)) || |
1078 | 9.90M | (IPE_JOB_LYR0 == pass)) |
1079 | 5.77M | { |
1080 | 5.77M | u8_temp = (ULWORD64)( |
1081 | 5.77M | &ps_pre_enc_job_queue_next[job_off_ipe] - |
1082 | 5.77M | ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]); |
1083 | | |
1084 | 5.77M | u8_temp *= sizeof(job_queue_t); |
1085 | | |
1086 | | /* add the exact inp dep byte for the next layer JOB */ |
1087 | 5.77M | u8_temp += ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep; |
1088 | | |
1089 | | /* increment the inp dep number for a given job */ |
1090 | 5.77M | ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep++; |
1091 | | |
1092 | 5.77M | job_off_ipe++; |
1093 | 5.77M | } |
1094 | 4.13M | else if((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) |
1095 | 4.13M | { |
1096 | | /* ME layer Jobs */ |
1097 | 4.13M | WORD32 job_off; |
1098 | | |
1099 | 4.13M | job_off = ps_me_job_q_prms->ai4_out_dep_unit_off[ctr]; |
1100 | | |
1101 | 4.13M | u8_temp = (ULWORD64)( |
1102 | 4.13M | &ps_pre_enc_job_queue_next[job_off] - |
1103 | 4.13M | ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]); |
1104 | | |
1105 | 4.13M | u8_temp *= sizeof(job_queue_t); |
1106 | | |
1107 | | /* add the exact inp dep byte for the next layer JOB */ |
1108 | 4.13M | u8_temp += ps_pre_enc_job_queue_next[job_off].i4_num_input_dep; |
1109 | | |
1110 | | /* increment the inp dep number for a given job */ |
1111 | 4.13M | ps_pre_enc_job_queue_next[job_off].i4_num_input_dep++; |
1112 | 4.13M | } |
1113 | | /* store the offset to the array */ |
1114 | 9.90M | ps_pre_enc_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp; |
1115 | 9.90M | } |
1116 | | /* ME job q params is incremented only for ME jobs */ |
1117 | 3.56M | if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) || (IPE_JOB_LYR0 == pass)) |
1118 | 1.92M | { |
1119 | 1.92M | ps_me_job_q_prms++; |
1120 | 1.92M | } |
1121 | 3.56M | ps_pre_enc_job_queue_curr++; |
1122 | 3.56M | } |
1123 | | |
1124 | | /* set the last pointer to NULL */ |
1125 | 1.06M | ps_pre_enc_job_queue_curr--; |
1126 | 1.06M | ps_pre_enc_job_queue_curr->pv_next = (void *)NULL; |
1127 | 1.06M | } |
1128 | | |
1129 | | /* reset the num ctb processed in every row for IPE sync */ |
1130 | 177k | memset( |
1131 | 177k | &ps_ctxt->s_multi_thrd.ai4_ctbs_in_row_proc_ipe_pass[0], |
1132 | 177k | 0, |
1133 | 177k | (MAX_NUM_CTB_ROWS_FRM * sizeof(WORD32))); |
1134 | | |
1135 | 177k | } /* End of ihevce_prepare_pre_enc_job_queue */ |