/src/libhevc/encoder/ihevce_coarse_me_pass.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_coarse_me_pass.c |
24 | | * |
25 | | * \brief |
26 | | * Converts the language of the encoder to language of me. This is an i/f |
27 | | * between the encoder style APIs and ME style APIs. This is basically |
28 | | * a memoryless glue layer. |
29 | | * |
30 | | * \date |
31 | | * 22/10/2012 |
32 | | * |
33 | | * \author |
34 | | * Ittiam |
35 | | * |
36 | | * |
37 | | * List of Functions |
38 | | * |
39 | | * |
40 | | ****************************************************************************** |
41 | | */ |
42 | | |
43 | | /*****************************************************************************/ |
44 | | /* File Includes */ |
45 | | /*****************************************************************************/ |
46 | | /* System include files */ |
47 | | #include <stdio.h> |
48 | | #include <string.h> |
49 | | #include <stdlib.h> |
50 | | #include <assert.h> |
51 | | #include <stdarg.h> |
52 | | #include <math.h> |
53 | | |
54 | | /* User include files */ |
55 | | #include "ihevc_typedefs.h" |
56 | | #include "itt_video_api.h" |
57 | | #include "ihevce_api.h" |
58 | | |
59 | | #include "rc_cntrl_param.h" |
60 | | #include "rc_frame_info_collector.h" |
61 | | #include "rc_look_ahead_params.h" |
62 | | |
63 | | #include "ihevc_defs.h" |
64 | | #include "ihevc_structs.h" |
65 | | #include "ihevc_platform_macros.h" |
66 | | #include "ihevc_deblk.h" |
67 | | #include "ihevc_itrans_recon.h" |
68 | | #include "ihevc_chroma_itrans_recon.h" |
69 | | #include "ihevc_chroma_intra_pred.h" |
70 | | #include "ihevc_intra_pred.h" |
71 | | #include "ihevc_inter_pred.h" |
72 | | #include "ihevc_mem_fns.h" |
73 | | #include "ihevc_padding.h" |
74 | | #include "ihevc_weighted_pred.h" |
75 | | #include "ihevc_sao.h" |
76 | | #include "ihevc_resi_trans.h" |
77 | | #include "ihevc_quant_iquant_ssd.h" |
78 | | #include "ihevc_cabac_tables.h" |
79 | | |
80 | | #include "ihevce_defs.h" |
81 | | #include "ihevce_lap_enc_structs.h" |
82 | | #include "ihevce_multi_thrd_structs.h" |
83 | | #include "ihevce_me_common_defs.h" |
84 | | #include "ihevce_had_satd.h" |
85 | | #include "ihevce_error_codes.h" |
86 | | #include "ihevce_bitstream.h" |
87 | | #include "ihevce_cabac.h" |
88 | | #include "ihevce_rdoq_macros.h" |
89 | | #include "ihevce_function_selector.h" |
90 | | #include "ihevce_enc_structs.h" |
91 | | #include "ihevce_entropy_structs.h" |
92 | | #include "ihevce_cmn_utils_instr_set_router.h" |
93 | | #include "ihevce_enc_loop_structs.h" |
94 | | #include "ihevce_bs_compute_ctb.h" |
95 | | #include "ihevce_global_tables.h" |
96 | | #include "ihevce_dep_mngr_interface.h" |
97 | | #include "hme_datatype.h" |
98 | | #include "hme_interface.h" |
99 | | #include "hme_common_defs.h" |
100 | | #include "hme_defs.h" |
101 | | #include "ihevce_me_instr_set_router.h" |
102 | | #include "ihevce_ipe_instr_set_router.h" |
103 | | #include "ihevce_ipe_structs.h" |
104 | | #include "hme_globals.h" |
105 | | #include "hme_utils.h" |
106 | | #include "hme_coarse.h" |
107 | | #include "hme_refine.h" |
108 | | #include "ihevce_me_pass.h" |
109 | | #include "ihevce_coarse_me_pass.h" |
110 | | |
111 | | /*****************************************************************************/ |
112 | | /* Function Definitions */ |
113 | | /*****************************************************************************/ |
114 | | |
115 | | /*! |
116 | | ****************************************************************************** |
117 | | * \if Function name : ihevce_coarse_me_get_num_mem_recs \endif |
118 | | * |
119 | | * \brief |
120 | | * Number of memory records are returned for ME module |
121 | | * Note : Include total mem. req. for HME + Total mem. req. for Dep Mngr for HME |
122 | | * |
123 | | * \return |
124 | | * Number of memory records |
125 | | * |
126 | | * \author |
127 | | * Ittiam |
128 | | * |
129 | | ***************************************************************************** |
130 | | */ |
131 | | WORD32 ihevce_coarse_me_get_num_mem_recs() |
132 | 522 | { |
133 | 522 | WORD32 hme_mem_recs = hme_coarse_num_alloc(); |
134 | 522 | WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc(); |
135 | | |
136 | 522 | return ((hme_mem_recs + hme_dep_mngr_mem_recs)); |
137 | 522 | } |
138 | | |
139 | | /*! |
140 | | ****************************************************************************** |
141 | | * \if Function name : ihevce_coarse_me_get_mem_recs \endif |
142 | | * |
143 | | * \brief |
144 | | * Memory requirements are returned for coarse ME. |
145 | | * |
146 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
147 | | * \param[in] ps_init_prms : Create time static parameters |
148 | | * \param[in] i4_num_proc_thrds : Number of processing threads for this module |
149 | | * \param[in] i4_mem_space : memspace in whihc memory request should be done |
150 | | * |
151 | | * \return |
152 | | * Number of records |
153 | | * |
154 | | * \author |
155 | | * Ittiam |
156 | | * |
157 | | ***************************************************************************** |
158 | | */ |
159 | | WORD32 ihevce_coarse_me_get_mem_recs( |
160 | | iv_mem_rec_t *ps_mem_tab, |
161 | | ihevce_static_cfg_params_t *ps_init_prms, |
162 | | WORD32 i4_num_proc_thrds, |
163 | | WORD32 i4_mem_space, |
164 | | WORD32 i4_resolution_id) |
165 | 261 | { |
166 | 261 | hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; |
167 | 261 | WORD32 n_tabs, i; |
168 | | |
169 | | /* Init prms structure specific to HME */ |
170 | 261 | hme_init_prms_t s_hme_init_prms; |
171 | | |
172 | | //return (ihevce_coarse_me_get_num_mem_recs()); |
173 | | /*************************************************************************/ |
174 | | /* code flow: we call hme alloc function and then remap those memtabs */ |
175 | | /* to a different type of memtab structure. */ |
176 | | /*************************************************************************/ |
177 | 261 | ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc()); |
178 | | |
179 | | /*************************************************************************/ |
180 | | /* POPULATE THE HME INIT PRMS */ |
181 | | /*************************************************************************/ |
182 | 261 | ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); |
183 | | |
184 | | /*************************************************************************/ |
185 | | /* CALL THE ME FUNCTION TO GET MEMTABS */ |
186 | | /*************************************************************************/ |
187 | 261 | n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms); |
188 | 261 | ASSERT(n_tabs == hme_coarse_num_alloc()); |
189 | | |
190 | | /*************************************************************************/ |
191 | | /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE */ |
192 | | /*************************************************************************/ |
193 | 83.2k | for(i = 0; i < n_tabs; i++) |
194 | 82.9k | { |
195 | 82.9k | ps_mem_tab[i].i4_mem_size = as_memtabs[i].size; |
196 | 82.9k | ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align; |
197 | 82.9k | ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
198 | 82.9k | ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t); |
199 | 82.9k | } |
200 | | |
201 | | /*************************************************************************/ |
202 | | /* --- HME Coarse sync Dep Mngr Mem requests -- */ |
203 | | /*************************************************************************/ |
204 | 261 | { |
205 | 261 | WORD32 n_dep_tabs; |
206 | | |
207 | 261 | ps_mem_tab += n_tabs; |
208 | | |
209 | 261 | n_dep_tabs = hme_coarse_dep_mngr_alloc( |
210 | 261 | ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id); |
211 | | |
212 | 261 | ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc()); |
213 | | |
214 | | /* Update the total no. of mem tabs */ |
215 | 261 | n_tabs += n_dep_tabs; |
216 | 261 | } |
217 | | |
218 | 261 | return (n_tabs); |
219 | 261 | } |
220 | | |
221 | | /*! |
222 | | ****************************************************************************** |
223 | | * \if Function name : ihevce_coarse_me_init \endif |
224 | | * |
225 | | * \brief |
226 | | * Intialization for ME context state structure . |
227 | | * |
228 | | * \param[in] ps_mem_tab : pointer to memory descriptors table |
229 | | * \param[in] ps_init_prms : Create time static parameters |
230 | | * \param[in] pv_osal_handle : Osal handle |
231 | | * |
232 | | * \return |
233 | | * Handle to the ME context |
234 | | * |
235 | | * \author |
236 | | * Ittiam |
237 | | * |
238 | | ***************************************************************************** |
239 | | */ |
240 | | void *ihevce_coarse_me_init( |
241 | | iv_mem_rec_t *ps_mem_tab, |
242 | | ihevce_static_cfg_params_t *ps_init_prms, |
243 | | WORD32 i4_num_proc_thrds, |
244 | | void *pv_osal_handle, |
245 | | WORD32 i4_resolution_id, |
246 | | UWORD8 u1_is_popcnt_available) |
247 | 261 | { |
248 | | /* ME handle to be returned */ |
249 | 261 | void *pv_me_ctxt; |
250 | 261 | WORD32 status; |
251 | 261 | coarse_me_master_ctxt_t *ps_ctxt; |
252 | | |
253 | | /* Init prms structure specific to HME */ |
254 | 261 | hme_init_prms_t s_hme_init_prms; |
255 | | |
256 | | /* memtabs to be passed to hme */ |
257 | 261 | hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS]; |
258 | 261 | WORD32 n_tabs, n_dep_tabs, i; |
259 | | |
260 | | /*************************************************************************/ |
261 | | /* POPULATE THE HME INIT PRMS */ |
262 | | /*************************************************************************/ |
263 | 261 | ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id); |
264 | | |
265 | | /*************************************************************************/ |
266 | | /* Ensure local declaration is sufficient */ |
267 | | /*************************************************************************/ |
268 | 261 | n_tabs = hme_coarse_num_alloc(); |
269 | 261 | ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs); |
270 | | |
271 | | /*************************************************************************/ |
272 | | /* MAP RESULTS TO HME MEMTAB STRUCTURE */ |
273 | | /*************************************************************************/ |
274 | 83.2k | for(i = 0; i < n_tabs; i++) |
275 | 82.9k | { |
276 | 82.9k | as_memtabs[i].size = ps_mem_tab[i].i4_mem_size; |
277 | 82.9k | as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment; |
278 | 82.9k | as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base; |
279 | 82.9k | } |
280 | | /*************************************************************************/ |
281 | | /* CALL THE ME FUNCTION TO GET MEMTABS */ |
282 | | /*************************************************************************/ |
283 | 261 | pv_me_ctxt = (void *)as_memtabs[0].pu1_mem; |
284 | 261 | status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms); |
285 | 261 | ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
286 | 261 | if(status == -1) |
287 | 0 | return NULL; |
288 | | |
289 | | /*************************************************************************/ |
290 | | /* --- HME sync Dep Mngr Mem init -- */ |
291 | | /*************************************************************************/ |
292 | | |
293 | 261 | ps_mem_tab += n_tabs; |
294 | | |
295 | 261 | n_dep_tabs = hme_coarse_dep_mngr_init( |
296 | 261 | ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id); |
297 | 261 | ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc()); |
298 | | |
299 | 261 | n_tabs += n_dep_tabs; |
300 | | |
301 | 261 | ihevce_me_instr_set_router( |
302 | 261 | (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list, |
303 | 261 | ps_init_prms->e_arch_type); |
304 | | |
305 | 261 | ihevce_cmn_utils_instr_set_router( |
306 | 261 | &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
307 | | |
308 | 261 | return (pv_me_ctxt); |
309 | 261 | } |
310 | | |
311 | | /*! |
312 | | ****************************************************************************** |
313 | | * \if Function name : ihevce_coarse_me_reg_thrds_sem \endif |
314 | | * |
315 | | * \brief |
316 | | * Intialization for ME context state structure with semaphores . |
317 | | * |
318 | | * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
319 | | * \param[in] ppv_sem_hdls : Array of semaphore handles |
320 | | * \param[in] i4_num_proc_thrds : Number of processing threads |
321 | | * |
322 | | * \return |
323 | | * none |
324 | | * |
325 | | * \author |
326 | | * Ittiam |
327 | | * |
328 | | ***************************************************************************** |
329 | | */ |
330 | | void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) |
331 | 261 | { |
332 | 261 | hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds); |
333 | | |
334 | 261 | return; |
335 | 261 | } |
336 | | |
337 | | /*! |
338 | | ****************************************************************************** |
339 | | * \if Function name : ihevce_coarse_me_delete \endif |
340 | | * |
341 | | * \brief |
342 | | * Destroy Coarse ME module |
343 | | * Note : Only Destroys the resources allocated in the module like |
344 | | * semaphore,etc. Memory free is done Separately using memtabs |
345 | | * |
346 | | * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt |
347 | | * \param[in] ps_init_prms : Create time static parameters |
348 | | * \param[in] pv_osal_handle : Osal handle |
349 | | * |
350 | | * \return |
351 | | * None |
352 | | * |
353 | | * \author |
354 | | * Ittiam |
355 | | * |
356 | | ***************************************************************************** |
357 | | */ |
358 | | void ihevce_coarse_me_delete( |
359 | | void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id) |
360 | 261 | { |
361 | | /* --- HME sync Dep Mngr Delete --*/ |
362 | 261 | hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id); |
363 | 261 | } |
364 | | |
365 | | /** |
366 | | ******************************************************************************* |
367 | | * \if Function name : ihevce_coarse_me_set_resolution \endif |
368 | | * |
369 | | * \brief |
370 | | * Sets the resolution for ME state |
371 | | * |
372 | | * \par Description: |
373 | | * ME requires information of resolution to prime up its layer descriptors |
374 | | * and contexts. This API is called whenever a control call from application |
375 | | * causes a change of resolution. Has to be called once initially before |
376 | | * processing any frame. Again this is just a glue function and calls the |
377 | | * actual ME API for the same. |
378 | | * |
379 | | * \param[in,out] pv_me_ctxt: Handle to the ME context |
380 | | * \param[in] n_enc_layers: Number of layers getting encoded |
381 | | * \param[in] p_wd : Pointer containing widths of each layer getting encoded. |
382 | | * \param[in] p_ht : Pointer containing heights of each layer getting encoded. |
383 | | * |
384 | | * \returns |
385 | | * none |
386 | | * |
387 | | * \author |
388 | | * Ittiam |
389 | | * |
390 | | ******************************************************************************* |
391 | | */ |
392 | | void ihevce_coarse_me_set_resolution( |
393 | | void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht) |
394 | 261 | { |
395 | | /* local variables */ |
396 | 261 | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
397 | 261 | WORD32 thrds; |
398 | | |
399 | 522 | for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) |
400 | 261 | { |
401 | 261 | coarse_me_ctxt_t *ps_me_thrd_ctxt; |
402 | | |
403 | 261 | ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; |
404 | | |
405 | 261 | hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht); |
406 | 261 | } |
407 | 261 | } |
408 | | void ihevce_coarse_me_get_rc_param( |
409 | | void *pv_me_ctxt, |
410 | | LWORD64 *i8_acc_frame_hme_cost, |
411 | | LWORD64 *i8_acc_frame_hme_sad, |
412 | | LWORD64 *i8_acc_num_blks_higher_sad, |
413 | | LWORD64 *i8_total_blks, |
414 | | WORD32 i4_is_prev_pic_same_scene) |
415 | 6.25k | { |
416 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
417 | 6.25k | WORD32 thrds; |
418 | 6.25k | coarse_me_ctxt_t *ps_me_thrd_ctxt; |
419 | | |
420 | 6.25k | *i8_acc_frame_hme_cost = 0; |
421 | 6.25k | *i8_acc_frame_hme_sad = 0; |
422 | | |
423 | 12.5k | for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) |
424 | 6.25k | { |
425 | 6.25k | ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds]; |
426 | 6.25k | *i8_acc_frame_hme_cost += ps_me_thrd_ctxt->i4_L1_hme_best_cost; |
427 | | |
428 | | /*Calculate me cost wrt. to ref only for P frame */ |
429 | 6.25k | if(ps_me_thrd_ctxt->s_frm_prms.is_i_pic == ps_me_thrd_ctxt->s_frm_prms.bidir_enabled) |
430 | 3.98k | { |
431 | 3.98k | *i8_acc_num_blks_higher_sad += ps_me_thrd_ctxt->i4_num_blks_high_sad; |
432 | 3.98k | *i8_total_blks += ps_me_thrd_ctxt->i4_num_blks; |
433 | 3.98k | } |
434 | | |
435 | 6.25k | *i8_acc_frame_hme_sad += ps_me_thrd_ctxt->i4_L1_hme_sad; |
436 | 6.25k | } |
437 | 6.25k | } |
438 | | |
439 | | /*! |
440 | | ****************************************************************************** |
441 | | * \if Function name : ihevce_coarse_me_process \endif |
442 | | * |
443 | | * \brief |
444 | | * Frame level ME function |
445 | | * |
446 | | * \par Description: |
447 | | * Processing of all layers starting from coarse and going |
448 | | * to the refinement layers, except enocde layer |
449 | | * |
450 | | * \param[in] pv_ctxt : pointer to ME module |
451 | | * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) |
452 | | * \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer) |
453 | | * \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer) |
454 | | * \param[in] pd_intra_costs : pointerto intra cost buffer |
455 | | * \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt |
456 | | * \param[in] thrd_id : Thread id of the current thrd in which function is executed |
457 | | * |
458 | | * \return |
459 | | * None |
460 | | * |
461 | | * \author |
462 | | * Ittiam |
463 | | * |
464 | | ***************************************************************************** |
465 | | */ |
466 | | void ihevce_coarse_me_process( |
467 | | void *pv_me_ctxt, |
468 | | ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
469 | | multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
470 | | WORD32 thrd_id, |
471 | | WORD32 i4_ping_pong) |
472 | | |
473 | 6.25k | { |
474 | | /* local variables */ |
475 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
476 | 6.25k | coarse_me_ctxt_t *ps_thrd_ctxt; |
477 | | |
478 | | /* get the current thread ctxt pointer */ |
479 | 6.25k | ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id]; |
480 | 6.25k | ps_thrd_ctxt->thrd_id = thrd_id; |
481 | | |
482 | | /* frame level processing function */ |
483 | 6.25k | hme_coarse_process_frm( |
484 | 6.25k | (void *)ps_thrd_ctxt, |
485 | 6.25k | &ps_master_ctxt->s_ref_map, |
486 | 6.25k | &ps_master_ctxt->s_frm_prms, |
487 | 6.25k | ps_multi_thrd_ctxt, |
488 | 6.25k | i4_ping_pong, |
489 | 6.25k | &ps_master_ctxt->apv_dep_mngr_hme_sync[0]); |
490 | | |
491 | 6.25k | return; |
492 | 6.25k | } |
493 | | |
494 | | /*! |
495 | | ****************************************************************************** |
496 | | * \if Function name : ihevce_coarse_me_frame_end \endif |
497 | | * |
498 | | * \brief |
499 | | * End of frame update function performs |
500 | | * - GMV collation |
501 | | * - Dynamic Search Range collation |
502 | | * |
503 | | * \param[in] pv_ctxt : pointer to ME module |
504 | | * |
505 | | * \return |
506 | | * None |
507 | | * |
508 | | * \author |
509 | | * Ittiam |
510 | | * |
511 | | ***************************************************************************** |
512 | | */ |
513 | | void ihevce_coarse_me_frame_end(void *pv_me_ctxt) |
514 | 6.25k | { |
515 | | /* local variables */ |
516 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
517 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
518 | 6.25k | layer_ctxt_t *ps_curr_layer; |
519 | 6.25k | WORD32 num_ref, num_thrds, cur_poc; |
520 | 6.25k | WORD32 coarse_layer_id; |
521 | 6.25k | WORD32 i4_num_ref; |
522 | 6.25k | ME_QUALITY_PRESETS_T e_me_quality_preset; |
523 | | |
524 | | /* GMV collation is done for coarse Layer only */ |
525 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
526 | 6.25k | coarse_layer_id = ps_thrd0_ctxt->num_layers - 1; |
527 | 6.25k | ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id]; |
528 | 6.25k | i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; |
529 | 6.25k | e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; |
530 | | |
531 | | /* No processing is required if current pic is I pic */ |
532 | 6.25k | if(1 == ps_master_ctxt->s_frm_prms.is_i_pic) |
533 | 1.47k | { |
534 | 1.47k | return; |
535 | 1.47k | } |
536 | | |
537 | | /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */ |
538 | | /* across all threads */ |
539 | 13.8k | for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
540 | 9.10k | { |
541 | 9.10k | WORD32 i4_offset, i4_lobe_size, i4_layer_id; |
542 | 9.10k | mv_hist_t *ps_hist_thrd0; |
543 | 9.10k | dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS]; |
544 | | |
545 | 9.10k | ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref]; |
546 | | |
547 | | /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
548 | 9.10k | if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
549 | 7.36k | { |
550 | 22.9k | for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
551 | 15.5k | { |
552 | 15.5k | aps_dyn_range_prms_thrd0[i4_layer_id] = |
553 | 15.5k | &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
554 | 15.5k | } |
555 | 7.36k | } |
556 | | |
557 | 9.10k | i4_lobe_size = ps_hist_thrd0->i4_lobe1_size; |
558 | 9.10k | i4_offset = i4_lobe_size >> 1; |
559 | | |
560 | | /* run a loop over all the other threads to add up the histogram */ |
561 | | /* and to update the dynamical search range */ |
562 | 9.10k | for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
563 | 0 | { |
564 | 0 | dyn_range_prms_t *ps_dyn_range_prms; |
565 | |
|
566 | 0 | if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
567 | 0 | { |
568 | 0 | mv_hist_t *ps_hist; |
569 | 0 | WORD32 i4_y, i4_x; |
570 | | /* get current thrd histogram pointer */ |
571 | 0 | ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref]; |
572 | | |
573 | | /* Accumalate the Bin count for all the thread */ |
574 | 0 | for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++) |
575 | 0 | { |
576 | 0 | for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++) |
577 | 0 | { |
578 | 0 | S32 i4_bin_id; |
579 | |
|
580 | 0 | i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols); |
581 | |
|
582 | 0 | ps_hist_thrd0->ai4_bin_count[i4_bin_id] += |
583 | 0 | ps_hist->ai4_bin_count[i4_bin_id]; |
584 | 0 | } |
585 | 0 | } |
586 | 0 | } |
587 | | |
588 | | /* Update the dynamical search range for each Layer */ |
589 | | /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
590 | 0 | if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
591 | 0 | { |
592 | 0 | for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
593 | 0 | { |
594 | | /* get current thrd, layer dynamical search range param. pointer */ |
595 | 0 | ps_dyn_range_prms = |
596 | 0 | &ps_master_ctxt->aps_me_ctxt[num_thrds] |
597 | 0 | ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
598 | | /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */ |
599 | 0 | hme_update_dynamic_search_params( |
600 | 0 | aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y); |
601 | |
|
602 | 0 | hme_update_dynamic_search_params( |
603 | 0 | aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y); |
604 | 0 | } |
605 | 0 | } |
606 | 0 | } |
607 | 9.10k | } |
608 | | |
609 | | /*************************************************************************/ |
610 | | /* Get the MAX/MIN per POC distance based on the all the ref. pics */ |
611 | | /*************************************************************************/ |
612 | | /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
613 | 4.77k | if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled) |
614 | 3.98k | { |
615 | 3.98k | WORD32 i4_layer_id; |
616 | 3.98k | cur_poc = ps_thrd0_ctxt->i4_curr_poc; |
617 | | |
618 | 12.3k | for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
619 | 8.37k | { |
620 | 8.37k | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0; |
621 | 8.37k | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0; |
622 | 8.37k | } |
623 | | |
624 | 11.3k | for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
625 | 7.36k | { |
626 | 22.9k | for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
627 | 15.5k | { |
628 | 15.5k | WORD16 i2_mv_per_poc; |
629 | 15.5k | WORD32 ref_poc, poc_diff; |
630 | 15.5k | dyn_range_prms_t *ps_dyn_range_prms_thrd0; |
631 | | |
632 | 15.5k | ps_dyn_range_prms_thrd0 = |
633 | 15.5k | &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref]; |
634 | | |
635 | 15.5k | ref_poc = ps_dyn_range_prms_thrd0->i4_poc; |
636 | 15.5k | ASSERT(ref_poc < cur_poc); |
637 | 15.5k | poc_diff = (cur_poc - ref_poc); |
638 | | |
639 | | /* cur. ref. pic. max y per POC */ |
640 | 15.5k | i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff; |
641 | | /* update the max y per POC */ |
642 | 15.5k | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = |
643 | 15.5k | MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id], |
644 | 15.5k | i2_mv_per_poc); |
645 | | |
646 | | /* cur. ref. pic. min y per POC */ |
647 | 15.5k | i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff; |
648 | | /* update the min y per POC */ |
649 | 15.5k | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = |
650 | 15.5k | MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id], |
651 | 15.5k | i2_mv_per_poc); |
652 | 15.5k | } |
653 | 7.36k | } |
654 | | |
655 | | /*************************************************************************/ |
656 | | /* Populate the results to all thread ctxt */ |
657 | | /*************************************************************************/ |
658 | 3.98k | for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
659 | 0 | { |
660 | 0 | for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) |
661 | 0 | { |
662 | 0 | ps_master_ctxt->aps_me_ctxt[num_thrds] |
663 | 0 | ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = |
664 | 0 | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id]; |
665 | |
|
666 | 0 | ps_master_ctxt->aps_me_ctxt[num_thrds] |
667 | 0 | ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = |
668 | 0 | ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id]; |
669 | 0 | } |
670 | 0 | } |
671 | 3.98k | } |
672 | | |
673 | 4.77k | if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
674 | 3.37k | { |
675 | | /* call the function which calcualtes the GMV */ |
676 | | /* layer pointer is shared across all threads */ |
677 | | /* hence all threads will have access to updated */ |
678 | | /* GMVs populated using thread 0 ctxt */ |
679 | 10.5k | for(num_ref = 0; num_ref < i4_num_ref; num_ref++) |
680 | 7.15k | { |
681 | 7.15k | hme_calculate_global_mv( |
682 | 7.15k | ps_thrd0_ctxt->aps_mv_hist[num_ref], |
683 | 7.15k | &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE], |
684 | 7.15k | GMV_THICK_LOBE); |
685 | 7.15k | } |
686 | 3.37k | } |
687 | 4.77k | return; |
688 | 4.77k | } |
689 | | |
690 | | /*! |
691 | | ****************************************************************************** |
692 | | * \if Function name : ihevce_coarse_me_frame_dpb_update \endif |
693 | | * |
694 | | * \brief |
695 | | * Frame level ME initialisation function |
696 | | * |
697 | | * \par Description: |
698 | | * Updation of ME's internal DPB |
699 | | * based on available ref list information |
700 | | * |
701 | | * \param[in] pv_ctxt : pointer to ME module |
702 | | * \param[in] num_ref_l0 : Number of reference pics in L0 list |
703 | | * \param[in] num_ref_l1 : Number of reference pics in L1 list |
704 | | * \param[in] pps_rec_list_l0 : List of recon pics in L0 list |
705 | | * \param[in] pps_rec_list_l1 : List of recon pics in L1 list |
706 | | * |
707 | | * \return |
708 | | * None |
709 | | * |
710 | | * \author |
711 | | * Ittiam |
712 | | * |
713 | | ***************************************************************************** |
714 | | */ |
715 | | void ihevce_coarse_me_frame_dpb_update( |
716 | | void *pv_me_ctxt, |
717 | | WORD32 num_ref_l0, |
718 | | WORD32 num_ref_l1, |
719 | | recon_pic_buf_t **pps_rec_list_l0, |
720 | | recon_pic_buf_t **pps_rec_list_l1) |
721 | 6.25k | { |
722 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
723 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
724 | 6.25k | WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1]; |
725 | 6.25k | WORD32 a_pocs_to_remove[MAX_NUM_REF + 2]; |
726 | 6.25k | WORD32 poc_remove_id = 0; |
727 | 6.25k | WORD32 i, count; |
728 | | |
729 | | /* All processing done using shared / common memory across */ |
730 | | /* threads is done using thrd ctxt */ |
731 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
732 | | |
733 | | /*************************************************************************/ |
734 | | /* Updation of ME's DPB list. This involves the following steps: */ |
735 | | /* 1. Obtain list of active POCs maintained within ME. */ |
736 | | /* 2. Search each of them in the ref list. Whatever is not found goes to */ |
737 | | /* the list to be removed. Note: a_pocs_buffered_in_me holds the */ |
738 | | /* currently active POC list within ME. a_pocs_to_remove holds the */ |
739 | | /* list of POCs to be removed, terminated by -1. */ |
740 | | /*************************************************************************/ |
741 | 6.25k | hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me); |
742 | | |
743 | 6.25k | count = 0; |
744 | 29.1k | while(a_pocs_buffered_in_me[count] != -1) |
745 | 22.8k | { |
746 | 22.8k | WORD32 poc_to_search = a_pocs_buffered_in_me[count]; |
747 | 22.8k | WORD32 match_found_flag = 0; |
748 | | |
749 | | /*********************************************************************/ |
750 | | /* Search in any one list (L0/L1) since both lists contain all the */ |
751 | | /* active ref pics. */ |
752 | | /*********************************************************************/ |
753 | 56.6k | for(i = 0; i < num_ref_l0; i++) |
754 | 50.2k | { |
755 | 50.2k | if(poc_to_search == pps_rec_list_l0[i]->i4_poc) |
756 | 16.3k | { |
757 | 16.3k | match_found_flag = 1; |
758 | 16.3k | break; |
759 | 16.3k | } |
760 | 50.2k | } |
761 | 25.7k | for(i = 0; i < num_ref_l1; i++) |
762 | 3.94k | { |
763 | 3.94k | if(poc_to_search == pps_rec_list_l1[i]->i4_poc) |
764 | 1.04k | { |
765 | 1.04k | match_found_flag = 1; |
766 | 1.04k | break; |
767 | 1.04k | } |
768 | 3.94k | } |
769 | | |
770 | 22.8k | if(0 == match_found_flag) |
771 | 5.42k | { |
772 | | /*****************************************************************/ |
773 | | /* POC buffered inside ME but not part of ref list given by DPB */ |
774 | | /* Hence this needs to be flagged to ME for removal. */ |
775 | | /*****************************************************************/ |
776 | 5.42k | a_pocs_to_remove[poc_remove_id] = poc_to_search; |
777 | 5.42k | poc_remove_id++; |
778 | 5.42k | } |
779 | 22.8k | count++; |
780 | 22.8k | } |
781 | | |
782 | | /* List termination */ |
783 | 6.25k | a_pocs_to_remove[poc_remove_id] = -1; |
784 | | |
785 | | /* Call the ME API to remove "outdated" POCs */ |
786 | 6.25k | hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove); |
787 | 6.25k | } |
788 | | |
789 | | /*! |
790 | | ****************************************************************************** |
791 | | * \if Function name : ihevce_coarse_me_frame_init \endif |
792 | | * |
793 | | * \brief |
794 | | * Coarse Frame level ME initialisation function |
795 | | * |
796 | | * \par Description: |
797 | | * The following pre-conditions exist for this function: a. We have the input |
798 | | * pic ready for encode, b. We have the reference list with POC, L0/L1 IDs |
799 | | * and ref ptrs ready for this picture and c. ihevce_me_set_resolution has |
800 | | * been called atleast once. Once these are supplied, the following are |
801 | | * done here: a. Input pyramid creation, b. Updation of ME's internal DPB |
802 | | * based on available ref list information |
803 | | * |
804 | | * \param[in] pv_ctxt : pointer to ME module |
805 | | * \param[in] ps_frm_ctb_prms : CTB characteristics parameters |
806 | | * \param[in] ps_frm_lamda : Frame level Lambda params |
807 | | * \param[in] num_ref_l0 : Number of reference pics in L0 list |
808 | | * \param[in] num_ref_l1 : Number of reference pics in L1 list |
809 | | * \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0) |
810 | | * \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1) |
811 | | * \param[in] pps_rec_list_l0 : List of recon pics in L0 list |
812 | | * \param[in] pps_rec_list_l1 : List of recon pics in L1 list |
813 | | * \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer) |
814 | | * \param[in] i4_frm_qp : current picture QP |
815 | | * |
816 | | * \return |
817 | | * None |
818 | | * |
819 | | * \author |
820 | | * Ittiam |
821 | | * |
822 | | ***************************************************************************** |
823 | | */ |
824 | | void ihevce_coarse_me_frame_init( |
825 | | void *pv_me_ctxt, |
826 | | ihevce_static_cfg_params_t *ps_stat_prms, |
827 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
828 | | frm_lambda_ctxt_t *ps_frm_lamda, |
829 | | WORD32 num_ref_l0, |
830 | | WORD32 num_ref_l1, |
831 | | WORD32 num_ref_l0_active, |
832 | | WORD32 num_ref_l1_active, |
833 | | recon_pic_buf_t **pps_rec_list_l0, |
834 | | recon_pic_buf_t **pps_rec_list_l1, |
835 | | ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
836 | | WORD32 i4_frm_qp, |
837 | | ihevce_ed_blk_t *ps_layer1_buf, //EIID |
838 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
839 | | UWORD8 *pu1_me_reverse_map_info, |
840 | | WORD32 i4_temporal_layer_id) |
841 | 6.25k | { |
842 | | /* local variables */ |
843 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
844 | 6.25k | coarse_me_ctxt_t *ps_ctxt; |
845 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
846 | 6.25k | WORD32 inp_poc, num_ref; |
847 | 6.25k | WORD32 i; |
848 | | |
849 | | /* Input POC is derived from input buffer */ |
850 | 6.25k | inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc; |
851 | 6.25k | num_ref = num_ref_l0 + num_ref_l1; |
852 | | |
853 | | /* All processing done using shared / common memory across */ |
854 | | /* threads is done using thrd 0 ctxt */ |
855 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
856 | | |
857 | 6.25k | ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active; |
858 | 6.25k | ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active; |
859 | | |
860 | | /* store the frm ctb ctxt to all the thrd ctxt */ |
861 | 6.25k | { |
862 | 6.25k | WORD32 num_thrds; |
863 | | |
864 | | /* initialise the parameters for all the threads */ |
865 | 12.5k | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
866 | 6.25k | { |
867 | 6.25k | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
868 | 6.25k | ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms; |
869 | | /*EIID: early decision buffer pointer */ |
870 | 6.25k | ps_ctxt->ps_ed_blk = ps_layer1_buf; |
871 | 6.25k | ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1; |
872 | | |
873 | | /* weighted pred enable flag */ |
874 | 6.25k | ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag | |
875 | 6.25k | ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag; |
876 | | |
877 | 6.25k | if(1 == ps_ctxt->i4_wt_pred_enable_flag) |
878 | 0 | { |
879 | | /* log2 weight denom */ |
880 | 0 | ps_ctxt->s_wt_pred.wpred_log_wdc = |
881 | 0 | ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom; |
882 | 0 | } |
883 | 6.25k | else |
884 | 6.25k | { |
885 | | /* default value */ |
886 | 6.25k | ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT; |
887 | 6.25k | } |
888 | 6.25k | ps_ctxt->i4_L1_hme_best_cost = 0; |
889 | 6.25k | ps_ctxt->i4_L1_hme_sad = 0; |
890 | 6.25k | ps_ctxt->i4_num_blks_high_sad = 0; |
891 | 6.25k | ps_ctxt->i4_num_blks = 0; |
892 | | |
893 | 6.25k | ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list; |
894 | 6.25k | ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func; |
895 | 6.25k | } |
896 | 6.25k | } |
897 | | /* Create the reference map for ME */ |
898 | 6.25k | ihevce_me_create_ref_map( |
899 | 6.25k | pps_rec_list_l0, |
900 | 6.25k | pps_rec_list_l1, |
901 | 6.25k | num_ref_l0_active, |
902 | 6.25k | num_ref_l1_active, |
903 | 6.25k | num_ref, |
904 | 6.25k | &ps_master_ctxt->s_ref_map); |
905 | | /*************************************************************************/ |
906 | | /* Call the ME frame level processing for further actiion. */ |
907 | | /* ToDo: Support Row Level API. */ |
908 | | /*************************************************************************/ |
909 | 6.25k | ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range; |
910 | 6.25k | ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range; |
911 | | |
912 | 6.25k | ps_master_ctxt->s_frm_prms.is_i_pic = 0; |
913 | 6.25k | ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id; |
914 | | |
915 | 6.25k | ps_master_ctxt->s_frm_prms.is_pic_second_field = |
916 | 6.25k | (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^ |
917 | 6.25k | ps_enc_lap_inp->s_input_buf.i4_topfield_first)); |
918 | 6.25k | { |
919 | 6.25k | S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type; |
920 | | |
921 | | /*********************************************************************/ |
922 | | /* For I Pic, we do not call update fn at ctb level, instead we do */ |
923 | | /* one shot update for entire picture. */ |
924 | | /*********************************************************************/ |
925 | 6.25k | if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME)) |
926 | 1.47k | { |
927 | 1.47k | ps_master_ctxt->s_frm_prms.is_i_pic = 1; |
928 | 1.47k | ps_master_ctxt->s_frm_prms.bidir_enabled = 0; |
929 | 1.47k | } |
930 | 4.77k | else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME)) |
931 | 3.98k | { |
932 | 3.98k | ps_master_ctxt->s_frm_prms.bidir_enabled = 0; |
933 | 3.98k | } |
934 | 783 | else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME)) |
935 | 783 | { |
936 | 783 | ps_master_ctxt->s_frm_prms.bidir_enabled = 1; |
937 | 783 | } |
938 | 0 | else |
939 | 0 | { |
940 | | /* not sure whether we need to handle mixed frames like IP, */ |
941 | | /* they should ideally come as single field. */ |
942 | | /* TODO : resolve thsi ambiguity */ |
943 | 0 | ASSERT(0); |
944 | 0 | } |
945 | 6.25k | } |
946 | | /************************************************************************/ |
947 | | /* Lambda calculations moved outside ME and to one place, so as to have */ |
948 | | /* consistent lambda across ME, IPE, CL RDOPT etc */ |
949 | | /************************************************************************/ |
950 | | |
951 | 6.25k | { |
952 | 6.25k | #define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val))) |
953 | 6.25k | double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 }; |
954 | 6.25k | double d_b_pic_factor; |
955 | 6.25k | double d_q_factor; |
956 | | //double d_lambda; |
957 | 6.25k | UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id; |
958 | | |
959 | 6.25k | if(u1_temp_hier) |
960 | 783 | { |
961 | 783 | d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0); |
962 | 783 | } |
963 | 5.46k | else |
964 | 5.46k | d_b_pic_factor = 1.0; |
965 | | |
966 | 6.25k | d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6]; |
967 | 6.25k | ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor; |
968 | 6.25k | ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp; |
969 | 6.25k | } |
970 | | |
971 | | /* HME Dependency Manager : Reset the num ctb processed in every row */ |
972 | | /* for ME sync in every layer */ |
973 | 6.25k | { |
974 | 6.25k | WORD32 ctr; |
975 | 19.3k | for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++) |
976 | 13.0k | { |
977 | 13.0k | void *pv_dep_mngr_state; |
978 | 13.0k | pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1]; |
979 | | |
980 | 13.0k | ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state); |
981 | 13.0k | } |
982 | 6.25k | } |
983 | | |
984 | | /* Frame level init of all threads of ME */ |
985 | 6.25k | { |
986 | 6.25k | WORD32 num_thrds; |
987 | | |
988 | | /* initialise the parameters for all the threads */ |
989 | 12.5k | for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++) |
990 | 6.25k | { |
991 | 6.25k | ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]; |
992 | | |
993 | 6.25k | hme_coarse_process_frm_init( |
994 | 6.25k | (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms); |
995 | 6.25k | } |
996 | 6.25k | } |
997 | | |
998 | 6.25k | ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf; |
999 | 6.25k | ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf; |
1000 | 6.25k | ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf; |
1001 | 6.25k | ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf; |
1002 | 6.25k | ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT; |
1003 | | |
1004 | 6.25k | ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL; |
1005 | | |
1006 | | /*************************************************************************/ |
1007 | | /* If num ref is 0, that means that it has to be coded as I. Do nothing */ |
1008 | | /* However mv bank update needs to happen with "intra" mv. */ |
1009 | | /*************************************************************************/ |
1010 | 6.25k | if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic) |
1011 | 1.47k | { |
1012 | 4.60k | for(i = 1; i < ps_thrd0_ctxt->num_layers; i++) |
1013 | 3.12k | { |
1014 | 3.12k | layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i]; |
1015 | 3.12k | BLK_SIZE_T e_blk_size; |
1016 | 3.12k | S32 use_4x4; |
1017 | | |
1018 | | /* The mv bank is filled with "intra" mv */ |
1019 | 3.12k | use_4x4 = hme_get_mv_blk_size( |
1020 | 3.12k | ps_thrd0_ctxt->s_init_prms.use_4x4, |
1021 | 3.12k | i, |
1022 | 3.12k | ps_thrd0_ctxt->num_layers, |
1023 | 3.12k | ps_thrd0_ctxt->u1_encode[i]); |
1024 | 3.12k | e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8; |
1025 | 3.12k | hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]); |
1026 | 3.12k | hme_fill_mvbank_intra(ps_layer_ctxt); |
1027 | | |
1028 | | /* Clear out the global mvs */ |
1029 | 3.12k | memset( |
1030 | 3.12k | ps_layer_ctxt->s_global_mv, |
1031 | 3.12k | 0, |
1032 | 3.12k | sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES); |
1033 | 3.12k | } |
1034 | | |
1035 | 1.47k | return; |
1036 | 1.47k | } |
1037 | | |
1038 | | /*************************************************************************/ |
1039 | | /* Coarse & refine Layer frm init (layer mem is common across thrds) */ |
1040 | | /*************************************************************************/ |
1041 | 4.77k | { |
1042 | 4.77k | coarse_prms_t s_coarse_prms; |
1043 | 4.77k | refine_prms_t s_refine_prms; |
1044 | 4.77k | S16 i2_max; |
1045 | 4.77k | S32 layer_id; |
1046 | | |
1047 | 4.77k | layer_id = ps_thrd0_ctxt->num_layers - 1; |
1048 | 4.77k | i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x; |
1049 | 4.77k | i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); |
1050 | 4.77k | s_coarse_prms.i4_layer_id = layer_id; |
1051 | | |
1052 | 4.77k | { |
1053 | 4.77k | S32 log_start_step; |
1054 | | /* Based on Preset, set the starting step size for Refinement */ |
1055 | 4.77k | if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets) |
1056 | 797 | { |
1057 | 797 | log_start_step = 0; |
1058 | 797 | } |
1059 | 3.97k | else |
1060 | 3.97k | { |
1061 | 3.97k | log_start_step = 1; |
1062 | 3.97k | } |
1063 | 4.77k | s_coarse_prms.i4_max_iters = i2_max >> log_start_step; |
1064 | 4.77k | s_coarse_prms.i4_start_step = 1 << log_start_step; |
1065 | 4.77k | } |
1066 | 4.77k | s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref; |
1067 | 4.77k | s_coarse_prms.do_full_search = 1; |
1068 | 4.77k | s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse; |
1069 | | |
1070 | 4.77k | hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms); |
1071 | | |
1072 | 4.77k | layer_id--; |
1073 | | |
1074 | | /*************************************************************************/ |
1075 | | /* This loop will run for all refine layers (non- encode layers) */ |
1076 | | /*************************************************************************/ |
1077 | 9.94k | while(layer_id > 0) |
1078 | 5.17k | { |
1079 | 5.17k | layer_ctxt_t *ps_curr_layer; |
1080 | 5.17k | layer_ctxt_t *ps_coarse_layer; |
1081 | | |
1082 | 5.17k | ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1]; |
1083 | | |
1084 | 5.17k | ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]; |
1085 | | |
1086 | 5.17k | hme_set_refine_prms( |
1087 | 5.17k | &s_refine_prms, |
1088 | 5.17k | ps_thrd0_ctxt->u1_encode[layer_id], |
1089 | 5.17k | ps_master_ctxt->s_ref_map.i4_num_ref, |
1090 | 5.17k | layer_id, |
1091 | 5.17k | ps_thrd0_ctxt->num_layers, |
1092 | 5.17k | ps_thrd0_ctxt->num_layers_explicit_search, |
1093 | 5.17k | ps_thrd0_ctxt->s_init_prms.use_4x4, |
1094 | 5.17k | &ps_master_ctxt->s_frm_prms, |
1095 | 5.17k | NULL, |
1096 | 5.17k | &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools); |
1097 | | |
1098 | 5.17k | hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer); |
1099 | | |
1100 | 5.17k | layer_id--; |
1101 | 5.17k | } |
1102 | 4.77k | } |
1103 | | |
1104 | 4.77k | return; |
1105 | 6.25k | } |
1106 | | |
1107 | | /*! |
1108 | | ****************************************************************************** |
1109 | | * \if Function name : ihevce_decomp_pre_intra_frame_init \endif |
1110 | | * |
1111 | | * \brief |
1112 | | * Frame Intialization for Decomp intra pre analysis. |
1113 | | * |
1114 | | * \param[in] pv_ctxt : pointer to module ctxt |
1115 | | * \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers |
1116 | | * \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides |
1117 | | * |
1118 | | * \return |
1119 | | * None |
1120 | | * |
1121 | | * \author |
1122 | | * Ittiam |
1123 | | * |
1124 | | ***************************************************************************** |
1125 | | */ |
1126 | | WORD32 ihevce_coarse_me_get_lyr_buf_desc( |
1127 | | void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride) |
1128 | 6.25k | { |
1129 | | /* local variables */ |
1130 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
1131 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
1132 | 6.25k | WORD32 lyr_no; |
1133 | 6.25k | layers_descr_t *ps_curr_descr; |
1134 | 6.25k | WORD32 i4_free_idx; |
1135 | | |
1136 | | /* All processing done using shared / common memory across */ |
1137 | | /* threads is done using thrd0 ctxt */ |
1138 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
1139 | | |
1140 | | /* Obtain an empty layer descriptor */ |
1141 | 6.25k | i4_free_idx = hme_coarse_find_free_descr_idx((void *)ps_thrd0_ctxt); |
1142 | | |
1143 | 6.25k | ps_curr_descr = &ps_thrd0_ctxt->as_ref_descr[i4_free_idx]; |
1144 | | |
1145 | | /* export all the layer buffers except Layer 0 (encode layer) */ |
1146 | 19.3k | for(lyr_no = 1; lyr_no < ps_thrd0_ctxt->num_layers; lyr_no++) |
1147 | 13.0k | { |
1148 | 13.0k | pi4_lyr_buf_stride[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->i4_inp_stride; |
1149 | 13.0k | ppu1_decomp_lyr_bufs[lyr_no - 1] = ps_curr_descr->aps_layers[lyr_no]->pu1_inp; |
1150 | 13.0k | } |
1151 | | |
1152 | 6.25k | return (i4_free_idx); |
1153 | 6.25k | } |
1154 | | |
1155 | | /*! |
1156 | | ****************************************************************************** |
1157 | | * \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif |
1158 | | * |
1159 | | * \brief Returns to the caller key attributes related to dependency between layers |
1160 | | * for multi-thread execution |
1161 | | * |
1162 | | * |
1163 | | * \par Description: |
1164 | | * This function requires the precondition that the width and ht of encode |
1165 | | * layer is known, and ME API ihevce_me_set_resolution() API called with |
1166 | | * this info. Based on this, ME populates useful information for the encoder |
1167 | | * to execute the multi-thread (concurrent across layers) in this API. |
1168 | | * The number of layers, number of vertical units in each layer, and for |
1169 | | * each vertial unit in each layer, its dependency on previous layer's units |
1170 | | * From ME's perspective, a vertical unit is one which is smallest min size |
1171 | | * vertically (and spans the entire row horizontally). This is CTB for encode |
1172 | | * layer, and 8x8 / 4x4 for non encode layers. |
1173 | | * |
1174 | | * \param[in] pv_ctxt : ME handle |
1175 | | * \param[in] ps_curr_inp : Input buffer descriptor |
1176 | | * \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates) |
1177 | | * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each |
1178 | | * entry has num vertical units in that particular layer |
1179 | | * \param[in] ps_me_job_q_prms : Array of job queue prms, one for each unit in a |
1180 | | * layer. Note that this is contiguous in order of processing |
1181 | | * All k units of layer N-1 from top to bottom, followed by |
1182 | | * all m units of layer N-2 .... ends with X units of layer 0 |
1183 | | * |
1184 | | * \return |
1185 | | * None |
1186 | | * |
1187 | | * \author |
1188 | | * Ittiam |
1189 | | * |
1190 | | ***************************************************************************** |
1191 | | */ |
1192 | | void ihevce_coarse_me_get_lyr_prms_job_que( |
1193 | | void *pv_me_ctxt, |
1194 | | ihevce_lap_enc_buf_t *ps_curr_inp, |
1195 | | WORD32 *pi4_num_hme_lyrs, |
1196 | | WORD32 *pi4_num_vert_units_in_lyr, |
1197 | | multi_thrd_me_job_q_prms_t *ps_me_job_q_prms) |
1198 | 6.25k | { |
1199 | 6.25k | coarse_me_ctxt_t *ps_ctxt; |
1200 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
1201 | | |
1202 | | /* These arrays and ptrs track input dependencies for units of a layer */ |
1203 | | /* This is a ping poing design, while using one part, we update other part */ |
1204 | 6.25k | U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM]; |
1205 | 6.25k | U08 *pu1_inp_dep_c, *pu1_inp_dep_n; |
1206 | | |
1207 | | /* Height of current and next layers */ |
1208 | 6.25k | S32 ht_c, ht_n; |
1209 | | |
1210 | | /* Blk ht at a given layer and next layer*/ |
1211 | 6.25k | S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n; |
1212 | | |
1213 | | /* Number of vertical units in current and next layer */ |
1214 | 6.25k | S32 num_vert_c, num_vert_n; |
1215 | | |
1216 | 6.25k | S32 ctb_size = 64, num_layers, i, j, k; |
1217 | | |
1218 | | /* since same layer desc pointer is stored in all thread ctxt */ |
1219 | | /* a free idx is obtained using 0th thread ctxt pointer */ |
1220 | 6.25k | ps_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
1221 | | |
1222 | | /* Set the number of layers */ |
1223 | 6.25k | num_layers = ps_ctxt->num_layers; |
1224 | 6.25k | *pi4_num_hme_lyrs = num_layers; |
1225 | | |
1226 | 6.25k | pu1_inp_dep_c = &au1_inp_dep[0][0]; |
1227 | 6.25k | pu1_inp_dep_n = &au1_inp_dep[1][0]; |
1228 | | |
1229 | 6.25k | ASSERT(num_layers >= 2); |
1230 | | |
1231 | 6.25k | ht_n = ps_ctxt->a_ht[num_layers - 2]; |
1232 | 6.25k | ht_c = ps_ctxt->a_ht[num_layers - 1]; |
1233 | | |
1234 | | /* compute blk ht and unit ht for c and n */ |
1235 | 6.25k | if(ps_ctxt->u1_encode[num_layers - 1]) |
1236 | 0 | { |
1237 | 0 | blk_ht_c = 16; |
1238 | 0 | unit_ht_c = ctb_size; |
1239 | 0 | } |
1240 | 6.25k | else |
1241 | 6.25k | { |
1242 | 6.25k | blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0); |
1243 | 6.25k | unit_ht_c = blk_ht_c; |
1244 | 6.25k | } |
1245 | | |
1246 | 6.25k | num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c; |
1247 | | |
1248 | | /* For new design in Coarsest HME layer we need */ |
1249 | | /* one additional row extra at the end of frame */ |
1250 | | /* hence num_vert_c is incremented by 1 */ |
1251 | 6.25k | num_vert_c++; |
1252 | | |
1253 | | /* Dummy initialization outside loop, not used first time */ |
1254 | 6.25k | memset(pu1_inp_dep_c, 0, num_vert_c); |
1255 | | |
1256 | | /*************************************************************************/ |
1257 | | /* Run through each layer, set the number of vertical units and job queue*/ |
1258 | | /* attrs for each vert unit in the layer */ |
1259 | | /*************************************************************************/ |
1260 | 19.3k | for(i = num_layers - 1; i > 0; i--) |
1261 | 13.0k | { |
1262 | | /* 0th entry is actually layer id num_layers - 1 */ |
1263 | | /* and entry num_layers-1 equals the biggest layer (id = 0) */ |
1264 | 13.0k | pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c; |
1265 | | /* "n" is computed for first time */ |
1266 | 13.0k | ht_n = ps_ctxt->a_ht[i - 1]; |
1267 | 13.0k | blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0); |
1268 | 13.0k | unit_ht_n = blk_ht_n; |
1269 | 13.0k | if(ps_ctxt->u1_encode[i - 1]) |
1270 | 6.25k | unit_ht_n = ctb_size; |
1271 | | |
1272 | 13.0k | num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n; |
1273 | | /* Initialize all units' inp dep in next layer to 0 */ |
1274 | 13.0k | memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08)); |
1275 | | |
1276 | | /* Evaluate dependencies for this layer */ |
1277 | 98.6k | for(j = 0; j < num_vert_c; j++) |
1278 | 85.6k | { |
1279 | 85.6k | S32 v1, v2; |
1280 | | |
1281 | | /* Output dependencies. When one unit in current layer finishes, */ |
1282 | | /* how many in the next layer it affects?. Assuming that the top */ |
1283 | | /* of this vertical unit and bottom of this vertical unit project*/ |
1284 | | /* somewhere in the next layer. The top of this vertical unit */ |
1285 | | /* becomes the bottom right point for somebody, and the bottom of*/ |
1286 | | /* this vertical unit becomes the colocated pt for somebody, this*/ |
1287 | | /* is the extremum. */ |
1288 | | |
1289 | | /* for the initial unit affected by j in "c" layer, take j-1th */ |
1290 | | /* unit top and project it. */ |
1291 | 85.6k | v1 = (j - 1) * unit_ht_c * ht_n; |
1292 | 85.6k | v1 /= (ht_c * unit_ht_n); |
1293 | 85.6k | v1 -= 1; |
1294 | | |
1295 | | /* for the final unit affected by j in "c" layer, take jth unit */ |
1296 | | /* bottom and project it. */ |
1297 | | |
1298 | 85.6k | v2 = (j + 1) * unit_ht_c * ht_n; |
1299 | 85.6k | v2 /= (ht_c * unit_ht_n); |
1300 | 85.6k | v2 += 1; |
1301 | | |
1302 | | /* Clip to be within valid limits */ |
1303 | 85.6k | v1 = HME_CLIP(v1, 0, (num_vert_n - 1)); |
1304 | 85.6k | v2 = HME_CLIP(v2, 0, (num_vert_n - 1)); |
1305 | | |
1306 | | /* In the layer "n", units starting at offset v1, and upto v2 are*/ |
1307 | | /* dependent on unit j of layer "c". So for each of these units */ |
1308 | | /* increment the dependency by 1 corresponding to "jth" unit in */ |
1309 | | /* layer "c" */ |
1310 | 85.6k | ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1; |
1311 | 85.6k | ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP); |
1312 | 336k | for(k = v1; k <= v2; k++) |
1313 | 250k | pu1_inp_dep_n[k]++; |
1314 | | |
1315 | | /* Input dependency would have been calculated in prev run */ |
1316 | 85.6k | ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; |
1317 | 85.6k | ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); |
1318 | | |
1319 | | /* Offsets */ |
1320 | 336k | for(k = v1; k <= v2; k++) |
1321 | 250k | ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k; |
1322 | | |
1323 | 85.6k | ps_me_job_q_prms++; |
1324 | 85.6k | } |
1325 | | |
1326 | | /* Compute the blk size and vert unit size in each layer */ |
1327 | | /* "c" denotes curr layer, and "n" denotes the layer to which result */ |
1328 | | /* is projected to */ |
1329 | 13.0k | ht_c = ht_n; |
1330 | 13.0k | blk_ht_c = blk_ht_n; |
1331 | 13.0k | unit_ht_c = unit_ht_n; |
1332 | 13.0k | num_vert_c = num_vert_n; |
1333 | | |
1334 | | /* Input dep count for next layer was computed this iteration. */ |
1335 | | /* Swap so that p_inp_dep_n becomes current for next iteration, */ |
1336 | | /* and p_inp_dep_c will become update area during next iteration */ |
1337 | | /* for next to next. */ |
1338 | 13.0k | { |
1339 | 13.0k | U08 *pu1_tmp = pu1_inp_dep_n; |
1340 | 13.0k | pu1_inp_dep_n = pu1_inp_dep_c; |
1341 | 13.0k | pu1_inp_dep_c = pu1_tmp; |
1342 | 13.0k | } |
1343 | 13.0k | } |
1344 | | |
1345 | | /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */ |
1346 | | |
1347 | | /* set the numebr of vertical units */ |
1348 | 6.25k | pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c; |
1349 | 16.4k | for(j = 0; j < num_vert_c; j++) |
1350 | 10.1k | { |
1351 | | /* Here there is no output dependency for ME. However this data is used for encode, */ |
1352 | | /* and there is a 1-1 correspondence between this and the encode */ |
1353 | | /* Hence we set output dependency of 1 */ |
1354 | 10.1k | ps_me_job_q_prms->i4_num_output_dep = 1; |
1355 | 10.1k | ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j; |
1356 | 10.1k | ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j]; |
1357 | 10.1k | ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP); |
1358 | 10.1k | ps_me_job_q_prms++; |
1359 | 10.1k | } |
1360 | | |
1361 | 6.25k | return; |
1362 | 6.25k | } |
1363 | | |
1364 | | /*! |
1365 | | ****************************************************************************** |
1366 | | * \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif |
1367 | | * |
1368 | | * \brief |
1369 | | * Frame level ME initialisation of MV bank of penultimate layer |
1370 | | * |
1371 | | * \par Description: |
1372 | | * Updates the Layer1 context with the given buffers |
1373 | | * |
1374 | | * \param[in] pv_me_ctxt : pointer to ME module |
1375 | | * \param[in] pu1_mv_bank : MV bank buffer pointer |
1376 | | * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer |
1377 | | * |
1378 | | * \return |
1379 | | * None |
1380 | | * |
1381 | | * \author |
1382 | | * Ittiam |
1383 | | * |
1384 | | ***************************************************************************** |
1385 | | */ |
1386 | | void ihevce_coarse_me_set_lyr1_mv_bank( |
1387 | | void *pv_me_ctxt, |
1388 | | ihevce_lap_enc_buf_t *ps_enc_lap_inp, |
1389 | | void *pv_mv_bank, |
1390 | | void *pv_ref_idx_bank, |
1391 | | WORD32 i4_curr_idx) |
1392 | 6.25k | { |
1393 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
1394 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
1395 | 6.25k | layer_ctxt_t *ps_lyr1_ctxt; |
1396 | | |
1397 | | /* Input descriptor that is updated and passed to ME */ |
1398 | 6.25k | hme_inp_desc_t s_inp_desc; |
1399 | | |
1400 | | /*************************************************************************/ |
1401 | | /* Add the current input to ME's DPB. This will also create the pyramids */ |
1402 | | /* for the HME layers tha are not "encoded". */ |
1403 | | /*************************************************************************/ |
1404 | 6.25k | s_inp_desc.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc; |
1405 | 6.25k | s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf; |
1406 | 6.25k | s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf; |
1407 | 6.25k | s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf; |
1408 | | |
1409 | 6.25k | s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd; |
1410 | 6.25k | s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd; |
1411 | | |
1412 | 6.25k | hme_coarse_add_inp(pv_me_ctxt, &s_inp_desc, i4_curr_idx); |
1413 | | |
1414 | | /* All processing done using shared / common memory across */ |
1415 | | /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ |
1416 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
1417 | | |
1418 | 6.25k | ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; |
1419 | | |
1420 | | /* register the mv bank & ref idx bank pointer */ |
1421 | 6.25k | ps_lyr1_ctxt->ps_layer_mvbank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank; |
1422 | 6.25k | ps_lyr1_ctxt->ps_layer_mvbank->ps_mv_base = (hme_mv_t *)pv_mv_bank; |
1423 | | |
1424 | 6.25k | return; |
1425 | 6.25k | } |
1426 | | |
1427 | | /*! |
1428 | | ****************************************************************************** |
1429 | | * \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif |
1430 | | * |
1431 | | * \brief |
1432 | | * function to get teh Layer 1 properties to be passed on the encode layer |
1433 | | * |
1434 | | * \par Description: |
1435 | | * Ucopies the enitre layer ctxt emory to the destination |
1436 | | * |
1437 | | * \param[in] pv_me_ctxt : pointer to ME module |
1438 | | * \param[in] pu1_mv_bank : MV bank buffer pointer |
1439 | | * \param[in] pu1_ref_idx_bank : refrence bank buffer pointer |
1440 | | * |
1441 | | * \return |
1442 | | * None |
1443 | | * |
1444 | | * \author |
1445 | | * Ittiam |
1446 | | * |
1447 | | ***************************************************************************** |
1448 | | */ |
1449 | | void ihevce_coarse_me_get_lyr1_ctxt( |
1450 | | void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt) |
1451 | 6.25k | { |
1452 | 6.25k | coarse_me_ctxt_t *ps_thrd0_ctxt; |
1453 | 6.25k | coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt; |
1454 | 6.25k | layer_ctxt_t *ps_lyr1_ctxt; |
1455 | | |
1456 | | /* All processing done using shared / common memory across */ |
1457 | | /* threads is done using thrd 0 ctxt since layer ctxt is shared accross all threads */ |
1458 | 6.25k | ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0]; |
1459 | | |
1460 | | /* get the context of layer 1 */ |
1461 | 6.25k | ps_lyr1_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[1]; |
1462 | | |
1463 | | /* copy the layer ctxt eve registerd mv bank & ref idx bank also goes in */ |
1464 | 6.25k | memcpy(pv_layer_ctxt, ps_lyr1_ctxt, sizeof(layer_ctxt_t)); |
1465 | | |
1466 | | /* copy the layer mv bank contents */ |
1467 | 6.25k | memcpy(pv_layer_mv_bank_ctxt, ps_lyr1_ctxt->ps_layer_mvbank, sizeof(layer_mv_t)); |
1468 | | |
1469 | | /* register the MV bank pointer in the layer ctxt*/ |
1470 | 6.25k | ((layer_ctxt_t *)pv_layer_ctxt)->ps_layer_mvbank = (layer_mv_t *)pv_layer_mv_bank_ctxt; |
1471 | | |
1472 | 6.25k | return; |
1473 | 6.25k | } |