Coverage Report

Created: 2025-11-05 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_multi_thrd_funcs.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
*******************************************************************************
22
* @file
23
*  ihevce_multi_thrd_funcs.c
24
*
25
* @brief
26
*  Contains functions related to job queues, and other helpers, required for multi-threading
27
*
28
* @author
29
*  Ittiam
30
*
31
* @par List of Functions:
32
*  <TODO: TO BE ADDED>
33
*
34
* @remarks
35
*  None
36
*
37
*******************************************************************************
38
*/
39
/*****************************************************************************/
40
/* File Includes                                                             */
41
/*****************************************************************************/
42
/* System include files */
43
#include <stdio.h>
44
#include <string.h>
45
#include <stdlib.h>
46
#include <assert.h>
47
#include <stdarg.h>
48
#include <math.h>
49
50
/* User include files */
51
#include "ihevc_typedefs.h"
52
#include "itt_video_api.h"
53
#include "ihevce_api.h"
54
55
#include "rc_cntrl_param.h"
56
#include "rc_frame_info_collector.h"
57
#include "rc_look_ahead_params.h"
58
59
#include "ihevc_defs.h"
60
#include "ihevc_structs.h"
61
#include "ihevc_platform_macros.h"
62
#include "ihevc_deblk.h"
63
#include "ihevc_itrans_recon.h"
64
#include "ihevc_chroma_itrans_recon.h"
65
#include "ihevc_chroma_intra_pred.h"
66
#include "ihevc_intra_pred.h"
67
#include "ihevc_inter_pred.h"
68
#include "ihevc_mem_fns.h"
69
#include "ihevc_padding.h"
70
#include "ihevc_weighted_pred.h"
71
#include "ihevc_sao.h"
72
#include "ihevc_resi_trans.h"
73
#include "ihevc_quant_iquant_ssd.h"
74
#include "ihevc_cabac_tables.h"
75
76
#include "ihevce_defs.h"
77
#include "ihevce_lap_enc_structs.h"
78
#include "ihevce_multi_thrd_structs.h"
79
#include "ihevce_multi_thrd_funcs.h"
80
#include "ihevce_me_common_defs.h"
81
#include "ihevce_had_satd.h"
82
#include "ihevce_error_codes.h"
83
#include "ihevce_bitstream.h"
84
#include "ihevce_cabac.h"
85
#include "ihevce_rdoq_macros.h"
86
#include "ihevce_function_selector.h"
87
#include "ihevce_enc_structs.h"
88
#include "ihevce_entropy_structs.h"
89
#include "ihevce_cmn_utils_instr_set_router.h"
90
#include "ihevce_enc_loop_structs.h"
91
#include "ihevce_bs_compute_ctb.h"
92
#include "ihevce_global_tables.h"
93
#include "ihevce_dep_mngr_interface.h"
94
#include "hme_datatype.h"
95
#include "hme_interface.h"
96
#include "hme_common_defs.h"
97
#include "hme_defs.h"
98
#include "ihevce_me_instr_set_router.h"
99
#include "ihevce_ipe_instr_set_router.h"
100
#include "ihevce_ipe_structs.h"
101
#include "ihevce_coarse_me_pass.h"
102
103
#include "cast_types.h"
104
#include "osal.h"
105
#include "osal_defaults.h"
106
107
/********************************************************************/
108
/*Macros                                                            */
109
/********************************************************************/
110
#define MULT_FACT 100
111
112
/*****************************************************************************/
113
/* Function Definitions                                                      */
114
/*****************************************************************************/
115
/* Returns 1 as soon as a non-zero byte is seen in buf[0 .. size-1], else 0. */
/* The buffer is read through a volatile pointer, one byte at a time, in     */
/* ascending order, so every call re-reads the memory.                       */
static inline WORD32 ihevce_is_nonzero(volatile UWORD8 *buf, WORD32 size)
{
    WORD32 idx = 0;

    while(idx < size)
    {
        if(0 != buf[idx])
        {
            return 1;
        }
        idx++;
    }

    return 0;
}
125
/**
126
*******************************************************************************
127
*
128
* @brief Function Pops out the next Job in the appropriate Job Que
129
*
130
* @par Description: Does under mutex lock to ensure thread safe
131
*
132
* @param[inout] pv_multi_thrd_ctxt
133
*  Pointer to Multi thread context
134
*
135
* @param[in] i4_job_type
136
*   Job type from which a job needs to be popped out
137
*
138
* @param[in] i4_blocking_mode
139
*   Mode of operation
140
*
141
* @returns
142
*  None
143
*
144
* @remarks
145
*
146
*******************************************************************************
147
*/
148
void *ihevce_pre_enc_grp_get_next_job(
149
    void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_ping_pong)
150
2.54M
{
151
    /* Local variables */
152
2.54M
    multi_thrd_ctxt_t *ps_multi_thrd;
153
2.54M
    job_queue_handle_t *ps_job_queue_hdl;
154
2.54M
    void *pv_next = NULL;
155
2.54M
    void *pv_job_q_mutex_hdl_pre_enc = NULL;
156
157
    /* Derive local variables */
158
2.54M
    ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
159
2.54M
    ps_job_queue_hdl =
160
2.54M
        (job_queue_handle_t *)&ps_multi_thrd->as_job_que_preenc_hdls[i4_ping_pong][i4_job_type];
161
162
    /* lock the mutex for Q access */
163
    /* As design must facilitate for parallelism in each stage,
164
    It is recommended to have seperate mutex for each stage*/
165
2.54M
    if(i4_job_type < ME_JOB_LYR4)
166
1.17M
    {
167
1.17M
        pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_decomp;
168
1.17M
    }
169
1.36M
    else if(i4_job_type < IPE_JOB_LYR0)
170
1.16M
    {
171
1.16M
        pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_hme;
172
1.16M
    }
173
201k
    else
174
201k
    {
175
201k
        pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_l0ipe;
176
201k
    }
177
178
2.54M
    osal_mutex_lock(pv_job_q_mutex_hdl_pre_enc);
179
    /* Get the next */
180
2.54M
    pv_next = ps_job_queue_hdl->pv_next;
181
182
    /* Update the next by checking input dependency */
183
2.54M
    if(NULL != pv_next)
184
1.97M
    {
185
1.97M
        job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
186
187
        /* check for input dependencies to be resolved            */
188
        /* this can be blocking or non blocking based on use case */
189
        /* if non blocking then the function returns NULL         */
190
191
1.97M
        if(1 == i4_blocking_mode)
192
1.97M
        {
193
1.97M
            while(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP));
194
195
            /* update the next job in the queue */
196
1.97M
            ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
197
1.97M
        }
198
0
        else
199
0
        {
200
            /* check for input dependency resolved */
201
0
            if(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP))
202
0
            {
203
                /* return null */
204
0
                pv_next = NULL;
205
0
            }
206
0
            else
207
0
            {
208
                /* update the next job in the queue */
209
0
                ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
210
0
            }
211
0
        }
212
1.97M
    }
213
214
    /* unlock the mutex */
215
2.54M
    osal_mutex_unlock(pv_job_q_mutex_hdl_pre_enc);
216
217
    /* Return */
218
2.54M
    return (pv_next);
219
220
2.54M
} /* End of get_next_job */
221
222
/**
223
*******************************************************************************
224
*
225
* @brief Function Pops out the next Job in the appropriate Job Que
226
*
227
* @par Description: Does under mutex lock to ensure thread safe
228
*
229
* @param[inout] pv_multi_thrd_ctxt
230
*  Pointer to Multi thread context
231
*
232
* @param[in] i4_job_type
233
*   Job type from which a job needs to be popped out
234
*
235
* @param[in] i4_blocking_mode
236
*   Mode of operation
237
*
238
* @returns
239
*  None
240
*
241
* @remarks
242
*
243
*******************************************************************************
244
*/
245
void *ihevce_enc_grp_get_next_job(
246
    void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_curr_frm_id)
247
338k
{
248
    /* Local variables */
249
338k
    multi_thrd_ctxt_t *ps_multi_thrd;
250
338k
    job_queue_handle_t *ps_job_queue_hdl;
251
338k
    void *pv_next = NULL;
252
338k
    void *pv_job_q_mutex_hdl_enc_grp;
253
254
    /* Derive local variables */
255
338k
    ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
256
257
338k
    if(ME_JOB_ENC_LYR == i4_job_type)
258
132k
    {
259
132k
        pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_me;
260
261
132k
        ps_job_queue_hdl = (job_queue_handle_t *)&ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]
262
132k
                               ->as_job_que_enc_hdls[i4_job_type];
263
132k
    }
264
205k
    else
265
205k
    {
266
205k
        pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_enc_loop;
267
205k
        ps_job_queue_hdl =
268
205k
            (job_queue_handle_t *)&ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]
269
205k
                ->as_job_que_enc_hdls[i4_job_type];
270
205k
    }
271
272
    /* lock the mutex for Q access */
273
338k
    osal_mutex_lock(pv_job_q_mutex_hdl_enc_grp);
274
275
    /* Get the next */
276
338k
    pv_next = ps_job_queue_hdl->pv_next;
277
278
    /* Update the next by checking input dependency */
279
338k
    if(NULL != pv_next)
280
179k
    {
281
179k
        job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
282
283
        /* check for input dependencies to be resolved            */
284
        /* this can be blocking or non blocking based on use case */
285
        /* if non blocking then the function returns NULL         */
286
287
179k
        if(1 == i4_blocking_mode)
288
179k
        {
289
179k
            while(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP));
290
291
            /* update the next job in the queue */
292
179k
            ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
293
179k
        }
294
0
        else
295
0
        {
296
            /* check for input dependency resolved */
297
0
            if(ihevce_is_nonzero(ps_job_queue->au1_in_dep, MAX_IN_DEP))
298
0
            {
299
                /* return null */
300
0
                pv_next = NULL;
301
0
            }
302
0
            else
303
0
            {
304
                /* update the next job in the queue */
305
0
                ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
306
0
            }
307
0
        }
308
179k
    }
309
310
    /* unlock the mutex */
311
338k
    osal_mutex_unlock(pv_job_q_mutex_hdl_enc_grp);
312
313
    /* Return */
314
338k
    return (pv_next);
315
316
338k
} /* End of get_next_job */
317
318
/**
319
*******************************************************************************
320
*
321
* @brief Set the output dependency to done state
322
*
323
* @par Description: same as brief
324
*
325
* @param[inout] pv_multi_thrd_ctxt
326
*  Pointer to Multi thread context
327
*
328
* @param[in] ps_curr_job
329
*  Current finished Job pointer
330
*
331
* @returns
332
*  None
333
*
334
* @remarks
335
*
336
*******************************************************************************
337
*/
338
void ihevce_pre_enc_grp_job_set_out_dep(
339
    void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_ping_pong)
340
1.97M
{
341
    /* local vareiables */
342
1.97M
    WORD32 ctr;
343
1.97M
    multi_thrd_ctxt_t *ps_multi_thrd;
344
345
1.97M
    ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
346
347
    /* loop over number output dependencies */
348
7.68M
    for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
349
5.71M
    {
350
5.71M
        UWORD8 *pu1_ptr;
351
352
5.71M
        pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_job_q_pre_enc[i4_ping_pong];
353
5.71M
        pu1_ptr += ps_curr_job->au4_out_ofsts[ctr];
354
5.71M
        *pu1_ptr = 0;
355
5.71M
    }
356
357
1.97M
    return;
358
1.97M
}
359
360
/**
361
*******************************************************************************
362
*
363
* @brief Set the output dependency to done state
364
*
365
* @par Description: same as brief
366
*
367
* @param[inout] pv_multi_thrd_ctxt
368
*  Pointer to Multi thread context
369
*
370
* @param[in] ps_curr_job
371
*   Current finished Job pointer
372
*
373
* @returns
374
*  None
375
*
376
* @remarks
377
*
378
*******************************************************************************
379
*/
380
void ihevce_enc_grp_job_set_out_dep(
381
    void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_curr_frm_id)
382
179k
{
383
    /* local vareiables */
384
179k
    WORD32 ctr;
385
179k
    UWORD8 *pu1_ptr;
386
179k
    multi_thrd_ctxt_t *ps_multi_thrd;
387
388
179k
    ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
389
390
179k
    if(ME_JOB_ENC_LYR == ps_curr_job->i4_task_type)
391
68.9k
    {
392
68.9k
        pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
393
68.9k
    }
394
110k
    else
395
110k
    {
396
110k
        pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]->ps_job_q_enc;
397
110k
    }
398
399
    /* loop over number output dependencies */
400
248k
    for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
401
68.9k
    {
402
68.9k
        WORD32 i4_off;
403
68.9k
        i4_off = ps_curr_job->au4_out_ofsts[ctr];
404
68.9k
        pu1_ptr[i4_off] = 0;
405
68.9k
    }
406
407
179k
    return;
408
179k
}
409
410
/**
411
*******************************************************************************
412
*
413
* @brief Function prepares the Job Queues for all the passes of encoder
414
*
415
* @par Description: Based on picture type sets the input and output dependency
416
*
417
* @param[inout] pv_enc_ctxt
418
*  Pointer to encoder context
419
*
420
* @param[in] ps_curr_inp
421
*  Current Input buffer pointer
422
*
423
* @returns
424
*  None
425
*
426
* @remarks
427
*
428
*******************************************************************************
429
*/
430
void ihevce_prepare_job_queue(
431
    void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_curr_frm_id)
432
95.3k
{
433
    /* local variables */
434
95.3k
    enc_ctxt_t *ps_ctxt;
435
95.3k
    job_queue_t *ps_me_job_queue_lyr0;
436
95.3k
    job_queue_t *ps_enc_loop_job_queue;
437
95.3k
    WORD32 pass;
438
95.3k
    WORD32 num_jobs, col_tile_ctr;
439
95.3k
    WORD32 num_ctb_vert_rows;
440
95.3k
    WORD32 i4_pic_type;
441
95.3k
    WORD32 i;  //counter for bitrate
442
95.3k
    WORD32 i4_num_bitrate_instances;
443
95.3k
    WORD32 i4_num_tile_col;
444
445
    /* derive local varaibles */
446
95.3k
    ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
447
95.3k
    num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
448
95.3k
    i4_num_bitrate_instances = ps_ctxt->i4_num_bitrates;
449
450
95.3k
    i4_num_tile_col = 1;
451
95.3k
    if(1 == ps_ctxt->ps_tile_params_base->i4_tiles_enabled_flag)
452
0
    {
453
0
        i4_num_tile_col = ps_ctxt->ps_tile_params_base->i4_num_tile_cols;
454
0
    }
455
    /* memset the entire job que buffer to zero */
456
95.3k
    memset(
457
95.3k
        ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc,
458
95.3k
        0,
459
95.3k
        MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES * i4_num_tile_col * sizeof(job_queue_t));
460
461
    /* get the start address of  Job queues */
462
95.3k
    ps_me_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
463
95.3k
    ps_enc_loop_job_queue = ps_me_job_queue_lyr0 + (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
464
465
    /* store the JOB queue in the Job handle */
466
95.3k
    ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
467
95.3k
        ->as_job_que_enc_hdls[ME_JOB_ENC_LYR]
468
95.3k
        .pv_next = (void *)ps_me_job_queue_lyr0;
469
    /* store the JOB queue in the Job handle for reenc */
470
95.3k
    ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
471
95.3k
        ->as_job_que_enc_hdls_reenc[ME_JOB_ENC_LYR]
472
95.3k
        .pv_next = (void *)ps_me_job_queue_lyr0;
473
474
190k
    for(i = 0; i < i4_num_bitrate_instances; i++)
475
95.3k
    {
476
95.3k
        ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
477
95.3k
            ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
478
95.3k
            .pv_next = (void *)ps_enc_loop_job_queue;
479
95.3k
        ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
480
95.3k
            ->as_job_que_enc_hdls_reenc[ENC_LOOP_JOB + i]
481
95.3k
            .pv_next = (void *)ps_enc_loop_job_queue;
482
95.3k
        ps_enc_loop_job_queue += (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
483
95.3k
    }
484
485
95.3k
    i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
486
487
    //prepare ME JOB queue first
488
    //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
489
95.3k
    {
490
95.3k
        job_queue_t *ps_job_queue_curr;
491
95.3k
        job_queue_t *ps_job_queue_next;
492
95.3k
        WORD32 ctr;
493
95.3k
        WORD32 inp_dep;
494
95.3k
        WORD32 out_dep;
495
95.3k
        WORD32 num_vert_units;
496
95.3k
        HEVCE_ENC_JOB_TYPES_T task_type;
497
498
95.3k
        pass = 0;  //= ENC_LOOP_JOB
499
500
95.3k
        {
501
            /* num_ver_units of finest layer is stored at (num_hme_lyrs - 1)th index */
502
95.3k
            num_vert_units = num_ctb_vert_rows;
503
95.3k
            task_type = ME_JOB_ENC_LYR;
504
95.3k
            ps_job_queue_curr = ps_me_job_queue_lyr0;
505
95.3k
            ps_job_queue_next =
506
95.3k
                (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
507
95.3k
                    ->as_job_que_enc_hdls[ENC_LOOP_JOB]
508
95.3k
                    .pv_next;
509
95.3k
            inp_dep = 0;
510
95.3k
            out_dep = 1;  //set reference bit-rate's input dependency
511
95.3k
        }
512
513
95.3k
        if((ME_JOB_ENC_LYR == pass) &&
514
95.3k
           ((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) && !L0ME_IN_OPENLOOP_MODE)
515
31.8k
        {
516
            //continue;
517
31.8k
        }
518
63.5k
        else
519
63.5k
        {
520
            /* loop over all the vertical rows */
521
132k
            for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
522
68.9k
            {
523
                /* loop over all the column tiles */
524
137k
                for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
525
68.9k
                {
526
68.9k
                    ULWORD64 u8_temp;
527
528
68.9k
                    {
529
68.9k
                        ps_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
530
68.9k
                        ps_job_queue_curr->s_job_info.s_me_job_info.i4_tile_col_idx = col_tile_ctr;
531
68.9k
                    }
532
533
68.9k
                    ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
534
535
68.9k
                    ps_job_queue_curr->i4_task_type = task_type;
536
537
68.9k
                    ps_job_queue_curr->i4_num_input_dep = inp_dep;
538
539
                    /* set the entire input dep buffer to default value 0 */
540
68.9k
                    memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
541
542
                    /* set the input dep buffer to 1 for num inp dep */
543
68.9k
                    if(0 != inp_dep)
544
0
                    {
545
0
                        memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
546
0
                    }
547
548
68.9k
                    ps_job_queue_curr->i4_num_output_dep = out_dep;
549
550
                    /* set the entire offset buffer to default value */
551
68.9k
                    memset(
552
68.9k
                        &ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
553
554
137k
                    for(ctr = 0; ctr < out_dep; ctr++)
555
68.9k
                    {
556
                        /* col tile level dependency b/w ME & EncLoop */
557
68.9k
                        u8_temp = (ULWORD64)(
558
68.9k
                            &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
559
68.9k
                            ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
560
561
68.9k
                        u8_temp *= sizeof(job_queue_t);
562
563
                        /* store the offset to the array */
564
68.9k
                        ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
565
68.9k
                    }
566
567
68.9k
                    ps_job_queue_curr++;
568
68.9k
                }
569
68.9k
            }  //for ends
570
571
            /* set the last pointer to NULL */
572
63.5k
            ps_job_queue_curr--;
573
63.5k
            ps_job_queue_curr->pv_next = (void *)NULL;
574
63.5k
        }  //else ends
575
95.3k
    }
576
577
    //prepare Enc_loop JOB queue for all bitrate instances
578
    //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
579
190k
    for(i = 0; i < i4_num_bitrate_instances; i++)
580
95.3k
    {
581
95.3k
        job_queue_t *ps_job_queue_curr;
582
95.3k
        job_queue_t *ps_job_queue_next;
583
95.3k
        WORD32 ctr;
584
95.3k
        WORD32 inp_dep;
585
95.3k
        WORD32 out_dep;
586
95.3k
        WORD32 num_vert_units;
587
95.3k
        HEVCE_ENC_JOB_TYPES_T task_type;
588
589
        /* In case of I or IDR pictures ME will not perform any processing */
590
        //if(ENC_LOOP_JOB == pass)
591
95.3k
        {
592
95.3k
            if(((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) &&
593
0
               !L0ME_IN_OPENLOOP_MODE)
594
31.8k
            {
595
31.8k
                inp_dep = 0;
596
31.8k
            }
597
63.5k
            else
598
63.5k
            {
599
63.5k
                inp_dep = 1;
600
63.5k
            }
601
602
95.3k
            task_type = (HEVCE_ENC_JOB_TYPES_T)(ENC_LOOP_JOB + i);
603
95.3k
            ps_job_queue_curr =
604
95.3k
                (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
605
95.3k
                    ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
606
95.3k
                    .pv_next;
607
95.3k
            ps_job_queue_next =
608
95.3k
                (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
609
95.3k
                    ->as_job_que_enc_hdls[ENC_LOOP_JOB + i + 1]
610
95.3k
                    .pv_next;
611
95.3k
            out_dep = 1;  //output dependecny is the next bit-rate instance's input dependency
612
95.3k
            num_vert_units = num_ctb_vert_rows;
613
614
95.3k
            if(i == i4_num_bitrate_instances - 1)  //for last bit-rate instance
615
95.3k
            {
616
                //clear output dependency
617
95.3k
                ps_job_queue_next = NULL;
618
95.3k
                out_dep = 0;
619
95.3k
            }
620
95.3k
        }
621
622
        /* loop over all the vertical rows */
623
205k
        for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
624
110k
        {
625
            /* loop over all the column tiles */
626
220k
            for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
627
110k
            {
628
110k
                ULWORD64 u8_temp;
629
630
110k
                {
631
110k
                    ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_ctb_row_no = num_jobs;
632
110k
                    ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_tile_col_idx =
633
110k
                        col_tile_ctr;
634
110k
                    ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_bitrate_instance_no = i;
635
110k
                }
636
637
110k
                ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
638
639
110k
                ps_job_queue_curr->i4_task_type = task_type;
640
641
110k
                ps_job_queue_curr->i4_num_input_dep = inp_dep;
642
643
                /* set the entire input dep buffer to default value 0 */
644
110k
                memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
645
646
                /* set the input dep buffer to 1 for num inp dep */
647
110k
                if(0 != inp_dep)
648
68.9k
                {
649
68.9k
                    memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
650
68.9k
                }
651
652
110k
                ps_job_queue_curr->i4_num_output_dep = out_dep;
653
654
                /* set the entire offset buffer to default value */
655
110k
                memset(&ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
656
657
110k
                for(ctr = 0; ctr < out_dep; ctr++)
658
0
                {
659
                    /* col tile level dependency b/w EncLoops of MBR */
660
0
                    u8_temp = (ULWORD64)(
661
0
                        &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
662
0
                        ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
663
664
0
                    u8_temp *= sizeof(job_queue_t);
665
666
                    /* store the offset to the array */
667
0
                    ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
668
0
                }
669
670
110k
                ps_job_queue_curr++;
671
110k
            }
672
110k
        }
673
674
        /* set the last pointer to NULL */
675
95.3k
        ps_job_queue_curr--;
676
95.3k
        ps_job_queue_curr->pv_next = (void *)NULL;
677
95.3k
    }
678
679
95.3k
    return;
680
681
95.3k
} /* End of ihevce_prepare_job_queue */
682
683
/**
684
*******************************************************************************
685
*
686
* @brief Function prepares the Job Queues for all the passes of pre enc
687
*
688
* @par Description: Based on picture type sets the input and output dependency
*  The pre-enc job queue buffer selected by i4_ping_pong is zeroed and split
*  into consecutive sub-queues of MAX_NUM_VERT_UNITS_FRM entries each, in the
*  order: decomp layers 0..3, ME layers 4..1, IPE. For every valid pass one
*  job per vertical unit is filled with its task type, its input dependency
*  count and flags, and the offsets through which it signals dependent jobs.
689
*
690
* @param[inout] pv_enc_ctxt
691
*  Pointer to encoder context
692
*
693
* @param[in] ps_curr_inp
694
*   Current Input buffer pointer
*   (only forwarded to ihevce_coarse_me_get_lyr_prms_job_que)
695
*
* @param[in] i4_ping_pong
*  Index selecting which pre-enc job queue buffer and which set of job queue
*  handles (aps_job_q_pre_enc[] / as_job_que_preenc_hdls[]) is prepared
*
696
* @returns
697
*  None
698
*
699
* @remarks
*  The ME job queue params array is consumed sequentially: the pointer into it
*  is advanced once per ME job and once per IPE job.
700
*
701
*******************************************************************************
702
*/
703
void ihevce_prepare_pre_enc_job_queue(
704
    void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_ping_pong)
705
95.3k
{
706
    /* local variables */
707
95.3k
    enc_ctxt_t *ps_ctxt;
708
95.3k
    job_queue_t *ps_decomp_job_queue_lyr0;
709
95.3k
    job_queue_t *ps_decomp_job_queue_lyr1;
710
95.3k
    job_queue_t *ps_decomp_job_queue_lyr2;
711
95.3k
    job_queue_t *ps_decomp_job_queue_lyr3;
712
95.3k
    job_queue_t *ps_me_job_queue_lyr1;
713
95.3k
    job_queue_t *ps_me_job_queue_lyr2;
714
95.3k
    job_queue_t *ps_me_job_queue_lyr3;
715
95.3k
    job_queue_t *ps_me_job_queue_lyr4;
716
95.3k
    job_queue_t *ps_ipe_job_queue;
717
95.3k
    job_queue_t *aps_me_job_queues[MAX_NUM_HME_LAYERS];
718
95.3k
    multi_thrd_me_job_q_prms_t *ps_me_job_q_prms;
719
95.3k
    WORD32 ai4_decomp_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
720
95.3k
    WORD32 a14_decomp_lyr_unit_size[MAX_NUM_HME_LAYERS];
721
95.3k
    WORD32 layer_no;
722
95.3k
    WORD32 decomp_lyr_cnt;
723
95.3k
    WORD32 num_jobs;
724
95.3k
    WORD32 n_tot_layers;
725
95.3k
    WORD32 a_wd[MAX_NUM_HME_LAYERS];
726
95.3k
    WORD32 a_ht[MAX_NUM_HME_LAYERS];
727
95.3k
    WORD32 a_disp_wd[MAX_NUM_HME_LAYERS];
728
95.3k
    WORD32 a_disp_ht[MAX_NUM_HME_LAYERS];
729
95.3k
    WORD32 u4_log_ctb_size;
730
95.3k
    WORD32 num_ctb_vert_rows;
731
95.3k
    WORD32 pass;
732
95.3k
    WORD32 me_lyr_cnt;
733
95.3k
    WORD32 num_hme_lyrs;
734
95.3k
    WORD32 ai4_me_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
735
95.3k
    WORD32 me_start_lyr_pass;
736
95.3k
    WORD32 ctb_size;
    /* -1 means "not derived yet"; it is set by the decomp pass that feeds the */
    /* coarsest ME layer and asserted to be valid before it is used            */
737
95.3k
    WORD32 me_coarsest_lyr_inp_dep = -1;
738
739
    /* NOTE(review): ps_curr_inp is still passed to                      */
    /* ihevce_coarse_me_get_lyr_prms_job_que below, so this cast only    */
    /* silences a warning that would not occur - confirm it can be dropped */
95.3k
    (void)ps_curr_inp;
740
    /* derive local variables */
741
95.3k
    ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
742
95.3k
    num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
743
744
    /* CHANGE REQUIRED: change the pointer to the job queue buffer */
745
    /* memset the entire job queue buffer to zero */
746
95.3k
    memset(
747
95.3k
        ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong],
748
95.3k
        0,
749
95.3k
        MAX_NUM_VERT_UNITS_FRM * NUM_PRE_ENC_JOBS_QUES * sizeof(job_queue_t));
750
751
    /* Get the number of vertical units in a layer from the resolution of the layer */
    /* Only index 0 of a_wd / a_ht is filled here; the remaining entries are        */
    /* presumably filled by hme_derive_num_layers - confirm against its definition  */
752
95.3k
    a_wd[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd;
753
95.3k
    a_ht[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht;
754
95.3k
    n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
755
95.3k
    GETRANGE(u4_log_ctb_size, ps_ctxt->s_frm_ctb_prms.i4_ctb_size);
756
757
95.3k
    ASSERT(n_tot_layers >= 3);
758
759
    /*
760
    * Always force minimum layers as 4 so that we would have both l1 and l2
761
    * pre intra analysis
762
    */
763
95.3k
    if(n_tot_layers == 3)
764
93.9k
    {
765
93.9k
        n_tot_layers = 4;
766
93.9k
        a_wd[3] = CEIL16(a_wd[2] >> 1);
767
93.9k
        a_ht[3] = CEIL16(a_ht[2] >> 1);
768
93.9k
    }
769
770
    /* For every layer derive the unit size (halved per layer) and the number */
    /* of vertical units covering the layer height                            */
    /* NOTE(review): a14_decomp_lyr_unit_size[] is written here but is not    */
    /* read anywhere else in this function                                    */
476k
    for(layer_no = 0; layer_no < n_tot_layers; layer_no++)
771
381k
    {
772
381k
        ctb_size = 1 << (u4_log_ctb_size - 1 - layer_no);
773
381k
        ai4_decomp_num_vert_units_lyr[layer_no] = ((a_ht[layer_no] + ctb_size) & ~(ctb_size - 1)) >>
774
381k
                                                  (u4_log_ctb_size - 1 - layer_no);
775
381k
        a14_decomp_lyr_unit_size[layer_no] = 1 << (u4_log_ctb_size - 1 - layer_no);
776
381k
    }
777
778
    /* get the start address of Job queues */
    /* each sub-queue is MAX_NUM_VERT_UNITS_FRM entries after the previous one */
779
95.3k
    ps_decomp_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong];
780
95.3k
    ps_decomp_job_queue_lyr1 = ps_decomp_job_queue_lyr0 + MAX_NUM_VERT_UNITS_FRM;
781
95.3k
    ps_decomp_job_queue_lyr2 = ps_decomp_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
782
95.3k
    ps_decomp_job_queue_lyr3 = ps_decomp_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
783
95.3k
    ps_me_job_queue_lyr4 = ps_decomp_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
784
95.3k
    ps_me_job_queue_lyr3 = ps_me_job_queue_lyr4 + MAX_NUM_VERT_UNITS_FRM;
785
95.3k
    ps_me_job_queue_lyr2 = ps_me_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
786
95.3k
    ps_me_job_queue_lyr1 = ps_me_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
787
788
95.3k
    ps_ipe_job_queue = ps_me_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
789
790
    /* store the JOB queue in the Job handle */
791
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR0].pv_next =
792
95.3k
        (void *)ps_decomp_job_queue_lyr0;
793
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR1].pv_next =
794
95.3k
        (void *)ps_decomp_job_queue_lyr1;
795
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR2].pv_next =
796
95.3k
        (void *)ps_decomp_job_queue_lyr2;
797
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR3].pv_next =
798
95.3k
        (void *)ps_decomp_job_queue_lyr3;
799
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR4].pv_next =
800
95.3k
        (void *)ps_me_job_queue_lyr4;
801
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR3].pv_next =
802
95.3k
        (void *)ps_me_job_queue_lyr3;
803
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR2].pv_next =
804
95.3k
        (void *)ps_me_job_queue_lyr2;
805
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR1].pv_next =
806
95.3k
        (void *)ps_me_job_queue_lyr1;
807
95.3k
    ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][IPE_JOB_LYR0].pv_next =
808
95.3k
        (void *)ps_ipe_job_queue;
809
810
    /* store the ME Job queues into an array indexed by ME layer number */
    /* (index 0 has no ME queue)                                        */
    /* NOTE(review): index 4 is written, so this assumes                */
    /* MAX_NUM_HME_LAYERS >= 5 - confirm against its definition         */
811
95.3k
    aps_me_job_queues[0] = NULL;
812
95.3k
    aps_me_job_queues[1] = ps_me_job_queue_lyr1;
813
95.3k
    aps_me_job_queues[2] = ps_me_job_queue_lyr2;
814
95.3k
    aps_me_job_queues[3] = ps_me_job_queue_lyr3;
815
95.3k
    aps_me_job_queues[4] = ps_me_job_queue_lyr4;
816
95.3k
    decomp_lyr_cnt = 0;
817
    /* Set the me_lyr_cnt to 0  */
818
95.3k
    me_lyr_cnt = 0;
819
820
    /* call the ME function which returns the layer properties */
    /* (outputs used below: num_hme_lyrs, ai4_me_num_vert_units_lyr[] and */
    /* the as_me_job_q_prms array)                                        */
821
95.3k
    ihevce_coarse_me_get_lyr_prms_job_que(
822
95.3k
        ps_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
823
95.3k
        ps_curr_inp,
824
95.3k
        &num_hme_lyrs,
825
95.3k
        &ai4_me_num_vert_units_lyr[0],
826
95.3k
        &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0]);
827
828
95.3k
    ps_me_job_q_prms = &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0];
829
830
    /* derive ME coarsest layer task type */
831
95.3k
    me_start_lyr_pass = ME_JOB_LYR4 + (MAX_NUM_HME_LAYERS - num_hme_lyrs);
832
833
95.3k
    ps_ctxt->s_multi_thrd.i4_me_coarsest_lyr_type = me_start_lyr_pass;
834
835
    /* coarsest HME layer number of units should be less than or equal to max in dep in Job queue */
836
    /* this constraint is to take care of Coarsest layer requiring entire layer to do FULL search */
837
95.3k
    ASSERT(ai4_me_num_vert_units_lyr[0] <= MAX_IN_DEP);
838
    /* loop over all the passes in the encoder */
839
953k
    for(pass = 0; pass < NUM_PRE_ENC_JOBS_QUES; pass++)
840
858k
    {
841
858k
        job_queue_t *ps_pre_enc_job_queue_curr;
842
858k
        job_queue_t *ps_pre_enc_job_queue_next;
        /* NOTE(review): inp_dep_pass / out_dep_pass are assigned only in the */
        /* decomp branches below and read only for decomp passes in the job   */
        /* loop; they stay unassigned for ME and IPE passes                   */
843
858k
        WORD32 inp_dep_pass;
844
858k
        WORD32 out_dep_pass;
845
858k
        WORD32 num_vert_units;
846
858k
        HEVCE_PRE_ENC_JOB_TYPES_T pre_enc_task_type;
847
858k
        HEVCE_ENC_JOB_TYPES_T enc_task_type;
        /* stays 0 when no branch below claims this pass; such passes are skipped */
848
858k
        WORD32 proc_valid_flag = 0;
849
850
        // num_vert_units = ai4_decomp_num_vert_units_lyr[decomp_lyr_cnt];
851
        /* Initializing the job queues for max no of rows among all the layers. And max would be for last layer*/
        /* (overridden below for the IPE pass and for ME passes) */
852
858k
        num_vert_units = ai4_decomp_num_vert_units_lyr[n_tot_layers - 1];
853
854
858k
        if(DECOMP_JOB_LYR0 == pass)
855
95.3k
        {
856
95.3k
            proc_valid_flag = 1;
857
95.3k
            pre_enc_task_type = DECOMP_JOB_LYR0;
858
95.3k
            enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
859
95.3k
            ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr0;
860
861
95.3k
            inp_dep_pass = 0;
862
95.3k
            decomp_lyr_cnt++;
863
864
            /* If all the decomp layers are done next job queue will be ME job queue */
865
95.3k
            if(decomp_lyr_cnt == (n_tot_layers - 1))
866
0
            {
867
                /* Assumption : num_hme_lyrs > 1*/
868
0
                ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
869
870
                /* ME coarsest layer is currently made dependent on entire decomp layer */
871
0
                out_dep_pass = ai4_me_num_vert_units_lyr[0];
872
0
                me_coarsest_lyr_inp_dep = num_vert_units;
873
0
            }
874
95.3k
            else
875
95.3k
            {
876
95.3k
                ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr1;
877
95.3k
                out_dep_pass = 3;
878
95.3k
            }
879
95.3k
        }
880
763k
        else if((DECOMP_JOB_LYR1 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
881
95.3k
        {
882
95.3k
            proc_valid_flag = 1;
883
95.3k
            pre_enc_task_type = DECOMP_JOB_LYR1;
884
95.3k
            enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
885
95.3k
            ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr1;
886
887
95.3k
            inp_dep_pass = 3;
888
95.3k
            decomp_lyr_cnt++;
889
890
            /* If all the decomp layers are done next job queue will be ME job queue */
891
95.3k
            if(decomp_lyr_cnt == (n_tot_layers - 1))
892
0
            {
893
                /* Assumption : num_hme_lyrs > 1*/
894
0
                ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
895
896
                /* ME coarsest layer is currently made dependent on entire decomp layer */
897
0
                out_dep_pass = ai4_me_num_vert_units_lyr[0];
898
0
                me_coarsest_lyr_inp_dep = num_vert_units;
899
0
            }
900
95.3k
            else
901
95.3k
            {
902
95.3k
                ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr2;
903
95.3k
                out_dep_pass = 3;
904
95.3k
            }
905
95.3k
        }
906
667k
        else if((DECOMP_JOB_LYR2 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
907
95.3k
        {
908
95.3k
            proc_valid_flag = 1;
909
95.3k
            pre_enc_task_type = DECOMP_JOB_LYR2;
910
95.3k
            enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
911
95.3k
            ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr2;
912
913
95.3k
            inp_dep_pass = 3;
914
95.3k
            decomp_lyr_cnt++;
915
916
            /* If all the decomp layers are done next job queue will be ME job queue */
917
95.3k
            if(decomp_lyr_cnt == (n_tot_layers - 1))
918
95.3k
            {
919
                /* Assumption : num_hme_lyrs > 1*/
920
95.3k
                ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
921
922
                /* ME coarsest layer is currently made dependent on entire decomp layer */
923
95.3k
                out_dep_pass = ai4_me_num_vert_units_lyr[0];
924
95.3k
                me_coarsest_lyr_inp_dep = num_vert_units;
925
95.3k
            }
926
0
            else
927
0
            {
928
                /* right now MAX 4 layers worth of JOB queues are prepared */
929
0
                ASSERT(0);
930
0
            }
931
95.3k
        }
932
933
572k
        else if(IPE_JOB_LYR0 == pass)
934
95.3k
        {
            /* IPE has one job per CTB row and no following queue */
935
95.3k
            proc_valid_flag = 1;
936
95.3k
            pre_enc_task_type = IPE_JOB_LYR0;
937
95.3k
            enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
938
95.3k
            ps_pre_enc_job_queue_curr = ps_ipe_job_queue;
939
95.3k
            ps_pre_enc_job_queue_next = NULL;
940
95.3k
            num_vert_units = num_ctb_vert_rows;
941
95.3k
        }
942
476k
        else if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) && (pass >= me_start_lyr_pass))
943
192k
        {
944
            /* num_ver_units of coarsest layer is stored at 0th index */
945
192k
            num_vert_units = ai4_me_num_vert_units_lyr[me_lyr_cnt];
946
192k
            proc_valid_flag = 1;
947
948
192k
            pre_enc_task_type =
949
192k
                (HEVCE_PRE_ENC_JOB_TYPES_T)((WORD32)ME_JOB_LYR1 - (num_hme_lyrs - me_lyr_cnt - 2));
950
951
192k
            enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
952
953
            /* Assumption : num_hme_lyrs > 1*/
954
192k
            ps_pre_enc_job_queue_curr = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 1];
955
956
            /* the last ME layer processed here feeds the IPE queue, */
            /* every other ME layer feeds the next finer ME layer    */
192k
            if(me_lyr_cnt == (num_hme_lyrs - 2))
957
95.3k
            {
958
95.3k
                ps_pre_enc_job_queue_next = ps_ipe_job_queue;
959
95.3k
            }
960
96.8k
            else
961
96.8k
            {
962
96.8k
                ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 2];
963
96.8k
            }
964
192k
            me_lyr_cnt++;
965
192k
        }
966
967
        /* check for valid processing flag */
968
858k
        if(0 == proc_valid_flag)
969
284k
        {
970
284k
            continue;
971
284k
        }
972
973
        /* in the loop ps_me_job_q_prms get incremented for every row */
974
        /* so at the end of one layer the pointer will be correctly   */
975
        /* pointing to the start of next layer                        */
976
977
        /* loop over all the vertical rows */
978
2.54M
        for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
979
1.97M
        {
            /* NOTE(review): u8_temp is assigned only inside the two branches */
            /* of the output dependency loop below                            */
980
1.97M
            ULWORD64 u8_temp;
981
1.97M
            WORD32 inp_dep = 0;
982
1.97M
            WORD32 out_dep = 0;
983
1.97M
            WORD32 ctr;
984
1.97M
            WORD32 job_off_ipe;
985
986
1.97M
            if(IPE_JOB_LYR0 == pass)
987
110k
            {
988
110k
                ps_pre_enc_job_queue_curr->s_job_info.s_ipe_job_info.i4_ctb_row_no = num_jobs;
989
110k
                inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
990
110k
                out_dep = 0;
991
110k
            }
992
1.86M
            else if((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3))
993
889k
            {
994
889k
                ps_pre_enc_job_queue_curr->s_job_info.s_decomp_job_info.i4_vert_unit_row_no =
995
889k
                    num_jobs;
996
997
                /* Input and output dependencies of 1st row and last row is 1 less than other rows*/
998
889k
                inp_dep = inp_dep_pass;
999
889k
                out_dep = out_dep_pass;
1000
1001
                /* layer 0 has no input dependency, so no boundary adjustment */
889k
                if(pass != DECOMP_JOB_LYR0)
1002
592k
                {
1003
592k
                    if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
1004
381k
                    {
1005
381k
                        inp_dep = inp_dep_pass - 1;
1006
381k
                    }
1007
592k
                }
1008
1009
                /* the last decomp pass keeps its full output dependency count */
                /* (it was set from the coarsest ME layer's number of units)   */
889k
                if(pass != (DECOMP_JOB_LYR0 + n_tot_layers - 2))
1010
592k
                {
1011
592k
                    if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
1012
381k
                    {
1013
381k
                        out_dep = out_dep_pass - 1;
1014
381k
                    }
1015
592k
                }
1016
889k
            }
1017
973k
            else /* remaining all are ME JOBS */
1018
973k
            {
1019
973k
                ps_pre_enc_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
1020
1021
                /* coarsest ME layer takes the input dependency recorded by the */
                /* last decomp pass; other ME layers take it from the ME params */
973k
                if(pass == me_start_lyr_pass)
1022
522k
                {
1023
522k
                    ASSERT(me_coarsest_lyr_inp_dep != -1);
1024
522k
                    inp_dep = me_coarsest_lyr_inp_dep;
1025
522k
                }
1026
450k
                else
1027
450k
                {
1028
450k
                    inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
1029
450k
                }
1030
973k
                out_dep = ps_me_job_q_prms->i4_num_output_dep;
1031
973k
            }
            /* link to the following entry; the last job's link is reset to */
            /* NULL after this loop                                         */
1032
1.97M
            ps_pre_enc_job_queue_curr->pv_next = (void *)(ps_pre_enc_job_queue_curr + 1);
1033
1034
1.97M
            ps_pre_enc_job_queue_curr->i4_pre_enc_task_type = pre_enc_task_type;
1035
1.97M
            ps_pre_enc_job_queue_curr->i4_task_type = enc_task_type;
1036
1037
            /* Set the input dependencies */
            /* (this overwrites the running count that the previous pass kept */
            /* in this field while assigning output offsets)                  */
1038
1.97M
            ps_pre_enc_job_queue_curr->i4_num_input_dep = inp_dep;
1039
1040
            /* set the entire input dep buffer to default value 0 */
1041
1.97M
            memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
1042
1043
            /* set the input dep buffer to 1 for num inp dep */
            /* NOTE(review): assumes inp_dep <= MAX_IN_DEP; not checked here */
1044
1.97M
            if(0 != inp_dep)
1045
1.67M
            {
1046
1.67M
                memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
1047
1.67M
            }
1048
1049
            /* If decomposition layer ends at this pass the no of out dependencies
1050
            * will be based on number of vertical units in the coarsest layer of HME.
1051
            * This is because the search range in coarsest layer will be almost
1052
            * entire frame (search range of +-128 in vert direction is max supported).
            * In that case the output offsets start from job 0 of the next queue;
            * otherwise they start from the row above the current one (or from
            * row 0 for the first row).
1053
            */
1054
1.97M
            if(pass == (DECOMP_JOB_LYR0 + n_tot_layers - 2))
1055
296k
            {
1056
296k
                job_off_ipe = 0;
1057
296k
            }
1058
1.67M
            else
1059
1.67M
            {
1060
1.67M
                if(num_jobs == 0)
1061
478k
                    job_off_ipe = num_jobs;
1062
1063
1.19M
                else
1064
1.19M
                    job_off_ipe = num_jobs - 1;
1065
1.67M
            }
1066
1067
            /* Set the offsets of output dependencies */
1068
1.97M
            ps_pre_enc_job_queue_curr->i4_num_output_dep = out_dep;
1069
1070
            /* set the entire offset buffer to default value */
1071
1.97M
            memset(
1072
1.97M
                &ps_pre_enc_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
1073
1074
            /* Each output offset is the byte offset of the dependent job from */
            /* the start of the pre-enc job queue buffer, plus the running     */
            /* input dependency index of that job                              */
7.68M
            for(ctr = 0; ctr < out_dep; ctr++)
1075
5.71M
            {
1076
                /* if IPE or DECOMP loop the dep is 1 to 1*/
1077
5.71M
                if(((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3)) ||
1078
2.40M
                   (IPE_JOB_LYR0 == pass))
1079
3.31M
                {
1080
3.31M
                    u8_temp = (ULWORD64)(
1081
3.31M
                        &ps_pre_enc_job_queue_next[job_off_ipe] -
1082
3.31M
                        ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
1083
1084
3.31M
                    u8_temp *= sizeof(job_queue_t);
1085
1086
                    /* add the exact inp dep byte for the next layer JOB */
1087
3.31M
                    u8_temp += ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep;
1088
1089
                    /* increment the inp dep number for a given job */
1090
3.31M
                    ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep++;
1091
1092
3.31M
                    job_off_ipe++;
1093
3.31M
                }
1094
2.40M
                else if((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1))
1095
2.40M
                {
1096
                    /* ME layer Jobs */
                    /* the dependent job index comes from the ME job queue params */
1097
2.40M
                    WORD32 job_off;
1098
1099
2.40M
                    job_off = ps_me_job_q_prms->ai4_out_dep_unit_off[ctr];
1100
1101
2.40M
                    u8_temp = (ULWORD64)(
1102
2.40M
                        &ps_pre_enc_job_queue_next[job_off] -
1103
2.40M
                        ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
1104
1105
2.40M
                    u8_temp *= sizeof(job_queue_t);
1106
1107
                    /* add the exact inp dep byte for the next layer JOB */
1108
2.40M
                    u8_temp += ps_pre_enc_job_queue_next[job_off].i4_num_input_dep;
1109
1110
                    /* increment the inp dep number for a given job */
1111
2.40M
                    ps_pre_enc_job_queue_next[job_off].i4_num_input_dep++;
1112
2.40M
                }
1113
                /* store the offset to the array */
1114
5.71M
                ps_pre_enc_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
1115
5.71M
            }
1116
            /* ME job q params pointer is incremented for ME jobs and for IPE jobs */
1117
1.97M
            if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) || (IPE_JOB_LYR0 == pass))
1118
1.08M
            {
1119
1.08M
                ps_me_job_q_prms++;
1120
1.08M
            }
1121
1.97M
            ps_pre_enc_job_queue_curr++;
1122
1.97M
        }
1123
1124
        /* set the last pointer to NULL */
        /* NOTE(review): assumes num_vert_units >= 1; with zero jobs this */
        /* decrement would step to the entry before this pass's queue     */
1125
573k
        ps_pre_enc_job_queue_curr--;
1126
573k
        ps_pre_enc_job_queue_curr->pv_next = (void *)NULL;
1127
573k
    }
1128
1129
    /* reset the num ctb processed in every row  for IPE sync */
1130
95.3k
    memset(
1131
95.3k
        &ps_ctxt->s_multi_thrd.ai4_ctbs_in_row_proc_ipe_pass[0],
1132
95.3k
        0,
1133
95.3k
        (MAX_NUM_CTB_ROWS_FRM * sizeof(WORD32)));
1134
1135
95.3k
} /* End of ihevce_prepare_pre_enc_job_queue */