Coverage Report

Created: 2025-07-23 06:28

/src/libhevc/encoder/ihevce_me_pass.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_me_pass.c
24
*
25
* \brief
26
*    Converts the language of the encoder to language of me. This is an i/f
27
*    between the encoder style APIs and ME style APIs. This is basically
28
*    a memoryless glue layer.
29
*
30
* \date
31
*    22/10/2012
32
*
33
* \author
34
*    Ittiam
35
*
36
*
37
* List of Functions
38
*
39
*
40
******************************************************************************
41
*/
42
43
/*****************************************************************************/
44
/* File Includes                                                             */
45
/*****************************************************************************/
46
/* System include files */
47
#include <stdio.h>
48
#include <string.h>
49
#include <stdlib.h>
50
#include <assert.h>
51
#include <stdarg.h>
52
#include <math.h>
53
54
/* User include files */
55
#include "ihevc_typedefs.h"
56
#include "itt_video_api.h"
57
#include "ihevce_api.h"
58
59
#include "rc_cntrl_param.h"
60
#include "rc_frame_info_collector.h"
61
#include "rc_look_ahead_params.h"
62
63
#include "ihevc_debug.h"
64
#include "ihevc_defs.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
81
#include "ihevce_defs.h"
82
#include "ihevce_lap_enc_structs.h"
83
#include "ihevce_multi_thrd_structs.h"
84
#include "ihevce_me_common_defs.h"
85
#include "ihevce_had_satd.h"
86
#include "ihevce_error_codes.h"
87
#include "ihevce_bitstream.h"
88
#include "ihevce_cabac.h"
89
#include "ihevce_rdoq_macros.h"
90
#include "ihevce_function_selector.h"
91
#include "ihevce_enc_structs.h"
92
#include "ihevce_entropy_structs.h"
93
#include "ihevce_cmn_utils_instr_set_router.h"
94
#include "ihevce_enc_loop_structs.h"
95
#include "ihevce_inter_pred.h"
96
97
#include "hme_datatype.h"
98
#include "hme_interface.h"
99
#include "hme_common_defs.h"
100
#include "hme_defs.h"
101
#include "ihevce_me_instr_set_router.h"
102
#include "hme_utils.h"
103
#include "hme_coarse.h"
104
#include "hme_refine.h"
105
#include "hme_function_selector.h"
106
#include "ihevce_me_pass.h"
107
108
#include "cast_types.h"
109
#include "osal.h"
110
#include "osal_defaults.h"
111
112
/*****************************************************************************/
113
/* Macros                                                                    */
114
/*****************************************************************************/
115
116
/** orig simple five tap scaler */
117
#define FIVE_TAP_ORIG_SCALER 0
118
119
/** simple gaussian filter, blurs the image a bit */
120
#define SIMPLE_GAUSSIAN_SCALER 0
121
122
/** lanczos scaler gives sharper images           */
123
#define LANCZOS_SCALER 1
124
125
// Saturated addition: z = x + y, with z forced to MAX_INTRA_COST_IPE when the
126
// sum compares below either operand (z < x or z < y), which is how overflow is
// detected here. NOTE(review): this after-the-fact check presumes unsigned or
// non-negative operands - confirm at the call sites.
127
#define SATURATED_ADD(z, x, y)                                                                     \
128
    {                                                                                              \
129
        (z) = (x) + (y);                                                                           \
130
        if(((z) < (x)) || ((z) < (y)))                                                             \
131
            (z) = MAX_INTRA_COST_IPE;                                                              \
132
    }
133
134
// Saturated subtraction: z = x - y, with z forced to 0 when the difference is
// negative. NOTE(review): the (z) < 0 test can only fire when z is a signed
// type - confirm at the call sites.
#define SATURATED_SUB(z, x, y)                                                                     \
135
    {                                                                                              \
136
        (z) = (x) - (y);                                                                           \
137
        if((z) < 0) /* alternative check left disabled: ((z) > (x)) || ((z) > (y)) */              \
138
            (z) = 0;                                                                               \
139
    }
140
141
#if(FIVE_TAP_ORIG_SCALER + SIMPLE_GAUSSIAN_SCALER + LANCZOS_SCALER) > 1
142
#error "HME ERROR: Only one scaler can be enabled at a time"
143
#endif
144
145
/*****************************************************************************/
146
/* Function Definitions                                                      */
147
/*****************************************************************************/
148
149
/*!
150
******************************************************************************
151
* \if Function name : ihevce_me_get_num_mem_recs \endif
152
*
153
* \brief
154
*    Number of memory records are returned for ME module
155
*    Note : Include TOT MEM. req. for ME + TOT MEM. req. for Dep Mngr for L0 ME
156
*
157
* \return
158
*    Number of memory records
159
*
160
* \author
161
*  Ittiam
162
*
163
*****************************************************************************
164
*/
165
WORD32 ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)
166
16.4k
{
167
16.4k
    WORD32 me_mem_recs = hme_enc_num_alloc(i4_num_me_frm_pllel);
168
169
16.4k
    return (me_mem_recs);
170
16.4k
}
171
172
void ihevce_derive_me_init_prms(
173
    ihevce_static_cfg_params_t *ps_init_prms,
174
    hme_init_prms_t *ps_hme_init_prms,
175
    S32 i4_num_proc_thrds,
176
    S32 i4_resolution_id)
177
32.8k
{
178
32.8k
    WORD32 i4_field_pic = ps_init_prms->s_src_prms.i4_field_pic;
179
32.8k
    WORD32 min_cu_size;
180
181
    /* max number of ref frames. This should be > ref frms sent any frm */
182
32.8k
    ps_hme_init_prms->max_num_ref = ((DEFAULT_MAX_REFERENCE_PICS) << i4_field_pic);
183
184
    /* get the min cu size from config params */
185
32.8k
    min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
186
187
32.8k
    min_cu_size = 1 << min_cu_size;
188
189
    /* Width and height for the layer being encoded */
190
32.8k
    ps_hme_init_prms->a_wd[0] =
191
32.8k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
192
32.8k
        SET_CTB_ALIGN(
193
32.8k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
194
195
32.8k
    ps_hme_init_prms->a_ht[0] =
196
32.8k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
197
32.8k
        SET_CTB_ALIGN(
198
32.8k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
199
200
    /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
201
32.8k
    ps_hme_init_prms->max_num_results_coarse = 4;
202
203
    /* Every refinement layer stores a max of 2 results per partition */
204
32.8k
    ps_hme_init_prms->max_num_results = 2;
205
206
    /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
207
    /* frames in all but final layer In final layer, it could be 1/2 */
208
32.8k
    ps_hme_init_prms->num_layers_explicit_search = 3;
209
210
    /* Populate the max_tr_depth for Inter */
211
32.8k
    ps_hme_init_prms->u1_max_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
212
213
32.8k
    ps_hme_init_prms->log_ctb_size = ps_init_prms->s_config_prms.i4_max_log2_cu_size;
214
32.8k
    ASSERT(ps_hme_init_prms->log_ctb_size == 6);
215
216
    /* currently encoding only 1 layer */
217
32.8k
    ps_hme_init_prms->num_simulcast_layers = 1;
218
219
    /* this feature not yet supported */
220
32.8k
    ps_hme_init_prms->segment_higher_layers = 0;
221
222
    /* Allow 4x4 in refinement layers. Unconditionally enabled in coarse lyr */
223
    /* And not enabled in encode layers, this is just for intermediate refine*/
224
    /* layers, where it could be used for better accuracy of motion.         */
225
226
32.8k
#if !OLD_XTREME_SPEED
227
32.8k
    if((IHEVCE_QUALITY_P6 ==
228
32.8k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset) ||
229
32.8k
       (IHEVCE_QUALITY_P7 ==
230
29.3k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset) ||
231
32.8k
       (IHEVCE_QUALITY_P5 ==
232
27.8k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset) ||
233
32.8k
       (IHEVCE_QUALITY_P4 ==
234
25.6k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset))
235
9.70k
        ps_hme_init_prms->use_4x4 = 0;
236
23.1k
    else
237
23.1k
        ps_hme_init_prms->use_4x4 = 1;
238
#else
239
    ps_hme_init_prms->use_4x4 = 1;
240
#endif
241
242
32.8k
    ps_hme_init_prms->num_b_frms =
243
32.8k
        (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1;
244
245
32.8k
    ps_hme_init_prms->i4_num_proc_thrds = i4_num_proc_thrds;
246
247
32.8k
    if(IHEVCE_QUALITY_P0 ==
248
32.8k
       ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
249
15.2k
    {
250
15.2k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_PRISTINE_QUALITY;
251
15.2k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
252
15.2k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
253
15.2k
    }
254
17.6k
    else if(
255
17.6k
        IHEVCE_QUALITY_P2 ==
256
17.6k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
257
4.11k
    {
258
4.11k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_QUALITY;
259
4.11k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
260
4.11k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
261
4.11k
    }
262
13.5k
    else if(
263
13.5k
        IHEVCE_QUALITY_P3 ==
264
13.5k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
265
3.83k
    {
266
3.83k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_MEDIUM_SPEED;
267
3.83k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 2;
268
3.83k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 2;
269
3.83k
    }
270
9.70k
    else if(
271
9.70k
        IHEVCE_QUALITY_P4 ==
272
9.70k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
273
2.46k
    {
274
2.46k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_SPEED;
275
2.46k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
276
2.46k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
277
2.46k
    }
278
7.24k
    else if(
279
7.24k
        IHEVCE_QUALITY_P5 ==
280
7.24k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
281
2.18k
    {
282
2.18k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED;
283
2.18k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
284
2.18k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
285
2.18k
    }
286
5.06k
    else if(
287
5.06k
        IHEVCE_QUALITY_P6 ==
288
5.06k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
289
3.49k
    {
290
3.49k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
291
3.49k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
292
3.49k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
293
3.49k
    }
294
1.56k
    else if(
295
1.56k
        IHEVCE_QUALITY_P7 ==
296
1.56k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset)
297
1.56k
    {
298
1.56k
        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
299
1.56k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
300
1.56k
        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 0;
301
1.56k
    }
302
303
32.8k
    ps_hme_init_prms->s_me_coding_tools.u1_l0_me_controlled_via_cmd_line = 0;
304
305
    /* Register the search range params from static params */
306
32.8k
    ps_hme_init_prms->max_horz_search_range = ps_init_prms->s_config_prms.i4_max_search_range_horz;
307
32.8k
    ps_hme_init_prms->max_vert_search_range = ps_init_prms->s_config_prms.i4_max_search_range_vert;
308
32.8k
    ps_hme_init_prms->e_arch_type = ps_init_prms->e_arch_type;
309
32.8k
    ps_hme_init_prms->is_interlaced = (ps_init_prms->s_src_prms.i4_field_pic == IV_INTERLACED);
310
311
32.8k
    ps_hme_init_prms->u1_is_stasino_enabled =
312
32.8k
        ((ps_init_prms->s_coding_tools_prms.i4_vqet &
313
32.8k
          (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
314
32.8k
         (ps_init_prms->s_coding_tools_prms.i4_vqet &
315
0
          (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
316
32.8k
}
317
318
/*!
319
******************************************************************************
320
* \if Function name : ihevce_me_get_mem_recs \endif
321
*
322
* \brief
323
*    Memory requirements are returned for ME.
324
*
325
* \param[in,out]  ps_mem_tab : pointer to memory descriptors table
326
* \param[in] ps_init_prms : Create time static parameters
327
* \param[in] i4_num_proc_thrds : Number of processing threads for this module
328
* \param[in] i4_mem_space : memspace in whihc memory request should be done
329
*
330
* \return
331
*    Number of records
332
*
333
* \author
334
*  Ittiam
335
*
336
*****************************************************************************
337
*/
338
WORD32 ihevce_me_get_mem_recs(
339
    iv_mem_rec_t *ps_mem_tab,
340
    ihevce_static_cfg_params_t *ps_init_prms,
341
    WORD32 i4_num_proc_thrds,
342
    WORD32 i4_mem_space,
343
    WORD32 i4_resolution_id,
344
    WORD32 i4_num_me_frm_pllel)
345
8.21k
{
346
8.21k
    hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
347
8.21k
    WORD32 n_tabs, i;
348
349
    /* Init prms structure specific to HME */
350
8.21k
    hme_init_prms_t s_hme_init_prms;
351
352
    /*************************************************************************/
353
    /* code flow: we call hme alloc function and then remap those memtabs    */
354
    /* to a different type of memtab structure.                              */
355
    /*************************************************************************/
356
8.21k
    if(i4_num_me_frm_pllel > 1)
357
0
    {
358
0
        ASSERT(MAX_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
359
0
    }
360
8.21k
    else
361
8.21k
    {
362
8.21k
        ASSERT(MIN_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
363
8.21k
    }
364
365
    /*************************************************************************/
366
    /* POPULATE THE HME INIT PRMS                                            */
367
    /*************************************************************************/
368
8.21k
    ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
369
370
    /*************************************************************************/
371
    /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
372
    /*************************************************************************/
373
8.21k
    n_tabs = hme_enc_alloc(&as_memtabs[0], &s_hme_init_prms, i4_num_me_frm_pllel);
374
8.21k
    ASSERT(n_tabs == hme_enc_num_alloc(i4_num_me_frm_pllel));
375
376
    /*************************************************************************/
377
    /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE                             */
378
    /*************************************************************************/
379
2.89M
    for(i = 0; i < n_tabs; i++)
380
2.88M
    {
381
2.88M
        ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
382
2.88M
        ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
383
2.88M
        ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
384
2.88M
        ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
385
2.88M
    }
386
387
    /*************************************************************************/
388
    /* --- L0 ME sync Dep Mngr Mem requests --                               */
389
    /*************************************************************************/
390
8.21k
    ps_mem_tab += n_tabs;
391
392
8.21k
    return (n_tabs);
393
8.21k
}
394
395
/*!
396
******************************************************************************
397
* \if Function name : ihevce_me_init \endif
398
*
399
* \brief
400
*    Intialization for ME context state structure .
401
*
402
* \param[in] ps_mem_tab : pointer to memory descriptors table
403
* \param[in] ps_init_prms : Create time static parameters
404
* \param[in] pv_osal_handle : Osal handle
405
*
406
* \return
407
*    Handle to the ME context
408
*
409
* \author
410
*  Ittiam
411
*
412
*****************************************************************************
413
*/
414
void *ihevce_me_init(
415
    iv_mem_rec_t *ps_mem_tab,
416
    ihevce_static_cfg_params_t *ps_init_prms,
417
    WORD32 i4_num_proc_thrds,
418
    void *pv_osal_handle,
419
    rc_quant_t *ps_rc_quant_ctxt,
420
    void *pv_tile_params_base,
421
    WORD32 i4_resolution_id,
422
    WORD32 i4_num_me_frm_pllel,
423
    UWORD8 u1_is_popcnt_available)
424
8.21k
{
425
    /* ME handle to be returned */
426
8.21k
    void *pv_me_ctxt;
427
8.21k
    WORD32 status;
428
8.21k
    me_master_ctxt_t *ps_me_ctxt;
429
8.21k
    IV_ARCH_T e_arch_type;
430
431
    /* Init prms structure specific to HME */
432
8.21k
    hme_init_prms_t s_hme_init_prms;
433
434
    /* memtabs to be passed to hme */
435
8.21k
    hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
436
8.21k
    WORD32 n_tabs, i;
437
438
    /*************************************************************************/
439
    /* POPULATE THE HME INIT PRMS                                            */
440
    /*************************************************************************/
441
8.21k
    ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
442
443
    /*************************************************************************/
444
    /* Ensure local declaration is sufficient                                */
445
    /*************************************************************************/
446
8.21k
    n_tabs = hme_enc_num_alloc(i4_num_me_frm_pllel);
447
448
8.21k
    if(i4_num_me_frm_pllel > 1)
449
0
    {
450
0
        ASSERT(MAX_HME_ENC_TOT_MEMTABS >= n_tabs);
451
0
    }
452
8.21k
    else
453
8.21k
    {
454
8.21k
        ASSERT(MIN_HME_ENC_TOT_MEMTABS >= n_tabs);
455
8.21k
    }
456
457
    /*************************************************************************/
458
    /* MAP RESULTS TO HME MEMTAB STRUCTURE                                   */
459
    /*************************************************************************/
460
2.89M
    for(i = 0; i < n_tabs; i++)
461
2.88M
    {
462
2.88M
        as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
463
2.88M
        as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
464
2.88M
        as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
465
2.88M
    }
466
    /*************************************************************************/
467
    /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
468
    /*************************************************************************/
469
8.21k
    pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
470
8.21k
    ps_me_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
471
    /* Store Tile params base into ME context */
472
8.21k
    ps_me_ctxt->pv_tile_params_base = pv_tile_params_base;
473
474
8.21k
    status = hme_enc_init(
475
8.21k
        pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms, ps_rc_quant_ctxt, i4_num_me_frm_pllel);
476
477
8.21k
    if(status == -1)
478
0
        return NULL;
479
480
    /*************************************************************************/
481
    /* --- L0 ME sync Dep Mngr Mem init --                                     */
482
    /*************************************************************************/
483
    /* Update numer of ME frames running in parallel in me master context */
484
8.21k
    ps_me_ctxt->i4_num_me_frm_pllel = i4_num_me_frm_pllel;
485
486
8.21k
    e_arch_type = ps_init_prms->e_arch_type;
487
488
8.21k
    hme_init_function_ptr(ps_me_ctxt, e_arch_type);
489
490
8.21k
    ihevce_me_instr_set_router(
491
8.21k
        (ihevce_me_optimised_function_list_t *)ps_me_ctxt->pv_me_optimised_function_list,
492
8.21k
        e_arch_type);
493
494
8.21k
    ihevce_cmn_utils_instr_set_router(
495
8.21k
        &ps_me_ctxt->s_cmn_opt_func, u1_is_popcnt_available, e_arch_type);
496
497
8.21k
    ps_mem_tab += n_tabs;
498
499
8.21k
    return (pv_me_ctxt);
500
8.21k
}
501
502
/**
503
*******************************************************************************
504
* \if Function name : ihevce_me_set_resolution \endif
505
*
506
* \brief
507
*    Sets the resolution for ME state
508
*
509
* \par Description:
510
*    ME requires information of resolution to prime up its layer descriptors
511
*    and contexts. This API is called whenever a control call from application
512
*    causes a change of resolution. Has to be called once initially before
513
*    processing any frame. Again this is just a glue function and calls the
514
*    actual ME API for the same.
515
*
516
* \param[in,out] pv_me_ctxt: Handle to the ME context
517
* \param[in] n_enc_layers: Number of layers getting encoded
518
* \param[in] p_wd : Pointer containing widths of each layer getting encoded.
519
* \param[in] p_ht : Pointer containing heights of each layer getting encoded.
520
*
521
* \returns
522
*  none
523
*
524
* \author
525
*  Ittiam
526
*
527
*******************************************************************************
528
*/
529
void ihevce_me_set_resolution(void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
530
8.21k
{
531
    /* local variables */
532
8.21k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
533
8.21k
    WORD32 thrds;
534
8.21k
    WORD32 i;
535
536
16.4k
    for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
537
8.21k
    {
538
8.21k
        me_ctxt_t *ps_me_thrd_ctxt;
539
540
8.21k
        ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
541
542
16.4k
        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
543
8.21k
        {
544
8.21k
            hme_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht, i);
545
8.21k
        }
546
8.21k
    }
547
8.21k
}
548
549
void ihevce_populate_me_ctb_data(
550
    me_ctxt_t *ps_ctxt,
551
    me_frm_ctxt_t *ps_frm_ctxt,
552
    cur_ctb_cu_tree_t *ps_cu_tree,
553
    me_ctb_data_t *ps_me_ctb_data,
554
    CU_POS_T e_grandparent_blk_pos,
555
    CU_POS_T e_parent_blk_pos,
556
    CU_POS_T e_cur_blk_pos)
557
3.73M
{
558
3.73M
    inter_cu_results_t *ps_cu_results;
559
560
3.73M
    switch(ps_cu_tree->u1_cu_size)
561
3.73M
    {
562
126k
    case 64:
563
126k
    {
564
126k
        block_data_64x64_t *ps_data = &ps_me_ctb_data->s_64x64_block_data;
565
566
126k
        ps_cu_results = &ps_frm_ctxt->s_cu64x64_results;
567
126k
        ps_data->num_best_results = (ps_cu_tree->is_node_valid) ? ps_cu_results->u1_num_best_results
568
126k
                                                                : 0;
569
570
126k
        break;
571
0
    }
572
445k
    case 32:
573
445k
    {
574
445k
        block_data_32x32_t *ps_data = &ps_me_ctb_data->as_32x32_block_data[e_cur_blk_pos];
575
576
445k
        ps_cu_results = &ps_frm_ctxt->as_cu32x32_results[e_cur_blk_pos];
577
445k
        ps_data->num_best_results = (ps_cu_tree->is_node_valid) ? ps_cu_results->u1_num_best_results
578
445k
                                                                : 0;
579
580
445k
        break;
581
0
    }
582
853k
    case 16:
583
853k
    {
584
853k
        WORD32 i4_blk_id = e_cur_blk_pos + (e_parent_blk_pos << 2);
585
586
853k
        block_data_16x16_t *ps_data = &ps_me_ctb_data->as_block_data[i4_blk_id];
587
588
853k
        ps_cu_results = &ps_frm_ctxt->as_cu16x16_results[i4_blk_id];
589
853k
        ps_data->num_best_results = (ps_cu_tree->is_node_valid) ? ps_cu_results->u1_num_best_results
590
853k
                                                                : 0;
591
592
853k
        break;
593
0
    }
594
2.31M
    case 8:
595
2.31M
    {
596
2.31M
        WORD32 i4_blk_id = e_cur_blk_pos + (e_parent_blk_pos << 2) + (e_grandparent_blk_pos << 4);
597
598
2.31M
        block_data_8x8_t *ps_data = &ps_me_ctb_data->as_8x8_block_data[i4_blk_id];
599
600
2.31M
        ps_cu_results = &ps_frm_ctxt->as_cu8x8_results[i4_blk_id];
601
2.31M
        ps_data->num_best_results = (ps_cu_tree->is_node_valid) ? ps_cu_results->u1_num_best_results
602
2.31M
                                                                : 0;
603
604
2.31M
        break;
605
0
    }
606
3.73M
    }
607
608
3.73M
    if(ps_cu_tree->is_node_valid)
609
1.64M
    {
610
1.64M
        if((ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY) &&
611
1.64M
           (ps_cu_tree->u1_cu_size != 8))
612
559k
        {
613
559k
            ihevce_populate_me_ctb_data(
614
559k
                ps_ctxt,
615
559k
                ps_frm_ctxt,
616
559k
                ps_cu_tree->ps_child_node_tl,
617
559k
                ps_me_ctb_data,
618
559k
                e_parent_blk_pos,
619
559k
                e_cur_blk_pos,
620
559k
                POS_TL);
621
622
559k
            ihevce_populate_me_ctb_data(
623
559k
                ps_ctxt,
624
559k
                ps_frm_ctxt,
625
559k
                ps_cu_tree->ps_child_node_tr,
626
559k
                ps_me_ctb_data,
627
559k
                e_parent_blk_pos,
628
559k
                e_cur_blk_pos,
629
559k
                POS_TR);
630
631
559k
            ihevce_populate_me_ctb_data(
632
559k
                ps_ctxt,
633
559k
                ps_frm_ctxt,
634
559k
                ps_cu_tree->ps_child_node_bl,
635
559k
                ps_me_ctb_data,
636
559k
                e_parent_blk_pos,
637
559k
                e_cur_blk_pos,
638
559k
                POS_BL);
639
640
559k
            ihevce_populate_me_ctb_data(
641
559k
                ps_ctxt,
642
559k
                ps_frm_ctxt,
643
559k
                ps_cu_tree->ps_child_node_br,
644
559k
                ps_me_ctb_data,
645
559k
                e_parent_blk_pos,
646
559k
                e_cur_blk_pos,
647
559k
                POS_BR);
648
559k
        }
649
1.64M
    }
650
2.09M
    else if(ps_cu_tree->u1_cu_size != 8)
651
343k
    {
652
343k
        ihevce_populate_me_ctb_data(
653
343k
            ps_ctxt,
654
343k
            ps_frm_ctxt,
655
343k
            ps_cu_tree->ps_child_node_tl,
656
343k
            ps_me_ctb_data,
657
343k
            e_parent_blk_pos,
658
343k
            e_cur_blk_pos,
659
343k
            POS_TL);
660
661
343k
        ihevce_populate_me_ctb_data(
662
343k
            ps_ctxt,
663
343k
            ps_frm_ctxt,
664
343k
            ps_cu_tree->ps_child_node_tr,
665
343k
            ps_me_ctb_data,
666
343k
            e_parent_blk_pos,
667
343k
            e_cur_blk_pos,
668
343k
            POS_TR);
669
670
343k
        ihevce_populate_me_ctb_data(
671
343k
            ps_ctxt,
672
343k
            ps_frm_ctxt,
673
343k
            ps_cu_tree->ps_child_node_bl,
674
343k
            ps_me_ctb_data,
675
343k
            e_parent_blk_pos,
676
343k
            e_cur_blk_pos,
677
343k
            POS_BL);
678
679
343k
        ihevce_populate_me_ctb_data(
680
343k
            ps_ctxt,
681
343k
            ps_frm_ctxt,
682
343k
            ps_cu_tree->ps_child_node_br,
683
343k
            ps_me_ctb_data,
684
343k
            e_parent_blk_pos,
685
343k
            e_cur_blk_pos,
686
343k
            POS_BR);
687
343k
    }
688
3.73M
}
689
690
/*!
******************************************************************************
* \if Function name : ihevce_me_update_ctb_results \endif
*
* \brief
*    Hooks the CU tree and the ME CTB data of one CTB into its CTB analyse
*    entry, then fills the ME CTB data by walking the CU tree from its root.
*
* \param[in] pv_me_ctxt : ME context handle
* \param[in,out] pv_me_frm_ctxt : ME frame context handle; its current row
*                                 pointers are indexed by i4_ctb_x
* \param[in] i4_ctb_x : CTB index within the current row
* \param[in] i4_ctb_y : Not referenced in this function
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_me_update_ctb_results(
    void *pv_me_ctxt, void *pv_me_frm_ctxt, WORD32 i4_ctb_x, WORD32 i4_ctb_y)
{
    me_frm_ctxt_t *ps_frm = (me_frm_ctxt_t *)pv_me_frm_ctxt;

    /* Entries belonging to this CTB in the current row; each CTB owns */
    /* MAX_NUM_NODES_CU_TREE consecutive CU tree nodes                 */
    ctb_analyse_t *ps_ctb_analyse = &ps_frm->ps_ctb_analyse_curr_row[i4_ctb_x];
    me_ctb_data_t *ps_ctb_data = &ps_frm->ps_me_ctb_data_curr_row[i4_ctb_x];
    cur_ctb_cu_tree_t *ps_tree_root =
        &ps_frm->ps_cu_tree_curr_row[i4_ctb_x * MAX_NUM_NODES_CU_TREE];

    ps_ctb_analyse->ps_cu_tree = ps_tree_root;
    ps_ctb_analyse->ps_me_ctb_data = ps_ctb_data;

    /* The walk starts at the root with all three block positions as POS_NA */
    ihevce_populate_me_ctb_data(
        (me_ctxt_t *)pv_me_ctxt, ps_frm, ps_tree_root, ps_ctb_data, POS_NA, POS_NA, POS_NA);
}
711
712
WORD32 ihevce_me_find_poc_in_list(
713
    recon_pic_buf_t **pps_rec_list, WORD32 poc, WORD32 i4_idr_gop_num, WORD32 num_ref)
714
466k
{
715
466k
    WORD32 i;
716
717
925k
    for(i = 0; i < num_ref; i++)
718
925k
    {
719
925k
        if(pps_rec_list[i]->i4_poc == poc && pps_rec_list[i]->i4_idr_gop_num == i4_idr_gop_num)
720
466k
            return (i);
721
925k
    }
722
723
    /* should never come here */
724
0
    ASSERT(0);
725
0
    return (-1);
726
0
}
727
void ihevc_me_update_ref_desc(
728
    hme_ref_desc_t *ps_ref_desc,
729
    recon_pic_buf_t *ps_recon_pic,
730
    WORD32 ref_id_l0,
731
    WORD32 ref_id_l1,
732
    WORD32 ref_id_lc,
733
    WORD32 is_fwd)
734
466k
{
735
466k
    hme_ref_buf_info_t *ps_ref_info = &ps_ref_desc->as_ref_info[0];
736
466k
    iv_enc_yuv_buf_t *ps_yuv_desc = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc;
737
466k
    iv_enc_yuv_buf_t *ps_src_yuv_desc = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc_src;
738
466k
    S32 offset;
739
740
    /* Padding beyond 64 is not of use to ME */
741
466k
    ps_ref_info->u1_pad_x = MIN(64, PAD_HORZ);
742
466k
    ps_ref_info->u1_pad_y = MIN(64, PAD_VERT);
743
744
    /* Luma stride and offset. Assuming here that supplied ptr is */
745
    /* 0, 0 position and hence setting offset to 0. In fact, it is */
746
    /* not used inside ME as of now.                               */
747
466k
    ps_ref_info->luma_stride = ps_yuv_desc->i4_y_strd;
748
466k
    ps_ref_info->luma_offset = 0;
749
750
    /* 4 planes, fxfy is the direct recon buf, others are from subpel planes */
751
    //offset = ps_ref_info->luma_stride * PAD_VERT + PAD_HORZ;
752
466k
    offset = 0;
753
466k
    ps_ref_info->pu1_rec_fxfy = (UWORD8 *)ps_yuv_desc->pv_y_buf + offset;
754
466k
    ps_ref_info->pu1_rec_hxfy = ps_recon_pic->apu1_y_sub_pel_planes[0] + offset;
755
466k
    ps_ref_info->pu1_rec_fxhy = ps_recon_pic->apu1_y_sub_pel_planes[1] + offset;
756
466k
    ps_ref_info->pu1_rec_hxhy = ps_recon_pic->apu1_y_sub_pel_planes[2] + offset;
757
466k
    ps_ref_info->pu1_ref_src = (UWORD8 *)ps_src_yuv_desc->pv_y_buf + offset;
758
759
    /* U V ptrs though they are not used */
760
466k
    ps_ref_info->pu1_rec_u = (U08 *)ps_yuv_desc->pv_u_buf;
761
466k
    ps_ref_info->pu1_rec_v = (U08 *)ps_yuv_desc->pv_v_buf;
762
763
    /* uv offsets and strides, same treatment sa luma */
764
466k
    ps_ref_info->chroma_offset = 0;
765
466k
    ps_ref_info->chroma_stride = ps_yuv_desc->i4_uv_strd;
766
767
466k
    ps_ref_info->pv_dep_mngr = ps_recon_pic->pv_dep_mngr_recon;
768
769
    /* L0, L1 and LC id. */
770
466k
    ps_ref_desc->i1_ref_id_l0 = ref_id_l0;
771
466k
    ps_ref_desc->i1_ref_id_l1 = ref_id_l1;
772
466k
    ps_ref_desc->i1_ref_id_lc = ref_id_lc;
773
774
    /* POC of the ref pic */
775
466k
    ps_ref_desc->i4_poc = ps_recon_pic->i4_poc;
776
777
    /* Display num of the ref pic */
778
466k
    ps_ref_desc->i4_display_num = ps_recon_pic->i4_display_num;
779
780
    /* GOP number of the reference pic*/
781
466k
    ps_ref_desc->i4_GOP_num = ps_recon_pic->i4_idr_gop_num;
782
783
    /* Whether this picture is in past (fwd) or future (bck) */
784
466k
    ps_ref_desc->u1_is_fwd = is_fwd;
785
786
    /* store the weight and offsets fo refernce picture */
787
466k
    ps_ref_desc->i2_weight = ps_recon_pic->s_weight_offset.i2_luma_weight;
788
466k
    ps_ref_desc->i2_offset = ps_recon_pic->s_weight_offset.i2_luma_offset;
789
466k
}
790
791
/* Create the reference map for ME */
792
void ihevce_me_create_ref_map(
793
    recon_pic_buf_t **pps_rec_list_l0,
794
    recon_pic_buf_t **pps_rec_list_l1,
795
    WORD32 num_ref_l0_active,
796
    WORD32 num_ref_l1_active,
797
    WORD32 num_ref,
798
    hme_ref_map_t *ps_ref_map)
799
301k
{
800
301k
    WORD32 min_ref, i, poc, ref_id_l0, ref_id_l1;
801
802
    /* tracks running count of ref pics */
803
301k
    WORD32 ref_count = 0, i4_idr_gop_num;
804
805
    /* points to One instance of a ref pic structure */
806
301k
    recon_pic_buf_t *ps_recon_pic;
807
808
    /* points to one instance of ref desc str used by ME */
809
301k
    hme_ref_desc_t *ps_ref_desc;
810
811
301k
    min_ref = MIN(num_ref_l0_active, num_ref_l1_active);
812
813
350k
    for(i = 0; i < min_ref; i++)
814
49.1k
    {
815
        /* Create interleaved L0 and L1 entries */
816
49.1k
        ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
817
49.1k
        ps_recon_pic = pps_rec_list_l0[i];
818
49.1k
        poc = ps_recon_pic->i4_poc;
819
49.1k
        i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
820
49.1k
        ref_id_l0 = i;
821
49.1k
        ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
822
49.1k
        ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i, 1);
823
824
49.1k
        ref_count++;
825
826
49.1k
        ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
827
49.1k
        ps_recon_pic = pps_rec_list_l1[i];
828
49.1k
        poc = ps_recon_pic->i4_poc;
829
49.1k
        i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
830
49.1k
        ref_id_l1 = i;
831
49.1k
        ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
832
49.1k
        ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i + 1, 0);
833
834
49.1k
        ref_count++;
835
49.1k
    }
836
837
301k
    if(num_ref_l0_active > min_ref)
838
201k
    {
839
565k
        for(i = 0; i < (num_ref_l0_active - min_ref); i++)
840
364k
        {
841
364k
            ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
842
364k
            ref_id_l0 = i + min_ref;
843
364k
            ps_recon_pic = pps_rec_list_l0[ref_id_l0];
844
364k
            poc = ps_recon_pic->i4_poc;
845
364k
            i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
846
364k
            ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
847
364k
            ihevc_me_update_ref_desc(
848
364k
                ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 1);
849
364k
            ref_count++;
850
364k
        }
851
201k
    }
852
99.3k
    else
853
99.3k
    {
854
103k
        for(i = 0; i < (num_ref_l1_active - min_ref); i++)
855
4.20k
        {
856
4.20k
            ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
857
4.20k
            ref_id_l1 = i + min_ref;
858
4.20k
            ps_recon_pic = pps_rec_list_l1[ref_id_l1];
859
4.20k
            poc = ps_recon_pic->i4_poc;
860
4.20k
            i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
861
4.20k
            ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
862
4.20k
            ihevc_me_update_ref_desc(
863
4.20k
                ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 0);
864
4.20k
            ref_count++;
865
4.20k
        }
866
99.3k
    }
867
868
301k
    ps_ref_map->i4_num_ref = ref_count;
869
301k
    ASSERT(ref_count == (num_ref_l0_active + num_ref_l1_active));
870
871
    /* TODO : Fill better values in lambda depending on ref dist */
872
767k
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
873
466k
        ps_ref_map->as_ref_desc[i].lambda = 20;
874
301k
}
875
876
/*!
877
******************************************************************************
878
* \if Function name : ihevce_me_process \endif
879
*
880
* \brief
881
*    Frame level ME function
882
*
883
* \par Description:
884
*    Processing of all layers starting from coarse and going
885
*    to the refinement layers, all layers
886
*    that are encoded go CTB by CTB. Outputs of this function are populated
887
*    ctb_analyse_t structures, one per CTB.
888
*
889
* \param[in] pv_ctxt : pointer to ME module
890
* \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
891
* \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
892
* \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
893
* \param[in]  pd_intra_costs : pointerto intra cost buffer
894
* \param[in]  ps_multi_thrd_ctxt : pointer to multi thread ctxt
895
* \param[in]  thrd_id : Thread id of the current thrd in which function is executed
896
*
897
* \return
898
*    None
899
*
900
* \author
901
*  Ittiam
902
*
903
*****************************************************************************
904
*/
905
void ihevce_me_process(
906
    void *pv_me_ctxt,
907
    ihevce_lap_enc_buf_t *ps_enc_lap_inp,
908
    ctb_analyse_t *ps_ctb_out,
909
    me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
910
    double *pd_intra_costs,
911
    ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb,
912
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
913
    void *pv_coarse_layer,
914
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
915
    WORD32 i4_frame_parallelism_level,
916
    WORD32 thrd_id,
917
    WORD32 i4_me_frm_id)
918
108k
{
919
108k
    me_ctxt_t *ps_thrd_ctxt;
920
108k
    me_frm_ctxt_t *ps_ctxt;
921
922
108k
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn;
923
924
108k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
925
108k
    cur_ctb_cu_tree_t *ps_cu_tree_out = ps_cur_out_me_prms->ps_cur_ctb_cu_tree;
926
108k
    me_ctb_data_t *ps_me_ctb_data_out = ps_cur_out_me_prms->ps_cur_ctb_me_data;
927
108k
    layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
928
929
108k
    pf_ext_update_fxn = (PF_EXT_UPDATE_FXN_T)ihevce_me_update_ctb_results;
930
931
    /* get the current thread ctxt pointer */
932
108k
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
933
108k
    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
934
108k
    ps_ctxt->thrd_id = thrd_id;
935
936
    /* store the ctb out and cu out base pointers */
937
108k
    ps_ctxt->ps_ctb_analyse_base = ps_ctb_out;
938
939
108k
    ps_ctxt->ps_cu_tree_base = ps_cu_tree_out;
940
108k
    ps_ctxt->ps_ipe_l0_ctb_frm_base = ps_ipe_analyse_ctb;
941
108k
    ps_ctxt->ps_me_ctb_data_base = ps_me_ctb_data_out;
942
108k
    ps_ctxt->ps_func_selector = &ps_master_ctxt->s_func_selector;
943
944
    /** currently in master context. Copying that to me context **/
945
    /* frame level processing function */
946
108k
    hme_process_frm(
947
108k
        (void *)ps_thrd_ctxt,
948
108k
        ps_l0_ipe_input,
949
108k
        &ps_master_ctxt->as_ref_map[i4_me_frm_id],
950
108k
        &pd_intra_costs,
951
108k
        &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
952
108k
        pf_ext_update_fxn,
953
108k
        ps_coarse_layer,
954
108k
        ps_multi_thrd_ctxt,
955
108k
        i4_frame_parallelism_level,
956
108k
        thrd_id,
957
108k
        i4_me_frm_id);
958
108k
}
959
/*!
960
******************************************************************************
961
* \if Function name : ihevce_me_frame_dpb_update \endif
962
*
963
* \brief
964
*    Frame level ME initialisation function
965
*
966
* \par Description:
967
*   Updation of ME's internal DPB
968
*    based on available ref list information
969
*
970
* \param[in] pv_ctxt : pointer to ME module
971
* \param[in] num_ref_l0 : Number of reference pics in L0 list
972
* \param[in] num_ref_l1 : Number of reference pics in L1 list
973
* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
974
* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
975
*
976
* \return
977
*    None
978
*
979
* \author
980
*  Ittiam
981
*
982
*****************************************************************************
983
*/
984
void ihevce_me_frame_dpb_update(
985
    void *pv_me_ctxt,
986
    WORD32 num_ref_l0,
987
    WORD32 num_ref_l1,
988
    recon_pic_buf_t **pps_rec_list_l0,
989
    recon_pic_buf_t **pps_rec_list_l1,
990
    WORD32 i4_thrd_id)
991
150k
{
992
150k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
993
150k
    me_ctxt_t *ps_thrd0_ctxt;
994
150k
    WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
995
150k
    WORD32 i, i4_is_buffer_full;
996
150k
    WORD32 i4_least_POC = 0x7FFFFFFF;
997
150k
    WORD32 i4_least_GOP_num = 0x7FFFFFFF;
998
150k
    me_ctxt_t *ps_ctxt;
999
1000
    /* All processing done using shared / common memory across */
1001
    /* threads is done using thrd ctxt */
1002
150k
    ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1003
1004
150k
    ps_ctxt = (me_ctxt_t *)ps_thrd0_ctxt;
1005
150k
    a_pocs_to_remove[0] = INVALID_POC;
1006
    /*************************************************************************/
1007
    /* Updation of ME's DPB list. This involves the following steps:         */
1008
    /* 1. Obtain list of active POCs maintained within ME.                   */
1009
    /* 2. Search each of them in the ref list. Whatever is not found goes to */
1010
    /*     the list to be removed. Note: a_pocs_buffered_in_me holds the     */
1011
    /*    currently active POC list within ME. a_pocs_to_remove holds the    */
1012
    /*    list of POCs to be removed, terminated by -1.                      */
1013
    /*************************************************************************/
1014
150k
    i4_is_buffer_full =
1015
150k
        hme_get_active_pocs_list((void *)ps_thrd0_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
1016
1017
150k
    if(i4_is_buffer_full)
1018
142k
    {
1019
        /* remove if any non-reference pictures are present */
1020
142k
        for(i = 0;
1021
854k
            i <
1022
854k
            (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1;
1023
712k
            i++)
1024
712k
        {
1025
712k
            if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_reference == 0 &&
1026
712k
               ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_non_ref_free == 1)
1027
14.4k
            {
1028
14.4k
                i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1029
14.4k
                i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1030
14.4k
            }
1031
712k
        }
1032
        /* if all non reference pictures are removed, then find the least poc
1033
        in the least gop number*/
1034
142k
        if(i4_least_POC == 0x7FFFFFFF)
1035
127k
        {
1036
127k
            ASSERT(i4_least_GOP_num == 0x7FFFFFFF);
1037
767k
            for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1038
767k
                            ps_master_ctxt->i4_num_me_frm_pllel) +
1039
767k
                               1;
1040
639k
                i++)
1041
639k
            {
1042
639k
                if(i4_least_GOP_num > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num)
1043
155k
                {
1044
155k
                    i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
1045
155k
                }
1046
639k
            }
1047
767k
            for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
1048
767k
                            ps_master_ctxt->i4_num_me_frm_pllel) +
1049
767k
                               1;
1050
639k
                i++)
1051
639k
            {
1052
639k
                if(i4_least_POC > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc &&
1053
639k
                   ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_least_GOP_num)
1054
208k
                {
1055
208k
                    i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
1056
208k
                }
1057
639k
            }
1058
127k
        }
1059
142k
        ASSERT(i4_least_POC != 0x7FFFFFFF);
1060
142k
        a_pocs_to_remove[0] = i4_least_POC;
1061
142k
        a_pocs_to_remove[1] = INVALID_POC;
1062
142k
    }
1063
1064
    /* Call the ME API to remove "outdated" POCs */
1065
150k
    hme_discard_frm(
1066
150k
        ps_thrd0_ctxt, a_pocs_to_remove, i4_least_GOP_num, ps_master_ctxt->i4_num_me_frm_pllel);
1067
150k
}
1068
/*!
1069
******************************************************************************
1070
* \if Function name : ihevce_me_frame_init \endif
1071
*
1072
* \brief
1073
*    Frame level ME initialisation function
1074
*
1075
* \par Description:
1076
*    The following pre-conditions exist for this function: a. We have the input
1077
*    pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
1078
*    and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
1079
*    been called atleast once. Once these are supplied, the following are
1080
*    done here: a. Input pyramid creation, b. Updation of ME's internal DPB
1081
*    based on available ref list information
1082
*
1083
* \param[in] pv_ctxt : pointer to ME module
1084
* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
1085
* \param[in] ps_frm_lamda : Frame level Lambda params
1086
* \param[in] num_ref_l0 : Number of reference pics in L0 list
1087
* \param[in] num_ref_l1 : Number of reference pics in L1 list
1088
* \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
1089
* \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
1090
* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
1091
* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
1092
* \param[in] ps_enc_lap_inp  : pointer to input yuv buffer (frame buffer)
1093
* \param[in] i4_frm_qp       : current picture QP
1094
*
1095
* \return
1096
*    None
1097
*
1098
* \author
1099
*  Ittiam
1100
*
1101
*****************************************************************************
1102
*/
1103
void ihevce_me_frame_init(
1104
    void *pv_me_ctxt,
1105
    me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
1106
    ihevce_static_cfg_params_t *ps_stat_prms,
1107
    frm_ctb_ctxt_t *ps_frm_ctb_prms,
1108
    frm_lambda_ctxt_t *ps_frm_lamda,
1109
    WORD32 num_ref_l0,
1110
    WORD32 num_ref_l1,
1111
    WORD32 num_ref_l0_active,
1112
    WORD32 num_ref_l1_active,
1113
    recon_pic_buf_t **pps_rec_list_l0,
1114
    recon_pic_buf_t **pps_rec_list_l1,
1115
    recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
1116
    func_selector_t *ps_func_selector,
1117
    ihevce_lap_enc_buf_t *ps_enc_lap_inp,
1118
    void *pv_coarse_layer,
1119
    WORD32 i4_me_frm_id,
1120
    WORD32 i4_thrd_id,
1121
    WORD32 i4_frm_qp,
1122
    WORD32 i4_temporal_layer_id,
1123
    WORD8 i1_cu_qp_delta_enabled_flag,
1124
    void *pv_dep_mngr_encloop_dep_me)
1125
150k
{
1126
150k
    me_ctxt_t *ps_thrd_ctxt;
1127
150k
    me_ctxt_t *ps_thrd0_ctxt;
1128
150k
    me_frm_ctxt_t *ps_ctxt;
1129
150k
    hme_inp_desc_t s_inp_desc;
1130
1131
150k
    WORD32 inp_poc, num_ref;
1132
150k
    WORD32 i;
1133
1134
150k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
1135
150k
    layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
1136
1137
    /* Input POC is derived from input buffer */
1138
150k
    inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
1139
150k
    num_ref = num_ref_l0 + num_ref_l1;
1140
1141
    /* All processing done using shared / common memory across */
1142
    /* threads is done using thrd ctxt */
1143
150k
    ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
1144
1145
150k
    ps_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[i4_me_frm_id];
1146
1147
    /* Update the paarameters "num_ref_l0_active" and "num_ref_l1_active" in hme_frm_prms */
1148
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l0 = num_ref_l0_active;
1149
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l1 = num_ref_l1_active;
1150
1151
    /*************************************************************************/
1152
    /* Add the current input to ME's DPB. This will also create the pyramids */
1153
    /* for the HME layers tha are not "encoded".                             */
1154
    /*************************************************************************/
1155
150k
    s_inp_desc.i4_poc = inp_poc;
1156
150k
    s_inp_desc.i4_idr_gop_num = ps_enc_lap_inp->s_lap_out.i4_idr_gop_num;
1157
150k
    s_inp_desc.i4_is_reference = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1158
150k
    s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
1159
150k
    s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
1160
150k
    s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
1161
1162
150k
    s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
1163
150k
    s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
1164
1165
150k
    hme_add_inp(pv_me_ctxt, &s_inp_desc, i4_me_frm_id, i4_thrd_id);
1166
1167
    /* store the frm ctb ctxt to all the thrd ctxt */
1168
150k
    {
1169
150k
        WORD32 num_thrds;
1170
1171
        /* initialise the parameters for all the threads */
1172
301k
        for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1173
150k
        {
1174
150k
            me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1175
1176
150k
            ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1177
1178
150k
            ps_me_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1179
1180
150k
            ps_thrd_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
1181
150k
            ps_me_tmp_frm_ctxt->i4_l0me_qp_mod = ps_stat_prms->s_config_prms.i4_cu_level_rc & 1;
1182
1183
            /* intialize the inter pred (MC) context at frame level */
1184
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
1185
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_pred_flag =
1186
150k
                ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag;
1187
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_bipred_flag =
1188
150k
                ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1189
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom =
1190
150k
                ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1191
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom =
1192
150k
                ps_enc_lap_inp->s_lap_out.i4_log2_chroma_wght_denom;
1193
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_bit_depth = 8;
1194
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.u1_chroma_array_type = 1;
1195
150k
            ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
1196
            /* Initiallization for non-distributed mode */
1197
150k
            memset(
1198
150k
                ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel,
1199
150k
                0,
1200
150k
                sizeof(ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel));
1201
1202
150k
            ps_me_tmp_frm_ctxt->i4_pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1203
1204
150k
            ps_me_tmp_frm_ctxt->i4_rc_pass = ps_stat_prms->s_pass_prms.i4_pass;
1205
150k
            ps_me_tmp_frm_ctxt->i4_temporal_layer = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
1206
150k
            ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
1207
150k
            ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier =
1208
150k
                ps_ctxt->i4_use_const_lamda_modifier ||
1209
150k
                ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1210
150k
                  (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
1211
150k
                 ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
1212
0
                   (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
1213
0
                  (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1214
0
                   (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
1215
0
                  (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1216
0
                   (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
1217
0
                  (ps_stat_prms->s_coding_tools_prms.i4_vqet &
1218
0
                   (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
1219
150k
            {
1220
150k
                ps_me_tmp_frm_ctxt->f_i_pic_lamda_modifier =
1221
150k
                    ps_enc_lap_inp->s_lap_out.f_i_pic_lamda_modifier;
1222
150k
            }
1223
            /* weighted pred enable flag */
1224
150k
            ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag =
1225
150k
                ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
1226
150k
                ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
1227
1228
150k
            if(1 == ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag)
1229
0
            {
1230
                /* log2 weight denom  */
1231
0
                ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc =
1232
0
                    ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
1233
0
            }
1234
150k
            else
1235
150k
            {
1236
                /* default value */
1237
150k
                ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
1238
150k
            }
1239
1240
150k
            ps_me_tmp_frm_ctxt->u1_is_curFrame_a_refFrame = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
1241
1242
150k
            ps_thrd_ctxt->pv_me_optimised_function_list =
1243
150k
                ps_master_ctxt->pv_me_optimised_function_list;
1244
150k
            ps_thrd_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
1245
150k
        }
1246
150k
    }
1247
1248
    /* Create the reference map for ME */
1249
150k
    ihevce_me_create_ref_map(
1250
150k
        pps_rec_list_l0,
1251
150k
        pps_rec_list_l1,
1252
150k
        num_ref_l0_active,
1253
150k
        num_ref_l1_active,
1254
150k
        num_ref,
1255
150k
        &ps_master_ctxt->as_ref_map[i4_me_frm_id]);
1256
1257
    /** Remember the pointers to recon list parmas for L0 and L1 lists in the context */
1258
150k
    ps_ctxt->ps_hme_ref_map->pps_rec_list_l0 = pps_rec_list_l0;
1259
150k
    ps_ctxt->ps_hme_ref_map->pps_rec_list_l1 = pps_rec_list_l1;
1260
1261
    /*************************************************************************/
1262
    /* Call the ME frame level processing for further actiion.               */
1263
    /* ToDo: Support Row Level API.                                          */
1264
    /*************************************************************************/
1265
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_x =
1266
150k
        ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
1267
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_y =
1268
150k
        ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
1269
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 0;
1270
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_pic_second_field =
1271
150k
        (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
1272
150k
           ps_enc_lap_inp->s_input_buf.i4_topfield_first));
1273
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_temporal_layer_id = i4_temporal_layer_id;
1274
150k
    {
1275
150k
        S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
1276
1277
        /*********************************************************************/
1278
        /* For I Pic, we do not call update fn at ctb level, instead we do   */
1279
        /* one shot update for entire picture.                               */
1280
        /*********************************************************************/
1281
150k
        if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
1282
42.0k
        {
1283
42.0k
            ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 1;
1284
42.0k
            ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1285
42.0k
        }
1286
1287
108k
        else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
1288
85.1k
        {
1289
85.1k
            ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
1290
85.1k
        }
1291
23.3k
        else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
1292
23.3k
        {
1293
23.3k
            ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 1;
1294
23.3k
        }
1295
0
        else
1296
0
        {
1297
            /* not sure whether we need to handle mixed frames like IP, */
1298
            /* they should ideally come as single field. */
1299
            /* TODO : resolve thsi ambiguity */
1300
0
            ASSERT(0);
1301
0
        }
1302
150k
    }
1303
    /************************************************************************/
1304
    /* Lambda calculations moved outside ME and to one place, so as to have */
1305
    /* consistent lambda across ME, IPE, CL RDOPT etc                       */
1306
    /************************************************************************/
1307
1308
150k
    {
1309
150k
        double d_q_factor;
1310
1311
150k
        d_q_factor = pow(2.0, (i4_frm_qp / 6.)) * 5.0 / 8.0;
1312
150k
        ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep = (WORD32)(d_q_factor + .5);
1313
150k
        ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_frame_qp = i4_frm_qp;
1314
1315
        /* Qstep multiplied by 256, to work at higher precision:
1316
        5/6 is the rounding factor. Multiplied by 2 for the Had vs DCT
1317
        cost variation */
1318
150k
        ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep_ls8 =
1319
150k
            (WORD32)((((d_q_factor * 256) * 5) / 3) + .5);
1320
150k
    }
1321
1322
    /* Frame level init of all threads of ME */
1323
150k
    {
1324
150k
        WORD32 num_thrds;
1325
1326
        /* initialise the parameters for all the threads */
1327
301k
        for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1328
150k
        {
1329
150k
            me_frm_ctxt_t *ps_tmp_frm_ctxt;
1330
1331
150k
            ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1332
1333
150k
            ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
1334
1335
150k
            hme_process_frm_init(
1336
150k
                (void *)ps_thrd_ctxt,
1337
150k
                ps_tmp_frm_ctxt->ps_hme_ref_map,
1338
150k
                ps_tmp_frm_ctxt->ps_hme_frm_prms,
1339
150k
                i4_me_frm_id,
1340
150k
                ps_master_ctxt->i4_num_me_frm_pllel);
1341
1342
150k
            ps_tmp_frm_ctxt->s_frm_lambda_ctxt = *ps_frm_lamda;
1343
150k
            ps_tmp_frm_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
1344
150k
        }
1345
150k
    }
1346
1347
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_sad_lambda_qf =
1348
150k
        ps_frm_lamda->i4_cl_sad_lambda_qf;
1349
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_satd_lambda_qf =
1350
150k
        ps_frm_lamda->i4_cl_satd_lambda_qf;
1351
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_sad_lambda_qf =
1352
150k
        ps_frm_lamda->i4_ol_sad_lambda_qf;
1353
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_satd_lambda_qf =
1354
150k
        ps_frm_lamda->i4_ol_satd_lambda_qf;
1355
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].lambda_q_shift = LAMBDA_Q_SHIFT;
1356
1357
150k
    ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_is_cu_qp_delta_enabled =
1358
150k
        i1_cu_qp_delta_enabled_flag;
1359
1360
    /*************************************************************************/
1361
    /* If num ref is 0, that means that it has to be coded as I. Do nothing  */
1362
    /* However mv bank update needs to happen with "intra" mv.               */
1363
    /*************************************************************************/
1364
150k
    if(ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref == 0 ||
1365
150k
       ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic)
1366
42.0k
    {
1367
84.0k
        for(i = 0; i < 1; i++)
1368
42.0k
        {
1369
42.0k
            layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
1370
42.0k
            BLK_SIZE_T e_blk_size;
1371
42.0k
            S32 use_4x4;
1372
1373
            /* The mv bank is filled with "intra" mv */
1374
42.0k
            use_4x4 = hme_get_mv_blk_size(
1375
42.0k
                ps_thrd0_ctxt->s_init_prms.use_4x4, i, ps_ctxt->num_layers, ps_ctxt->u1_encode[i]);
1376
42.0k
            e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
1377
42.0k
            hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
1378
42.0k
            hme_fill_mvbank_intra(ps_layer_ctxt);
1379
1380
            /* Clear out the global mvs */
1381
42.0k
            memset(
1382
42.0k
                ps_layer_ctxt->s_global_mv,
1383
42.0k
                0,
1384
42.0k
                sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
1385
42.0k
        }
1386
1387
42.0k
        return;
1388
42.0k
    }
1389
1390
    /*************************************************************************/
1391
    /* Encode layer frame init                                               */
1392
    /*************************************************************************/
1393
108k
    {
1394
108k
        refine_prms_t s_refine_prms;
1395
108k
        layer_ctxt_t *ps_curr_layer;
1396
108k
        S16 i2_max;
1397
108k
        S32 layer_id;
1398
1399
108k
        layer_id = 0;
1400
108k
        i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
1401
108k
        i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
1402
1403
108k
        ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[layer_id];
1404
1405
108k
        {
1406
108k
            hme_set_refine_prms(
1407
108k
                &s_refine_prms,
1408
108k
                ps_ctxt->u1_encode[layer_id],
1409
108k
                ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref,
1410
108k
                layer_id,
1411
108k
                ps_ctxt->num_layers,
1412
108k
                ps_ctxt->num_layers_explicit_search,
1413
108k
                ps_thrd0_ctxt->s_init_prms.use_4x4,
1414
108k
                &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
1415
108k
                NULL,
1416
108k
                &ps_thrd0_ctxt->s_init_prms
1417
108k
                     .s_me_coding_tools); /* during frm init Intra cost Pointer is not required */
1418
1419
108k
            hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
1420
108k
        }
1421
108k
    }
1422
108k
}
1423
1424
/*!
1425
******************************************************************************
1426
* \if Function name : ihevce_l0_me_frame_end \endif
1427
*
1428
* \brief
1429
*    End of frame update function performs
1430
*       - Dynamic Search Range collation
1431
*
1432
* \param[in] pv_ctxt : pointer to ME module
1433
*
1434
* \return
1435
*    None
1436
*
1437
* \author
1438
*  Ittiam
1439
*
1440
*****************************************************************************
1441
*/
1442
1443
void ihevce_l0_me_frame_end(
1444
    void *pv_me_ctxt, WORD32 i4_idx_dvsr_p, WORD32 i4_display_num, WORD32 me_frm_id)
1445
90.2k
{
1446
90.2k
    WORD32 i4_num_ref = 0, num_ref, num_thrds, cur_poc, frm_num;
1447
1448
90.2k
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
1449
90.2k
    me_ctxt_t *ps_thrd0_ctxt;
1450
90.2k
    me_frm_ctxt_t *ps_frm_ctxt;
1451
90.2k
    WORD32 prev_me_frm_id;
1452
1453
90.2k
    ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
1454
90.2k
    ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];
1455
1456
    /* Deriving the previous poc from previous frames context */
1457
90.2k
    if(me_frm_id == 0)
1458
90.2k
        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
1459
0
    else
1460
0
        prev_me_frm_id = me_frm_id - 1;
1461
1462
    /* Getting the max num references value */
1463
180k
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1464
90.2k
    {
1465
90.2k
        i4_num_ref =
1466
90.2k
            MAX(i4_num_ref,
1467
90.2k
                ps_master_ctxt->aps_me_ctxt[num_thrds]
1468
90.2k
                    ->aps_me_frm_prms[me_frm_id]
1469
90.2k
                    ->as_l0_dyn_range_prms[i4_idx_dvsr_p]
1470
90.2k
                    .i4_num_act_ref_in_l0);
1471
90.2k
    }
1472
1473
    /* No processing is required if current pic is I pic */
1474
90.2k
    if(1 == ps_master_ctxt->as_frm_prms[me_frm_id].is_i_pic)
1475
1.90k
    {
1476
1.90k
        return;
1477
1.90k
    }
1478
1479
    /* If a B/b pic, then the previous frame ctxts dyn search prms should be copied ito the latest ctxt */
1480
88.2k
    if(1 == ps_frm_ctxt->s_frm_prms.bidir_enabled)
1481
0
    {
1482
0
        return;
1483
0
    }
1484
1485
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1486
88.2k
    ASSERT(ps_frm_ctxt->s_frm_prms.is_i_pic == ps_frm_ctxt->s_frm_prms.bidir_enabled);
1487
1488
    /* use thrd 0 ctxt to collate the Dynamic Search Range across all threads */
1489
253k
    for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
1490
165k
    {
1491
165k
        dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1492
1493
165k
        ps_dyn_range_prms_thrd0 =
1494
165k
            &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1495
1496
        /* run a loop over all the other threads to update the dynamical search range */
1497
165k
        for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1498
0
        {
1499
0
            me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1500
1501
0
            dyn_range_prms_t *ps_dyn_range_prms;
1502
1503
0
            ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
1504
1505
            /* get current thrd dynamical search range param. pointer */
1506
0
            ps_dyn_range_prms =
1507
0
                &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1508
1509
            /* TODO : This calls can be optimized further. No need for min in 1st call and max in 2nd call */
1510
0
            hme_update_dynamic_search_params(
1511
0
                ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_max_y);
1512
1513
0
            hme_update_dynamic_search_params(
1514
0
                ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_min_y);
1515
0
        }
1516
165k
    }
1517
1518
    /*************************************************************************/
1519
    /* Get the MAX/MIN per POC distance based on the all the ref. pics       */
1520
    /*************************************************************************/
1521
88.2k
    cur_poc = ps_frm_ctxt->i4_curr_poc;
1522
88.2k
    ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = 0;
1523
88.2k
    ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = 0;
1524
    /*populate display num*/
1525
88.2k
    ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num = i4_display_num;
1526
1527
253k
    for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
1528
165k
    {
1529
165k
        WORD16 i2_mv_per_poc;
1530
165k
        WORD32 ref_poc, poc_diff;
1531
165k
        dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1532
165k
        ps_dyn_range_prms_thrd0 =
1533
165k
            &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
1534
1535
165k
        ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
1536
        /* Should be cleaned up for ME llsm */
1537
165k
        poc_diff = (cur_poc - ref_poc);
1538
165k
        poc_diff = MAX(1, poc_diff);
1539
1540
        /* cur. ref. pic. max y per POC */
1541
165k
        i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
1542
        /* update the max y per POC */
1543
165k
        ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = MAX(
1544
165k
            ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc, i2_mv_per_poc);
1545
1546
        /* cur. ref. pic. min y per POC */
1547
165k
        i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
1548
        /* update the min y per POC */
1549
165k
        ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = MIN(
1550
165k
            ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc, i2_mv_per_poc);
1551
165k
    }
1552
1553
    /*************************************************************************/
1554
    /* Populate the results to all thread ctxt                               */
1555
    /*************************************************************************/
1556
88.2k
    for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1557
0
    {
1558
0
        me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1559
1560
0
        ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
1561
1562
0
        ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc =
1563
0
            ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc;
1564
1565
0
        ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc =
1566
0
            ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc;
1567
1568
0
        ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num =
1569
0
            ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num;
1570
0
    }
1571
1572
    /* Copy the dynamic search paramteres into the other Frame cotexts in parallel */
1573
176k
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
1574
88.2k
    {
1575
88.2k
        l0_dyn_range_prms_t *ps_dyn_range_prms_thrd0;
1576
1577
88.2k
        ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];
1578
1579
88.2k
        i4_num_ref = ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0;
1580
1581
88.2k
        ps_dyn_range_prms_thrd0 = &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];
1582
1583
176k
        for(frm_num = 0; frm_num < MAX_NUM_ME_PARALLEL; frm_num++)
1584
88.2k
        {
1585
88.2k
            if(me_frm_id != frm_num)
1586
0
            {
1587
0
                me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
1588
1589
0
                l0_dyn_range_prms_t *ps_dyn_range_prms;
1590
1591
0
                ps_me_tmp_frm_ctxt =
1592
0
                    ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[frm_num];
1593
1594
                /* get current thrd dynamical search range param. pointer */
1595
0
                ps_dyn_range_prms = &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];
1596
1597
0
                memcpy(ps_dyn_range_prms, ps_dyn_range_prms_thrd0, sizeof(l0_dyn_range_prms_t));
1598
0
            }
1599
88.2k
        }
1600
88.2k
    }
1601
88.2k
}