Coverage Report

Created: 2026-01-17 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/hme_refine.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
******************************************************************************
22
* @file hme_refine.c
23
*
24
* @brief
25
*    Contains the implementation of the refinement layer searches and related
26
*    functionality like CU merge.
27
*
28
* @author
29
*    Ittiam
30
*
31
*
32
* List of Functions
33
*
34
*
35
******************************************************************************
36
*/
37
38
/*****************************************************************************/
39
/* File Includes                                                             */
40
/*****************************************************************************/
41
/* System include files */
42
#include <stdio.h>
43
#include <string.h>
44
#include <stdlib.h>
45
#include <assert.h>
46
#include <stdarg.h>
47
#include <math.h>
48
#include <limits.h>
49
50
/* User include files */
51
#include "ihevc_typedefs.h"
52
#include "itt_video_api.h"
53
#include "ihevce_api.h"
54
55
#include "rc_cntrl_param.h"
56
#include "rc_frame_info_collector.h"
57
#include "rc_look_ahead_params.h"
58
59
#include "ihevc_defs.h"
60
#include "ihevc_structs.h"
61
#include "ihevc_platform_macros.h"
62
#include "ihevc_deblk.h"
63
#include "ihevc_itrans_recon.h"
64
#include "ihevc_chroma_itrans_recon.h"
65
#include "ihevc_chroma_intra_pred.h"
66
#include "ihevc_intra_pred.h"
67
#include "ihevc_inter_pred.h"
68
#include "ihevc_mem_fns.h"
69
#include "ihevc_padding.h"
70
#include "ihevc_weighted_pred.h"
71
#include "ihevc_sao.h"
72
#include "ihevc_resi_trans.h"
73
#include "ihevc_quant_iquant_ssd.h"
74
#include "ihevc_cabac_tables.h"
75
76
#include "ihevce_defs.h"
77
#include "ihevce_lap_enc_structs.h"
78
#include "ihevce_multi_thrd_structs.h"
79
#include "ihevce_multi_thrd_funcs.h"
80
#include "ihevce_me_common_defs.h"
81
#include "ihevce_had_satd.h"
82
#include "ihevce_error_codes.h"
83
#include "ihevce_bitstream.h"
84
#include "ihevce_cabac.h"
85
#include "ihevce_rdoq_macros.h"
86
#include "ihevce_function_selector.h"
87
#include "ihevce_enc_structs.h"
88
#include "ihevce_entropy_structs.h"
89
#include "ihevce_cmn_utils_instr_set_router.h"
90
#include "ihevce_enc_loop_structs.h"
91
#include "ihevce_bs_compute_ctb.h"
92
#include "ihevce_global_tables.h"
93
#include "ihevce_dep_mngr_interface.h"
94
#include "hme_datatype.h"
95
#include "hme_interface.h"
96
#include "hme_common_defs.h"
97
#include "hme_defs.h"
98
#include "ihevce_me_instr_set_router.h"
99
#include "hme_globals.h"
100
#include "hme_utils.h"
101
#include "hme_coarse.h"
102
#include "hme_fullpel.h"
103
#include "hme_subpel.h"
104
#include "hme_refine.h"
105
#include "hme_err_compute.h"
106
#include "hme_common_utils.h"
107
#include "hme_search_algo.h"
108
#include "ihevce_stasino_helpers.h"
109
#include "ihevce_common_utils.h"
110
111
/*****************************************************************************/
112
/* Globals                                                                   */
113
/*****************************************************************************/
114
115
/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb. */
/* The table is 4x4 and every entry is a multiple of 4.                             */
116
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118
};
119
120
/*****************************************************************************/
121
/* Extern Function declaration                                               */
122
/*****************************************************************************/
123
/* Extern declaration only; the definition is not part of this section of    */
/* the file. Takes a CTB start position, the picture width/height and the ME */
/* frame context, and returns a pointer to a ctb_boundary_attrs_t.           */
/* NOTE(review): described from the parameter names only - confirm against   */
/* the definition.                                                           */
extern ctb_boundary_attrs_t *
124
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
126
/* Function pointer type for a routine that receives a search node, the      */
/* current and coarse layer contexts, an x/y position, a reference id and a  */
/* result id, and returns nothing.                                           */
/* NOTE(review): presumably the search node is the output (projected         */
/* colocated candidate) - confirm against the functions assigned to it.      */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127
    search_node_t *ps_search_node,
128
    layer_ctxt_t *ps_curr_layer,
129
    layer_ctxt_t *ps_coarse_layer,
130
    S32 i4_pos_x,
131
    S32 i4_pos_y,
132
    S08 i1_ref_id,
133
    S32 i4_result_id);
134
135
/* Variant of the function pointer type above: instead of a single reference */
/* id it receives the number of active L0 references, a prediction direction */
/* and a default reference id.                                               */
/* NOTE(review): the "L0_ME" suffix suggests use in the layer-0 ME path -    */
/* confirm against the call sites.                                           */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136
    search_node_t *ps_search_node,
137
    layer_ctxt_t *ps_curr_layer,
138
    layer_ctxt_t *ps_coarse_layer,
139
    S32 i4_pos_x,
140
    S32 i4_pos_y,
141
    S32 i4_num_act_ref_l0,
142
    U08 u1_pred_dir,
143
    U08 u1_default_ref_id,
144
    S32 i4_result_id);
145
146
/*****************************************************************************/
147
/* Function Definitions                                                      */
148
/*****************************************************************************/
149
150
void ihevce_no_wt_copy(
151
    coarse_me_ctxt_t *ps_ctxt,
152
    layer_ctxt_t *ps_curr_layer,
153
    pu_t *ps_pu,
154
    UWORD8 *pu1_temp_pred,
155
    WORD32 temp_stride,
156
    WORD32 blk_x,
157
    WORD32 blk_y)
158
0
{
159
0
    UWORD8 *pu1_ref;
160
0
    WORD32 ref_stride, ref_offset;
161
0
    WORD32 row, col, i4_tmp;
162
163
0
    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165
0
    if(ps_pu->b2_pred_mode == PRED_L0)
166
0
    {
167
0
        WORD8 i1_ref_idx;
168
169
0
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172
0
        ref_stride = ps_curr_layer->i4_inp_stride;
173
174
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177
0
        pu1_ref += ref_offset;
178
179
0
        for(row = 0; row < temp_stride; row++)
180
0
        {
181
0
            for(col = 0; col < temp_stride; col++)
182
0
            {
183
0
                i4_tmp = pu1_ref[col];
184
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185
0
            }
186
187
0
            pu1_ref += ref_stride;
188
0
            pu1_temp_pred += temp_stride;
189
0
        }
190
0
    }
191
0
    else
192
0
    {
193
0
        WORD8 i1_ref_idx;
194
195
0
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198
0
        ref_stride = ps_curr_layer->i4_inp_stride;
199
200
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203
0
        pu1_ref += ref_offset;
204
205
0
        for(row = 0; row < temp_stride; row++)
206
0
        {
207
0
            for(col = 0; col < temp_stride; col++)
208
0
            {
209
0
                i4_tmp = pu1_ref[col];
210
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211
0
            }
212
213
0
            pu1_ref += ref_stride;
214
0
            pu1_temp_pred += temp_stride;
215
0
        }
216
0
    }
217
0
}
218
219
static WORD32 hme_add_clustered_mvs_as_merge_cands(
220
    cluster_data_t *ps_cluster_base,
221
    search_node_t *ps_merge_cand,
222
    range_prms_t **pps_range_prms,
223
    U08 *pu1_refid_to_pred_dir_list,
224
    WORD32 i4_num_clusters,
225
    U08 u1_pred_dir)
226
12.2k
{
227
12.2k
    WORD32 i, j, k;
228
12.2k
    WORD32 i4_num_cands_added = 0;
229
12.2k
    WORD32 i4_num_mvs_in_cluster;
230
231
49.3k
    for(i = 0; i < i4_num_clusters; i++)
232
37.1k
    {
233
37.1k
        cluster_data_t *ps_data = &ps_cluster_base[i];
234
235
37.1k
        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236
31.3k
        {
237
31.3k
            i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239
103k
            for(j = 0; j < i4_num_mvs_in_cluster; j++)
240
72.2k
            {
241
72.2k
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242
72.2k
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243
72.2k
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245
72.2k
                CLIP_MV_WITHIN_RANGE(
246
72.2k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247
72.2k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248
72.2k
                    pps_range_prms[ps_data->ref_id],
249
72.2k
                    0,
250
72.2k
                    0,
251
72.2k
                    0);
252
253
215k
                for(k = 0; k < i4_num_cands_added; k++)
254
170k
                {
255
170k
                    if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256
57.2k
                       (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257
27.2k
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258
27.0k
                    {
259
27.0k
                        break;
260
27.0k
                    }
261
170k
                }
262
263
72.2k
                if(k == i4_num_cands_added)
264
45.2k
                {
265
45.2k
                    i4_num_cands_added++;
266
45.2k
                }
267
72.2k
            }
268
31.3k
        }
269
37.1k
    }
270
271
12.2k
    return i4_num_cands_added;
272
12.2k
}
273
274
static WORD32 hme_add_me_best_as_merge_cands(
275
    search_results_t **pps_child_data_array,
276
    inter_cu_results_t *ps_8x8cu_results,
277
    search_node_t *ps_merge_cand,
278
    range_prms_t **pps_range_prms,
279
    U08 *pu1_refid_to_pred_dir_list,
280
    S08 *pi1_past_list,
281
    S08 *pi1_future_list,
282
    BLK_SIZE_T e_blk_size,
283
    ME_QUALITY_PRESETS_T e_quality_preset,
284
    S32 i4_num_cands_added,
285
    U08 u1_pred_dir)
286
12.2k
{
287
12.2k
    WORD32 i, j, k;
288
12.2k
    WORD32 i4_max_cands_to_add;
289
290
12.2k
    WORD32 i4_result_id = 0;
291
292
12.2k
    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293
12.2k
    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294
12.2k
    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295
12.2k
    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297
12.2k
    switch(e_quality_preset)
298
12.2k
    {
299
12.2k
    case ME_PRISTINE_QUALITY:
300
12.2k
    {
301
12.2k
        i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303
12.2k
        break;
304
0
    }
305
0
    case ME_HIGH_QUALITY:
306
0
    {
307
        /* All 4 children are split and each grandchild contributes an MV */
308
        /* and 2 best results per grandchild */
309
0
        i4_max_cands_to_add = 4 * 4 * 2;
310
311
0
        break;
312
0
    }
313
0
    case ME_MEDIUM_SPEED:
314
0
    {
315
0
        i4_max_cands_to_add = 4 * 2 * 2;
316
317
0
        break;
318
0
    }
319
0
    case ME_HIGH_SPEED:
320
0
    case ME_XTREME_SPEED:
321
0
    case ME_XTREME_SPEED_25:
322
0
    {
323
0
        i4_max_cands_to_add = 4 * 2 * 1;
324
325
0
        break;
326
0
    }
327
12.2k
    }
328
329
61.0k
    while(i4_result_id < 4)
330
48.8k
    {
331
244k
        for(i = 0; i < 4; i++)
332
195k
        {
333
195k
            inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334
195k
            inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336
195k
            if(!pps_child_data_array[i]->u1_split_flag)
337
117k
            {
338
117k
                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340
117k
                if(ps_child_data->u1_num_best_results <= i4_result_id)
341
29.5k
                {
342
29.5k
                    continue;
343
29.5k
                }
344
345
88.2k
                if(ps_data->as_pu_results->pu.b1_intra_flag)
346
1.79k
                {
347
1.79k
                    continue;
348
1.79k
                }
349
350
230k
                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351
144k
                {
352
144k
                    mv_t *ps_mv;
353
354
144k
                    S08 i1_ref_idx;
355
356
144k
                    pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
358
144k
                    if(u1_pred_dir !=
359
144k
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360
20.0k
                    {
361
20.0k
                        continue;
362
20.0k
                    }
363
364
124k
                    if(u1_pred_dir)
365
1.88k
                    {
366
1.88k
                        ps_mv = &ps_pu->mv.s_l1_mv;
367
1.88k
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368
1.88k
                    }
369
122k
                    else
370
122k
                    {
371
122k
                        ps_mv = &ps_pu->mv.s_l0_mv;
372
122k
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373
122k
                    }
374
375
124k
                    if(-1 == i1_ref_idx)
376
0
                    {
377
0
                        continue;
378
0
                    }
379
380
124k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381
124k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382
124k
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384
124k
                    CLIP_MV_WITHIN_RANGE(
385
124k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386
124k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387
124k
                        pps_range_prms[i1_ref_idx],
388
124k
                        0,
389
124k
                        0,
390
124k
                        0);
391
392
316k
                    for(k = 0; k < i4_num_cands_added; k++)
393
305k
                    {
394
305k
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395
138k
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396
113k
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397
112k
                        {
398
112k
                            break;
399
112k
                        }
400
305k
                    }
401
402
124k
                    if(k == i4_num_cands_added)
403
11.6k
                    {
404
11.6k
                        i4_num_cands_added++;
405
406
11.6k
                        if(i4_max_cands_to_add <= i4_num_cands_added)
407
0
                        {
408
0
                            return i4_num_cands_added;
409
0
                        }
410
11.6k
                    }
411
124k
                }
412
86.4k
            }
413
77.7k
            else
414
77.7k
            {
415
388k
                for(j = 0; j < 4; j++)
416
310k
                {
417
310k
                    mv_t *ps_mv;
418
419
310k
                    S08 i1_ref_idx;
420
421
310k
                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422
310k
                    pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424
310k
                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426
310k
                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427
216k
                    {
428
216k
                        continue;
429
216k
                    }
430
431
94.6k
                    if(ps_data->as_pu_results->pu.b1_intra_flag)
432
39.9k
                    {
433
39.9k
                        continue;
434
39.9k
                    }
435
436
54.7k
                    if(u1_pred_dir !=
437
54.7k
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438
5.62k
                    {
439
5.62k
                        continue;
440
5.62k
                    }
441
442
49.1k
                    if(u1_pred_dir)
443
6.69k
                    {
444
6.69k
                        ps_mv = &ps_pu->mv.s_l1_mv;
445
6.69k
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446
6.69k
                    }
447
42.4k
                    else
448
42.4k
                    {
449
42.4k
                        ps_mv = &ps_pu->mv.s_l0_mv;
450
42.4k
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451
42.4k
                    }
452
453
49.1k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454
49.1k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455
49.1k
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457
49.1k
                    CLIP_MV_WITHIN_RANGE(
458
49.1k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459
49.1k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460
49.1k
                        pps_range_prms[i1_ref_idx],
461
49.1k
                        0,
462
49.1k
                        0,
463
49.1k
                        0);
464
465
185k
                    for(k = 0; k < i4_num_cands_added; k++)
466
184k
                    {
467
184k
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468
76.0k
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469
47.7k
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470
47.6k
                        {
471
47.6k
                            break;
472
47.6k
                        }
473
184k
                    }
474
475
49.1k
                    if(k == i4_num_cands_added)
476
1.42k
                    {
477
1.42k
                        i4_num_cands_added++;
478
479
1.42k
                        if(i4_max_cands_to_add <= i4_num_cands_added)
480
0
                        {
481
0
                            return i4_num_cands_added;
482
0
                        }
483
1.42k
                    }
484
49.1k
                }
485
77.7k
            }
486
195k
        }
487
488
48.8k
        i4_result_id++;
489
48.8k
    }
490
491
12.2k
    return i4_num_cands_added;
492
12.2k
}
493
494
WORD32 hme_add_cands_for_merge_eval(
495
    ctb_cluster_info_t *ps_cluster_info,
496
    search_results_t **pps_child_data_array,
497
    inter_cu_results_t *ps_8x8cu_results,
498
    range_prms_t **pps_range_prms,
499
    search_node_t *ps_merge_cand,
500
    U08 *pu1_refid_to_pred_dir_list,
501
    S08 *pi1_past_list,
502
    S08 *pi1_future_list,
503
    ME_QUALITY_PRESETS_T e_quality_preset,
504
    BLK_SIZE_T e_blk_size,
505
    U08 u1_pred_dir,
506
    U08 u1_blk_id)
507
12.2k
{
508
12.2k
    WORD32 i4_num_cands_added = 0;
509
510
12.2k
    if(ME_PRISTINE_QUALITY == e_quality_preset)
511
12.2k
    {
512
12.2k
        cluster_data_t *ps_cluster_primo;
513
514
12.2k
        WORD32 i4_num_clusters;
515
516
12.2k
        if(BLK_32x32 == e_blk_size)
517
11.7k
        {
518
11.7k
            ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519
11.7k
            i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520
11.7k
        }
521
440
        else
522
440
        {
523
440
            ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524
440
            i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525
440
        }
526
527
12.2k
        i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528
12.2k
            ps_cluster_primo,
529
12.2k
            ps_merge_cand,
530
12.2k
            pps_range_prms,
531
12.2k
            pu1_refid_to_pred_dir_list,
532
12.2k
            i4_num_clusters,
533
12.2k
            u1_pred_dir);
534
12.2k
    }
535
536
12.2k
    i4_num_cands_added = hme_add_me_best_as_merge_cands(
537
12.2k
        pps_child_data_array,
538
12.2k
        ps_8x8cu_results,
539
12.2k
        ps_merge_cand,
540
12.2k
        pps_range_prms,
541
12.2k
        pu1_refid_to_pred_dir_list,
542
12.2k
        pi1_past_list,
543
12.2k
        pi1_future_list,
544
12.2k
        e_blk_size,
545
12.2k
        e_quality_preset,
546
12.2k
        i4_num_cands_added,
547
12.2k
        u1_pred_dir);
548
549
12.2k
    return i4_num_cands_added;
550
12.2k
}
551
552
/**
553
********************************************************************************
554
*  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555
*                                           S08 i1_ref_idx,
556
*                                           S32 i4_best_part_type,
557
*                                           S32 i4_is_vert)
558
*
559
*  @brief  Given a target partition orientation in the merged CU, and the
560
*          partition type of most likely partition this fxn picks up
561
*          candidates from the 4 constituent CUs and does refinement search
562
*          to identify best results for the merge CU across active partitions
563
*
564
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565
*                  these params, the search result structure is also derived and
566
*                 updated during the search
567
*
568
*  @param[in] i1_ref_idx : ID of the buffer within the search results to update.
569
*               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570
*
571
*  @param[in] i4_best_part_type : partition type of potential partition in the
572
*              merged CU, -1 if the merge process has not yet been able to
573
*              determine this.
574
*
575
*  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576
*             orientation or horizontal orientation.
577
*
578
*  @return Number of merge candidates
579
********************************************************************************
580
*/
581
WORD32 hme_pick_eval_merge_candts(
582
    hme_merge_prms_t *ps_merge_prms,
583
    hme_subpel_prms_t *ps_subpel_prms,
584
    S32 i4_search_idx,
585
    S32 i4_best_part_type,
586
    S32 i4_is_vert,
587
    wgt_pred_ctxt_t *ps_wt_inp_prms,
588
    S32 i4_frm_qstep,
589
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591
60.6k
{
592
60.6k
    S32 x_off, y_off;
593
60.6k
    search_node_t *ps_search_node;
594
60.6k
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595
60.6k
    S32 i4_num_valid_parts;
596
60.6k
    pred_ctxt_t *ps_pred_ctxt;
597
598
60.6k
    search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599
60.6k
    S32 num_unique_nodes_cu_merge = 0;
600
601
60.6k
    search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602
60.6k
    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603
60.6k
    S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605
60.6k
    search_results_t *aps_child_results[4];
606
60.6k
    layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608
60.6k
    S32 i4_ref_stride, i, j;
609
60.6k
    result_upd_prms_t s_result_prms;
610
611
60.6k
    BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612
60.6k
    S32 i4_offset;
613
614
    /*************************************************************************/
615
    /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616
    /* This function                                                         */
617
    /*************************************************************************/
618
60.6k
    PF_SAD_FXN_T pf_err_compute;
619
60.6k
    S32 ai4_sad_grid[9][17];
620
60.6k
    err_prms_t s_err_prms;
621
622
    /*************************************************************************/
623
    /* Allowed MV RANGE                                                      */
624
    /*************************************************************************/
625
60.6k
    range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626
60.6k
    PF_INTERP_FXN_T pf_qpel_interp;
627
60.6k
    PF_MV_COST_FXN pf_mv_cost_compute;
628
60.6k
    WORD32 pred_lx;
629
60.6k
    U08 *apu1_hpel_ref[4];
630
631
60.6k
    interp_prms_t s_interp_prms;
632
60.6k
    S32 i4_interp_buf_id;
633
634
60.6k
    S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635
60.6k
    S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637
    /* Sanity checks */
638
60.6k
    ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640
60.6k
    s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642
    /* Initialize all the ptrs to child CUs for merge decision */
643
60.6k
    aps_child_results[0] = ps_merge_prms->ps_results_tl;
644
60.6k
    aps_child_results[1] = ps_merge_prms->ps_results_tr;
645
60.6k
    aps_child_results[2] = ps_merge_prms->ps_results_bl;
646
60.6k
    aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648
60.6k
    num_unique_nodes_cu_merge = 0;
649
650
60.6k
    pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652
60.6k
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653
12.2k
    {
654
12.2k
        num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655
12.2k
            ps_merge_prms->ps_cluster_info,
656
12.2k
            aps_child_results,
657
12.2k
            ps_merge_prms->ps_8x8_cu_results,
658
12.2k
            pps_range_prms,
659
12.2k
            as_merge_unique_node,
660
12.2k
            ps_search_results->pu1_is_past,
661
12.2k
            ps_merge_prms->pi1_past_list,
662
12.2k
            ps_merge_prms->pi1_future_list,
663
12.2k
            ps_merge_prms->e_quality_preset,
664
12.2k
            e_blk_size,
665
12.2k
            i4_search_idx,
666
12.2k
            (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667
12.2k
                (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668
12.2k
    }
669
48.4k
    else
670
48.4k
    {
671
        /*************************************************************************/
672
        /* Populate the list of unique search nodes in the child CUs for merge   */
673
        /* evaluation                                                            */
674
        /*************************************************************************/
675
242k
        for(i = 0; i < 4; i++)
676
193k
        {
677
193k
            search_node_t s_search_node;
678
679
193k
            PART_TYPE_T e_part_type;
680
193k
            PART_ID_T e_part_id;
681
682
193k
            WORD32 part_num;
683
684
193k
            search_results_t *ps_child = aps_child_results[i];
685
686
193k
            if(ps_child->ps_cu_results->u1_num_best_results)
687
193k
            {
688
193k
                if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689
656
                     (1 == ps_child->ps_cu_results->u1_num_best_results)))
690
193k
                {
691
193k
                    e_part_type =
692
193k
                        (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694
193k
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696
                    /* Insert mvs of NxN partitions. */
697
391k
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698
198k
                        part_num++)
699
198k
                    {
700
198k
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702
198k
                        if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703
198k
                        {
704
198k
                            s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705
198k
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706
198k
                            {
707
198k
                                CLIP_MV_WITHIN_RANGE(
708
198k
                                    s_search_node.s_mv.i2_mvx,
709
198k
                                    s_search_node.s_mv.i2_mvy,
710
198k
                                    pps_range_prms[s_search_node.i1_ref_idx],
711
198k
                                    0,
712
198k
                                    0,
713
198k
                                    0);
714
715
198k
                                INSERT_NEW_NODE_NOMAP(
716
198k
                                    as_merge_unique_node,
717
198k
                                    num_unique_nodes_cu_merge,
718
198k
                                    s_search_node,
719
198k
                                    1);
720
198k
                            }
721
198k
                        }
722
198k
                    }
723
193k
                }
724
193k
            }
725
0
            else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726
0
                           .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727
0
                      (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728
0
                                .ps_cu_results->u1_num_best_results)))
729
0
            {
730
0
                search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732
0
                for(j = 0; j < 4; j++)
733
0
                {
734
0
                    e_part_type = (PART_TYPE_T)ps_results_root[j]
735
0
                                      .ps_cu_results->ps_best_results[0]
736
0
                                      .u1_part_type;
737
738
0
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740
                    /* Insert mvs of NxN partitions. */
741
0
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742
0
                        part_num++)
743
0
                    {
744
0
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746
0
                        if((ps_results_root[j]
747
0
                                .aps_part_results[i4_search_idx][e_part_id]
748
0
                                ->i1_ref_idx != -1) &&
749
0
                           (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750
0
                                 .b1_intra_flag))
751
0
                        {
752
0
                            s_search_node =
753
0
                                *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754
0
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755
0
                            {
756
0
                                CLIP_MV_WITHIN_RANGE(
757
0
                                    s_search_node.s_mv.i2_mvx,
758
0
                                    s_search_node.s_mv.i2_mvy,
759
0
                                    pps_range_prms[s_search_node.i1_ref_idx],
760
0
                                    0,
761
0
                                    0,
762
0
                                    0);
763
764
0
                                INSERT_NEW_NODE_NOMAP(
765
0
                                    as_merge_unique_node,
766
0
                                    num_unique_nodes_cu_merge,
767
0
                                    s_search_node,
768
0
                                    1);
769
0
                            }
770
0
                        }
771
0
                    }
772
0
                }
773
0
            }
774
193k
        }
775
48.4k
    }
776
777
60.6k
    if(0 == num_unique_nodes_cu_merge)
778
727
    {
779
727
        return 0;
780
727
    }
781
782
    /*************************************************************************/
783
    /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784
    /* fixed through this subpel refinement for this partition.              */
785
    /* Note, we do not enable grid sads since one pt is evaluated per node   */
786
    /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787
    /*************************************************************************/
788
59.8k
    i4_part_mask = ps_search_results->i4_part_mask;
789
790
    /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791
59.8k
    if(ps_subpel_prms->i4_use_satd)
792
30.4k
    {
793
30.4k
        if(BLK_32x32 == e_blk_size)
794
28.9k
        {
795
28.9k
            pf_err_compute = hme_evalsatd_pt_pu_32x32;
796
28.9k
        }
797
1.53k
        else
798
1.53k
        {
799
1.53k
            pf_err_compute = hme_evalsatd_pt_pu_64x64;
800
1.53k
        }
801
30.4k
    }
802
29.4k
    else
803
29.4k
    {
804
29.4k
        pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805
29.4k
    }
806
807
59.8k
    i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
809
59.8k
    x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810
59.8k
    y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811
59.8k
    i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813
    /*************************************************************************/
814
    /* This array stores the ids of the partitions whose                     */
815
    /* SADs are updated. Since the partitions whose SADs are updated may not */
816
    /* be in contiguous order, we supply another level of indirection.       */
817
    /*************************************************************************/
818
59.8k
    i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820
    /* Initialize result params used for partition update */
821
59.8k
    s_result_prms.pf_mv_cost_compute = NULL;
822
59.8k
    s_result_prms.ps_search_results = ps_search_results;
823
59.8k
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824
59.8k
    s_result_prms.i1_ref_idx = i4_search_idx;
825
59.8k
    s_result_prms.i4_part_mask = i4_part_mask;
826
59.8k
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827
59.8k
    s_result_prms.i4_grid_mask = 1;
828
829
    /* One time Initialization of error params used for SAD/SATD compute */
830
59.8k
    s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831
59.8k
    s_err_prms.i4_ref_stride = i4_ref_stride;
832
59.8k
    s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833
59.8k
    s_err_prms.i4_grid_mask = 1;
834
59.8k
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835
59.8k
    s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836
59.8k
    s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837
59.8k
    s_err_prms.i4_step = 1;
838
839
    /*************************************************************************/
840
    /* One time preparation of non changing interpolation params.            */
841
    /*************************************************************************/
842
59.8k
    s_interp_prms.i4_ref_stride = i4_ref_stride;
843
59.8k
    s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844
59.8k
    s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845
59.8k
    s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846
59.8k
    s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847
59.8k
    i4_interp_buf_id = 0;
848
849
59.8k
    pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851
    /***************************************************************************/
852
    /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853
    /* results                                                                 */
854
    /***************************************************************************/
855
209k
    for(i = 0; i < num_unique_nodes_cu_merge; i++)
856
149k
    {
857
149k
        WORD8 i1_ref_idx;
858
149k
        ps_search_node = &as_merge_unique_node[i];
859
860
        /*********************************************************************/
861
        /* Compute the base pointer for input, interpolated buffers          */
862
        /* The base pointers point as follows:                               */
863
        /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
864
        /* To these, we need to add the offset of the current node           */
865
        /*********************************************************************/
866
149k
        i1_ref_idx = ps_search_node->i1_ref_idx;
867
149k
        apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868
149k
        apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869
149k
        apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870
149k
        apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872
149k
        s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874
149k
        pf_qpel_interp(
875
149k
            &s_interp_prms,
876
149k
            ps_search_node->s_mv.i2_mvx,
877
149k
            ps_search_node->s_mv.i2_mvy,
878
149k
            i4_interp_buf_id);
879
880
149k
        pred_lx = i4_search_idx;
881
149k
        ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883
149k
        s_result_prms.u1_pred_lx = pred_lx;
884
149k
        s_result_prms.ps_search_node_base = ps_search_node;
885
149k
        s_err_prms.pu1_inp =
886
149k
            ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887
149k
        s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888
149k
        s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890
        /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891
        Here the tu recursion logic is restricted with the size of the PU*/
892
149k
        pf_err_compute(&s_err_prms);
893
894
149k
        if(ps_subpel_prms->u1_is_cu_noisy &&
895
0
           ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896
0
        {
897
0
            ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898
0
                s_err_prms.pu1_ref,
899
0
                s_err_prms.i4_ref_stride,
900
0
                ai4_valid_part_ids,
901
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903
0
                s_err_prms.pi4_sad_grid,
904
0
                ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905
0
                ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906
0
                ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907
0
                i4_num_valid_parts,
908
0
                ps_wt_inp_prms->wpred_log_wdc,
909
0
                (BLK_32x32 == e_blk_size) ? 32 : 64);
910
0
        }
911
912
        /* Update the mv's */
913
149k
        s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914
149k
        s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916
        /* Update best results */
917
149k
        hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918
149k
    }
919
920
    /************************************************************************/
921
    /* Update mv cost and total cost for each valid partition in the CU     */
922
    /************************************************************************/
923
1.07M
    for(i = 0; i < TOT_NUM_PARTS; i++)
924
1.01M
    {
925
1.01M
        if(i4_part_mask & (1 << i))
926
512k
        {
927
512k
            WORD32 j;
928
512k
            WORD32 i4_mv_cost;
929
930
512k
            ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932
512k
            for(j = 0;
933
1.02M
                j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934
512k
                j++)
935
512k
            {
936
512k
                if(ps_search_node->i1_ref_idx != -1)
937
512k
                {
938
512k
                    pred_lx = i4_search_idx;
939
512k
                    ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941
                    /* Prediction context should now deal with qpel units */
942
512k
                    HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944
512k
                    ps_search_node->u1_subpel_done = 1;
945
512k
                    ps_search_node->u1_is_avail = 1;
946
947
512k
                    i4_mv_cost =
948
512k
                        pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950
512k
                    ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951
512k
                    ps_search_node->i4_mv_cost = i4_mv_cost;
952
953
512k
                    ps_search_node++;
954
512k
                }
955
512k
            }
956
512k
        }
957
1.01M
    }
958
959
59.8k
    return num_unique_nodes_cu_merge;
960
60.6k
}
961
962
53.0k
/* Threshold on the number of intra parts counted across the child CUs.      */
/* hme_try_merge_high_speed compares its i4_intra_parts count against this   */
/* value as part of the early intra merge/split decision it takes before any */
/* merge candidates are evaluated.                                           */
#define CU_MERGE_MAX_INTRA_PARTS 4
963
964
/**
965
********************************************************************************
966
*  @fn     hme_try_merge_high_speed
967
*
968
*  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969
entity or with partitions for high speed preset
970
*
971
*  @param[in,out]  hme_merge_prms_t: Params for CU merge
972
*
973
*  @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974
********************************************************************************
975
*/
976
CU_MERGE_RESULT_T hme_try_merge_high_speed(
977
    me_ctxt_t *ps_thrd_ctxt,
978
    me_frm_ctxt_t *ps_ctxt,
979
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980
    hme_subpel_prms_t *ps_subpel_prms,
981
    hme_merge_prms_t *ps_merge_prms,
982
    inter_pu_results_t *ps_pu_results,
983
    pu_result_t *ps_pu_result)
984
66.2k
{
985
66.2k
    search_results_t *ps_results_tl, *ps_results_tr;
986
66.2k
    search_results_t *ps_results_bl, *ps_results_br;
987
988
66.2k
    S32 i;
989
66.2k
    S32 i4_search_idx;
990
66.2k
    S32 i4_cost_parent;
991
66.2k
    S32 intra_cu_size;
992
66.2k
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994
66.2k
    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995
66.2k
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997
66.2k
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998
66.2k
    S32 is_vert = 0, i4_best_part_type = -1;
999
66.2k
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000
66.2k
    S32 i4_cost_children = 0;
1001
66.2k
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002
66.2k
    S32 i4_num_merge_cands_evaluated = 0;
1003
66.2k
    U08 u1_x_off = ps_results_merge->u1_x_off;
1004
66.2k
    U08 u1_y_off = ps_results_merge->u1_y_off;
1005
66.2k
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006
1007
66.2k
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008
66.2k
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009
66.2k
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010
66.2k
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011
66.2k
    ps_results_tl = ps_merge_prms->ps_results_tl;
1012
66.2k
    ps_results_tr = ps_merge_prms->ps_results_tr;
1013
66.2k
    ps_results_bl = ps_merge_prms->ps_results_bl;
1014
66.2k
    ps_results_br = ps_merge_prms->ps_results_br;
1015
1016
66.2k
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017
12.5k
    {
1018
12.5k
        i4_part_mask &= ~ENABLE_AMP;
1019
12.5k
    }
1020
1021
66.2k
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022
11.2k
    {
1023
11.2k
        i4_part_mask &= ~ENABLE_AMP;
1024
1025
11.2k
        i4_part_mask &= ~ENABLE_SMP;
1026
11.2k
    }
1027
1028
66.2k
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030
    /*************************************************************************/
1031
    /* The logic for High speed CU merge goes as follows:                    */
1032
    /*                                                                       */
1033
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034
    /*    exceed 7                                                           */
1035
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036
    /*    are identical                                                      */
1037
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039
    /*    best parent cost is lower than sum of the best children costs      */
1040
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041
    /*                                                                       */
1042
    /*************************************************************************/
1043
1044
    /* Count the number of best partitions in child CUs, early exit if > 7 */
1045
66.2k
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046
10.6k
       (CU_32x32 == ps_results_merge->e_cu_size))
1047
65.8k
    {
1048
65.8k
        S32 num_parts_in_32x32 = 0;
1049
65.8k
        WORD32 i4_part_type;
1050
1051
65.8k
        if(ps_results_tl->u1_split_flag)
1052
16.5k
        {
1053
16.5k
            num_parts_in_32x32 += 4;
1054
1055
16.5k
#define COST_INTERCHANGE 0
1056
16.5k
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057
16.5k
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058
16.5k
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059
16.5k
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060
16.5k
        }
1061
49.3k
        else
1062
49.3k
        {
1063
49.3k
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064
49.3k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065
49.3k
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066
49.3k
        }
1067
1068
65.8k
        if(ps_results_tr->u1_split_flag)
1069
14.4k
        {
1070
14.4k
            num_parts_in_32x32 += 4;
1071
1072
14.4k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073
14.4k
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074
14.4k
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075
14.4k
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076
14.4k
        }
1077
51.4k
        else
1078
51.4k
        {
1079
51.4k
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080
51.4k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081
51.4k
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082
51.4k
        }
1083
1084
65.8k
        if(ps_results_bl->u1_split_flag)
1085
11.6k
        {
1086
11.6k
            num_parts_in_32x32 += 4;
1087
1088
11.6k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089
11.6k
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090
11.6k
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091
11.6k
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092
11.6k
        }
1093
54.2k
        else
1094
54.2k
        {
1095
54.2k
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096
54.2k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097
54.2k
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098
54.2k
        }
1099
1100
65.8k
        if(ps_results_br->u1_split_flag)
1101
9.31k
        {
1102
9.31k
            num_parts_in_32x32 += 4;
1103
1104
9.31k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105
9.31k
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106
9.31k
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107
9.31k
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108
9.31k
        }
1109
56.5k
        else
1110
56.5k
        {
1111
56.5k
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112
56.5k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113
56.5k
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114
56.5k
        }
1115
1116
65.8k
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117
12.3k
        {
1118
12.3k
            return CU_SPLIT;
1119
12.3k
        }
1120
1121
53.5k
        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122
13.2k
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123
925
        {
1124
925
            return CU_SPLIT;
1125
925
        }
1126
53.5k
    }
1127
1128
    /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129
    /* Note : Each intra part represent a NxN unit of the children CUs          */
1130
    /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131
53.0k
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132
10.6k
    {
1133
10.6k
        if(CU_64x64 == ps_results_merge->e_cu_size)
1134
394
        {
1135
394
            i4_intra_parts =
1136
394
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137
394
                    ? 16
1138
394
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139
394
        }
1140
10.2k
        else
1141
10.2k
        {
1142
10.2k
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143
10.2k
            {
1144
2.59k
            case 0:
1145
2.59k
            {
1146
2.59k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147
2.59k
                                       ->u1_inter_eval_enable)
1148
2.59k
                                     ? 16
1149
2.59k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150
2.59k
                                            ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152
2.59k
                break;
1153
0
            }
1154
2.54k
            case 1:
1155
2.54k
            {
1156
2.54k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157
2.54k
                                       ->u1_inter_eval_enable)
1158
2.54k
                                     ? 16
1159
2.54k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160
2.54k
                                            ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162
2.54k
                break;
1163
0
            }
1164
2.58k
            case 2:
1165
2.58k
            {
1166
2.58k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167
2.58k
                                       ->u1_inter_eval_enable)
1168
2.58k
                                     ? 16
1169
2.58k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170
2.58k
                                            ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172
2.58k
                break;
1173
0
            }
1174
2.55k
            case 3:
1175
2.55k
            {
1176
2.55k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177
2.55k
                                       ->u1_inter_eval_enable)
1178
2.55k
                                     ? 16
1179
2.55k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180
2.55k
                                            ->ps_child_node_br->u1_intra_eval_enable);
1181
1182
2.55k
                break;
1183
0
            }
1184
10.2k
            }
1185
10.2k
        }
1186
10.6k
    }
1187
42.3k
    else
1188
42.3k
    {
1189
211k
        for(i = 0; i < 4; i++)
1190
169k
        {
1191
169k
            search_results_t *ps_results =
1192
169k
                (i == 0) ? ps_results_tl
1193
169k
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195
169k
            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197
169k
            if(ps_results->u1_split_flag)
1198
3.19k
            {
1199
3.19k
                U08 u1_x_off = ps_results->u1_x_off;
1200
3.19k
                U08 u1_y_off = ps_results->u1_y_off;
1201
3.19k
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202
3.19k
                                      2;
1203
1204
                /* Special case to handle 8x8 CUs when 16x16 is split */
1205
3.19k
                ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207
3.19k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209
3.19k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210
1.61k
                    i4_intra_parts += 1;
1211
1212
3.19k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214
3.19k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215
1.58k
                    i4_intra_parts += 1;
1216
1217
3.19k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219
3.19k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220
1.40k
                    i4_intra_parts += 1;
1221
1222
3.19k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224
3.19k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225
1.43k
                    i4_intra_parts += 1;
1226
3.19k
            }
1227
166k
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228
8.28k
            {
1229
8.28k
                i4_intra_parts += 4;
1230
8.28k
            }
1231
169k
        }
1232
42.3k
    }
1233
1234
    /* Determine the max intra CU size indicated by IPE */
1235
53.0k
    intra_cu_size = CU_64x64;
1236
53.0k
    if(ps_cur_ipe_ctb->u1_split_flag)
1237
46.2k
    {
1238
46.2k
        intra_cu_size = CU_32x32;
1239
46.2k
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240
25.7k
        {
1241
25.7k
            intra_cu_size = CU_16x16;
1242
25.7k
        }
1243
46.2k
    }
1244
1245
53.0k
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246
2.26k
        (intra_cu_size < ps_results_merge->e_cu_size) &&
1247
1.24k
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248
51.7k
       (i4_intra_parts == 16))
1249
2.16k
    {
1250
2.16k
        S32 i4_merge_outcome;
1251
1252
2.16k
        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253
2.16k
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254
776
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255
2.16k
                               : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257
2.16k
        i4_merge_outcome = i4_merge_outcome ||
1258
1.24k
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260
2.16k
        i4_merge_outcome = i4_merge_outcome &&
1261
921
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263
2.16k
        if(i4_merge_outcome)
1264
921
        {
1265
921
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266
921
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267
921
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269
921
            ps_cu_results->u1_num_best_results = 1;
1270
921
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271
921
            ps_cu_results->u1_x_off = u1_x_off;
1272
921
            ps_cu_results->u1_y_off = u1_y_off;
1273
1274
921
            ps_best_result->u1_part_type = PRT_2Nx2N;
1275
921
            ps_best_result->ai4_tu_split_flag[0] = 0;
1276
921
            ps_best_result->ai4_tu_split_flag[1] = 0;
1277
921
            ps_best_result->ai4_tu_split_flag[2] = 0;
1278
921
            ps_best_result->ai4_tu_split_flag[3] = 0;
1279
921
            ps_best_result->i4_tot_cost =
1280
921
                (CU_64x64 == ps_results_merge->e_cu_size)
1281
921
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282
921
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284
921
            ps_pu->b1_intra_flag = 1;
1285
921
            ps_pu->b4_pos_x = u1_x_off >> 2;
1286
921
            ps_pu->b4_pos_y = u1_y_off >> 2;
1287
921
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288
921
            ps_pu->b4_ht = ps_pu->b4_wd;
1289
921
            ps_pu->mv.i1_l0_ref_idx = -1;
1290
921
            ps_pu->mv.i1_l1_ref_idx = -1;
1291
921
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292
921
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293
921
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294
921
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296
921
            return CU_MERGED;
1297
921
        }
1298
1.24k
        else
1299
1.24k
        {
1300
1.24k
            return CU_SPLIT;
1301
1.24k
        }
1302
2.16k
    }
1303
1304
50.8k
    if(i4_intra_parts)
1305
5.96k
    {
1306
5.96k
        i4_part_mask = ENABLE_2Nx2N;
1307
5.96k
    }
1308
1309
50.8k
    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311
50.8k
    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313
50.8k
    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314
50.8k
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316
50.8k
    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317
0
    {
1318
0
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319
0
        S32 i4_num_valid_parts;
1320
0
        S32 i4_sigma_array_offset;
1321
1322
0
        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324
        /*********************************************************************************************************************************************/
1325
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328
        /*********************************************************************************************************************************************/
1329
0
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330
0
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331
1332
0
        for(i = 0; i < i4_num_valid_parts; i++)
1333
0
        {
1334
0
            S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336
0
            hme_compute_final_sigma_of_pu_from_base_blocks(
1337
0
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338
0
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339
0
                au8_final_src_sigmaX,
1340
0
                au8_final_src_sigmaXSquared,
1341
0
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342
0
                4,
1343
0
                i4_part_id,
1344
0
                16);
1345
0
        }
1346
1347
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349
0
    }
1350
1351
    /*************************************************************************/
1352
    /* Loop through all ref idx and pick the merge candts and refine based   */
1353
    /* on the active partitions. At this stage num ref will be 1 or 2        */
1354
    /*************************************************************************/
1355
111k
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356
60.6k
    {
1357
60.6k
        S32 i4_cands;
1358
60.6k
        U08 u1_pred_dir = 0;
1359
1360
60.6k
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361
60.5k
        {
1362
60.5k
            u1_pred_dir = i4_search_idx;
1363
60.5k
        }
1364
103
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365
103
        {
1366
103
            u1_pred_dir = 1;
1367
103
        }
1368
0
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369
0
        {
1370
0
            u1_pred_dir = 0;
1371
0
        }
1372
0
        else
1373
0
        {
1374
0
            ASSERT(0);
1375
0
        }
1376
1377
        /* call the function to pick and evaluate the merge candts, given */
1378
        /* a ref id and a part mask.                                      */
1379
60.6k
        i4_cands = hme_pick_eval_merge_candts(
1380
60.6k
            ps_merge_prms,
1381
60.6k
            ps_subpel_prms,
1382
60.6k
            u1_pred_dir,
1383
60.6k
            i4_best_part_type,
1384
60.6k
            is_vert,
1385
60.6k
            ps_wt_inp_prms,
1386
60.6k
            i4_frm_qstep,
1387
60.6k
            ps_cmn_utils_optimised_function_list,
1388
60.6k
            ps_me_optimised_function_list);
1389
1390
60.6k
        if(i4_cands)
1391
59.8k
        {
1392
59.8k
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393
59.8k
                u1_pred_dir;
1394
59.8k
            ps_merge_prms->i4_num_pred_dir_actual++;
1395
59.8k
        }
1396
1397
60.6k
        i4_num_merge_cands_evaluated += i4_cands;
1398
60.6k
    }
1399
1400
    /* Call the decide_part_types function here */
1401
    /* Populate the new PU struct with the results post subpel refinement*/
1402
50.8k
    if(i4_num_merge_cands_evaluated)
1403
50.8k
    {
1404
50.8k
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406
50.8k
        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408
50.8k
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409
50.8k
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411
50.8k
        hme_populate_pus(
1412
50.8k
            ps_thrd_ctxt,
1413
50.8k
            ps_ctxt,
1414
50.8k
            ps_subpel_prms,
1415
50.8k
            ps_results_merge,
1416
50.8k
            ps_cu_results,
1417
50.8k
            ps_pu_results,
1418
50.8k
            ps_pu_result,
1419
50.8k
            ps_merge_prms->ps_inter_ctb_prms,
1420
50.8k
            &ps_ctxt->s_wt_pred,
1421
50.8k
            ps_merge_prms->ps_layer_ctxt,
1422
50.8k
            ps_merge_prms->au1_pred_dir_searched,
1423
50.8k
            ps_merge_prms->i4_num_pred_dir_actual);
1424
1425
50.8k
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427
50.8k
        hme_decide_part_types(
1428
50.8k
            ps_cu_results,
1429
50.8k
            ps_pu_results,
1430
50.8k
            ps_merge_prms->ps_inter_ctb_prms,
1431
50.8k
            ps_ctxt,
1432
50.8k
            ps_cmn_utils_optimised_function_list,
1433
50.8k
            ps_me_optimised_function_list
1434
1435
50.8k
        );
1436
1437
        /*****************************************************************/
1438
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439
        /*****************************************************************/
1440
50.8k
#if DISABLE_INTRA_IN_BPICS
1441
50.8k
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442
6.70k
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443
50.6k
#endif
1444
50.6k
        {
1445
50.6k
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446
50.6k
            {
1447
50.6k
                hme_insert_intra_nodes_post_bipred(
1448
50.6k
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449
50.6k
            }
1450
50.6k
        }
1451
50.8k
    }
1452
0
    else
1453
0
    {
1454
0
        return CU_SPLIT;
1455
0
    }
1456
1457
    /* We check the best result of ref idx 0 and compare for parent vs child */
1458
50.8k
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459
10.6k
       (CU_32x32 == ps_results_merge->e_cu_size))
1460
50.4k
    {
1461
50.4k
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462
        /*********************************************************************/
1463
        /* Add the cost of signaling the CU tree bits.                       */
1464
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466
        /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467
        /*********************************************************************/
1468
50.4k
        {
1469
50.4k
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471
50.4k
            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472
50.4k
        }
1473
1474
50.4k
        if(i4_cost_parent < i4_cost_children)
1475
34.7k
        {
1476
34.7k
            return CU_MERGED;
1477
34.7k
        }
1478
1479
15.7k
        return CU_SPLIT;
1480
50.4k
    }
1481
394
    else
1482
394
    {
1483
394
        return CU_MERGED;
1484
394
    }
1485
50.8k
}
1486
1487
/**
********************************************************************************
*  @brief  Copies one search result into an mv bank entry.
*
*  Writes the x and y mv components of ps_search_node, each right shifted by
*  shift, into ps_mv, and stores the node's reference index through
*  pi1_ref_idx.
*
*  The body is wrapped in do { } while(0) so that the invocation followed by
*  a semicolon forms exactly one statement and can be used safely inside an
*  unbraced if/else.
*
*  Note: ps_mv and ps_search_node are evaluated more than once, so the
*  arguments must not have side effects.
********************************************************************************
*/
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493
1494
/**
1495
********************************************************************************
1496
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497
*                               layer_mv_t *ps_layer_mv,
1498
*                               S32 i4_search_blk_x,
1499
*                               S32 i4_search_blk_y,
1500
*                               mvbank_update_prms_t *ps_prms)
1501
*
1502
*  @brief  Updates the mv bank in case there is no further encodign to be done
1503
*
1504
*  @param[in]  ps_search_results: contains results for the block just searched
1505
*
1506
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507
*
1508
*  @param[in] i4_search_blk_x  : col num of blk being searched
1509
*
1510
*  @param[in] i4_search_blk_y : row num of blk being searched
1511
*
1512
*  @param[in] ps_prms : contains certain parameters which govern how updatedone
1513
*
1514
*  @return None
1515
********************************************************************************
1516
*/
1517
1518
void hme_update_mv_bank_noencode(
1519
    search_results_t *ps_search_results,
1520
    layer_mv_t *ps_layer_mv,
1521
    S32 i4_search_blk_x,
1522
    S32 i4_search_blk_y,
1523
    mvbank_update_prms_t *ps_prms)
1524
22.0k
{
1525
22.0k
    hme_mv_t *ps_mv;
1526
22.0k
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527
22.0k
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528
22.0k
    S32 i4_blk_x, i4_blk_y, i4_offset;
1529
22.0k
    S32 i4_j, i4_ref_id;
1530
22.0k
    search_node_t *ps_search_node;
1531
22.0k
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532
22.0k
    search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533
22.0k
    search_node_t *ps_search_node_4x4_4;
1534
1535
22.0k
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536
22.0k
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537
22.0k
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539
22.0k
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541
    /* Identify the correct offset in the mvbank and the reference id buf */
1542
22.0k
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543
22.0k
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545
    /*************************************************************************/
1546
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548
    /* do a straightforward single update of results. This will have a 1-1   */
1549
    /* correspondence.                                                       */
1550
    /*************************************************************************/
1551
22.0k
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552
7.98k
    {
1553
18.9k
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554
10.9k
        {
1555
10.9k
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556
21.8k
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557
10.9k
            {
1558
10.9k
                COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559
10.9k
                ps_mv++;
1560
10.9k
                pi1_ref_idx++;
1561
10.9k
                ps_search_node++;
1562
10.9k
            }
1563
10.9k
        }
1564
7.98k
        return;
1565
7.98k
    }
1566
1567
    /*************************************************************************/
1568
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569
    /* case, we need to have NxN partitions enabled in search.               */
1570
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572
    /*************************************************************************/
1573
14.0k
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574
14.0k
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575
14.0k
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577
    /*************************************************************************/
1578
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579
    /* hence the below check.                                                */
1580
    /*************************************************************************/
1581
14.0k
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
1583
14.0k
    ps_mv1 = ps_mv;
1584
14.0k
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585
14.0k
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586
14.0k
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587
14.0k
    pi1_ref_idx1 = pi1_ref_idx;
1588
14.0k
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589
14.0k
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590
14.0k
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592
38.6k
    for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593
24.6k
    {
1594
24.6k
        ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596
24.6k
        ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598
24.6k
        ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600
24.6k
        ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602
24.6k
        ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604
24.6k
        COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605
24.6k
        ps_mv1++;
1606
24.6k
        pi1_ref_idx1++;
1607
24.6k
        ps_search_node_4x4_1++;
1608
24.6k
        COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609
24.6k
        ps_mv2++;
1610
24.6k
        pi1_ref_idx2++;
1611
24.6k
        ps_search_node_4x4_2++;
1612
24.6k
        COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613
24.6k
        ps_mv3++;
1614
24.6k
        pi1_ref_idx3++;
1615
24.6k
        ps_search_node_4x4_3++;
1616
24.6k
        COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617
24.6k
        ps_mv4++;
1618
24.6k
        pi1_ref_idx4++;
1619
24.6k
        ps_search_node_4x4_4++;
1620
1621
24.6k
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622
20.8k
        {
1623
20.8k
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624
20.8k
            ps_mv1++;
1625
20.8k
            pi1_ref_idx1++;
1626
20.8k
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627
20.8k
            ps_mv2++;
1628
20.8k
            pi1_ref_idx2++;
1629
20.8k
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630
20.8k
            ps_mv3++;
1631
20.8k
            pi1_ref_idx3++;
1632
20.8k
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633
20.8k
            ps_mv4++;
1634
20.8k
            pi1_ref_idx4++;
1635
20.8k
        }
1636
1637
24.6k
        for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638
0
        {
1639
0
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640
0
            ps_mv1++;
1641
0
            pi1_ref_idx1++;
1642
0
            ps_search_node_4x4_1++;
1643
0
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644
0
            ps_mv2++;
1645
0
            pi1_ref_idx2++;
1646
0
            ps_search_node_4x4_2++;
1647
0
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648
0
            ps_mv3++;
1649
0
            pi1_ref_idx3++;
1650
0
            ps_search_node_4x4_3++;
1651
0
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652
0
            ps_mv4++;
1653
0
            pi1_ref_idx4++;
1654
0
            ps_search_node_4x4_4++;
1655
0
        }
1656
24.6k
    }
1657
14.0k
}
1658
1659
void hme_update_mv_bank_encode(
1660
    search_results_t *ps_search_results,
1661
    layer_mv_t *ps_layer_mv,
1662
    S32 i4_search_blk_x,
1663
    S32 i4_search_blk_y,
1664
    mvbank_update_prms_t *ps_prms,
1665
    U08 *pu1_pred_dir_searched,
1666
    S32 i4_num_act_ref_l0)
1667
274k
{
1668
274k
    hme_mv_t *ps_mv;
1669
274k
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670
274k
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671
274k
    S32 i4_blk_x, i4_blk_y, i4_offset;
1672
274k
    S32 j, i, num_parts;
1673
274k
    search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674
274k
    search_node_t *ps_search_node_bl, *ps_search_node_br;
1675
274k
    search_node_t s_zero_mv;
1676
274k
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678
274k
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679
274k
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680
274k
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682
274k
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684
    /* Identify the correct offset in the mvbank and the reference id buf */
1685
274k
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686
274k
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687
1688
274k
    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689
274k
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691
    /*************************************************************************/
1692
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1693
    /* hence the below check.                                                */
1694
    /*************************************************************************/
1695
274k
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697
274k
    ps_mv1 = ps_mv;
1698
274k
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699
274k
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700
274k
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701
274k
    pi1_ref_idx1 = pi1_ref_idx;
1702
274k
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703
274k
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704
274k
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705
1706
    /* Initialize zero mv: default mv used for intra mvs */
1707
274k
    s_zero_mv.s_mv.i2_mvx = 0;
1708
274k
    s_zero_mv.s_mv.i2_mvy = 0;
1709
274k
    s_zero_mv.i1_ref_idx = 0;
1710
1711
274k
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712
63.6k
       (ps_search_results->i4_part_mask & ENABLE_NxN))
1713
55.8k
    {
1714
55.8k
        i4_part_type = PRT_NxN;
1715
55.8k
    }
1716
1717
598k
    for(i = 0; i < ps_prms->i4_num_ref; i++)
1718
324k
    {
1719
649k
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720
324k
        {
1721
324k
            WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723
324k
            num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725
324k
            ps_search_node_tl =
1726
324k
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728
324k
            if(num_parts == 1)
1729
247k
            {
1730
247k
                ps_search_node_tr = ps_search_node_tl;
1731
247k
                ps_search_node_bl = ps_search_node_tl;
1732
247k
                ps_search_node_br = ps_search_node_tl;
1733
247k
            }
1734
77.0k
            else if(num_parts == 2)
1735
11.8k
            {
1736
                /* For vertically oriented partitions, tl, bl pt to same result */
1737
                /* For horizontally oriented partition, tl, tr pt to same result */
1738
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740
                /* and right 2 8x8 have 12x16R partition */
1741
11.8k
                if(gau1_is_vert_part[i4_part_type])
1742
4.26k
                {
1743
4.26k
                    ps_search_node_tr =
1744
4.26k
                        ps_search_results
1745
4.26k
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746
4.26k
                    ps_search_node_bl = ps_search_node_tl;
1747
4.26k
                }
1748
7.59k
                else
1749
7.59k
                {
1750
7.59k
                    ps_search_node_tr = ps_search_node_tl;
1751
7.59k
                    ps_search_node_bl =
1752
7.59k
                        ps_search_results
1753
7.59k
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754
7.59k
                }
1755
11.8k
                ps_search_node_br =
1756
11.8k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757
11.8k
            }
1758
65.1k
            else
1759
65.1k
            {
1760
                /* 4 unique results */
1761
65.1k
                ps_search_node_tr =
1762
65.1k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763
65.1k
                ps_search_node_bl =
1764
65.1k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765
65.1k
                ps_search_node_br =
1766
65.1k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767
65.1k
            }
1768
1769
324k
            if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770
0
                ps_search_node_tl++;
1771
324k
            if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772
0
                ps_search_node_tr++;
1773
324k
            if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774
0
                ps_search_node_bl++;
1775
324k
            if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776
0
                ps_search_node_br++;
1777
1778
324k
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779
324k
            ps_mv1++;
1780
324k
            pi1_ref_idx1++;
1781
324k
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782
324k
            ps_mv2++;
1783
324k
            pi1_ref_idx2++;
1784
324k
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785
324k
            ps_mv3++;
1786
324k
            pi1_ref_idx3++;
1787
324k
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788
324k
            ps_mv4++;
1789
324k
            pi1_ref_idx4++;
1790
1791
324k
            if(ps_prms->i4_num_results_to_store > 1)
1792
0
            {
1793
0
                ps_search_node_tl =
1794
0
                    &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796
0
                if(num_parts == 1)
1797
0
                {
1798
0
                    ps_search_node_tr = ps_search_node_tl;
1799
0
                    ps_search_node_bl = ps_search_node_tl;
1800
0
                    ps_search_node_br = ps_search_node_tl;
1801
0
                }
1802
0
                else if(num_parts == 2)
1803
0
                {
1804
                    /* For vertically oriented partitions, tl, bl pt to same result */
1805
                    /* For horizontally oriented partition, tl, tr pt to same result */
1806
                    /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807
                    /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808
                    /* and right 2 8x8 have 12x16R partition */
1809
0
                    if(gau1_is_vert_part[i4_part_type])
1810
0
                    {
1811
0
                        ps_search_node_tr =
1812
0
                            &ps_search_results
1813
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814
0
                        ps_search_node_bl = ps_search_node_tl;
1815
0
                    }
1816
0
                    else
1817
0
                    {
1818
0
                        ps_search_node_tr = ps_search_node_tl;
1819
0
                        ps_search_node_bl =
1820
0
                            &ps_search_results
1821
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822
0
                    }
1823
0
                    ps_search_node_br =
1824
0
                        &ps_search_results
1825
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826
0
                }
1827
0
                else
1828
0
                {
1829
                    /* 4 unique results */
1830
0
                    ps_search_node_tr =
1831
0
                        &ps_search_results
1832
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833
0
                    ps_search_node_bl =
1834
0
                        &ps_search_results
1835
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836
0
                    ps_search_node_br =
1837
0
                        &ps_search_results
1838
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839
0
                }
1840
1841
0
                if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842
0
                    ps_search_node_tl++;
1843
0
                if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844
0
                    ps_search_node_tr++;
1845
0
                if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846
0
                    ps_search_node_bl++;
1847
0
                if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848
0
                    ps_search_node_br++;
1849
1850
0
                COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851
0
                ps_mv1++;
1852
0
                pi1_ref_idx1++;
1853
0
                COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854
0
                ps_mv2++;
1855
0
                pi1_ref_idx2++;
1856
0
                COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857
0
                ps_mv3++;
1858
0
                pi1_ref_idx3++;
1859
0
                COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860
0
                ps_mv4++;
1861
0
                pi1_ref_idx4++;
1862
0
            }
1863
324k
        }
1864
324k
    }
1865
274k
}
1866
1867
/**
1868
********************************************************************************
1869
*  @fn     hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870
*                               layer_mv_t *ps_layer_mv,
1871
*                               S32 i4_search_blk_x,
1872
*                               S32 i4_search_blk_y,
1873
*                               mvbank_update_prms_t *ps_prms)
1874
*
1875
*  @brief  Updates the mv bank in case there is no further encoding to be done
1876
*
1877
*  @param[in]  ps_search_results: contains results for the block just searched
1878
*
1879
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880
*
1881
*  @param[in] i4_search_blk_x  : col num of blk being searched
1882
*
1883
*  @param[in] i4_search_blk_y : row num of blk being searched
1884
*
1885
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
1886
*
1887
*  @return None
1888
********************************************************************************
1889
*/
1890
1891
void hme_update_mv_bank_in_l1_me(
1892
    search_results_t *ps_search_results,
1893
    layer_mv_t *ps_layer_mv,
1894
    S32 i4_search_blk_x,
1895
    S32 i4_search_blk_y,
1896
    mvbank_update_prms_t *ps_prms)
1897
283k
{
1898
283k
    hme_mv_t *ps_mv;
1899
283k
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900
283k
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901
283k
    S32 i4_blk_x, i4_blk_y, i4_offset;
1902
283k
    S32 i4_j, i4_ref_id;
1903
283k
    search_node_t *ps_search_node;
1904
283k
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906
283k
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907
283k
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908
283k
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910
283k
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912
    /* Identify the correct offset in the mvbank and the reference id buf */
1913
283k
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914
283k
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916
    /*************************************************************************/
1917
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919
    /* do a straightforward single update of results. This will have a 1-1   */
1920
    /* correspondence.                                                       */
1921
    /*************************************************************************/
1922
283k
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923
194k
    {
1924
194k
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926
194k
        hme_mv_t *ps_mv_l0_root = ps_mv;
1927
194k
        hme_mv_t *ps_mv_l1_root =
1928
194k
            ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930
194k
        U32 u4_num_l0_results_updated = 0;
1931
194k
        U32 u4_num_l1_results_updated = 0;
1932
1933
194k
        S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934
194k
        S08 *pi1_ref_idx_l1_root =
1935
194k
            pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936
1937
518k
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938
323k
        {
1939
323k
            U32 *pu4_num_results_updated;
1940
323k
            search_node_t **pps_result_nodes;
1941
1942
323k
            U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944
323k
            if(u1_pred_dir_of_cur_ref)
1945
23.8k
            {
1946
23.8k
                pu4_num_results_updated = &u4_num_l1_results_updated;
1947
23.8k
                pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948
23.8k
            }
1949
299k
            else
1950
299k
            {
1951
299k
                pu4_num_results_updated = &u4_num_l0_results_updated;
1952
299k
                pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953
299k
            }
1954
1955
323k
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957
830k
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958
507k
            {
1959
507k
                hme_add_new_node_to_a_sorted_array(
1960
507k
                    &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962
507k
                ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963
507k
                (*pu4_num_results_updated)++;
1964
507k
            }
1965
323k
        }
1966
1967
656k
        for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968
462k
        {
1969
462k
            COPY_SEARCH_RESULT(
1970
462k
                &ps_mv_l0_root[i4_j],
1971
462k
                &pi1_ref_idx_l0_root[i4_j],
1972
462k
                aps_result_nodes_sorted[0][i4_j],
1973
462k
                0);
1974
462k
        }
1975
1976
239k
        for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977
44.7k
        {
1978
44.7k
            COPY_SEARCH_RESULT(
1979
44.7k
                &ps_mv_l1_root[i4_j],
1980
44.7k
                &pi1_ref_idx_l1_root[i4_j],
1981
44.7k
                aps_result_nodes_sorted[1][i4_j],
1982
44.7k
                0);
1983
44.7k
        }
1984
1985
194k
        return;
1986
194k
    }
1987
1988
    /*************************************************************************/
1989
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990
    /* case, we need to have NxN partitions enabled in search.               */
1991
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993
    /*************************************************************************/
1994
89.3k
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995
89.3k
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996
89.3k
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998
    /*************************************************************************/
1999
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000
    /* hence the below check.                                                */
2001
    /*************************************************************************/
2002
89.3k
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004
89.3k
    ps_mv1 = ps_mv;
2005
89.3k
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006
89.3k
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007
89.3k
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008
89.3k
    pi1_ref_idx1 = pi1_ref_idx;
2009
89.3k
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010
89.3k
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011
89.3k
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013
89.3k
    {
2014
        /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015
89.3k
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016
89.3k
        U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017
2018
89.3k
        S32 i;
2019
2020
89.3k
        hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021
89.3k
        hme_mv_t *ps_mv1_l1_root =
2022
89.3k
            ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023
89.3k
        hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024
89.3k
        hme_mv_t *ps_mv2_l1_root =
2025
89.3k
            ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026
89.3k
        hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027
89.3k
        hme_mv_t *ps_mv3_l1_root =
2028
89.3k
            ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029
89.3k
        hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030
89.3k
        hme_mv_t *ps_mv4_l1_root =
2031
89.3k
            ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032
2033
89.3k
        U32 u4_num_l0_results_updated = 0;
2034
89.3k
        U32 u4_num_l1_results_updated = 0;
2035
2036
89.3k
        S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037
89.3k
        S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038
89.3k
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2039
89.3k
        S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040
89.3k
        S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041
89.3k
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2042
89.3k
        S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043
89.3k
        S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044
89.3k
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2045
89.3k
        S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046
89.3k
        S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047
89.3k
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2048
2049
446k
        for(i = 0; i < 4; i++)
2050
357k
        {
2051
357k
            hme_mv_t *ps_mv_l0_root;
2052
357k
            hme_mv_t *ps_mv_l1_root;
2053
2054
357k
            S08 *pi1_ref_idx_l0_root;
2055
357k
            S08 *pi1_ref_idx_l1_root;
2056
2057
1.12M
            for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058
772k
            {
2059
772k
                U32 *pu4_num_results_updated;
2060
772k
                search_node_t **pps_result_nodes;
2061
772k
                U08 *pu1_cost_shifts_for_sorted_node;
2062
2063
772k
                U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064
2065
772k
                if(u1_pred_dir_of_cur_ref)
2066
144k
                {
2067
144k
                    pu4_num_results_updated = &u4_num_l1_results_updated;
2068
144k
                    pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069
144k
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070
144k
                }
2071
627k
                else
2072
627k
                {
2073
627k
                    pu4_num_results_updated = &u4_num_l0_results_updated;
2074
627k
                    pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075
627k
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076
627k
                }
2077
2078
772k
                ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079
2080
772k
                ps_search_node_4x4 =
2081
772k
                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082
2083
1.80M
                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084
1.02M
                {
2085
1.02M
                    hme_add_new_node_to_a_sorted_array(
2086
1.02M
                        &ps_search_node_4x4[i4_j],
2087
1.02M
                        pps_result_nodes,
2088
1.02M
                        pu1_cost_shifts_for_sorted_node,
2089
1.02M
                        *pu4_num_results_updated,
2090
1.02M
                        0);
2091
2092
1.02M
                    (*pu4_num_results_updated)++;
2093
2094
1.02M
                    hme_add_new_node_to_a_sorted_array(
2095
1.02M
                        &ps_search_node_8x8[i4_j],
2096
1.02M
                        pps_result_nodes,
2097
1.02M
                        pu1_cost_shifts_for_sorted_node,
2098
1.02M
                        *pu4_num_results_updated,
2099
1.02M
                        2);
2100
2101
1.02M
                    (*pu4_num_results_updated)++;
2102
1.02M
                }
2103
772k
            }
2104
2105
357k
            switch(i)
2106
357k
            {
2107
89.3k
            case 0:
2108
89.3k
            {
2109
89.3k
                ps_mv_l0_root = ps_mv1_l0_root;
2110
89.3k
                ps_mv_l1_root = ps_mv1_l1_root;
2111
2112
89.3k
                pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113
89.3k
                pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114
2115
89.3k
                break;
2116
0
            }
2117
89.3k
            case 1:
2118
89.3k
            {
2119
89.3k
                ps_mv_l0_root = ps_mv2_l0_root;
2120
89.3k
                ps_mv_l1_root = ps_mv2_l1_root;
2121
2122
89.3k
                pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123
89.3k
                pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124
2125
89.3k
                break;
2126
0
            }
2127
89.3k
            case 2:
2128
89.3k
            {
2129
89.3k
                ps_mv_l0_root = ps_mv3_l0_root;
2130
89.3k
                ps_mv_l1_root = ps_mv3_l1_root;
2131
2132
89.3k
                pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133
89.3k
                pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134
2135
89.3k
                break;
2136
0
            }
2137
89.3k
            case 3:
2138
89.3k
            {
2139
89.3k
                ps_mv_l0_root = ps_mv4_l0_root;
2140
89.3k
                ps_mv_l1_root = ps_mv4_l1_root;
2141
2142
89.3k
                pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143
89.3k
                pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144
2145
89.3k
                break;
2146
0
            }
2147
357k
            }
2148
2149
357k
            u4_num_l0_results_updated =
2150
357k
                MIN((S32)u4_num_l0_results_updated,
2151
357k
                    ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152
2153
357k
            u4_num_l1_results_updated =
2154
357k
                MIN((S32)u4_num_l1_results_updated,
2155
357k
                    ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156
2157
1.20M
            for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158
847k
            {
2159
847k
                COPY_SEARCH_RESULT(
2160
847k
                    &ps_mv_l0_root[i4_j],
2161
847k
                    &pi1_ref_idx_l0_root[i4_j],
2162
847k
                    aps_result_nodes_sorted[0][i4_j],
2163
847k
                    0);
2164
847k
            }
2165
2166
540k
            for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167
182k
            {
2168
182k
                COPY_SEARCH_RESULT(
2169
182k
                    &ps_mv_l1_root[i4_j],
2170
182k
                    &pi1_ref_idx_l1_root[i4_j],
2171
182k
                    aps_result_nodes_sorted[1][i4_j],
2172
182k
                    0);
2173
182k
            }
2174
357k
        }
2175
89.3k
    }
2176
89.3k
}
2177
2178
/**
2179
******************************************************************************
2180
*  @brief Scales motion vector component projected from a different layer in same
2181
*         picture (so no ref id related delta poc scaling required)
2182
******************************************************************************
2183
*/
2184
2185
/* Rescales mvcomp_p by dim_c / dim_p. A half of dim_p carrying the sign of   */
/* mvcomp_p is added to the product before the division so that the quotient */
/* is rounded instead of truncated.                                           */
#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    (((mvcomp_p) * (dim_c) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
2187
/**
2188
********************************************************************************
2189
*  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2190
*                                   layer_ctxt_t *ps_curr_layer,
2191
*                                   layer_ctxt_t *ps_coarse_layer,
2192
*                                   S32 i4_pos_x,
2193
*                                   S32 i4_pos_y,
2194
*                                   S08 i1_ref_id,
2195
*                                   S32 i4_result_id)
2196
*
2197
*  @brief  From a coarser layer, projects a candidate situated at "colocated"
2198
*          position in the picture (e.g. given x, y it will be x/2, y/2 when the layer ratio is dyadic)
2199
*
2200
*  @param[out]  ps_search_node : contains the projected result
2201
*
2202
*  @param[in]   ps_curr_layer : current layer context
2203
*
2204
*  @param[in]   ps_coarse_layer  : coarser layer context
2205
*
2206
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2207
*
2208
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2209
*
2210
*  @param[in]   i1_ref_id : reference id for which the candidate required
2211
*
2212
*  @param[in]   i4_result_id : result id for which the candidate required
2213
*                              (0 : best result, 1 : next best)
2214
*
2215
*  @return None
2216
********************************************************************************
2217
*/
2218
2219
void hme_project_coloc_candt(
2220
    search_node_t *ps_search_node,
2221
    layer_ctxt_t *ps_curr_layer,
2222
    layer_ctxt_t *ps_coarse_layer,
2223
    S32 i4_pos_x,
2224
    S32 i4_pos_y,
2225
    S08 i1_ref_id,
2226
    S32 i4_result_id)
2227
0
{
2228
0
    S32 wd_c, ht_c, wd_p, ht_p;
2229
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2230
0
    layer_mv_t *ps_layer_mvbank;
2231
0
    hme_mv_t *ps_mv;
2232
0
    S08 *pi1_ref_idx;
2233
2234
    /* Width and ht of current and prev layers */
2235
0
    wd_c = ps_curr_layer->i4_wd;
2236
0
    ht_c = ps_curr_layer->i4_ht;
2237
0
    wd_p = ps_coarse_layer->i4_wd;
2238
0
    ht_p = ps_coarse_layer->i4_ht;
2239
2240
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241
0
    blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242
2243
    /* Safety check to avoid uninitialized access across temporal layers */
2244
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246
2247
    /* Project the positions to prev layer */
2248
    /* TODO: convert these to scale factors at pic level */
2249
0
    blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250
0
    blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251
2252
    /* Pick up the mvs from the location */
2253
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255
2256
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258
2259
0
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260
0
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261
2262
0
    ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263
0
    ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265
0
    ps_search_node->u1_subpel_done = 0;
2266
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267
0
    {
2268
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2269
0
        ps_search_node->s_mv.i2_mvx = 0;
2270
0
        ps_search_node->s_mv.i2_mvy = 0;
2271
0
    }
2272
0
}
2273
2274
/**
2275
********************************************************************************
2276
*  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277
*                                   layer_ctxt_t *ps_curr_layer,
2278
*                                   layer_ctxt_t *ps_coarse_layer,
2279
*                                   S32 i4_pos_x,
2280
*                                   S32 i4_pos_y,
2281
*                                   S08 i1_ref_id,
2282
*                                   S32 i4_result_id)
2283
*
2284
*  @brief  From a coarser layer, projects a candidate situated at "colocated"
2285
*          position in the picture when the ratios are dyadic
2286
*
2287
*  @param[out]  ps_search_node : contains the projected result
2288
*
2289
*  @param[in]   ps_curr_layer : current layer context
2290
*
2291
*  @param[in]   ps_coarse_layer  : coarser layer context
2292
*
2293
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2294
*
2295
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2296
*
2297
*  @param[in]   i1_ref_id : reference id for which the candidate required
2298
*
2299
*  @param[in]   i4_result_id : result id for which the candidate required
2300
*                              (0 : best result, 1 : next best)
2301
*
2302
*  @return None
2303
********************************************************************************
2304
*/
2305
2306
void hme_project_coloc_candt_dyadic(
2307
    search_node_t *ps_search_node,
2308
    layer_ctxt_t *ps_curr_layer,
2309
    layer_ctxt_t *ps_coarse_layer,
2310
    S32 i4_pos_x,
2311
    S32 i4_pos_y,
2312
    S08 i1_ref_id,
2313
    S32 i4_result_id)
2314
5.78M
{
2315
5.78M
    S32 wd_c, ht_c, wd_p, ht_p;
2316
5.78M
    S32 blksize_p, blk_x, blk_y, i4_offset;
2317
5.78M
    layer_mv_t *ps_layer_mvbank;
2318
5.78M
    hme_mv_t *ps_mv;
2319
5.78M
    S08 *pi1_ref_idx;
2320
2321
    /* Width and ht of current and prev layers */
2322
5.78M
    wd_c = ps_curr_layer->i4_wd;
2323
5.78M
    ht_c = ps_curr_layer->i4_ht;
2324
5.78M
    wd_p = ps_coarse_layer->i4_wd;
2325
5.78M
    ht_p = ps_coarse_layer->i4_ht;
2326
2327
5.78M
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328
    /* blksize_p = log2(wd) + 1 */
2329
5.78M
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330
2331
    /* ASSERT for valid sizes */
2332
5.78M
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333
2334
    /* Safety check to avoid uninitialized access across temporal layers */
2335
5.78M
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336
5.78M
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337
2338
    /* Project the positions to prev layer */
2339
    /* TODO: convert these to scale factors at pic level */
2340
5.78M
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2341
5.78M
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2342
2343
    /* Pick up the mvs from the location */
2344
5.78M
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345
5.78M
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346
2347
5.78M
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348
5.78M
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349
2350
5.78M
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351
5.78M
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352
2353
5.78M
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354
5.78M
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355
5.78M
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356
5.78M
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357
0
    {
2358
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2359
0
        ps_search_node->s_mv.i2_mvx = 0;
2360
0
        ps_search_node->s_mv.i2_mvy = 0;
2361
0
    }
2362
5.78M
}
2363
2364
void hme_project_coloc_candt_dyadic_implicit(
2365
    search_node_t *ps_search_node,
2366
    layer_ctxt_t *ps_curr_layer,
2367
    layer_ctxt_t *ps_coarse_layer,
2368
    S32 i4_pos_x,
2369
    S32 i4_pos_y,
2370
    S32 i4_num_act_ref_l0,
2371
    U08 u1_pred_dir,
2372
    U08 u1_default_ref_id,
2373
    S32 i4_result_id)
2374
0
{
2375
0
    S32 wd_c, ht_c, wd_p, ht_p;
2376
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2377
0
    layer_mv_t *ps_layer_mvbank;
2378
0
    hme_mv_t *ps_mv;
2379
0
    S08 *pi1_ref_idx;
2380
2381
    /* Width and ht of current and prev layers */
2382
0
    wd_c = ps_curr_layer->i4_wd;
2383
0
    ht_c = ps_curr_layer->i4_ht;
2384
0
    wd_p = ps_coarse_layer->i4_wd;
2385
0
    ht_p = ps_coarse_layer->i4_ht;
2386
2387
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2388
0
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2389
2390
    /* ASSERT for valid sizes */
2391
0
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2392
2393
    /* Safety check to avoid uninitialized access across temporal layers */
2394
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2395
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2396
    /* Project the positions to prev layer */
2397
    /* TODO: convert these to scale factors at pic level */
2398
0
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2399
0
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2400
2401
    /* Pick up the mvs from the location */
2402
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2403
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2404
2405
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2406
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2407
2408
0
    if(u1_pred_dir == 1)
2409
0
    {
2410
0
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411
0
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2412
0
    }
2413
2414
0
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2415
0
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2416
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2417
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2418
0
    {
2419
0
        ps_search_node->i1_ref_idx = u1_default_ref_id;
2420
0
        ps_search_node->s_mv.i2_mvx = 0;
2421
0
        ps_search_node->s_mv.i2_mvy = 0;
2422
0
    }
2423
0
}
2424
2425
/* Scales the four range limits of prm2 by 2^shift and stores them in prm1.   */
/* prm1 and prm2 are structure lvalues. The limits are multiplied instead of  */
/* shifted because left shifting a negative signed value (e.g. a negative     */
/* minimum) is undefined behaviour in C. Arguments are parenthesized so that  */
/* arbitrary expressions can be passed.                                       */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }
2432
2433
/* Pointer flavour of the range scaling: scales the four range limits of      */
/* *prm2 by 2^shift and stores them in *prm1. The limits are multiplied       */
/* instead of shifted because left shifting a negative signed value is        */
/* undefined behaviour in C. Arguments are parenthesized so that arbitrary    */
/* pointer expressions can be passed.                                         */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2440
2441
/**
2442
********************************************************************************
2443
*  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444
*                       refine_layer_prms_t *ps_refine_prms)
2445
*
2446
*  @brief  Frame init of refinement layers in ME
2447
*
2448
*  @param[in,out]  ps_ctxt: ME Handle
2449
*
2450
*  @param[in]  ps_refine_prms : refinement layer prms
2451
*
2452
*  @return None
2453
********************************************************************************
2454
*/
2455
void hme_refine_frm_init(
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
{
    /* Number of references held by the mv bank of the coarser layer */
    S32 i4_coarse_num_ref = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;

    /* Explicit ref search keeps the coarser layer's ref count, otherwise 2 is used */
    S32 i4_fpel_num_ref = ps_refine_prms->explicit_ref ? i4_coarse_num_ref : 2;

    /* Results are stored per 8x8 block unless 4x4 partitions are enabled */
    BLK_SIZE_T e_blk_size = ps_refine_prms->i4_enable_4x4_part ? BLK_4x4 : BLK_8x8;

    /* Last argument of the mv bank init: 1 unless the layer id is positive */
    S32 i4_layer_id_not_positive = (ps_refine_prms->i4_layer_id > 0) ? 0 : 1;

    /* Never ask for more references than the coarser layer provides */
    i4_fpel_num_ref = MIN(i4_fpel_num_ref, i4_coarse_num_ref);

    hme_init_mv_bank(
        ps_curr_layer,
        e_blk_size,
        i4_fpel_num_ref,
        ps_refine_prms->i4_num_mvbank_results,
        i4_layer_id_not_positive);
}
2487
2488
#if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489
/**
2490
********************************************************************************
2491
*  @fn   void hme_init_clusters_16x16
2492
*               (
2493
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2494
*               )
2495
*
2496
*  @brief  Initialisations for the structs used in clustering algorithm
2497
*
2498
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2499
*                                        of 16x16 block
2500
*
2501
*  @return None
2502
********************************************************************************
2503
*/
2504
static __inline void
2505
    hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506
57.8k
{
2507
57.8k
    S32 i;
2508
2509
57.8k
    ps_cluster_blk_16x16->num_clusters = 0;
2510
57.8k
    ps_cluster_blk_16x16->intra_mv_area = 0;
2511
57.8k
    ps_cluster_blk_16x16->best_inter_cost = 0;
2512
2513
520k
    for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514
463k
    {
2515
463k
        ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516
463k
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517
2518
463k
        ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519
2520
463k
        ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521
463k
        ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522
463k
    }
2523
752k
    for(i = 0; i < MAX_NUM_REF; i++)
2524
694k
    {
2525
694k
        ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526
694k
    }
2527
57.8k
}
2528
2529
/**
2530
********************************************************************************
2531
*  @fn   void hme_init_clusters_32x32
2532
*               (
2533
*                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2534
*               )
2535
*
2536
*  @brief  Initialisations for the structs used in clustering algorithm
2537
*
2538
*  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2539
*                                        of 32x32 block
2540
*
2541
*  @return None
2542
********************************************************************************
2543
*/
2544
static __inline void
2545
    hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546
14.4k
{
2547
14.4k
    S32 i;
2548
2549
14.4k
    ps_cluster_blk_32x32->num_clusters = 0;
2550
14.4k
    ps_cluster_blk_32x32->intra_mv_area = 0;
2551
14.4k
    ps_cluster_blk_32x32->best_alt_ref = -1;
2552
14.4k
    ps_cluster_blk_32x32->best_uni_ref = -1;
2553
14.4k
    ps_cluster_blk_32x32->best_inter_cost = 0;
2554
14.4k
    ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555
2556
159k
    for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557
144k
    {
2558
144k
        ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559
144k
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560
144k
        ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561
2562
144k
        ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563
144k
        ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564
144k
    }
2565
188k
    for(i = 0; i < MAX_NUM_REF; i++)
2566
173k
    {
2567
173k
        ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568
173k
    }
2569
14.4k
}
2570
2571
/**
2572
********************************************************************************
2573
*  @fn   void hme_init_clusters_64x64
2574
*               (
2575
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2576
*               )
2577
*
2578
*  @brief  Initialisations for the structs used in clustering algorithm
2579
*
2580
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2581
*                                        of 64x64 block
2582
*
2583
*  @return None
2584
********************************************************************************
2585
*/
2586
static __inline void
2587
    hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588
3.61k
{
2589
3.61k
    S32 i;
2590
2591
3.61k
    ps_cluster_blk_64x64->num_clusters = 0;
2592
3.61k
    ps_cluster_blk_64x64->intra_mv_area = 0;
2593
3.61k
    ps_cluster_blk_64x64->best_alt_ref = -1;
2594
3.61k
    ps_cluster_blk_64x64->best_uni_ref = -1;
2595
3.61k
    ps_cluster_blk_64x64->best_inter_cost = 0;
2596
2597
39.7k
    for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598
36.1k
    {
2599
36.1k
        ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600
36.1k
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601
36.1k
        ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602
2603
36.1k
        ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604
36.1k
        ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605
36.1k
    }
2606
47.0k
    for(i = 0; i < MAX_NUM_REF; i++)
2607
43.4k
    {
2608
43.4k
        ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609
43.4k
    }
2610
3.61k
}
2611
2612
/**
2613
********************************************************************************
2614
*  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2615
*               (
2616
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2617
*               )
2618
*
2619
*  @brief  Finds best_uni_ref and best_alt_ref
2620
*
2621
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2622
*
2623
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2624
*                             enabled
2625
*
2626
*  @param[in]  block_width: width of the block in pels
2627
*
2628
*  @param[in]  e_cu_pos: position of the block within the CTB
2629
*
2630
*  @return None
2631
********************************************************************************
2632
*/
2633
void hme_sort_and_assign_top_ref_ids_areawise(
2634
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635
18.0k
{
2636
18.0k
    cluster_32x32_blk_t *ps_32x32 = NULL;
2637
18.0k
    cluster_64x64_blk_t *ps_64x64 = NULL;
2638
18.0k
    cluster_data_t *ps_data;
2639
2640
18.0k
    S32 j, k;
2641
2642
18.0k
    S32 ai4_uni_area[MAX_NUM_REF];
2643
18.0k
    S32 ai4_bi_area[MAX_NUM_REF];
2644
18.0k
    S32 ai4_ref_id_found[MAX_NUM_REF];
2645
18.0k
    S32 ai4_ref_id[MAX_NUM_REF];
2646
2647
18.0k
    S32 best_uni_ref = -1, best_alt_ref = -1;
2648
18.0k
    S32 num_clusters;
2649
18.0k
    S32 num_ref = 0;
2650
18.0k
    S32 num_clusters_evaluated = 0;
2651
18.0k
    S32 is_cur_blk_valid;
2652
2653
18.0k
    if(32 == block_width)
2654
14.4k
    {
2655
14.4k
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656
14.4k
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657
14.4k
        num_clusters = ps_32x32->num_clusters;
2658
14.4k
        ps_data = &ps_32x32->as_cluster_data[0];
2659
14.4k
    }
2660
3.61k
    else
2661
3.61k
    {
2662
3.61k
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663
3.61k
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664
3.61k
        num_clusters = ps_64x64->num_clusters;
2665
3.61k
        ps_data = &ps_64x64->as_cluster_data[0];
2666
3.61k
    }
2667
2668
#if !ENABLE_4CTB_EVALUATION
2669
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670
    {
2671
        return;
2672
    }
2673
#endif
2674
18.0k
    if(num_clusters == 0)
2675
5.11k
    {
2676
5.11k
        return;
2677
5.11k
    }
2678
12.9k
    else if(!is_cur_blk_valid)
2679
56
    {
2680
56
        return;
2681
56
    }
2682
2683
12.9k
    memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684
12.9k
    memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685
12.9k
    memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686
12.9k
    memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687
2688
60.1k
    for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689
47.2k
    {
2690
47.2k
        S32 ref_id;
2691
2692
47.2k
        if(!ps_data->is_valid_cluster)
2693
2.30k
        {
2694
2.30k
            continue;
2695
2.30k
        }
2696
2697
44.9k
        ref_id = ps_data->ref_id;
2698
2699
44.9k
        num_clusters_evaluated++;
2700
2701
44.9k
        ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702
44.9k
        ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703
2704
44.9k
        if(!ai4_ref_id_found[ref_id])
2705
18.4k
        {
2706
18.4k
            ai4_ref_id[ref_id] = ref_id;
2707
18.4k
            ai4_ref_id_found[ref_id] = 1;
2708
18.4k
            num_ref++;
2709
18.4k
        }
2710
44.9k
    }
2711
2712
12.9k
    {
2713
12.9k
        S32 ai4_ref_id_temp[MAX_NUM_REF];
2714
2715
12.9k
        memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716
2717
155k
        for(k = 1; k < MAX_NUM_REF; k++)
2718
142k
        {
2719
142k
            if(ai4_uni_area[k] > ai4_uni_area[0])
2720
1.73k
            {
2721
1.73k
                SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722
1.73k
                SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723
1.73k
            }
2724
142k
        }
2725
2726
12.9k
        best_uni_ref = ai4_ref_id_temp[0];
2727
12.9k
    }
2728
2729
12.9k
    if(bidir_enabled)
2730
1.85k
    {
2731
22.3k
        for(k = 1; k < MAX_NUM_REF; k++)
2732
20.4k
        {
2733
20.4k
            if(ai4_bi_area[k] > ai4_bi_area[0])
2734
374
            {
2735
374
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736
374
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737
374
            }
2738
20.4k
        }
2739
2740
1.85k
        if(!ai4_bi_area[0])
2741
1.02k
        {
2742
1.02k
            best_alt_ref = -1;
2743
2744
1.02k
            if(32 == block_width)
2745
921
            {
2746
921
                SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747
921
            }
2748
103
            else
2749
103
            {
2750
103
                SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751
103
            }
2752
2753
1.02k
            return;
2754
1.02k
        }
2755
2756
835
        if(best_uni_ref == ai4_ref_id[0])
2757
422
        {
2758
4.64k
            for(k = 2; k < MAX_NUM_REF; k++)
2759
4.22k
            {
2760
4.22k
                if(ai4_bi_area[k] > ai4_bi_area[1])
2761
99
                {
2762
99
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763
99
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764
99
                }
2765
4.22k
            }
2766
2767
422
            best_alt_ref = ai4_ref_id[1];
2768
422
        }
2769
413
        else
2770
413
        {
2771
413
            best_alt_ref = ai4_ref_id[0];
2772
413
        }
2773
835
    }
2774
2775
11.8k
    if(32 == block_width)
2776
9.36k
    {
2777
9.36k
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778
9.36k
    }
2779
2.53k
    else
2780
2.53k
    {
2781
2.53k
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782
2.53k
    }
2783
11.8k
}
2784
2785
/**
2786
********************************************************************************
2787
*  @fn   void hme_find_top_ref_ids
2788
*               (
2789
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2790
*               )
2791
*
2792
*  @brief  Finds best_uni_ref and best_alt_ref
2793
*
2794
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2795
*
2796
*  @return None
2797
********************************************************************************
2798
*/
2799
void hme_find_top_ref_ids(
2800
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801
7.23k
{
2802
7.23k
    S32 i;
2803
2804
7.23k
    if(32 == block_width)
2805
3.61k
    {
2806
18.0k
        for(i = 0; i < 4; i++)
2807
14.4k
        {
2808
14.4k
            hme_sort_and_assign_top_ref_ids_areawise(
2809
14.4k
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810
14.4k
        }
2811
3.61k
    }
2812
3.61k
    else if(64 == block_width)
2813
3.61k
    {
2814
3.61k
        hme_sort_and_assign_top_ref_ids_areawise(
2815
3.61k
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816
3.61k
    }
2817
7.23k
}
2818
2819
/**
2820
********************************************************************************
2821
*  @fn   void hme_boot_out_outlier
2822
*               (
2823
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2824
*               )
2825
*
2826
*  @brief  Removes outlier clusters before CU tree population
2827
*
2828
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2829
*
2830
*  @return None
2831
********************************************************************************
2832
*/
2833
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834
7.23k
{
2835
7.23k
    cluster_32x32_blk_t *ps_32x32;
2836
2837
7.23k
    S32 i;
2838
2839
7.23k
    cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840
2841
7.23k
    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842
2843
7.23k
    if(32 == blk_width)
2844
3.61k
    {
2845
        /* 32x32 clusters */
2846
18.0k
        for(i = 0; i < 4; i++)
2847
14.4k
        {
2848
14.4k
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849
2850
14.4k
            if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851
1.17k
            {
2852
1.17k
                BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853
1.17k
            }
2854
14.4k
        }
2855
3.61k
    }
2856
3.61k
    else if(64 == blk_width)
2857
3.61k
    {
2858
        /* 64x64 clusters */
2859
3.61k
        if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860
1.56k
        {
2861
1.56k
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862
1.56k
        }
2863
3.61k
    }
2864
7.23k
}
2865
2866
/**
2867
********************************************************************************
2868
*  @fn   void hme_update_cluster_attributes
2869
*               (
2870
*                   cluster_data_t *ps_cluster_data,
2871
*                   S32 mvx,
2872
*                   S32 mvy,
2873
*                   PART_ID_T e_part_id
2874
*               )
2875
*
2876
*  @brief  Implementation fo the clustering algorithm
2877
*
2878
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2879
*
2880
*  @param[in]  mvx : x co-ordinate of the motion vector
2881
*
2882
*  @param[in]  mvy : y co-ordinate of the motion vector
2883
*
2884
*  @param[in]  ref_idx : ref_id of the motion vector
2885
*
2886
*  @param[in]  e_part_id : partition id of the motion vector
2887
*
2888
*  @return None
2889
********************************************************************************
2890
*/
2891
static __inline void hme_update_cluster_attributes(
2892
    cluster_data_t *ps_cluster_data,
2893
    S32 mvx,
2894
    S32 mvy,
2895
    S32 mvdx,
2896
    S32 mvdy,
2897
    S32 ref_id,
2898
    S32 sdi,
2899
    U08 is_part_of_bi,
2900
    PART_ID_T e_part_id)
2901
15.4k
{
2902
15.4k
    LWORD64 i8_mvx_sum_q8;
2903
15.4k
    LWORD64 i8_mvy_sum_q8;
2904
2905
15.4k
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906
15.4k
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907
2908
15.4k
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909
943
    {
2910
943
        ps_cluster_data->min_x = mvx;
2911
943
    }
2912
14.4k
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913
990
    {
2914
990
        ps_cluster_data->max_x = mvx;
2915
990
    }
2916
2917
15.4k
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918
2.37k
    {
2919
2.37k
        ps_cluster_data->min_y = mvy;
2920
2.37k
    }
2921
13.0k
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922
2.36k
    {
2923
2.36k
        ps_cluster_data->max_y = mvy;
2924
2.36k
    }
2925
2926
15.4k
    {
2927
15.4k
        S32 num_mvs = ps_cluster_data->num_mvs;
2928
2929
15.4k
        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930
15.4k
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931
15.4k
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932
2933
        /***************************/
2934
15.4k
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935
15.4k
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936
        /**************************/
2937
15.4k
    }
2938
2939
    /* Updation of centroid */
2940
15.4k
    {
2941
15.4k
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942
15.4k
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943
2944
15.4k
        ps_cluster_data->num_mvs++;
2945
2946
15.4k
        ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947
15.4k
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948
15.4k
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949
15.4k
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950
15.4k
    }
2951
2952
15.4k
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953
2954
15.4k
    if(is_part_of_bi)
2955
514
    {
2956
514
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957
514
    }
2958
14.9k
    else
2959
14.9k
    {
2960
14.9k
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961
14.9k
    }
2962
15.4k
}
2963
2964
/**
2965
********************************************************************************
2966
*  @fn   void hme_try_cluster_merge
2967
*               (
2968
*                   cluster_data_t *ps_cluster_data,
2969
*                   S32 *pi4_num_clusters,
2970
*                   S32 idx_of_updated_cluster
2971
*               )
2972
*
2973
*  @brief  Implementation fo the clustering algorithm
2974
*
2975
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2976
*
2977
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2978
*
2979
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2980
*                                       updated
2981
*
2982
*  @return Nothing
2983
********************************************************************************
2984
*/
2985
void hme_try_cluster_merge(
2986
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987
15.0k
{
2988
15.0k
    centroid_t *ps_centroid;
2989
2990
15.0k
    S32 cur_pos_x_q8;
2991
15.0k
    S32 cur_pos_y_q8;
2992
15.0k
    S32 i;
2993
15.0k
    S32 max_dist_from_centroid;
2994
15.0k
    S32 mvd;
2995
15.0k
    S32 mvdx_q8;
2996
15.0k
    S32 mvdx;
2997
15.0k
    S32 mvdy_q8;
2998
15.0k
    S32 mvdy;
2999
15.0k
    S32 num_clusters, num_clusters_evaluated;
3000
15.0k
    S32 other_pos_x_q8;
3001
15.0k
    S32 other_pos_y_q8;
3002
3003
15.0k
    cluster_data_t *ps_root = ps_cluster_data;
3004
15.0k
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005
15.0k
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006
3007
    /* Merge is superfluous if num_clusters is 1 */
3008
15.0k
    if(*pu1_num_clusters == 1)
3009
9.78k
    {
3010
9.78k
        return;
3011
9.78k
    }
3012
3013
5.26k
    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014
5.26k
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015
3016
5.26k
    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017
3018
5.26k
    num_clusters = *pu1_num_clusters;
3019
5.26k
    num_clusters_evaluated = 0;
3020
3021
17.3k
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022
12.1k
    {
3023
12.1k
        if(!ps_cluster_data->is_valid_cluster)
3024
0
        {
3025
0
            continue;
3026
0
        }
3027
12.1k
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028
7.46k
        {
3029
7.46k
            num_clusters_evaluated++;
3030
7.46k
            continue;
3031
7.46k
        }
3032
3033
4.67k
        ps_centroid = &ps_cluster_data->s_centroid;
3034
3035
4.67k
        other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036
4.67k
        other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037
3038
4.67k
        mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039
4.67k
        mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040
4.67k
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041
4.67k
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042
3043
4.67k
        mvd = ABS(mvdx) + ABS(mvdy);
3044
3045
4.67k
        if(mvd <= (max_dist_from_centroid >> 1))
3046
1
        {
3047
            /* 0 => no updates */
3048
            /* 1 => min updated */
3049
            /* 2 => max updated */
3050
1
            S32 minmax_x_update_id;
3051
1
            S32 minmax_y_update_id;
3052
3053
1
            LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054
1
            LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055
1
            LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056
1
            LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057
3058
1
            (*pu1_num_clusters)--;
3059
3060
1
            ps_cluster_data->is_valid_cluster = 0;
3061
3062
1
            memcpy(
3063
1
                &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064
1
                ps_cluster_data->as_mv,
3065
1
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066
3067
1
            ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068
1
            ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069
1
            ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070
1
            ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071
1
            i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072
1
            i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073
3074
1
            ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075
1
            ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076
3077
1
            minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078
1
                                     ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079
1
                                     : 1;
3080
1
            minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081
1
                                     ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082
1
                                     : 1;
3083
3084
            /* Updation of centroid spread */
3085
1
            switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086
1
            {
3087
0
            case 1:
3088
0
            {
3089
0
                S32 mvd, mvd_q8;
3090
3091
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092
3093
0
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3095
3096
0
                if(mvd > (max_dist_from_centroid))
3097
0
                {
3098
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3099
0
                }
3100
0
                break;
3101
0
            }
3102
0
            case 2:
3103
0
            {
3104
0
                S32 mvd, mvd_q8;
3105
3106
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107
3108
0
                mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3110
3111
0
                if(mvd > (max_dist_from_centroid))
3112
0
                {
3113
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3114
0
                }
3115
0
                break;
3116
0
            }
3117
0
            case 4:
3118
0
            {
3119
0
                S32 mvd, mvd_q8;
3120
3121
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122
3123
0
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3125
3126
0
                if(mvd > (max_dist_from_centroid))
3127
0
                {
3128
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3129
0
                }
3130
0
                break;
3131
0
            }
3132
0
            case 5:
3133
0
            {
3134
0
                S32 mvd;
3135
0
                S32 mvdx, mvdx_q8;
3136
0
                S32 mvdy, mvdy_q8;
3137
3138
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140
3141
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143
3144
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145
3146
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148
3149
0
                if(mvd > max_dist_from_centroid)
3150
0
                {
3151
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3152
0
                }
3153
0
                break;
3154
0
            }
3155
0
            case 6:
3156
0
            {
3157
0
                S32 mvd;
3158
0
                S32 mvdx, mvdx_q8;
3159
0
                S32 mvdy, mvdy_q8;
3160
3161
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163
3164
0
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166
3167
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168
3169
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171
3172
0
                if(mvd > max_dist_from_centroid)
3173
0
                {
3174
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3175
0
                }
3176
0
                break;
3177
0
            }
3178
0
            case 8:
3179
0
            {
3180
0
                S32 mvd, mvd_q8;
3181
3182
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183
3184
0
                mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3186
3187
0
                if(mvd > (max_dist_from_centroid))
3188
0
                {
3189
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3190
0
                }
3191
0
                break;
3192
0
            }
3193
1
            case 9:
3194
1
            {
3195
1
                S32 mvd;
3196
1
                S32 mvdx, mvdx_q8;
3197
1
                S32 mvdy, mvdy_q8;
3198
3199
1
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200
1
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201
3202
1
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203
1
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204
3205
1
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206
3207
1
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208
1
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209
3210
1
                if(mvd > max_dist_from_centroid)
3211
0
                {
3212
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3213
0
                }
3214
1
                break;
3215
0
            }
3216
0
            case 10:
3217
0
            {
3218
0
                S32 mvd;
3219
0
                S32 mvdx, mvdx_q8;
3220
0
                S32 mvdy, mvdy_q8;
3221
3222
0
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224
3225
0
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227
3228
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229
3230
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232
3233
0
                if(mvd > ps_cluster_data->max_dist_from_centroid)
3234
0
                {
3235
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3236
0
                }
3237
0
                break;
3238
0
            }
3239
0
            default:
3240
0
            {
3241
0
                break;
3242
0
            }
3243
1
            }
3244
3245
1
            hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246
3247
1
            return;
3248
1
        }
3249
3250
4.67k
        num_clusters_evaluated++;
3251
4.67k
    }
3252
5.26k
}
3253
3254
/**
3255
********************************************************************************
3256
*  @fn   void hme_find_and_update_clusters
3257
*               (
3258
*                   cluster_data_t *ps_cluster_data,
3259
*                   S32 *pi4_num_clusters,
3260
*                   S32 mvx,
3261
*                   S32 mvy,
3262
*                   S32 ref_idx,
3263
*                   PART_ID_T e_part_id
3264
*               )
3265
*
3266
*  @brief  Implementation fo the clustering algorithm
3267
*
3268
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3269
*
3270
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3271
*
3272
*  @param[in]  mvx : x co-ordinate of the motion vector
3273
*
3274
*  @param[in]  mvy : y co-ordinate of the motion vector
3275
*
3276
*  @param[in]  ref_idx : ref_id of the motion vector
3277
*
3278
*  @param[in]  e_part_id : partition id of the motion vector
3279
*
3280
*  @return None
3281
********************************************************************************
3282
*/
3283
void hme_find_and_update_clusters(
3284
    cluster_data_t *ps_cluster_data,
3285
    U08 *pu1_num_clusters,
3286
    S16 i2_mv_x,
3287
    S16 i2_mv_y,
3288
    U08 i1_ref_idx,
3289
    S32 i4_sdi,
3290
    PART_ID_T e_part_id,
3291
    U08 is_part_of_bi)
3292
68.8k
{
3293
68.8k
    S32 i;
3294
68.8k
    S32 min_mvd_cluster_id = -1;
3295
68.8k
    S32 mvd, mvd_limit, mvdx, mvdy;
3296
68.8k
    S32 min_mvdx, min_mvdy;
3297
3298
68.8k
    S32 min_mvd = MAX_32BIT_VAL;
3299
68.8k
    S32 num_clusters = *pu1_num_clusters;
3300
3301
68.8k
    S32 mvx = i2_mv_x;
3302
68.8k
    S32 mvy = i2_mv_y;
3303
68.8k
    S32 ref_idx = i1_ref_idx;
3304
68.8k
    S32 sdi = i4_sdi;
3305
68.8k
    S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306
3307
68.8k
    if(num_clusters == 0)
3308
38.4k
    {
3309
38.4k
        cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310
3311
38.4k
        ps_data->num_mvs = 1;
3312
38.4k
        ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313
38.4k
        ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314
38.4k
        ps_data->ref_id = ref_idx;
3315
38.4k
        ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316
38.4k
        ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317
38.4k
        ps_data->as_mv[0].mvx = mvx;
3318
38.4k
        ps_data->as_mv[0].mvy = mvy;
3319
3320
        /***************************/
3321
38.4k
        ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322
38.4k
        ps_data->as_mv[0].sdi = sdi;
3323
38.4k
        if(is_part_of_bi)
3324
853
        {
3325
853
            ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326
853
        }
3327
37.6k
        else
3328
37.6k
        {
3329
37.6k
            ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330
37.6k
        }
3331
        /**************************/
3332
38.4k
        ps_data->max_x = mvx;
3333
38.4k
        ps_data->min_x = mvx;
3334
38.4k
        ps_data->max_y = mvy;
3335
38.4k
        ps_data->min_y = mvy;
3336
3337
38.4k
        ps_data->is_valid_cluster = 1;
3338
3339
38.4k
        *pu1_num_clusters = 1;
3340
38.4k
    }
3341
30.3k
    else
3342
30.3k
    {
3343
30.3k
        S32 num_clusters_evaluated = 0;
3344
3345
73.8k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346
43.5k
        {
3347
43.5k
            cluster_data_t *ps_data = &ps_cluster_data[i];
3348
3349
43.5k
            centroid_t *ps_centroid;
3350
3351
43.5k
            S32 mvx_q8;
3352
43.5k
            S32 mvy_q8;
3353
43.5k
            S32 posx_q8;
3354
43.5k
            S32 posy_q8;
3355
43.5k
            S32 mvdx_q8;
3356
43.5k
            S32 mvdy_q8;
3357
3358
            /* In anticipation of a possible merging of clusters */
3359
43.5k
            if(ps_data->is_valid_cluster == 0)
3360
0
            {
3361
0
                new_cluster_idx = i;
3362
0
                continue;
3363
0
            }
3364
3365
43.5k
            if(ref_idx != ps_data->ref_id)
3366
9.55k
            {
3367
9.55k
                num_clusters_evaluated++;
3368
9.55k
                continue;
3369
9.55k
            }
3370
3371
34.0k
            ps_centroid = &ps_data->s_centroid;
3372
34.0k
            posx_q8 = ps_centroid->i4_pos_x_q8;
3373
34.0k
            posy_q8 = ps_centroid->i4_pos_y_q8;
3374
3375
34.0k
            mvx_q8 = mvx << 8;
3376
34.0k
            mvy_q8 = mvy << 8;
3377
3378
34.0k
            mvdx_q8 = posx_q8 - mvx_q8;
3379
34.0k
            mvdy_q8 = posy_q8 - mvy_q8;
3380
3381
34.0k
            mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382
34.0k
            mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383
3384
34.0k
            mvd = ABS(mvdx) + ABS(mvdy);
3385
3386
34.0k
            if(mvd < min_mvd)
3387
30.5k
            {
3388
30.5k
                min_mvd = mvd;
3389
30.5k
                min_mvdx = mvdx;
3390
30.5k
                min_mvdy = mvdy;
3391
30.5k
                min_mvd_cluster_id = i;
3392
30.5k
            }
3393
3394
34.0k
            num_clusters_evaluated++;
3395
34.0k
        }
3396
3397
30.3k
        mvd_limit = (min_mvd_cluster_id == -1)
3398
30.3k
                        ? ps_cluster_data[0].max_dist_from_centroid
3399
30.3k
                        : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400
3401
        /* This condition implies that min_mvd has been updated */
3402
30.3k
        if(min_mvd <= mvd_limit)
3403
15.4k
        {
3404
15.4k
            hme_update_cluster_attributes(
3405
15.4k
                &ps_cluster_data[min_mvd_cluster_id],
3406
15.4k
                mvx,
3407
15.4k
                mvy,
3408
15.4k
                min_mvdx,
3409
15.4k
                min_mvdy,
3410
15.4k
                ref_idx,
3411
15.4k
                sdi,
3412
15.4k
                is_part_of_bi,
3413
15.4k
                e_part_id);
3414
3415
15.4k
            if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416
15.0k
            {
3417
15.0k
                hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418
15.0k
            }
3419
15.4k
        }
3420
14.8k
        else
3421
14.8k
        {
3422
14.8k
            cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423
14.8k
                                          ? &ps_cluster_data[num_clusters]
3424
14.8k
                                          : &ps_cluster_data[new_cluster_idx];
3425
3426
14.8k
            ps_data->num_mvs = 1;
3427
14.8k
            ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428
14.8k
            ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429
14.8k
            ps_data->ref_id = ref_idx;
3430
14.8k
            ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431
14.8k
            ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432
14.8k
            ps_data->as_mv[0].mvx = mvx;
3433
14.8k
            ps_data->as_mv[0].mvy = mvy;
3434
3435
            /***************************/
3436
14.8k
            ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437
14.8k
            ps_data->as_mv[0].sdi = sdi;
3438
14.8k
            if(is_part_of_bi)
3439
2.50k
            {
3440
2.50k
                ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441
2.50k
            }
3442
12.3k
            else
3443
12.3k
            {
3444
12.3k
                ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445
12.3k
            }
3446
            /**************************/
3447
14.8k
            ps_data->max_x = mvx;
3448
14.8k
            ps_data->min_x = mvx;
3449
14.8k
            ps_data->max_y = mvy;
3450
14.8k
            ps_data->min_y = mvy;
3451
3452
14.8k
            ps_data->is_valid_cluster = 1;
3453
3454
14.8k
            num_clusters++;
3455
14.8k
            *pu1_num_clusters = num_clusters;
3456
14.8k
        }
3457
30.3k
    }
3458
68.8k
}
3459
3460
/**
3461
********************************************************************************
3462
*  @fn   void hme_update_32x32_cluster_attributes
3463
*               (
3464
*                   cluster_32x32_blk_t *ps_blk_32x32,
3465
*                   cluster_data_t *ps_cluster_data
3466
*               )
3467
*
3468
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
3469
*          the constituent 16x16 clusters
3470
*
3471
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3472
*
3473
*  @param[in]  ps_cluster_data : structure containing 16x16 block results
3474
*
3475
*  @return None
3476
********************************************************************************
3477
*/
3478
void hme_update_32x32_cluster_attributes(
3479
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480
52.3k
{
3481
52.3k
    cluster_data_t *ps_cur_cluster_32;
3482
3483
52.3k
    S32 i;
3484
52.3k
    S32 mvd_limit;
3485
3486
52.3k
    S32 num_clusters = ps_blk_32x32->num_clusters;
3487
3488
52.3k
    if(0 == num_clusters)
3489
10.2k
    {
3490
10.2k
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491
3492
10.2k
        ps_blk_32x32->num_clusters++;
3493
10.2k
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494
3495
10.2k
        ps_cur_cluster_32->is_valid_cluster = 1;
3496
3497
10.2k
        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498
10.2k
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499
10.2k
        ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500
3501
10.2k
        memcpy(
3502
10.2k
            ps_cur_cluster_32->as_mv,
3503
10.2k
            ps_cluster_data->as_mv,
3504
10.2k
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505
3506
10.2k
        ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507
3508
10.2k
        ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509
3510
10.2k
        ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511
10.2k
        ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512
10.2k
        ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513
10.2k
        ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514
3515
10.2k
        ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516
10.2k
    }
3517
42.0k
    else
3518
42.0k
    {
3519
42.0k
        centroid_t *ps_centroid;
3520
3521
42.0k
        S32 cur_posx_q8, cur_posy_q8;
3522
42.0k
        S32 min_mvd_cluster_id = -1;
3523
42.0k
        S32 mvd;
3524
42.0k
        S32 mvdx;
3525
42.0k
        S32 mvdy;
3526
42.0k
        S32 mvdx_min;
3527
42.0k
        S32 mvdy_min;
3528
42.0k
        S32 mvdx_q8;
3529
42.0k
        S32 mvdy_q8;
3530
3531
42.0k
        S32 num_clusters_evaluated = 0;
3532
3533
42.0k
        S32 mvd_min = MAX_32BIT_VAL;
3534
3535
42.0k
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536
42.0k
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537
3538
142k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539
100k
        {
3540
100k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541
3542
100k
            if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543
26.3k
            {
3544
26.3k
                num_clusters_evaluated++;
3545
26.3k
                continue;
3546
26.3k
            }
3547
74.0k
            if(!ps_cluster_data->is_valid_cluster)
3548
0
            {
3549
0
                continue;
3550
0
            }
3551
3552
74.0k
            num_clusters_evaluated++;
3553
3554
74.0k
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3555
3556
74.0k
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557
74.0k
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558
3559
74.0k
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560
74.0k
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561
3562
74.0k
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563
74.0k
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564
3565
74.0k
            mvd = ABS(mvdx) + ABS(mvdy);
3566
3567
74.0k
            if(mvd < mvd_min)
3568
54.4k
            {
3569
54.4k
                mvd_min = mvd;
3570
54.4k
                mvdx_min = mvdx;
3571
54.4k
                mvdy_min = mvdy;
3572
54.4k
                min_mvd_cluster_id = i;
3573
54.4k
            }
3574
74.0k
        }
3575
3576
42.0k
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577
3578
42.0k
        mvd_limit = (min_mvd_cluster_id == -1)
3579
42.0k
                        ? ps_cur_cluster_32[0].max_dist_from_centroid
3580
42.0k
                        : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581
3582
42.0k
        if(mvd_min <= mvd_limit)
3583
21.1k
        {
3584
21.1k
            LWORD64 i8_updated_posx;
3585
21.1k
            LWORD64 i8_updated_posy;
3586
21.1k
            WORD32 minmax_updated_x = 0;
3587
21.1k
            WORD32 minmax_updated_y = 0;
3588
3589
21.1k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590
3591
21.1k
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3592
3593
21.1k
            ps_cur_cluster_32->is_valid_cluster = 1;
3594
3595
21.1k
            ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596
21.1k
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597
21.1k
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598
3599
21.1k
            memcpy(
3600
21.1k
                &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601
21.1k
                ps_cluster_data->as_mv,
3602
21.1k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603
3604
21.1k
            if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605
1.97k
            {
3606
1.97k
                ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607
1.97k
                minmax_updated_x = 1;
3608
1.97k
            }
3609
19.1k
            else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610
1.47k
            {
3611
1.47k
                ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612
1.47k
                minmax_updated_x = 2;
3613
1.47k
            }
3614
3615
21.1k
            if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616
3.17k
            {
3617
3.17k
                ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618
3.17k
                minmax_updated_y = 1;
3619
3.17k
            }
3620
17.9k
            else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621
2.61k
            {
3622
2.61k
                ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623
2.61k
                minmax_updated_y = 2;
3624
2.61k
            }
3625
3626
21.1k
            switch((minmax_updated_y << 2) + minmax_updated_x)
3627
21.1k
            {
3628
1.04k
            case 1:
3629
1.04k
            {
3630
1.04k
                S32 mvd, mvd_q8;
3631
3632
1.04k
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633
1.04k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3634
3635
1.04k
                if(mvd > (mvd_limit))
3636
0
                {
3637
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638
0
                }
3639
1.04k
                break;
3640
0
            }
3641
635
            case 2:
3642
635
            {
3643
635
                S32 mvd, mvd_q8;
3644
3645
635
                mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646
635
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3647
3648
635
                if(mvd > (mvd_limit))
3649
1
                {
3650
1
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651
1
                }
3652
635
                break;
3653
0
            }
3654
2.25k
            case 4:
3655
2.25k
            {
3656
2.25k
                S32 mvd, mvd_q8;
3657
3658
2.25k
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659
2.25k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3660
3661
2.25k
                if(mvd > (mvd_limit))
3662
0
                {
3663
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664
0
                }
3665
2.25k
                break;
3666
0
            }
3667
498
            case 5:
3668
498
            {
3669
498
                S32 mvd;
3670
498
                S32 mvdx, mvdx_q8;
3671
498
                S32 mvdy, mvdy_q8;
3672
3673
498
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674
498
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675
3676
498
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677
498
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678
3679
498
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680
3681
498
                if(mvd > mvd_limit)
3682
0
                {
3683
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684
0
                }
3685
498
                break;
3686
0
            }
3687
427
            case 6:
3688
427
            {
3689
427
                S32 mvd;
3690
427
                S32 mvdx, mvdx_q8;
3691
427
                S32 mvdy, mvdy_q8;
3692
3693
427
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694
427
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695
3696
427
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697
427
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698
3699
427
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700
3701
427
                if(mvd > mvd_limit)
3702
0
                {
3703
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704
0
                }
3705
427
                break;
3706
0
            }
3707
1.77k
            case 8:
3708
1.77k
            {
3709
1.77k
                S32 mvd, mvd_q8;
3710
3711
1.77k
                mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712
1.77k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3713
3714
1.77k
                if(mvd > (mvd_limit))
3715
19
                {
3716
19
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717
19
                }
3718
1.77k
                break;
3719
0
            }
3720
438
            case 9:
3721
438
            {
3722
438
                S32 mvd;
3723
438
                S32 mvdx, mvdx_q8;
3724
438
                S32 mvdy, mvdy_q8;
3725
3726
438
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727
438
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728
3729
438
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730
438
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731
3732
438
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733
3734
438
                if(mvd > mvd_limit)
3735
0
                {
3736
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737
0
                }
3738
438
                break;
3739
0
            }
3740
409
            case 10:
3741
409
            {
3742
409
                S32 mvd;
3743
409
                S32 mvdx, mvdx_q8;
3744
409
                S32 mvdy, mvdy_q8;
3745
3746
409
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747
409
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748
3749
409
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750
409
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751
3752
409
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753
3754
409
                if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755
0
                {
3756
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757
0
                }
3758
409
                break;
3759
0
            }
3760
13.6k
            default:
3761
13.6k
            {
3762
13.6k
                break;
3763
0
            }
3764
21.1k
            }
3765
3766
21.1k
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767
21.1k
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768
21.1k
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769
21.1k
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770
3771
21.1k
            ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772
3773
21.1k
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774
21.1k
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775
21.1k
        }
3776
20.9k
        else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777
20.7k
        {
3778
20.7k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779
3780
20.7k
            ps_blk_32x32->num_clusters++;
3781
20.7k
            ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782
3783
20.7k
            ps_cur_cluster_32->is_valid_cluster = 1;
3784
3785
20.7k
            ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786
20.7k
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787
20.7k
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788
3789
20.7k
            memcpy(
3790
20.7k
                ps_cur_cluster_32->as_mv,
3791
20.7k
                ps_cluster_data->as_mv,
3792
20.7k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793
3794
20.7k
            ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795
3796
20.7k
            ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797
3798
20.7k
            ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799
20.7k
            ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800
20.7k
            ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801
20.7k
            ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802
3803
20.7k
            ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804
20.7k
        }
3805
42.0k
    }
3806
52.3k
}
3807
3808
/**
3809
********************************************************************************
3810
*  @fn   void hme_update_64x64_cluster_attributes
3811
*               (
3812
*                   cluster_64x64_blk_t *ps_blk_32x32,
3813
*                   cluster_data_t *ps_cluster_data
3814
*               )
3815
*
3816
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
3817
*          the constituent 16x16 clusters
3818
*
3819
*  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3820
*
3821
*  @param[in]  ps_cluster_data : structure containing 32x32 block results
3822
*
3823
*  @return None
3824
********************************************************************************
3825
*/
3826
void hme_update_64x64_cluster_attributes(
3827
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828
30.3k
{
3829
30.3k
    cluster_data_t *ps_cur_cluster_64;
3830
3831
30.3k
    S32 i;
3832
30.3k
    S32 mvd_limit;
3833
3834
30.3k
    S32 num_clusters = ps_blk_64x64->num_clusters;
3835
3836
30.3k
    if(0 == num_clusters)
3837
2.69k
    {
3838
2.69k
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839
3840
2.69k
        ps_blk_64x64->num_clusters++;
3841
2.69k
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842
3843
2.69k
        ps_cur_cluster_64->is_valid_cluster = 1;
3844
3845
2.69k
        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846
2.69k
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847
2.69k
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848
3849
2.69k
        memcpy(
3850
2.69k
            ps_cur_cluster_64->as_mv,
3851
2.69k
            ps_cluster_data->as_mv,
3852
2.69k
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853
3854
2.69k
        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855
3856
2.69k
        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857
3858
2.69k
        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859
2.69k
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860
2.69k
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861
2.69k
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862
3863
2.69k
        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864
2.69k
    }
3865
27.6k
    else
3866
27.6k
    {
3867
27.6k
        centroid_t *ps_centroid;
3868
3869
27.6k
        S32 cur_posx_q8, cur_posy_q8;
3870
27.6k
        S32 min_mvd_cluster_id = -1;
3871
27.6k
        S32 mvd;
3872
27.6k
        S32 mvdx;
3873
27.6k
        S32 mvdy;
3874
27.6k
        S32 mvdx_min;
3875
27.6k
        S32 mvdy_min;
3876
27.6k
        S32 mvdx_q8;
3877
27.6k
        S32 mvdy_q8;
3878
3879
27.6k
        S32 num_clusters_evaluated = 0;
3880
3881
27.6k
        S32 mvd_min = MAX_32BIT_VAL;
3882
3883
27.6k
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884
27.6k
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885
3886
164k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887
136k
        {
3888
136k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889
3890
136k
            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891
45.3k
            {
3892
45.3k
                num_clusters_evaluated++;
3893
45.3k
                continue;
3894
45.3k
            }
3895
3896
91.3k
            if(!ps_cur_cluster_64->is_valid_cluster)
3897
0
            {
3898
0
                continue;
3899
0
            }
3900
3901
91.3k
            num_clusters_evaluated++;
3902
3903
91.3k
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3904
3905
91.3k
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906
91.3k
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907
3908
91.3k
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909
91.3k
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910
3911
91.3k
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912
91.3k
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913
3914
91.3k
            mvd = ABS(mvdx) + ABS(mvdy);
3915
3916
91.3k
            if(mvd < mvd_min)
3917
50.6k
            {
3918
50.6k
                mvd_min = mvd;
3919
50.6k
                mvdx_min = mvdx;
3920
50.6k
                mvdy_min = mvdy;
3921
50.6k
                min_mvd_cluster_id = i;
3922
50.6k
            }
3923
91.3k
        }
3924
3925
27.6k
        ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926
3927
27.6k
        mvd_limit = (min_mvd_cluster_id == -1)
3928
27.6k
                        ? ps_cur_cluster_64[0].max_dist_from_centroid
3929
27.6k
                        : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930
3931
27.6k
        if(mvd_min <= mvd_limit)
3932
11.4k
        {
3933
11.4k
            LWORD64 i8_updated_posx;
3934
11.4k
            LWORD64 i8_updated_posy;
3935
11.4k
            WORD32 minmax_updated_x = 0;
3936
11.4k
            WORD32 minmax_updated_y = 0;
3937
3938
11.4k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939
3940
11.4k
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3941
3942
11.4k
            ps_cur_cluster_64->is_valid_cluster = 1;
3943
3944
11.4k
            ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945
11.4k
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946
11.4k
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947
3948
11.4k
            memcpy(
3949
11.4k
                &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950
11.4k
                ps_cluster_data->as_mv,
3951
11.4k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952
3953
11.4k
            if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954
1.97k
            {
3955
1.97k
                ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956
1.97k
                minmax_updated_x = 1;
3957
1.97k
            }
3958
9.44k
            else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959
1.61k
            {
3960
1.61k
                ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961
1.61k
                minmax_updated_x = 2;
3962
1.61k
            }
3963
3964
11.4k
            if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965
2.53k
            {
3966
2.53k
                ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967
2.53k
                minmax_updated_y = 1;
3968
2.53k
            }
3969
8.89k
            else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970
2.21k
            {
3971
2.21k
                ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972
2.21k
                minmax_updated_y = 2;
3973
2.21k
            }
3974
3975
11.4k
            switch((minmax_updated_y << 2) + minmax_updated_x)
3976
11.4k
            {
3977
761
            case 1:
3978
761
            {
3979
761
                S32 mvd, mvd_q8;
3980
3981
761
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982
761
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3983
3984
761
                if(mvd > (mvd_limit))
3985
0
                {
3986
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987
0
                }
3988
761
                break;
3989
0
            }
3990
641
            case 2:
3991
641
            {
3992
641
                S32 mvd, mvd_q8;
3993
3994
641
                mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995
641
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3996
3997
641
                if(mvd > (mvd_limit))
3998
0
                {
3999
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000
0
                }
4001
641
                break;
4002
0
            }
4003
1.41k
            case 4:
4004
1.41k
            {
4005
1.41k
                S32 mvd, mvd_q8;
4006
4007
1.41k
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008
1.41k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4009
4010
1.41k
                if(mvd > (mvd_limit))
4011
1
                {
4012
1
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013
1
                }
4014
1.41k
                break;
4015
0
            }
4016
618
            case 5:
4017
618
            {
4018
618
                S32 mvd;
4019
618
                S32 mvdx, mvdx_q8;
4020
618
                S32 mvdy, mvdy_q8;
4021
4022
618
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023
618
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024
4025
618
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026
618
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027
4028
618
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029
4030
618
                if(mvd > mvd_limit)
4031
0
                {
4032
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033
0
                }
4034
618
                break;
4035
0
            }
4036
495
            case 6:
4037
495
            {
4038
495
                S32 mvd;
4039
495
                S32 mvdx, mvdx_q8;
4040
495
                S32 mvdy, mvdy_q8;
4041
4042
495
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043
495
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044
4045
495
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046
495
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047
4048
495
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049
4050
495
                if(mvd > mvd_limit)
4051
0
                {
4052
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053
0
                }
4054
495
                break;
4055
0
            }
4056
1.13k
            case 8:
4057
1.13k
            {
4058
1.13k
                S32 mvd, mvd_q8;
4059
4060
1.13k
                mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061
1.13k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4062
4063
1.13k
                if(mvd > (mvd_limit))
4064
3
                {
4065
3
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066
3
                }
4067
1.13k
                break;
4068
0
            }
4069
597
            case 9:
4070
597
            {
4071
597
                S32 mvd;
4072
597
                S32 mvdx, mvdx_q8;
4073
597
                S32 mvdy, mvdy_q8;
4074
4075
597
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076
597
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077
4078
597
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079
597
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080
4081
597
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082
4083
597
                if(mvd > mvd_limit)
4084
0
                {
4085
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086
0
                }
4087
597
                break;
4088
0
            }
4089
479
            case 10:
4090
479
            {
4091
479
                S32 mvd;
4092
479
                S32 mvdx, mvdx_q8;
4093
479
                S32 mvdy, mvdy_q8;
4094
4095
479
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096
479
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097
4098
479
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099
479
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100
4101
479
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102
4103
479
                if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104
0
                {
4105
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106
0
                }
4107
479
                break;
4108
0
            }
4109
5.27k
            default:
4110
5.27k
            {
4111
5.27k
                break;
4112
0
            }
4113
11.4k
            }
4114
4115
11.4k
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116
11.4k
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117
11.4k
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118
11.4k
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119
4120
11.4k
            ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121
4122
11.4k
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123
11.4k
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124
11.4k
        }
4125
16.2k
        else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126
14.0k
        {
4127
14.0k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128
4129
14.0k
            ps_blk_64x64->num_clusters++;
4130
14.0k
            ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131
4132
14.0k
            ps_cur_cluster_64->is_valid_cluster = 1;
4133
4134
14.0k
            ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135
14.0k
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136
14.0k
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137
4138
14.0k
            memcpy(
4139
14.0k
                &ps_cur_cluster_64->as_mv[0],
4140
14.0k
                ps_cluster_data->as_mv,
4141
14.0k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142
4143
14.0k
            ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144
4145
14.0k
            ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146
4147
14.0k
            ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148
14.0k
            ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149
14.0k
            ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150
14.0k
            ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151
4152
14.0k
            ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153
14.0k
        }
4154
27.6k
    }
4155
30.3k
}
4156
4157
/**
4158
********************************************************************************
4159
*  @fn   void hme_update_32x32_clusters
4160
*               (
4161
*                   cluster_32x32_blk_t *ps_blk_32x32,
4162
*                   cluster_16x16_blk_t *ps_blk_16x16
4163
*               )
4164
*
4165
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
4166
*          the constituent 16x16 clusters
4167
*
4168
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4169
*
4170
*  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4171
*
4172
*  @return None
4173
********************************************************************************
4174
*/
4175
static __inline void
4176
    hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177
11.6k
{
4178
11.6k
    cluster_16x16_blk_t *ps_blk_16x16_cur;
4179
11.6k
    cluster_data_t *ps_cur_cluster;
4180
4181
11.6k
    S32 i, j;
4182
11.6k
    S32 num_clusters_cur_16x16_blk;
4183
4184
58.1k
    for(i = 0; i < 4; i++)
4185
46.5k
    {
4186
46.5k
        S32 num_clusters_evaluated = 0;
4187
4188
46.5k
        ps_blk_16x16_cur = &ps_blk_16x16[i];
4189
4190
46.5k
        num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191
4192
46.5k
        ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193
4194
46.5k
        ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195
4196
98.8k
        for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197
52.3k
        {
4198
52.3k
            ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199
4200
52.3k
            if(!ps_cur_cluster->is_valid_cluster)
4201
1
            {
4202
1
                continue;
4203
1
            }
4204
4205
52.3k
            hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206
4207
52.3k
            num_clusters_evaluated++;
4208
52.3k
        }
4209
46.5k
    }
4210
11.6k
}
4211
4212
/**
4213
********************************************************************************
4214
*  @fn   void hme_update_64x64_clusters
4215
*               (
4216
*                   cluster_64x64_blk_t *ps_blk_64x64,
4217
*                   cluster_32x32_blk_t *ps_blk_32x32
4218
*               )
4219
*
4220
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
4221
*          the constituent 16x16 clusters
4222
*
4223
*  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4224
*
4225
*  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4226
*
4227
*  @return None
4228
********************************************************************************
4229
*/
4230
static __inline void
4231
    hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232
3.61k
{
4233
3.61k
    cluster_32x32_blk_t *ps_blk_32x32_cur;
4234
3.61k
    cluster_data_t *ps_cur_cluster;
4235
4236
3.61k
    S32 i, j;
4237
3.61k
    S32 num_clusters_cur_32x32_blk;
4238
4239
18.0k
    for(i = 0; i < 4; i++)
4240
14.4k
    {
4241
14.4k
        S32 num_clusters_evaluated = 0;
4242
4243
14.4k
        ps_blk_32x32_cur = &ps_blk_32x32[i];
4244
4245
14.4k
        num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246
4247
14.4k
        ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248
14.4k
        ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249
4250
45.5k
        for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251
31.0k
        {
4252
31.0k
            ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253
4254
31.0k
            if(!ps_cur_cluster->is_valid_cluster)
4255
704
            {
4256
704
                continue;
4257
704
            }
4258
4259
30.3k
            hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260
4261
30.3k
            num_clusters_evaluated++;
4262
30.3k
        }
4263
14.4k
    }
4264
3.61k
}
4265
4266
/**
4267
********************************************************************************
4268
*  @fn   void hme_try_merge_clusters_blksize_gt_16
4269
*               (
4270
*                   cluster_data_t *ps_cluster_data,
4271
*                   S32 num_clusters
4272
*               )
4273
*
4274
*  @brief  Merging clusters from blocks of size 32x32 and greater
4275
*
4276
*  @param[in/out]  ps_cluster_data: structure containing cluster data
4277
*
4278
*  @param[in]  num_clusters : number of clusters
4279
*
4280
*  @return Success or failure
4281
********************************************************************************
4282
*/
4283
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284
26.9k
{
4285
26.9k
    centroid_t *ps_cur_centroid;
4286
26.9k
    cluster_data_t *ps_cur_cluster;
4287
4288
26.9k
    S32 i, mvd;
4289
26.9k
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290
4291
26.9k
    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292
4293
26.9k
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294
26.9k
    S32 ref_id = ps_cluster_data->ref_id;
4295
4296
26.9k
    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297
26.9k
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298
26.9k
    S32 num_clusters_evaluated = 1;
4299
26.9k
    S32 ret_value = 0;
4300
4301
26.9k
    if(1 >= num_clusters)
4302
3.84k
    {
4303
3.84k
        return ret_value;
4304
3.84k
    }
4305
4306
111k
    for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307
88.0k
    {
4308
88.0k
        S32 cur_posx_q8;
4309
88.0k
        S32 cur_posy_q8;
4310
4311
88.0k
        ps_cur_cluster = &ps_cluster_data[i];
4312
4313
88.0k
        if((ref_id != ps_cur_cluster->ref_id))
4314
31.8k
        {
4315
31.8k
            num_clusters_evaluated++;
4316
31.8k
            continue;
4317
31.8k
        }
4318
4319
56.2k
        if((!ps_cur_cluster->is_valid_cluster))
4320
0
        {
4321
0
            continue;
4322
0
        }
4323
4324
56.2k
        num_clusters_evaluated++;
4325
4326
56.2k
        ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327
4328
56.2k
        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329
56.2k
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330
4331
56.2k
        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332
56.2k
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333
4334
56.2k
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335
56.2k
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336
4337
56.2k
        mvd = ABS(mvdx) + ABS(mvdy);
4338
4339
56.2k
        if(mvd <= (mvd_limit >> 1))
4340
1
        {
4341
1
            LWORD64 i8_updated_posx;
4342
1
            LWORD64 i8_updated_posy;
4343
1
            WORD32 minmax_updated_x = 0;
4344
1
            WORD32 minmax_updated_y = 0;
4345
4346
1
            ps_cur_cluster->is_valid_cluster = 0;
4347
4348
1
            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349
1
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350
1
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351
4352
1
            memcpy(
4353
1
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354
1
                ps_cur_cluster->as_mv,
4355
1
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356
4357
1
            if(mvdx > 0)
4358
0
            {
4359
0
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360
0
                minmax_updated_x = 1;
4361
0
            }
4362
1
            else
4363
1
            {
4364
1
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365
1
                minmax_updated_x = 2;
4366
1
            }
4367
4368
1
            if(mvdy > 0)
4369
0
            {
4370
0
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371
0
                minmax_updated_y = 1;
4372
0
            }
4373
1
            else
4374
1
            {
4375
1
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376
1
                minmax_updated_y = 2;
4377
1
            }
4378
4379
1
            switch((minmax_updated_y << 2) + minmax_updated_x)
4380
1
            {
4381
0
            case 1:
4382
0
            {
4383
0
                S32 mvd, mvd_q8;
4384
4385
0
                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4387
4388
0
                if(mvd > (mvd_limit))
4389
0
                {
4390
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4391
0
                }
4392
0
                break;
4393
0
            }
4394
0
            case 2:
4395
0
            {
4396
0
                S32 mvd, mvd_q8;
4397
4398
0
                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4400
4401
0
                if(mvd > (mvd_limit))
4402
0
                {
4403
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4404
0
                }
4405
0
                break;
4406
0
            }
4407
0
            case 4:
4408
0
            {
4409
0
                S32 mvd, mvd_q8;
4410
4411
0
                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4413
4414
0
                if(mvd > (mvd_limit))
4415
0
                {
4416
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4417
0
                }
4418
0
                break;
4419
0
            }
4420
0
            case 5:
4421
0
            {
4422
0
                S32 mvd;
4423
0
                S32 mvdx, mvdx_q8;
4424
0
                S32 mvdy, mvdy_q8;
4425
4426
0
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428
4429
0
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431
4432
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433
4434
0
                if(mvd > mvd_limit)
4435
0
                {
4436
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4437
0
                }
4438
0
                break;
4439
0
            }
4440
0
            case 6:
4441
0
            {
4442
0
                S32 mvd;
4443
0
                S32 mvdx, mvdx_q8;
4444
0
                S32 mvdy, mvdy_q8;
4445
4446
0
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448
4449
0
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451
4452
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453
4454
0
                if(mvd > mvd_limit)
4455
0
                {
4456
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4457
0
                }
4458
0
                break;
4459
0
            }
4460
0
            case 8:
4461
0
            {
4462
0
                S32 mvd, mvd_q8;
4463
4464
0
                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4466
4467
0
                if(mvd > (mvd_limit))
4468
0
                {
4469
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4470
0
                }
4471
0
                break;
4472
0
            }
4473
0
            case 9:
4474
0
            {
4475
0
                S32 mvd;
4476
0
                S32 mvdx, mvdx_q8;
4477
0
                S32 mvdy, mvdy_q8;
4478
4479
0
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481
4482
0
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484
4485
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486
4487
0
                if(mvd > mvd_limit)
4488
0
                {
4489
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4490
0
                }
4491
0
                break;
4492
0
            }
4493
1
            case 10:
4494
1
            {
4495
1
                S32 mvd;
4496
1
                S32 mvdx, mvdx_q8;
4497
1
                S32 mvdy, mvdy_q8;
4498
4499
1
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500
1
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501
4502
1
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503
1
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504
4505
1
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506
4507
1
                if(mvd > ps_cluster_data->max_dist_from_centroid)
4508
0
                {
4509
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4510
0
                }
4511
1
                break;
4512
0
            }
4513
0
            default:
4514
0
            {
4515
0
                break;
4516
0
            }
4517
1
            }
4518
4519
1
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520
1
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521
1
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522
1
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523
4524
1
            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525
4526
1
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527
1
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528
4529
1
            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530
0
            {
4531
0
                num_clusters--;
4532
0
                num_clusters_evaluated = 1;
4533
0
                i = 0;
4534
0
                ret_value++;
4535
0
            }
4536
1
            else
4537
1
            {
4538
1
                ret_value++;
4539
4540
1
                return ret_value;
4541
1
            }
4542
1
        }
4543
56.2k
    }
4544
4545
23.1k
    if(ret_value)
4546
0
    {
4547
0
        for(i = 1; i < (num_clusters + ret_value); i++)
4548
0
        {
4549
0
            if(ps_cluster_data[i].is_valid_cluster)
4550
0
            {
4551
0
                break;
4552
0
            }
4553
0
        }
4554
0
        if(i == (num_clusters + ret_value))
4555
0
        {
4556
0
            return ret_value;
4557
0
        }
4558
0
    }
4559
23.1k
    else
4560
23.1k
    {
4561
23.1k
        i = 1;
4562
23.1k
    }
4563
4564
23.1k
    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565
23.1k
           ret_value;
4566
23.1k
}
4567
4568
/**
4569
********************************************************************************
4570
*  @fn   S32 hme_determine_validity_32x32
4571
*               (
4572
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4573
*               )
4574
*
4575
*  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4576
*           while recursing through the CU tree or not
4577
*
4578
*  @param[in]  ps_cluster_data: structure containing cluster data
4579
*
4580
*  @return Success or failure
4581
********************************************************************************
4582
*/
4583
__inline S32 hme_determine_validity_32x32(
4584
    ctb_cluster_info_t *ps_ctb_cluster_info,
4585
    S32 *pi4_children_nodes_required,
4586
    S32 blk_validity_wrt_pic_bndry,
4587
    S32 parent_blk_validity_wrt_pic_bndry)
4588
0
{
4589
0
    cluster_data_t *ps_data;
4590
0
4591
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593
0
4594
0
    S32 num_clusters = ps_32x32_blk->num_clusters;
4595
0
    S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596
0
4597
0
    if(!blk_validity_wrt_pic_bndry)
4598
0
    {
4599
0
        *pi4_children_nodes_required = 1;
4600
0
        return 0;
4601
0
    }
4602
0
4603
0
    if(!parent_blk_validity_wrt_pic_bndry)
4604
0
    {
4605
0
        *pi4_children_nodes_required = 1;
4606
0
        return 1;
4607
0
    }
4608
0
4609
0
    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610
0
    {
4611
0
        *pi4_children_nodes_required = 1;
4612
0
        return 0;
4613
0
    }
4614
0
4615
0
    if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616
0
    {
4617
0
        *pi4_children_nodes_required = 1;
4618
0
4619
0
        return 1;
4620
0
    }
4621
0
    else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622
0
    {
4623
0
        *pi4_children_nodes_required = 0;
4624
0
4625
0
        return 1;
4626
0
    }
4627
0
    else
4628
0
    {
4629
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630
0
        {
4631
0
            *pi4_children_nodes_required = 0;
4632
0
            return 1;
4633
0
        }
4634
0
        else
4635
0
        {
4636
0
            S32 i;
4637
0
4638
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639
0
            S32 min_area = MAX_32BIT_VAL;
4640
0
            S32 num_clusters_evaluated = 0;
4641
0
4642
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643
0
            {
4644
0
                ps_data = &ps_32x32_blk->as_cluster_data[i];
4645
0
4646
0
                if(!ps_data->is_valid_cluster)
4647
0
                {
4648
0
                    continue;
4649
0
                }
4650
0
4651
0
                num_clusters_evaluated++;
4652
0
4653
0
                if(ps_data->area_in_pixels < min_area)
4654
0
                {
4655
0
                    min_area = ps_data->area_in_pixels;
4656
0
                }
4657
0
            }
4658
0
4659
0
            if((min_area << 4) < area_of_parent)
4660
0
            {
4661
0
                *pi4_children_nodes_required = 1;
4662
0
                return 0;
4663
0
            }
4664
0
            else
4665
0
            {
4666
0
                *pi4_children_nodes_required = 0;
4667
0
                return 1;
4668
0
            }
4669
0
        }
4670
0
    }
4671
0
}
4672
4673
/**
4674
********************************************************************************
4675
*  @fn   S32 hme_determine_validity_16x16
4676
*               (
4677
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4678
*               )
4679
*
4680
*  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4681
*           while recursing through the CU tree or not
4682
*
4683
*  @param[in]  ps_cluster_data: structure containing cluster data
4684
*
4685
*  @return Success or failure
4686
********************************************************************************
4687
*/
4688
__inline S32 hme_determine_validity_16x16(
4689
    ctb_cluster_info_t *ps_ctb_cluster_info,
4690
    S32 *pi4_children_nodes_required,
4691
    S32 blk_validity_wrt_pic_bndry,
4692
    S32 parent_blk_validity_wrt_pic_bndry)
4693
0
{
4694
0
    cluster_data_t *ps_data;
4695
0
4696
0
    cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699
0
4700
0
    S32 num_clusters = ps_16x16_blk->num_clusters;
4701
0
    S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702
0
    S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703
0
4704
0
    if(!blk_validity_wrt_pic_bndry)
4705
0
    {
4706
0
        *pi4_children_nodes_required = 1;
4707
0
        return 0;
4708
0
    }
4709
0
4710
0
    if(!parent_blk_validity_wrt_pic_bndry)
4711
0
    {
4712
0
        *pi4_children_nodes_required = 1;
4713
0
        return 1;
4714
0
    }
4715
0
4716
0
    if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717
0
       (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718
0
    {
4719
0
        *pi4_children_nodes_required = 1;
4720
0
        return 1;
4721
0
    }
4722
0
4723
0
    /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724
0
    /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725
0
    if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726
0
    {
4727
0
        if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728
0
        {
4729
0
            *pi4_children_nodes_required = 0;
4730
0
4731
0
            return 1;
4732
0
        }
4733
0
        else
4734
0
        {
4735
0
            *pi4_children_nodes_required = 1;
4736
0
4737
0
            return 0;
4738
0
        }
4739
0
    }
4740
0
    /* Implies nc_64 >= 3 */
4741
0
    else
4742
0
    {
4743
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744
0
        {
4745
0
            *pi4_children_nodes_required = 0;
4746
0
            return 1;
4747
0
        }
4748
0
        else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749
0
        {
4750
0
            *pi4_children_nodes_required = 1;
4751
0
            return 0;
4752
0
        }
4753
0
        else
4754
0
        {
4755
0
            S32 i;
4756
0
4757
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758
0
            S32 min_area = MAX_32BIT_VAL;
4759
0
            S32 num_clusters_evaluated = 0;
4760
0
4761
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762
0
            {
4763
0
                ps_data = &ps_16x16_blk->as_cluster_data[i];
4764
0
4765
0
                if(!ps_data->is_valid_cluster)
4766
0
                {
4767
0
                    continue;
4768
0
                }
4769
0
4770
0
                num_clusters_evaluated++;
4771
0
4772
0
                if(ps_data->area_in_pixels < min_area)
4773
0
                {
4774
0
                    min_area = ps_data->area_in_pixels;
4775
0
                }
4776
0
            }
4777
0
4778
0
            if((min_area << 4) < area_of_parent)
4779
0
            {
4780
0
                *pi4_children_nodes_required = 1;
4781
0
                return 0;
4782
0
            }
4783
0
            else
4784
0
            {
4785
0
                *pi4_children_nodes_required = 0;
4786
0
                return 1;
4787
0
            }
4788
0
        }
4789
0
    }
4790
0
}
4791
4792
/**
4793
********************************************************************************
4794
*  @fn   void hme_build_cu_tree
4795
*               (
4796
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4797
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4798
*                   S32 tree_depth,
4799
*                   CU_POS_T e_grandparent_blk_pos,
4800
*                   CU_POS_T e_parent_blk_pos,
4801
*                   CU_POS_T e_cur_blk_pos
4802
*               )
4803
*
4804
*  @brief  Wrapper for CU tree initialisation: forwards its arguments, plus
*          the tree root and the address of the node counter held in
*          ps_ctb_cluster_info, to ihevce_cu_tree_init. This function does not
*          call itself; any recursion over the tree presumably happens inside
*          ihevce_cu_tree_init (not visible here - confirm).
4805
*
4806
*  @param[in]  ps_ctb_cluster_info: supplies ps_cu_tree_root and the address
4807
*                                   of nodes_created_in_cu_tree, both passed
4808
*                                   on to ihevce_cu_tree_init
4809
*
*  @param[in]  e_grandparent_blk_pos: forwarded unchanged; presumably the
*                                     position of the grandparent block wrt
*                                     its parent - confirm
*
4810
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4811
*                                applicable
4812
*
4813
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4814
*
4815
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4816
*
4817
*  @param[in]  tree_depth : specifies depth of the CU tree
4818
*
4819
*  @return Nothing
4820
********************************************************************************
4821
*/
4822
void hme_build_cu_tree(
4823
    ctb_cluster_info_t *ps_ctb_cluster_info,
4824
    cur_ctb_cu_tree_t *ps_cu_tree,
4825
    S32 tree_depth,
4826
    CU_POS_T e_grandparent_blk_pos,
4827
    CU_POS_T e_parent_blk_pos,
4828
    CU_POS_T e_cur_blk_pos)
4829
3.61k
{
4830
3.61k
    ihevce_cu_tree_init(
4831
3.61k
        ps_cu_tree,
4832
3.61k
        ps_ctb_cluster_info->ps_cu_tree_root,
4833
3.61k
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834
3.61k
        tree_depth,
4835
3.61k
        e_grandparent_blk_pos,
4836
3.61k
        e_parent_blk_pos,
4837
3.61k
        e_cur_blk_pos);
4838
3.61k
}
4839
4840
/**
4841
********************************************************************************
4842
*  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4843
*               (
4844
*                   cluster_32x32_blk_t *ps_blk_32x32
4845
*               )
4846
*
4847
*  @brief  Determines whether the spread of high SDI MV's around each cluster
4848
*          center is below a pre-determined threshold
4849
*
4850
*  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4851
*                                   corresponding to all block sizes from 64x64
4852
*                                   to 16x16
4853
*
4854
*  @return 1 if the spread is constrained, else 0
4855
********************************************************************************
4856
*/
4857
__inline S32
4858
    hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859
0
{
4860
0
    S32 cumulative_mv_distance;
4861
0
    S32 i, j;
4862
0
    S32 num_high_sdi_mvs;
4863
0
4864
0
    S32 num_clusters = ps_blk_32x32->num_clusters;
4865
0
4866
0
    for(i = 0; i < num_clusters; i++)
4867
0
    {
4868
0
        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869
0
4870
0
        num_high_sdi_mvs = 0;
4871
0
        cumulative_mv_distance = 0;
4872
0
4873
0
        for(j = 0; j < ps_data->num_mvs; j++)
4874
0
        {
4875
0
            mv_data_t *ps_mv = &ps_data->as_mv[j];
4876
0
4877
0
            if(ps_mv->sdi >= sdi_threshold)
4878
0
            {
4879
0
                num_high_sdi_mvs++;
4880
0
4881
0
                COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882
0
            }
4883
0
        }
4884
0
4885
0
        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886
0
        {
4887
0
            return 0;
4888
0
        }
4889
0
    }
4890
0
4891
0
    return 1;
4892
0
}
4893
4894
/**
4895
********************************************************************************
4896
*  @fn   S32 hme_populate_cu_tree
4897
*               (
4898
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4899
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4901
*                   S32 tree_depth,
4902
*                   CU_POS_T e_parent_blk_pos,
4903
*                   CU_POS_T e_cur_blk_pos
4904
*               )
4905
*
4906
*  @brief  Recursive function for CU tree population based on output of
4907
*          clustering algorithm
4908
*
4909
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4910
*                                   corresponding to all block sizes from 64x64
4911
*                                   to 16x16
4912
*
4913
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4914
applicable
4915
*
4916
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4917
*
4918
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4919
*
4920
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4921
*
4922
*  @param[in]  tree_depth : specifies depth of the CU tree
4923
*
4924
*  @param[in]  ipe_decision_precedence : specifies whether precedence should
4925
*               be given to decisions made either by IPE(1) or clustering algos.
4926
*
4927
*  @return 1 if re-evaluation of parent node's validity is not required,
4928
else 0
4929
********************************************************************************
4930
*/
4931
void hme_populate_cu_tree(
4932
    ctb_cluster_info_t *ps_ctb_cluster_info,
4933
    cur_ctb_cu_tree_t *ps_cu_tree,
4934
    S32 tree_depth,
4935
    ME_QUALITY_PRESETS_T e_quality_preset,
4936
    CU_POS_T e_grandparent_blk_pos,
4937
    CU_POS_T e_parent_blk_pos,
4938
    CU_POS_T e_cur_blk_pos)
4939
121k
{
4940
121k
    S32 area_of_cur_blk;
4941
121k
    S32 area_limit_for_me_decision_precedence;
4942
121k
    S32 children_nodes_required;
4943
121k
    S32 intra_mv_area;
4944
121k
    S32 intra_eval_enable;
4945
121k
    S32 inter_eval_enable;
4946
121k
    S32 ipe_decision_precedence;
4947
121k
    S32 node_validity;
4948
121k
    S32 num_clusters;
4949
4950
121k
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951
4952
121k
    if(NULL == ps_cu_tree)
4953
0
    {
4954
0
        return;
4955
0
    }
4956
4957
121k
    switch(tree_depth)
4958
121k
    {
4959
3.61k
    case 0:
4960
3.61k
    {
4961
        /* 64x64 block */
4962
3.61k
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963
4964
3.61k
        cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965
4966
3.61k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967
3.61k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968
3.61k
        children_nodes_required = 0;
4969
3.61k
        intra_mv_area = ps_blk_64x64->intra_mv_area;
4970
4971
3.61k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972
4973
3.61k
        intra_eval_enable = ipe_decision_precedence;
4974
3.61k
        inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975
4976
3.61k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977
3.61k
        if(e_quality_preset >= ME_HIGH_QUALITY)
4978
0
        {
4979
0
            inter_eval_enable = 1;
4980
0
            node_validity = (blk_32x32_mask == 0xf);
4981
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983
0
#endif
4984
0
            break;
4985
0
        }
4986
3.61k
#endif
4987
4988
3.61k
#if ENABLE_4CTB_EVALUATION
4989
3.61k
        node_validity = (blk_32x32_mask == 0xf);
4990
4991
3.61k
        break;
4992
#else
4993
        {
4994
            S32 i;
4995
4996
            num_clusters = ps_blk_64x64->num_clusters;
4997
4998
            node_validity = (ipe_decision_precedence)
4999
                                ? (!ps_cur_ipe_ctb->u1_split_flag)
5000
                                : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001
5002
            for(i = 0; i < MAX_NUM_REF; i++)
5003
            {
5004
                node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005
                                                  MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006
            }
5007
5008
            node_validity = node_validity && (blk_32x32_mask == 0xf);
5009
        }
5010
        break;
5011
#endif
5012
3.61k
    }
5013
14.4k
    case 1:
5014
14.4k
    {
5015
        /* 32x32 block */
5016
14.4k
        S32 is_percent_intra_area_gt_threshold;
5017
5018
14.4k
        cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019
5020
14.4k
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021
5022
#if !ENABLE_4CTB_EVALUATION
5023
        S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024
        S32 best_intra_cost =
5025
            ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026
              ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027
                  4) < 0)
5028
                ? MAX_32BIT_VAL
5029
                : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030
                   ps_ctb_cluster_info->i4_frame_qstep *
5031
                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032
        S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033
        S32 cost_differential = (best_inter_cost - best_cost);
5034
#endif
5035
5036
14.4k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037
14.4k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038
14.4k
        intra_mv_area = ps_blk_32x32->intra_mv_area;
5039
14.4k
        is_percent_intra_area_gt_threshold =
5040
14.4k
            (intra_mv_area > area_limit_for_me_decision_precedence);
5041
14.4k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042
5043
14.4k
        intra_eval_enable = ipe_decision_precedence;
5044
14.4k
        inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045
14.4k
        children_nodes_required = 1;
5046
5047
14.4k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048
14.4k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5049
0
        {
5050
0
            inter_eval_enable = 1;
5051
0
            node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054
0
#endif
5055
0
            break;
5056
0
        }
5057
14.4k
#endif
5058
5059
14.4k
#if ENABLE_4CTB_EVALUATION
5060
14.4k
        node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061
5062
14.4k
        break;
5063
#else
5064
        {
5065
            S32 i;
5066
            num_clusters = ps_blk_32x32->num_clusters;
5067
5068
            if(ipe_decision_precedence)
5069
            {
5070
                node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071
                node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072
            }
5073
            else
5074
            {
5075
                node_validity =
5076
                    ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077
                    (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078
                    (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079
5080
                for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081
                {
5082
                    node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083
                                                      MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084
                }
5085
5086
                if(node_validity)
5087
                {
5088
                    node_validity = node_validity &&
5089
                                    hme_sdi_based_cluster_spread_eligibility(
5090
                                        ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091
                }
5092
            }
5093
        }
5094
5095
        break;
5096
#endif
5097
14.4k
    }
5098
57.8k
    case 2:
5099
57.8k
    {
5100
57.8k
        cluster_16x16_blk_t *ps_blk_16x16 =
5101
57.8k
            &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102
5103
57.8k
        S32 blk_8x8_mask =
5104
57.8k
            ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105
5106
57.8k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107
57.8k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108
57.8k
        children_nodes_required = 1;
5109
57.8k
        intra_mv_area = ps_blk_16x16->intra_mv_area;
5110
57.8k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111
57.8k
        num_clusters = ps_blk_16x16->num_clusters;
5112
5113
57.8k
        intra_eval_enable = ipe_decision_precedence;
5114
57.8k
        inter_eval_enable = 1;
5115
5116
57.8k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117
57.8k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5118
0
        {
5119
0
            node_validity =
5120
0
                !ps_ctb_cluster_info
5121
0
                     ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122
0
            children_nodes_required = !node_validity;
5123
0
            break;
5124
0
        }
5125
57.8k
#endif
5126
5127
57.8k
#if ENABLE_4CTB_EVALUATION
5128
57.8k
        node_validity = (blk_8x8_mask == 0xf);
5129
5130
57.8k
#if ENABLE_CU_TREE_CULLING
5131
57.8k
        {
5132
57.8k
            cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133
5134
57.8k
            switch(e_parent_blk_pos)
5135
57.8k
            {
5136
14.4k
            case POS_TL:
5137
14.4k
            {
5138
14.4k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139
5140
14.4k
                break;
5141
0
            }
5142
14.4k
            case POS_TR:
5143
14.4k
            {
5144
14.4k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145
5146
14.4k
                break;
5147
0
            }
5148
14.4k
            case POS_BL:
5149
14.4k
            {
5150
14.4k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151
5152
14.4k
                break;
5153
0
            }
5154
14.4k
            case POS_BR:
5155
14.4k
            {
5156
14.4k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157
5158
14.4k
                break;
5159
0
            }
5160
0
            default:
5161
0
            {
5162
0
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163
0
                break;
5164
0
            }
5165
57.8k
            }
5166
5167
57.8k
            if(ps_32x32_root->is_node_valid)
5168
46.5k
            {
5169
46.5k
                node_validity =
5170
46.5k
                    node_validity &&
5171
46.5k
                    !ps_ctb_cluster_info
5172
46.5k
                         ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173
46.5k
                children_nodes_required = !node_validity;
5174
46.5k
            }
5175
57.8k
        }
5176
0
#endif
5177
5178
0
        break;
5179
#else
5180
5181
        if(ipe_decision_precedence)
5182
        {
5183
            S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184
                                     .as_intra16_analyse[e_cur_blk_pos]
5185
                                     .b1_merge_flag);
5186
            S32 valid_flag = (blk_8x8_mask == 0xf);
5187
5188
            node_validity = merge_flag_16 && valid_flag;
5189
        }
5190
        else
5191
        {
5192
            node_validity = (blk_8x8_mask == 0xf);
5193
        }
5194
5195
        break;
5196
#endif
5197
57.8k
    }
5198
45.5k
    case 3:
5199
45.5k
    {
5200
45.5k
        S32 blk_8x8_mask =
5201
45.5k
            ps_ctb_cluster_info
5202
45.5k
                ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203
45.5k
        S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204
45.5k
                                 .as_intra16_analyse[e_parent_blk_pos]
5205
45.5k
                                 .b1_merge_flag);
5206
45.5k
        S32 merge_flag_32 =
5207
45.5k
            (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208
5209
45.5k
        intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210
45.5k
        inter_eval_enable = 1;
5211
45.5k
        children_nodes_required = 0;
5212
5213
45.5k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214
45.5k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5215
0
        {
5216
0
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217
0
            break;
5218
0
        }
5219
45.5k
#endif
5220
5221
45.5k
#if ENABLE_4CTB_EVALUATION
5222
45.5k
        node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223
5224
45.5k
        break;
5225
#else
5226
        {
5227
            cur_ctb_cu_tree_t *ps_32x32_root;
5228
            cur_ctb_cu_tree_t *ps_16x16_root;
5229
            cluster_32x32_blk_t *ps_32x32_blk;
5230
5231
            switch(e_grandparent_blk_pos)
5232
            {
5233
            case POS_TL:
5234
            {
5235
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236
5237
                break;
5238
            }
5239
            case POS_TR:
5240
            {
5241
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242
5243
                break;
5244
            }
5245
            case POS_BL:
5246
            {
5247
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248
5249
                break;
5250
            }
5251
            case POS_BR:
5252
            {
5253
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254
5255
                break;
5256
            }
5257
            default:
5258
            {
5259
                DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260
                break;
5261
            }
5262
            }
5263
5264
            switch(e_parent_blk_pos)
5265
            {
5266
            case POS_TL:
5267
            {
5268
                ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269
5270
                break;
5271
            }
5272
            case POS_TR:
5273
            {
5274
                ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275
5276
                break;
5277
            }
5278
            case POS_BL:
5279
            {
5280
                ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281
5282
                break;
5283
            }
5284
            case POS_BR:
5285
            {
5286
                ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287
5288
                break;
5289
            }
5290
            default:
5291
            {
5292
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293
                break;
5294
            }
5295
            }
5296
5297
            ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298
5299
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300
                            ((!ps_32x32_root->is_node_valid) ||
5301
                             (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302
                             (!ps_16x16_root->is_node_valid));
5303
5304
            break;
5305
        }
5306
#endif
5307
45.5k
    }
5308
121k
    }
5309
5310
    /* Fill the current cu_tree node */
5311
121k
    ps_cu_tree->is_node_valid = node_validity;
5312
121k
    ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313
121k
    ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314
5315
121k
    if(children_nodes_required)
5316
25.8k
    {
5317
25.8k
        tree_depth++;
5318
5319
25.8k
        hme_populate_cu_tree(
5320
25.8k
            ps_ctb_cluster_info,
5321
25.8k
            ps_cu_tree->ps_child_node_tl,
5322
25.8k
            tree_depth,
5323
25.8k
            e_quality_preset,
5324
25.8k
            e_parent_blk_pos,
5325
25.8k
            e_cur_blk_pos,
5326
25.8k
            POS_TL);
5327
5328
25.8k
        hme_populate_cu_tree(
5329
25.8k
            ps_ctb_cluster_info,
5330
25.8k
            ps_cu_tree->ps_child_node_tr,
5331
25.8k
            tree_depth,
5332
25.8k
            e_quality_preset,
5333
25.8k
            e_parent_blk_pos,
5334
25.8k
            e_cur_blk_pos,
5335
25.8k
            POS_TR);
5336
5337
25.8k
        hme_populate_cu_tree(
5338
25.8k
            ps_ctb_cluster_info,
5339
25.8k
            ps_cu_tree->ps_child_node_bl,
5340
25.8k
            tree_depth,
5341
25.8k
            e_quality_preset,
5342
25.8k
            e_parent_blk_pos,
5343
25.8k
            e_cur_blk_pos,
5344
25.8k
            POS_BL);
5345
5346
25.8k
        hme_populate_cu_tree(
5347
25.8k
            ps_ctb_cluster_info,
5348
25.8k
            ps_cu_tree->ps_child_node_br,
5349
25.8k
            tree_depth,
5350
25.8k
            e_quality_preset,
5351
25.8k
            e_parent_blk_pos,
5352
25.8k
            e_cur_blk_pos,
5353
25.8k
            POS_BR);
5354
25.8k
    }
5355
121k
}
5356
5357
/**
5358
********************************************************************************
5359
*  @fn   void hme_analyse_mv_clustering
5360
*               (
5361
*                   search_results_t *ps_search_results,
5362
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5363
*                   cur_ctb_cu_tree_t *ps_cu_tree
5364
*               )
5365
*
5366
*  @brief  Implementation for the clustering algorithm
5367
*
5368
*  @param[in]  ps_search_results: structure containing 16x16 block results
5369
*
5370
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
5371
*
5372
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
5373
*
5374
*  @return None
5375
********************************************************************************
5376
*/
5377
void hme_analyse_mv_clustering(
5378
    search_results_t *ps_search_results,
5379
    inter_cu_results_t *ps_16x16_cu_results,
5380
    inter_cu_results_t *ps_8x8_cu_results,
5381
    ctb_cluster_info_t *ps_ctb_cluster_info,
5382
    S08 *pi1_future_list,
5383
    S08 *pi1_past_list,
5384
    S32 bidir_enabled,
5385
    ME_QUALITY_PRESETS_T e_quality_preset)
5386
3.61k
{
5387
3.61k
    cluster_16x16_blk_t *ps_blk_16x16;
5388
3.61k
    cluster_32x32_blk_t *ps_blk_32x32;
5389
3.61k
    cluster_64x64_blk_t *ps_blk_64x64;
5390
5391
3.61k
    part_type_results_t *ps_best_result;
5392
3.61k
    pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393
3.61k
    pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394
5395
3.61k
    PART_ID_T e_part_id;
5396
3.61k
    PART_TYPE_T e_part_type;
5397
5398
3.61k
    S32 enable_64x64_merge;
5399
3.61k
    S32 i, j, k;
5400
3.61k
    S32 mvx, mvy;
5401
3.61k
    S32 num_parts;
5402
3.61k
    S32 ref_idx;
5403
3.61k
    S32 ai4_pred_mode[MAX_NUM_PARTS];
5404
5405
3.61k
    S32 num_32x32_merges = 0;
5406
5407
    /*****************************************/
5408
    /*****************************************/
5409
    /********* Enter ye who is HQ ************/
5410
    /*****************************************/
5411
    /*****************************************/
5412
5413
3.61k
    ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414
5415
    /* Initialise data in each of the clusters */
5416
61.5k
    for(i = 0; i < 16; i++)
5417
57.8k
    {
5418
57.8k
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419
5420
57.8k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421
57.8k
        if(e_quality_preset < ME_HIGH_QUALITY)
5422
57.8k
        {
5423
57.8k
            hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424
57.8k
        }
5425
0
        else
5426
0
        {
5427
0
            ps_blk_16x16->best_inter_cost = 0;
5428
0
            ps_blk_16x16->intra_mv_area = 0;
5429
0
        }
5430
#else
5431
        hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432
#endif
5433
57.8k
    }
5434
5435
18.0k
    for(i = 0; i < 4; i++)
5436
14.4k
    {
5437
14.4k
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438
5439
14.4k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440
14.4k
        if(e_quality_preset < ME_HIGH_QUALITY)
5441
14.4k
        {
5442
14.4k
            hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443
14.4k
        }
5444
0
        else
5445
0
        {
5446
0
            ps_blk_32x32->best_inter_cost = 0;
5447
0
            ps_blk_32x32->intra_mv_area = 0;
5448
0
        }
5449
#else
5450
        hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451
#endif
5452
14.4k
    }
5453
5454
3.61k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455
3.61k
    if(e_quality_preset < ME_HIGH_QUALITY)
5456
3.61k
    {
5457
3.61k
        hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458
3.61k
    }
5459
0
    else
5460
0
    {
5461
0
        ps_blk_64x64->best_inter_cost = 0;
5462
0
        ps_blk_64x64->intra_mv_area = 0;
5463
0
    }
5464
#else
5465
    hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466
#endif
5467
5468
    /* Initialise data for all nodes in the CU tree */
5469
3.61k
    hme_build_cu_tree(
5470
3.61k
        ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471
5472
3.61k
    if(e_quality_preset >= ME_HIGH_QUALITY)
5473
0
    {
5474
0
        memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475
0
    }
5476
5477
#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478
    return;
5479
#endif
5480
5481
61.5k
    for(i = 0; i < 16; i++)
5482
57.8k
    {
5483
57.8k
        S32 blk_8x8_mask;
5484
57.8k
        S32 is_16x16_blk_valid;
5485
57.8k
        S32 num_clusters_updated;
5486
57.8k
        S32 num_clusters;
5487
5488
57.8k
        blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489
5490
57.8k
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491
5492
57.8k
        is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493
5494
57.8k
        if(is_16x16_blk_valid)
5495
47.6k
        {
5496
            /* Use 8x8 data when 16x16 CU is split */
5497
47.6k
            if(ps_search_results[i].u1_split_flag)
5498
18.3k
            {
5499
18.3k
                S32 blk_8x8_idx = i << 2;
5500
5501
18.3k
                num_parts = 4;
5502
18.3k
                e_part_type = PRT_NxN;
5503
5504
91.5k
                for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505
73.2k
                {
5506
                    /* Only 2Nx2N partition supported for 8x8 block */
5507
73.2k
                    ASSERT(
5508
73.2k
                        ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509
73.2k
                        ((PART_TYPE_T)PRT_2Nx2N));
5510
5511
73.2k
                    aps_part_result[j] =
5512
73.2k
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513
73.2k
                    aps_inferior_parts[j] =
5514
73.2k
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515
73.2k
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516
73.2k
                }
5517
18.3k
            }
5518
29.3k
            else
5519
29.3k
            {
5520
29.3k
                ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521
5522
29.3k
                e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523
29.3k
                num_parts = gau1_num_parts_in_part_type[e_part_type];
5524
5525
60.8k
                for(j = 0; j < num_parts; j++)
5526
31.5k
                {
5527
31.5k
                    aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528
31.5k
                    aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529
31.5k
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530
31.5k
                }
5531
5532
29.3k
                ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533
29.3k
            }
5534
5535
152k
            for(j = 0; j < num_parts; j++)
5536
104k
            {
5537
104k
                pu_result_t *ps_part_result = aps_part_result[j];
5538
5539
104k
                S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5540
5541
104k
                e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542
5543
                /* Skip clustering if best mode is intra */
5544
104k
                if((ps_part_result->pu.b1_intra_flag))
5545
37.9k
                {
5546
37.9k
                    ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547
37.9k
                    ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548
37.9k
                    continue;
5549
37.9k
                }
5550
66.8k
                else
5551
66.8k
                {
5552
66.8k
                    ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553
66.8k
                }
5554
5555
66.8k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556
66.8k
                if(e_quality_preset >= ME_HIGH_QUALITY)
5557
0
                {
5558
0
                    continue;
5559
0
                }
5560
66.8k
#endif
5561
5562
135k
                for(k = 0; k < num_mvs; k++)
5563
68.8k
                {
5564
68.8k
                    mv_t *ps_mv;
5565
5566
68.8k
                    pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567
5568
68.8k
                    S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5569
5570
68.8k
                    ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571
5572
68.8k
                    mvx = ps_mv->i2_mvx;
5573
68.8k
                    mvy = ps_mv->i2_mvy;
5574
5575
68.8k
                    ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576
68.8k
                                         : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577
5578
68.8k
                    num_clusters = ps_blk_16x16->num_clusters;
5579
5580
68.8k
                    hme_find_and_update_clusters(
5581
68.8k
                        ps_blk_16x16->as_cluster_data,
5582
68.8k
                        &(ps_blk_16x16->num_clusters),
5583
68.8k
                        mvx,
5584
68.8k
                        mvy,
5585
68.8k
                        ref_idx,
5586
68.8k
                        ps_part_result->i4_sdi,
5587
68.8k
                        e_part_id,
5588
68.8k
                        (ai4_pred_mode[j] == 2));
5589
5590
68.8k
                    num_clusters_updated = (ps_blk_16x16->num_clusters);
5591
5592
68.8k
                    ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593
68.8k
                        (num_clusters_updated - num_clusters);
5594
68.8k
                }
5595
66.8k
            }
5596
47.6k
        }
5597
57.8k
    }
5598
5599
    /* Search for 32x32 clusters */
5600
18.0k
    for(i = 0; i < 4; i++)
5601
14.4k
    {
5602
14.4k
        S32 num_clusters_merged;
5603
5604
14.4k
        S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605
5606
14.4k
        if(is_32x32_blk_valid)
5607
11.6k
        {
5608
11.6k
            ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609
11.6k
            ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610
5611
11.6k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612
11.6k
            if(e_quality_preset >= ME_HIGH_QUALITY)
5613
0
            {
5614
0
                for(j = 0; j < 4; j++, ps_blk_16x16++)
5615
0
                {
5616
0
                    ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617
5618
0
                    ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619
0
                }
5620
0
                continue;
5621
0
            }
5622
11.6k
#endif
5623
5624
11.6k
            hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625
5626
11.6k
            if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627
2.02k
            {
5628
2.02k
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629
2.02k
                    ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630
5631
2.02k
                if(num_clusters_merged)
5632
0
                {
5633
0
                    ps_blk_32x32->num_clusters -= num_clusters_merged;
5634
5635
0
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636
0
                }
5637
2.02k
            }
5638
11.6k
        }
5639
14.4k
    }
5640
5641
3.61k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642
    /* Eliminate outlier 32x32 clusters */
5643
3.61k
    if(e_quality_preset < ME_HIGH_QUALITY)
5644
3.61k
#endif
5645
3.61k
    {
5646
3.61k
        hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647
5648
        /* Find best_uni_ref and best_alt_ref */
5649
3.61k
        hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650
3.61k
    }
5651
5652
    /* Populate the CU tree for depths 1 and higher */
5653
3.61k
    {
5654
3.61k
        cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655
3.61k
        cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656
3.61k
        cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657
3.61k
        cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658
3.61k
        cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659
5660
3.61k
        hme_populate_cu_tree(
5661
3.61k
            ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662
5663
3.61k
        num_32x32_merges += (ps_tl->is_node_valid == 1);
5664
5665
3.61k
        hme_populate_cu_tree(
5666
3.61k
            ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667
5668
3.61k
        num_32x32_merges += (ps_tr->is_node_valid == 1);
5669
5670
3.61k
        hme_populate_cu_tree(
5671
3.61k
            ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672
5673
3.61k
        num_32x32_merges += (ps_bl->is_node_valid == 1);
5674
5675
3.61k
        hme_populate_cu_tree(
5676
3.61k
            ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677
5678
3.61k
        num_32x32_merges += (ps_br->is_node_valid == 1);
5679
3.61k
    }
5680
5681
#if !ENABLE_4CTB_EVALUATION
5682
    if(e_quality_preset < ME_HIGH_QUALITY)
5683
    {
5684
        enable_64x64_merge = (num_32x32_merges >= 3);
5685
    }
5686
#else
5687
3.61k
    if(e_quality_preset < ME_HIGH_QUALITY)
5688
3.61k
    {
5689
3.61k
        enable_64x64_merge = 1;
5690
3.61k
    }
5691
3.61k
#endif
5692
5693
3.61k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694
3.61k
    if(e_quality_preset >= ME_HIGH_QUALITY)
5695
0
    {
5696
0
        enable_64x64_merge = 1;
5697
0
    }
5698
#else
5699
    if(e_quality_preset >= ME_HIGH_QUALITY)
5700
    {
5701
        enable_64x64_merge = (num_32x32_merges >= 3);
5702
    }
5703
#endif
5704
5705
3.61k
    if(enable_64x64_merge)
5706
3.61k
    {
5707
3.61k
        S32 num_clusters_merged;
5708
5709
3.61k
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710
5711
3.61k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712
3.61k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5713
0
        {
5714
0
            for(j = 0; j < 4; j++, ps_blk_32x32++)
5715
0
            {
5716
0
                ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717
5718
0
                ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719
0
            }
5720
0
        }
5721
3.61k
        else
5722
3.61k
#endif
5723
3.61k
        {
5724
3.61k
            hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725
5726
3.61k
            if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727
1.81k
            {
5728
1.81k
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729
1.81k
                    ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730
5731
1.81k
                if(num_clusters_merged)
5732
1
                {
5733
1
                    ps_blk_64x64->num_clusters -= num_clusters_merged;
5734
5735
1
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736
1
                }
5737
1.81k
            }
5738
3.61k
        }
5739
5740
#if !ENABLE_4CTB_EVALUATION
5741
        if(e_quality_preset < ME_HIGH_QUALITY)
5742
        {
5743
            S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744
            S32 best_intra_cost =
5745
                ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746
                  ps_ctb_cluster_info->i4_frame_qstep *
5747
                      ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748
                    ? MAX_32BIT_VAL
5749
                    : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750
                       ps_ctb_cluster_info->i4_frame_qstep *
5751
                           ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752
            S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753
            S32 cost_differential = (best_inter_cost - best_cost);
5754
5755
            enable_64x64_merge =
5756
                ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5757
        }
5758
#endif
5759
3.61k
    }
5760
5761
3.61k
    if(enable_64x64_merge)
5762
3.61k
    {
5763
3.61k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764
3.61k
        if(e_quality_preset < ME_HIGH_QUALITY)
5765
3.61k
#endif
5766
3.61k
        {
5767
3.61k
            hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768
5769
3.61k
            hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770
3.61k
        }
5771
5772
3.61k
        hme_populate_cu_tree(
5773
3.61k
            ps_ctb_cluster_info,
5774
3.61k
            ps_ctb_cluster_info->ps_cu_tree_root,
5775
3.61k
            0,
5776
3.61k
            e_quality_preset,
5777
3.61k
            POS_NA,
5778
3.61k
            POS_NA,
5779
3.61k
            POS_NA);
5780
3.61k
    }
5781
3.61k
}
5782
#endif
5783
5784
static __inline void hme_merge_prms_init(
5785
    hme_merge_prms_t *ps_prms,
5786
    layer_ctxt_t *ps_curr_layer,
5787
    refine_prms_t *ps_refine_prms,
5788
    me_frm_ctxt_t *ps_me_ctxt,
5789
    range_prms_t *ps_range_prms_rec,
5790
    range_prms_t *ps_range_prms_inp,
5791
    mv_grid_t **pps_mv_grid,
5792
    inter_ctb_prms_t *ps_inter_ctb_prms,
5793
    S32 i4_num_pred_dir,
5794
    S32 i4_32x32_id,
5795
    BLK_SIZE_T e_blk_size,
5796
    ME_QUALITY_PRESETS_T e_me_quality_presets)
5797
31.8k
{
5798
31.8k
    S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
5799
31.8k
    S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
5800
5801
    /* Currently not enabling segmentation info from prev layers */
5802
31.8k
    ps_prms->i4_seg_info_avail = 0;
5803
31.8k
    ps_prms->i4_part_mask = 0;
5804
5805
    /* Number of reference pics in which to do merge */
5806
31.8k
    ps_prms->i4_num_ref = i4_num_pred_dir;
5807
5808
    /* Layer ctxt info */
5809
31.8k
    ps_prms->ps_layer_ctxt = ps_curr_layer;
5810
5811
31.8k
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5812
5813
    /* Top left, top right, bottom left and bottom right 16x16 units */
5814
31.8k
    if(BLK_32x32 == e_blk_size)
5815
25.4k
    {
5816
25.4k
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5817
25.4k
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5818
25.4k
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5819
25.4k
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5820
5821
        /* Merge results stored here */
5822
25.4k
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5823
5824
        /* This could be lesser than the number of 16x16results generated*/
5825
        /* For now, keeping it to be same                                */
5826
25.4k
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5827
25.4k
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5828
25.4k
        ps_prms->ps_results_grandchild = NULL;
5829
25.4k
    }
5830
6.36k
    else
5831
6.36k
    {
5832
6.36k
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5833
6.36k
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5834
6.36k
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5835
6.36k
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5836
5837
        /* Merge results stored here */
5838
6.36k
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5839
5840
6.36k
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5841
6.36k
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5842
6.36k
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5843
6.36k
    }
5844
5845
31.8k
    if(i4_use_rec)
5846
31.8k
    {
5847
31.8k
        WORD32 ref_ctr;
5848
5849
413k
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5850
381k
        {
5851
381k
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5852
381k
        }
5853
31.8k
    }
5854
0
    else
5855
0
    {
5856
0
        WORD32 ref_ctr;
5857
5858
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5859
0
        {
5860
0
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5861
0
        }
5862
0
    }
5863
31.8k
    ps_prms->i4_use_rec = i4_use_rec;
5864
5865
31.8k
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5866
5867
31.8k
    ps_prms->pps_mv_grid = pps_mv_grid;
5868
5869
31.8k
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5870
5871
31.8k
    ps_prms->e_quality_preset = e_me_quality_presets;
5872
31.8k
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5873
31.8k
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5874
31.8k
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5875
31.8k
}
5876
5877
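/**
********************************************************************************
*  @fn   void hme_refine(me_ctxt_t *ps_thrd_ctxt,
*                       refine_prms_t *ps_refine_prms,
*                       PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
*                       layer_ctxt_t *ps_coarse_layer,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       S32 thrd_id,
*                       S32 me_frm_id,
*                       pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
*
*  @brief  Top level entry point for refinement ME
*
*  @param[in,out]  ps_thrd_ctxt: ME Handle; the frame level ME context is
*                  picked from it using me_frm_id
*
*  @param[in]  ps_refine_prms : refinement layer prms
*
*  @param[in]  pf_ext_update_fxn : external update function pointer
*
*  @param[in]  ps_coarse_layer : context of the coarser layer; its MV bank
*              supplies the number of results of the previous layer
*
*  @param[in]  ps_multi_thrd_ctxt : multi thread context
*
*  @param[in]  lyr_job_type : layer job type
*
*  @param[in]  thrd_id : thread id
*
*  @param[in]  me_frm_id : ME frame id; used modulo MAX_NUM_ME_PARALLEL
*
*  @param[in]  ps_l0_ipe_input : L0 IPE input for the enc loop
*
*  @return None
********************************************************************************
*/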
5891
void hme_refine(
5892
    me_ctxt_t *ps_thrd_ctxt,
5893
    refine_prms_t *ps_refine_prms,
5894
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895
    layer_ctxt_t *ps_coarse_layer,
5896
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897
    S32 lyr_job_type,
5898
    S32 thrd_id,
5899
    S32 me_frm_id,
5900
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901
6.36k
{
5902
6.36k
    inter_ctb_prms_t s_common_frm_prms;
5903
5904
6.36k
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905
6.36k
    WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906
6.36k
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907
6.36k
    ME_QUALITY_PRESETS_T e_me_quality_presets =
5908
6.36k
        ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909
5910
6.36k
    WORD32 num_rows_proc = 0;
5911
6.36k
    WORD32 num_act_ref_pics;
5912
6.36k
    WORD16 i2_prev_enc_frm_max_mv_y;
5913
6.36k
    WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914
5915
    /*************************************************************************/
5916
    /* Complexity of search: Low to High                                     */
5917
    /*************************************************************************/
5918
6.36k
    SEARCH_COMPLEXITY_T e_search_complexity;
5919
5920
    /*************************************************************************/
5921
    /* to store the PU results which are passed to the decide_part_types     */
5922
    /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923
    /*************************************************************************/
5924
5925
6.36k
    pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926
6.36k
    inter_pu_results_t as_inter_pu_results[4];
5927
6.36k
    inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928
5929
    /*************************************************************************/
5930
    /* Config parameter structures for varius ME submodules                  */
5931
    /*************************************************************************/
5932
6.36k
    hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933
6.36k
    hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934
6.36k
    hme_merge_prms_t s_merge_prms_64x64;
5935
6.36k
    hme_search_prms_t s_search_prms_blk;
5936
6.36k
    mvbank_update_prms_t s_mv_update_prms;
5937
6.36k
    hme_ctb_prms_t s_ctb_prms;
5938
6.36k
    hme_subpel_prms_t s_subpel_prms;
5939
6.36k
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940
6.36k
    ctb_cluster_info_t *ps_ctb_cluster_info;
5941
6.36k
    fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942
5943
    /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944
6.36k
    S32 en_merge_32x32;
5945
    /* 5 lsb's specify whether or not merge algorithm is required */
5946
    /* to be executed or not. Relevant only in PQ. Ought to be */
5947
    /* used in conjunction with en_merge_32x32 and */
5948
    /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949
    /* required when all children are deemed to be intras */
5950
6.36k
    S32 en_merge_execution;
5951
5952
    /*************************************************************************/
5953
    /* All types of search candidates for predictor based search.            */
5954
    /*************************************************************************/
5955
6.36k
    S32 num_init_candts = 0;
5956
6.36k
    S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957
6.36k
    S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958
6.36k
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959
6.36k
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960
5961
6.36k
    pf_get_wt_inp fp_get_wt_inp;
5962
5963
6.36k
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964
6.36k
    U32 au4_unique_node_map[MAP_X_MAX * 2];
5965
5966
    /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967
6.36k
    ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968
5969
    /*************************************************************************/
5970
    /* points ot the search results for the blk level search (8x8/16x16)     */
5971
    /*************************************************************************/
5972
6.36k
    search_results_t *ps_search_results;
5973
5974
    /*************************************************************************/
5975
    /* Coordinates                                                           */
5976
    /*************************************************************************/
5977
6.36k
    S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978
6.36k
    S32 pos_x, pos_y;
5979
6.36k
    S32 blk_id_in_full_ctb;
5980
5981
    /*************************************************************************/
5982
    /* Related to dimensions of block being searched and pic dimensions      */
5983
    /*************************************************************************/
5984
6.36k
    S32 blk_4x4_to_16x16;
5985
6.36k
    S32 blk_wd, blk_ht, blk_size_shift;
5986
6.36k
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987
6.36k
    S32 num_results_prev_layer;
5988
5989
    /*************************************************************************/
5990
    /* Size of a basic unit for this layer. For non encode layers, we search */
5991
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992
    /* basic unit size is the ctb size.                                      */
5993
    /*************************************************************************/
5994
6.36k
    S32 unit_size;
5995
5996
    /*************************************************************************/
5997
    /* Local variable storing results of any 4 CU merge to bigger CU         */
5998
    /*************************************************************************/
5999
6.36k
    CU_MERGE_RESULT_T e_merge_result;
6000
6001
    /*************************************************************************/
6002
    /* This mv grid stores results during and after fpel search, during      */
6003
    /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004
    /* meant for the 2 directions of search (l0 and l1).                     */
6005
    /*************************************************************************/
6006
6.36k
    mv_grid_t *aps_mv_grid[2];
6007
6008
    /*************************************************************************/
6009
    /* Pointers to context in current and coarser layers                     */
6010
    /*************************************************************************/
6011
6.36k
    layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012
6013
    /*************************************************************************/
6014
    /* to store mv range per blk, and picture limit, allowed search range    */
6015
    /* range prms in hpel and qpel units as well                             */
6016
    /*************************************************************************/
6017
6.36k
    range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018
6.36k
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019
6.36k
    range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020
6021
    /*************************************************************************/
6022
    /* These variables are used to track number of references at different   */
6023
    /* stages of ME.                                                         */
6024
    /*************************************************************************/
6025
6.36k
    S32 i4_num_pred_dir;
6026
6.36k
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027
6.36k
    S32 lambda_recon = ps_refine_prms->lambda_recon;
6028
6029
    /* Counts successful merge to 32x32 every CTB (0-4) */
6030
6.36k
    S32 merge_count_32x32;
6031
6032
6.36k
    S32 ai4_id_coloc[14], ai4_id_Z[2];
6033
6.36k
    U08 au1_search_candidate_list_index[2];
6034
6.36k
    S32 ai4_num_coloc_cands[2];
6035
6.36k
    U08 u1_pred_dir, u1_pred_dir_ctr;
6036
6037
    /*************************************************************************/
6038
    /* Input pointer and stride                                              */
6039
    /*************************************************************************/
6040
6.36k
    U08 *pu1_inp;
6041
6.36k
    S32 i4_inp_stride;
6042
6.36k
    S32 end_of_frame;
6043
6.36k
    S32 num_sync_units_in_row, num_sync_units_in_tile;
6044
6045
    /*************************************************************************/
6046
    /* Indicates whether all 4 8x8 blks are valid in the 16x16 blk in the    */
6047
    /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6048
    /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6049
    /*************************************************************************/
6050
6.36k
    S32 blk_8x8_mask;
6051
6.36k
    S32 ai4_blk_8x8_mask[16];
6052
6.36k
    U08 au1_is_64x64Blk_noisy[1];
6053
6.36k
    U08 au1_is_32x32Blk_noisy[4];
6054
6.36k
    U08 au1_is_16x16Blk_noisy[16];
6055
6056
6.36k
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057
6.36k
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058
6.36k
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059
6.36k
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060
6061
6.36k
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062
6063
    /*************************************************************************/
6064
    /* Pointers to current and coarse layer are needed for projection */
6065
    /* Pointer to prev layer are needed for other candts like coloc   */
6066
    /*************************************************************************/
6067
6.36k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068
6069
6.36k
    ps_prev_layer = hme_get_past_layer_ctxt(
6070
6.36k
        ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071
6072
6.36k
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073
6074
    /* Function pointer is selected based on the C vs X86 macro */
6075
6076
6.36k
    fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077
6078
6.36k
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079
6.36k
    i4_pic_wd = ps_curr_layer->i4_wd;
6080
6.36k
    i4_pic_ht = ps_curr_layer->i4_ht;
6081
6.36k
    e_search_complexity = ps_refine_prms->e_search_complexity;
6082
6.36k
    end_of_frame = 0;
6083
6084
    /* This points to all the initial candts */
6085
6.36k
    ps_search_candts = &as_search_candts[0];
6086
6087
    /* mv grid, being a huge structure, is part of the context */
6088
6.36k
    aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089
6.36k
    aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090
6091
    /*************************************************************************/
6092
    /* If the current layer is encoded (since it may be multicast or final   */
6093
    /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6094
    /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6095
    /* enable or disable of 4x4 partitions depending on the input prms       */
6096
    /*************************************************************************/
6097
6.36k
    e_search_blk_size = BLK_16x16;
6098
6.36k
    blk_wd = blk_ht = 16;
6099
6.36k
    blk_size_shift = 4;
6100
6.36k
    e_result_blk_size = BLK_8x8;
6101
6.36k
    s_mv_update_prms.i4_shift = 1;
6102
6103
6.36k
    if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104
1.43k
    {
6105
1.43k
        blk_4x4_to_16x16 = 1;
6106
1.43k
    }
6107
4.93k
    else
6108
4.93k
    {
6109
4.93k
        blk_4x4_to_16x16 = 0;
6110
4.93k
    }
6111
6112
6.36k
    unit_size = 1 << ps_ctxt->log_ctb_size;
6113
6.36k
    s_search_prms_blk.i4_inp_stride = unit_size;
6114
6115
    /* This is required to properly update the layer mv bank */
6116
6.36k
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117
6.36k
    s_search_prms_blk.e_blk_size = e_search_blk_size;
6118
6119
    /*************************************************************************/
6120
    /* If current layer is explicit, then the number of ref frames are to    */
6121
    /* be same as previous layer. Else it will be 2                          */
6122
    /*************************************************************************/
6123
6.36k
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124
6.36k
    i4_num_pred_dir =
6125
6.36k
        (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126
6.36k
        1;
6127
6128
6.36k
#if USE_MODIFIED == 1
6129
6.36k
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130
#else
6131
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132
#endif
6133
6134
6.36k
    i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135
6.36k
    if(i4_num_ref_prev_layer <= 2)
6136
5.70k
    {
6137
5.70k
        i4_num_ref_each_dir = 1;
6138
5.70k
    }
6139
656
    else
6140
656
    {
6141
656
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142
656
    }
6143
6144
6.36k
    s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145
6.36k
    s_mv_update_prms.i4_num_results_to_store =
6146
6.36k
        MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147
6.36k
                                                : (i4_num_act_ref_l0 > 1) + 1,
6148
6.36k
            ps_refine_prms->i4_num_results_per_part);
6149
6150
    /*************************************************************************/
6151
    /* Initialization of merge params for 16x16 to 32x32 merge.              */
6152
    /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6153
    /*************************************************************************/
6154
6.36k
    {
6155
6.36k
        hme_merge_prms_t *aps_merge_prms[4];
6156
6.36k
        aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157
6.36k
        aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158
6.36k
        aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159
6.36k
        aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160
31.8k
        for(i = 0; i < 4; i++)
6161
25.4k
        {
6162
25.4k
            hme_merge_prms_init(
6163
25.4k
                aps_merge_prms[i],
6164
25.4k
                ps_curr_layer,
6165
25.4k
                ps_refine_prms,
6166
25.4k
                ps_ctxt,
6167
25.4k
                as_range_prms_rec,
6168
25.4k
                as_range_prms_inp,
6169
25.4k
                &aps_mv_grid[0],
6170
25.4k
                &s_common_frm_prms,
6171
25.4k
                i4_num_pred_dir,
6172
25.4k
                i,
6173
25.4k
                BLK_32x32,
6174
25.4k
                e_me_quality_presets);
6175
25.4k
        }
6176
6.36k
    }
6177
6178
    /*************************************************************************/
6179
    /* Initialization of merge params for 32x32 to 64x64 merge.              */
6180
    /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6181
    /*************************************************************************/
6182
6.36k
    {
6183
6.36k
        hme_merge_prms_init(
6184
6.36k
            &s_merge_prms_64x64,
6185
6.36k
            ps_curr_layer,
6186
6.36k
            ps_refine_prms,
6187
6.36k
            ps_ctxt,
6188
6.36k
            as_range_prms_rec,
6189
6.36k
            as_range_prms_inp,
6190
6.36k
            &aps_mv_grid[0],
6191
6.36k
            &s_common_frm_prms,
6192
6.36k
            i4_num_pred_dir,
6193
6.36k
            0,
6194
6.36k
            BLK_64x64,
6195
6.36k
            e_me_quality_presets);
6196
6.36k
    }
6197
6198
    /* Pointers to cu_results are initialised here */
6199
6.36k
    {
6200
6.36k
        WORD32 i;
6201
6202
6.36k
        ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203
6204
31.8k
        for(i = 0; i < 4; i++)
6205
25.4k
        {
6206
25.4k
            ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207
25.4k
        }
6208
6209
108k
        for(i = 0; i < 16; i++)
6210
101k
        {
6211
101k
            ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212
101k
        }
6213
6.36k
    }
6214
6215
    /*************************************************************************/
6216
    /* SUBPEL Params initialized here                                        */
6217
    /*************************************************************************/
6218
6.36k
    {
6219
6.36k
        s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220
6.36k
        s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221
6.36k
        s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222
6223
6.36k
        s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224
6.36k
        s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225
6.36k
        s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226
6227
6.36k
        s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228
6.36k
        s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229
6230
6.36k
        s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231
6232
6.36k
        s_subpel_prms.i4_inp_stride = unit_size;
6233
6234
6.36k
        s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235
6.36k
        s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236
6.36k
        s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237
6238
6.36k
        s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239
6240
6.36k
        {
6241
6.36k
            WORD32 ref_ctr;
6242
82.7k
            for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243
76.3k
            {
6244
76.3k
                s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245
76.3k
                s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246
76.3k
            }
6247
6.36k
        }
6248
6.36k
        s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249
6250
#if USE_MODIFIED == 0
6251
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252
#else
6253
6.36k
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254
6.36k
#endif
6255
6.36k
        s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256
6257
        /* BI Refinement done only if this field is 1 */
6258
6.36k
        s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259
6260
6.36k
        s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261
6262
6.36k
        s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263
6.36k
        s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264
6.36k
        s_subpel_prms.u1_max_num_subpel_refine_centers =
6265
6.36k
            ps_refine_prms->u1_max_num_subpel_refine_centers;
6266
6.36k
    }
6267
6268
    /* inter_ctb_prms_t struct initialisation */
6269
6.36k
    {
6270
6.36k
        inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271
6.36k
        hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272
6273
6.36k
        ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274
6.36k
        ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275
6.36k
        ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276
6.36k
        ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277
6.36k
        ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278
6.36k
        ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279
6.36k
        ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280
6.36k
        ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281
6.36k
        ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282
6.36k
        ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283
6.36k
        ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284
6.36k
        ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285
6.36k
        ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286
6.36k
        ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287
6.36k
        ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288
6.36k
        ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289
6.36k
        ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290
6.36k
        ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291
6.36k
        ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292
6.36k
        ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293
6.36k
            ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294
6.36k
    }
6295
6296
388k
    for(i = 0; i < MAX_INIT_CANDTS; i++)
6297
381k
    {
6298
381k
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299
381k
        ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300
6301
381k
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302
381k
    }
6303
6.36k
    num_act_ref_pics =
6304
6.36k
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305
6306
6.36k
    if(num_act_ref_pics)
6307
6.36k
    {
6308
6.36k
        hme_search_cand_data_init(
6309
6.36k
            ai4_id_Z,
6310
6.36k
            ai4_id_coloc,
6311
6.36k
            ai4_num_coloc_cands,
6312
6.36k
            au1_search_candidate_list_index,
6313
6.36k
            i4_num_act_ref_l0,
6314
6.36k
            i4_num_act_ref_l1,
6315
6.36k
            ps_ctxt->s_frm_prms.bidir_enabled,
6316
6.36k
            blk_4x4_to_16x16);
6317
6.36k
    }
6318
6319
6.36k
    if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320
3.85k
    {
6321
3.85k
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322
3.85k
        ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323
3.85k
    }
6324
2.50k
    else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325
1.52k
    {
6326
1.52k
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327
1.52k
    }
6328
6329
25.4k
    for(i = 0; i < 3; i++)
6330
19.0k
    {
6331
19.0k
        search_node_t *ps_search_node;
6332
19.0k
        ps_search_node = &as_left_neighbours[i];
6333
19.0k
        INIT_SEARCH_NODE(ps_search_node, 0);
6334
19.0k
        ps_search_node = &as_top_neighbours[i];
6335
19.0k
        INIT_SEARCH_NODE(ps_search_node, 0);
6336
19.0k
    }
6337
6338
6.36k
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339
6.36k
    as_left_neighbours[2].u1_is_avail = 0;
6340
6341
    /*************************************************************************/
6342
    /* Initialize all the search results structure here. We update all the   */
6343
    /* search results to default values, and configure things like blk sizes */
6344
    /*************************************************************************/
6345
6.36k
    if(num_act_ref_pics)
6346
6.36k
    {
6347
6.36k
        S32 i4_x, i4_y;
6348
        /* 16x16 results */
6349
108k
        for(i = 0; i < 16; i++)
6350
101k
        {
6351
101k
            search_results_t *ps_search_results;
6352
101k
            S32 pred_lx;
6353
101k
            ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354
101k
            i4_x = (S32)gau1_encode_to_raster_x[i];
6355
101k
            i4_y = (S32)gau1_encode_to_raster_y[i];
6356
101k
            i4_x <<= 4;
6357
101k
            i4_y <<= 4;
6358
6359
101k
            hme_init_search_results(
6360
101k
                ps_search_results,
6361
101k
                i4_num_pred_dir,
6362
101k
                ps_refine_prms->i4_num_fpel_results,
6363
101k
                ps_refine_prms->i4_num_results_per_part,
6364
101k
                e_search_blk_size,
6365
101k
                i4_x,
6366
101k
                i4_y,
6367
101k
                &ps_ctxt->au1_is_past[0]);
6368
6369
305k
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370
203k
            {
6371
203k
                pred_ctxt_t *ps_pred_ctxt;
6372
6373
203k
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374
6375
203k
                hme_init_pred_ctxt_encode(
6376
203k
                    ps_pred_ctxt,
6377
203k
                    ps_search_results,
6378
203k
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379
203k
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380
203k
                    aps_mv_grid[pred_lx],
6381
203k
                    pred_lx,
6382
203k
                    lambda_recon,
6383
203k
                    ps_refine_prms->lambda_q_shift,
6384
203k
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385
203k
                    &ps_ctxt->ai2_ref_scf[0]);
6386
203k
            }
6387
101k
        }
6388
6389
31.8k
        for(i = 0; i < 4; i++)
6390
25.4k
        {
6391
25.4k
            search_results_t *ps_search_results;
6392
25.4k
            S32 pred_lx;
6393
25.4k
            ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394
6395
25.4k
            i4_x = (S32)gau1_encode_to_raster_x[i];
6396
25.4k
            i4_y = (S32)gau1_encode_to_raster_y[i];
6397
25.4k
            i4_x <<= 5;
6398
25.4k
            i4_y <<= 5;
6399
6400
25.4k
            hme_init_search_results(
6401
25.4k
                ps_search_results,
6402
25.4k
                i4_num_pred_dir,
6403
25.4k
                ps_refine_prms->i4_num_32x32_merge_results,
6404
25.4k
                ps_refine_prms->i4_num_results_per_part,
6405
25.4k
                BLK_32x32,
6406
25.4k
                i4_x,
6407
25.4k
                i4_y,
6408
25.4k
                &ps_ctxt->au1_is_past[0]);
6409
6410
76.3k
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411
50.9k
            {
6412
50.9k
                pred_ctxt_t *ps_pred_ctxt;
6413
6414
50.9k
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415
6416
50.9k
                hme_init_pred_ctxt_encode(
6417
50.9k
                    ps_pred_ctxt,
6418
50.9k
                    ps_search_results,
6419
50.9k
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420
50.9k
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421
50.9k
                    aps_mv_grid[pred_lx],
6422
50.9k
                    pred_lx,
6423
50.9k
                    lambda_recon,
6424
50.9k
                    ps_refine_prms->lambda_q_shift,
6425
50.9k
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426
50.9k
                    &ps_ctxt->ai2_ref_scf[0]);
6427
50.9k
            }
6428
25.4k
        }
6429
6430
6.36k
        {
6431
6.36k
            search_results_t *ps_search_results;
6432
6.36k
            S32 pred_lx;
6433
6.36k
            ps_search_results = &ps_ctxt->s_search_results_64x64;
6434
6435
6.36k
            hme_init_search_results(
6436
6.36k
                ps_search_results,
6437
6.36k
                i4_num_pred_dir,
6438
6.36k
                ps_refine_prms->i4_num_64x64_merge_results,
6439
6.36k
                ps_refine_prms->i4_num_results_per_part,
6440
6.36k
                BLK_64x64,
6441
6.36k
                0,
6442
6.36k
                0,
6443
6.36k
                &ps_ctxt->au1_is_past[0]);
6444
6445
19.0k
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446
12.7k
            {
6447
12.7k
                pred_ctxt_t *ps_pred_ctxt;
6448
6449
12.7k
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450
6451
12.7k
                hme_init_pred_ctxt_encode(
6452
12.7k
                    ps_pred_ctxt,
6453
12.7k
                    ps_search_results,
6454
12.7k
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455
12.7k
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456
12.7k
                    aps_mv_grid[pred_lx],
6457
12.7k
                    pred_lx,
6458
12.7k
                    lambda_recon,
6459
12.7k
                    ps_refine_prms->lambda_q_shift,
6460
12.7k
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461
12.7k
                    &ps_ctxt->ai2_ref_scf[0]);
6462
12.7k
            }
6463
6.36k
        }
6464
6.36k
    }
6465
6466
    /* Initialise the structure used in clustering  */
6467
6.36k
    if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468
816
    {
6469
816
        ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470
6471
816
        ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472
816
        ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473
816
        ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474
816
        ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475
816
        ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476
816
        ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477
816
        ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478
816
    }
6479
6480
    /*********************************************************************/
6481
    /* Initialize the dyn. search range params. for each reference index */
6482
    /* in current layer ctxt                                             */
6483
    /*********************************************************************/
6484
6485
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6486
6.36k
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487
5.38k
    {
6488
5.38k
        WORD32 ref_ctr;
6489
        /* set no. of act ref in L0 for further use at frame level */
6490
5.38k
        ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491
5.38k
            ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492
6493
15.4k
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494
10.0k
        {
6495
10.0k
            INIT_DYN_SEARCH_PRMS(
6496
10.0k
                &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497
10.0k
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498
10.0k
        }
6499
5.38k
    }
6500
    /*************************************************************************/
6501
    /* Now that the candidates have been ordered, to choose the right number */
6502
    /* of initial candidates.                                                */
6503
    /*************************************************************************/
6504
6.36k
    if(blk_4x4_to_16x16)
6505
1.43k
    {
6506
1.43k
        if(i4_num_ref_prev_layer > 2)
6507
524
        {
6508
524
            if(e_search_complexity == SEARCH_CX_LOW)
6509
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510
524
            else if(e_search_complexity == SEARCH_CX_MED)
6511
524
                num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6513
0
                num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514
0
            else
6515
0
                ASSERT(0);
6516
524
        }
6517
910
        else if(i4_num_ref_prev_layer == 2)
6518
617
        {
6519
617
            if(e_search_complexity == SEARCH_CX_LOW)
6520
0
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521
617
            else if(e_search_complexity == SEARCH_CX_MED)
6522
617
                num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6524
0
                num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525
0
            else
6526
0
                ASSERT(0);
6527
617
        }
6528
293
        else
6529
293
        {
6530
293
            if(e_search_complexity == SEARCH_CX_LOW)
6531
0
                num_init_candts = 5;
6532
293
            else if(e_search_complexity == SEARCH_CX_MED)
6533
293
                num_init_candts = 12;
6534
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6535
0
                num_init_candts = 19;
6536
0
            else
6537
0
                ASSERT(0);
6538
293
        }
6539
1.43k
    }
6540
4.93k
    else
6541
4.93k
    {
6542
4.93k
        if(i4_num_ref_prev_layer > 2)
6543
132
        {
6544
132
            if(e_search_complexity == SEARCH_CX_LOW)
6545
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546
132
            else if(e_search_complexity == SEARCH_CX_MED)
6547
132
                num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6549
0
                num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550
0
            else
6551
0
                ASSERT(0);
6552
132
        }
6553
4.79k
        else if(i4_num_ref_prev_layer == 2)
6554
3.55k
        {
6555
3.55k
            if(e_search_complexity == SEARCH_CX_LOW)
6556
578
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557
2.97k
            else if(e_search_complexity == SEARCH_CX_MED)
6558
2.97k
                num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6560
0
                num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561
0
            else
6562
0
                ASSERT(0);
6563
3.55k
        }
6564
1.24k
        else
6565
1.24k
        {
6566
1.24k
            if(e_search_complexity == SEARCH_CX_LOW)
6567
848
                num_init_candts = 5;
6568
395
            else if(e_search_complexity == SEARCH_CX_MED)
6569
395
                num_init_candts = 11;
6570
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6571
0
                num_init_candts = 16;
6572
0
            else
6573
0
                ASSERT(0);
6574
1.24k
        }
6575
4.93k
    }
6576
6577
    /*************************************************************************/
6578
    /* The following search parameters are fixed throughout the search across*/
6579
    /* all blks. So these are configured outside processing loop             */
6580
    /*************************************************************************/
6581
6.36k
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582
6.36k
    s_search_prms_blk.i4_start_step = 1;
6583
6.36k
    s_search_prms_blk.i4_use_satd = 0;
6584
6.36k
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585
    /* we use recon only for encoded layers, otherwise it is not available */
6586
6.36k
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587
6588
6.36k
    s_search_prms_blk.ps_search_candts = ps_search_candts;
6589
6.36k
    if(s_search_prms_blk.i4_use_rec)
6590
6.36k
    {
6591
6.36k
        WORD32 ref_ctr;
6592
82.7k
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593
76.3k
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594
6.36k
    }
6595
0
    else
6596
0
    {
6597
0
        WORD32 ref_ctr;
6598
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599
0
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600
0
    }
6601
6602
    /*************************************************************************/
6603
    /* Initialize coordinates. Meaning as follows                            */
6604
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6605
    /* blk_y : same as above, y coord.                                       */
6606
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6607
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
6608
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6609
    /* corner of the picture. Always multiple of 64.                         */
6610
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6611
    /*************************************************************************/
6612
6.36k
    blk_y = 0;
6613
6.36k
    blk_id_in_ctb = 0;
6614
6.36k
    i4_ctb_y = 0;
6615
6616
    /*************************************************************************/
6617
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
6618
    /* every block given its coordinate. Note this assumes that the min amt  */
6619
    /* of padding to right of pic is equal to the blk size. If we go all the */
6620
    /* way upto 64x64, then the min padding on right side of picture should  */
6621
    /* be 64, and also on bottom side of picture.                            */
6622
    /*************************************************************************/
6623
6.36k
    SET_PIC_LIMIT(
6624
6.36k
        s_pic_limit_inp,
6625
6.36k
        ps_curr_layer->i4_pad_x_rec,
6626
6.36k
        ps_curr_layer->i4_pad_y_rec,
6627
6.36k
        ps_curr_layer->i4_wd,
6628
6.36k
        ps_curr_layer->i4_ht,
6629
6.36k
        s_search_prms_blk.i4_num_steps_post_refine);
6630
6631
6.36k
    SET_PIC_LIMIT(
6632
6.36k
        s_pic_limit_rec,
6633
6.36k
        ps_curr_layer->i4_pad_x_rec,
6634
6.36k
        ps_curr_layer->i4_pad_y_rec,
6635
6.36k
        ps_curr_layer->i4_wd,
6636
6.36k
        ps_curr_layer->i4_ht,
6637
6.36k
        s_search_prms_blk.i4_num_steps_post_refine);
6638
6639
    /*************************************************************************/
6640
    /* set the MV limit per ref. pic.                                        */
6641
    /*    - P pic. : Based on the config params.                             */
6642
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643
    /*************************************************************************/
6644
6.36k
    hme_set_mv_limit_using_dvsr_data(
6645
6.36k
        ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646
6.36k
    s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647
6.36k
    s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648
6.36k
    s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649
6.36k
    s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650
6.36k
    s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651
6.36k
    s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652
6.36k
    s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653
6.36k
    s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654
6.36k
    s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655
6.36k
    s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656
6.36k
    s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657
6658
22.4k
    while(0 == end_of_frame)
6659
16.1k
    {
6660
16.1k
        job_queue_t *ps_job;
6661
16.1k
        frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662
16.1k
        ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663
6664
16.1k
        WORD32 i4_max_mv_x_in_ctb;
6665
16.1k
        WORD32 i4_max_mv_y_in_ctb;
6666
16.1k
        void *pv_dep_mngr_encloop_dep_me;
6667
16.1k
        WORD32 offset_val, check_dep_pos, set_dep_pos;
6668
16.1k
        WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669
6670
16.1k
        pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671
6672
16.1k
        ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673
6674
        /* Get the current row from the job queue */
6675
16.1k
        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676
16.1k
            ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677
6678
        /* If all rows are done, set the end of process flag to 1, */
6679
        /* and the current row to -1 */
6680
16.1k
        if(NULL == ps_job)
6681
6.36k
        {
6682
6.36k
            blk_y = -1;
6683
6.36k
            i4_ctb_y = -1;
6684
6.36k
            tile_col_idx = -1;
6685
6.36k
            end_of_frame = 1;
6686
6687
6.36k
            continue;
6688
6.36k
        }
6689
6690
        /* set the output dependency after picking up the row */
6691
9.74k
        ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692
6693
        /* Obtain the current row's details from the job */
6694
9.74k
        {
6695
9.74k
            ihevce_tile_params_t *ps_col_tile_params;
6696
6697
9.74k
            i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698
            /* Obtain the current column tile index from the job */
6699
9.74k
            tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700
6701
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6702
            /* note: if ctb is 32x32 then this calc needs to be changed */
6703
9.74k
            num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704
9.74k
                                    ps_ctxt->log_ctb_size;
6705
6706
            /* The tile parameter for the col. idx. Use only the properties
6707
            which are the same for all the bottom tiles like width, start_x, etc.
6708
            Don't use height, start_y, etc.                                  */
6709
9.74k
            ps_col_tile_params =
6710
9.74k
                ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6712
            /* note: if ctb is 32x32 then this calc needs to be changed */
6713
9.74k
            num_sync_units_in_tile =
6714
9.74k
                (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715
9.74k
                ps_ctxt->log_ctb_size;
6716
6717
9.74k
            i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718
9.74k
            i4_ctb_x = i4_first_ctb_x;
6719
6720
9.74k
            if(!num_act_ref_pics)
6721
0
            {
6722
0
                for(i4_ctb_x = i4_first_ctb_x;
6723
0
                    i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724
0
                    i4_ctb_x++)
6725
0
                {
6726
0
                    S32 blk_i = 0, blk_j = 0;
6727
                    /* set the dependency for the corresponding row in enc loop */
6728
0
                    ihevce_dmgr_set_row_row_sync(
6729
0
                        pv_dep_mngr_encloop_dep_me,
6730
0
                        (i4_ctb_x + 1),
6731
0
                        i4_ctb_y,
6732
0
                        tile_col_idx /* Col Tile No. */);
6733
0
                }
6734
6735
0
                continue;
6736
0
            }
6737
6738
            /* increment the number of rows proc */
6739
9.74k
            num_rows_proc++;
6740
6741
            /* Set Variables for Dep. Checking and Setting */
6742
9.74k
            set_dep_pos = i4_ctb_y + 1;
6743
9.74k
            if(i4_ctb_y > 0)
6744
3.37k
            {
6745
3.37k
                offset_val = 2;
6746
3.37k
                check_dep_pos = i4_ctb_y - 1;
6747
3.37k
            }
6748
6.36k
            else
6749
6.36k
            {
6750
                /* First row should run without waiting */
6751
6.36k
                offset_val = -1;
6752
6.36k
                check_dep_pos = 0;
6753
6.36k
            }
6754
6755
            /* row ctb out pointer  */
6756
9.74k
            ps_ctxt->ps_ctb_analyse_curr_row =
6757
9.74k
                ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758
6759
            /* Row level CU Tree buffer */
6760
9.74k
            ps_ctxt->ps_cu_tree_curr_row =
6761
9.74k
                ps_ctxt->ps_cu_tree_base +
6762
9.74k
                i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763
6764
9.74k
            ps_ctxt->ps_me_ctb_data_curr_row =
6765
9.74k
                ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766
9.74k
        }
6767
6768
        /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769
0
        left_ctb_in_diff_tile = 1;
6770
6771
        /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                 */
6772
        /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6773
9.74k
        {
6774
9.74k
            S32 i4_ref_id, i4_bits_req;
6775
6776
28.1k
            for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777
28.1k
                                            ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778
18.3k
                i4_ref_id++)
6779
18.3k
            {
6780
18.3k
                GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781
6782
18.3k
                if(i4_bits_req > 12)
6783
0
                {
6784
0
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785
0
                }
6786
18.3k
                else
6787
18.3k
                {
6788
18.3k
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789
18.3k
                }
6790
18.3k
            }
6791
6792
9.74k
            s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793
9.74k
        }
6794
6795
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796
        /* loop over all the units in a row                        */
6797
28.9k
        for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798
19.2k
            i4_ctb_x++)
6799
19.2k
        {
6800
19.2k
            ihevce_ctb_noise_params *ps_ctb_noise_params =
6801
19.2k
                &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802
6803
19.2k
            s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804
19.2k
            s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805
6806
19.2k
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807
19.2k
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808
            /* Initialize ptr to current IPE CTB */
6809
19.2k
            ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810
19.2k
                             i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811
19.2k
            {
6812
19.2k
                ps_ctb_bound_attrs =
6813
19.2k
                    get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814
6815
19.2k
                en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816
19.2k
                num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817
19.2k
            }
6818
6819
            /* Block to initialise pointers to part_type_results_t */
6820
            /* in each size-specific inter_cu_results_t  */
6821
19.2k
            {
6822
19.2k
                WORD32 i;
6823
6824
1.24M
                for(i = 0; i < 64; i++)
6825
1.22M
                {
6826
1.22M
                    ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827
1.22M
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828
1.22M
                            .as_8x8_block_data[i]
6829
1.22M
                            .as_best_results;
6830
1.22M
                    ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831
1.22M
                }
6832
6833
326k
                for(i = 0; i < 16; i++)
6834
307k
                {
6835
307k
                    ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836
307k
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837
307k
                    ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838
307k
                }
6839
6840
96.0k
                for(i = 0; i < 4; i++)
6841
76.8k
                {
6842
76.8k
                    ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843
76.8k
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844
76.8k
                            .as_32x32_block_data[i]
6845
76.8k
                            .as_best_results;
6846
76.8k
                    ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847
76.8k
                }
6848
6849
19.2k
                ps_ctxt->s_cu64x64_results.ps_best_results =
6850
19.2k
                    ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851
19.2k
                ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852
19.2k
            }
6853
6854
19.2k
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855
3.61k
            {
6856
3.61k
                ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857
3.61k
                ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858
3.61k
                ps_ctb_cluster_info->ps_cu_tree_root =
6859
3.61k
                    ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860
3.61k
                ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861
3.61k
            }
6862
6863
19.2k
            if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864
15.5k
            {
6865
15.5k
                S32 i4_nodes_created_in_cu_tree = 1;
6866
6867
15.5k
                ihevce_cu_tree_init(
6868
15.5k
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869
15.5k
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870
15.5k
                    &i4_nodes_created_in_cu_tree,
6871
15.5k
                    0,
6872
15.5k
                    POS_NA,
6873
15.5k
                    POS_NA,
6874
15.5k
                    POS_NA);
6875
15.5k
            }
6876
6877
19.2k
            memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878
6879
19.2k
            if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880
4.73k
            {
6881
4.73k
                S32 j;
6882
6883
4.73k
                ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884
6885
4.73k
                ps_cur_ipe_ctb =
6886
4.73k
                    ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887
4.73k
                lambda_recon =
6888
4.73k
                    hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889
6890
4.73k
                lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891
6892
23.6k
                for(i = 0; i < 4; i++)
6893
18.9k
                {
6894
18.9k
                    ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895
6896
56.8k
                    for(j = 0; j < 2; j++)
6897
37.8k
                    {
6898
37.8k
                        ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899
37.8k
                    }
6900
18.9k
                }
6901
4.73k
                ps_search_results = &ps_ctxt->s_search_results_64x64;
6902
6903
14.2k
                for(j = 0; j < 2; j++)
6904
9.47k
                {
6905
9.47k
                    ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906
9.47k
                }
6907
6908
4.73k
                s_common_frm_prms.i4_lamda = lambda_recon;
6909
4.73k
            }
6910
14.4k
            else
6911
14.4k
            {
6912
14.4k
                lambda_recon = ps_refine_prms->lambda_recon;
6913
14.4k
            }
6914
6915
            /*********************************************************************/
6916
            /* replicate the inp buffer at blk or ctb level for each ref id,     */
6917
            /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918
            /* thereby avoiding a bloat up of memory. If we did all references   */
6919
            /* weighted pred, we will end up with a duplicate copy of each ref   */
6920
            /* at each layer, since we need to preserve the original reference.  */
6921
            /* ToDo: Need to observe performance with this mechanism and compare */
6922
            /* with case where ref is weighted.                                  */
6923
            /*********************************************************************/
6924
19.2k
            fp_get_wt_inp(
6925
19.2k
                ps_curr_layer,
6926
19.2k
                &ps_ctxt->s_wt_pred,
6927
19.2k
                unit_size,
6928
19.2k
                s_common_frm_prms.i4_ctb_x_off,
6929
19.2k
                s_common_frm_prms.i4_ctb_y_off,
6930
19.2k
                unit_size,
6931
19.2k
                ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932
19.2k
                ps_ctxt->i4_wt_pred_enable_flag);
6933
6934
19.2k
            if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935
0
            {
6936
0
#if TEMPORAL_NOISE_DETECT
6937
0
                {
6938
0
                    WORD32 had_block_size = 16;
6939
0
                    WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940
0
                                           ? 64
6941
0
                                           : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942
0
                    WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943
0
                                            ? 64
6944
0
                                            : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945
0
                    WORD32 num_pred_dir = i4_num_pred_dir;
6946
0
                    WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947
0
                    WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948
6949
0
                    WORD32 i;
6950
0
                    WORD32 noise_detected;
6951
0
                    WORD32 ctb_size;
6952
0
                    WORD32 num_comp_had_blocks;
6953
0
                    WORD32 noisy_block_cnt;
6954
0
                    WORD32 index_8x8_block;
6955
0
                    WORD32 num_8x8_in_ctb_row;
6956
6957
0
                    WORD32 ht_offset;
6958
0
                    WORD32 wd_offset;
6959
0
                    WORD32 block_ht;
6960
0
                    WORD32 block_wd;
6961
6962
0
                    WORD32 num_horz_blocks;
6963
0
                    WORD32 num_vert_blocks;
6964
6965
0
                    WORD32 mean;
6966
0
                    UWORD32 variance_8x8;
6967
6968
0
                    WORD32 hh_energy_percent;
6969
6970
                    /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6971
0
                    WORD32 min_noisy_block_cnt;
6972
0
                    WORD32 min_coeffs_above_avg;
6973
0
                    WORD32 min_coeff_avg_energy;
6974
6975
                    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
6976
0
                    WORD32 i4_cu_x_off, i4_cu_y_off;
6977
0
                    WORD32 is_noisy;
6978
6979
                    /* initialise the variables holding the constants */
6980
0
                    if(had_block_size == 8)
6981
0
                    {
6982
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6983
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985
0
                    }
6986
0
                    else
6987
0
                    {
6988
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6989
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991
0
                    }
6992
6993
                    /* initialize the variables */
6994
0
                    noise_detected = 0;
6995
0
                    noisy_block_cnt = 0;
6996
0
                    hh_energy_percent = 0;
6997
0
                    variance_8x8 = 0;
6998
0
                    block_ht = ctb_height;
6999
0
                    block_wd = ctb_width;
7000
7001
0
                    mean = 0;
7002
7003
0
                    ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
7004
0
                    num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005
7006
0
                    num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
7007
0
                    num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
7008
7009
0
                    ht_offset = -had_block_size;
7010
0
                    wd_offset = -had_block_size;
7011
7012
0
                    num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
7013
0
                    for(i = 0; i < num_comp_had_blocks; i++)
7014
0
                    {
7015
0
                        if(i % num_horz_blocks == 0)
7016
0
                        {
7017
0
                            wd_offset = -had_block_size;
7018
0
                            ht_offset += had_block_size;
7019
0
                        }
7020
0
                        wd_offset += had_block_size;
7021
7022
                        /* CU level offsets */
7023
0
                        i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7024
0
                        i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025
7026
                        /* if 50 % or more of the CU is noisy then the return value is 1 */
7027
0
                        is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028
0
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029
0
                            (i % 4) * 16,
7030
0
                            (i / 4) * 16,
7031
0
                            16);
7032
7033
                        /* the temporal noise detect call is made on the CU only if the CU is noisy */
7034
0
                        if(is_noisy)
7035
0
                        {
7036
0
                            index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037
0
                                              (i % num_horz_blocks) * 2;
7038
0
                            noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039
0
                                16,
7040
0
                                ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041
0
                                    ? 64
7042
0
                                    : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043
0
                                ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044
0
                                    ? 64
7045
0
                                    : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046
0
                                ps_ctb_noise_params,
7047
0
                                &s_srch_cand_init_data,
7048
0
                                &s_search_prms_blk,
7049
0
                                ps_ctxt,
7050
0
                                num_pred_dir,
7051
0
                                i4_num_act_ref_l0,
7052
0
                                i4_num_act_ref_l1,
7053
0
                                i4_cu_x_off,
7054
0
                                i4_cu_y_off,
7055
0
                                &ps_ctxt->s_wt_pred,
7056
0
                                unit_size,
7057
0
                                index_8x8_block,
7058
0
                                num_horz_blocks,
7059
0
                                /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7060
0
                                i);
7061
0
                        } /* if 16x16 is noisy */
7062
0
                    } /* loop over for all 16x16*/
7063
7064
0
                    if(noisy_block_cnt >= min_noisy_block_cnt)
7065
0
                    {
7066
0
                        noise_detected = 1;
7067
0
                    }
7068
7069
                    /* write back the noise presence detected for the current CTB to the structure */
7070
0
                    ps_ctb_noise_params->i4_noise_present = noise_detected;
7071
0
                }
7072
0
#endif
7073
7074
#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075
                if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076
                   ps_ctb_noise_params->i4_noise_present)
7077
                {
7078
                    memset(
7079
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080
                        1,
7081
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082
                }
7083
#endif
7084
7085
0
                for(i = 0; i < 16; i++)
7086
0
                {
7087
0
                    au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089
0
                }
7090
7091
0
                for(i = 0; i < 4; i++)
7092
0
                {
7093
0
                    au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095
0
                }
7096
7097
0
                for(i = 0; i < 1; i++)
7098
0
                {
7099
0
                    au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101
0
                }
7102
7103
0
                if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104
0
                   (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105
0
                    MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106
0
                {
7107
0
                    ps_ctb_noise_params->i4_noise_present = 0;
7108
0
                    memset(
7109
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110
0
                        0,
7111
0
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112
0
                }
7113
7114
0
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115
0
                for(i = 0; i < 4; i++)
7116
0
                {
7117
0
                    S32 j;
7118
0
                    S32 lambda;
7119
7120
0
                    if(au1_is_32x32Blk_noisy[i])
7121
0
                    {
7122
0
                        lambda = lambda_recon;
7123
0
                        lambda =
7124
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125
7126
0
                        ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127
7128
0
                        for(j = 0; j < 2; j++)
7129
0
                        {
7130
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131
0
                        }
7132
0
                    }
7133
0
                }
7134
7135
0
                {
7136
0
                    S32 j;
7137
0
                    S32 lambda;
7138
7139
0
                    if(au1_is_64x64Blk_noisy[0])
7140
0
                    {
7141
0
                        lambda = lambda_recon;
7142
0
                        lambda =
7143
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144
7145
0
                        ps_search_results = &ps_ctxt->s_search_results_64x64;
7146
7147
0
                        for(j = 0; j < 2; j++)
7148
0
                        {
7149
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150
0
                        }
7151
0
                    }
7152
0
                }
7153
0
#endif
7154
0
                if(au1_is_64x64Blk_noisy[0])
7155
0
                {
7156
0
                    U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157
0
                                                             (s_common_frm_prms.i4_ctb_y_off *
7158
0
                                                              ps_curr_layer->i4_inp_stride));
7159
7160
0
                    hme_compute_sigmaX_and_sigmaXSquared(
7161
0
                        pu1_inp,
7162
0
                        ps_curr_layer->i4_inp_stride,
7163
0
                        ps_ctxt->au4_4x4_src_sigmaX,
7164
0
                        ps_ctxt->au4_4x4_src_sigmaXSquared,
7165
0
                        4,
7166
0
                        4,
7167
0
                        64,
7168
0
                        64,
7169
0
                        1,
7170
0
                        16);
7171
0
                }
7172
0
                else
7173
0
                {
7174
0
                    for(i = 0; i < 4; i++)
7175
0
                    {
7176
0
                        if(au1_is_32x32Blk_noisy[i])
7177
0
                        {
7178
0
                            U08 *pu1_inp =
7179
0
                                ps_curr_layer->pu1_inp +
7180
0
                                (s_common_frm_prms.i4_ctb_x_off +
7181
0
                                 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182
7183
0
                            U08 u1_cu_size = 32;
7184
0
                            WORD32 i4_inp_buf_offset =
7185
0
                                (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186
0
                                 ((i % 2) * u1_cu_size));
7187
7188
0
                            U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189
0
                            U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190
0
                            S32 i4_sigma_arr_offset =
7191
0
                                (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192
0
                                 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193
7194
0
                            hme_compute_sigmaX_and_sigmaXSquared(
7195
0
                                pu1_inp + i4_inp_buf_offset,
7196
0
                                ps_curr_layer->i4_inp_stride,
7197
0
                                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198
0
                                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199
0
                                4,
7200
0
                                4,
7201
0
                                32,
7202
0
                                32,
7203
0
                                1,
7204
0
                                16);
7205
0
                        }
7206
0
                        else
7207
0
                        {
7208
0
                            S32 j;
7209
7210
0
                            U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211
0
                            U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212
0
                            S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213
0
                                (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214
0
                                 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215
7216
0
                            for(j = 0; j < 4; j++)
7217
0
                            {
7218
0
                                U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219
0
                                U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220
0
                                S32 i4_16x16_blk_index_in_ctb =
7221
0
                                    i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222
0
                                    ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223
0
                                    ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224
7225
                                //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226
7227
0
                                if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228
0
                                {
7229
0
                                    U08 *pu1_inp =
7230
0
                                        ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231
0
                                                                  (s_common_frm_prms.i4_ctb_y_off *
7232
0
                                                                   ps_curr_layer->i4_inp_stride));
7233
7234
0
                                    U08 u1_cu_size = 16;
7235
0
                                    WORD32 i4_inp_buf_offset =
7236
0
                                        (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7238
0
                                          (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239
7240
0
                                    U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241
0
                                    U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242
0
                                    S32 i4_sigma_arr_offset =
7243
0
                                        (((i4_16x16_blk_index_in_ctb % 4) *
7244
0
                                          u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7246
0
                                          u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247
7248
0
                                    hme_compute_sigmaX_and_sigmaXSquared(
7249
0
                                        pu1_inp + i4_inp_buf_offset,
7250
0
                                        ps_curr_layer->i4_inp_stride,
7251
0
                                        (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252
0
                                        (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253
0
                                        4,
7254
0
                                        4,
7255
0
                                        16,
7256
0
                                        16,
7257
0
                                        1,
7258
0
                                        16);
7259
0
                                }
7260
0
                            }
7261
0
                        }
7262
0
                    }
7263
0
                }
7264
0
            }
7265
19.2k
            else
7266
19.2k
            {
7267
19.2k
                memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268
7269
19.2k
                memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270
7271
19.2k
                memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272
19.2k
            }
7273
7274
293k
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275
274k
            {
7276
274k
                S32 ref_ctr;
7277
274k
                U08 au1_pred_dir_searched[2];
7278
274k
                U08 u1_is_cu_noisy;
7279
274k
                ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280
7281
274k
                {
7282
274k
                    blk_x = (i4_ctb_x << 2) +
7283
274k
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284
274k
                    blk_y = (i4_ctb_y << 2) +
7285
274k
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286
7287
274k
                    blk_id_in_full_ctb =
7288
274k
                        ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289
274k
                    blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290
274k
                    ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291
274k
                    s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292
274k
                    s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293
274k
                }
7294
7295
                /* get the current input blk point */
7296
274k
                pos_x = blk_x << blk_size_shift;
7297
274k
                pos_y = blk_y << blk_size_shift;
7298
274k
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299
7300
                /*********************************************************************/
7301
                /* For every blk in the picture, the search range needs to be derived*/
7302
                /* Any blk can have any mv, but practical search constraints are     */
7303
                /* imposed by the picture boundary and amt of padding.               */
7304
                /*********************************************************************/
7305
                /* MV limit is different based on ref. PIC */
7306
772k
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307
498k
                {
7308
498k
                    if(!s_search_prms_blk.i4_use_rec)
7309
0
                    {
7310
0
                        hme_derive_search_range(
7311
0
                            &as_range_prms_inp[ref_ctr],
7312
0
                            &s_pic_limit_inp,
7313
0
                            &as_mv_limit[ref_ctr],
7314
0
                            pos_x,
7315
0
                            pos_y,
7316
0
                            blk_wd,
7317
0
                            blk_ht);
7318
0
                    }
7319
498k
                    else
7320
498k
                    {
7321
498k
                        hme_derive_search_range(
7322
498k
                            &as_range_prms_rec[ref_ctr],
7323
498k
                            &s_pic_limit_rec,
7324
498k
                            &as_mv_limit[ref_ctr],
7325
498k
                            pos_x,
7326
498k
                            pos_y,
7327
498k
                            blk_wd,
7328
498k
                            blk_ht);
7329
498k
                    }
7330
498k
                }
7331
274k
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332
274k
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333
                /* Select search results from a suitable search result in the context */
7334
274k
                {
7335
274k
                    ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336
7337
274k
                    if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338
66.2k
                    {
7339
66.2k
                        S32 i;
7340
7341
198k
                        for(i = 0; i < 2; i++)
7342
132k
                        {
7343
132k
                            ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344
132k
                        }
7345
66.2k
                    }
7346
274k
                }
7347
7348
274k
                u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349
274k
                    [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350
7351
274k
                s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352
7353
274k
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354
274k
                if(u1_is_cu_noisy)
7355
0
                {
7356
0
                    S32 j;
7357
0
                    S32 lambda;
7358
7359
0
                    lambda = lambda_recon;
7360
0
                    lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361
7362
0
                    for(j = 0; j < 2; j++)
7363
0
                    {
7364
0
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365
0
                    }
7366
0
                }
7367
274k
                else
7368
274k
                {
7369
274k
                    S32 j;
7370
274k
                    S32 lambda;
7371
7372
274k
                    lambda = lambda_recon;
7373
7374
822k
                    for(j = 0; j < 2; j++)
7375
548k
                    {
7376
548k
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377
548k
                    }
7378
274k
                }
7379
274k
#endif
7380
7381
274k
                s_search_prms_blk.ps_search_results = ps_search_results;
7382
7383
274k
                s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384
274k
                    pu1_inp,
7385
274k
                    i4_inp_stride,
7386
274k
                    ps_refine_prms->limit_active_partitions,
7387
274k
                    ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388
274k
                    ps_ctxt->u1_is_curFrame_a_refFrame,
7389
274k
                    blk_8x8_mask,
7390
274k
                    e_me_quality_presets);
7391
7392
274k
                if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393
49.4k
                {
7394
49.4k
                    ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395
49.4k
                        s_search_prms_blk.i4_part_mask;
7396
49.4k
                }
7397
7398
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399
274k
                {
7400
                    /* Setting u1_num_active_ref to 2 (bidir enabled) or 1 (otherwise) */
7401
                    /* for the sole purpose of the */
7402
                    /* function called below */
7403
274k
                    ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404
7405
274k
                    hme_reset_search_results(
7406
274k
                        ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407
7408
274k
                    ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409
274k
                }
7410
7411
274k
                if(0 == blk_id_in_ctb)
7412
19.2k
                {
7413
19.2k
                    UWORD8 u1_ctr;
7414
54.3k
                    for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415
54.3k
                                              ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416
35.1k
                        u1_ctr++)
7417
35.1k
                    {
7418
35.1k
                        WORD32 i4_max_dep_ctb_y;
7419
35.1k
                        WORD32 i4_max_dep_ctb_x;
7420
7421
                        /* Set max mv in ctb units */
7422
35.1k
                        i4_max_mv_x_in_ctb =
7423
35.1k
                            (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424
35.1k
                            ps_ctxt->log_ctb_size;
7425
7426
35.1k
                        i4_max_mv_y_in_ctb =
7427
35.1k
                            (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428
35.1k
                            ps_ctxt->log_ctb_size;
7429
                        /********************************************************************/
7430
                        /* Set max ctb_x and ctb_y dependency on reference picture          */
7431
                        /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7432
                        /********************************************************************/
7433
35.1k
                        i4_max_dep_ctb_x = CLIP3(
7434
35.1k
                            (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435
35.1k
                            0,
7436
35.1k
                            ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437
35.1k
                        i4_max_dep_ctb_y = CLIP3(
7438
35.1k
                            (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439
35.1k
                            0,
7440
35.1k
                            ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441
7442
35.1k
                        ihevce_dmgr_map_chk_sync(
7443
35.1k
                            ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444
35.1k
                            ps_ctxt->thrd_id,
7445
35.1k
                            i4_ctb_x,
7446
35.1k
                            i4_ctb_y,
7447
35.1k
                            i4_max_mv_x_in_ctb,
7448
35.1k
                            i4_max_mv_y_in_ctb);
7449
35.1k
                    }
7450
19.2k
                }
7451
7452
                /* Loop across the prediction directions being searched */
7453
598k
                for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454
324k
                {
7455
324k
                    S32 resultid;
7456
324k
                    S08 u1_default_ref_id;
7457
324k
                    S32 i4_num_srch_cands = 0;
7458
324k
                    S32 i4_num_refinement_iterations;
7459
324k
                    S32 i4_refine_iter_ctr;
7460
7461
324k
                    if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462
512
                       (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463
324k
                    {
7464
324k
                        u1_pred_dir = u1_pred_dir_ctr;
7465
324k
                    }
7466
512
                    else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467
512
                    {
7468
512
                        u1_pred_dir = 1;
7469
512
                    }
7470
7471
324k
                    u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472
324k
                                                           : ps_ctxt->ai1_future_list[0];
7473
324k
                    au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474
7475
324k
                    i4_num_srch_cands = 0;
7476
324k
                    resultid = 0;
7477
7478
                    /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479
324k
                    if(0 == blk_id_in_ctb)
7480
22.5k
                    {
7481
                        /*****************************************************************/
7482
                        /* Initialize the mv grid with results of neighbours for the next*/
7483
                        /* ctb.                                                          */
7484
                        /*****************************************************************/
7485
22.5k
                        hme_fill_ctb_neighbour_mvs(
7486
22.5k
                            ps_curr_layer,
7487
22.5k
                            blk_x,
7488
22.5k
                            blk_y,
7489
22.5k
                            aps_mv_grid[u1_pred_dir],
7490
22.5k
                            u1_pred_dir_ctr,
7491
22.5k
                            u1_default_ref_id,
7492
22.5k
                            ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493
22.5k
                    }
7494
7495
324k
                    s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496
7497
324k
                    {
7498
324k
                        if((blk_id_in_full_ctb % 4) == 0)
7499
83.7k
                        {
7500
83.7k
                            ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501
83.7k
                                .as_pred_ctxt[u1_pred_dir]
7502
83.7k
                                .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503
83.7k
                        }
7504
7505
324k
                        if(blk_id_in_full_ctb == 0)
7506
22.5k
                        {
7507
22.5k
                            ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508
22.5k
                        }
7509
7510
324k
                        ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511
324k
                            !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512
324k
                    }
7513
7514
324k
                    {
7515
324k
                        S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516
324k
                        S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517
324k
                        U08 u1_is_blk_at_ctb_boundary = !y;
7518
7519
324k
                        s_srch_cand_init_data.u1_is_left_available =
7520
324k
                            !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521
7522
324k
                        if(u1_is_blk_at_ctb_boundary)
7523
85.3k
                        {
7524
85.3k
                            s_srch_cand_init_data.u1_is_topRight_available = 0;
7525
85.3k
                            s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526
85.3k
                            s_srch_cand_init_data.u1_is_top_available = 0;
7527
85.3k
                        }
7528
239k
                        else
7529
239k
                        {
7530
239k
                            s_srch_cand_init_data.u1_is_topRight_available =
7531
239k
                                gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532
239k
                            s_srch_cand_init_data.u1_is_top_available = 1;
7533
239k
                            s_srch_cand_init_data.u1_is_topLeft_available =
7534
239k
                                s_srch_cand_init_data.u1_is_left_available;
7535
239k
                        }
7536
324k
                    }
7537
7538
324k
                    s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539
324k
                    s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540
324k
                    s_srch_cand_init_data.i4_pos_x = pos_x;
7541
324k
                    s_srch_cand_init_data.i4_pos_y = pos_y;
7542
324k
                    s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543
324k
                    s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544
324k
                    s_srch_cand_init_data.u1_search_candidate_list_index =
7545
324k
                        au1_search_candidate_list_index[u1_pred_dir];
7546
7547
324k
                    i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548
7549
                    /* Note this block also clips the MV range for all candidates */
7550
324k
                    {
7551
324k
                        S08 i1_check_for_mult_refs;
7552
7553
324k
                        i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554
324k
                                                             : (ps_ctxt->num_ref_past > 1);
7555
7556
324k
                        ps_me_optimised_function_list->pf_mv_clipper(
7557
324k
                            &s_search_prms_blk,
7558
324k
                            i4_num_srch_cands,
7559
324k
                            i1_check_for_mult_refs,
7560
324k
                            ps_refine_prms->i4_num_steps_fpel_refine,
7561
324k
                            ps_refine_prms->i4_num_steps_hpel_refine,
7562
324k
                            ps_refine_prms->i4_num_steps_qpel_refine);
7563
324k
                    }
7564
7565
324k
#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566
324k
                    i4_num_refinement_iterations =
7567
324k
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568
324k
                            ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569
324k
                            : 1;
7570
#else
7571
                    i4_num_refinement_iterations =
7572
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573
#endif
7574
7575
#if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576
                    if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577
                    {
7578
                        i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579
                                                                          : i4_num_act_ref_l1;
7580
                    }
7581
#endif
7582
7583
791k
                    for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584
466k
                        i4_refine_iter_ctr++)
7585
466k
                    {
7586
466k
                        S32 center_x;
7587
466k
                        S32 center_y;
7588
466k
                        S32 center_ref_idx;
7589
7590
466k
                        S08 *pi1_pred_dir_to_ref_idx =
7591
466k
                            (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592
7593
466k
                        {
7594
466k
                            WORD32 i4_i;
7595
7596
8.39M
                            for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597
7.92M
                            {
7598
7.92M
                                ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599
7.92M
                                ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600
7.92M
                                ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601
7.92M
                                    MAX_SIGNED_16BIT_VAL;
7602
7.92M
                                ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603
7.92M
                                ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604
7.92M
                                ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605
7606
7.92M
                                if(ps_refine_prms->i4_num_results_per_part == 2)
7607
0
                                {
7608
0
                                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609
0
                                        MAX_SIGNED_16BIT_VAL;
7610
0
                                    ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611
0
                                        MAX_SIGNED_16BIT_VAL;
7612
0
                                    ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613
0
                                        MAX_SIGNED_16BIT_VAL;
7614
0
                                    ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615
0
                                    ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616
0
                                    ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617
0
                                }
7618
7.92M
                            }
7619
7620
466k
                            s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621
466k
                            s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622
466k
                        }
7623
7624
466k
                        {
7625
466k
                            search_node_t *ps_coloc_node;
7626
7627
466k
                            S32 i = 0;
7628
7629
466k
                            if(i4_num_refinement_iterations > 1)
7630
266k
                            {
7631
854k
                                for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632
759k
                                {
7633
759k
                                    ps_coloc_node =
7634
759k
                                        s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635
759k
                                            .ps_search_node;
7636
7637
759k
                                    if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638
759k
                                       ps_coloc_node->i1_ref_idx)
7639
171k
                                    {
7640
171k
                                        break;
7641
171k
                                    }
7642
759k
                                }
7643
7644
266k
                                if(i == ai4_num_coloc_cands[u1_pred_dir])
7645
95.4k
                                {
7646
95.4k
                                    i = 0;
7647
95.4k
                                }
7648
266k
                            }
7649
199k
                            else
7650
199k
                            {
7651
199k
                                ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652
199k
                                                    .ps_search_node;
7653
199k
                            }
7654
7655
466k
                            hme_set_mvp_node(
7656
466k
                                ps_search_results,
7657
466k
                                ps_coloc_node,
7658
466k
                                u1_pred_dir,
7659
466k
                                (i4_num_refinement_iterations > 1)
7660
466k
                                    ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661
466k
                                    : u1_default_ref_id);
7662
7663
466k
                            center_x = ps_coloc_node->ps_mv->i2_mvx;
7664
466k
                            center_y = ps_coloc_node->ps_mv->i2_mvy;
7665
466k
                            center_ref_idx = ps_coloc_node->i1_ref_idx;
7666
466k
                        }
7667
7668
                        /* Full-Pel search */
7669
466k
                        {
7670
466k
                            S32 num_unique_nodes;
7671
7672
466k
                            memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673
7674
466k
                            num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675
466k
                                as_unique_search_nodes,
7676
466k
                                s_search_prms_blk.ps_search_candts,
7677
466k
                                au4_unique_node_map,
7678
466k
                                pi1_pred_dir_to_ref_idx,
7679
466k
                                i4_num_srch_cands,
7680
466k
                                s_search_prms_blk.i4_num_init_candts,
7681
466k
                                i4_refine_iter_ctr,
7682
466k
                                i4_num_refinement_iterations,
7683
466k
                                i4_num_act_ref_l0,
7684
466k
                                center_ref_idx,
7685
466k
                                center_x,
7686
466k
                                center_y,
7687
466k
                                ps_ctxt->s_frm_prms.bidir_enabled,
7688
466k
                                e_me_quality_presets);
7689
7690
                            /*************************************************************************/
7691
                            /* This array stores the ids of the partitions whose                     */
7692
                            /* SADs are updated. Since the partitions whose SADs are updated may not */
7693
                            /* be in contiguous order, we supply another level of indirection.       */
7694
                            /*************************************************************************/
7695
466k
                            ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696
466k
                                s_search_prms_blk.i4_part_mask,
7697
466k
                                &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698
7699
466k
                            if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700
0
                            {
7701
0
                                S32 i;
7702
                                /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703
0
                                S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704
0
                                                            (s_search_prms_blk.i4_cu_y_off * 4);
7705
7706
0
                                for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707
0
                                {
7708
0
                                    S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709
7710
0
                                    hme_compute_final_sigma_of_pu_from_base_blocks(
7711
0
                                        ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712
0
                                        ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713
0
                                        au8_final_src_sigmaX,
7714
0
                                        au8_final_src_sigmaXSquared,
7715
0
                                        16,
7716
0
                                        4,
7717
0
                                        i4_part_id,
7718
0
                                        16);
7719
0
                                }
7720
7721
0
                                s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722
0
                                s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723
0
                                    au8_final_src_sigmaXSquared;
7724
7725
0
                                s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726
0
                                s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727
0
                                    au8_final_src_sigmaXSquared;
7728
0
                            }
7729
7730
466k
                            if(0 == num_unique_nodes)
7731
11.7k
                            {
7732
11.7k
                                continue;
7733
11.7k
                            }
7734
7735
454k
                            if(num_unique_nodes >= 2)
7736
313k
                            {
7737
313k
                                s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738
313k
                                s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739
313k
                                if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740
76.4k
                                {
7741
76.4k
                                    if(ps_ctxt->i4_temporal_layer == 1)
7742
29.2k
                                    {
7743
29.2k
                                        hme_fullpel_cand_sifter(
7744
29.2k
                                            &s_search_prms_blk,
7745
29.2k
                                            ps_curr_layer,
7746
29.2k
                                            &ps_ctxt->s_wt_pred,
7747
29.2k
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7748
29.2k
                                            u1_is_cu_noisy,
7749
29.2k
                                            ps_me_optimised_function_list);
7750
29.2k
                                    }
7751
47.2k
                                    else
7752
47.2k
                                    {
7753
47.2k
                                        hme_fullpel_cand_sifter(
7754
47.2k
                                            &s_search_prms_blk,
7755
47.2k
                                            ps_curr_layer,
7756
47.2k
                                            &ps_ctxt->s_wt_pred,
7757
47.2k
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7758
47.2k
                                            u1_is_cu_noisy,
7759
47.2k
                                            ps_me_optimised_function_list);
7760
47.2k
                                    }
7761
76.4k
                                }
7762
236k
                                else
7763
236k
                                {
7764
236k
                                    hme_fullpel_cand_sifter(
7765
236k
                                        &s_search_prms_blk,
7766
236k
                                        ps_curr_layer,
7767
236k
                                        &ps_ctxt->s_wt_pred,
7768
236k
                                        ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769
236k
                                        u1_is_cu_noisy,
7770
236k
                                        ps_me_optimised_function_list);
7771
236k
                                }
7772
313k
                            }
7773
7774
454k
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775
7776
454k
                            hme_fullpel_refine(
7777
454k
                                ps_refine_prms,
7778
454k
                                &s_search_prms_blk,
7779
454k
                                ps_curr_layer,
7780
454k
                                &ps_ctxt->s_wt_pred,
7781
454k
                                au4_unique_node_map,
7782
454k
                                num_unique_nodes,
7783
454k
                                blk_8x8_mask,
7784
454k
                                center_x,
7785
454k
                                center_y,
7786
454k
                                center_ref_idx,
7787
454k
                                e_me_quality_presets,
7788
454k
                                ps_me_optimised_function_list);
7789
454k
                        }
7790
7791
                        /* Sub-Pel search */
7792
0
                        {
7793
454k
                            hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794
7795
454k
                            s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796
454k
                                &ps_ctxt->s_buf_mgr,
7797
454k
                                INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798
                            /* MV limit is different based on ref. PIC */
7799
1.39M
                            for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800
937k
                            {
7801
937k
                                SCALE_RANGE_PRMS(
7802
937k
                                    as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803
937k
                                SCALE_RANGE_PRMS(
7804
937k
                                    as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805
937k
                            }
7806
454k
                            s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807
454k
                            s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808
7809
454k
                            hme_subpel_refine_cu_hs(
7810
454k
                                &s_subpel_prms,
7811
454k
                                ps_curr_layer,
7812
454k
                                ps_search_results,
7813
454k
                                u1_pred_dir,
7814
454k
                                &ps_ctxt->s_wt_pred,
7815
454k
                                blk_8x8_mask,
7816
454k
                                ps_ctxt->ps_func_selector,
7817
454k
                                ps_cmn_utils_optimised_function_list,
7818
454k
                                ps_me_optimised_function_list);
7819
454k
                        }
7820
454k
                    }
7821
324k
                }
7822
                /* Populate the new PU struct with the results post subpel refinement*/
7823
274k
                {
7824
274k
                    inter_cu_results_t *ps_cu_results;
7825
274k
                    WORD32 best_inter_cost, intra_cost, posx, posy;
7826
7827
274k
                    UWORD8 intra_8x8_enabled = 0;
7828
7829
                    /*  cost of 16x16 cu parent  */
7830
274k
                    WORD32 parent_cost = MAX_32BIT_VAL;
7831
7832
                    /*  cost of 8x8 cu children  */
7833
                    /*********************************************************************/
7834
                    /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835
                    /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836
                    /* So, 4*lambda is extra for children cost.                          */
7837
                    /*********************************************************************/
7838
274k
                    WORD32 child_cost = 0;
7839
7840
274k
                    ps_cu_results = ps_search_results->ps_cu_results;
7841
7842
                    /* Initialize the pu_results pointers to the first struct in the stack array */
7843
274k
                    ps_pu_results = as_inter_pu_results;
7844
7845
274k
                    hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846
7847
274k
                    hme_populate_pus(
7848
274k
                        ps_thrd_ctxt,
7849
274k
                        ps_ctxt,
7850
274k
                        &s_subpel_prms,
7851
274k
                        ps_search_results,
7852
274k
                        ps_cu_results,
7853
274k
                        ps_pu_results,
7854
274k
                        &(as_pu_results[0][0][0]),
7855
274k
                        &s_common_frm_prms,
7856
274k
                        &ps_ctxt->s_wt_pred,
7857
274k
                        ps_curr_layer,
7858
274k
                        au1_pred_dir_searched,
7859
274k
                        i4_num_pred_dir);
7860
7861
274k
                    ps_cu_results->i4_inp_offset =
7862
274k
                        (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863
7864
274k
                    hme_decide_part_types(
7865
274k
                        ps_cu_results,
7866
274k
                        ps_pu_results,
7867
274k
                        &s_common_frm_prms,
7868
274k
                        ps_ctxt,
7869
274k
                        ps_cmn_utils_optimised_function_list,
7870
274k
                        ps_me_optimised_function_list
7871
7872
274k
                    );
7873
7874
                    /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
7875
                    /* Only for P pics: is_i_pic and bidir_enabled are both 0 for P; for I and B they are mutually exclusive */
7876
274k
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877
223k
                    {
7878
223k
                        WORD32 res_ctr;
7879
7880
596k
                        for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881
373k
                        {
7882
373k
                            WORD32 num_part = 2, part_ctr;
7883
373k
                            part_type_results_t *ps_best_results =
7884
373k
                                &ps_cu_results->ps_best_results[res_ctr];
7885
7886
373k
                            if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887
214k
                                num_part = 1;
7888
7889
906k
                            for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890
533k
                            {
7891
533k
                                pu_result_t *ps_pu_results =
7892
533k
                                    &ps_best_results->as_pu_results[part_ctr];
7893
7894
533k
                                ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895
7896
533k
                                hme_update_dynamic_search_params(
7897
533k
                                    &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898
533k
                                         .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899
533k
                                    ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900
7901
                                /* Sanity Check */
7902
533k
                                ASSERT(
7903
533k
                                    ps_pu_results->pu.mv.i1_l0_ref_idx <
7904
533k
                                    ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905
7906
                                /* No L1 for P Pic. */
7907
533k
                                ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908
                                /* No BI for P Pic. */
7909
533k
                                ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910
533k
                            }
7911
373k
                        }
7912
223k
                    }
7913
7914
                    /*****************************************************************/
7915
                    /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7916
                    /*****************************************************************/
7917
7918
274k
#if DISABLE_INTRA_IN_BPICS
7919
274k
                    if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920
50.3k
                             (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921
273k
#endif
7922
273k
                    {
7923
273k
                        if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924
273k
                        {
7925
273k
                            hme_insert_intra_nodes_post_bipred(
7926
273k
                                ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927
273k
                        }
7928
273k
                    }
7929
7930
274k
#if DISABLE_INTRA_IN_BPICS
7931
274k
                    if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932
50.3k
                       (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933
960
                    {
7934
960
                        intra_8x8_enabled = 0;
7935
960
                    }
7936
273k
                    else
7937
273k
#endif
7938
273k
                    {
7939
                        /*TRAQO intra flag update*/
7940
273k
                        if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941
17.1k
                        {
7942
17.1k
                            best_inter_cost =
7943
17.1k
                                ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944
17.1k
                            intra_cost =
7945
17.1k
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946
                            /*@16x16 level*/
7947
17.1k
                            posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948
17.1k
                                    << 2) >>
7949
17.1k
                                   4;
7950
17.1k
                            posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951
17.1k
                                    << 2) >>
7952
17.1k
                                   4;
7953
17.1k
                        }
7954
255k
                        else
7955
255k
                        {
7956
255k
                            best_inter_cost =
7957
255k
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958
255k
                            posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959
255k
                                    << 2) >>
7960
255k
                                   3;
7961
255k
                            posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962
255k
                                    << 2) >>
7963
255k
                                   3;
7964
255k
                        }
7965
7966
                        /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967
273k
                        if(ps_cur_ipe_ctb->u1_split_flag)
7968
246k
                        {
7969
                            /* Id of the 32x32 block, 16x16 block in a CTB */
7970
246k
                            WORD32 i4_32x32_id =
7971
246k
                                (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972
246k
                            WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973
246k
                                                 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974
7975
246k
                            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976
164k
                            {
7977
164k
                                if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978
164k
                                       .as_intra16_analyse[i4_16x16_id]
7979
164k
                                       .b1_split_flag)
7980
81.1k
                                {
7981
81.1k
                                    intra_8x8_enabled =
7982
81.1k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983
81.1k
                                            .as_intra16_analyse[i4_16x16_id]
7984
81.1k
                                            .as_intra8_analyse[0]
7985
81.1k
                                            .b1_valid_cu;
7986
81.1k
                                    intra_8x8_enabled &=
7987
81.1k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988
81.1k
                                            .as_intra16_analyse[i4_16x16_id]
7989
81.1k
                                            .as_intra8_analyse[1]
7990
81.1k
                                            .b1_valid_cu;
7991
81.1k
                                    intra_8x8_enabled &=
7992
81.1k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993
81.1k
                                            .as_intra16_analyse[i4_16x16_id]
7994
81.1k
                                            .as_intra8_analyse[2]
7995
81.1k
                                            .b1_valid_cu;
7996
81.1k
                                    intra_8x8_enabled &=
7997
81.1k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998
81.1k
                                            .as_intra16_analyse[i4_16x16_id]
7999
81.1k
                                            .as_intra8_analyse[3]
8000
81.1k
                                            .b1_valid_cu;
8001
81.1k
                                }
8002
164k
                            }
8003
246k
                        }
8004
273k
                    }
8005
8006
274k
                    if(blk_8x8_mask == 0xf)
8007
264k
                    {
8008
264k
                        parent_cost =
8009
264k
                            ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010
264k
                        ps_search_results->u1_split_flag = 0;
8011
264k
                    }
8012
9.52k
                    else
8013
9.52k
                    {
8014
9.52k
                        ps_search_results->u1_split_flag = 1;
8015
9.52k
                    }
8016
8017
274k
                    ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018
8019
274k
                    if(s_common_frm_prms.u1_is_cu_noisy)
8020
0
                    {
8021
0
                        intra_8x8_enabled = 0;
8022
0
                    }
8023
8024
                    /* Evaluate 8x8 if NxN part id is enabled */
8025
274k
                    if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026
139k
                    {
8027
                        /* Populates the PU's for the 4 8x8's in one call */
8028
139k
                        hme_populate_pus_8x8_cu(
8029
139k
                            ps_thrd_ctxt,
8030
139k
                            ps_ctxt,
8031
139k
                            &s_subpel_prms,
8032
139k
                            ps_search_results,
8033
139k
                            ps_cu_results,
8034
139k
                            ps_pu_results,
8035
139k
                            &(as_pu_results[0][0][0]),
8036
139k
                            &s_common_frm_prms,
8037
139k
                            au1_pred_dir_searched,
8038
139k
                            i4_num_pred_dir,
8039
139k
                            blk_8x8_mask);
8040
8041
                        /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042
139k
                        ps_pu_results = as_inter_pu_results;
8043
8044
695k
                        for(i = 0; i < 4; i++)
8045
556k
                        {
8046
556k
                            if((blk_8x8_mask & (1 << i)))
8047
537k
                            {
8048
537k
                                if(ps_cu_results->i4_part_mask)
8049
482k
                                {
8050
482k
                                    hme_decide_part_types(
8051
482k
                                        ps_cu_results,
8052
482k
                                        ps_pu_results,
8053
482k
                                        &s_common_frm_prms,
8054
482k
                                        ps_ctxt,
8055
482k
                                        ps_cmn_utils_optimised_function_list,
8056
482k
                                        ps_me_optimised_function_list
8057
8058
482k
                                    );
8059
482k
                                }
8060
                                /*****************************************************************/
8061
                                /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8062
                                /*****************************************************************/
8063
537k
#if DISABLE_INTRA_IN_BPICS
8064
537k
                                if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065
54.8k
                                         (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066
54.8k
                                          TEMPORAL_LAYER_DISABLE)))
8067
536k
#endif
8068
536k
                                {
8069
536k
                                    if(!(DISABLE_INTRA_WHEN_NOISY &&
8070
0
                                         s_common_frm_prms.u1_is_cu_noisy))
8071
536k
                                    {
8072
536k
                                        hme_insert_intra_nodes_post_bipred(
8073
536k
                                            ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074
536k
                                    }
8075
536k
                                }
8076
8077
537k
                                child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078
537k
                            }
8079
8080
556k
                            ps_cu_results++;
8081
556k
                            ps_pu_results++;
8082
556k
                        }
8083
8084
                        /* Compare 16x16 vs 8x8 cost */
8085
139k
                        if(child_cost < parent_cost)
8086
63.6k
                        {
8087
63.6k
                            ps_search_results->best_cu_cost = child_cost;
8088
63.6k
                            ps_search_results->u1_split_flag = 1;
8089
63.6k
                        }
8090
139k
                    }
8091
274k
                }
8092
8093
274k
                hme_update_mv_bank_encode(
8094
274k
                    ps_search_results,
8095
274k
                    ps_curr_layer->ps_layer_mvbank,
8096
274k
                    blk_x,
8097
274k
                    blk_y,
8098
274k
                    &s_mv_update_prms,
8099
274k
                    au1_pred_dir_searched,
8100
274k
                    i4_num_act_ref_l0);
8101
8102
                /*********************************************************************/
8103
                /* Map the best results to an MV Grid. This is an 18x18 grid that is */
8104
                /* useful for doing things like predictor for cost calculation or    */
8105
                /* also for merge calculations if need be.                           */
8106
                /*********************************************************************/
8107
274k
                hme_map_mvs_to_grid(
8108
274k
                    &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109
274k
            }
8110
8111
            /* Set the CU tree nodes appropriately */
8112
19.2k
            if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113
15.5k
            {
8114
15.5k
                WORD32 i, j;
8115
8116
265k
                for(i = 0; i < 16; i++)
8117
249k
                {
8118
249k
                    cur_ctb_cu_tree_t *ps_tree_node =
8119
249k
                        ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120
249k
                    search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121
8122
249k
                    switch(i >> 2)
8123
249k
                    {
8124
62.3k
                    case 0:
8125
62.3k
                    {
8126
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8127
8128
62.3k
                        break;
8129
0
                    }
8130
62.3k
                    case 1:
8131
62.3k
                    {
8132
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8133
8134
62.3k
                        break;
8135
0
                    }
8136
62.3k
                    case 2:
8137
62.3k
                    {
8138
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8139
8140
62.3k
                        break;
8141
0
                    }
8142
62.3k
                    case 3:
8143
62.3k
                    {
8144
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8145
8146
62.3k
                        break;
8147
0
                    }
8148
249k
                    }
8149
8150
249k
                    switch(i % 4)
8151
249k
                    {
8152
62.3k
                    case 0:
8153
62.3k
                    {
8154
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8155
8156
62.3k
                        break;
8157
0
                    }
8158
62.3k
                    case 1:
8159
62.3k
                    {
8160
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8161
8162
62.3k
                        break;
8163
0
                    }
8164
62.3k
                    case 2:
8165
62.3k
                    {
8166
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8167
8168
62.3k
                        break;
8169
0
                    }
8170
62.3k
                    case 3:
8171
62.3k
                    {
8172
62.3k
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8173
8174
62.3k
                        break;
8175
0
                    }
8176
249k
                    }
8177
8178
249k
                    if(ai4_blk_8x8_mask[i] == 15)
8179
216k
                    {
8180
216k
                        if(!ps_results->u1_split_flag)
8181
181k
                        {
8182
181k
                            ps_tree_node->is_node_valid = 1;
8183
181k
                            NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184
181k
                        }
8185
35.7k
                        else
8186
35.7k
                        {
8187
35.7k
                            ps_tree_node->is_node_valid = 0;
8188
35.7k
                            ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189
35.7k
                        }
8190
216k
                    }
8191
32.5k
                    else
8192
32.5k
                    {
8193
32.5k
                        cur_ctb_cu_tree_t *ps_tree_child;
8194
8195
32.5k
                        ps_tree_node->is_node_valid = 0;
8196
8197
162k
                        for(j = 0; j < 4; j++)
8198
130k
                        {
8199
130k
                            switch(j)
8200
130k
                            {
8201
32.5k
                            case 0:
8202
32.5k
                            {
8203
32.5k
                                ps_tree_child = ps_tree_node->ps_child_node_tl;
8204
8205
32.5k
                                break;
8206
0
                            }
8207
32.5k
                            case 1:
8208
32.5k
                            {
8209
32.5k
                                ps_tree_child = ps_tree_node->ps_child_node_tr;
8210
8211
32.5k
                                break;
8212
0
                            }
8213
32.5k
                            case 2:
8214
32.5k
                            {
8215
32.5k
                                ps_tree_child = ps_tree_node->ps_child_node_bl;
8216
8217
32.5k
                                break;
8218
0
                            }
8219
32.5k
                            case 3:
8220
32.5k
                            {
8221
32.5k
                                ps_tree_child = ps_tree_node->ps_child_node_br;
8222
8223
32.5k
                                break;
8224
0
                            }
8225
130k
                            }
8226
8227
130k
                            ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228
130k
                        }
8229
32.5k
                    }
8230
249k
                }
8231
15.5k
            }
8232
8233
19.2k
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234
3.61k
            {
8235
3.61k
                cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236
8237
3.61k
                hme_analyse_mv_clustering(
8238
3.61k
                    ps_ctxt->as_search_results_16x16,
8239
3.61k
                    ps_ctxt->as_cu16x16_results,
8240
3.61k
                    ps_ctxt->as_cu8x8_results,
8241
3.61k
                    ps_ctxt->ps_ctb_cluster_info,
8242
3.61k
                    ps_ctxt->ai1_future_list,
8243
3.61k
                    ps_ctxt->ai1_past_list,
8244
3.61k
                    ps_ctxt->s_frm_prms.bidir_enabled,
8245
3.61k
                    e_me_quality_presets);
8246
8247
#if DISABLE_BLK_MERGE_WHEN_NOISY
8248
                ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249
                ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250
                ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251
                ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252
                ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253
                ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254
                ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255
                ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256
                ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257
                ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258
#endif
8259
8260
3.61k
                en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261
3.61k
                                 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262
3.61k
                                 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263
3.61k
                                 (ps_tree->ps_child_node_br->is_node_valid << 3);
8264
8265
3.61k
                en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266
3.61k
                                     (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267
3.61k
                                     (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268
3.61k
                                     (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269
3.61k
                                     (ps_tree->u1_inter_eval_enable << 4);
8270
3.61k
            }
8271
15.5k
            else
8272
15.5k
            {
8273
15.5k
                en_merge_execution = 0x1f;
8274
8275
#if DISABLE_BLK_MERGE_WHEN_NOISY
8276
                en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277
                                 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278
                                 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279
                                 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280
#endif
8281
15.5k
            }
8282
8283
            /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284
19.2k
            ps_pu_results = as_inter_pu_results;
8285
8286
19.2k
            {
8287
19.2k
                WORD32 ref_ctr;
8288
8289
19.2k
                s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290
19.2k
                s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291
8292
                /* MV limit is different based on ref. PIC */
8293
54.3k
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294
35.1k
                {
8295
35.1k
                    SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296
35.1k
                    SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297
35.1k
                }
8298
8299
19.2k
                e_merge_result = CU_SPLIT;
8300
19.2k
                merge_count_32x32 = 0;
8301
8302
19.2k
                if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303
16.3k
                {
8304
16.3k
                    range_prms_t *ps_pic_limit;
8305
16.3k
                    if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306
16.3k
                    {
8307
16.3k
                        ps_pic_limit = &s_pic_limit_rec;
8308
16.3k
                    }
8309
0
                    else
8310
0
                    {
8311
0
                        ps_pic_limit = &s_pic_limit_inp;
8312
0
                    }
8313
                    /* MV limit is different based on ref. PIC */
8314
46.2k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315
29.9k
                    {
8316
29.9k
                        hme_derive_search_range(
8317
29.9k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318
29.9k
                            ps_pic_limit,
8319
29.9k
                            &as_mv_limit[ref_ctr],
8320
29.9k
                            i4_ctb_x << 6,
8321
29.9k
                            i4_ctb_y << 6,
8322
29.9k
                            32,
8323
29.9k
                            32);
8324
8325
29.9k
                        SCALE_RANGE_PRMS_POINTERS(
8326
29.9k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327
29.9k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328
29.9k
                            2);
8329
29.9k
                    }
8330
16.3k
                    s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331
16.3k
                    s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332
16.3k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333
8334
16.3k
                    e_merge_result = hme_try_merge_high_speed(
8335
16.3k
                        ps_thrd_ctxt,
8336
16.3k
                        ps_ctxt,
8337
16.3k
                        ps_cur_ipe_ctb,
8338
16.3k
                        &s_subpel_prms,
8339
16.3k
                        &s_merge_prms_32x32_tl,
8340
16.3k
                        ps_pu_results,
8341
16.3k
                        &as_pu_results[0][0][0]);
8342
8343
16.3k
                    if(e_merge_result == CU_MERGED)
8344
4.72k
                    {
8345
4.72k
                        inter_cu_results_t *ps_cu_results =
8346
4.72k
                            s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347
8348
4.72k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8349
778
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350
4.53k
                        {
8351
4.53k
                            hme_map_mvs_to_grid(
8352
4.53k
                                &aps_mv_grid[0],
8353
4.53k
                                s_merge_prms_32x32_tl.ps_results_merge,
8354
4.53k
                                s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355
4.53k
                                s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356
4.53k
                        }
8357
8358
4.72k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359
4.06k
                        {
8360
4.06k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361
4.06k
                                .ps_child_node_tl->is_node_valid = 1;
8362
4.06k
                            NULLIFY_THE_CHILDREN_NODES(
8363
4.06k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364
4.06k
                                    .ps_child_node_tl);
8365
4.06k
                        }
8366
8367
4.72k
                        merge_count_32x32++;
8368
4.72k
                        e_merge_result = CU_SPLIT;
8369
4.72k
                    }
8370
11.6k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371
1.93k
                    {
8372
1.93k
#if ENABLE_CU_TREE_CULLING
8373
1.93k
                        cur_ctb_cu_tree_t *ps_tree =
8374
1.93k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375
8376
1.93k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377
1.93k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378
1.93k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8379
1.93k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380
1.93k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381
1.93k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382
1.93k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383
1.93k
#endif
8384
1.93k
                    }
8385
16.3k
                }
8386
2.85k
                else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387
353
                {
8388
353
#if ENABLE_CU_TREE_CULLING
8389
353
                    cur_ctb_cu_tree_t *ps_tree =
8390
353
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391
8392
353
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8393
353
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394
353
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395
353
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396
353
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397
353
#endif
8398
8399
353
                    if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400
0
                    {
8401
0
                        ps_tree->is_node_valid = 0;
8402
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404
0
                    }
8405
353
                }
8406
8407
19.2k
                if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408
15.9k
                {
8409
15.9k
                    range_prms_t *ps_pic_limit;
8410
15.9k
                    if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411
15.9k
                    {
8412
15.9k
                        ps_pic_limit = &s_pic_limit_rec;
8413
15.9k
                    }
8414
0
                    else
8415
0
                    {
8416
0
                        ps_pic_limit = &s_pic_limit_inp;
8417
0
                    }
8418
                    /* MV limit is different based on ref. PIC */
8419
45.0k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420
29.1k
                    {
8421
29.1k
                        hme_derive_search_range(
8422
29.1k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423
29.1k
                            ps_pic_limit,
8424
29.1k
                            &as_mv_limit[ref_ctr],
8425
29.1k
                            (i4_ctb_x << 6) + 32,
8426
29.1k
                            i4_ctb_y << 6,
8427
29.1k
                            32,
8428
29.1k
                            32);
8429
29.1k
                        SCALE_RANGE_PRMS_POINTERS(
8430
29.1k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431
29.1k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432
29.1k
                            2);
8433
29.1k
                    }
8434
15.9k
                    s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435
15.9k
                    s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436
15.9k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437
8438
15.9k
                    e_merge_result = hme_try_merge_high_speed(
8439
15.9k
                        ps_thrd_ctxt,
8440
15.9k
                        ps_ctxt,
8441
15.9k
                        ps_cur_ipe_ctb,
8442
15.9k
                        &s_subpel_prms,
8443
15.9k
                        &s_merge_prms_32x32_tr,
8444
15.9k
                        ps_pu_results,
8445
15.9k
                        &as_pu_results[0][0][0]);
8446
8447
15.9k
                    if(e_merge_result == CU_MERGED)
8448
7.68k
                    {
8449
7.68k
                        inter_cu_results_t *ps_cu_results =
8450
7.68k
                            s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451
8452
7.68k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8453
1.42k
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454
7.51k
                        {
8455
7.51k
                            hme_map_mvs_to_grid(
8456
7.51k
                                &aps_mv_grid[0],
8457
7.51k
                                s_merge_prms_32x32_tr.ps_results_merge,
8458
7.51k
                                s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459
7.51k
                                s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460
7.51k
                        }
8461
8462
7.68k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463
6.79k
                        {
8464
6.79k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465
6.79k
                                .ps_child_node_tr->is_node_valid = 1;
8466
6.79k
                            NULLIFY_THE_CHILDREN_NODES(
8467
6.79k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468
6.79k
                                    .ps_child_node_tr);
8469
6.79k
                        }
8470
8471
7.68k
                        merge_count_32x32++;
8472
7.68k
                        e_merge_result = CU_SPLIT;
8473
7.68k
                    }
8474
8.21k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475
1.65k
                    {
8476
1.65k
#if ENABLE_CU_TREE_CULLING
8477
1.65k
                        cur_ctb_cu_tree_t *ps_tree =
8478
1.65k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479
8480
1.65k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481
1.65k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482
1.65k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8483
1.65k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484
1.65k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485
1.65k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486
1.65k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487
1.65k
#endif
8488
1.65k
                    }
8489
15.9k
                }
8490
3.30k
                else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491
363
                {
8492
363
#if ENABLE_CU_TREE_CULLING
8493
363
                    cur_ctb_cu_tree_t *ps_tree =
8494
363
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495
8496
363
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8497
363
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498
363
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499
363
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500
363
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501
363
#endif
8502
8503
363
                    if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504
0
                    {
8505
0
                        ps_tree->is_node_valid = 0;
8506
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508
0
                    }
8509
363
                }
8510
8511
19.2k
                if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512
15.8k
                {
8513
15.8k
                    range_prms_t *ps_pic_limit;
8514
15.8k
                    if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515
15.8k
                    {
8516
15.8k
                        ps_pic_limit = &s_pic_limit_rec;
8517
15.8k
                    }
8518
0
                    else
8519
0
                    {
8520
0
                        ps_pic_limit = &s_pic_limit_inp;
8521
0
                    }
8522
                    /* MV limit is different based on ref. PIC */
8523
44.7k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524
28.9k
                    {
8525
28.9k
                        hme_derive_search_range(
8526
28.9k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527
28.9k
                            ps_pic_limit,
8528
28.9k
                            &as_mv_limit[ref_ctr],
8529
28.9k
                            i4_ctb_x << 6,
8530
28.9k
                            (i4_ctb_y << 6) + 32,
8531
28.9k
                            32,
8532
28.9k
                            32);
8533
28.9k
                        SCALE_RANGE_PRMS_POINTERS(
8534
28.9k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535
28.9k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536
28.9k
                            2);
8537
28.9k
                    }
8538
15.8k
                    s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539
15.8k
                    s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540
15.8k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541
8542
15.8k
                    e_merge_result = hme_try_merge_high_speed(
8543
15.8k
                        ps_thrd_ctxt,
8544
15.8k
                        ps_ctxt,
8545
15.8k
                        ps_cur_ipe_ctb,
8546
15.8k
                        &s_subpel_prms,
8547
15.8k
                        &s_merge_prms_32x32_bl,
8548
15.8k
                        ps_pu_results,
8549
15.8k
                        &as_pu_results[0][0][0]);
8550
8551
15.8k
                    if(e_merge_result == CU_MERGED)
8552
9.91k
                    {
8553
9.91k
                        inter_cu_results_t *ps_cu_results =
8554
9.91k
                            s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555
8556
9.91k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8557
1.28k
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558
9.68k
                        {
8559
9.68k
                            hme_map_mvs_to_grid(
8560
9.68k
                                &aps_mv_grid[0],
8561
9.68k
                                s_merge_prms_32x32_bl.ps_results_merge,
8562
9.68k
                                s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563
9.68k
                                s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564
9.68k
                        }
8565
8566
9.91k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567
8.90k
                        {
8568
8.90k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569
8.90k
                                .ps_child_node_bl->is_node_valid = 1;
8570
8.90k
                            NULLIFY_THE_CHILDREN_NODES(
8571
8.90k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572
8.90k
                                    .ps_child_node_bl);
8573
8.90k
                        }
8574
8575
9.91k
                        merge_count_32x32++;
8576
9.91k
                        e_merge_result = CU_SPLIT;
8577
9.91k
                    }
8578
5.89k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579
1.58k
                    {
8580
1.58k
#if ENABLE_CU_TREE_CULLING
8581
1.58k
                        cur_ctb_cu_tree_t *ps_tree =
8582
1.58k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583
8584
1.58k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585
1.58k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586
1.58k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8587
1.58k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588
1.58k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589
1.58k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590
1.58k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591
1.58k
#endif
8592
1.58k
                    }
8593
15.8k
                }
8594
3.40k
                else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595
318
                {
8596
318
#if ENABLE_CU_TREE_CULLING
8597
318
                    cur_ctb_cu_tree_t *ps_tree =
8598
318
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599
8600
318
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8601
318
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602
318
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603
318
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604
318
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605
318
#endif
8606
8607
318
                    if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608
0
                    {
8609
0
                        ps_tree->is_node_valid = 0;
8610
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612
0
                    }
8613
318
                }
8614
8615
19.2k
                if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616
15.3k
                {
8617
15.3k
                    range_prms_t *ps_pic_limit;
8618
15.3k
                    if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619
15.3k
                    {
8620
15.3k
                        ps_pic_limit = &s_pic_limit_rec;
8621
15.3k
                    }
8622
0
                    else
8623
0
                    {
8624
0
                        ps_pic_limit = &s_pic_limit_inp;
8625
0
                    }
8626
                    /* MV limit is different based on ref. PIC */
8627
43.6k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628
28.2k
                    {
8629
28.2k
                        hme_derive_search_range(
8630
28.2k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631
28.2k
                            ps_pic_limit,
8632
28.2k
                            &as_mv_limit[ref_ctr],
8633
28.2k
                            (i4_ctb_x << 6) + 32,
8634
28.2k
                            (i4_ctb_y << 6) + 32,
8635
28.2k
                            32,
8636
28.2k
                            32);
8637
8638
28.2k
                        SCALE_RANGE_PRMS_POINTERS(
8639
28.2k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640
28.2k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641
28.2k
                            2);
8642
28.2k
                    }
8643
15.3k
                    s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644
15.3k
                    s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645
15.3k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646
8647
15.3k
                    e_merge_result = hme_try_merge_high_speed(
8648
15.3k
                        ps_thrd_ctxt,
8649
15.3k
                        ps_ctxt,
8650
15.3k
                        ps_cur_ipe_ctb,
8651
15.3k
                        &s_subpel_prms,
8652
15.3k
                        &s_merge_prms_32x32_br,
8653
15.3k
                        ps_pu_results,
8654
15.3k
                        &as_pu_results[0][0][0]);
8655
8656
15.3k
                    if(e_merge_result == CU_MERGED)
8657
11.4k
                    {
8658
                        /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659
8660
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8661
                        (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662
                        {
8663
                        hme_map_mvs_to_grid
8664
                        (
8665
                        &aps_mv_grid[0],
8666
                        s_merge_prms_32x32_br.ps_results_merge,
8667
                        s_merge_prms_32x32_br.au1_pred_dir_searched,
8668
                        s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669
                        );
8670
                        }*/
8671
8672
11.4k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673
10.3k
                        {
8674
10.3k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675
10.3k
                                .ps_child_node_br->is_node_valid = 1;
8676
10.3k
                            NULLIFY_THE_CHILDREN_NODES(
8677
10.3k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678
10.3k
                                    .ps_child_node_br);
8679
10.3k
                        }
8680
8681
11.4k
                        merge_count_32x32++;
8682
11.4k
                        e_merge_result = CU_SPLIT;
8683
11.4k
                    }
8684
3.90k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685
1.38k
                    {
8686
1.38k
#if ENABLE_CU_TREE_CULLING
8687
1.38k
                        cur_ctb_cu_tree_t *ps_tree =
8688
1.38k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689
8690
1.38k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691
1.38k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692
1.38k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8693
1.38k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694
1.38k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695
1.38k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696
1.38k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697
1.38k
#endif
8698
1.38k
                    }
8699
15.3k
                }
8700
3.83k
                else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701
313
                {
8702
313
#if ENABLE_CU_TREE_CULLING
8703
313
                    cur_ctb_cu_tree_t *ps_tree =
8704
313
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705
8706
313
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8707
313
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708
313
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709
313
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710
313
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711
313
#endif
8712
8713
313
                    if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714
0
                    {
8715
0
                        ps_tree->is_node_valid = 0;
8716
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718
0
                    }
8719
313
                }
8720
8721
                /* Try merging all 32x32 to 64x64 candts */
8722
19.2k
                if(((en_merge_32x32 & 0xf) == 0xf) &&
8723
15.6k
                   (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724
12.8k
                    ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725
3.25k
                    if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726
425
                         !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727
3.25k
                        (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728
2.83k
                    {
8729
2.83k
                        range_prms_t *ps_pic_limit;
8730
2.83k
                        if(s_merge_prms_64x64.i4_use_rec == 1)
8731
2.83k
                        {
8732
2.83k
                            ps_pic_limit = &s_pic_limit_rec;
8733
2.83k
                        }
8734
0
                        else
8735
0
                        {
8736
0
                            ps_pic_limit = &s_pic_limit_inp;
8737
0
                        }
8738
                        /* MV limit is different based on ref. PIC */
8739
8.43k
                        for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740
5.60k
                        {
8741
5.60k
                            hme_derive_search_range(
8742
5.60k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743
5.60k
                                ps_pic_limit,
8744
5.60k
                                &as_mv_limit[ref_ctr],
8745
5.60k
                                i4_ctb_x << 6,
8746
5.60k
                                i4_ctb_y << 6,
8747
5.60k
                                64,
8748
5.60k
                                64);
8749
8750
5.60k
                            SCALE_RANGE_PRMS_POINTERS(
8751
5.60k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752
5.60k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753
5.60k
                                2);
8754
5.60k
                        }
8755
2.83k
                        s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756
2.83k
                        s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757
2.83k
                        s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758
8759
2.83k
                        e_merge_result = hme_try_merge_high_speed(
8760
2.83k
                            ps_thrd_ctxt,
8761
2.83k
                            ps_ctxt,
8762
2.83k
                            ps_cur_ipe_ctb,
8763
2.83k
                            &s_subpel_prms,
8764
2.83k
                            &s_merge_prms_64x64,
8765
2.83k
                            ps_pu_results,
8766
2.83k
                            &as_pu_results[0][0][0]);
8767
8768
2.83k
                        if((e_merge_result == CU_MERGED) &&
8769
2.24k
                           (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770
1.85k
                        {
8771
1.85k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772
1.85k
                                .is_node_valid = 1;
8773
1.85k
                            NULLIFY_THE_CHILDREN_NODES(
8774
1.85k
                                ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775
1.85k
                        }
8776
979
                        else if(
8777
979
                            (e_merge_result == CU_SPLIT) &&
8778
585
                            (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779
0
                        {
8780
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781
0
                                .is_node_valid = 0;
8782
0
                        }
8783
2.83k
                    }
8784
8785
                /*****************************************************************/
8786
                /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8787
                /*****************************************************************/
8788
19.2k
                pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789
8790
19.2k
                {
8791
#ifdef _DEBUG
8792
                    S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793
                                 ? 64
8794
                                 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795
                    S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796
                                 ? 64
8797
                                 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798
                    ASSERT(
8799
                        (wd * ht) ==
8800
                        ihevce_compute_area_of_valid_cus_in_ctb(
8801
                            &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802
#endif
8803
19.2k
                }
8804
19.2k
            }
8805
8806
            /* set the dependency for the corresponding row in enc loop */
8807
19.2k
            ihevce_dmgr_set_row_row_sync(
8808
19.2k
                pv_dep_mngr_encloop_dep_me,
8809
19.2k
                (i4_ctb_x + 1),
8810
19.2k
                i4_ctb_y,
8811
19.2k
                tile_col_idx /* Col Tile No. */);
8812
8813
19.2k
            left_ctb_in_diff_tile = 0;
8814
19.2k
        }
8815
9.74k
    }
8816
6.36k
}
8817
8818
/**
8819
********************************************************************************
8820
*  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8821
*                       refine_prms_t *ps_refine_prms)
8822
*
8823
*  @brief  Top level entry point for refinement ME
8824
*
8825
*  @param[in,out]  ps_ctxt: ME Handle
8826
*
8827
*  @param[in]  ps_refine_prms : refinement layer prms
8828
*
8829
*  @return None
8830
********************************************************************************
8831
*/
8832
void hme_refine_no_encode(
8833
    coarse_me_ctxt_t *ps_ctxt,
8834
    refine_prms_t *ps_refine_prms,
8835
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836
    S32 lyr_job_type,
8837
    WORD32 i4_ping_pong,
8838
    void **ppv_dep_mngr_hme_sync)
8839
9.62k
{
8840
9.62k
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841
9.62k
    ME_QUALITY_PRESETS_T e_me_quality_presets =
8842
9.62k
        ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843
8844
    /*************************************************************************/
8845
    /* Complexity of search: Low to High                                     */
8846
    /*************************************************************************/
8847
9.62k
    SEARCH_COMPLEXITY_T e_search_complexity;
8848
8849
    /*************************************************************************/
8850
    /* Config parameter structures for various ME submodules                 */
8851
    /*************************************************************************/
8852
9.62k
    hme_search_prms_t s_search_prms_blk;
8853
9.62k
    mvbank_update_prms_t s_mv_update_prms;
8854
8855
    /*************************************************************************/
8856
    /* All types of search candidates for predictor based search.            */
8857
    /*************************************************************************/
8858
9.62k
    S32 num_init_candts = 0;
8859
9.62k
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860
9.62k
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861
9.62k
    search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862
9.62k
    search_node_t *ps_candt_l, *ps_candt_t;
8863
9.62k
    search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864
9.62k
    search_node_t *ps_candt_prj_bl[2];
8865
9.62k
    search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866
9.62k
    search_node_t *ps_candt_prj_coloc[2];
8867
8868
9.62k
    pf_get_wt_inp fp_get_wt_inp;
8869
8870
9.62k
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871
9.62k
    U32 au4_unique_node_map[MAP_X_MAX * 2];
8872
8873
    /*EIID */
8874
9.62k
    WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8875
9.62k
    WORD32 i4_num_comparisions = 0;  //debug code
8876
9.62k
    WORD32 i4_threshold_multiplier;
8877
9.62k
    WORD32 i4_threshold_divider;
8878
9.62k
    WORD32 i4_temporal_layer =
8879
9.62k
        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880
8881
    /*************************************************************************/
8882
    /* Points to the search results for the blk level search (8x8/16x16)     */
8883
    /*************************************************************************/
8884
9.62k
    search_results_t *ps_search_results;
8885
8886
    /*************************************************************************/
8887
    /* Coordinates                                                           */
8888
    /*************************************************************************/
8889
9.62k
    S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890
    //S32 i4_ctb_y;
8891
9.62k
    S32 pos_x, pos_y;
8892
9.62k
    S32 blk_id_in_full_ctb;
8893
9.62k
    S32 i4_num_srch_cands;
8894
8895
9.62k
    S32 blk_y;
8896
8897
    /*************************************************************************/
8898
    /* Related to dimensions of block being searched and pic dimensions      */
8899
    /*************************************************************************/
8900
9.62k
    S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901
9.62k
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902
9.62k
    S32 num_results_prev_layer;
8903
8904
    /*************************************************************************/
8905
    /* Size of a basic unit for this layer. For non encode layers, we search */
8906
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907
    /* basic unit size is the ctb size.                                      */
8908
    /*************************************************************************/
8909
9.62k
    S32 unit_size;
8910
8911
    /*************************************************************************/
8912
    /* Pointers to context in current and coarser layers                     */
8913
    /*************************************************************************/
8914
9.62k
    layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915
8916
    /*************************************************************************/
8917
    /* to store mv range per blk, and picture limit, allowed search range    */
8918
    /* range prms in hpel and qpel units as well                             */
8919
    /*************************************************************************/
8920
9.62k
    range_prms_t s_range_prms_inp, s_range_prms_rec;
8921
9.62k
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922
    /*************************************************************************/
8923
    /* These variables are used to track number of references at different   */
8924
    /* stages of ME.                                                         */
8925
    /*************************************************************************/
8926
9.62k
    S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927
9.62k
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928
9.62k
    S32 lambda_inp = ps_refine_prms->lambda_inp;
8929
8930
    /*************************************************************************/
8931
    /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932
    /* Explicit means it searches on all active ref idx.                     */
8933
    /*************************************************************************/
8934
9.62k
    S32 curr_layer_implicit, prev_layer_implicit;
8935
8936
    /*************************************************************************/
8937
    /* Variables for loop counts                                             */
8938
    /*************************************************************************/
8939
9.62k
    S32 id;
8940
9.62k
    S08 i1_ref_idx;
8941
8942
    /*************************************************************************/
8943
    /* Input pointer and stride                                              */
8944
    /*************************************************************************/
8945
9.62k
    U08 *pu1_inp;
8946
9.62k
    S32 i4_inp_stride;
8947
8948
9.62k
    S32 end_of_frame;
8949
8950
9.62k
    S32 num_sync_units_in_row;
8951
8952
9.62k
    PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953
9.62k
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954
8955
    /*************************************************************************/
8956
    /* Pointers to current and coarse layer are needed for projection */
8957
    /* Pointer to prev layer are needed for other candts like coloc   */
8958
    /*************************************************************************/
8959
9.62k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960
8961
9.62k
    ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962
8963
9.62k
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964
8965
    /* Function pointer is selected based on the C vs X86 macro */
8966
8967
9.62k
    fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968
9.62k
                        ->pf_get_wt_inp_8x8;
8969
8970
9.62k
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971
9.62k
    i4_pic_wd = ps_curr_layer->i4_wd;
8972
9.62k
    i4_pic_ht = ps_curr_layer->i4_ht;
8973
9.62k
    e_search_complexity = ps_refine_prms->e_search_complexity;
8974
8975
9.62k
    end_of_frame = 0;
8976
8977
    /* If the previous layer is non-encode layer, then use dyadic projection */
8978
9.62k
    if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979
9.62k
        pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980
0
    else
8981
0
        pf_hme_project_coloc_candt = hme_project_coloc_candt;
8982
8983
    /* This points to all the initial candts */
8984
9.62k
    ps_search_candts = &as_search_candts[0];
8985
8986
9.62k
    {
8987
9.62k
        e_search_blk_size = BLK_8x8;
8988
9.62k
        blk_wd = blk_ht = 8;
8989
9.62k
        blk_size_shift = 3;
8990
9.62k
        s_mv_update_prms.i4_shift = 0;
8991
        /*********************************************************************/
8992
        /* In case we do not encode this layer, we search 8x8 with or without*/
8993
        /* enable 4x4 SAD.                                                   */
8994
        /*********************************************************************/
8995
9.62k
        {
8996
9.62k
            S32 i4_mask = (ENABLE_2Nx2N);
8997
8998
9.62k
            e_result_blk_size = BLK_8x8;
8999
9.62k
            if(ps_refine_prms->i4_enable_4x4_part)
9000
2.41k
            {
9001
2.41k
                i4_mask |= (ENABLE_NxN);
9002
2.41k
                e_result_blk_size = BLK_4x4;
9003
2.41k
                s_mv_update_prms.i4_shift = 1;
9004
2.41k
            }
9005
9006
9.62k
            s_search_prms_blk.i4_part_mask = i4_mask;
9007
9.62k
        }
9008
9009
9.62k
        unit_size = blk_wd;
9010
9.62k
        s_search_prms_blk.i4_inp_stride = unit_size;
9011
9.62k
    }
9012
9013
    /* This is required to properly update the layer mv bank */
9014
9.62k
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015
9.62k
    s_search_prms_blk.e_blk_size = e_search_blk_size;
9016
9017
    /*************************************************************************/
9018
    /* If current layer is explicit, then the number of ref frames are to    */
9019
    /* be same as previous layer. Else it will be 2                          */
9020
    /*************************************************************************/
9021
9.62k
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022
9.62k
    if(ps_refine_prms->explicit_ref)
9023
9.62k
    {
9024
9.62k
        curr_layer_implicit = 0;
9025
9.62k
        i4_num_ref_fpel = i4_num_ref_prev_layer;
9026
        /* 100578 : Using same mv cost fun. for all presets. */
9027
9.62k
        s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028
9.62k
    }
9029
0
    else
9030
0
    {
9031
0
        i4_num_ref_fpel = 2;
9032
0
        curr_layer_implicit = 1;
9033
0
        {
9034
0
            if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035
0
            {
9036
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037
0
            }
9038
0
            else
9039
0
            {
9040
0
#if USE_MODIFIED == 1
9041
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042
#else
9043
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044
#endif
9045
0
            }
9046
0
        }
9047
0
    }
9048
9049
9.62k
    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050
9.62k
    if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051
9.62k
           IV_IDR_FRAME ||
9052
7.46k
       ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053
2.79k
    {
9054
2.79k
        i4_num_ref_fpel = 1;
9055
2.79k
    }
9056
9.62k
    if(i4_num_ref_prev_layer <= 2)
9057
8.83k
    {
9058
8.83k
        prev_layer_implicit = 1;
9059
8.83k
        curr_layer_implicit = 1;
9060
8.83k
        i4_num_ref_each_dir = 1;
9061
8.83k
    }
9062
788
    else
9063
788
    {
9064
        /* It is assumed that we have equal number of references in each dir */
9065
        //ASSERT(!(i4_num_ref_prev_layer & 1));
9066
788
        prev_layer_implicit = 0;
9067
788
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068
788
    }
9069
9.62k
    s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070
9.62k
    s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071
9.62k
    s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072
9073
    /* this can be kept to 1 or 2 */
9074
9.62k
    i4_num_ref_before_merge = 2;
9075
9.62k
    i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076
9077
    /* Set up place holders to hold the search nodes of each initial candt */
9078
587k
    for(i = 0; i < MAX_INIT_CANDTS; i++)
9079
577k
    {
9080
577k
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081
577k
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082
577k
    }
9083
9084
    /* redundant, but doing it here since it is used in pred ctxt init */
9085
9.62k
    ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086
38.5k
    for(i = 0; i < 3; i++)
9087
28.8k
    {
9088
28.8k
        search_node_t *ps_search_node;
9089
28.8k
        ps_search_node = &as_left_neighbours[i];
9090
28.8k
        INIT_SEARCH_NODE(ps_search_node, 0);
9091
28.8k
        ps_search_node = &as_top_neighbours[i];
9092
28.8k
        INIT_SEARCH_NODE(ps_search_node, 0);
9093
28.8k
    }
9094
9095
9.62k
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096
    /* bottom left node always not available for the blk being searched */
9097
9.62k
    as_left_neighbours[2].u1_is_avail = 0;
9098
    /*************************************************************************/
9099
    /* Initialize all the search results structure here. We update all the   */
9100
    /* search results to default values, and configure things like blk sizes */
9101
    /*************************************************************************/
9102
9.62k
    if(ps_refine_prms->i4_encode == 0)
9103
9.62k
    {
9104
9.62k
        S32 pred_lx;
9105
9.62k
        search_results_t *ps_search_results;
9106
9107
9.62k
        ps_search_results = &ps_ctxt->s_search_results_8x8;
9108
9.62k
        hme_init_search_results(
9109
9.62k
            ps_search_results,
9110
9.62k
            i4_num_ref_fpel,
9111
9.62k
            ps_refine_prms->i4_num_fpel_results,
9112
9.62k
            ps_refine_prms->i4_num_results_per_part,
9113
9.62k
            e_search_blk_size,
9114
9.62k
            0,
9115
9.62k
            0,
9116
9.62k
            &ps_ctxt->au1_is_past[0]);
9117
28.8k
        for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118
19.2k
        {
9119
19.2k
            hme_init_pred_ctxt_no_encode(
9120
19.2k
                &ps_search_results->as_pred_ctxt[pred_lx],
9121
19.2k
                ps_search_results,
9122
19.2k
                &as_top_neighbours[0],
9123
19.2k
                &as_left_neighbours[0],
9124
19.2k
                &ps_candt_prj_coloc[0],
9125
19.2k
                ps_candt_zeromv,
9126
19.2k
                ps_candt_zeromv,
9127
19.2k
                pred_lx,
9128
19.2k
                lambda_inp,
9129
19.2k
                ps_refine_prms->lambda_q_shift,
9130
19.2k
                &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131
19.2k
                &ps_ctxt->ai2_ref_scf[0]);
9132
19.2k
        }
9133
9.62k
    }
9134
9135
    /*********************************************************************/
9136
    /* Initialize the dyn. search range params. for each reference index */
9137
    /* in current layer ctxt                                             */
9138
    /*********************************************************************/
9139
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9140
9.62k
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141
5.83k
    {
9142
5.83k
        WORD32 ref_ctr;
9143
9144
16.8k
        for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145
10.9k
        {
9146
10.9k
            INIT_DYN_SEARCH_PRMS(
9147
10.9k
                &ps_ctxt->s_coarse_dyn_range_prms
9148
10.9k
                     .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149
10.9k
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150
10.9k
        }
9151
5.83k
    }
9152
9153
    /* Next set up initial candidates according to a given set of rules.   */
9154
    /* The number of initial candidates affects the quality of ME in the   */
9155
    /* case of motion with multiple degrees of freedom. In case of simple  */
9156
    /* translational motion, a current and a few causal and non causal     */
9157
    /* candts would suffice. More candidates help to cover more complex    */
9158
    /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159
    /* where multiple ref helps etc.                                       */
9160
    /* The candidate choice also depends on the following parameters.      */
9161
    /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9162
    /* Whether we encode or not, and the type of search across reference   */
9163
    /* i.e. the previous layer may have been explicit/implicit and curr    */
9164
    /* layer may be explicit/implicit                                      */
9165
9166
    /* 0, 0, L, T, projected coloc best always present by default */
9167
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168
9.62k
    ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169
9.62k
    ps_search_candts[id].u1_num_steps_refine = 0;
9170
9.62k
    ps_candt_zeromv->s_mv.i2_mvx = 0;
9171
9.62k
    ps_candt_zeromv->s_mv.i2_mvy = 0;
9172
9173
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174
9.62k
    ps_candt_l = ps_search_candts[id].ps_search_node;
9175
9.62k
    ps_search_candts[id].u1_num_steps_refine = 0;
9176
9177
    /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178
    /* not at the CTB boundary use the causal T and */
9179
    /* not the projected T, although the candidate is */
9180
    /* still pointed to by ps_candt_prj_t[0] */
9181
9.62k
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182
7.76k
    {
9183
        /* Using Projected top to eliminate sync */
9184
7.76k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185
7.76k
            PROJECTED_TOP0, e_me_quality_presets);
9186
7.76k
        ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187
7.76k
        ps_search_candts[id].u1_num_steps_refine = 1;
9188
7.76k
    }
9189
1.86k
    else
9190
1.86k
    {
9191
1.86k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192
1.86k
            SPATIAL_TOP0, e_me_quality_presets);
9193
1.86k
        ps_candt_t = ps_search_candts[id].ps_search_node;
9194
1.86k
        ps_search_candts[id].u1_num_steps_refine = 0;
9195
1.86k
    }
9196
9197
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198
9.62k
        PROJECTED_COLOC0, e_me_quality_presets);
9199
9.62k
    ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9201
9202
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203
9.62k
        PROJECTED_COLOC1, e_me_quality_presets);
9204
9.62k
    ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9206
9207
9.62k
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208
7.76k
    {
9209
7.76k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210
7.76k
            PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211
7.76k
        ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212
7.76k
        ps_search_candts[id].u1_num_steps_refine = 1;
9213
9214
7.76k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215
7.76k
            PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216
7.76k
        ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217
7.76k
        ps_search_candts[id].u1_num_steps_refine = 1;
9218
7.76k
    }
9219
1.86k
    else
9220
1.86k
    {
9221
1.86k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222
1.86k
            SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223
1.86k
        ps_candt_tr = ps_search_candts[id].ps_search_node;
9224
1.86k
        ps_search_candts[id].u1_num_steps_refine = 0;
9225
9226
1.86k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227
1.86k
            SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228
1.86k
        ps_candt_tl = ps_search_candts[id].ps_search_node;
9229
1.86k
        ps_search_candts[id].u1_num_steps_refine = 0;
9230
1.86k
    }
9231
9232
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233
9.62k
        PROJECTED_RIGHT0, e_me_quality_presets);
9234
9.62k
    ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9236
9237
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238
9.62k
        PROJECTED_BOTTOM0, e_me_quality_presets);
9239
9.62k
    ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9241
9242
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243
9.62k
        PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244
9.62k
    ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9246
9247
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248
9.62k
        PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249
9.62k
    ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9251
9252
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253
9.62k
        PROJECTED_RIGHT1, e_me_quality_presets);
9254
9.62k
    ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9256
9257
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258
9.62k
        PROJECTED_BOTTOM1, e_me_quality_presets);
9259
9.62k
    ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9261
9262
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263
9.62k
        PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264
9.62k
    ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9266
9267
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268
9.62k
        PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269
9.62k
    ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9271
9272
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273
9.62k
    ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9275
9276
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277
9.62k
        PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278
9.62k
    ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9280
9281
9.62k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282
9.62k
        PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283
9.62k
    ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284
9.62k
    ps_search_candts[id].u1_num_steps_refine = 1;
9285
9286
    /*************************************************************************/
9287
    /* Now that the candidates have been ordered, to choose the right number */
9288
    /* of initial candidates.                                                */
9289
    /*************************************************************************/
9290
9.62k
    if(curr_layer_implicit && !prev_layer_implicit)
9291
0
    {
9292
0
        if(e_search_complexity == SEARCH_CX_LOW)
9293
0
            num_init_candts = 7;
9294
0
        else if(e_search_complexity == SEARCH_CX_MED)
9295
0
            num_init_candts = 13;
9296
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9297
0
            num_init_candts = 18;
9298
0
        else
9299
0
            ASSERT(0);
9300
0
    }
9301
9.62k
    else
9302
9.62k
    {
9303
9.62k
        if(e_search_complexity == SEARCH_CX_LOW)
9304
7.76k
            num_init_candts = 5;
9305
1.86k
        else if(e_search_complexity == SEARCH_CX_MED)
9306
1.86k
            num_init_candts = 11;
9307
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9308
0
            num_init_candts = 16;
9309
0
        else
9310
0
            ASSERT(0);
9311
9.62k
    }
9312
9313
9.62k
    if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314
2.00k
    {
9315
2.00k
        num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316
2.00k
    }
9317
9318
    /*************************************************************************/
9319
    /* The following search parameters are fixed throughout the search across*/
9320
    /* all blks. So these are configured outside processing loop             */
9321
    /*************************************************************************/
9322
9.62k
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323
9.62k
    s_search_prms_blk.i4_start_step = 1;
9324
9.62k
    s_search_prms_blk.i4_use_satd = 0;
9325
9.62k
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326
    /* we use recon only for encoded layers, otherwise it is not available */
9327
9.62k
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328
9329
9.62k
    s_search_prms_blk.ps_search_candts = ps_search_candts;
9330
    /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331
9.62k
    if(s_search_prms_blk.i4_use_rec)
9332
0
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333
9.62k
    else
9334
9.62k
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335
    /*************************************************************************/
9336
    /* Initialize coordinates. Meaning as follows                            */
9337
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9338
    /* blk_y : same as above, y coord.                                       */
9339
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9340
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9341
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9342
    /* corner of the picture. Always multiple of 64.                         */
9343
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9344
    /*************************************************************************/
9345
9.62k
    blk_y = 0;
9346
9.62k
    blk_id_in_ctb = 0;
9347
9348
9.62k
    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349
9350
    /* Get the number of sync units in a row based on encode/non encode layer */
9351
9.62k
    num_sync_units_in_row = num_blks_in_row;
9352
9353
    /*************************************************************************/
9354
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9355
    /* every block given its coordinate. Note this assumes that the min amt  */
9356
    /* of padding to right of pic is equal to the blk size. If we go all the */
9357
    /* way upto 64x64, then the min padding on right side of picture should  */
9358
    /* be 64, and also on bottom side of picture.                            */
9359
    /*************************************************************************/
9360
9.62k
    SET_PIC_LIMIT(
9361
9.62k
        s_pic_limit_inp,
9362
9.62k
        ps_curr_layer->i4_pad_x_inp,
9363
9.62k
        ps_curr_layer->i4_pad_y_inp,
9364
9.62k
        ps_curr_layer->i4_wd,
9365
9.62k
        ps_curr_layer->i4_ht,
9366
9.62k
        s_search_prms_blk.i4_num_steps_post_refine);
9367
9368
9.62k
    SET_PIC_LIMIT(
9369
9.62k
        s_pic_limit_rec,
9370
9.62k
        ps_curr_layer->i4_pad_x_rec,
9371
9.62k
        ps_curr_layer->i4_pad_y_rec,
9372
9.62k
        ps_curr_layer->i4_wd,
9373
9.62k
        ps_curr_layer->i4_ht,
9374
9.62k
        s_search_prms_blk.i4_num_steps_post_refine);
9375
9376
    /*************************************************************************/
9377
    /* set the MV limit per ref. pic.                                        */
9378
    /*    - P pic. : Based on the config params.                             */
9379
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380
    /*************************************************************************/
9381
9.62k
    {
9382
9.62k
        WORD32 ref_ctr;
9383
        /* Only for B/b pic. */
9384
9.62k
        if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385
993
        {
9386
993
            WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387
993
            WORD32 cur_poc, ref_poc, abs_poc_diff;
9388
9389
993
            cur_poc = ps_ctxt->i4_curr_poc;
9390
9391
            /* Get abs MAX for symmetric search */
9392
993
            i2_mv_y_per_poc = MAX(
9393
993
                ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394
993
                (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395
993
                         .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396
9397
3.27k
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398
2.28k
            {
9399
2.28k
                ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400
2.28k
                abs_poc_diff = ABS((cur_poc - ref_poc));
9401
                /* Get the cur. max MV based on POC distance */
9402
2.28k
                i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403
2.28k
                i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404
9405
2.28k
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406
2.28k
                as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407
2.28k
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408
2.28k
                as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409
2.28k
            }
9410
993
        }
9411
8.63k
        else
9412
8.63k
        {
9413
            /* Set the Config. File Params for P pic. */
9414
22.4k
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415
13.7k
            {
9416
13.7k
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417
13.7k
                as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418
13.7k
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419
13.7k
                as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420
13.7k
            }
9421
8.63k
        }
9422
9.62k
    }
9423
9424
    /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425
9.62k
    if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426
1.04k
    {
9427
1.04k
        i4_threshold_multiplier = 1;
9428
1.04k
        i4_threshold_divider = 4;
9429
1.04k
    }
9430
8.57k
    else if(e_me_quality_presets == ME_HIGH_SPEED)
9431
1.97k
    {
9432
1.97k
        i4_threshold_multiplier = 1;
9433
1.97k
        i4_threshold_divider = 2;
9434
1.97k
    }
9435
6.60k
    else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436
4.73k
    {
9437
#if OLD_XTREME_SPEED
9438
        /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439
        i4_temporal_layer = 1;
9440
#endif
9441
4.73k
        if(i4_temporal_layer == 0)
9442
4.28k
        {
9443
4.28k
            i4_threshold_multiplier = 3;
9444
4.28k
            i4_threshold_divider = 4;
9445
4.28k
        }
9446
454
        else if(i4_temporal_layer == 1)
9447
153
        {
9448
153
            i4_threshold_multiplier = 3;
9449
153
            i4_threshold_divider = 4;
9450
153
        }
9451
301
        else if(i4_temporal_layer == 2)
9452
301
        {
9453
301
            i4_threshold_multiplier = 1;
9454
301
            i4_threshold_divider = 1;
9455
301
        }
9456
0
        else
9457
0
        {
9458
0
            i4_threshold_multiplier = 5;
9459
0
            i4_threshold_divider = 4;
9460
0
        }
9461
4.73k
    }
9462
1.86k
    else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463
314
    {
9464
314
        i4_threshold_multiplier = 1;
9465
314
        i4_threshold_divider = 1;
9466
314
    }
9467
9468
    /*************************************************************************/
9469
    /*************************************************************************/
9470
    /*************************************************************************/
9471
    /* START OF THE CORE LOOP                                                */
9472
    /* If Encode is 0, then we just loop over each blk                       */
9473
    /*************************************************************************/
9474
    /*************************************************************************/
9475
    /*************************************************************************/
9476
79.5k
    while(0 == end_of_frame)
9477
69.8k
    {
9478
69.8k
        job_queue_t *ps_job;
9479
69.8k
        ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9480
69.8k
        WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9481
69.8k
        WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9482
        //+3 to get ceil values when divided by 4
9483
69.8k
        WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484
69.8k
            8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9485
        //if there is variable for ctb size use that and this variable can be derived
9486
69.8k
        WORD32 offset_val, check_dep_pos, set_dep_pos;
9487
69.8k
        void *pv_hme_dep_mngr;
9488
69.8k
        ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489
9490
        /* Get the current layer HME Dep Mngr       */
9491
        /* Note : Use layer_id - 1 in HME layers    */
9492
9493
69.8k
        pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494
9495
        /* Get the current row from the job queue */
9496
69.8k
        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497
69.8k
            ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498
9499
        /* If all rows are done, set the end of process flag to 1, */
9500
        /* and the current row to -1 */
9501
69.8k
        if(NULL == ps_job)
9502
9.62k
        {
9503
9.62k
            blk_y = -1;
9504
9.62k
            end_of_frame = 1;
9505
9506
9.62k
            continue;
9507
9.62k
        }
9508
9509
60.2k
        if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510
20.5k
        {
9511
            /* set the output dependency of current row */
9512
20.5k
            ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513
20.5k
            continue;
9514
20.5k
        }
9515
9516
39.7k
        blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517
39.7k
        blk_x = 0;
9518
39.7k
        i4_ctb_x = 0;
9519
9520
        /* wait for Corresponding Pre intra Job to be completed */
9521
39.7k
        if(1 == ps_refine_prms->i4_layer_id)
9522
36.9k
        {
9523
36.9k
            volatile UWORD32 i4_l1_done;
9524
36.9k
            volatile UWORD32 *pi4_l1_done;
9525
36.9k
            pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526
36.9k
                              ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527
36.9k
            i4_l1_done = *pi4_l1_done;
9528
36.9k
            while(!i4_l1_done)
9529
0
            {
9530
0
                i4_l1_done = *pi4_l1_done;
9531
0
            }
9532
36.9k
        }
9533
        /* Set Variables for Dep. Checking and Setting */
9534
39.7k
        set_dep_pos = blk_y + 1;
9535
39.7k
        if(blk_y > 0)
9536
32.8k
        {
9537
32.8k
            offset_val = 2;
9538
32.8k
            check_dep_pos = blk_y - 1;
9539
32.8k
        }
9540
6.83k
        else
9541
6.83k
        {
9542
            /* First row should run without waiting */
9543
6.83k
            offset_val = -1;
9544
6.83k
            check_dep_pos = 0;
9545
6.83k
        }
9546
9547
        /* EIID: calculate ed_blk_ctxt pointer for current row */
9548
        /* valid for only layer-1. not verified and used for other layers */
9549
39.7k
        i4_ctb_row_ctr = blk_y / 4;
9550
39.7k
        ps_ed_blk_ctxt_curr_row =
9551
39.7k
            ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552
39.7k
                                  i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9553
39.7k
        ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9554
9555
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
9556
        /* loop over all the units in a row                        */
9557
345k
        for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558
305k
        {
9559
305k
            ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9560
305k
            ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561
305k
            WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562
9563
            /* Wait till top row block is processed   */
9564
            /* Currently checking till top right block*/
9565
9566
            /* Disabled since all candidates, except for */
9567
            /* L and C, are projected from the coarser layer, */
9568
            /* only in ME_HIGH_SPEED mode */
9569
305k
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570
103k
            {
9571
103k
                if(i4_ctb_x < (num_sync_units_in_row - 1))
9572
96.3k
                {
9573
96.3k
                    ihevce_dmgr_chk_row_row_sync(
9574
96.3k
                        pv_hme_dep_mngr,
9575
96.3k
                        i4_ctb_x,
9576
96.3k
                        offset_val,
9577
96.3k
                        check_dep_pos,
9578
96.3k
                        0, /* Col Tile No. : Not supported in PreEnc*/
9579
96.3k
                        ps_ctxt->thrd_id);
9580
96.3k
                }
9581
103k
            }
9582
9583
305k
            {
9584
                /* for non encoder layer only one block is processed */
9585
305k
                num_blks_in_this_ctb = 1;
9586
305k
            }
9587
9588
            /* EIID: derive ed_ctxt ptr for current CTB */
9589
305k
            ps_ed_blk_ctxt_curr_ctb =
9590
305k
                ps_ed_blk_ctxt_curr_row +
9591
305k
                (i4_ctb_blk_ctr *
9592
305k
                 i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9593
305k
            ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594
9595
            /* loop over all the blocks in CTB will always be 1 */
9596
611k
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597
305k
            {
9598
305k
                {
9599
                    /* non encode layer */
9600
305k
                    blk_x = i4_ctb_x;
9601
305k
                    blk_id_in_full_ctb = 0;
9602
305k
                    s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603
305k
                }
9604
9605
                /* get the current input blk point */
9606
305k
                pos_x = blk_x << blk_size_shift;
9607
305k
                pos_y = blk_y << blk_size_shift;
9608
305k
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609
9610
                /*********************************************************************/
9611
                /* replicate the inp buffer at blk or ctb level for each ref id,     */
9612
                /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613
                /* thereby avoiding a bloat up of memory. If we did all references   */
9614
                /* weighted pred, we will end up with a duplicate copy of each ref   */
9615
                /* at each layer, since we need to preserve the original reference.  */
9616
                /* ToDo: Need to observe performance with this mechanism and compare */
9617
                /* with case where ref is weighted.                                  */
9618
                /*********************************************************************/
9619
305k
                if(blk_id_in_ctb == 0)
9620
305k
                {
9621
305k
                    fp_get_wt_inp(
9622
305k
                        ps_curr_layer,
9623
305k
                        &ps_ctxt->s_wt_pred,
9624
305k
                        unit_size,
9625
305k
                        pos_x,
9626
305k
                        pos_y,
9627
305k
                        unit_size,
9628
305k
                        ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629
305k
                        ps_ctxt->i4_wt_pred_enable_flag);
9630
305k
                }
9631
9632
305k
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633
305k
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634
                /* Select search results from a suitable search result in the context */
9635
305k
                {
9636
305k
                    ps_search_results = &ps_ctxt->s_search_results_8x8;
9637
305k
                }
9638
9639
305k
                s_search_prms_blk.ps_search_results = ps_search_results;
9640
9641
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642
305k
                hme_reset_search_results(
9643
305k
                    ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644
9645
                /* Loop across different Ref IDx */
9646
858k
                for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647
552k
                {
9648
552k
                    S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649
552k
                    S32 prev_blk_offset = 6;
9650
552k
                    S32 resultid;
9651
9652
                    /*********************************************************************/
9653
                    /* For every blk in the picture, the search range needs to be derived*/
9654
                    /* Any blk can have any mv, but practical search constraints are     */
9655
                    /* imposed by the picture boundary and amt of padding.               */
9656
                    /*********************************************************************/
9657
                    /* MV limit is different based on ref. PIC */
9658
552k
                    hme_derive_search_range(
9659
552k
                        &s_range_prms_inp,
9660
552k
                        &s_pic_limit_inp,
9661
552k
                        &as_mv_limit[i1_ref_idx],
9662
552k
                        pos_x,
9663
552k
                        pos_y,
9664
552k
                        blk_wd,
9665
552k
                        blk_ht);
9666
552k
                    hme_derive_search_range(
9667
552k
                        &s_range_prms_rec,
9668
552k
                        &s_pic_limit_rec,
9669
552k
                        &as_mv_limit[i1_ref_idx],
9670
552k
                        pos_x,
9671
552k
                        pos_y,
9672
552k
                        blk_wd,
9673
552k
                        blk_ht);
9674
9675
552k
                    s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676
552k
                    ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677
9678
552k
                    i4_num_srch_cands = 1;
9679
9680
552k
                    if(1 != ps_refine_prms->i4_layer_id)
9681
35.5k
                    {
9682
35.5k
                        S32 x, y;
9683
35.5k
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684
35.5k
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685
9686
35.5k
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687
20.8k
                        {
9688
20.8k
                            hme_get_spatial_candt(
9689
20.8k
                                ps_curr_layer,
9690
20.8k
                                e_search_blk_size,
9691
20.8k
                                blk_x,
9692
20.8k
                                blk_y,
9693
20.8k
                                i1_ref_idx,
9694
20.8k
                                &as_top_neighbours[0],
9695
20.8k
                                &as_left_neighbours[0],
9696
20.8k
                                0,
9697
20.8k
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698
20.8k
                                0,
9699
20.8k
                                ps_refine_prms->i4_encode);
9700
9701
20.8k
                            *ps_candt_tr = as_top_neighbours[3];
9702
20.8k
                            *ps_candt_t = as_top_neighbours[1];
9703
20.8k
                            *ps_candt_tl = as_top_neighbours[0];
9704
20.8k
                            i4_num_srch_cands += 3;
9705
20.8k
                        }
9706
14.7k
                        else
9707
14.7k
                        {
9708
14.7k
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709
14.7k
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710
14.7k
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711
14.7k
                            search_node_t *ps_search_node;
9712
14.7k
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713
14.7k
                            hme_mv_t *ps_mv, *ps_mv_base;
9714
14.7k
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715
14.7k
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9716
14.7k
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717
9718
14.7k
                            if(i4_blk_size1 != i4_blk_size2)
9719
3.80k
                            {
9720
3.80k
                                blk_x_temp <<= 1;
9721
3.80k
                                blk_y_temp <<= 1;
9722
3.80k
                                jump = 2;
9723
3.80k
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9724
0
                                {
9725
0
                                    blk_x_temp <<= 1;
9726
0
                                    blk_y_temp <<= 1;
9727
0
                                    jump = 4;
9728
0
                                }
9729
3.80k
                            }
9730
9731
14.7k
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732
14.7k
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733
9734
                            /* Adjust the blk coord to point to the top left location */
9735
14.7k
                            blk_x_temp -= 1;
9736
14.7k
                            blk_y_temp -= 1;
9737
9738
                            /* Pick up the mvs from the location */
9739
14.7k
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740
14.7k
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741
9742
14.7k
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743
14.7k
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744
9745
14.7k
                            ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746
14.7k
                            pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747
9748
14.7k
                            ps_mv_base = ps_mv;
9749
14.7k
                            pi1_ref_idx_base = pi1_ref_idx;
9750
9751
14.7k
                            ps_search_node = &as_left_neighbours[0];
9752
14.7k
                            ps_mv = ps_mv_base + mvs_in_row;
9753
14.7k
                            pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754
14.7k
                            COPY_MV_TO_SEARCH_NODE(
9755
14.7k
                                ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756
9757
14.7k
                            i4_num_srch_cands++;
9758
14.7k
                        }
9759
35.5k
                    }
9760
516k
                    else
9761
516k
                    {
9762
516k
                        S32 x, y;
9763
516k
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764
516k
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765
9766
516k
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767
195k
                        {
9768
195k
                            hme_get_spatial_candt_in_l1_me(
9769
195k
                                ps_curr_layer,
9770
195k
                                e_search_blk_size,
9771
195k
                                blk_x,
9772
195k
                                blk_y,
9773
195k
                                i1_ref_idx,
9774
195k
                                !ps_search_results->pu1_is_past[i1_ref_idx],
9775
195k
                                &as_top_neighbours[0],
9776
195k
                                &as_left_neighbours[0],
9777
195k
                                0,
9778
195k
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779
195k
                                0,
9780
195k
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781
195k
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782
9783
195k
                            *ps_candt_tr = as_top_neighbours[3];
9784
195k
                            *ps_candt_t = as_top_neighbours[1];
9785
195k
                            *ps_candt_tl = as_top_neighbours[0];
9786
9787
195k
                            i4_num_srch_cands += 3;
9788
195k
                        }
9789
321k
                        else
9790
321k
                        {
9791
321k
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792
321k
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793
321k
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794
321k
                            S32 i4_mv_pos_in_implicit_array;
9795
321k
                            search_node_t *ps_search_node;
9796
321k
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797
321k
                            hme_mv_t *ps_mv, *ps_mv_base;
9798
321k
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799
321k
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9800
321k
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801
321k
                            U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802
321k
                            S32 i4_num_results_in_given_dir =
9803
321k
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804
31.3k
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805
321k
                                                    : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806
289k
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807
9808
321k
                            if(i4_blk_size1 != i4_blk_size2)
9809
64.4k
                            {
9810
64.4k
                                blk_x_temp <<= 1;
9811
64.4k
                                blk_y_temp <<= 1;
9812
64.4k
                                jump = 2;
9813
64.4k
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9814
0
                                {
9815
0
                                    blk_x_temp <<= 1;
9816
0
                                    blk_y_temp <<= 1;
9817
0
                                    jump = 4;
9818
0
                                }
9819
64.4k
                            }
9820
9821
321k
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822
321k
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823
9824
                            /* Adjust the blk coord to point to the top left location */
9825
321k
                            blk_x_temp -= 1;
9826
321k
                            blk_y_temp -= 1;
9827
9828
                            /* Pick up the mvs from the location */
9829
321k
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830
321k
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831
9832
321k
                            i4_offset +=
9833
321k
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834
31.3k
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835
321k
                                                    : 0);
9836
9837
321k
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838
321k
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839
9840
321k
                            ps_mv_base = ps_mv;
9841
321k
                            pi1_ref_idx_base = pi1_ref_idx;
9842
9843
321k
                            {
9844
                                /* ps_mv and pi1_ref_idx now point to the top left locn */
9845
321k
                                ps_search_node = &as_left_neighbours[0];
9846
321k
                                ps_mv = ps_mv_base + mvs_in_row;
9847
321k
                                pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848
9849
321k
                                i4_mv_pos_in_implicit_array =
9850
321k
                                    hme_find_pos_of_implicitly_stored_ref_id(
9851
321k
                                        pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852
9853
321k
                                if(-1 != i4_mv_pos_in_implicit_array)
9854
265k
                                {
9855
265k
                                    COPY_MV_TO_SEARCH_NODE(
9856
265k
                                        ps_search_node,
9857
265k
                                        &ps_mv[i4_mv_pos_in_implicit_array],
9858
265k
                                        &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859
265k
                                        i1_ref_idx,
9860
265k
                                        shift);
9861
265k
                                }
9862
56.0k
                                else
9863
56.0k
                                {
9864
56.0k
                                    ps_search_node->u1_is_avail = 0;
9865
56.0k
                                    ps_search_node->s_mv.i2_mvx = 0;
9866
56.0k
                                    ps_search_node->s_mv.i2_mvy = 0;
9867
56.0k
                                    ps_search_node->i1_ref_idx = i1_ref_idx;
9868
56.0k
                                }
9869
9870
321k
                                i4_num_srch_cands++;
9871
321k
                            }
9872
321k
                        }
9873
516k
                    }
9874
9875
552k
                    *ps_candt_l = as_left_neighbours[0];
9876
9877
                    /* when 16x16 is searched in an encode layer, and the prev layer */
9878
                    /* stores results for 4x4 blks, we project 5 candts corresponding */
9879
                    /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9880
                    /* However in other cases, only 2,2 best and 2nd best reqd */
9881
552k
                    resultid = 0;
9882
552k
                    pf_hme_project_coloc_candt(
9883
552k
                        ps_candt_prj_coloc[0],
9884
552k
                        ps_curr_layer,
9885
552k
                        ps_coarse_layer,
9886
552k
                        pos_x + 2,
9887
552k
                        pos_y + 2,
9888
552k
                        i1_ref_idx,
9889
552k
                        resultid);
9890
9891
552k
                    i4_num_srch_cands++;
9892
9893
552k
                    resultid = 1;
9894
552k
                    if(num_results_prev_layer > 1)
9895
502k
                    {
9896
502k
                        pf_hme_project_coloc_candt(
9897
502k
                            ps_candt_prj_coloc[1],
9898
502k
                            ps_curr_layer,
9899
502k
                            ps_coarse_layer,
9900
502k
                            pos_x + 2,
9901
502k
                            pos_y + 2,
9902
502k
                            i1_ref_idx,
9903
502k
                            resultid);
9904
9905
502k
                        i4_num_srch_cands++;
9906
502k
                    }
9907
9908
552k
                    resultid = 0;
9909
9910
552k
                    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911
335k
                    {
9912
335k
                        pf_hme_project_coloc_candt(
9913
335k
                            ps_candt_prj_t[0],
9914
335k
                            ps_curr_layer,
9915
335k
                            ps_coarse_layer,
9916
335k
                            pos_x,
9917
335k
                            pos_y - prev_blk_offset,
9918
335k
                            i1_ref_idx,
9919
335k
                            resultid);
9920
9921
335k
                        i4_num_srch_cands++;
9922
335k
                    }
9923
9924
552k
                    {
9925
552k
                        pf_hme_project_coloc_candt(
9926
552k
                            ps_candt_prj_br[0],
9927
552k
                            ps_curr_layer,
9928
552k
                            ps_coarse_layer,
9929
552k
                            pos_x + next_blk_offset,
9930
552k
                            pos_y + next_blk_offset,
9931
552k
                            i1_ref_idx,
9932
552k
                            resultid);
9933
552k
                        pf_hme_project_coloc_candt(
9934
552k
                            ps_candt_prj_bl[0],
9935
552k
                            ps_curr_layer,
9936
552k
                            ps_coarse_layer,
9937
552k
                            pos_x - prev_blk_offset,
9938
552k
                            pos_y + next_blk_offset,
9939
552k
                            i1_ref_idx,
9940
552k
                            resultid);
9941
552k
                        pf_hme_project_coloc_candt(
9942
552k
                            ps_candt_prj_r[0],
9943
552k
                            ps_curr_layer,
9944
552k
                            ps_coarse_layer,
9945
552k
                            pos_x + next_blk_offset,
9946
552k
                            pos_y,
9947
552k
                            i1_ref_idx,
9948
552k
                            resultid);
9949
552k
                        pf_hme_project_coloc_candt(
9950
552k
                            ps_candt_prj_b[0],
9951
552k
                            ps_curr_layer,
9952
552k
                            ps_coarse_layer,
9953
552k
                            pos_x,
9954
552k
                            pos_y + next_blk_offset,
9955
552k
                            i1_ref_idx,
9956
552k
                            resultid);
9957
9958
552k
                        i4_num_srch_cands += 4;
9959
9960
552k
                        if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961
335k
                        {
9962
335k
                            pf_hme_project_coloc_candt(
9963
335k
                                ps_candt_prj_tr[0],
9964
335k
                                ps_curr_layer,
9965
335k
                                ps_coarse_layer,
9966
335k
                                pos_x + next_blk_offset,
9967
335k
                                pos_y - prev_blk_offset,
9968
335k
                                i1_ref_idx,
9969
335k
                                resultid);
9970
335k
                            pf_hme_project_coloc_candt(
9971
335k
                                ps_candt_prj_tl[0],
9972
335k
                                ps_curr_layer,
9973
335k
                                ps_coarse_layer,
9974
335k
                                pos_x - prev_blk_offset,
9975
335k
                                pos_y - prev_blk_offset,
9976
335k
                                i1_ref_idx,
9977
335k
                                resultid);
9978
9979
335k
                            i4_num_srch_cands += 2;
9980
335k
                        }
9981
552k
                    }
9982
552k
                    if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983
216k
                    {
9984
216k
                        resultid = 1;
9985
216k
                        pf_hme_project_coloc_candt(
9986
216k
                            ps_candt_prj_br[1],
9987
216k
                            ps_curr_layer,
9988
216k
                            ps_coarse_layer,
9989
216k
                            pos_x + next_blk_offset,
9990
216k
                            pos_y + next_blk_offset,
9991
216k
                            i1_ref_idx,
9992
216k
                            resultid);
9993
216k
                        pf_hme_project_coloc_candt(
9994
216k
                            ps_candt_prj_bl[1],
9995
216k
                            ps_curr_layer,
9996
216k
                            ps_coarse_layer,
9997
216k
                            pos_x - prev_blk_offset,
9998
216k
                            pos_y + next_blk_offset,
9999
216k
                            i1_ref_idx,
10000
216k
                            resultid);
10001
216k
                        pf_hme_project_coloc_candt(
10002
216k
                            ps_candt_prj_r[1],
10003
216k
                            ps_curr_layer,
10004
216k
                            ps_coarse_layer,
10005
216k
                            pos_x + next_blk_offset,
10006
216k
                            pos_y,
10007
216k
                            i1_ref_idx,
10008
216k
                            resultid);
10009
216k
                        pf_hme_project_coloc_candt(
10010
216k
                            ps_candt_prj_b[1],
10011
216k
                            ps_curr_layer,
10012
216k
                            ps_coarse_layer,
10013
216k
                            pos_x,
10014
216k
                            pos_y + next_blk_offset,
10015
216k
                            i1_ref_idx,
10016
216k
                            resultid);
10017
10018
216k
                        i4_num_srch_cands += 4;
10019
10020
216k
                        pf_hme_project_coloc_candt(
10021
216k
                            ps_candt_prj_tr[1],
10022
216k
                            ps_curr_layer,
10023
216k
                            ps_coarse_layer,
10024
216k
                            pos_x + next_blk_offset,
10025
216k
                            pos_y - prev_blk_offset,
10026
216k
                            i1_ref_idx,
10027
216k
                            resultid);
10028
216k
                        pf_hme_project_coloc_candt(
10029
216k
                            ps_candt_prj_tl[1],
10030
216k
                            ps_curr_layer,
10031
216k
                            ps_coarse_layer,
10032
216k
                            pos_x - prev_blk_offset,
10033
216k
                            pos_y - prev_blk_offset,
10034
216k
                            i1_ref_idx,
10035
216k
                            resultid);
10036
216k
                        pf_hme_project_coloc_candt(
10037
216k
                            ps_candt_prj_t[1],
10038
216k
                            ps_curr_layer,
10039
216k
                            ps_coarse_layer,
10040
216k
                            pos_x,
10041
216k
                            pos_y - prev_blk_offset,
10042
216k
                            i1_ref_idx,
10043
216k
                            resultid);
10044
10045
216k
                        i4_num_srch_cands += 3;
10046
216k
                    }
10047
10048
                    /* Note this block also clips the MV range for all candidates */
10049
#ifdef _DEBUG
10050
                    {
10051
                        S32 candt;
10052
                        range_prms_t *ps_range_prms;
10053
10054
                        S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055
                        for(candt = 0; candt < i4_num_srch_cands; candt++)
10056
                        {
10057
                            search_node_t *ps_search_node;
10058
10059
                            ps_search_node =
10060
                                s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061
10062
                            ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063
10064
                            if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065
                               (ps_search_node->i1_ref_idx < 0))
10066
                            {
10067
                                ASSERT(0);
10068
                            }
10069
                        }
10070
                    }
10071
#endif
10072
10073
552k
                    {
10074
552k
                        S32 srch_cand;
10075
552k
                        S32 num_unique_nodes = 0;
10076
552k
                        S32 num_nodes_searched = 0;
10077
552k
                        S32 num_best_cand = 0;
10078
552k
                        S08 i1_grid_enable = 0;
10079
552k
                        search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080
                        /* has list of valid partition to search terminated by -1 */
10081
552k
                        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082
552k
                        S32 center_x;
10083
552k
                        S32 center_y;
10084
10085
                        /* indicates if the centre point of grid needs to be explicitly added for search */
10086
552k
                        S32 add_centre = 0;
10087
10088
552k
                        memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089
552k
                        center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090
552k
                        center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10091
10092
552k
                        for(srch_cand = 0;
10093
7.43M
                            (srch_cand < i4_num_srch_cands) &&
10094
6.97M
                            (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095
6.88M
                            srch_cand++)
10096
6.88M
                        {
10097
6.88M
                            search_node_t s_search_node_temp =
10098
6.88M
                                s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099
10100
6.88M
                            s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10101
10102
                            /* Clip the motion vectors as well here since after clipping
10103
                            two candidates can become same and they will be removed during deduplication */
10104
6.88M
                            CLIP_MV_WITHIN_RANGE(
10105
6.88M
                                s_search_node_temp.s_mv.i2_mvx,
10106
6.88M
                                s_search_node_temp.s_mv.i2_mvy,
10107
6.88M
                                s_search_prms_blk.aps_mv_range[0],
10108
6.88M
                                ps_refine_prms->i4_num_steps_fpel_refine,
10109
6.88M
                                ps_refine_prms->i4_num_steps_hpel_refine,
10110
6.88M
                                ps_refine_prms->i4_num_steps_qpel_refine);
10111
10112
                            /* PT_C */
10113
6.88M
                            INSERT_NEW_NODE(
10114
6.88M
                                as_unique_search_nodes,
10115
6.88M
                                num_unique_nodes,
10116
6.88M
                                s_search_node_temp,
10117
6.88M
                                0,
10118
6.88M
                                au4_unique_node_map,
10119
6.88M
                                center_x,
10120
6.88M
                                center_y,
10121
6.88M
                                1);
10122
10123
6.88M
                            num_nodes_searched += 1;
10124
6.88M
                        }
10125
552k
                        num_unique_nodes =
10126
552k
                            MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127
10128
                        /* If the number of unique candidates to be refined is 2 or more,
10129
                        then filter out and choose the best two here */
10130
552k
                        if(num_unique_nodes >= 2)
10131
383k
                        {
10132
383k
                            S32 num_results;
10133
383k
                            S32 cnt;
10134
383k
                            S32 *pi4_valid_part_ids;
10135
383k
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136
383k
                            s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137
383k
                            pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138
10139
                            /* pi4_valid_part_ids is updated inside */
10140
383k
                            hme_pred_search_no_encode(
10141
383k
                                &s_search_prms_blk,
10142
383k
                                ps_curr_layer,
10143
383k
                                &ps_ctxt->s_wt_pred,
10144
383k
                                pi4_valid_part_ids,
10145
383k
                                1,
10146
383k
                                e_me_quality_presets,
10147
383k
                                i1_grid_enable,
10148
383k
                                (ihevce_me_optimised_function_list_t *)
10149
383k
                                    ps_ctxt->pv_me_optimised_function_list
10150
10151
383k
                            );
10152
10153
383k
                            num_best_cand = 0;
10154
383k
                            cnt = 0;
10155
383k
                            num_results = ps_search_results->u1_num_results_per_part;
10156
10157
1.43M
                            while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158
1.05M
                            {
10159
1.05M
                                num_results =
10160
1.05M
                                    MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161
10162
2.27M
                                for(i = 0; i < num_results; i++)
10163
1.21M
                                {
10164
1.21M
                                    search_node_t s_search_node_temp;
10165
1.21M
                                    s_search_node_temp =
10166
1.21M
                                        *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167
1.21M
                                    if(s_search_node_temp.i1_ref_idx >= 0)
10168
1.21M
                                    {
10169
1.21M
                                        INSERT_NEW_NODE_NOMAP(
10170
1.21M
                                            as_best_two_proj_node,
10171
1.21M
                                            num_best_cand,
10172
1.21M
                                            s_search_node_temp,
10173
1.21M
                                            0);
10174
1.21M
                                    }
10175
1.21M
                                }
10176
1.05M
                            }
10177
383k
                        }
10178
168k
                        else
10179
168k
                        {
10180
168k
                            add_centre = 1;
10181
168k
                            num_best_cand = num_unique_nodes;
10182
168k
                            as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183
168k
                        }
10184
10185
552k
                        num_unique_nodes = 0;
10186
552k
                        num_nodes_searched = 0;
10187
10188
552k
                        if(1 == num_best_cand)
10189
361k
                        {
10190
361k
                            search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191
361k
                            S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192
361k
                            S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193
361k
                            S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194
10195
361k
                            i1_grid_enable = 1;
10196
10197
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200
10201
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204
10205
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208
10209
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212
10213
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216
10217
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220
10221
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224
10225
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226
361k
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227
361k
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228
10229
361k
                            if(add_centre)
10230
168k
                            {
10231
168k
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232
168k
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233
168k
                                as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234
168k
                            }
10235
361k
                        }
10236
191k
                        else
10237
191k
                        {
10238
                            /* For the candidates where refinement was required, choose the best two */
10239
622k
                            for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240
431k
                            {
10241
431k
                                search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242
431k
                                WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243
431k
                                WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244
10245
                                /* Because there may not be two best unique candidates (because of clipping),
10246
                                second best candidate can be uninitialized, ignore that */
10247
431k
                                if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248
431k
                                   s_search_node_temp.i1_ref_idx < 0)
10249
0
                                {
10250
0
                                    num_nodes_searched++;
10251
0
                                    continue;
10252
0
                                }
10253
10254
                                /* PT_C */
10255
                                /* Since the center point has already been evaluated and best results are persistent,
10256
                                it will not be evaluated again */
10257
431k
                                if(add_centre) /* centre point added explicitly again if search results is not updated */
10258
0
                                {
10259
0
                                    INSERT_NEW_NODE(
10260
0
                                        as_unique_search_nodes,
10261
0
                                        num_unique_nodes,
10262
0
                                        s_search_node_temp,
10263
0
                                        0,
10264
0
                                        au4_unique_node_map,
10265
0
                                        center_x,
10266
0
                                        center_y,
10267
0
                                        1);
10268
0
                                }
10269
10270
                                /* PT_L */
10271
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10273
431k
                                INSERT_NEW_NODE(
10274
431k
                                    as_unique_search_nodes,
10275
431k
                                    num_unique_nodes,
10276
431k
                                    s_search_node_temp,
10277
431k
                                    0,
10278
431k
                                    au4_unique_node_map,
10279
431k
                                    center_x,
10280
431k
                                    center_y,
10281
431k
                                    1);
10282
10283
                                /* PT_T */
10284
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10285
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286
431k
                                INSERT_NEW_NODE(
10287
431k
                                    as_unique_search_nodes,
10288
431k
                                    num_unique_nodes,
10289
431k
                                    s_search_node_temp,
10290
431k
                                    0,
10291
431k
                                    au4_unique_node_map,
10292
431k
                                    center_x,
10293
431k
                                    center_y,
10294
431k
                                    1);
10295
10296
                                /* PT_R */
10297
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10299
431k
                                INSERT_NEW_NODE(
10300
431k
                                    as_unique_search_nodes,
10301
431k
                                    num_unique_nodes,
10302
431k
                                    s_search_node_temp,
10303
431k
                                    0,
10304
431k
                                    au4_unique_node_map,
10305
431k
                                    center_x,
10306
431k
                                    center_y,
10307
431k
                                    1);
10308
10309
                                /* PT_B */
10310
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10311
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312
431k
                                INSERT_NEW_NODE(
10313
431k
                                    as_unique_search_nodes,
10314
431k
                                    num_unique_nodes,
10315
431k
                                    s_search_node_temp,
10316
431k
                                    0,
10317
431k
                                    au4_unique_node_map,
10318
431k
                                    center_x,
10319
431k
                                    center_y,
10320
431k
                                    1);
10321
10322
                                /* PT_TL */
10323
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325
431k
                                INSERT_NEW_NODE(
10326
431k
                                    as_unique_search_nodes,
10327
431k
                                    num_unique_nodes,
10328
431k
                                    s_search_node_temp,
10329
431k
                                    0,
10330
431k
                                    au4_unique_node_map,
10331
431k
                                    center_x,
10332
431k
                                    center_y,
10333
431k
                                    1);
10334
10335
                                /* PT_TR */
10336
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338
431k
                                INSERT_NEW_NODE(
10339
431k
                                    as_unique_search_nodes,
10340
431k
                                    num_unique_nodes,
10341
431k
                                    s_search_node_temp,
10342
431k
                                    0,
10343
431k
                                    au4_unique_node_map,
10344
431k
                                    center_x,
10345
431k
                                    center_y,
10346
431k
                                    1);
10347
10348
                                /* PT_BL */
10349
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351
431k
                                INSERT_NEW_NODE(
10352
431k
                                    as_unique_search_nodes,
10353
431k
                                    num_unique_nodes,
10354
431k
                                    s_search_node_temp,
10355
431k
                                    0,
10356
431k
                                    au4_unique_node_map,
10357
431k
                                    center_x,
10358
431k
                                    center_y,
10359
431k
                                    1);
10360
10361
                                /* PT_BR */
10362
431k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363
431k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364
431k
                                INSERT_NEW_NODE(
10365
431k
                                    as_unique_search_nodes,
10366
431k
                                    num_unique_nodes,
10367
431k
                                    s_search_node_temp,
10368
431k
                                    0,
10369
431k
                                    au4_unique_node_map,
10370
431k
                                    center_x,
10371
431k
                                    center_y,
10372
431k
                                    1);
10373
431k
                            }
10374
191k
                        }
10375
10376
552k
                        s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377
552k
                        s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378
10379
                        /*****************************************************************/
10380
                        /* Call the search algorithm, this includes:                     */
10381
                        /* Pre-Search-Refinement (for coarse candts)                     */
10382
                        /* Search on each candidate                                      */
10383
                        /* Post Search Refinement on winners/other new candidates        */
10384
                        /*****************************************************************/
10385
10386
552k
                        hme_pred_search_no_encode(
10387
552k
                            &s_search_prms_blk,
10388
552k
                            ps_curr_layer,
10389
552k
                            &ps_ctxt->s_wt_pred,
10390
552k
                            ai4_valid_part_ids,
10391
552k
                            0,
10392
552k
                            e_me_quality_presets,
10393
552k
                            i1_grid_enable,
10394
552k
                            (ihevce_me_optimised_function_list_t *)
10395
552k
                                ps_ctxt->pv_me_optimised_function_list);
10396
10397
552k
                        i1_grid_enable = 0;
10398
552k
                    }
10399
552k
                }
10400
10401
                /* for non encode layer update MV and end processing for block */
10402
305k
                {
10403
305k
                    WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404
305k
                    search_node_t *ps_search_node;
10405
                    /* now update the reqd results back to the layer mv bank. */
10406
305k
                    if(1 == ps_refine_prms->i4_layer_id)
10407
283k
                    {
10408
283k
                        hme_update_mv_bank_in_l1_me(
10409
283k
                            ps_search_results,
10410
283k
                            ps_curr_layer->ps_layer_mvbank,
10411
283k
                            blk_x,
10412
283k
                            blk_y,
10413
283k
                            &s_mv_update_prms);
10414
283k
                    }
10415
22.0k
                    else
10416
22.0k
                    {
10417
22.0k
                        hme_update_mv_bank_noencode(
10418
22.0k
                            ps_search_results,
10419
22.0k
                            ps_curr_layer->ps_layer_mvbank,
10420
22.0k
                            blk_x,
10421
22.0k
                            blk_y,
10422
22.0k
                            &s_mv_update_prms);
10423
22.0k
                    }
10424
10425
                    /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10426
                    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
10427
305k
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428
253k
                    {
10429
253k
                        WORD32 i4_j;
10430
253k
                        layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431
10432
                        //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433
                        /* Not considering this for Dyn. Search Update */
10434
253k
                        {
10435
686k
                            for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10436
432k
                                i4_ref_id++)
10437
432k
                            {
10438
432k
                                ps_search_node =
10439
432k
                                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10440
10441
1.07M
                                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10442
640k
                                {
10443
640k
                                    hme_update_dynamic_search_params(
10444
640k
                                        &ps_ctxt->s_coarse_dyn_range_prms
10445
640k
                                             .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10446
640k
                                                               [i4_ref_id],
10447
640k
                                        ps_search_node->s_mv.i2_mvy);
10448
10449
640k
                                    ps_search_node++;
10450
640k
                                }
10451
432k
                            }
10452
253k
                        }
10453
253k
                    }
10454
10455
305k
                    if(1 == ps_refine_prms->i4_layer_id)
10456
283k
                    {
10457
283k
                        WORD32 wt_pred_val, log_wt_pred_val;
10458
283k
                        WORD32 ref_id_of_nearest_poc = 0;
10459
283k
                        WORD32 max_val = 0x7fffffff;
10460
283k
                        WORD32 max_l0_val = 0x7fffffff;
10461
283k
                        WORD32 max_l1_val = 0x7fffffff;
10462
283k
                        WORD32 cur_val;
10463
283k
                        WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10464
10465
283k
                        WORD32 bestl0_sad = 0x7fffffff;
10466
283k
                        WORD32 bestl1_sad = 0x7fffffff;
10467
283k
                        search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10468
10469
800k
                        for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10470
516k
                            i4_ref_id++)
10471
516k
                        {
10472
516k
                            wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10473
516k
                            log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10474
10475
516k
                            ps_search_node =
10476
516k
                                ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10477
10478
516k
                            i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10479
516k
                                                     ((1 << log_wt_pred_val) >> 1)) >>
10480
516k
                                                    log_wt_pred_val;
10481
10482
516k
                            i4_local_cost_weighted_pred =
10483
516k
                                i4_local_weighted_sad +
10484
516k
                                (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10485
                            //the loop is redundant as the results are already sorted based on total cost
10486
                            //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10487
516k
                            {
10488
516k
                                if(i4_local_cost_weighted_pred < min_cost)
10489
325k
                                {
10490
325k
                                    min_cost = i4_local_cost_weighted_pred;
10491
325k
                                    min_sad = i4_local_weighted_sad;
10492
325k
                                }
10493
516k
                            }
10494
10495
                            /* For P frame, calculate the nearest poc which is either P or I frame*/
10496
516k
                            if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497
398k
                            {
10498
398k
                                if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10499
398k
                                {
10500
398k
                                    cur_val =
10501
398k
                                        ABS(ps_ctxt->i4_curr_poc -
10502
398k
                                            ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10503
398k
                                    if(cur_val < max_val)
10504
232k
                                    {
10505
232k
                                        max_val = cur_val;
10506
232k
                                        ref_id_of_nearest_poc = i4_ref_id;
10507
232k
                                    }
10508
398k
                                }
10509
398k
                            }
10510
516k
                        }
10511
                        /* Store ME cost w.r.t. the past frame only for P frame */
10512
283k
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10513
232k
                        {
10514
232k
                            if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10515
232k
                            {
10516
232k
                                WORD16 i2_mvx, i2_mvy;
10517
10518
232k
                                WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10519
232k
                                WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10520
232k
                                WORD32 z_scan_idx =
10521
232k
                                    gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10522
232k
                                WORD32 wt, log_wt;
10523
10524
                                /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10525
                                <= (1 + ps_ctxt->num_b_frms));*/
10526
10527
                                /*obtain mvx and mvy */
10528
232k
                                i2_mvx =
10529
232k
                                    ps_search_results
10530
232k
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10531
232k
                                        ->s_mv.i2_mvx;
10532
232k
                                i2_mvy =
10533
232k
                                    ps_search_results
10534
232k
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10535
232k
                                        ->s_mv.i2_mvy;
10536
10537
                                /* fetch the weight and the rounding-shift amount applied to the SAD of the nearest ref below */
10538
232k
                                wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10539
232k
                                log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10540
10541
                                /*register the min cost for l1 me in blk context */
10542
232k
                                ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10543
232k
                                    ((ps_search_results
10544
232k
                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10545
232k
                                          ->i4_sad *
10546
232k
                                      wt) +
10547
232k
                                     ((1 << log_wt) >> 1)) >>
10548
232k
                                    log_wt;
10549
232k
                                ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10550
232k
                                    ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10551
232k
                                    (ps_search_results
10552
232k
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10553
232k
                                         ->i4_tot_cost -
10554
232k
                                     ps_search_results
10555
232k
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10556
232k
                                         ->i4_sad);
10557
                                /*for complexity change detection*/
10558
232k
                                ps_ctxt->i4_num_blks++;
10559
232k
                                if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10560
232k
                                   (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10561
98.6k
                                {
10562
98.6k
                                    ps_ctxt->i4_num_blks_high_sad++;
10563
98.6k
                                }
10564
232k
                            }
10565
232k
                        }
10566
283k
                    }
10567
10568
                    /* EIID: Early inter intra decisions */
10569
                    /* tap L1 level SAD for inter intra decisions */
10570
305k
                    if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10571
202k
                       (!ps_ctxt->s_frm_prms
10572
202k
                             .is_i_pic))  //for high-quality preset->disable early decisions
10573
202k
                    {
10574
202k
                        if(1 == ps_refine_prms->i4_layer_id)
10575
191k
                        {
10576
191k
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10577
191k
                            ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10578
191k
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10579
191k
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10580
191k
                            WORD32 z_scan_idx =
10581
191k
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10582
191k
                            ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10583
10584
                            /*register the min cost for l1 me in blk context */
10585
191k
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10586
191k
                                i4_min_sad_cost_8x8_block;
10587
191k
                            i4_num_comparisions++;
10588
10589
                            /* take early inter-intra decision here */
10590
191k
                            ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10591
191k
#if DISABLE_INTRA_IN_BPICS
10592
191k
                            if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10593
53.0k
                               (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10594
960
                            {
10595
960
                                ps_curr_ed_blk_ctxt->intra_or_inter =
10596
960
                                    2; /*eval only inter if inter cost is less */
10597
960
                                i4_num_inter_wins++;
10598
960
                            }
10599
190k
                            else
10600
190k
#endif
10601
190k
                            {
10602
190k
                                if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10603
190k
                                   ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10604
190k
                                     i4_threshold_multiplier) /
10605
190k
                                    i4_threshold_divider))
10606
58.6k
                                {
10607
58.6k
                                    ps_curr_ed_blk_ctxt->intra_or_inter =
10608
58.6k
                                        2; /*eval only inter if inter cost is less */
10609
58.6k
                                    i4_num_inter_wins++;
10610
58.6k
                                }
10611
190k
                            }
10612
10613
                            //{
10614
                            //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10615
                            //      blk_x,blk_y,
10616
                            //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10617
                            //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10618
                            //      i4_min_sad_cost_8x8_block
10619
                            //      );
10620
                            //}
10621
10622
191k
                        }  //end of layer-1
10623
202k
                    }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10624
103k
                    else
10625
103k
                    {
10626
103k
                        if(1 == ps_refine_prms->i4_layer_id)
10627
92.6k
                        {
10628
92.6k
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10629
92.6k
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10630
92.6k
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10631
92.6k
                            WORD32 z_scan_idx =
10632
92.6k
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10633
10634
                            /*register the min cost for l1 me in blk context */
10635
92.6k
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10636
92.6k
                                i4_min_sad_cost_8x8_block;
10637
92.6k
                        }
10638
103k
                    }
10639
305k
                    if(1 == ps_refine_prms->i4_layer_id)
10640
283k
                    {
10641
283k
                        WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10642
283k
                        WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10643
283k
                        WORD32 z_scan_idx =
10644
283k
                            gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10645
10646
283k
                        ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10647
283k
                            min_sad;
10648
10649
283k
                        if(min_cost <
10650
283k
                           ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10651
168k
                        {
10652
168k
                            ps_ctxt->i4_L1_hme_best_cost += min_cost;
10653
168k
                            ps_ctxt->i4_L1_hme_sad += min_sad;
10654
168k
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10655
168k
                        }
10656
115k
                        else
10657
115k
                        {
10658
115k
                            ps_ctxt->i4_L1_hme_best_cost +=
10659
115k
                                ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10660
115k
                            ps_ctxt->i4_L1_hme_sad +=
10661
115k
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10662
115k
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10663
115k
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10664
115k
                        }
10665
283k
                    }
10666
305k
                }
10667
305k
            }
10668
10669
            /* Update the number of blocks processed in the current row */
10670
305k
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
10671
103k
            {
10672
103k
                ihevce_dmgr_set_row_row_sync(
10673
103k
                    pv_hme_dep_mngr,
10674
103k
                    (i4_ctb_x + 1),
10675
103k
                    blk_y,
10676
103k
                    0 /* Col Tile No. : Not supported in PreEnc*/);
10677
103k
            }
10678
305k
        }
10679
10680
        /* set the output dependency after completion of row */
10681
39.7k
        ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10682
39.7k
    }
10683
9.62k
}