Coverage Report

Created: 2025-07-11 06:43

/src/libhevc/encoder/hme_refine.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
******************************************************************************
22
* @file hme_refine.c
23
*
24
* @brief
25
*    Contains the implementation of the refinement layer searches and related
26
*    functionality like CU merge.
27
*
28
* @author
29
*    Ittiam
30
*
31
*
32
* List of Functions
33
*
34
*
35
******************************************************************************
36
*/
37
38
/*****************************************************************************/
39
/* File Includes                                                             */
40
/*****************************************************************************/
41
/* System include files */
42
#include <stdio.h>
43
#include <string.h>
44
#include <stdlib.h>
45
#include <assert.h>
46
#include <stdarg.h>
47
#include <math.h>
48
#include <limits.h>
49
50
/* User include files */
51
#include "ihevc_typedefs.h"
52
#include "itt_video_api.h"
53
#include "ihevce_api.h"
54
55
#include "rc_cntrl_param.h"
56
#include "rc_frame_info_collector.h"
57
#include "rc_look_ahead_params.h"
58
59
#include "ihevc_defs.h"
60
#include "ihevc_structs.h"
61
#include "ihevc_platform_macros.h"
62
#include "ihevc_deblk.h"
63
#include "ihevc_itrans_recon.h"
64
#include "ihevc_chroma_itrans_recon.h"
65
#include "ihevc_chroma_intra_pred.h"
66
#include "ihevc_intra_pred.h"
67
#include "ihevc_inter_pred.h"
68
#include "ihevc_mem_fns.h"
69
#include "ihevc_padding.h"
70
#include "ihevc_weighted_pred.h"
71
#include "ihevc_sao.h"
72
#include "ihevc_resi_trans.h"
73
#include "ihevc_quant_iquant_ssd.h"
74
#include "ihevc_cabac_tables.h"
75
76
#include "ihevce_defs.h"
77
#include "ihevce_lap_enc_structs.h"
78
#include "ihevce_multi_thrd_structs.h"
79
#include "ihevce_multi_thrd_funcs.h"
80
#include "ihevce_me_common_defs.h"
81
#include "ihevce_had_satd.h"
82
#include "ihevce_error_codes.h"
83
#include "ihevce_bitstream.h"
84
#include "ihevce_cabac.h"
85
#include "ihevce_rdoq_macros.h"
86
#include "ihevce_function_selector.h"
87
#include "ihevce_enc_structs.h"
88
#include "ihevce_entropy_structs.h"
89
#include "ihevce_cmn_utils_instr_set_router.h"
90
#include "ihevce_enc_loop_structs.h"
91
#include "ihevce_bs_compute_ctb.h"
92
#include "ihevce_global_tables.h"
93
#include "ihevce_dep_mngr_interface.h"
94
#include "hme_datatype.h"
95
#include "hme_interface.h"
96
#include "hme_common_defs.h"
97
#include "hme_defs.h"
98
#include "ihevce_me_instr_set_router.h"
99
#include "hme_globals.h"
100
#include "hme_utils.h"
101
#include "hme_coarse.h"
102
#include "hme_fullpel.h"
103
#include "hme_subpel.h"
104
#include "hme_refine.h"
105
#include "hme_err_compute.h"
106
#include "hme_common_utils.h"
107
#include "hme_search_algo.h"
108
#include "ihevce_stasino_helpers.h"
109
#include "ihevce_common_utils.h"
110
111
/*****************************************************************************/
112
/* Globals                                                                   */
113
/*****************************************************************************/
114
115
/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
116
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118
};
119
120
/*****************************************************************************/
121
/* Extern Function declaration                                               */
122
/*****************************************************************************/
123
extern ctb_boundary_attrs_t *
124
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
126
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127
    search_node_t *ps_search_node,
128
    layer_ctxt_t *ps_curr_layer,
129
    layer_ctxt_t *ps_coarse_layer,
130
    S32 i4_pos_x,
131
    S32 i4_pos_y,
132
    S08 i1_ref_id,
133
    S32 i4_result_id);
134
135
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136
    search_node_t *ps_search_node,
137
    layer_ctxt_t *ps_curr_layer,
138
    layer_ctxt_t *ps_coarse_layer,
139
    S32 i4_pos_x,
140
    S32 i4_pos_y,
141
    S32 i4_num_act_ref_l0,
142
    U08 u1_pred_dir,
143
    U08 u1_default_ref_id,
144
    S32 i4_result_id);
145
146
/*****************************************************************************/
147
/* Function Definitions                                                      */
148
/*****************************************************************************/
149
150
void ihevce_no_wt_copy(
151
    coarse_me_ctxt_t *ps_ctxt,
152
    layer_ctxt_t *ps_curr_layer,
153
    pu_t *ps_pu,
154
    UWORD8 *pu1_temp_pred,
155
    WORD32 temp_stride,
156
    WORD32 blk_x,
157
    WORD32 blk_y)
158
0
{
159
0
    UWORD8 *pu1_ref;
160
0
    WORD32 ref_stride, ref_offset;
161
0
    WORD32 row, col, i4_tmp;
162
163
0
    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165
0
    if(ps_pu->b2_pred_mode == PRED_L0)
166
0
    {
167
0
        WORD8 i1_ref_idx;
168
169
0
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172
0
        ref_stride = ps_curr_layer->i4_inp_stride;
173
174
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177
0
        pu1_ref += ref_offset;
178
179
0
        for(row = 0; row < temp_stride; row++)
180
0
        {
181
0
            for(col = 0; col < temp_stride; col++)
182
0
            {
183
0
                i4_tmp = pu1_ref[col];
184
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185
0
            }
186
187
0
            pu1_ref += ref_stride;
188
0
            pu1_temp_pred += temp_stride;
189
0
        }
190
0
    }
191
0
    else
192
0
    {
193
0
        WORD8 i1_ref_idx;
194
195
0
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198
0
        ref_stride = ps_curr_layer->i4_inp_stride;
199
200
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203
0
        pu1_ref += ref_offset;
204
205
0
        for(row = 0; row < temp_stride; row++)
206
0
        {
207
0
            for(col = 0; col < temp_stride; col++)
208
0
            {
209
0
                i4_tmp = pu1_ref[col];
210
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211
0
            }
212
213
0
            pu1_ref += ref_stride;
214
0
            pu1_temp_pred += temp_stride;
215
0
        }
216
0
    }
217
0
}
218
219
static WORD32 hme_add_clustered_mvs_as_merge_cands(
220
    cluster_data_t *ps_cluster_base,
221
    search_node_t *ps_merge_cand,
222
    range_prms_t **pps_range_prms,
223
    U08 *pu1_refid_to_pred_dir_list,
224
    WORD32 i4_num_clusters,
225
    U08 u1_pred_dir)
226
173k
{
227
173k
    WORD32 i, j, k;
228
173k
    WORD32 i4_num_cands_added = 0;
229
173k
    WORD32 i4_num_mvs_in_cluster;
230
231
451k
    for(i = 0; i < i4_num_clusters; i++)
232
277k
    {
233
277k
        cluster_data_t *ps_data = &ps_cluster_base[i];
234
235
277k
        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236
212k
        {
237
212k
            i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239
971k
            for(j = 0; j < i4_num_mvs_in_cluster; j++)
240
758k
            {
241
758k
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242
758k
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243
758k
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245
758k
                CLIP_MV_WITHIN_RANGE(
246
758k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247
758k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248
758k
                    pps_range_prms[ps_data->ref_id],
249
758k
                    0,
250
758k
                    0,
251
758k
                    0);
252
253
1.29M
                for(k = 0; k < i4_num_cands_added; k++)
254
1.01M
                {
255
1.01M
                    if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256
1.01M
                       (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257
1.01M
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258
479k
                    {
259
479k
                        break;
260
479k
                    }
261
1.01M
                }
262
263
758k
                if(k == i4_num_cands_added)
264
279k
                {
265
279k
                    i4_num_cands_added++;
266
279k
                }
267
758k
            }
268
212k
        }
269
277k
    }
270
271
173k
    return i4_num_cands_added;
272
173k
}
273
274
static WORD32 hme_add_me_best_as_merge_cands(
275
    search_results_t **pps_child_data_array,
276
    inter_cu_results_t *ps_8x8cu_results,
277
    search_node_t *ps_merge_cand,
278
    range_prms_t **pps_range_prms,
279
    U08 *pu1_refid_to_pred_dir_list,
280
    S08 *pi1_past_list,
281
    S08 *pi1_future_list,
282
    BLK_SIZE_T e_blk_size,
283
    ME_QUALITY_PRESETS_T e_quality_preset,
284
    S32 i4_num_cands_added,
285
    U08 u1_pred_dir)
286
173k
{
287
173k
    WORD32 i, j, k;
288
173k
    WORD32 i4_max_cands_to_add;
289
290
173k
    WORD32 i4_result_id = 0;
291
292
173k
    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293
173k
    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294
173k
    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295
173k
    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297
173k
    switch(e_quality_preset)
298
173k
    {
299
173k
    case ME_PRISTINE_QUALITY:
300
173k
    {
301
173k
        i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303
173k
        break;
304
0
    }
305
0
    case ME_HIGH_QUALITY:
306
0
    {
307
        /* All 4 children are split and each grandchild contributes an MV */
308
        /* and 2 best results per grandchild */
309
0
        i4_max_cands_to_add = 4 * 4 * 2;
310
311
0
        break;
312
0
    }
313
0
    case ME_MEDIUM_SPEED:
314
0
    {
315
0
        i4_max_cands_to_add = 4 * 2 * 2;
316
317
0
        break;
318
0
    }
319
0
    case ME_HIGH_SPEED:
320
0
    case ME_XTREME_SPEED:
321
0
    case ME_XTREME_SPEED_25:
322
0
    {
323
0
        i4_max_cands_to_add = 4 * 2 * 1;
324
325
0
        break;
326
0
    }
327
173k
    }
328
329
868k
    while(i4_result_id < 4)
330
695k
    {
331
3.47M
        for(i = 0; i < 4; i++)
332
2.78M
        {
333
2.78M
            inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334
2.78M
            inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336
2.78M
            if(!pps_child_data_array[i]->u1_split_flag)
337
2.62M
            {
338
2.62M
                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340
2.62M
                if(ps_child_data->u1_num_best_results <= i4_result_id)
341
655k
                {
342
655k
                    continue;
343
655k
                }
344
345
1.96M
                if(ps_data->as_pu_results->pu.b1_intra_flag)
346
1.35k
                {
347
1.35k
                    continue;
348
1.35k
                }
349
350
5.24M
                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351
3.27M
                {
352
3.27M
                    mv_t *ps_mv;
353
354
3.27M
                    S08 i1_ref_idx;
355
356
3.27M
                    pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
358
3.27M
                    if(u1_pred_dir !=
359
3.27M
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360
754k
                    {
361
754k
                        continue;
362
754k
                    }
363
364
2.52M
                    if(u1_pred_dir)
365
45.4k
                    {
366
45.4k
                        ps_mv = &ps_pu->mv.s_l1_mv;
367
45.4k
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368
45.4k
                    }
369
2.47M
                    else
370
2.47M
                    {
371
2.47M
                        ps_mv = &ps_pu->mv.s_l0_mv;
372
2.47M
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373
2.47M
                    }
374
375
2.52M
                    if(-1 == i1_ref_idx)
376
0
                    {
377
0
                        continue;
378
0
                    }
379
380
2.52M
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381
2.52M
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382
2.52M
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384
2.52M
                    CLIP_MV_WITHIN_RANGE(
385
2.52M
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386
2.52M
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387
2.52M
                        pps_range_prms[i1_ref_idx],
388
2.52M
                        0,
389
2.52M
                        0,
390
2.52M
                        0);
391
392
3.87M
                    for(k = 0; k < i4_num_cands_added; k++)
393
3.83M
                    {
394
3.83M
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395
3.83M
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396
3.83M
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397
2.48M
                        {
398
2.48M
                            break;
399
2.48M
                        }
400
3.83M
                    }
401
402
2.52M
                    if(k == i4_num_cands_added)
403
40.3k
                    {
404
40.3k
                        i4_num_cands_added++;
405
406
40.3k
                        if(i4_max_cands_to_add <= i4_num_cands_added)
407
0
                        {
408
0
                            return i4_num_cands_added;
409
0
                        }
410
40.3k
                    }
411
2.52M
                }
412
1.96M
            }
413
157k
            else
414
157k
            {
415
789k
                for(j = 0; j < 4; j++)
416
631k
                {
417
631k
                    mv_t *ps_mv;
418
419
631k
                    S08 i1_ref_idx;
420
421
631k
                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422
631k
                    pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424
631k
                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426
631k
                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427
416k
                    {
428
416k
                        continue;
429
416k
                    }
430
431
214k
                    if(ps_data->as_pu_results->pu.b1_intra_flag)
432
43.1k
                    {
433
43.1k
                        continue;
434
43.1k
                    }
435
436
171k
                    if(u1_pred_dir !=
437
171k
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438
37.1k
                    {
439
37.1k
                        continue;
440
37.1k
                    }
441
442
134k
                    if(u1_pred_dir)
443
43.5k
                    {
444
43.5k
                        ps_mv = &ps_pu->mv.s_l1_mv;
445
43.5k
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446
43.5k
                    }
447
91.0k
                    else
448
91.0k
                    {
449
91.0k
                        ps_mv = &ps_pu->mv.s_l0_mv;
450
91.0k
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451
91.0k
                    }
452
453
134k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454
134k
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455
134k
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457
134k
                    CLIP_MV_WITHIN_RANGE(
458
134k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459
134k
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460
134k
                        pps_range_prms[i1_ref_idx],
461
134k
                        0,
462
134k
                        0,
463
134k
                        0);
464
465
345k
                    for(k = 0; k < i4_num_cands_added; k++)
466
344k
                    {
467
344k
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468
344k
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469
344k
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470
133k
                        {
471
133k
                            break;
472
133k
                        }
473
344k
                    }
474
475
134k
                    if(k == i4_num_cands_added)
476
1.01k
                    {
477
1.01k
                        i4_num_cands_added++;
478
479
1.01k
                        if(i4_max_cands_to_add <= i4_num_cands_added)
480
0
                        {
481
0
                            return i4_num_cands_added;
482
0
                        }
483
1.01k
                    }
484
134k
                }
485
157k
            }
486
2.78M
        }
487
488
695k
        i4_result_id++;
489
695k
    }
490
491
173k
    return i4_num_cands_added;
492
173k
}
493
494
WORD32 hme_add_cands_for_merge_eval(
495
    ctb_cluster_info_t *ps_cluster_info,
496
    search_results_t **pps_child_data_array,
497
    inter_cu_results_t *ps_8x8cu_results,
498
    range_prms_t **pps_range_prms,
499
    search_node_t *ps_merge_cand,
500
    U08 *pu1_refid_to_pred_dir_list,
501
    S08 *pi1_past_list,
502
    S08 *pi1_future_list,
503
    ME_QUALITY_PRESETS_T e_quality_preset,
504
    BLK_SIZE_T e_blk_size,
505
    U08 u1_pred_dir,
506
    U08 u1_blk_id)
507
173k
{
508
173k
    WORD32 i4_num_cands_added = 0;
509
510
173k
    if(ME_PRISTINE_QUALITY == e_quality_preset)
511
173k
    {
512
173k
        cluster_data_t *ps_cluster_primo;
513
514
173k
        WORD32 i4_num_clusters;
515
516
173k
        if(BLK_32x32 == e_blk_size)
517
157k
        {
518
157k
            ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519
157k
            i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520
157k
        }
521
15.9k
        else
522
15.9k
        {
523
15.9k
            ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524
15.9k
            i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525
15.9k
        }
526
527
173k
        i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528
173k
            ps_cluster_primo,
529
173k
            ps_merge_cand,
530
173k
            pps_range_prms,
531
173k
            pu1_refid_to_pred_dir_list,
532
173k
            i4_num_clusters,
533
173k
            u1_pred_dir);
534
173k
    }
535
536
173k
    i4_num_cands_added = hme_add_me_best_as_merge_cands(
537
173k
        pps_child_data_array,
538
173k
        ps_8x8cu_results,
539
173k
        ps_merge_cand,
540
173k
        pps_range_prms,
541
173k
        pu1_refid_to_pred_dir_list,
542
173k
        pi1_past_list,
543
173k
        pi1_future_list,
544
173k
        e_blk_size,
545
173k
        e_quality_preset,
546
173k
        i4_num_cands_added,
547
173k
        u1_pred_dir);
548
549
173k
    return i4_num_cands_added;
550
173k
}
551
552
/**
553
********************************************************************************
554
*  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555
*                                           S08 i1_ref_idx,
556
*                                           S32 i4_best_part_type,
557
*                                           S32 i4_is_vert)
558
*
559
*  @brief  Given a target partition orientation in the merged CU, and the
560
*          partition type of most likely partition this fxn picks up
561
*          candidates from the 4 constituent CUs and does refinement search
562
*          to identify best results for the merge CU across active partitions
563
*
564
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565
*                  these params, the search result structure is also derived and
566
*                 updated during the search
567
*
568
*  @param[in] i1_ref_idx : ID of the buffer within the search results to update.
569
*               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570
*
571
*  @param[in] i4_best_part_type : partition type of potential partition in the
572
*              merged CU, -1 if the merge process has not yet been able to
573
*              determine this.
574
*
575
*  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576
*             orientation or horizontal orientation.
577
*
578
*  @return Number of merge candidates
579
********************************************************************************
580
*/
581
WORD32 hme_pick_eval_merge_candts(
582
    hme_merge_prms_t *ps_merge_prms,
583
    hme_subpel_prms_t *ps_subpel_prms,
584
    S32 i4_search_idx,
585
    S32 i4_best_part_type,
586
    S32 i4_is_vert,
587
    wgt_pred_ctxt_t *ps_wt_inp_prms,
588
    S32 i4_frm_qstep,
589
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591
710k
{
592
710k
    S32 x_off, y_off;
593
710k
    search_node_t *ps_search_node;
594
710k
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595
710k
    S32 i4_num_valid_parts;
596
710k
    pred_ctxt_t *ps_pred_ctxt;
597
598
710k
    search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599
710k
    S32 num_unique_nodes_cu_merge = 0;
600
601
710k
    search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602
710k
    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603
710k
    S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605
710k
    search_results_t *aps_child_results[4];
606
710k
    layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608
710k
    S32 i4_ref_stride, i, j;
609
710k
    result_upd_prms_t s_result_prms;
610
611
710k
    BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612
710k
    S32 i4_offset;
613
614
    /*************************************************************************/
615
    /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616
    /* This function                                                         */
617
    /*************************************************************************/
618
710k
    PF_SAD_FXN_T pf_err_compute;
619
710k
    S32 ai4_sad_grid[9][17];
620
710k
    err_prms_t s_err_prms;
621
622
    /*************************************************************************/
623
    /* Allowed MV RANGE                                                      */
624
    /*************************************************************************/
625
710k
    range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626
710k
    PF_INTERP_FXN_T pf_qpel_interp;
627
710k
    PF_MV_COST_FXN pf_mv_cost_compute;
628
710k
    WORD32 pred_lx;
629
710k
    U08 *apu1_hpel_ref[4];
630
631
710k
    interp_prms_t s_interp_prms;
632
710k
    S32 i4_interp_buf_id;
633
634
710k
    S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635
710k
    S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637
    /* Sanity checks */
638
710k
    ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640
710k
    s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642
    /* Initialize all the ptrs to child CUs for merge decision */
643
710k
    aps_child_results[0] = ps_merge_prms->ps_results_tl;
644
710k
    aps_child_results[1] = ps_merge_prms->ps_results_tr;
645
710k
    aps_child_results[2] = ps_merge_prms->ps_results_bl;
646
710k
    aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648
710k
    num_unique_nodes_cu_merge = 0;
649
650
710k
    pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652
710k
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653
173k
    {
654
173k
        num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655
173k
            ps_merge_prms->ps_cluster_info,
656
173k
            aps_child_results,
657
173k
            ps_merge_prms->ps_8x8_cu_results,
658
173k
            pps_range_prms,
659
173k
            as_merge_unique_node,
660
173k
            ps_search_results->pu1_is_past,
661
173k
            ps_merge_prms->pi1_past_list,
662
173k
            ps_merge_prms->pi1_future_list,
663
173k
            ps_merge_prms->e_quality_preset,
664
173k
            e_blk_size,
665
173k
            i4_search_idx,
666
173k
            (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667
173k
                (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668
173k
    }
669
537k
    else
670
537k
    {
671
        /*************************************************************************/
672
        /* Populate the list of unique search nodes in the child CUs for merge   */
673
        /* evaluation                                                            */
674
        /*************************************************************************/
675
2.68M
        for(i = 0; i < 4; i++)
676
2.14M
        {
677
2.14M
            search_node_t s_search_node;
678
679
2.14M
            PART_TYPE_T e_part_type;
680
2.14M
            PART_ID_T e_part_id;
681
682
2.14M
            WORD32 part_num;
683
684
2.14M
            search_results_t *ps_child = aps_child_results[i];
685
686
2.14M
            if(ps_child->ps_cu_results->u1_num_best_results)
687
2.14M
            {
688
2.14M
                if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689
2.14M
                     (1 == ps_child->ps_cu_results->u1_num_best_results)))
690
2.14M
                {
691
2.14M
                    e_part_type =
692
2.14M
                        (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694
2.14M
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696
                    /* Insert mvs of NxN partitions. */
697
4.30M
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698
2.15M
                        part_num++)
699
2.15M
                    {
700
2.15M
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702
2.15M
                        if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703
2.15M
                        {
704
2.15M
                            s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705
2.15M
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706
2.15M
                            {
707
2.15M
                                CLIP_MV_WITHIN_RANGE(
708
2.15M
                                    s_search_node.s_mv.i2_mvx,
709
2.15M
                                    s_search_node.s_mv.i2_mvy,
710
2.15M
                                    pps_range_prms[s_search_node.i1_ref_idx],
711
2.15M
                                    0,
712
2.15M
                                    0,
713
2.15M
                                    0);
714
715
2.15M
                                INSERT_NEW_NODE_NOMAP(
716
2.15M
                                    as_merge_unique_node,
717
2.15M
                                    num_unique_nodes_cu_merge,
718
2.15M
                                    s_search_node,
719
2.15M
                                    1);
720
2.15M
                            }
721
2.15M
                        }
722
2.15M
                    }
723
2.14M
                }
724
2.14M
            }
725
0
            else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726
0
                           .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727
0
                      (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728
0
                                .ps_cu_results->u1_num_best_results)))
729
0
            {
730
0
                search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732
0
                for(j = 0; j < 4; j++)
733
0
                {
734
0
                    e_part_type = (PART_TYPE_T)ps_results_root[j]
735
0
                                      .ps_cu_results->ps_best_results[0]
736
0
                                      .u1_part_type;
737
738
0
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740
                    /* Insert mvs of NxN partitions. */
741
0
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742
0
                        part_num++)
743
0
                    {
744
0
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746
0
                        if((ps_results_root[j]
747
0
                                .aps_part_results[i4_search_idx][e_part_id]
748
0
                                ->i1_ref_idx != -1) &&
749
0
                           (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750
0
                                 .b1_intra_flag))
751
0
                        {
752
0
                            s_search_node =
753
0
                                *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754
0
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755
0
                            {
756
0
                                CLIP_MV_WITHIN_RANGE(
757
0
                                    s_search_node.s_mv.i2_mvx,
758
0
                                    s_search_node.s_mv.i2_mvy,
759
0
                                    pps_range_prms[s_search_node.i1_ref_idx],
760
0
                                    0,
761
0
                                    0,
762
0
                                    0);
763
764
0
                                INSERT_NEW_NODE_NOMAP(
765
0
                                    as_merge_unique_node,
766
0
                                    num_unique_nodes_cu_merge,
767
0
                                    s_search_node,
768
0
                                    1);
769
0
                            }
770
0
                        }
771
0
                    }
772
0
                }
773
0
            }
774
2.14M
        }
775
537k
    }
776
777
710k
    if(0 == num_unique_nodes_cu_merge)
778
29.3k
    {
779
29.3k
        return 0;
780
29.3k
    }
781
782
    /*************************************************************************/
783
    /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784
    /* fixed through this subpel refinement for this partition.              */
785
    /* Note, we do not enable grid sads since one pt is evaluated per node   */
786
    /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787
    /*************************************************************************/
788
681k
    i4_part_mask = ps_search_results->i4_part_mask;
789
790
    /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791
681k
    if(ps_subpel_prms->i4_use_satd)
792
319k
    {
793
319k
        if(BLK_32x32 == e_blk_size)
794
286k
        {
795
286k
            pf_err_compute = hme_evalsatd_pt_pu_32x32;
796
286k
        }
797
33.1k
        else
798
33.1k
        {
799
33.1k
            pf_err_compute = hme_evalsatd_pt_pu_64x64;
800
33.1k
        }
801
319k
    }
802
361k
    else
803
361k
    {
804
361k
        pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805
361k
    }
806
807
681k
    i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
809
681k
    x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810
681k
    y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811
681k
    i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813
    /*************************************************************************/
814
    /* This array stores the ids of the partitions whose                     */
815
    /* SADs are updated. Since the partitions whose SADs are updated may not */
816
    /* be in contiguous order, we supply another level of indirection.       */
817
    /*************************************************************************/
818
681k
    i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820
    /* Initialize result params used for partition update */
821
681k
    s_result_prms.pf_mv_cost_compute = NULL;
822
681k
    s_result_prms.ps_search_results = ps_search_results;
823
681k
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824
681k
    s_result_prms.i1_ref_idx = i4_search_idx;
825
681k
    s_result_prms.i4_part_mask = i4_part_mask;
826
681k
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827
681k
    s_result_prms.i4_grid_mask = 1;
828
829
    /* One time Initialization of error params used for SAD/SATD compute */
830
681k
    s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831
681k
    s_err_prms.i4_ref_stride = i4_ref_stride;
832
681k
    s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833
681k
    s_err_prms.i4_grid_mask = 1;
834
681k
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835
681k
    s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836
681k
    s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837
681k
    s_err_prms.i4_step = 1;
838
839
    /*************************************************************************/
840
    /* One time preparation of non changing interpolation params.            */
841
    /*************************************************************************/
842
681k
    s_interp_prms.i4_ref_stride = i4_ref_stride;
843
681k
    s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844
681k
    s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845
681k
    s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846
681k
    s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847
681k
    i4_interp_buf_id = 0;
848
849
681k
    pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851
    /***************************************************************************/
852
    /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853
    /* results                                                                 */
854
    /***************************************************************************/
855
1.75M
    for(i = 0; i < num_unique_nodes_cu_merge; i++)
856
1.07M
    {
857
1.07M
        WORD8 i1_ref_idx;
858
1.07M
        ps_search_node = &as_merge_unique_node[i];
859
860
        /*********************************************************************/
861
        /* Compute the base pointer for input, interpolated buffers          */
862
        /* The base pointers point as follows:                               */
863
        /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
864
        /* To these, we need to add the offset of the current node           */
865
        /*********************************************************************/
866
1.07M
        i1_ref_idx = ps_search_node->i1_ref_idx;
867
1.07M
        apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868
1.07M
        apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869
1.07M
        apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870
1.07M
        apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872
1.07M
        s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874
1.07M
        pf_qpel_interp(
875
1.07M
            &s_interp_prms,
876
1.07M
            ps_search_node->s_mv.i2_mvx,
877
1.07M
            ps_search_node->s_mv.i2_mvy,
878
1.07M
            i4_interp_buf_id);
879
880
1.07M
        pred_lx = i4_search_idx;
881
1.07M
        ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883
1.07M
        s_result_prms.u1_pred_lx = pred_lx;
884
1.07M
        s_result_prms.ps_search_node_base = ps_search_node;
885
1.07M
        s_err_prms.pu1_inp =
886
1.07M
            ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887
1.07M
        s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888
1.07M
        s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890
        /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891
        Here the tu recursion logic is restricted with the size of the PU*/
892
1.07M
        pf_err_compute(&s_err_prms);
893
894
1.07M
        if(ps_subpel_prms->u1_is_cu_noisy &&
895
1.07M
           ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896
0
        {
897
0
            ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898
0
                s_err_prms.pu1_ref,
899
0
                s_err_prms.i4_ref_stride,
900
0
                ai4_valid_part_ids,
901
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903
0
                s_err_prms.pi4_sad_grid,
904
0
                ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905
0
                ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906
0
                ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907
0
                i4_num_valid_parts,
908
0
                ps_wt_inp_prms->wpred_log_wdc,
909
0
                (BLK_32x32 == e_blk_size) ? 32 : 64);
910
0
        }
911
912
        /* Update the mv's */
913
1.07M
        s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914
1.07M
        s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916
        /* Update best results */
917
1.07M
        hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918
1.07M
    }
919
920
    /************************************************************************/
921
    /* Update mv cost and total cost for each valid partition in the CU     */
922
    /************************************************************************/
923
12.2M
    for(i = 0; i < TOT_NUM_PARTS; i++)
924
11.5M
    {
925
11.5M
        if(i4_part_mask & (1 << i))
926
5.07M
        {
927
5.07M
            WORD32 j;
928
5.07M
            WORD32 i4_mv_cost;
929
930
5.07M
            ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932
5.07M
            for(j = 0;
933
10.1M
                j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934
5.07M
                j++)
935
5.07M
            {
936
5.07M
                if(ps_search_node->i1_ref_idx != -1)
937
5.07M
                {
938
5.07M
                    pred_lx = i4_search_idx;
939
5.07M
                    ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941
                    /* Prediction context should now deal with qpel units */
942
5.07M
                    HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944
5.07M
                    ps_search_node->u1_subpel_done = 1;
945
5.07M
                    ps_search_node->u1_is_avail = 1;
946
947
5.07M
                    i4_mv_cost =
948
5.07M
                        pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950
5.07M
                    ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951
5.07M
                    ps_search_node->i4_mv_cost = i4_mv_cost;
952
953
5.07M
                    ps_search_node++;
954
5.07M
                }
955
5.07M
            }
956
5.07M
        }
957
11.5M
    }
958
959
681k
    return num_unique_nodes_cu_merge;
960
710k
}
961
962
580k
/* Threshold compared against the intra-part count (i4_intra_parts) in        */
/* hme_try_merge_high_speed(): exceeding it is one of the conditions under    */
/* which that function returns CU_MERGED/CU_SPLIT early, before any merge     */
/* candidates are evaluated.                                                  */
#define CU_MERGE_MAX_INTRA_PARTS 4
963
964
/**
965
********************************************************************************
966
*  @fn     hme_try_merge_high_speed
967
*
968
*  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969
entity or with partitions for high speed preset
970
*
971
*  @param[in,out]  hme_merge_prms_t: Params for CU merge
972
*
973
*  @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974
********************************************************************************
975
*/
976
CU_MERGE_RESULT_T hme_try_merge_high_speed(
977
    me_ctxt_t *ps_thrd_ctxt,
978
    me_frm_ctxt_t *ps_ctxt,
979
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980
    hme_subpel_prms_t *ps_subpel_prms,
981
    hme_merge_prms_t *ps_merge_prms,
982
    inter_pu_results_t *ps_pu_results,
983
    pu_result_t *ps_pu_result)
984
591k
{
985
591k
    search_results_t *ps_results_tl, *ps_results_tr;
986
591k
    search_results_t *ps_results_bl, *ps_results_br;
987
988
591k
    S32 i;
989
591k
    S32 i4_search_idx;
990
591k
    S32 i4_cost_parent;
991
591k
    S32 intra_cu_size;
992
591k
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994
591k
    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995
591k
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997
591k
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998
591k
    S32 is_vert = 0, i4_best_part_type = -1;
999
591k
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000
591k
    S32 i4_cost_children = 0;
1001
591k
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002
591k
    S32 i4_num_merge_cands_evaluated = 0;
1003
591k
    U08 u1_x_off = ps_results_merge->u1_x_off;
1004
591k
    U08 u1_y_off = ps_results_merge->u1_y_off;
1005
591k
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006
1007
591k
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008
591k
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009
591k
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010
591k
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011
591k
    ps_results_tl = ps_merge_prms->ps_results_tl;
1012
591k
    ps_results_tr = ps_merge_prms->ps_results_tr;
1013
591k
    ps_results_bl = ps_merge_prms->ps_results_bl;
1014
591k
    ps_results_br = ps_merge_prms->ps_results_br;
1015
1016
591k
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017
61.0k
    {
1018
61.0k
        i4_part_mask &= ~ENABLE_AMP;
1019
61.0k
    }
1020
1021
591k
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022
210k
    {
1023
210k
        i4_part_mask &= ~ENABLE_AMP;
1024
1025
210k
        i4_part_mask &= ~ENABLE_SMP;
1026
210k
    }
1027
1028
591k
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030
    /*************************************************************************/
1031
    /* The logic for High speed CU merge goes as follows:                    */
1032
    /*                                                                       */
1033
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034
    /*    exceed 7                                                           */
1035
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036
    /*    are identical                                                      */
1037
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039
    /*    best parent cost is lower than sum of the best children costs      */
1040
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041
    /*                                                                       */
1042
    /*************************************************************************/
1043
1044
    /* Count the number of best partitions in child CUs, early exit if > 7 */
1045
591k
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046
591k
       (CU_32x32 == ps_results_merge->e_cu_size))
1047
579k
    {
1048
579k
        S32 num_parts_in_32x32 = 0;
1049
579k
        WORD32 i4_part_type;
1050
1051
579k
        if(ps_results_tl->u1_split_flag)
1052
30.7k
        {
1053
30.7k
            num_parts_in_32x32 += 4;
1054
1055
30.7k
#define COST_INTERCHANGE 0
1056
30.7k
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057
30.7k
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058
30.7k
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059
30.7k
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060
30.7k
        }
1061
549k
        else
1062
549k
        {
1063
549k
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064
549k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065
549k
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066
549k
        }
1067
1068
579k
        if(ps_results_tr->u1_split_flag)
1069
13.7k
        {
1070
13.7k
            num_parts_in_32x32 += 4;
1071
1072
13.7k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073
13.7k
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074
13.7k
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075
13.7k
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076
13.7k
        }
1077
566k
        else
1078
566k
        {
1079
566k
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080
566k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081
566k
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082
566k
        }
1083
1084
579k
        if(ps_results_bl->u1_split_flag)
1085
13.2k
        {
1086
13.2k
            num_parts_in_32x32 += 4;
1087
1088
13.2k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089
13.2k
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090
13.2k
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091
13.2k
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092
13.2k
        }
1093
566k
        else
1094
566k
        {
1095
566k
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096
566k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097
566k
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098
566k
        }
1099
1100
579k
        if(ps_results_br->u1_split_flag)
1101
6.66k
        {
1102
6.66k
            num_parts_in_32x32 += 4;
1103
1104
6.66k
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105
6.66k
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106
6.66k
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107
6.66k
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108
6.66k
        }
1109
573k
        else
1110
573k
        {
1111
573k
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112
573k
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113
573k
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114
573k
        }
1115
1116
579k
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117
9.30k
        {
1118
9.30k
            return CU_SPLIT;
1119
9.30k
        }
1120
1121
570k
        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122
570k
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123
1.80k
        {
1124
1.80k
            return CU_SPLIT;
1125
1.80k
        }
1126
570k
    }
1127
1128
    /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129
    /* Note : Each intra part represent a NxN unit of the children CUs          */
1130
    /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131
580k
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132
134k
    {
1133
134k
        if(CU_64x64 == ps_results_merge->e_cu_size)
1134
11.6k
        {
1135
11.6k
            i4_intra_parts =
1136
11.6k
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137
11.6k
                    ? 16
1138
11.6k
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139
11.6k
        }
1140
122k
        else
1141
122k
        {
1142
122k
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143
122k
            {
1144
31.0k
            case 0:
1145
31.0k
            {
1146
31.0k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147
31.0k
                                       ->u1_inter_eval_enable)
1148
31.0k
                                     ? 16
1149
31.0k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150
31.0k
                                            ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152
31.0k
                break;
1153
0
            }
1154
30.7k
            case 1:
1155
30.7k
            {
1156
30.7k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157
30.7k
                                       ->u1_inter_eval_enable)
1158
30.7k
                                     ? 16
1159
30.7k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160
30.7k
                                            ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162
30.7k
                break;
1163
0
            }
1164
30.3k
            case 2:
1165
30.3k
            {
1166
30.3k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167
30.3k
                                       ->u1_inter_eval_enable)
1168
30.3k
                                     ? 16
1169
30.3k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170
30.3k
                                            ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172
30.3k
                break;
1173
0
            }
1174
30.1k
            case 3:
1175
30.1k
            {
1176
30.1k
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177
30.1k
                                       ->u1_inter_eval_enable)
1178
30.1k
                                     ? 16
1179
30.1k
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180
30.1k
                                            ->ps_child_node_br->u1_intra_eval_enable);
1181
1182
30.1k
                break;
1183
0
            }
1184
122k
            }
1185
122k
        }
1186
134k
    }
1187
446k
    else
1188
446k
    {
1189
2.23M
        for(i = 0; i < 4; i++)
1190
1.78M
        {
1191
1.78M
            search_results_t *ps_results =
1192
1.78M
                (i == 0) ? ps_results_tl
1193
1.78M
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195
1.78M
            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197
1.78M
            if(ps_results->u1_split_flag)
1198
7.50k
            {
1199
7.50k
                U08 u1_x_off = ps_results->u1_x_off;
1200
7.50k
                U08 u1_y_off = ps_results->u1_y_off;
1201
7.50k
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202
7.50k
                                      2;
1203
1204
                /* Special case to handle 8x8 CUs when 16x16 is split */
1205
7.50k
                ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207
7.50k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209
7.50k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210
3.86k
                    i4_intra_parts += 1;
1211
1212
7.50k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214
7.50k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215
2.84k
                    i4_intra_parts += 1;
1216
1217
7.50k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219
7.50k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220
2.73k
                    i4_intra_parts += 1;
1221
1222
7.50k
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224
7.50k
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225
2.88k
                    i4_intra_parts += 1;
1226
7.50k
            }
1227
1.77M
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228
10.4k
            {
1229
10.4k
                i4_intra_parts += 4;
1230
10.4k
            }
1231
1.78M
        }
1232
446k
    }
1233
1234
    /* Determine the max intra CU size indicated by IPE */
1235
580k
    intra_cu_size = CU_64x64;
1236
580k
    if(ps_cur_ipe_ctb->u1_split_flag)
1237
573k
    {
1238
573k
        intra_cu_size = CU_32x32;
1239
573k
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240
311k
        {
1241
311k
            intra_cu_size = CU_16x16;
1242
311k
        }
1243
573k
    }
1244
1245
580k
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246
580k
        (intra_cu_size < ps_results_merge->e_cu_size) &&
1247
580k
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248
580k
       (i4_intra_parts == 16))
1249
2.78k
    {
1250
2.78k
        S32 i4_merge_outcome;
1251
1252
2.78k
        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253
2.78k
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254
2.63k
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255
2.78k
                               : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257
2.78k
        i4_merge_outcome = i4_merge_outcome ||
1258
2.78k
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260
2.78k
        i4_merge_outcome = i4_merge_outcome &&
1261
2.78k
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263
2.78k
        if(i4_merge_outcome)
1264
1.01k
        {
1265
1.01k
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266
1.01k
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267
1.01k
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269
1.01k
            ps_cu_results->u1_num_best_results = 1;
1270
1.01k
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271
1.01k
            ps_cu_results->u1_x_off = u1_x_off;
1272
1.01k
            ps_cu_results->u1_y_off = u1_y_off;
1273
1274
1.01k
            ps_best_result->u1_part_type = PRT_2Nx2N;
1275
1.01k
            ps_best_result->ai4_tu_split_flag[0] = 0;
1276
1.01k
            ps_best_result->ai4_tu_split_flag[1] = 0;
1277
1.01k
            ps_best_result->ai4_tu_split_flag[2] = 0;
1278
1.01k
            ps_best_result->ai4_tu_split_flag[3] = 0;
1279
1.01k
            ps_best_result->i4_tot_cost =
1280
1.01k
                (CU_64x64 == ps_results_merge->e_cu_size)
1281
1.01k
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282
1.01k
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284
1.01k
            ps_pu->b1_intra_flag = 1;
1285
1.01k
            ps_pu->b4_pos_x = u1_x_off >> 2;
1286
1.01k
            ps_pu->b4_pos_y = u1_y_off >> 2;
1287
1.01k
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288
1.01k
            ps_pu->b4_ht = ps_pu->b4_wd;
1289
1.01k
            ps_pu->mv.i1_l0_ref_idx = -1;
1290
1.01k
            ps_pu->mv.i1_l1_ref_idx = -1;
1291
1.01k
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292
1.01k
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293
1.01k
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294
1.01k
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296
1.01k
            return CU_MERGED;
1297
1.01k
        }
1298
1.77k
        else
1299
1.77k
        {
1300
1.77k
            return CU_SPLIT;
1301
1.77k
        }
1302
2.78k
    }
1303
1304
577k
    if(i4_intra_parts)
1305
8.72k
    {
1306
8.72k
        i4_part_mask = ENABLE_2Nx2N;
1307
8.72k
    }
1308
1309
577k
    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311
577k
    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313
577k
    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314
577k
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316
577k
    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317
0
    {
1318
0
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319
0
        S32 i4_num_valid_parts;
1320
0
        S32 i4_sigma_array_offset;
1321
1322
0
        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324
        /*********************************************************************************************************************************************/
1325
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328
        /*********************************************************************************************************************************************/
1329
0
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330
0
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331
1332
0
        for(i = 0; i < i4_num_valid_parts; i++)
1333
0
        {
1334
0
            S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336
0
            hme_compute_final_sigma_of_pu_from_base_blocks(
1337
0
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338
0
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339
0
                au8_final_src_sigmaX,
1340
0
                au8_final_src_sigmaXSquared,
1341
0
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342
0
                4,
1343
0
                i4_part_id,
1344
0
                16);
1345
0
        }
1346
1347
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349
0
    }
1350
1351
    /*************************************************************************/
1352
    /* Loop through all ref idx and pick the merge candts and refine based   */
1353
    /* on the active partitions. At this stage num ref will be 1 or 2        */
1354
    /*************************************************************************/
1355
1.28M
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356
710k
    {
1357
710k
        S32 i4_cands;
1358
710k
        U08 u1_pred_dir = 0;
1359
1360
710k
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361
707k
        {
1362
707k
            u1_pred_dir = i4_search_idx;
1363
707k
        }
1364
2.89k
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365
2.89k
        {
1366
2.89k
            u1_pred_dir = 1;
1367
2.89k
        }
1368
0
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369
0
        {
1370
0
            u1_pred_dir = 0;
1371
0
        }
1372
0
        else
1373
0
        {
1374
0
            ASSERT(0);
1375
0
        }
1376
1377
        /* call the function to pick and evaluate the merge candts, given */
1378
        /* a ref id and a part mask.                                      */
1379
710k
        i4_cands = hme_pick_eval_merge_candts(
1380
710k
            ps_merge_prms,
1381
710k
            ps_subpel_prms,
1382
710k
            u1_pred_dir,
1383
710k
            i4_best_part_type,
1384
710k
            is_vert,
1385
710k
            ps_wt_inp_prms,
1386
710k
            i4_frm_qstep,
1387
710k
            ps_cmn_utils_optimised_function_list,
1388
710k
            ps_me_optimised_function_list);
1389
1390
710k
        if(i4_cands)
1391
681k
        {
1392
681k
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393
681k
                u1_pred_dir;
1394
681k
            ps_merge_prms->i4_num_pred_dir_actual++;
1395
681k
        }
1396
1397
710k
        i4_num_merge_cands_evaluated += i4_cands;
1398
710k
    }
1399
1400
    /* Call the decide_part_types function here */
1401
    /* Populate the new PU struct with the results post subpel refinement*/
1402
577k
    if(i4_num_merge_cands_evaluated)
1403
577k
    {
1404
577k
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406
577k
        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408
577k
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409
577k
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411
577k
        hme_populate_pus(
1412
577k
            ps_thrd_ctxt,
1413
577k
            ps_ctxt,
1414
577k
            ps_subpel_prms,
1415
577k
            ps_results_merge,
1416
577k
            ps_cu_results,
1417
577k
            ps_pu_results,
1418
577k
            ps_pu_result,
1419
577k
            ps_merge_prms->ps_inter_ctb_prms,
1420
577k
            &ps_ctxt->s_wt_pred,
1421
577k
            ps_merge_prms->ps_layer_ctxt,
1422
577k
            ps_merge_prms->au1_pred_dir_searched,
1423
577k
            ps_merge_prms->i4_num_pred_dir_actual);
1424
1425
577k
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427
577k
        hme_decide_part_types(
1428
577k
            ps_cu_results,
1429
577k
            ps_pu_results,
1430
577k
            ps_merge_prms->ps_inter_ctb_prms,
1431
577k
            ps_ctxt,
1432
577k
            ps_cmn_utils_optimised_function_list,
1433
577k
            ps_me_optimised_function_list
1434
1435
577k
        );
1436
1437
        /*****************************************************************/
1438
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439
        /*****************************************************************/
1440
577k
#if DISABLE_INTRA_IN_BPICS
1441
577k
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442
577k
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443
522k
#endif
1444
522k
        {
1445
522k
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446
522k
            {
1447
522k
                hme_insert_intra_nodes_post_bipred(
1448
522k
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449
522k
            }
1450
522k
        }
1451
577k
    }
1452
1
    else
1453
1
    {
1454
1
        return CU_SPLIT;
1455
1
    }
1456
1457
    /* We check the best result of ref idx 0 and compare for parent vs child */
1458
577k
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459
577k
       (CU_32x32 == ps_results_merge->e_cu_size))
1460
566k
    {
1461
566k
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462
        /*********************************************************************/
1463
        /* Add the cost of signaling the CU tree bits.                       */
1464
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466
        /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467
        /*********************************************************************/
1468
566k
        {
1469
566k
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471
566k
            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472
566k
        }
1473
1474
566k
        if(i4_cost_parent < i4_cost_children)
1475
464k
        {
1476
464k
            return CU_MERGED;
1477
464k
        }
1478
1479
101k
        return CU_SPLIT;
1480
566k
    }
1481
11.6k
    else
1482
11.6k
    {
1483
11.6k
        return CU_MERGED;
1484
11.6k
    }
1485
577k
}
1486
1487
/**
********************************************************************************
*  Copies one search result into an mv bank entry: the x and y components of
*  the node's mv, each right shifted by 'shift', are written to *ps_mv, and
*  the node's ref idx is written to *pi1_ref_idx.
*
*  ps_mv and ps_search_node are expanded more than once, so the arguments
*  must be free of side effects. The body is wrapped in do { } while(0) so
*  that the macro followed by ';' forms exactly one statement and can be used
*  as the unbraced body of an if/else.
********************************************************************************
*/
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
1493
1494
/**
1495
********************************************************************************
1496
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497
*                               layer_mv_t *ps_layer_mv,
1498
*                               S32 i4_search_blk_x,
1499
*                               S32 i4_search_blk_y,
1500
*                               mvbank_update_prms_t *ps_prms)
1501
*
1502
*  @brief  Updates the mv bank in case there is no further encoding to be done
1503
*
1504
*  @param[in]  ps_search_results: contains results for the block just searched
1505
*
1506
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507
*
1508
*  @param[in] i4_search_blk_x  : col num of blk being searched
1509
*
1510
*  @param[in] i4_search_blk_y : row num of blk being searched
1511
*
1512
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
1513
*
1514
*  @return None
1515
********************************************************************************
1516
*/
1517
1518
void hme_update_mv_bank_noencode(
1519
    search_results_t *ps_search_results,
1520
    layer_mv_t *ps_layer_mv,
1521
    S32 i4_search_blk_x,
1522
    S32 i4_search_blk_y,
1523
    mvbank_update_prms_t *ps_prms)
1524
20.4k
{
1525
20.4k
    hme_mv_t *ps_mv;
1526
20.4k
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527
20.4k
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528
20.4k
    S32 i4_blk_x, i4_blk_y, i4_offset;
1529
20.4k
    S32 i4_j, i4_ref_id;
1530
20.4k
    search_node_t *ps_search_node;
1531
20.4k
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532
20.4k
    search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533
20.4k
    search_node_t *ps_search_node_4x4_4;
1534
1535
20.4k
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536
20.4k
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537
20.4k
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539
20.4k
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541
    /* Identify the correct offset in the mvbank and the reference id buf */
1542
20.4k
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543
20.4k
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545
    /*************************************************************************/
1546
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548
    /* do a straightforward single update of results. This will have a 1-1   */
1549
    /* correspondence.                                                       */
1550
    /*************************************************************************/
1551
20.4k
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552
9.23k
    {
1553
21.8k
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554
12.6k
        {
1555
12.6k
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556
25.3k
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557
12.6k
            {
1558
12.6k
                COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559
12.6k
                ps_mv++;
1560
12.6k
                pi1_ref_idx++;
1561
12.6k
                ps_search_node++;
1562
12.6k
            }
1563
12.6k
        }
1564
9.23k
        return;
1565
9.23k
    }
1566
1567
    /*************************************************************************/
1568
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569
    /* case, we need to have NxN partitions enabled in search.               */
1570
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572
    /*************************************************************************/
1573
11.2k
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574
11.2k
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575
11.2k
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577
    /*************************************************************************/
1578
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579
    /* hence the below check.                                                */
1580
    /*************************************************************************/
1581
11.2k
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
1583
11.2k
    ps_mv1 = ps_mv;
1584
11.2k
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585
11.2k
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586
11.2k
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587
11.2k
    pi1_ref_idx1 = pi1_ref_idx;
1588
11.2k
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589
11.2k
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590
11.2k
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592
32.6k
    for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593
21.3k
    {
1594
21.3k
        ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596
21.3k
        ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598
21.3k
        ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600
21.3k
        ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602
21.3k
        ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604
21.3k
        COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605
21.3k
        ps_mv1++;
1606
21.3k
        pi1_ref_idx1++;
1607
21.3k
        ps_search_node_4x4_1++;
1608
21.3k
        COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609
21.3k
        ps_mv2++;
1610
21.3k
        pi1_ref_idx2++;
1611
21.3k
        ps_search_node_4x4_2++;
1612
21.3k
        COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613
21.3k
        ps_mv3++;
1614
21.3k
        pi1_ref_idx3++;
1615
21.3k
        ps_search_node_4x4_3++;
1616
21.3k
        COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617
21.3k
        ps_mv4++;
1618
21.3k
        pi1_ref_idx4++;
1619
21.3k
        ps_search_node_4x4_4++;
1620
1621
21.3k
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622
18.4k
        {
1623
18.4k
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624
18.4k
            ps_mv1++;
1625
18.4k
            pi1_ref_idx1++;
1626
18.4k
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627
18.4k
            ps_mv2++;
1628
18.4k
            pi1_ref_idx2++;
1629
18.4k
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630
18.4k
            ps_mv3++;
1631
18.4k
            pi1_ref_idx3++;
1632
18.4k
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633
18.4k
            ps_mv4++;
1634
18.4k
            pi1_ref_idx4++;
1635
18.4k
        }
1636
1637
21.3k
        for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638
0
        {
1639
0
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640
0
            ps_mv1++;
1641
0
            pi1_ref_idx1++;
1642
0
            ps_search_node_4x4_1++;
1643
0
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644
0
            ps_mv2++;
1645
0
            pi1_ref_idx2++;
1646
0
            ps_search_node_4x4_2++;
1647
0
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648
0
            ps_mv3++;
1649
0
            pi1_ref_idx3++;
1650
0
            ps_search_node_4x4_3++;
1651
0
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652
0
            ps_mv4++;
1653
0
            pi1_ref_idx4++;
1654
0
            ps_search_node_4x4_4++;
1655
0
        }
1656
21.3k
    }
1657
11.2k
}
1658
1659
void hme_update_mv_bank_encode(
1660
    search_results_t *ps_search_results,
1661
    layer_mv_t *ps_layer_mv,
1662
    S32 i4_search_blk_x,
1663
    S32 i4_search_blk_y,
1664
    mvbank_update_prms_t *ps_prms,
1665
    U08 *pu1_pred_dir_searched,
1666
    S32 i4_num_act_ref_l0)
1667
2.27M
{
1668
2.27M
    hme_mv_t *ps_mv;
1669
2.27M
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670
2.27M
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671
2.27M
    S32 i4_blk_x, i4_blk_y, i4_offset;
1672
2.27M
    S32 j, i, num_parts;
1673
2.27M
    search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674
2.27M
    search_node_t *ps_search_node_bl, *ps_search_node_br;
1675
2.27M
    search_node_t s_zero_mv;
1676
2.27M
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678
2.27M
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679
2.27M
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680
2.27M
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682
2.27M
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684
    /* Identify the correct offset in the mvbank and the reference id buf */
1685
2.27M
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686
2.27M
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687
1688
2.27M
    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689
2.27M
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691
    /*************************************************************************/
1692
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1693
    /* hence the below check.                                                */
1694
    /*************************************************************************/
1695
2.27M
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697
2.27M
    ps_mv1 = ps_mv;
1698
2.27M
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699
2.27M
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700
2.27M
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701
2.27M
    pi1_ref_idx1 = pi1_ref_idx;
1702
2.27M
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703
2.27M
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704
2.27M
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705
1706
    /* Initialize zero mv: default mv used for intra mvs */
1707
2.27M
    s_zero_mv.s_mv.i2_mvx = 0;
1708
2.27M
    s_zero_mv.s_mv.i2_mvy = 0;
1709
2.27M
    s_zero_mv.i1_ref_idx = 0;
1710
1711
2.27M
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712
2.27M
       (ps_search_results->i4_part_mask & ENABLE_NxN))
1713
84.8k
    {
1714
84.8k
        i4_part_type = PRT_NxN;
1715
84.8k
    }
1716
1717
5.04M
    for(i = 0; i < ps_prms->i4_num_ref; i++)
1718
2.77M
    {
1719
5.55M
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720
2.77M
        {
1721
2.77M
            WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723
2.77M
            num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725
2.77M
            ps_search_node_tl =
1726
2.77M
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728
2.77M
            if(num_parts == 1)
1729
2.65M
            {
1730
2.65M
                ps_search_node_tr = ps_search_node_tl;
1731
2.65M
                ps_search_node_bl = ps_search_node_tl;
1732
2.65M
                ps_search_node_br = ps_search_node_tl;
1733
2.65M
            }
1734
122k
            else if(num_parts == 2)
1735
23.1k
            {
1736
                /* For vertically oriented partitions, tl, bl pt to same result */
1737
                /* For horizontally oriented partition, tl, tr pt to same result */
1738
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740
                /* and right 2 8x8 have 12x16R partition */
1741
23.1k
                if(gau1_is_vert_part[i4_part_type])
1742
13.7k
                {
1743
13.7k
                    ps_search_node_tr =
1744
13.7k
                        ps_search_results
1745
13.7k
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746
13.7k
                    ps_search_node_bl = ps_search_node_tl;
1747
13.7k
                }
1748
9.31k
                else
1749
9.31k
                {
1750
9.31k
                    ps_search_node_tr = ps_search_node_tl;
1751
9.31k
                    ps_search_node_bl =
1752
9.31k
                        ps_search_results
1753
9.31k
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754
9.31k
                }
1755
23.1k
                ps_search_node_br =
1756
23.1k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757
23.1k
            }
1758
99.7k
            else
1759
99.7k
            {
1760
                /* 4 unique results */
1761
99.7k
                ps_search_node_tr =
1762
99.7k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763
99.7k
                ps_search_node_bl =
1764
99.7k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765
99.7k
                ps_search_node_br =
1766
99.7k
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767
99.7k
            }
1768
1769
2.77M
            if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770
0
                ps_search_node_tl++;
1771
2.77M
            if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772
0
                ps_search_node_tr++;
1773
2.77M
            if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774
0
                ps_search_node_bl++;
1775
2.77M
            if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776
0
                ps_search_node_br++;
1777
1778
2.77M
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779
2.77M
            ps_mv1++;
1780
2.77M
            pi1_ref_idx1++;
1781
2.77M
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782
2.77M
            ps_mv2++;
1783
2.77M
            pi1_ref_idx2++;
1784
2.77M
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785
2.77M
            ps_mv3++;
1786
2.77M
            pi1_ref_idx3++;
1787
2.77M
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788
2.77M
            ps_mv4++;
1789
2.77M
            pi1_ref_idx4++;
1790
1791
2.77M
            if(ps_prms->i4_num_results_to_store > 1)
1792
0
            {
1793
0
                ps_search_node_tl =
1794
0
                    &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796
0
                if(num_parts == 1)
1797
0
                {
1798
0
                    ps_search_node_tr = ps_search_node_tl;
1799
0
                    ps_search_node_bl = ps_search_node_tl;
1800
0
                    ps_search_node_br = ps_search_node_tl;
1801
0
                }
1802
0
                else if(num_parts == 2)
1803
0
                {
1804
                    /* For vertically oriented partitions, tl, bl pt to same result */
1805
                    /* For horizontally oriented partition, tl, tr pt to same result */
1806
                    /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807
                    /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808
                    /* and right 2 8x8 have 12x16R partition */
1809
0
                    if(gau1_is_vert_part[i4_part_type])
1810
0
                    {
1811
0
                        ps_search_node_tr =
1812
0
                            &ps_search_results
1813
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814
0
                        ps_search_node_bl = ps_search_node_tl;
1815
0
                    }
1816
0
                    else
1817
0
                    {
1818
0
                        ps_search_node_tr = ps_search_node_tl;
1819
0
                        ps_search_node_bl =
1820
0
                            &ps_search_results
1821
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822
0
                    }
1823
0
                    ps_search_node_br =
1824
0
                        &ps_search_results
1825
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826
0
                }
1827
0
                else
1828
0
                {
1829
                    /* 4 unique results */
1830
0
                    ps_search_node_tr =
1831
0
                        &ps_search_results
1832
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833
0
                    ps_search_node_bl =
1834
0
                        &ps_search_results
1835
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836
0
                    ps_search_node_br =
1837
0
                        &ps_search_results
1838
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839
0
                }
1840
1841
0
                if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842
0
                    ps_search_node_tl++;
1843
0
                if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844
0
                    ps_search_node_tr++;
1845
0
                if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846
0
                    ps_search_node_bl++;
1847
0
                if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848
0
                    ps_search_node_br++;
1849
1850
0
                COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851
0
                ps_mv1++;
1852
0
                pi1_ref_idx1++;
1853
0
                COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854
0
                ps_mv2++;
1855
0
                pi1_ref_idx2++;
1856
0
                COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857
0
                ps_mv3++;
1858
0
                pi1_ref_idx3++;
1859
0
                COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860
0
                ps_mv4++;
1861
0
                pi1_ref_idx4++;
1862
0
            }
1863
2.77M
        }
1864
2.77M
    }
1865
2.27M
}
1866
1867
/**
1868
********************************************************************************
1869
*  @fn     hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870
*                               layer_mv_t *ps_layer_mv,
1871
*                               S32 i4_search_blk_x,
1872
*                               S32 i4_search_blk_y,
1873
*                               mvbank_update_prms_t *ps_prms)
1874
*
1875
*  @brief  Updates the mv bank in case there is no further encoding to be done
1876
*
1877
*  @param[in]  ps_search_results: contains results for the block just searched
1878
*
1879
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880
*
1881
*  @param[in] i4_search_blk_x  : col num of blk being searched
1882
*
1883
*  @param[in] i4_search_blk_y : row num of blk being searched
1884
*
1885
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
1886
*
1887
*  @return None
1888
********************************************************************************
1889
*/
1890
1891
void hme_update_mv_bank_in_l1_me(
1892
    search_results_t *ps_search_results,
1893
    layer_mv_t *ps_layer_mv,
1894
    S32 i4_search_blk_x,
1895
    S32 i4_search_blk_y,
1896
    mvbank_update_prms_t *ps_prms)
1897
2.29M
{
1898
2.29M
    hme_mv_t *ps_mv;
1899
2.29M
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900
2.29M
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901
2.29M
    S32 i4_blk_x, i4_blk_y, i4_offset;
1902
2.29M
    S32 i4_j, i4_ref_id;
1903
2.29M
    search_node_t *ps_search_node;
1904
2.29M
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906
2.29M
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907
2.29M
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908
2.29M
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910
2.29M
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912
    /* Identify the correct offset in the mvbank and the reference id buf */
1913
2.29M
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914
2.29M
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916
    /*************************************************************************/
1917
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919
    /* do a straightforward single update of results. This will have a 1-1   */
1920
    /* correspondence.                                                       */
1921
    /*************************************************************************/
1922
2.29M
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923
1.25M
    {
1924
1.25M
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926
1.25M
        hme_mv_t *ps_mv_l0_root = ps_mv;
1927
1.25M
        hme_mv_t *ps_mv_l1_root =
1928
1.25M
            ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930
1.25M
        U32 u4_num_l0_results_updated = 0;
1931
1.25M
        U32 u4_num_l1_results_updated = 0;
1932
1933
1.25M
        S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934
1.25M
        S08 *pi1_ref_idx_l1_root =
1935
1.25M
            pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936
1937
3.15M
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938
1.89M
        {
1939
1.89M
            U32 *pu4_num_results_updated;
1940
1.89M
            search_node_t **pps_result_nodes;
1941
1942
1.89M
            U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944
1.89M
            if(u1_pred_dir_of_cur_ref)
1945
270k
            {
1946
270k
                pu4_num_results_updated = &u4_num_l1_results_updated;
1947
270k
                pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948
270k
            }
1949
1.62M
            else
1950
1.62M
            {
1951
1.62M
                pu4_num_results_updated = &u4_num_l0_results_updated;
1952
1.62M
                pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953
1.62M
            }
1954
1955
1.89M
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957
4.46M
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958
2.56M
            {
1959
2.56M
                hme_add_new_node_to_a_sorted_array(
1960
2.56M
                    &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962
2.56M
                ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963
2.56M
                (*pu4_num_results_updated)++;
1964
2.56M
            }
1965
1.89M
        }
1966
1967
3.50M
        for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968
2.24M
        {
1969
2.24M
            COPY_SEARCH_RESULT(
1970
2.24M
                &ps_mv_l0_root[i4_j],
1971
2.24M
                &pi1_ref_idx_l0_root[i4_j],
1972
2.24M
                aps_result_nodes_sorted[0][i4_j],
1973
2.24M
                0);
1974
2.24M
        }
1975
1976
1.57M
        for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977
315k
        {
1978
315k
            COPY_SEARCH_RESULT(
1979
315k
                &ps_mv_l1_root[i4_j],
1980
315k
                &pi1_ref_idx_l1_root[i4_j],
1981
315k
                aps_result_nodes_sorted[1][i4_j],
1982
315k
                0);
1983
315k
        }
1984
1985
1.25M
        return;
1986
1.25M
    }
1987
1988
    /*************************************************************************/
1989
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990
    /* case, we need to have NxN partitions enabled in search.               */
1991
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993
    /*************************************************************************/
1994
1.03M
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995
1.03M
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996
1.03M
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998
    /*************************************************************************/
1999
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000
    /* hence the below check.                                                */
2001
    /*************************************************************************/
2002
1.03M
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004
1.03M
    ps_mv1 = ps_mv;
2005
1.03M
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006
1.03M
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007
1.03M
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008
1.03M
    pi1_ref_idx1 = pi1_ref_idx;
2009
1.03M
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010
1.03M
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011
1.03M
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013
1.03M
    {
2014
        /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015
1.03M
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016
1.03M
        U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017
2018
1.03M
        S32 i;
2019
2020
1.03M
        hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021
1.03M
        hme_mv_t *ps_mv1_l1_root =
2022
1.03M
            ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023
1.03M
        hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024
1.03M
        hme_mv_t *ps_mv2_l1_root =
2025
1.03M
            ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026
1.03M
        hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027
1.03M
        hme_mv_t *ps_mv3_l1_root =
2028
1.03M
            ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029
1.03M
        hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030
1.03M
        hme_mv_t *ps_mv4_l1_root =
2031
1.03M
            ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032
2033
1.03M
        U32 u4_num_l0_results_updated = 0;
2034
1.03M
        U32 u4_num_l1_results_updated = 0;
2035
2036
1.03M
        S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037
1.03M
        S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038
1.03M
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2039
1.03M
        S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040
1.03M
        S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041
1.03M
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2042
1.03M
        S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043
1.03M
        S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044
1.03M
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2045
1.03M
        S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046
1.03M
        S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047
1.03M
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2048
2049
5.18M
        for(i = 0; i < 4; i++)
2050
4.15M
        {
2051
4.15M
            hme_mv_t *ps_mv_l0_root;
2052
4.15M
            hme_mv_t *ps_mv_l1_root;
2053
2054
4.15M
            S08 *pi1_ref_idx_l0_root;
2055
4.15M
            S08 *pi1_ref_idx_l1_root;
2056
2057
14.8M
            for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058
10.7M
            {
2059
10.7M
                U32 *pu4_num_results_updated;
2060
10.7M
                search_node_t **pps_result_nodes;
2061
10.7M
                U08 *pu1_cost_shifts_for_sorted_node;
2062
2063
10.7M
                U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064
2065
10.7M
                if(u1_pred_dir_of_cur_ref)
2066
1.29M
                {
2067
1.29M
                    pu4_num_results_updated = &u4_num_l1_results_updated;
2068
1.29M
                    pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069
1.29M
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070
1.29M
                }
2071
9.44M
                else
2072
9.44M
                {
2073
9.44M
                    pu4_num_results_updated = &u4_num_l0_results_updated;
2074
9.44M
                    pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075
9.44M
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076
9.44M
                }
2077
2078
10.7M
                ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079
2080
10.7M
                ps_search_node_4x4 =
2081
10.7M
                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082
2083
24.0M
                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084
13.2M
                {
2085
13.2M
                    hme_add_new_node_to_a_sorted_array(
2086
13.2M
                        &ps_search_node_4x4[i4_j],
2087
13.2M
                        pps_result_nodes,
2088
13.2M
                        pu1_cost_shifts_for_sorted_node,
2089
13.2M
                        *pu4_num_results_updated,
2090
13.2M
                        0);
2091
2092
13.2M
                    (*pu4_num_results_updated)++;
2093
2094
13.2M
                    hme_add_new_node_to_a_sorted_array(
2095
13.2M
                        &ps_search_node_8x8[i4_j],
2096
13.2M
                        pps_result_nodes,
2097
13.2M
                        pu1_cost_shifts_for_sorted_node,
2098
13.2M
                        *pu4_num_results_updated,
2099
13.2M
                        2);
2100
2101
13.2M
                    (*pu4_num_results_updated)++;
2102
13.2M
                }
2103
10.7M
            }
2104
2105
4.15M
            switch(i)
2106
4.15M
            {
2107
1.03M
            case 0:
2108
1.03M
            {
2109
1.03M
                ps_mv_l0_root = ps_mv1_l0_root;
2110
1.03M
                ps_mv_l1_root = ps_mv1_l1_root;
2111
2112
1.03M
                pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113
1.03M
                pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114
2115
1.03M
                break;
2116
0
            }
2117
1.03M
            case 1:
2118
1.03M
            {
2119
1.03M
                ps_mv_l0_root = ps_mv2_l0_root;
2120
1.03M
                ps_mv_l1_root = ps_mv2_l1_root;
2121
2122
1.03M
                pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123
1.03M
                pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124
2125
1.03M
                break;
2126
0
            }
2127
1.03M
            case 2:
2128
1.03M
            {
2129
1.03M
                ps_mv_l0_root = ps_mv3_l0_root;
2130
1.03M
                ps_mv_l1_root = ps_mv3_l1_root;
2131
2132
1.03M
                pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133
1.03M
                pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134
2135
1.03M
                break;
2136
0
            }
2137
1.03M
            case 3:
2138
1.03M
            {
2139
1.03M
                ps_mv_l0_root = ps_mv4_l0_root;
2140
1.03M
                ps_mv_l1_root = ps_mv4_l1_root;
2141
2142
1.03M
                pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143
1.03M
                pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144
2145
1.03M
                break;
2146
0
            }
2147
4.15M
            }
2148
2149
4.15M
            u4_num_l0_results_updated =
2150
4.15M
                MIN((S32)u4_num_l0_results_updated,
2151
4.15M
                    ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152
2153
4.15M
            u4_num_l1_results_updated =
2154
4.15M
                MIN((S32)u4_num_l1_results_updated,
2155
4.15M
                    ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156
2157
15.9M
            for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158
11.8M
            {
2159
11.8M
                COPY_SEARCH_RESULT(
2160
11.8M
                    &ps_mv_l0_root[i4_j],
2161
11.8M
                    &pi1_ref_idx_l0_root[i4_j],
2162
11.8M
                    aps_result_nodes_sorted[0][i4_j],
2163
11.8M
                    0);
2164
11.8M
            }
2165
2166
5.60M
            for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167
1.45M
            {
2168
1.45M
                COPY_SEARCH_RESULT(
2169
1.45M
                    &ps_mv_l1_root[i4_j],
2170
1.45M
                    &pi1_ref_idx_l1_root[i4_j],
2171
1.45M
                    aps_result_nodes_sorted[1][i4_j],
2172
1.45M
                    0);
2173
1.45M
            }
2174
4.15M
        }
2175
1.03M
    }
2176
1.03M
}
2177
2178
/**
******************************************************************************
*  @brief Scales a motion vector component projected from a different layer
*         of the same picture (so no ref id related delta poc scaling is
*         required).
*
*         Expands to
*           (mvcomp_p * dim_c + ((SIGN(mvcomp_p) * dim_p) >> 1)) / dim_p
*         i.e. the component is multiplied by dim_c, a signed half of dim_p is
*         added, and the sum is divided by dim_p.
*
*         mvcomp_p and dim_p are expanded more than once, so the arguments
*         must not have side effects.
******************************************************************************
*/

#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
    (((mvcomp_p) * (dim_c) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
2187
/**
2188
********************************************************************************
2189
*  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2190
*                                   layer_ctxt_t *ps_curr_layer,
2191
*                                   layer_ctxt_t *ps_coarse_layer,
2192
*                                   S32 i4_pos_x,
2193
*                                   S32 i4_pos_y,
2194
*                                   S08 i1_ref_id,
2195
*                                   S08 i1_result_id)
2196
*
2197
*  @brief  From a coarser layer, projects a candidated situated at "colocated"
2198
*          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2199
*
2200
*  @param[out]  ps_search_node : contains the projected result
2201
*
2202
*  @param[in]   ps_curr_layer : current layer context
2203
*
2204
*  @param[in]   ps_coarse_layer  : coarser layer context
2205
*
2206
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2207
*
2208
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2209
*
2210
*  @param[in]   i1_ref_id : reference id for which the candidate required
2211
*
2212
*  @param[in]   i4_result_id : result id for which the candidate required
2213
*                              (0 : best result, 1 : next best)
2214
*
2215
*  @return None
2216
********************************************************************************
2217
*/
2218
2219
void hme_project_coloc_candt(
2220
    search_node_t *ps_search_node,
2221
    layer_ctxt_t *ps_curr_layer,
2222
    layer_ctxt_t *ps_coarse_layer,
2223
    S32 i4_pos_x,
2224
    S32 i4_pos_y,
2225
    S08 i1_ref_id,
2226
    S32 i4_result_id)
2227
0
{
2228
0
    S32 wd_c, ht_c, wd_p, ht_p;
2229
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2230
0
    layer_mv_t *ps_layer_mvbank;
2231
0
    hme_mv_t *ps_mv;
2232
0
    S08 *pi1_ref_idx;
2233
2234
    /* Width and ht of current and prev layers */
2235
0
    wd_c = ps_curr_layer->i4_wd;
2236
0
    ht_c = ps_curr_layer->i4_ht;
2237
0
    wd_p = ps_coarse_layer->i4_wd;
2238
0
    ht_p = ps_coarse_layer->i4_ht;
2239
2240
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241
0
    blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242
2243
    /* Safety check to avoid uninitialized access across temporal layers */
2244
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246
2247
    /* Project the positions to prev layer */
2248
    /* TODO: convert these to scale factors at pic level */
2249
0
    blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250
0
    blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251
2252
    /* Pick up the mvs from the location */
2253
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255
2256
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258
2259
0
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260
0
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261
2262
0
    ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263
0
    ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265
0
    ps_search_node->u1_subpel_done = 0;
2266
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267
0
    {
2268
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2269
0
        ps_search_node->s_mv.i2_mvx = 0;
2270
0
        ps_search_node->s_mv.i2_mvy = 0;
2271
0
    }
2272
0
}
2273
2274
/**
2275
********************************************************************************
2276
*  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277
*                                   layer_ctxt_t *ps_curr_layer,
2278
*                                   layer_ctxt_t *ps_coarse_layer,
2279
*                                   S32 i4_pos_x,
2280
*                                   S32 i4_pos_y,
2281
*                                   S08 i1_ref_id,
2282
*                                   S08 i1_result_id)
2283
*
2284
*  @brief  From a coarser layer, projects a candidated situated at "colocated"
2285
*          position in the picture when the ratios are dyadic
2286
*
2287
*  @param[out]  ps_search_node : contains the projected result
2288
*
2289
*  @param[in]   ps_curr_layer : current layer context
2290
*
2291
*  @param[in]   ps_coarse_layer  : coarser layer context
2292
*
2293
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2294
*
2295
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2296
*
2297
*  @param[in]   i1_ref_id : reference id for which the candidate required
2298
*
2299
*  @param[in]   i4_result_id : result id for which the candidate required
2300
*                              (0 : best result, 1 : next best)
2301
*
2302
*  @return None
2303
********************************************************************************
2304
*/
2305
2306
void hme_project_coloc_candt_dyadic(
2307
    search_node_t *ps_search_node,
2308
    layer_ctxt_t *ps_curr_layer,
2309
    layer_ctxt_t *ps_coarse_layer,
2310
    S32 i4_pos_x,
2311
    S32 i4_pos_y,
2312
    S08 i1_ref_id,
2313
    S32 i4_result_id)
2314
49.9M
{
2315
49.9M
    S32 wd_c, ht_c, wd_p, ht_p;
2316
49.9M
    S32 blksize_p, blk_x, blk_y, i4_offset;
2317
49.9M
    layer_mv_t *ps_layer_mvbank;
2318
49.9M
    hme_mv_t *ps_mv;
2319
49.9M
    S08 *pi1_ref_idx;
2320
2321
    /* Width and ht of current and prev layers */
2322
49.9M
    wd_c = ps_curr_layer->i4_wd;
2323
49.9M
    ht_c = ps_curr_layer->i4_ht;
2324
49.9M
    wd_p = ps_coarse_layer->i4_wd;
2325
49.9M
    ht_p = ps_coarse_layer->i4_ht;
2326
2327
49.9M
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328
    /* blksize_p = log2(wd) + 1 */
2329
49.9M
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330
2331
    /* ASSERT for valid sizes */
2332
49.9M
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333
2334
    /* Safety check to avoid uninitialized access across temporal layers */
2335
49.9M
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336
49.9M
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337
2338
    /* Project the positions to prev layer */
2339
    /* TODO: convert these to scale factors at pic level */
2340
49.9M
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2341
49.9M
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2342
2343
    /* Pick up the mvs from the location */
2344
49.9M
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345
49.9M
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346
2347
49.9M
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348
49.9M
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349
2350
49.9M
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351
49.9M
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352
2353
49.9M
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354
49.9M
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355
49.9M
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356
49.9M
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357
0
    {
2358
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2359
0
        ps_search_node->s_mv.i2_mvx = 0;
2360
0
        ps_search_node->s_mv.i2_mvy = 0;
2361
0
    }
2362
49.9M
}
2363
2364
void hme_project_coloc_candt_dyadic_implicit(
2365
    search_node_t *ps_search_node,
2366
    layer_ctxt_t *ps_curr_layer,
2367
    layer_ctxt_t *ps_coarse_layer,
2368
    S32 i4_pos_x,
2369
    S32 i4_pos_y,
2370
    S32 i4_num_act_ref_l0,
2371
    U08 u1_pred_dir,
2372
    U08 u1_default_ref_id,
2373
    S32 i4_result_id)
2374
0
{
2375
0
    S32 wd_c, ht_c, wd_p, ht_p;
2376
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2377
0
    layer_mv_t *ps_layer_mvbank;
2378
0
    hme_mv_t *ps_mv;
2379
0
    S08 *pi1_ref_idx;
2380
2381
    /* Width and ht of current and prev layers */
2382
0
    wd_c = ps_curr_layer->i4_wd;
2383
0
    ht_c = ps_curr_layer->i4_ht;
2384
0
    wd_p = ps_coarse_layer->i4_wd;
2385
0
    ht_p = ps_coarse_layer->i4_ht;
2386
2387
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2388
0
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2389
2390
    /* ASSERT for valid sizes */
2391
0
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2392
2393
    /* Safety check to avoid uninitialized access across temporal layers */
2394
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2395
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2396
    /* Project the positions to prev layer */
2397
    /* TODO: convert these to scale factors at pic level */
2398
0
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2399
0
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2400
2401
    /* Pick up the mvs from the location */
2402
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2403
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2404
2405
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2406
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2407
2408
0
    if(u1_pred_dir == 1)
2409
0
    {
2410
0
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411
0
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2412
0
    }
2413
2414
0
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2415
0
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2416
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2417
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2418
0
    {
2419
0
        ps_search_node->i1_ref_idx = u1_default_ref_id;
2420
0
        ps_search_node->s_mv.i2_mvx = 0;
2421
0
        ps_search_node->s_mv.i2_mvy = 0;
2422
0
    }
2423
0
}
2424
2425
/**
 * Scales the four range limits (i2_min_x, i2_max_x, i2_min_y, i2_max_y) of the
 * struct lvalue prm2 by 2^shift and stores them in the struct lvalue prm1.
 *
 * - Every argument is parenthesised, so expressions such as `*ps_range` or
 *   `cond ? 1 : 2` expand correctly.
 * - The scaling is a multiplication by (1 << shift) rather than a left shift
 *   of the limit itself, because the limits are signed and left shifting a
 *   negative value is undefined behaviour.
 * - The expansion stays a plain brace block (not do/while(0)) so that existing
 *   call sites, with or without a trailing semicolon, keep compiling.
 */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }
2432
2433
/**
 * Pointer flavour of SCALE_RANGE_PRMS: scales the four range limits
 * (i2_min_x, i2_max_x, i2_min_y, i2_max_y) of the struct pointed to by prm2 by
 * 2^shift and stores them in the struct pointed to by prm1.
 *
 * - Every argument is parenthesised, so pointer expressions such as
 *   `ps_base + 1` and shift expressions such as `cond ? 1 : 2` expand
 *   correctly.
 * - The scaling is a multiplication by (1 << shift) rather than a left shift
 *   of the limit itself, because the limits are signed and left shifting a
 *   negative value is undefined behaviour.
 * - The expansion stays a plain brace block (not do/while(0)) so that existing
 *   call sites, with or without a trailing semicolon, keep compiling.
 */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2440
2441
/**
2442
********************************************************************************
2443
*  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444
*                       refine_layer_prms_t *ps_refine_prms)
2445
*
2446
*  @brief  Frame init of refinemnet layers in ME
2447
*
2448
*  @param[in,out]  ps_ctxt: ME Handle
2449
*
2450
*  @param[in]  ps_refine_prms : refinement layer prms
2451
*
2452
*  @return None
2453
********************************************************************************
2454
*/
2455
void hme_refine_frm_init(
2456
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457
259k
{
2458
    /* local variables */
2459
259k
    BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460
259k
    S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461
2462
259k
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463
2464
259k
    if(ps_refine_prms->explicit_ref)
2465
130k
    {
2466
130k
        i4_num_ref_fpel = i4_num_ref_prev_layer;
2467
130k
    }
2468
129k
    else
2469
129k
    {
2470
129k
        i4_num_ref_fpel = 2;
2471
129k
    }
2472
2473
259k
    if(ps_refine_prms->i4_enable_4x4_part)
2474
59.3k
    {
2475
59.3k
        e_result_blk_size = BLK_4x4;
2476
59.3k
    }
2477
2478
259k
    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479
2480
259k
    hme_init_mv_bank(
2481
259k
        ps_curr_layer,
2482
259k
        e_result_blk_size,
2483
259k
        i4_num_ref_fpel,
2484
259k
        ps_refine_prms->i4_num_mvbank_results,
2485
259k
        ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486
259k
}
2487
2488
#if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489
/**
2490
********************************************************************************
2491
*  @fn   void hme_init_clusters_16x16
2492
*               (
2493
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2494
*               )
2495
*
2496
*  @brief  Intialisations for the structs used in clustering algorithm
2497
*
2498
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2499
*                                        of 16x16 block
2500
*
2501
*  @return None
2502
********************************************************************************
2503
*/
2504
static __inline void
2505
    hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506
520k
{
2507
520k
    S32 i;
2508
2509
520k
    ps_cluster_blk_16x16->num_clusters = 0;
2510
520k
    ps_cluster_blk_16x16->intra_mv_area = 0;
2511
520k
    ps_cluster_blk_16x16->best_inter_cost = 0;
2512
2513
4.68M
    for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514
4.16M
    {
2515
4.16M
        ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516
4.16M
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517
2518
4.16M
        ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519
2520
4.16M
        ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521
4.16M
        ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522
4.16M
    }
2523
6.77M
    for(i = 0; i < MAX_NUM_REF; i++)
2524
6.25M
    {
2525
6.25M
        ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526
6.25M
    }
2527
520k
}
2528
2529
/**
2530
********************************************************************************
2531
*  @fn   void hme_init_clusters_32x32
2532
*               (
2533
*                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2534
*               )
2535
*
2536
*  @brief  Intialisations for the structs used in clustering algorithm
2537
*
2538
*  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2539
*                                        of 32x32 block
2540
*
2541
*  @return None
2542
********************************************************************************
2543
*/
2544
static __inline void
2545
    hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546
130k
{
2547
130k
    S32 i;
2548
2549
130k
    ps_cluster_blk_32x32->num_clusters = 0;
2550
130k
    ps_cluster_blk_32x32->intra_mv_area = 0;
2551
130k
    ps_cluster_blk_32x32->best_alt_ref = -1;
2552
130k
    ps_cluster_blk_32x32->best_uni_ref = -1;
2553
130k
    ps_cluster_blk_32x32->best_inter_cost = 0;
2554
130k
    ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555
2556
1.43M
    for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557
1.30M
    {
2558
1.30M
        ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559
1.30M
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560
1.30M
        ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561
2562
1.30M
        ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563
1.30M
        ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564
1.30M
    }
2565
1.69M
    for(i = 0; i < MAX_NUM_REF; i++)
2566
1.56M
    {
2567
1.56M
        ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568
1.56M
    }
2569
130k
}
2570
2571
/**
2572
********************************************************************************
2573
*  @fn   void hme_init_clusters_64x64
2574
*               (
2575
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2576
*               )
2577
*
2578
*  @brief  Intialisations for the structs used in clustering algorithm
2579
*
2580
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2581
*                                        of 64x64 block
2582
*
2583
*  @return None
2584
********************************************************************************
2585
*/
2586
static __inline void
2587
    hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588
32.5k
{
2589
32.5k
    S32 i;
2590
2591
32.5k
    ps_cluster_blk_64x64->num_clusters = 0;
2592
32.5k
    ps_cluster_blk_64x64->intra_mv_area = 0;
2593
32.5k
    ps_cluster_blk_64x64->best_alt_ref = -1;
2594
32.5k
    ps_cluster_blk_64x64->best_uni_ref = -1;
2595
32.5k
    ps_cluster_blk_64x64->best_inter_cost = 0;
2596
2597
358k
    for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598
325k
    {
2599
325k
        ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600
325k
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601
325k
        ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602
2603
325k
        ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604
325k
        ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605
325k
    }
2606
423k
    for(i = 0; i < MAX_NUM_REF; i++)
2607
390k
    {
2608
390k
        ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609
390k
    }
2610
32.5k
}
2611
2612
/**
2613
********************************************************************************
2614
*  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2615
*               (
2616
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2617
*               )
2618
*
2619
*  @brief  Finds best_uni_ref and best_alt_ref
2620
*
2621
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2622
*
2623
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2624
*                             enabled
2625
*
2626
*  @param[in]  block_width: width of the block in pels
2627
*
2628
*  @param[in]  e_cu_pos: position of the block within the CTB
2629
*
2630
*  @return None
2631
********************************************************************************
2632
*/
2633
void hme_sort_and_assign_top_ref_ids_areawise(
2634
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635
162k
{
2636
162k
    cluster_32x32_blk_t *ps_32x32 = NULL;
2637
162k
    cluster_64x64_blk_t *ps_64x64 = NULL;
2638
162k
    cluster_data_t *ps_data;
2639
2640
162k
    S32 j, k;
2641
2642
162k
    S32 ai4_uni_area[MAX_NUM_REF];
2643
162k
    S32 ai4_bi_area[MAX_NUM_REF];
2644
162k
    S32 ai4_ref_id_found[MAX_NUM_REF];
2645
162k
    S32 ai4_ref_id[MAX_NUM_REF];
2646
2647
162k
    S32 best_uni_ref = -1, best_alt_ref = -1;
2648
162k
    S32 num_clusters;
2649
162k
    S32 num_ref = 0;
2650
162k
    S32 num_clusters_evaluated = 0;
2651
162k
    S32 is_cur_blk_valid;
2652
2653
162k
    if(32 == block_width)
2654
130k
    {
2655
130k
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656
130k
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657
130k
        num_clusters = ps_32x32->num_clusters;
2658
130k
        ps_data = &ps_32x32->as_cluster_data[0];
2659
130k
    }
2660
32.5k
    else
2661
32.5k
    {
2662
32.5k
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663
32.5k
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664
32.5k
        num_clusters = ps_64x64->num_clusters;
2665
32.5k
        ps_data = &ps_64x64->as_cluster_data[0];
2666
32.5k
    }
2667
2668
#if !ENABLE_4CTB_EVALUATION
2669
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670
    {
2671
        return;
2672
    }
2673
#endif
2674
162k
    if(num_clusters == 0)
2675
9.28k
    {
2676
9.28k
        return;
2677
9.28k
    }
2678
153k
    else if(!is_cur_blk_valid)
2679
855
    {
2680
855
        return;
2681
855
    }
2682
2683
152k
    memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684
152k
    memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685
152k
    memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686
152k
    memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687
2688
420k
    for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689
268k
    {
2690
268k
        S32 ref_id;
2691
2692
268k
        if(!ps_data->is_valid_cluster)
2693
1.49k
        {
2694
1.49k
            continue;
2695
1.49k
        }
2696
2697
266k
        ref_id = ps_data->ref_id;
2698
2699
266k
        num_clusters_evaluated++;
2700
2701
266k
        ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702
266k
        ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703
2704
266k
        if(!ai4_ref_id_found[ref_id])
2705
195k
        {
2706
195k
            ai4_ref_id[ref_id] = ref_id;
2707
195k
            ai4_ref_id_found[ref_id] = 1;
2708
195k
            num_ref++;
2709
195k
        }
2710
266k
    }
2711
2712
152k
    {
2713
152k
        S32 ai4_ref_id_temp[MAX_NUM_REF];
2714
2715
152k
        memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716
2717
1.83M
        for(k = 1; k < MAX_NUM_REF; k++)
2718
1.67M
        {
2719
1.67M
            if(ai4_uni_area[k] > ai4_uni_area[0])
2720
2.70k
            {
2721
2.70k
                SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722
2.70k
                SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723
2.70k
            }
2724
1.67M
        }
2725
2726
152k
        best_uni_ref = ai4_ref_id_temp[0];
2727
152k
    }
2728
2729
152k
    if(bidir_enabled)
2730
45.1k
    {
2731
542k
        for(k = 1; k < MAX_NUM_REF; k++)
2732
497k
        {
2733
497k
            if(ai4_bi_area[k] > ai4_bi_area[0])
2734
2.05k
            {
2735
2.05k
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736
2.05k
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737
2.05k
            }
2738
497k
        }
2739
2740
45.1k
        if(!ai4_bi_area[0])
2741
39.8k
        {
2742
39.8k
            best_alt_ref = -1;
2743
2744
39.8k
            if(32 == block_width)
2745
33.1k
            {
2746
33.1k
                SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747
33.1k
            }
2748
6.70k
            else
2749
6.70k
            {
2750
6.70k
                SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751
6.70k
            }
2752
2753
39.8k
            return;
2754
39.8k
        }
2755
2756
5.31k
        if(best_uni_ref == ai4_ref_id[0])
2757
3.27k
        {
2758
35.9k
            for(k = 2; k < MAX_NUM_REF; k++)
2759
32.7k
            {
2760
32.7k
                if(ai4_bi_area[k] > ai4_bi_area[1])
2761
603
                {
2762
603
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763
603
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764
603
                }
2765
32.7k
            }
2766
2767
3.27k
            best_alt_ref = ai4_ref_id[1];
2768
3.27k
        }
2769
2.04k
        else
2770
2.04k
        {
2771
2.04k
            best_alt_ref = ai4_ref_id[0];
2772
2.04k
        }
2773
5.31k
    }
2774
2775
112k
    if(32 == block_width)
2776
89.2k
    {
2777
89.2k
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778
89.2k
    }
2779
23.5k
    else
2780
23.5k
    {
2781
23.5k
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782
23.5k
    }
2783
112k
}
2784
2785
/**
2786
********************************************************************************
2787
*  @fn   void hme_find_top_ref_ids
2788
*               (
2789
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2790
*               )
2791
*
2792
*  @brief  Finds best_uni_ref and best_alt_ref
2793
*
2794
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2795
*
2796
*  @return None
2797
********************************************************************************
2798
*/
2799
void hme_find_top_ref_ids(
2800
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801
65.1k
{
2802
65.1k
    S32 i;
2803
2804
65.1k
    if(32 == block_width)
2805
32.5k
    {
2806
162k
        for(i = 0; i < 4; i++)
2807
130k
        {
2808
130k
            hme_sort_and_assign_top_ref_ids_areawise(
2809
130k
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810
130k
        }
2811
32.5k
    }
2812
32.5k
    else if(64 == block_width)
2813
32.5k
    {
2814
32.5k
        hme_sort_and_assign_top_ref_ids_areawise(
2815
32.5k
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816
32.5k
    }
2817
65.1k
}
2818
2819
/**
2820
********************************************************************************
2821
*  @fn   void hme_boot_out_outlier
2822
*               (
2823
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2824
*               )
2825
*
2826
*  @brief  Removes outlier clusters before CU tree population
2827
*
2828
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2829
*
2830
*  @return None
2831
********************************************************************************
2832
*/
2833
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834
65.1k
{
2835
65.1k
    cluster_32x32_blk_t *ps_32x32;
2836
2837
65.1k
    S32 i;
2838
2839
65.1k
    cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840
2841
65.1k
    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842
2843
65.1k
    if(32 == blk_width)
2844
32.5k
    {
2845
        /* 32x32 clusters */
2846
162k
        for(i = 0; i < 4; i++)
2847
130k
        {
2848
130k
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849
2850
130k
            if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851
1.83k
            {
2852
1.83k
                BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853
1.83k
            }
2854
130k
        }
2855
32.5k
    }
2856
32.5k
    else if(64 == blk_width)
2857
32.5k
    {
2858
        /* 64x64 clusters */
2859
32.5k
        if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860
2.75k
        {
2861
2.75k
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862
2.75k
        }
2863
32.5k
    }
2864
65.1k
}
2865
2866
/**
2867
********************************************************************************
2868
*  @fn   void hme_update_cluster_attributes
2869
*               (
2870
*                   cluster_data_t *ps_cluster_data,
2871
*                   S32 mvx,
2872
*                   S32 mvy,
2873
*                   PART_ID_T e_part_id
2874
*               )
2875
*
2876
*  @brief  Implementation fo the clustering algorithm
2877
*
2878
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2879
*
2880
*  @param[in]  mvx : x co-ordinate of the motion vector
2881
*
2882
*  @param[in]  mvy : y co-ordinate of the motion vector
2883
*
2884
*  @param[in]  ref_idx : ref_id of the motion vector
2885
*
2886
*  @param[in]  e_part_id : partition id of the motion vector
2887
*
2888
*  @return None
2889
********************************************************************************
2890
*/
2891
static __inline void hme_update_cluster_attributes(
2892
    cluster_data_t *ps_cluster_data,
2893
    S32 mvx,
2894
    S32 mvy,
2895
    S32 mvdx,
2896
    S32 mvdy,
2897
    S32 ref_id,
2898
    S32 sdi,
2899
    U08 is_part_of_bi,
2900
    PART_ID_T e_part_id)
2901
48.5k
{
2902
48.5k
    LWORD64 i8_mvx_sum_q8;
2903
48.5k
    LWORD64 i8_mvy_sum_q8;
2904
2905
48.5k
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906
48.5k
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907
2908
48.5k
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909
4.31k
    {
2910
4.31k
        ps_cluster_data->min_x = mvx;
2911
4.31k
    }
2912
44.2k
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913
4.34k
    {
2914
4.34k
        ps_cluster_data->max_x = mvx;
2915
4.34k
    }
2916
2917
48.5k
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918
4.03k
    {
2919
4.03k
        ps_cluster_data->min_y = mvy;
2920
4.03k
    }
2921
44.5k
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922
4.20k
    {
2923
4.20k
        ps_cluster_data->max_y = mvy;
2924
4.20k
    }
2925
2926
48.5k
    {
2927
48.5k
        S32 num_mvs = ps_cluster_data->num_mvs;
2928
2929
48.5k
        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930
48.5k
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931
48.5k
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932
2933
        /***************************/
2934
48.5k
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935
48.5k
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936
        /**************************/
2937
48.5k
    }
2938
2939
    /* Updation of centroid */
2940
48.5k
    {
2941
48.5k
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942
48.5k
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943
2944
48.5k
        ps_cluster_data->num_mvs++;
2945
2946
48.5k
        ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947
48.5k
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948
48.5k
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949
48.5k
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950
48.5k
    }
2951
2952
48.5k
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953
2954
48.5k
    if(is_part_of_bi)
2955
2.38k
    {
2956
2.38k
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957
2.38k
    }
2958
46.1k
    else
2959
46.1k
    {
2960
46.1k
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961
46.1k
    }
2962
48.5k
}
2963
2964
/**
2965
********************************************************************************
2966
*  @fn   void hme_try_cluster_merge
2967
*               (
2968
*                   cluster_data_t *ps_cluster_data,
2969
*                   S32 *pi4_num_clusters,
2970
*                   S32 idx_of_updated_cluster
2971
*               )
2972
*
2973
*  @brief  Implementation fo the clustering algorithm
2974
*
2975
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2976
*
2977
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2978
*
2979
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2980
*                                       updated
2981
*
2982
*  @return Nothing
2983
********************************************************************************
2984
*/
2985
void hme_try_cluster_merge(
2986
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987
46.4k
{
2988
46.4k
    centroid_t *ps_centroid;
2989
2990
46.4k
    S32 cur_pos_x_q8;
2991
46.4k
    S32 cur_pos_y_q8;
2992
46.4k
    S32 i;
2993
46.4k
    S32 max_dist_from_centroid;
2994
46.4k
    S32 mvd;
2995
46.4k
    S32 mvdx_q8;
2996
46.4k
    S32 mvdx;
2997
46.4k
    S32 mvdy_q8;
2998
46.4k
    S32 mvdy;
2999
46.4k
    S32 num_clusters, num_clusters_evaluated;
3000
46.4k
    S32 other_pos_x_q8;
3001
46.4k
    S32 other_pos_y_q8;
3002
3003
46.4k
    cluster_data_t *ps_root = ps_cluster_data;
3004
46.4k
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005
46.4k
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006
3007
    /* Merge is superfluous if num_clusters is 1 */
3008
46.4k
    if(*pu1_num_clusters == 1)
3009
25.5k
    {
3010
25.5k
        return;
3011
25.5k
    }
3012
3013
20.8k
    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014
20.8k
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015
3016
20.8k
    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017
3018
20.8k
    num_clusters = *pu1_num_clusters;
3019
20.8k
    num_clusters_evaluated = 0;
3020
3021
71.1k
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022
50.3k
    {
3023
50.3k
        if(!ps_cluster_data->is_valid_cluster)
3024
2
        {
3025
2
            continue;
3026
2
        }
3027
50.3k
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028
41.1k
        {
3029
41.1k
            num_clusters_evaluated++;
3030
41.1k
            continue;
3031
41.1k
        }
3032
3033
9.19k
        ps_centroid = &ps_cluster_data->s_centroid;
3034
3035
9.19k
        other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036
9.19k
        other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037
3038
9.19k
        mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039
9.19k
        mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040
9.19k
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041
9.19k
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042
3043
9.19k
        mvd = ABS(mvdx) + ABS(mvdy);
3044
3045
9.19k
        if(mvd <= (max_dist_from_centroid >> 1))
3046
6
        {
3047
            /* 0 => no updates */
3048
            /* 1 => min updated */
3049
            /* 2 => max updated */
3050
6
            S32 minmax_x_update_id;
3051
6
            S32 minmax_y_update_id;
3052
3053
6
            LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054
6
            LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055
6
            LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056
6
            LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057
3058
6
            (*pu1_num_clusters)--;
3059
3060
6
            ps_cluster_data->is_valid_cluster = 0;
3061
3062
6
            memcpy(
3063
6
                &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064
6
                ps_cluster_data->as_mv,
3065
6
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066
3067
6
            ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068
6
            ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069
6
            ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070
6
            ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071
6
            i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072
6
            i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073
3074
6
            ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075
6
            ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076
3077
6
            minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078
6
                                     ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079
6
                                     : 1;
3080
6
            minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081
6
                                     ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082
6
                                     : 1;
3083
3084
            /* Updation of centroid spread */
3085
6
            switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086
6
            {
3087
0
            case 1:
3088
0
            {
3089
0
                S32 mvd, mvd_q8;
3090
3091
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092
3093
0
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3095
3096
0
                if(mvd > (max_dist_from_centroid))
3097
0
                {
3098
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3099
0
                }
3100
0
                break;
3101
0
            }
3102
1
            case 2:
3103
1
            {
3104
1
                S32 mvd, mvd_q8;
3105
3106
1
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107
3108
1
                mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109
1
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3110
3111
1
                if(mvd > (max_dist_from_centroid))
3112
0
                {
3113
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3114
0
                }
3115
1
                break;
3116
0
            }
3117
0
            case 4:
3118
0
            {
3119
0
                S32 mvd, mvd_q8;
3120
3121
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122
3123
0
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3125
3126
0
                if(mvd > (max_dist_from_centroid))
3127
0
                {
3128
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3129
0
                }
3130
0
                break;
3131
0
            }
3132
0
            case 5:
3133
0
            {
3134
0
                S32 mvd;
3135
0
                S32 mvdx, mvdx_q8;
3136
0
                S32 mvdy, mvdy_q8;
3137
3138
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140
3141
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143
3144
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145
3146
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148
3149
0
                if(mvd > max_dist_from_centroid)
3150
0
                {
3151
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3152
0
                }
3153
0
                break;
3154
0
            }
3155
2
            case 6:
3156
2
            {
3157
2
                S32 mvd;
3158
2
                S32 mvdx, mvdx_q8;
3159
2
                S32 mvdy, mvdy_q8;
3160
3161
2
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162
2
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163
3164
2
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165
2
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166
3167
2
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168
3169
2
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170
2
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171
3172
2
                if(mvd > max_dist_from_centroid)
3173
0
                {
3174
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3175
0
                }
3176
2
                break;
3177
0
            }
3178
0
            case 8:
3179
0
            {
3180
0
                S32 mvd, mvd_q8;
3181
3182
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183
3184
0
                mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3186
3187
0
                if(mvd > (max_dist_from_centroid))
3188
0
                {
3189
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3190
0
                }
3191
0
                break;
3192
0
            }
3193
1
            case 9:
3194
1
            {
3195
1
                S32 mvd;
3196
1
                S32 mvdx, mvdx_q8;
3197
1
                S32 mvdy, mvdy_q8;
3198
3199
1
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200
1
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201
3202
1
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203
1
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204
3205
1
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206
3207
1
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208
1
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209
3210
1
                if(mvd > max_dist_from_centroid)
3211
0
                {
3212
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3213
0
                }
3214
1
                break;
3215
0
            }
3216
2
            case 10:
3217
2
            {
3218
2
                S32 mvd;
3219
2
                S32 mvdx, mvdx_q8;
3220
2
                S32 mvdy, mvdy_q8;
3221
3222
2
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223
2
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224
3225
2
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226
2
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227
3228
2
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229
3230
2
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231
2
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232
3233
2
                if(mvd > ps_cluster_data->max_dist_from_centroid)
3234
0
                {
3235
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3236
0
                }
3237
2
                break;
3238
0
            }
3239
0
            default:
3240
0
            {
3241
0
                break;
3242
0
            }
3243
6
            }
3244
3245
6
            hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246
3247
6
            return;
3248
6
        }
3249
3250
9.18k
        num_clusters_evaluated++;
3251
9.18k
    }
3252
20.8k
}
3253
3254
/**
3255
********************************************************************************
3256
*  @fn   void hme_find_and_update_clusters
3257
*               (
3258
*                   cluster_data_t *ps_cluster_data,
3259
*                   S32 *pi4_num_clusters,
3260
*                   S32 mvx,
3261
*                   S32 mvy,
3262
*                   S32 ref_idx,
3263
*                   PART_ID_T e_part_id
3264
*               )
3265
*
3266
*  @brief  Implementation fo the clustering algorithm
3267
*
3268
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3269
*
3270
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3271
*
3272
*  @param[in]  mvx : x co-ordinate of the motion vector
3273
*
3274
*  @param[in]  mvy : y co-ordinate of the motion vector
3275
*
3276
*  @param[in]  ref_idx : ref_id of the motion vector
3277
*
3278
*  @param[in]  e_part_id : partition id of the motion vector
3279
*
3280
*  @return None
3281
********************************************************************************
3282
*/
3283
void hme_find_and_update_clusters(
3284
    cluster_data_t *ps_cluster_data,
3285
    U08 *pu1_num_clusters,
3286
    S16 i2_mv_x,
3287
    S16 i2_mv_y,
3288
    U08 i1_ref_idx,
3289
    S32 i4_sdi,
3290
    PART_ID_T e_part_id,
3291
    U08 is_part_of_bi)
3292
572k
{
3293
572k
    S32 i;
3294
572k
    S32 min_mvd_cluster_id = -1;
3295
572k
    S32 mvd, mvd_limit, mvdx, mvdy;
3296
572k
    S32 min_mvdx, min_mvdy;
3297
3298
572k
    S32 min_mvd = MAX_32BIT_VAL;
3299
572k
    S32 num_clusters = *pu1_num_clusters;
3300
3301
572k
    S32 mvx = i2_mv_x;
3302
572k
    S32 mvy = i2_mv_y;
3303
572k
    S32 ref_idx = i1_ref_idx;
3304
572k
    S32 sdi = i4_sdi;
3305
572k
    S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306
3307
572k
    if(num_clusters == 0)
3308
488k
    {
3309
488k
        cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310
3311
488k
        ps_data->num_mvs = 1;
3312
488k
        ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313
488k
        ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314
488k
        ps_data->ref_id = ref_idx;
3315
488k
        ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316
488k
        ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317
488k
        ps_data->as_mv[0].mvx = mvx;
3318
488k
        ps_data->as_mv[0].mvy = mvy;
3319
3320
        /***************************/
3321
488k
        ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322
488k
        ps_data->as_mv[0].sdi = sdi;
3323
488k
        if(is_part_of_bi)
3324
3.91k
        {
3325
3.91k
            ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326
3.91k
        }
3327
484k
        else
3328
484k
        {
3329
484k
            ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330
484k
        }
3331
        /**************************/
3332
488k
        ps_data->max_x = mvx;
3333
488k
        ps_data->min_x = mvx;
3334
488k
        ps_data->max_y = mvy;
3335
488k
        ps_data->min_y = mvy;
3336
3337
488k
        ps_data->is_valid_cluster = 1;
3338
3339
488k
        *pu1_num_clusters = 1;
3340
488k
    }
3341
84.0k
    else
3342
84.0k
    {
3343
84.0k
        S32 num_clusters_evaluated = 0;
3344
3345
212k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346
128k
        {
3347
128k
            cluster_data_t *ps_data = &ps_cluster_data[i];
3348
3349
128k
            centroid_t *ps_centroid;
3350
3351
128k
            S32 mvx_q8;
3352
128k
            S32 mvy_q8;
3353
128k
            S32 posx_q8;
3354
128k
            S32 posy_q8;
3355
128k
            S32 mvdx_q8;
3356
128k
            S32 mvdy_q8;
3357
3358
            /* In anticipation of a possible merging of clusters */
3359
128k
            if(ps_data->is_valid_cluster == 0)
3360
0
            {
3361
0
                new_cluster_idx = i;
3362
0
                continue;
3363
0
            }
3364
3365
128k
            if(ref_idx != ps_data->ref_id)
3366
51.0k
            {
3367
51.0k
                num_clusters_evaluated++;
3368
51.0k
                continue;
3369
51.0k
            }
3370
3371
77.0k
            ps_centroid = &ps_data->s_centroid;
3372
77.0k
            posx_q8 = ps_centroid->i4_pos_x_q8;
3373
77.0k
            posy_q8 = ps_centroid->i4_pos_y_q8;
3374
3375
77.0k
            mvx_q8 = mvx << 8;
3376
77.0k
            mvy_q8 = mvy << 8;
3377
3378
77.0k
            mvdx_q8 = posx_q8 - mvx_q8;
3379
77.0k
            mvdy_q8 = posy_q8 - mvy_q8;
3380
3381
77.0k
            mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382
77.0k
            mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383
3384
77.0k
            mvd = ABS(mvdx) + ABS(mvdy);
3385
3386
77.0k
            if(mvd < min_mvd)
3387
72.7k
            {
3388
72.7k
                min_mvd = mvd;
3389
72.7k
                min_mvdx = mvdx;
3390
72.7k
                min_mvdy = mvdy;
3391
72.7k
                min_mvd_cluster_id = i;
3392
72.7k
            }
3393
3394
77.0k
            num_clusters_evaluated++;
3395
77.0k
        }
3396
3397
84.0k
        mvd_limit = (min_mvd_cluster_id == -1)
3398
84.0k
                        ? ps_cluster_data[0].max_dist_from_centroid
3399
84.0k
                        : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400
3401
        /* This condition implies that min_mvd has been updated */
3402
84.0k
        if(min_mvd <= mvd_limit)
3403
48.5k
        {
3404
48.5k
            hme_update_cluster_attributes(
3405
48.5k
                &ps_cluster_data[min_mvd_cluster_id],
3406
48.5k
                mvx,
3407
48.5k
                mvy,
3408
48.5k
                min_mvdx,
3409
48.5k
                min_mvdy,
3410
48.5k
                ref_idx,
3411
48.5k
                sdi,
3412
48.5k
                is_part_of_bi,
3413
48.5k
                e_part_id);
3414
3415
48.5k
            if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416
46.3k
            {
3417
46.3k
                hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418
46.3k
            }
3419
48.5k
        }
3420
35.4k
        else
3421
35.4k
        {
3422
35.4k
            cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423
35.4k
                                          ? &ps_cluster_data[num_clusters]
3424
35.4k
                                          : &ps_cluster_data[new_cluster_idx];
3425
3426
35.4k
            ps_data->num_mvs = 1;
3427
35.4k
            ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428
35.4k
            ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429
35.4k
            ps_data->ref_id = ref_idx;
3430
35.4k
            ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431
35.4k
            ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432
35.4k
            ps_data->as_mv[0].mvx = mvx;
3433
35.4k
            ps_data->as_mv[0].mvy = mvy;
3434
3435
            /***************************/
3436
35.4k
            ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437
35.4k
            ps_data->as_mv[0].sdi = sdi;
3438
35.4k
            if(is_part_of_bi)
3439
8.65k
            {
3440
8.65k
                ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441
8.65k
            }
3442
26.8k
            else
3443
26.8k
            {
3444
26.8k
                ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445
26.8k
            }
3446
            /**************************/
3447
35.4k
            ps_data->max_x = mvx;
3448
35.4k
            ps_data->min_x = mvx;
3449
35.4k
            ps_data->max_y = mvy;
3450
35.4k
            ps_data->min_y = mvy;
3451
3452
35.4k
            ps_data->is_valid_cluster = 1;
3453
3454
35.4k
            num_clusters++;
3455
35.4k
            *pu1_num_clusters = num_clusters;
3456
35.4k
        }
3457
84.0k
    }
3458
572k
}
3459
3460
/**
3461
********************************************************************************
3462
*  @fn   void hme_update_32x32_cluster_attributes
3463
*               (
3464
*                   cluster_32x32_blk_t *ps_blk_32x32,
3465
*                   cluster_data_t *ps_cluster_data
3466
*               )
3467
*
3468
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
3469
*          the constituent 16x16 clusters
3470
*
3471
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3472
*
3473
*  @param[in]  ps_cluster_data : structure containing 16x16 block results
3474
*
3475
*  @return None
3476
********************************************************************************
3477
*/
3478
void hme_update_32x32_cluster_attributes(
3479
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480
521k
{
3481
521k
    cluster_data_t *ps_cur_cluster_32;
3482
3483
521k
    S32 i;
3484
521k
    S32 mvd_limit;
3485
3486
521k
    S32 num_clusters = ps_blk_32x32->num_clusters;
3487
3488
521k
    if(0 == num_clusters)
3489
122k
    {
3490
122k
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491
3492
122k
        ps_blk_32x32->num_clusters++;
3493
122k
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494
3495
122k
        ps_cur_cluster_32->is_valid_cluster = 1;
3496
3497
122k
        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498
122k
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499
122k
        ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500
3501
122k
        memcpy(
3502
122k
            ps_cur_cluster_32->as_mv,
3503
122k
            ps_cluster_data->as_mv,
3504
122k
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505
3506
122k
        ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507
3508
122k
        ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509
3510
122k
        ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511
122k
        ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512
122k
        ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513
122k
        ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514
3515
122k
        ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516
122k
    }
3517
399k
    else
3518
399k
    {
3519
399k
        centroid_t *ps_centroid;
3520
3521
399k
        S32 cur_posx_q8, cur_posy_q8;
3522
399k
        S32 min_mvd_cluster_id = -1;
3523
399k
        S32 mvd;
3524
399k
        S32 mvdx;
3525
399k
        S32 mvdy;
3526
399k
        S32 mvdx_min;
3527
399k
        S32 mvdy_min;
3528
399k
        S32 mvdx_q8;
3529
399k
        S32 mvdy_q8;
3530
3531
399k
        S32 num_clusters_evaluated = 0;
3532
3533
399k
        S32 mvd_min = MAX_32BIT_VAL;
3534
3535
399k
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536
399k
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537
3538
971k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539
571k
        {
3540
571k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541
3542
571k
            if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543
127k
            {
3544
127k
                num_clusters_evaluated++;
3545
127k
                continue;
3546
127k
            }
3547
444k
            if(!ps_cluster_data->is_valid_cluster)
3548
0
            {
3549
0
                continue;
3550
0
            }
3551
3552
444k
            num_clusters_evaluated++;
3553
3554
444k
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3555
3556
444k
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557
444k
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558
3559
444k
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560
444k
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561
3562
444k
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563
444k
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564
3565
444k
            mvd = ABS(mvdx) + ABS(mvdy);
3566
3567
444k
            if(mvd < mvd_min)
3568
411k
            {
3569
411k
                mvd_min = mvd;
3570
411k
                mvdx_min = mvdx;
3571
411k
                mvdy_min = mvdy;
3572
411k
                min_mvd_cluster_id = i;
3573
411k
            }
3574
444k
        }
3575
3576
399k
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577
3578
399k
        mvd_limit = (min_mvd_cluster_id == -1)
3579
399k
                        ? ps_cur_cluster_32[0].max_dist_from_centroid
3580
399k
                        : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581
3582
399k
        if(mvd_min <= mvd_limit)
3583
326k
        {
3584
326k
            LWORD64 i8_updated_posx;
3585
326k
            LWORD64 i8_updated_posy;
3586
326k
            WORD32 minmax_updated_x = 0;
3587
326k
            WORD32 minmax_updated_y = 0;
3588
3589
326k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590
3591
326k
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3592
3593
326k
            ps_cur_cluster_32->is_valid_cluster = 1;
3594
3595
326k
            ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596
326k
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597
326k
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598
3599
326k
            memcpy(
3600
326k
                &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601
326k
                ps_cluster_data->as_mv,
3602
326k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603
3604
326k
            if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605
11.3k
            {
3606
11.3k
                ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607
11.3k
                minmax_updated_x = 1;
3608
11.3k
            }
3609
315k
            else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610
8.46k
            {
3611
8.46k
                ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612
8.46k
                minmax_updated_x = 2;
3613
8.46k
            }
3614
3615
326k
            if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616
12.8k
            {
3617
12.8k
                ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618
12.8k
                minmax_updated_y = 1;
3619
12.8k
            }
3620
313k
            else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621
17.1k
            {
3622
17.1k
                ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623
17.1k
                minmax_updated_y = 2;
3624
17.1k
            }
3625
3626
326k
            switch((minmax_updated_y << 2) + minmax_updated_x)
3627
326k
            {
3628
5.71k
            case 1:
3629
5.71k
            {
3630
5.71k
                S32 mvd, mvd_q8;
3631
3632
5.71k
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633
5.71k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3634
3635
5.71k
                if(mvd > (mvd_limit))
3636
1
                {
3637
1
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638
1
                }
3639
5.71k
                break;
3640
0
            }
3641
3.72k
            case 2:
3642
3.72k
            {
3643
3.72k
                S32 mvd, mvd_q8;
3644
3645
3.72k
                mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646
3.72k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3647
3648
3.72k
                if(mvd > (mvd_limit))
3649
13
                {
3650
13
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651
13
                }
3652
3.72k
                break;
3653
0
            }
3654
6.93k
            case 4:
3655
6.93k
            {
3656
6.93k
                S32 mvd, mvd_q8;
3657
3658
6.93k
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659
6.93k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3660
3661
6.93k
                if(mvd > (mvd_limit))
3662
7
                {
3663
7
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664
7
                }
3665
6.93k
                break;
3666
0
            }
3667
3.19k
            case 5:
3668
3.19k
            {
3669
3.19k
                S32 mvd;
3670
3.19k
                S32 mvdx, mvdx_q8;
3671
3.19k
                S32 mvdy, mvdy_q8;
3672
3673
3.19k
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674
3.19k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675
3676
3.19k
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677
3.19k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678
3679
3.19k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680
3681
3.19k
                if(mvd > mvd_limit)
3682
0
                {
3683
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684
0
                }
3685
3.19k
                break;
3686
0
            }
3687
2.68k
            case 6:
3688
2.68k
            {
3689
2.68k
                S32 mvd;
3690
2.68k
                S32 mvdx, mvdx_q8;
3691
2.68k
                S32 mvdy, mvdy_q8;
3692
3693
2.68k
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694
2.68k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695
3696
2.68k
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697
2.68k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698
3699
2.68k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700
3701
2.68k
                if(mvd > mvd_limit)
3702
0
                {
3703
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704
0
                }
3705
2.68k
                break;
3706
0
            }
3707
12.6k
            case 8:
3708
12.6k
            {
3709
12.6k
                S32 mvd, mvd_q8;
3710
3711
12.6k
                mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712
12.6k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3713
3714
12.6k
                if(mvd > (mvd_limit))
3715
16
                {
3716
16
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717
16
                }
3718
12.6k
                break;
3719
0
            }
3720
2.43k
            case 9:
3721
2.43k
            {
3722
2.43k
                S32 mvd;
3723
2.43k
                S32 mvdx, mvdx_q8;
3724
2.43k
                S32 mvdy, mvdy_q8;
3725
3726
2.43k
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727
2.43k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728
3729
2.43k
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730
2.43k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731
3732
2.43k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733
3734
2.43k
                if(mvd > mvd_limit)
3735
0
                {
3736
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737
0
                }
3738
2.43k
                break;
3739
0
            }
3740
2.06k
            case 10:
3741
2.06k
            {
3742
2.06k
                S32 mvd;
3743
2.06k
                S32 mvdx, mvdx_q8;
3744
2.06k
                S32 mvdy, mvdy_q8;
3745
3746
2.06k
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747
2.06k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748
3749
2.06k
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750
2.06k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751
3752
2.06k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753
3754
2.06k
                if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755
0
                {
3756
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757
0
                }
3758
2.06k
                break;
3759
0
            }
3760
287k
            default:
3761
287k
            {
3762
287k
                break;
3763
0
            }
3764
326k
            }
3765
3766
326k
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767
326k
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768
326k
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769
326k
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770
3771
326k
            ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772
3773
326k
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774
326k
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775
326k
        }
3776
72.6k
        else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777
72.1k
        {
3778
72.1k
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779
3780
72.1k
            ps_blk_32x32->num_clusters++;
3781
72.1k
            ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782
3783
72.1k
            ps_cur_cluster_32->is_valid_cluster = 1;
3784
3785
72.1k
            ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786
72.1k
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787
72.1k
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788
3789
72.1k
            memcpy(
3790
72.1k
                ps_cur_cluster_32->as_mv,
3791
72.1k
                ps_cluster_data->as_mv,
3792
72.1k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793
3794
72.1k
            ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795
3796
72.1k
            ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797
3798
72.1k
            ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799
72.1k
            ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800
72.1k
            ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801
72.1k
            ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802
3803
72.1k
            ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804
72.1k
        }
3805
399k
    }
3806
521k
}
3807
3808
/**
3809
********************************************************************************
3810
*  @fn   void hme_update_64x64_cluster_attributes
3811
*               (
3812
*                   cluster_64x64_blk_t *ps_blk_32x32,
3813
*                   cluster_data_t *ps_cluster_data
3814
*               )
3815
*
3816
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
3817
*          the constituent 16x16 clusters
3818
*
3819
*  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3820
*
3821
*  @param[in]  ps_cluster_data : structure containing 32x32 block results
3822
*
3823
*  @return None
3824
********************************************************************************
3825
*/
3826
void hme_update_64x64_cluster_attributes(
3827
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828
194k
{
3829
194k
    cluster_data_t *ps_cur_cluster_64;
3830
3831
194k
    S32 i;
3832
194k
    S32 mvd_limit;
3833
3834
194k
    S32 num_clusters = ps_blk_64x64->num_clusters;
3835
3836
194k
    if(0 == num_clusters)
3837
31.0k
    {
3838
31.0k
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839
3840
31.0k
        ps_blk_64x64->num_clusters++;
3841
31.0k
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842
3843
31.0k
        ps_cur_cluster_64->is_valid_cluster = 1;
3844
3845
31.0k
        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846
31.0k
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847
31.0k
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848
3849
31.0k
        memcpy(
3850
31.0k
            ps_cur_cluster_64->as_mv,
3851
31.0k
            ps_cluster_data->as_mv,
3852
31.0k
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853
3854
31.0k
        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855
3856
31.0k
        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857
3858
31.0k
        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859
31.0k
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860
31.0k
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861
31.0k
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862
3863
31.0k
        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864
31.0k
    }
3865
163k
    else
3866
163k
    {
3867
163k
        centroid_t *ps_centroid;
3868
3869
163k
        S32 cur_posx_q8, cur_posy_q8;
3870
163k
        S32 min_mvd_cluster_id = -1;
3871
163k
        S32 mvd;
3872
163k
        S32 mvdx;
3873
163k
        S32 mvdy;
3874
163k
        S32 mvdx_min;
3875
163k
        S32 mvdy_min;
3876
163k
        S32 mvdx_q8;
3877
163k
        S32 mvdy_q8;
3878
3879
163k
        S32 num_clusters_evaluated = 0;
3880
3881
163k
        S32 mvd_min = MAX_32BIT_VAL;
3882
3883
163k
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884
163k
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885
3886
579k
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887
416k
        {
3888
416k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889
3890
416k
            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891
173k
            {
3892
173k
                num_clusters_evaluated++;
3893
173k
                continue;
3894
173k
            }
3895
3896
242k
            if(!ps_cur_cluster_64->is_valid_cluster)
3897
0
            {
3898
0
                continue;
3899
0
            }
3900
3901
242k
            num_clusters_evaluated++;
3902
3903
242k
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3904
3905
242k
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906
242k
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907
3908
242k
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909
242k
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910
3911
242k
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912
242k
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913
3914
242k
            mvd = ABS(mvdx) + ABS(mvdy);
3915
3916
242k
            if(mvd < mvd_min)
3917
186k
            {
3918
186k
                mvd_min = mvd;
3919
186k
                mvdx_min = mvdx;
3920
186k
                mvdy_min = mvdy;
3921
186k
                min_mvd_cluster_id = i;
3922
186k
            }
3923
242k
        }
3924
3925
163k
        ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926
3927
163k
        mvd_limit = (min_mvd_cluster_id == -1)
3928
163k
                        ? ps_cur_cluster_64[0].max_dist_from_centroid
3929
163k
                        : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930
3931
163k
        if(mvd_min <= mvd_limit)
3932
115k
        {
3933
115k
            LWORD64 i8_updated_posx;
3934
115k
            LWORD64 i8_updated_posy;
3935
115k
            WORD32 minmax_updated_x = 0;
3936
115k
            WORD32 minmax_updated_y = 0;
3937
3938
115k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939
3940
115k
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3941
3942
115k
            ps_cur_cluster_64->is_valid_cluster = 1;
3943
3944
115k
            ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945
115k
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946
115k
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947
3948
115k
            memcpy(
3949
115k
                &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950
115k
                ps_cluster_data->as_mv,
3951
115k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952
3953
115k
            if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954
7.93k
            {
3955
7.93k
                ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956
7.93k
                minmax_updated_x = 1;
3957
7.93k
            }
3958
107k
            else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959
6.40k
            {
3960
6.40k
                ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961
6.40k
                minmax_updated_x = 2;
3962
6.40k
            }
3963
3964
115k
            if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965
8.92k
            {
3966
8.92k
                ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967
8.92k
                minmax_updated_y = 1;
3968
8.92k
            }
3969
106k
            else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970
5.31k
            {
3971
5.31k
                ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972
5.31k
                minmax_updated_y = 2;
3973
5.31k
            }
3974
3975
115k
            switch((minmax_updated_y << 2) + minmax_updated_x)
3976
115k
            {
3977
4.09k
            case 1:
3978
4.09k
            {
3979
4.09k
                S32 mvd, mvd_q8;
3980
3981
4.09k
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982
4.09k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3983
3984
4.09k
                if(mvd > (mvd_limit))
3985
3
                {
3986
3
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987
3
                }
3988
4.09k
                break;
3989
0
            }
3990
2.89k
            case 2:
3991
2.89k
            {
3992
2.89k
                S32 mvd, mvd_q8;
3993
3994
2.89k
                mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995
2.89k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3996
3997
2.89k
                if(mvd > (mvd_limit))
3998
2
                {
3999
2
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000
2
                }
4001
2.89k
                break;
4002
0
            }
4003
4.83k
            case 4:
4004
4.83k
            {
4005
4.83k
                S32 mvd, mvd_q8;
4006
4007
4.83k
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008
4.83k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4009
4010
4.83k
                if(mvd > (mvd_limit))
4011
1
                {
4012
1
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013
1
                }
4014
4.83k
                break;
4015
0
            }
4016
2.11k
            case 5:
4017
2.11k
            {
4018
2.11k
                S32 mvd;
4019
2.11k
                S32 mvdx, mvdx_q8;
4020
2.11k
                S32 mvdy, mvdy_q8;
4021
4022
2.11k
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023
2.11k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024
4025
2.11k
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026
2.11k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027
4028
2.11k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029
4030
2.11k
                if(mvd > mvd_limit)
4031
0
                {
4032
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033
0
                }
4034
2.11k
                break;
4035
0
            }
4036
1.98k
            case 6:
4037
1.98k
            {
4038
1.98k
                S32 mvd;
4039
1.98k
                S32 mvdx, mvdx_q8;
4040
1.98k
                S32 mvdy, mvdy_q8;
4041
4042
1.98k
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043
1.98k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044
4045
1.98k
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046
1.98k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047
4048
1.98k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049
4050
1.98k
                if(mvd > mvd_limit)
4051
0
                {
4052
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053
0
                }
4054
1.98k
                break;
4055
0
            }
4056
2.05k
            case 8:
4057
2.05k
            {
4058
2.05k
                S32 mvd, mvd_q8;
4059
4060
2.05k
                mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061
2.05k
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4062
4063
2.05k
                if(mvd > (mvd_limit))
4064
6
                {
4065
6
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066
6
                }
4067
2.05k
                break;
4068
0
            }
4069
1.72k
            case 9:
4070
1.72k
            {
4071
1.72k
                S32 mvd;
4072
1.72k
                S32 mvdx, mvdx_q8;
4073
1.72k
                S32 mvdy, mvdy_q8;
4074
4075
1.72k
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076
1.72k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077
4078
1.72k
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079
1.72k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080
4081
1.72k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082
4083
1.72k
                if(mvd > mvd_limit)
4084
0
                {
4085
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086
0
                }
4087
1.72k
                break;
4088
0
            }
4089
1.53k
            case 10:
4090
1.53k
            {
4091
1.53k
                S32 mvd;
4092
1.53k
                S32 mvdx, mvdx_q8;
4093
1.53k
                S32 mvdy, mvdy_q8;
4094
4095
1.53k
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096
1.53k
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097
4098
1.53k
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099
1.53k
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100
4101
1.53k
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102
4103
1.53k
                if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104
0
                {
4105
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106
0
                }
4107
1.53k
                break;
4108
0
            }
4109
94.1k
            default:
4110
94.1k
            {
4111
94.1k
                break;
4112
0
            }
4113
115k
            }
4114
4115
115k
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116
115k
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117
115k
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118
115k
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119
4120
115k
            ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121
4122
115k
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123
115k
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124
115k
        }
4125
47.7k
        else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126
46.0k
        {
4127
46.0k
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128
4129
46.0k
            ps_blk_64x64->num_clusters++;
4130
46.0k
            ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131
4132
46.0k
            ps_cur_cluster_64->is_valid_cluster = 1;
4133
4134
46.0k
            ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135
46.0k
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136
46.0k
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137
4138
46.0k
            memcpy(
4139
46.0k
                &ps_cur_cluster_64->as_mv[0],
4140
46.0k
                ps_cluster_data->as_mv,
4141
46.0k
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142
4143
46.0k
            ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144
4145
46.0k
            ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146
4147
46.0k
            ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148
46.0k
            ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149
46.0k
            ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150
46.0k
            ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151
4152
46.0k
            ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153
46.0k
        }
4154
163k
    }
4155
194k
}
4156
4157
/**
4158
********************************************************************************
4159
*  @fn   void hme_update_32x32_clusters
4160
*               (
4161
*                   cluster_32x32_blk_t *ps_blk_32x32,
4162
*                   cluster_16x16_blk_t *ps_blk_16x16
4163
*               )
4164
*
4165
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
4166
*          the constituent 16x16 clusters
4167
*
4168
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4169
*
4170
*  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4171
*
4172
*  @return None
4173
********************************************************************************
4174
*/
4175
static __inline void
4176
    hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177
123k
{
4178
123k
    cluster_16x16_blk_t *ps_blk_16x16_cur;
4179
123k
    cluster_data_t *ps_cur_cluster;
4180
4181
123k
    S32 i, j;
4182
123k
    S32 num_clusters_cur_16x16_blk;
4183
4184
616k
    for(i = 0; i < 4; i++)
4185
493k
    {
4186
493k
        S32 num_clusters_evaluated = 0;
4187
4188
493k
        ps_blk_16x16_cur = &ps_blk_16x16[i];
4189
4190
493k
        num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191
4192
493k
        ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193
4194
493k
        ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195
4196
1.01M
        for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197
521k
        {
4198
521k
            ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199
4200
521k
            if(!ps_cur_cluster->is_valid_cluster)
4201
5
            {
4202
5
                continue;
4203
5
            }
4204
4205
521k
            hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206
4207
521k
            num_clusters_evaluated++;
4208
521k
        }
4209
493k
    }
4210
123k
}
4211
4212
/**
4213
********************************************************************************
4214
*  @fn   void hme_update_64x64_clusters
4215
*               (
4216
*                   cluster_64x64_blk_t *ps_blk_64x64,
4217
*                   cluster_32x32_blk_t *ps_blk_32x32
4218
*               )
4219
*
4220
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
4221
*          the constituent 16x16 clusters
4222
*
4223
*  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4224
*
4225
*  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4226
*
4227
*  @return None
4228
********************************************************************************
4229
*/
4230
static __inline void
4231
    hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232
32.5k
{
4233
32.5k
    cluster_32x32_blk_t *ps_blk_32x32_cur;
4234
32.5k
    cluster_data_t *ps_cur_cluster;
4235
4236
32.5k
    S32 i, j;
4237
32.5k
    S32 num_clusters_cur_32x32_blk;
4238
4239
162k
    for(i = 0; i < 4; i++)
4240
130k
    {
4241
130k
        S32 num_clusters_evaluated = 0;
4242
4243
130k
        ps_blk_32x32_cur = &ps_blk_32x32[i];
4244
4245
130k
        num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246
4247
130k
        ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248
130k
        ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249
4250
324k
        for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251
194k
        {
4252
194k
            ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253
4254
194k
            if(!ps_cur_cluster->is_valid_cluster)
4255
310
            {
4256
310
                continue;
4257
310
            }
4258
4259
194k
            hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260
4261
194k
            num_clusters_evaluated++;
4262
194k
        }
4263
130k
    }
4264
32.5k
}
4265
4266
/**
4267
********************************************************************************
4268
*  @fn   S32 hme_try_merge_clusters_blksize_gt_16
4269
*               (
4270
*                   cluster_data_t *ps_cluster_data,
4271
*                   S32 num_clusters
4272
*               )
4273
*
4274
*  @brief  Merging clusters from blocks of size 32x32 and greater
4275
*
4276
*  @param[in/out]  ps_cluster_data: structure containing cluster data
4277
*
4278
*  @param[in]  num_clusters : number of clusters
4279
*
4280
*  @return Success or failure
4281
********************************************************************************
4282
*/
4283
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284
50.4k
{
4285
50.4k
    centroid_t *ps_cur_centroid;
4286
50.4k
    cluster_data_t *ps_cur_cluster;
4287
4288
50.4k
    S32 i, mvd;
4289
50.4k
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290
4291
50.4k
    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292
4293
50.4k
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294
50.4k
    S32 ref_id = ps_cluster_data->ref_id;
4295
4296
50.4k
    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297
50.4k
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298
50.4k
    S32 num_clusters_evaluated = 1;
4299
50.4k
    S32 ret_value = 0;
4300
4301
50.4k
    if(1 >= num_clusters)
4302
7.95k
    {
4303
7.95k
        return ret_value;
4304
7.95k
    }
4305
4306
186k
    for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307
144k
    {
4308
144k
        S32 cur_posx_q8;
4309
144k
        S32 cur_posy_q8;
4310
4311
144k
        ps_cur_cluster = &ps_cluster_data[i];
4312
4313
144k
        if((ref_id != ps_cur_cluster->ref_id))
4314
80.2k
        {
4315
80.2k
            num_clusters_evaluated++;
4316
80.2k
            continue;
4317
80.2k
        }
4318
4319
64.2k
        if((!ps_cur_cluster->is_valid_cluster))
4320
3
        {
4321
3
            continue;
4322
3
        }
4323
4324
64.2k
        num_clusters_evaluated++;
4325
4326
64.2k
        ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327
4328
64.2k
        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329
64.2k
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330
4331
64.2k
        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332
64.2k
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333
4334
64.2k
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335
64.2k
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336
4337
64.2k
        mvd = ABS(mvdx) + ABS(mvdy);
4338
4339
64.2k
        if(mvd <= (mvd_limit >> 1))
4340
9
        {
4341
9
            LWORD64 i8_updated_posx;
4342
9
            LWORD64 i8_updated_posy;
4343
9
            WORD32 minmax_updated_x = 0;
4344
9
            WORD32 minmax_updated_y = 0;
4345
4346
9
            ps_cur_cluster->is_valid_cluster = 0;
4347
4348
9
            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349
9
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350
9
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351
4352
9
            memcpy(
4353
9
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354
9
                ps_cur_cluster->as_mv,
4355
9
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356
4357
9
            if(mvdx > 0)
4358
3
            {
4359
3
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360
3
                minmax_updated_x = 1;
4361
3
            }
4362
6
            else
4363
6
            {
4364
6
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365
6
                minmax_updated_x = 2;
4366
6
            }
4367
4368
9
            if(mvdy > 0)
4369
4
            {
4370
4
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371
4
                minmax_updated_y = 1;
4372
4
            }
4373
5
            else
4374
5
            {
4375
5
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376
5
                minmax_updated_y = 2;
4377
5
            }
4378
4379
9
            switch((minmax_updated_y << 2) + minmax_updated_x)
4380
9
            {
4381
0
            case 1:
4382
0
            {
4383
0
                S32 mvd, mvd_q8;
4384
4385
0
                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4387
4388
0
                if(mvd > (mvd_limit))
4389
0
                {
4390
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4391
0
                }
4392
0
                break;
4393
0
            }
4394
0
            case 2:
4395
0
            {
4396
0
                S32 mvd, mvd_q8;
4397
4398
0
                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4400
4401
0
                if(mvd > (mvd_limit))
4402
0
                {
4403
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4404
0
                }
4405
0
                break;
4406
0
            }
4407
0
            case 4:
4408
0
            {
4409
0
                S32 mvd, mvd_q8;
4410
4411
0
                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4413
4414
0
                if(mvd > (mvd_limit))
4415
0
                {
4416
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4417
0
                }
4418
0
                break;
4419
0
            }
4420
2
            case 5:
4421
2
            {
4422
2
                S32 mvd;
4423
2
                S32 mvdx, mvdx_q8;
4424
2
                S32 mvdy, mvdy_q8;
4425
4426
2
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427
2
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428
4429
2
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430
2
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431
4432
2
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433
4434
2
                if(mvd > mvd_limit)
4435
0
                {
4436
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4437
0
                }
4438
2
                break;
4439
0
            }
4440
2
            case 6:
4441
2
            {
4442
2
                S32 mvd;
4443
2
                S32 mvdx, mvdx_q8;
4444
2
                S32 mvdy, mvdy_q8;
4445
4446
2
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447
2
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448
4449
2
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450
2
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451
4452
2
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453
4454
2
                if(mvd > mvd_limit)
4455
0
                {
4456
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4457
0
                }
4458
2
                break;
4459
0
            }
4460
0
            case 8:
4461
0
            {
4462
0
                S32 mvd, mvd_q8;
4463
4464
0
                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4466
4467
0
                if(mvd > (mvd_limit))
4468
0
                {
4469
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4470
0
                }
4471
0
                break;
4472
0
            }
4473
1
            case 9:
4474
1
            {
4475
1
                S32 mvd;
4476
1
                S32 mvdx, mvdx_q8;
4477
1
                S32 mvdy, mvdy_q8;
4478
4479
1
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480
1
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481
4482
1
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483
1
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484
4485
1
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486
4487
1
                if(mvd > mvd_limit)
4488
0
                {
4489
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4490
0
                }
4491
1
                break;
4492
0
            }
4493
4
            case 10:
4494
4
            {
4495
4
                S32 mvd;
4496
4
                S32 mvdx, mvdx_q8;
4497
4
                S32 mvdy, mvdy_q8;
4498
4499
4
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500
4
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501
4502
4
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503
4
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504
4505
4
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506
4507
4
                if(mvd > ps_cluster_data->max_dist_from_centroid)
4508
0
                {
4509
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4510
0
                }
4511
4
                break;
4512
0
            }
4513
0
            default:
4514
0
            {
4515
0
                break;
4516
0
            }
4517
9
            }
4518
4519
9
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520
9
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521
9
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522
9
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523
4524
9
            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525
4526
9
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527
9
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528
4529
9
            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530
6
            {
4531
6
                num_clusters--;
4532
6
                num_clusters_evaluated = 1;
4533
6
                i = 0;
4534
6
                ret_value++;
4535
6
            }
4536
3
            else
4537
3
            {
4538
3
                ret_value++;
4539
4540
3
                return ret_value;
4541
3
            }
4542
9
        }
4543
64.2k
    }
4544
4545
42.4k
    if(ret_value)
4546
6
    {
4547
8
        for(i = 1; i < (num_clusters + ret_value); i++)
4548
7
        {
4549
7
            if(ps_cluster_data[i].is_valid_cluster)
4550
5
            {
4551
5
                break;
4552
5
            }
4553
7
        }
4554
6
        if(i == (num_clusters + ret_value))
4555
1
        {
4556
1
            return ret_value;
4557
1
        }
4558
6
    }
4559
42.4k
    else
4560
42.4k
    {
4561
42.4k
        i = 1;
4562
42.4k
    }
4563
4564
42.4k
    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565
42.4k
           ret_value;
4566
42.4k
}
4567
4568
/**
4569
********************************************************************************
4570
*  @fn   S32 hme_determine_validity_32x32
4571
*               (
4572
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4573
*               )
4574
*
4575
*  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4576
*           while recursing through the CU tree or not
4577
*
4578
*  @param[in]  ps_cluster_data: structure containing cluster data
4579
*
4580
*  @return Success or failure
4581
********************************************************************************
4582
*/
4583
__inline S32 hme_determine_validity_32x32(
4584
    ctb_cluster_info_t *ps_ctb_cluster_info,
4585
    S32 *pi4_children_nodes_required,
4586
    S32 blk_validity_wrt_pic_bndry,
4587
    S32 parent_blk_validity_wrt_pic_bndry)
4588
0
{
4589
0
    cluster_data_t *ps_data;
4590
0
4591
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593
0
4594
0
    S32 num_clusters = ps_32x32_blk->num_clusters;
4595
0
    S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596
0
4597
0
    if(!blk_validity_wrt_pic_bndry)
4598
0
    {
4599
0
        *pi4_children_nodes_required = 1;
4600
0
        return 0;
4601
0
    }
4602
0
4603
0
    if(!parent_blk_validity_wrt_pic_bndry)
4604
0
    {
4605
0
        *pi4_children_nodes_required = 1;
4606
0
        return 1;
4607
0
    }
4608
0
4609
0
    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610
0
    {
4611
0
        *pi4_children_nodes_required = 1;
4612
0
        return 0;
4613
0
    }
4614
0
4615
0
    if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616
0
    {
4617
0
        *pi4_children_nodes_required = 1;
4618
0
4619
0
        return 1;
4620
0
    }
4621
0
    else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622
0
    {
4623
0
        *pi4_children_nodes_required = 0;
4624
0
4625
0
        return 1;
4626
0
    }
4627
0
    else
4628
0
    {
4629
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630
0
        {
4631
0
            *pi4_children_nodes_required = 0;
4632
0
            return 1;
4633
0
        }
4634
0
        else
4635
0
        {
4636
0
            S32 i;
4637
0
4638
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639
0
            S32 min_area = MAX_32BIT_VAL;
4640
0
            S32 num_clusters_evaluated = 0;
4641
0
4642
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643
0
            {
4644
0
                ps_data = &ps_32x32_blk->as_cluster_data[i];
4645
0
4646
0
                if(!ps_data->is_valid_cluster)
4647
0
                {
4648
0
                    continue;
4649
0
                }
4650
0
4651
0
                num_clusters_evaluated++;
4652
0
4653
0
                if(ps_data->area_in_pixels < min_area)
4654
0
                {
4655
0
                    min_area = ps_data->area_in_pixels;
4656
0
                }
4657
0
            }
4658
0
4659
0
            if((min_area << 4) < area_of_parent)
4660
0
            {
4661
0
                *pi4_children_nodes_required = 1;
4662
0
                return 0;
4663
0
            }
4664
0
            else
4665
0
            {
4666
0
                *pi4_children_nodes_required = 0;
4667
0
                return 1;
4668
0
            }
4669
0
        }
4670
0
    }
4671
0
}
4672
4673
/**
4674
********************************************************************************
4675
*  @fn   S32 hme_determine_validity_16x16
4676
*               (
4677
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4678
*               )
4679
*
4680
*  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4681
*           while recursing through the CU tree or not
4682
*
4683
*  @param[in]  ps_cluster_data: structure containing cluster data
4684
*
4685
*  @return Success or failure
4686
********************************************************************************
4687
*/
4688
__inline S32 hme_determine_validity_16x16(
4689
    ctb_cluster_info_t *ps_ctb_cluster_info,
4690
    S32 *pi4_children_nodes_required,
4691
    S32 blk_validity_wrt_pic_bndry,
4692
    S32 parent_blk_validity_wrt_pic_bndry)
4693
0
{
4694
0
    cluster_data_t *ps_data;
4695
0
4696
0
    cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699
0
4700
0
    S32 num_clusters = ps_16x16_blk->num_clusters;
4701
0
    S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702
0
    S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703
0
4704
0
    if(!blk_validity_wrt_pic_bndry)
4705
0
    {
4706
0
        *pi4_children_nodes_required = 1;
4707
0
        return 0;
4708
0
    }
4709
0
4710
0
    if(!parent_blk_validity_wrt_pic_bndry)
4711
0
    {
4712
0
        *pi4_children_nodes_required = 1;
4713
0
        return 1;
4714
0
    }
4715
0
4716
0
    if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717
0
       (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718
0
    {
4719
0
        *pi4_children_nodes_required = 1;
4720
0
        return 1;
4721
0
    }
4722
0
4723
0
    /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724
0
    /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725
0
    if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726
0
    {
4727
0
        if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728
0
        {
4729
0
            *pi4_children_nodes_required = 0;
4730
0
4731
0
            return 1;
4732
0
        }
4733
0
        else
4734
0
        {
4735
0
            *pi4_children_nodes_required = 1;
4736
0
4737
0
            return 0;
4738
0
        }
4739
0
    }
4740
0
    /* Implies nc_64 >= 3 */
4741
0
    else
4742
0
    {
4743
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744
0
        {
4745
0
            *pi4_children_nodes_required = 0;
4746
0
            return 1;
4747
0
        }
4748
0
        else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749
0
        {
4750
0
            *pi4_children_nodes_required = 1;
4751
0
            return 0;
4752
0
        }
4753
0
        else
4754
0
        {
4755
0
            S32 i;
4756
0
4757
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758
0
            S32 min_area = MAX_32BIT_VAL;
4759
0
            S32 num_clusters_evaluated = 0;
4760
0
4761
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762
0
            {
4763
0
                ps_data = &ps_16x16_blk->as_cluster_data[i];
4764
0
4765
0
                if(!ps_data->is_valid_cluster)
4766
0
                {
4767
0
                    continue;
4768
0
                }
4769
0
4770
0
                num_clusters_evaluated++;
4771
0
4772
0
                if(ps_data->area_in_pixels < min_area)
4773
0
                {
4774
0
                    min_area = ps_data->area_in_pixels;
4775
0
                }
4776
0
            }
4777
0
4778
0
            if((min_area << 4) < area_of_parent)
4779
0
            {
4780
0
                *pi4_children_nodes_required = 1;
4781
0
                return 0;
4782
0
            }
4783
0
            else
4784
0
            {
4785
0
                *pi4_children_nodes_required = 0;
4786
0
                return 1;
4787
0
            }
4788
0
        }
4789
0
    }
4790
0
}
4791
4792
/**
4793
********************************************************************************
4794
*  @fn   void hme_build_cu_tree
4795
*               (
4796
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4797
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4798
*                   S32 tree_depth,
4799
*                   CU_POS_T e_grand_parent_blk_pos,
4800
*                   CU_POS_T e_parent_blk_pos,
4801
*                   CU_POS_T e_cur_blk_pos
4802
*               )
4803
*
4804
*  @brief  Recursive function for CU tree initialisation
4805
*
4806
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4807
*                                   corresponding to all block sizes from 64x64
4808
*                                   to 16x16
4809
*
4810
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4811
*                                applicable
4812
*
4813
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4814
*
4815
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4816
*
4817
*  @param[in]  tree_depth : specifies depth of the CU tree
4818
*
4819
*  @return Nothing
4820
********************************************************************************
4821
*/
4822
void hme_build_cu_tree(
4823
    ctb_cluster_info_t *ps_ctb_cluster_info,
4824
    cur_ctb_cu_tree_t *ps_cu_tree,
4825
    S32 tree_depth,
4826
    CU_POS_T e_grandparent_blk_pos,
4827
    CU_POS_T e_parent_blk_pos,
4828
    CU_POS_T e_cur_blk_pos)
4829
32.5k
{
4830
32.5k
    ihevce_cu_tree_init(
4831
32.5k
        ps_cu_tree,
4832
32.5k
        ps_ctb_cluster_info->ps_cu_tree_root,
4833
32.5k
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834
32.5k
        tree_depth,
4835
32.5k
        e_grandparent_blk_pos,
4836
32.5k
        e_parent_blk_pos,
4837
32.5k
        e_cur_blk_pos);
4838
32.5k
}
4839
4840
/**
4841
********************************************************************************
4842
*  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4843
*               (
4844
*                   cluster_32x32_blk_t *ps_blk_32x32
4845
*               )
4846
*
4847
*  @brief  Determines whether the spread of high SDI MV's around each cluster
4848
*          center is below a pre-determined threshold
4849
*
4850
*  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4851
*                                   corresponding to all block sizes from 64x64
4852
*                                   to 16x16
4853
*
4854
*  @return 1 if the spread is constrained, else 0
4855
********************************************************************************
4856
*/
4857
__inline S32
4858
    hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859
0
{
4860
0
    S32 cumulative_mv_distance;
4861
0
    S32 i, j;
4862
0
    S32 num_high_sdi_mvs;
4863
0
4864
0
    S32 num_clusters = ps_blk_32x32->num_clusters;
4865
0
4866
0
    for(i = 0; i < num_clusters; i++)
4867
0
    {
4868
0
        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869
0
4870
0
        num_high_sdi_mvs = 0;
4871
0
        cumulative_mv_distance = 0;
4872
0
4873
0
        for(j = 0; j < ps_data->num_mvs; j++)
4874
0
        {
4875
0
            mv_data_t *ps_mv = &ps_data->as_mv[j];
4876
0
4877
0
            if(ps_mv->sdi >= sdi_threshold)
4878
0
            {
4879
0
                num_high_sdi_mvs++;
4880
0
4881
0
                COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882
0
            }
4883
0
        }
4884
0
4885
0
        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886
0
        {
4887
0
            return 0;
4888
0
        }
4889
0
    }
4890
0
4891
0
    return 1;
4892
0
}
4893
4894
/**
4895
********************************************************************************
4896
*  @fn   S32 hme_populate_cu_tree
4897
*               (
4898
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4899
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4901
*                   S32 tree_depth,
4902
*                   CU_POS_T e_parent_blk_pos,
4903
*                   CU_POS_T e_cur_blk_pos
4904
*               )
4905
*
4906
*  @brief  Recursive function for CU tree population based on output of
4907
*          clustering algorithm
4908
*
4909
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4910
*                                   corresponding to all block sizes from 64x64
4911
*                                   to 16x16
4912
*
4913
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4914
applicable
4915
*
4916
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4917
*
4918
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4919
*
4920
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4921
*
4922
*  @param[in]  tree_depth : specifies depth of the CU tree
4923
*
4924
*  @param[in]  ipe_decision_precedence : specifies whether precedence should
4925
*               be given to decisions made either by IPE(1) or clustering algos.
4926
*
4927
*  @return 1 if re-evaluation of parent node's validity is not required,
4928
else 0
4929
********************************************************************************
4930
*/
4931
void hme_populate_cu_tree(
4932
    ctb_cluster_info_t *ps_ctb_cluster_info,
4933
    cur_ctb_cu_tree_t *ps_cu_tree,
4934
    S32 tree_depth,
4935
    ME_QUALITY_PRESETS_T e_quality_preset,
4936
    CU_POS_T e_grandparent_blk_pos,
4937
    CU_POS_T e_parent_blk_pos,
4938
    CU_POS_T e_cur_blk_pos)
4939
794k
{
4940
794k
    S32 area_of_cur_blk;
4941
794k
    S32 area_limit_for_me_decision_precedence;
4942
794k
    S32 children_nodes_required;
4943
794k
    S32 intra_mv_area;
4944
794k
    S32 intra_eval_enable;
4945
794k
    S32 inter_eval_enable;
4946
794k
    S32 ipe_decision_precedence;
4947
794k
    S32 node_validity;
4948
794k
    S32 num_clusters;
4949
4950
794k
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951
4952
794k
    if(NULL == ps_cu_tree)
4953
0
    {
4954
0
        return;
4955
0
    }
4956
4957
794k
    switch(tree_depth)
4958
794k
    {
4959
32.5k
    case 0:
4960
32.5k
    {
4961
        /* 64x64 block */
4962
32.5k
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963
4964
32.5k
        cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965
4966
32.5k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967
32.5k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968
32.5k
        children_nodes_required = 0;
4969
32.5k
        intra_mv_area = ps_blk_64x64->intra_mv_area;
4970
4971
32.5k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972
4973
32.5k
        intra_eval_enable = ipe_decision_precedence;
4974
32.5k
        inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975
4976
32.5k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977
32.5k
        if(e_quality_preset >= ME_HIGH_QUALITY)
4978
0
        {
4979
0
            inter_eval_enable = 1;
4980
0
            node_validity = (blk_32x32_mask == 0xf);
4981
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983
0
#endif
4984
0
            break;
4985
0
        }
4986
32.5k
#endif
4987
4988
32.5k
#if ENABLE_4CTB_EVALUATION
4989
32.5k
        node_validity = (blk_32x32_mask == 0xf);
4990
4991
32.5k
        break;
4992
#else
4993
        {
4994
            S32 i;
4995
4996
            num_clusters = ps_blk_64x64->num_clusters;
4997
4998
            node_validity = (ipe_decision_precedence)
4999
                                ? (!ps_cur_ipe_ctb->u1_split_flag)
5000
                                : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001
5002
            for(i = 0; i < MAX_NUM_REF; i++)
5003
            {
5004
                node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005
                                                  MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006
            }
5007
5008
            node_validity = node_validity && (blk_32x32_mask == 0xf);
5009
        }
5010
        break;
5011
#endif
5012
32.5k
    }
5013
130k
    case 1:
5014
130k
    {
5015
        /* 32x32 block */
5016
130k
        S32 is_percent_intra_area_gt_threshold;
5017
5018
130k
        cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019
5020
130k
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021
5022
#if !ENABLE_4CTB_EVALUATION
5023
        S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024
        S32 best_intra_cost =
5025
            ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026
              ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027
                  4) < 0)
5028
                ? MAX_32BIT_VAL
5029
                : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030
                   ps_ctb_cluster_info->i4_frame_qstep *
5031
                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032
        S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033
        S32 cost_differential = (best_inter_cost - best_cost);
5034
#endif
5035
5036
130k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037
130k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038
130k
        intra_mv_area = ps_blk_32x32->intra_mv_area;
5039
130k
        is_percent_intra_area_gt_threshold =
5040
130k
            (intra_mv_area > area_limit_for_me_decision_precedence);
5041
130k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042
5043
130k
        intra_eval_enable = ipe_decision_precedence;
5044
130k
        inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045
130k
        children_nodes_required = 1;
5046
5047
130k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048
130k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5049
0
        {
5050
0
            inter_eval_enable = 1;
5051
0
            node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054
0
#endif
5055
0
            break;
5056
0
        }
5057
130k
#endif
5058
5059
130k
#if ENABLE_4CTB_EVALUATION
5060
130k
        node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061
5062
130k
        break;
5063
#else
5064
        {
5065
            S32 i;
5066
            num_clusters = ps_blk_32x32->num_clusters;
5067
5068
            if(ipe_decision_precedence)
5069
            {
5070
                node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071
                node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072
            }
5073
            else
5074
            {
5075
                node_validity =
5076
                    ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077
                    (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078
                    (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079
5080
                for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081
                {
5082
                    node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083
                                                      MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084
                }
5085
5086
                if(node_validity)
5087
                {
5088
                    node_validity = node_validity &&
5089
                                    hme_sdi_based_cluster_spread_eligibility(
5090
                                        ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091
                }
5092
            }
5093
        }
5094
5095
        break;
5096
#endif
5097
130k
    }
5098
520k
    case 2:
5099
520k
    {
5100
520k
        cluster_16x16_blk_t *ps_blk_16x16 =
5101
520k
            &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102
5103
520k
        S32 blk_8x8_mask =
5104
520k
            ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105
5106
520k
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107
520k
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108
520k
        children_nodes_required = 1;
5109
520k
        intra_mv_area = ps_blk_16x16->intra_mv_area;
5110
520k
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111
520k
        num_clusters = ps_blk_16x16->num_clusters;
5112
5113
520k
        intra_eval_enable = ipe_decision_precedence;
5114
520k
        inter_eval_enable = 1;
5115
5116
520k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117
520k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5118
0
        {
5119
0
            node_validity =
5120
0
                !ps_ctb_cluster_info
5121
0
                     ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122
0
            children_nodes_required = !node_validity;
5123
0
            break;
5124
0
        }
5125
520k
#endif
5126
5127
520k
#if ENABLE_4CTB_EVALUATION
5128
520k
        node_validity = (blk_8x8_mask == 0xf);
5129
5130
520k
#if ENABLE_CU_TREE_CULLING
5131
520k
        {
5132
520k
            cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133
5134
520k
            switch(e_parent_blk_pos)
5135
520k
            {
5136
130k
            case POS_TL:
5137
130k
            {
5138
130k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139
5140
130k
                break;
5141
0
            }
5142
130k
            case POS_TR:
5143
130k
            {
5144
130k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145
5146
130k
                break;
5147
0
            }
5148
130k
            case POS_BL:
5149
130k
            {
5150
130k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151
5152
130k
                break;
5153
0
            }
5154
130k
            case POS_BR:
5155
130k
            {
5156
130k
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157
5158
130k
                break;
5159
0
            }
5160
0
            default:
5161
0
            {
5162
0
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163
0
                break;
5164
0
            }
5165
520k
            }
5166
5167
520k
            if(ps_32x32_root->is_node_valid)
5168
493k
            {
5169
493k
                node_validity =
5170
493k
                    node_validity &&
5171
493k
                    !ps_ctb_cluster_info
5172
493k
                         ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173
493k
                children_nodes_required = !node_validity;
5174
493k
            }
5175
520k
        }
5176
0
#endif
5177
5178
0
        break;
5179
#else
5180
5181
        if(ipe_decision_precedence)
5182
        {
5183
            S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184
                                     .as_intra16_analyse[e_cur_blk_pos]
5185
                                     .b1_merge_flag);
5186
            S32 valid_flag = (blk_8x8_mask == 0xf);
5187
5188
            node_validity = merge_flag_16 && valid_flag;
5189
        }
5190
        else
5191
        {
5192
            node_validity = (blk_8x8_mask == 0xf);
5193
        }
5194
5195
        break;
5196
#endif
5197
520k
    }
5198
110k
    case 3:
5199
110k
    {
5200
110k
        S32 blk_8x8_mask =
5201
110k
            ps_ctb_cluster_info
5202
110k
                ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203
110k
        S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204
110k
                                 .as_intra16_analyse[e_parent_blk_pos]
5205
110k
                                 .b1_merge_flag);
5206
110k
        S32 merge_flag_32 =
5207
110k
            (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208
5209
110k
        intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210
110k
        inter_eval_enable = 1;
5211
110k
        children_nodes_required = 0;
5212
5213
110k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214
110k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5215
0
        {
5216
0
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217
0
            break;
5218
0
        }
5219
110k
#endif
5220
5221
110k
#if ENABLE_4CTB_EVALUATION
5222
110k
        node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223
5224
110k
        break;
5225
#else
5226
        {
5227
            cur_ctb_cu_tree_t *ps_32x32_root;
5228
            cur_ctb_cu_tree_t *ps_16x16_root;
5229
            cluster_32x32_blk_t *ps_32x32_blk;
5230
5231
            switch(e_grandparent_blk_pos)
5232
            {
5233
            case POS_TL:
5234
            {
5235
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236
5237
                break;
5238
            }
5239
            case POS_TR:
5240
            {
5241
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242
5243
                break;
5244
            }
5245
            case POS_BL:
5246
            {
5247
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248
5249
                break;
5250
            }
5251
            case POS_BR:
5252
            {
5253
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254
5255
                break;
5256
            }
5257
            default:
5258
            {
5259
                DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260
                break;
5261
            }
5262
            }
5263
5264
            switch(e_parent_blk_pos)
5265
            {
5266
            case POS_TL:
5267
            {
5268
                ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269
5270
                break;
5271
            }
5272
            case POS_TR:
5273
            {
5274
                ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275
5276
                break;
5277
            }
5278
            case POS_BL:
5279
            {
5280
                ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281
5282
                break;
5283
            }
5284
            case POS_BR:
5285
            {
5286
                ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287
5288
                break;
5289
            }
5290
            default:
5291
            {
5292
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293
                break;
5294
            }
5295
            }
5296
5297
            ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298
5299
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300
                            ((!ps_32x32_root->is_node_valid) ||
5301
                             (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302
                             (!ps_16x16_root->is_node_valid));
5303
5304
            break;
5305
        }
5306
#endif
5307
110k
    }
5308
794k
    }
5309
5310
    /* Fill the current cu_tree node */
5311
794k
    ps_cu_tree->is_node_valid = node_validity;
5312
794k
    ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313
794k
    ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314
5315
794k
    if(children_nodes_required)
5316
157k
    {
5317
157k
        tree_depth++;
5318
5319
157k
        hme_populate_cu_tree(
5320
157k
            ps_ctb_cluster_info,
5321
157k
            ps_cu_tree->ps_child_node_tl,
5322
157k
            tree_depth,
5323
157k
            e_quality_preset,
5324
157k
            e_parent_blk_pos,
5325
157k
            e_cur_blk_pos,
5326
157k
            POS_TL);
5327
5328
157k
        hme_populate_cu_tree(
5329
157k
            ps_ctb_cluster_info,
5330
157k
            ps_cu_tree->ps_child_node_tr,
5331
157k
            tree_depth,
5332
157k
            e_quality_preset,
5333
157k
            e_parent_blk_pos,
5334
157k
            e_cur_blk_pos,
5335
157k
            POS_TR);
5336
5337
157k
        hme_populate_cu_tree(
5338
157k
            ps_ctb_cluster_info,
5339
157k
            ps_cu_tree->ps_child_node_bl,
5340
157k
            tree_depth,
5341
157k
            e_quality_preset,
5342
157k
            e_parent_blk_pos,
5343
157k
            e_cur_blk_pos,
5344
157k
            POS_BL);
5345
5346
157k
        hme_populate_cu_tree(
5347
157k
            ps_ctb_cluster_info,
5348
157k
            ps_cu_tree->ps_child_node_br,
5349
157k
            tree_depth,
5350
157k
            e_quality_preset,
5351
157k
            e_parent_blk_pos,
5352
157k
            e_cur_blk_pos,
5353
157k
            POS_BR);
5354
157k
    }
5355
794k
}
5356
5357
/**
5358
********************************************************************************
5359
*  @fn   void hme_analyse_mv_clustering
5360
*               (
5361
*                   search_results_t *ps_search_results,
5362
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5363
*                   cur_ctb_cu_tree_t *ps_cu_tree
5364
*               )
5365
*
5366
*  @brief  Implementation for the clustering algorithm
5367
*
5368
*  @param[in]  ps_search_results: structure containing 16x16 block results
5369
*
5370
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
5371
*
5372
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
5373
*
5374
*  @return None
5375
********************************************************************************
5376
*/
5377
void hme_analyse_mv_clustering(
5378
    search_results_t *ps_search_results,
5379
    inter_cu_results_t *ps_16x16_cu_results,
5380
    inter_cu_results_t *ps_8x8_cu_results,
5381
    ctb_cluster_info_t *ps_ctb_cluster_info,
5382
    S08 *pi1_future_list,
5383
    S08 *pi1_past_list,
5384
    S32 bidir_enabled,
5385
    ME_QUALITY_PRESETS_T e_quality_preset)
5386
32.5k
{
5387
32.5k
    cluster_16x16_blk_t *ps_blk_16x16;
5388
32.5k
    cluster_32x32_blk_t *ps_blk_32x32;
5389
32.5k
    cluster_64x64_blk_t *ps_blk_64x64;
5390
5391
32.5k
    part_type_results_t *ps_best_result;
5392
32.5k
    pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393
32.5k
    pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394
5395
32.5k
    PART_ID_T e_part_id;
5396
32.5k
    PART_TYPE_T e_part_type;
5397
5398
32.5k
    S32 enable_64x64_merge;
5399
32.5k
    S32 i, j, k;
5400
32.5k
    S32 mvx, mvy;
5401
32.5k
    S32 num_parts;
5402
32.5k
    S32 ref_idx;
5403
32.5k
    S32 ai4_pred_mode[MAX_NUM_PARTS];
5404
5405
32.5k
    S32 num_32x32_merges = 0;
5406
5407
    /*****************************************/
5408
    /*****************************************/
5409
    /********* Enter ye who is HQ ************/
5410
    /*****************************************/
5411
    /*****************************************/
5412
5413
32.5k
    ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414
5415
    /* Initialise data in each of the clusters */
5416
553k
    for(i = 0; i < 16; i++)
5417
520k
    {
5418
520k
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419
5420
520k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421
520k
        if(e_quality_preset < ME_HIGH_QUALITY)
5422
520k
        {
5423
520k
            hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424
520k
        }
5425
0
        else
5426
0
        {
5427
0
            ps_blk_16x16->best_inter_cost = 0;
5428
0
            ps_blk_16x16->intra_mv_area = 0;
5429
0
        }
5430
#else
5431
        hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432
#endif
5433
520k
    }
5434
5435
162k
    for(i = 0; i < 4; i++)
5436
130k
    {
5437
130k
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438
5439
130k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440
130k
        if(e_quality_preset < ME_HIGH_QUALITY)
5441
130k
        {
5442
130k
            hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443
130k
        }
5444
0
        else
5445
0
        {
5446
0
            ps_blk_32x32->best_inter_cost = 0;
5447
0
            ps_blk_32x32->intra_mv_area = 0;
5448
0
        }
5449
#else
5450
        hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451
#endif
5452
130k
    }
5453
5454
32.5k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455
32.5k
    if(e_quality_preset < ME_HIGH_QUALITY)
5456
32.5k
    {
5457
32.5k
        hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458
32.5k
    }
5459
0
    else
5460
0
    {
5461
0
        ps_blk_64x64->best_inter_cost = 0;
5462
0
        ps_blk_64x64->intra_mv_area = 0;
5463
0
    }
5464
#else
5465
    hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466
#endif
5467
5468
    /* Initialise data for all nodes in the CU tree */
5469
32.5k
    hme_build_cu_tree(
5470
32.5k
        ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471
5472
32.5k
    if(e_quality_preset >= ME_HIGH_QUALITY)
5473
0
    {
5474
0
        memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475
0
    }
5476
5477
#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478
    return;
5479
#endif
5480
5481
553k
    for(i = 0; i < 16; i++)
5482
520k
    {
5483
520k
        S32 blk_8x8_mask;
5484
520k
        S32 is_16x16_blk_valid;
5485
520k
        S32 num_clusters_updated;
5486
520k
        S32 num_clusters;
5487
5488
520k
        blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489
5490
520k
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491
5492
520k
        is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493
5494
520k
        if(is_16x16_blk_valid)
5495
495k
        {
5496
            /* Use 8x8 data when 16x16 CU is split */
5497
495k
            if(ps_search_results[i].u1_split_flag)
5498
33.5k
            {
5499
33.5k
                S32 blk_8x8_idx = i << 2;
5500
5501
33.5k
                num_parts = 4;
5502
33.5k
                e_part_type = PRT_NxN;
5503
5504
167k
                for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505
134k
                {
5506
                    /* Only 2Nx2N partition supported for 8x8 block */
5507
134k
                    ASSERT(
5508
134k
                        ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509
134k
                        ((PART_TYPE_T)PRT_2Nx2N));
5510
5511
134k
                    aps_part_result[j] =
5512
134k
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513
134k
                    aps_inferior_parts[j] =
5514
134k
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515
134k
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516
134k
                }
5517
33.5k
            }
5518
461k
            else
5519
461k
            {
5520
461k
                ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521
5522
461k
                e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523
461k
                num_parts = gau1_num_parts_in_part_type[e_part_type];
5524
5525
933k
                for(j = 0; j < num_parts; j++)
5526
471k
                {
5527
471k
                    aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528
471k
                    aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529
471k
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530
471k
                }
5531
5532
461k
                ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533
461k
            }
5534
5535
1.10M
            for(j = 0; j < num_parts; j++)
5536
605k
            {
5537
605k
                pu_result_t *ps_part_result = aps_part_result[j];
5538
5539
605k
                S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5540
5541
605k
                e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542
5543
                /* Skip clustering if best mode is intra */
5544
605k
                if((ps_part_result->pu.b1_intra_flag))
5545
40.4k
                {
5546
40.4k
                    ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547
40.4k
                    ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548
40.4k
                    continue;
5549
40.4k
                }
5550
565k
                else
5551
565k
                {
5552
565k
                    ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553
565k
                }
5554
5555
565k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556
565k
                if(e_quality_preset >= ME_HIGH_QUALITY)
5557
0
                {
5558
0
                    continue;
5559
0
                }
5560
565k
#endif
5561
5562
1.13M
                for(k = 0; k < num_mvs; k++)
5563
572k
                {
5564
572k
                    mv_t *ps_mv;
5565
5566
572k
                    pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567
5568
572k
                    S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5569
5570
572k
                    ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571
5572
572k
                    mvx = ps_mv->i2_mvx;
5573
572k
                    mvy = ps_mv->i2_mvy;
5574
5575
572k
                    ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576
572k
                                         : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577
5578
572k
                    num_clusters = ps_blk_16x16->num_clusters;
5579
5580
572k
                    hme_find_and_update_clusters(
5581
572k
                        ps_blk_16x16->as_cluster_data,
5582
572k
                        &(ps_blk_16x16->num_clusters),
5583
572k
                        mvx,
5584
572k
                        mvy,
5585
572k
                        ref_idx,
5586
572k
                        ps_part_result->i4_sdi,
5587
572k
                        e_part_id,
5588
572k
                        (ai4_pred_mode[j] == 2));
5589
5590
572k
                    num_clusters_updated = (ps_blk_16x16->num_clusters);
5591
5592
572k
                    ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593
572k
                        (num_clusters_updated - num_clusters);
5594
572k
                }
5595
565k
            }
5596
495k
        }
5597
520k
    }
5598
5599
    /* Search for 32x32 clusters */
5600
162k
    for(i = 0; i < 4; i++)
5601
130k
    {
5602
130k
        S32 num_clusters_merged;
5603
5604
130k
        S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605
5606
130k
        if(is_32x32_blk_valid)
5607
123k
        {
5608
123k
            ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609
123k
            ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610
5611
123k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612
123k
            if(e_quality_preset >= ME_HIGH_QUALITY)
5613
0
            {
5614
0
                for(j = 0; j < 4; j++, ps_blk_16x16++)
5615
0
                {
5616
0
                    ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617
5618
0
                    ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619
0
                }
5620
0
                continue;
5621
0
            }
5622
123k
#endif
5623
5624
123k
            hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625
5626
123k
            if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627
3.85k
            {
5628
3.85k
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629
3.85k
                    ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630
5631
3.85k
                if(num_clusters_merged)
5632
4
                {
5633
4
                    ps_blk_32x32->num_clusters -= num_clusters_merged;
5634
5635
4
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636
4
                }
5637
3.85k
            }
5638
123k
        }
5639
130k
    }
5640
5641
32.5k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642
    /* Eliminate outlier 32x32 clusters */
5643
32.5k
    if(e_quality_preset < ME_HIGH_QUALITY)
5644
32.5k
#endif
5645
32.5k
    {
5646
32.5k
        hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647
5648
        /* Find best_uni_ref and best_alt_ref */
5649
32.5k
        hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650
32.5k
    }
5651
5652
    /* Populate the CU tree for depths 1 and higher */
5653
32.5k
    {
5654
32.5k
        cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655
32.5k
        cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656
32.5k
        cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657
32.5k
        cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658
32.5k
        cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659
5660
32.5k
        hme_populate_cu_tree(
5661
32.5k
            ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662
5663
32.5k
        num_32x32_merges += (ps_tl->is_node_valid == 1);
5664
5665
32.5k
        hme_populate_cu_tree(
5666
32.5k
            ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667
5668
32.5k
        num_32x32_merges += (ps_tr->is_node_valid == 1);
5669
5670
32.5k
        hme_populate_cu_tree(
5671
32.5k
            ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672
5673
32.5k
        num_32x32_merges += (ps_bl->is_node_valid == 1);
5674
5675
32.5k
        hme_populate_cu_tree(
5676
32.5k
            ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677
5678
32.5k
        num_32x32_merges += (ps_br->is_node_valid == 1);
5679
32.5k
    }
5680
5681
#if !ENABLE_4CTB_EVALUATION
5682
    if(e_quality_preset < ME_HIGH_QUALITY)
5683
    {
5684
        enable_64x64_merge = (num_32x32_merges >= 3);
5685
    }
5686
#else
5687
32.5k
    if(e_quality_preset < ME_HIGH_QUALITY)
5688
32.5k
    {
5689
32.5k
        enable_64x64_merge = 1;
5690
32.5k
    }
5691
32.5k
#endif
5692
5693
32.5k
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694
32.5k
    if(e_quality_preset >= ME_HIGH_QUALITY)
5695
0
    {
5696
0
        enable_64x64_merge = 1;
5697
0
    }
5698
#else
5699
    if(e_quality_preset >= ME_HIGH_QUALITY)
5700
    {
5701
        enable_64x64_merge = (num_32x32_merges >= 3);
5702
    }
5703
#endif
5704
5705
32.5k
    if(enable_64x64_merge)
5706
32.5k
    {
5707
32.5k
        S32 num_clusters_merged;
5708
5709
32.5k
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710
5711
32.5k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712
32.5k
        if(e_quality_preset >= ME_HIGH_QUALITY)
5713
0
        {
5714
0
            for(j = 0; j < 4; j++, ps_blk_32x32++)
5715
0
            {
5716
0
                ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717
5718
0
                ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719
0
            }
5720
0
        }
5721
32.5k
        else
5722
32.5k
#endif
5723
32.5k
        {
5724
32.5k
            hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725
5726
32.5k
            if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727
4.10k
            {
5728
4.10k
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729
4.10k
                    ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730
5731
4.10k
                if(num_clusters_merged)
5732
5
                {
5733
5
                    ps_blk_64x64->num_clusters -= num_clusters_merged;
5734
5735
5
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736
5
                }
5737
4.10k
            }
5738
32.5k
        }
5739
5740
#if !ENABLE_4CTB_EVALUATION
5741
        if(e_quality_preset < ME_HIGH_QUALITY)
5742
        {
5743
            S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744
            S32 best_intra_cost =
5745
                ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746
                  ps_ctb_cluster_info->i4_frame_qstep *
5747
                      ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748
                    ? MAX_32BIT_VAL
5749
                    : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750
                       ps_ctb_cluster_info->i4_frame_qstep *
5751
                           ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752
            S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753
            S32 cost_differential = (best_inter_cost - best_cost);
5754
5755
            enable_64x64_merge =
5756
                ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5757
        }
5758
#endif
5759
32.5k
    }
5760
5761
32.5k
    if(enable_64x64_merge)
5762
32.5k
    {
5763
32.5k
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764
32.5k
        if(e_quality_preset < ME_HIGH_QUALITY)
5765
32.5k
#endif
5766
32.5k
        {
5767
32.5k
            hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768
5769
32.5k
            hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770
32.5k
        }
5771
5772
32.5k
        hme_populate_cu_tree(
5773
32.5k
            ps_ctb_cluster_info,
5774
32.5k
            ps_ctb_cluster_info->ps_cu_tree_root,
5775
32.5k
            0,
5776
32.5k
            e_quality_preset,
5777
32.5k
            POS_NA,
5778
32.5k
            POS_NA,
5779
32.5k
            POS_NA);
5780
32.5k
    }
5781
32.5k
}
5782
#endif
5783
5784
static __inline void hme_merge_prms_init(
5785
    hme_merge_prms_t *ps_prms,
5786
    layer_ctxt_t *ps_curr_layer,
5787
    refine_prms_t *ps_refine_prms,
5788
    me_frm_ctxt_t *ps_me_ctxt,
5789
    range_prms_t *ps_range_prms_rec,
5790
    range_prms_t *ps_range_prms_inp,
5791
    mv_grid_t **pps_mv_grid,
5792
    inter_ctb_prms_t *ps_inter_ctb_prms,
5793
    S32 i4_num_pred_dir,
5794
    S32 i4_32x32_id,
5795
    BLK_SIZE_T e_blk_size,
5796
    ME_QUALITY_PRESETS_T e_me_quality_presets)
5797
648k
{
5798
648k
    S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
5799
648k
    S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
5800
5801
    /* Currently not enabling segmentation info from prev layers */
5802
648k
    ps_prms->i4_seg_info_avail = 0;
5803
648k
    ps_prms->i4_part_mask = 0;
5804
5805
    /* Number of reference pics in which to do merge */
5806
648k
    ps_prms->i4_num_ref = i4_num_pred_dir;
5807
5808
    /* Layer ctxt info */
5809
648k
    ps_prms->ps_layer_ctxt = ps_curr_layer;
5810
5811
648k
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5812
5813
    /* Top left, top right, bottom left and bottom right 16x16 units */
5814
648k
    if(BLK_32x32 == e_blk_size)
5815
518k
    {
5816
518k
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5817
518k
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5818
518k
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5819
518k
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5820
5821
        /* Merge results stored here */
5822
518k
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5823
5824
        /* This could be lesser than the number of 16x16results generated*/
5825
        /* For now, keeping it to be same                                */
5826
518k
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5827
518k
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5828
518k
        ps_prms->ps_results_grandchild = NULL;
5829
518k
    }
5830
129k
    else
5831
129k
    {
5832
129k
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5833
129k
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5834
129k
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5835
129k
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5836
5837
        /* Merge results stored here */
5838
129k
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5839
5840
129k
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5841
129k
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5842
129k
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5843
129k
    }
5844
5845
648k
    if(i4_use_rec)
5846
648k
    {
5847
648k
        WORD32 ref_ctr;
5848
5849
8.42M
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5850
7.78M
        {
5851
7.78M
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5852
7.78M
        }
5853
648k
    }
5854
0
    else
5855
0
    {
5856
0
        WORD32 ref_ctr;
5857
5858
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5859
0
        {
5860
0
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5861
0
        }
5862
0
    }
5863
648k
    ps_prms->i4_use_rec = i4_use_rec;
5864
5865
648k
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5866
5867
648k
    ps_prms->pps_mv_grid = pps_mv_grid;
5868
5869
648k
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5870
5871
648k
    ps_prms->e_quality_preset = e_me_quality_presets;
5872
648k
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5873
648k
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5874
648k
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5875
648k
}
5876
5877
/**
5878
********************************************************************************
5879
*  @fn   void hme_refine(me_ctxt_t *ps_ctxt,
5880
*                       refine_layer_prms_t *ps_refine_prms)
5881
*
5882
*  @brief  Top level entry point for refinement ME
5883
*
5884
*  @param[in,out]  ps_ctxt: ME Handle
5885
*
5886
*  @param[in]  ps_refine_prms : refinement layer prms
5887
*
5888
*  @return None
5889
********************************************************************************
5890
*/
5891
void hme_refine(
5892
    me_ctxt_t *ps_thrd_ctxt,
5893
    refine_prms_t *ps_refine_prms,
5894
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895
    layer_ctxt_t *ps_coarse_layer,
5896
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897
    S32 lyr_job_type,
5898
    S32 thrd_id,
5899
    S32 me_frm_id,
5900
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901
129k
{
5902
129k
    inter_ctb_prms_t s_common_frm_prms;
5903
5904
129k
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905
129k
    WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906
129k
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907
129k
    ME_QUALITY_PRESETS_T e_me_quality_presets =
5908
129k
        ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909
5910
129k
    WORD32 num_rows_proc = 0;
5911
129k
    WORD32 num_act_ref_pics;
5912
129k
    WORD16 i2_prev_enc_frm_max_mv_y;
5913
129k
    WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914
5915
    /*************************************************************************/
5916
    /* Complexity of search: Low to High                                     */
5917
    /*************************************************************************/
5918
129k
    SEARCH_COMPLEXITY_T e_search_complexity;
5919
5920
    /*************************************************************************/
5921
    /* to store the PU results which are passed to the decide_part_types     */
5922
    /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923
    /*************************************************************************/
5924
5925
129k
    pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926
129k
    inter_pu_results_t as_inter_pu_results[4];
5927
129k
    inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928
5929
    /*************************************************************************/
5930
    /* Config parameter structures for various ME submodules                 */
5931
    /*************************************************************************/
5932
129k
    hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933
129k
    hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934
129k
    hme_merge_prms_t s_merge_prms_64x64;
5935
129k
    hme_search_prms_t s_search_prms_blk;
5936
129k
    mvbank_update_prms_t s_mv_update_prms;
5937
129k
    hme_ctb_prms_t s_ctb_prms;
5938
129k
    hme_subpel_prms_t s_subpel_prms;
5939
129k
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940
129k
    ctb_cluster_info_t *ps_ctb_cluster_info;
5941
129k
    fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942
5943
    /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944
129k
    S32 en_merge_32x32;
5945
    /* 5 lsb's specify whether or not merge algorithm is required */
5946
    /* to be executed or not. Relevant only in PQ. Ought to be */
5947
    /* used in conjunction with en_merge_32x32 and */
5948
    /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949
    /* required when all children are deemed to be intras */
5950
129k
    S32 en_merge_execution;
5951
5952
    /*************************************************************************/
5953
    /* All types of search candidates for predictor based search.            */
5954
    /*************************************************************************/
5955
129k
    S32 num_init_candts = 0;
5956
129k
    S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957
129k
    S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958
129k
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959
129k
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960
5961
129k
    pf_get_wt_inp fp_get_wt_inp;
5962
5963
129k
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964
129k
    U32 au4_unique_node_map[MAP_X_MAX * 2];
5965
5966
    /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967
129k
    ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968
5969
    /*************************************************************************/
5970
    /* points to the search results for the blk level search (8x8/16x16)     */
5971
    /*************************************************************************/
5972
129k
    search_results_t *ps_search_results;
5973
5974
    /*************************************************************************/
5975
    /* Coordinates                                                           */
5976
    /*************************************************************************/
5977
129k
    S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978
129k
    S32 pos_x, pos_y;
5979
129k
    S32 blk_id_in_full_ctb;
5980
5981
    /*************************************************************************/
5982
    /* Related to dimensions of block being searched and pic dimensions      */
5983
    /*************************************************************************/
5984
129k
    S32 blk_4x4_to_16x16;
5985
129k
    S32 blk_wd, blk_ht, blk_size_shift;
5986
129k
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987
129k
    S32 num_results_prev_layer;
5988
5989
    /*************************************************************************/
5990
    /* Size of a basic unit for this layer. For non encode layers, we search */
5991
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992
    /* basic unit size is the ctb size.                                      */
5993
    /*************************************************************************/
5994
129k
    S32 unit_size;
5995
5996
    /*************************************************************************/
5997
    /* Local variable storing results of any 4 CU merge to bigger CU         */
5998
    /*************************************************************************/
5999
129k
    CU_MERGE_RESULT_T e_merge_result;
6000
6001
    /*************************************************************************/
6002
    /* This mv grid stores results during and after fpel search, during      */
6003
    /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004
    /* meant for the 2 directions of search (l0 and l1).                     */
6005
    /*************************************************************************/
6006
129k
    mv_grid_t *aps_mv_grid[2];
6007
6008
    /*************************************************************************/
6009
    /* Pointers to context in current and coarser layers                     */
6010
    /*************************************************************************/
6011
129k
    layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012
6013
    /*************************************************************************/
6014
    /* to store mv range per blk, and picture limit, allowed search range    */
6015
    /* range prms in hpel and qpel units as well                             */
6016
    /*************************************************************************/
6017
129k
    range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018
129k
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019
129k
    range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020
6021
    /*************************************************************************/
6022
    /* These variables are used to track number of references at different   */
6023
    /* stages of ME.                                                         */
6024
    /*************************************************************************/
6025
129k
    S32 i4_num_pred_dir;
6026
129k
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027
129k
    S32 lambda_recon = ps_refine_prms->lambda_recon;
6028
6029
    /* Counts successful merge to 32x32 every CTB (0-4) */
6030
129k
    S32 merge_count_32x32;
6031
6032
129k
    S32 ai4_id_coloc[14], ai4_id_Z[2];
6033
129k
    U08 au1_search_candidate_list_index[2];
6034
129k
    S32 ai4_num_coloc_cands[2];
6035
129k
    U08 u1_pred_dir, u1_pred_dir_ctr;
6036
6037
    /*************************************************************************/
6038
    /* Input pointer and stride                                              */
6039
    /*************************************************************************/
6040
129k
    U08 *pu1_inp;
6041
129k
    S32 i4_inp_stride;
6042
129k
    S32 end_of_frame;
6043
129k
    S32 num_sync_units_in_row, num_sync_units_in_tile;
6044
6045
    /*************************************************************************/
6046
    /* Indicates whether all 4 8x8 blks are valid in the 16x16 blk in the    */
6047
    /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6048
    /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6049
    /*************************************************************************/
6050
129k
    S32 blk_8x8_mask;
6051
129k
    S32 ai4_blk_8x8_mask[16];
6052
129k
    U08 au1_is_64x64Blk_noisy[1];
6053
129k
    U08 au1_is_32x32Blk_noisy[4];
6054
129k
    U08 au1_is_16x16Blk_noisy[16];
6055
6056
129k
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057
129k
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058
129k
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059
129k
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060
6061
129k
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062
6063
    /*************************************************************************/
6064
    /* Pointers to current and coarse layer are needed for projection */
6065
    /* Pointer to prev layer are needed for other candts like coloc   */
6066
    /*************************************************************************/
6067
129k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068
6069
129k
    ps_prev_layer = hme_get_past_layer_ctxt(
6070
129k
        ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071
6072
129k
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073
6074
    /* Function pointer is selected based on the C vs X86 macro */
6075
6076
129k
    fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077
6078
129k
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079
129k
    i4_pic_wd = ps_curr_layer->i4_wd;
6080
129k
    i4_pic_ht = ps_curr_layer->i4_ht;
6081
129k
    e_search_complexity = ps_refine_prms->e_search_complexity;
6082
129k
    end_of_frame = 0;
6083
6084
    /* This points to all the initial candts */
6085
129k
    ps_search_candts = &as_search_candts[0];
6086
6087
    /* mv grid being huge structure is part of context */
6088
129k
    aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089
129k
    aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090
6091
    /*************************************************************************/
6092
    /* If the current layer is encoded (since it may be multicast or final   */
6093
    /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6094
    /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6095
    /* enable or disable of 4x4 partitions depending on the input prms       */
6096
    /*************************************************************************/
6097
129k
    e_search_blk_size = BLK_16x16;
6098
129k
    blk_wd = blk_ht = 16;
6099
129k
    blk_size_shift = 4;
6100
129k
    e_result_blk_size = BLK_8x8;
6101
129k
    s_mv_update_prms.i4_shift = 1;
6102
6103
129k
    if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104
59.0k
    {
6105
59.0k
        blk_4x4_to_16x16 = 1;
6106
59.0k
    }
6107
70.6k
    else
6108
70.6k
    {
6109
70.6k
        blk_4x4_to_16x16 = 0;
6110
70.6k
    }
6111
6112
129k
    unit_size = 1 << ps_ctxt->log_ctb_size;
6113
129k
    s_search_prms_blk.i4_inp_stride = unit_size;
6114
6115
    /* This is required to properly update the layer mv bank */
6116
129k
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117
129k
    s_search_prms_blk.e_blk_size = e_search_blk_size;
6118
6119
    /*************************************************************************/
6120
    /* If current layer is explicit, then the number of ref frames are to    */
6121
    /* be same as previous layer. Else it will be 2                          */
6122
    /*************************************************************************/
6123
129k
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124
129k
    i4_num_pred_dir =
6125
129k
        (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126
129k
        1;
6127
6128
129k
#if USE_MODIFIED == 1
6129
129k
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130
#else
6131
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132
#endif
6133
6134
129k
    i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135
129k
    if(i4_num_ref_prev_layer <= 2)
6136
102k
    {
6137
102k
        i4_num_ref_each_dir = 1;
6138
102k
    }
6139
27.3k
    else
6140
27.3k
    {
6141
27.3k
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142
27.3k
    }
6143
6144
129k
    s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145
129k
    s_mv_update_prms.i4_num_results_to_store =
6146
129k
        MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147
129k
                                                : (i4_num_act_ref_l0 > 1) + 1,
6148
129k
            ps_refine_prms->i4_num_results_per_part);
6149
6150
    /*************************************************************************/
6151
    /* Initialization of merge params for 16x16 to 32x32 merge.              */
6152
    /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6153
    /*************************************************************************/
6154
129k
    {
6155
129k
        hme_merge_prms_t *aps_merge_prms[4];
6156
129k
        aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157
129k
        aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158
129k
        aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159
129k
        aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160
648k
        for(i = 0; i < 4; i++)
6161
518k
        {
6162
518k
            hme_merge_prms_init(
6163
518k
                aps_merge_prms[i],
6164
518k
                ps_curr_layer,
6165
518k
                ps_refine_prms,
6166
518k
                ps_ctxt,
6167
518k
                as_range_prms_rec,
6168
518k
                as_range_prms_inp,
6169
518k
                &aps_mv_grid[0],
6170
518k
                &s_common_frm_prms,
6171
518k
                i4_num_pred_dir,
6172
518k
                i,
6173
518k
                BLK_32x32,
6174
518k
                e_me_quality_presets);
6175
518k
        }
6176
129k
    }
6177
6178
    /*************************************************************************/
6179
    /* Initialization of merge params for 32x32 to 64x64 merge.              */
6180
    /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6181
    /*************************************************************************/
6182
129k
    {
6183
129k
        hme_merge_prms_init(
6184
129k
            &s_merge_prms_64x64,
6185
129k
            ps_curr_layer,
6186
129k
            ps_refine_prms,
6187
129k
            ps_ctxt,
6188
129k
            as_range_prms_rec,
6189
129k
            as_range_prms_inp,
6190
129k
            &aps_mv_grid[0],
6191
129k
            &s_common_frm_prms,
6192
129k
            i4_num_pred_dir,
6193
129k
            0,
6194
129k
            BLK_64x64,
6195
129k
            e_me_quality_presets);
6196
129k
    }
6197
6198
    /* Pointers to cu_results are initialised here */
6199
129k
    {
6200
129k
        WORD32 i;
6201
6202
129k
        ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203
6204
648k
        for(i = 0; i < 4; i++)
6205
518k
        {
6206
518k
            ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207
518k
        }
6208
6209
2.20M
        for(i = 0; i < 16; i++)
6210
2.07M
        {
6211
2.07M
            ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212
2.07M
        }
6213
129k
    }
6214
6215
    /*************************************************************************/
6216
    /* SUBPEL Params initialized here                                        */
6217
    /*************************************************************************/
6218
129k
    {
6219
129k
        s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220
129k
        s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221
129k
        s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222
6223
129k
        s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224
129k
        s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225
129k
        s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226
6227
129k
        s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228
129k
        s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229
6230
129k
        s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231
6232
129k
        s_subpel_prms.i4_inp_stride = unit_size;
6233
6234
129k
        s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235
129k
        s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236
129k
        s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237
6238
129k
        s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239
6240
129k
        {
6241
129k
            WORD32 ref_ctr;
6242
1.68M
            for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243
1.55M
            {
6244
1.55M
                s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245
1.55M
                s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246
1.55M
            }
6247
129k
        }
6248
129k
        s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249
6250
#if USE_MODIFIED == 0
6251
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252
#else
6253
129k
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254
129k
#endif
6255
129k
        s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256
6257
        /* BI Refinement done only if this field is 1 */
6258
129k
        s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259
6260
129k
        s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261
6262
129k
        s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263
129k
        s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264
129k
        s_subpel_prms.u1_max_num_subpel_refine_centers =
6265
129k
            ps_refine_prms->u1_max_num_subpel_refine_centers;
6266
129k
    }
6267
6268
    /* inter_ctb_prms_t struct initialisation */
6269
129k
    {
6270
129k
        inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271
129k
        hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272
6273
129k
        ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274
129k
        ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275
129k
        ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276
129k
        ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277
129k
        ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278
129k
        ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279
129k
        ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280
129k
        ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281
129k
        ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282
129k
        ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283
129k
        ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284
129k
        ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285
129k
        ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286
129k
        ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287
129k
        ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288
129k
        ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289
129k
        ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290
129k
        ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291
129k
        ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292
129k
        ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293
129k
            ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294
129k
    }
6295
6296
7.90M
    for(i = 0; i < MAX_INIT_CANDTS; i++)
6297
7.78M
    {
6298
7.78M
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299
7.78M
        ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300
6301
7.78M
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302
7.78M
    }
6303
129k
    num_act_ref_pics =
6304
129k
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305
6306
129k
    if(num_act_ref_pics)
6307
129k
    {
6308
129k
        hme_search_cand_data_init(
6309
129k
            ai4_id_Z,
6310
129k
            ai4_id_coloc,
6311
129k
            ai4_num_coloc_cands,
6312
129k
            au1_search_candidate_list_index,
6313
129k
            i4_num_act_ref_l0,
6314
129k
            i4_num_act_ref_l1,
6315
129k
            ps_ctxt->s_frm_prms.bidir_enabled,
6316
129k
            blk_4x4_to_16x16);
6317
129k
    }
6318
6319
129k
    if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320
53.7k
    {
6321
53.7k
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322
53.7k
        ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323
53.7k
    }
6324
75.9k
    else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325
44.9k
    {
6326
44.9k
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327
44.9k
    }
6328
6329
518k
    for(i = 0; i < 3; i++)
6330
389k
    {
6331
389k
        search_node_t *ps_search_node;
6332
389k
        ps_search_node = &as_left_neighbours[i];
6333
389k
        INIT_SEARCH_NODE(ps_search_node, 0);
6334
389k
        ps_search_node = &as_top_neighbours[i];
6335
389k
        INIT_SEARCH_NODE(ps_search_node, 0);
6336
389k
    }
6337
6338
129k
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339
129k
    as_left_neighbours[2].u1_is_avail = 0;
6340
6341
    /*************************************************************************/
6342
    /* Initialize all the search results structure here. We update all the   */
6343
    /* search results to default values, and configure things like blk sizes */
6344
    /*************************************************************************/
6345
129k
    if(num_act_ref_pics)
6346
129k
    {
6347
129k
        S32 i4_x, i4_y;
6348
        /* 16x16 results */
6349
2.20M
        for(i = 0; i < 16; i++)
6350
2.07M
        {
6351
2.07M
            search_results_t *ps_search_results;
6352
2.07M
            S32 pred_lx;
6353
2.07M
            ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354
2.07M
            i4_x = (S32)gau1_encode_to_raster_x[i];
6355
2.07M
            i4_y = (S32)gau1_encode_to_raster_y[i];
6356
2.07M
            i4_x <<= 4;
6357
2.07M
            i4_y <<= 4;
6358
6359
2.07M
            hme_init_search_results(
6360
2.07M
                ps_search_results,
6361
2.07M
                i4_num_pred_dir,
6362
2.07M
                ps_refine_prms->i4_num_fpel_results,
6363
2.07M
                ps_refine_prms->i4_num_results_per_part,
6364
2.07M
                e_search_blk_size,
6365
2.07M
                i4_x,
6366
2.07M
                i4_y,
6367
2.07M
                &ps_ctxt->au1_is_past[0]);
6368
6369
6.22M
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370
4.14M
            {
6371
4.14M
                pred_ctxt_t *ps_pred_ctxt;
6372
6373
4.14M
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374
6375
4.14M
                hme_init_pred_ctxt_encode(
6376
4.14M
                    ps_pred_ctxt,
6377
4.14M
                    ps_search_results,
6378
4.14M
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379
4.14M
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380
4.14M
                    aps_mv_grid[pred_lx],
6381
4.14M
                    pred_lx,
6382
4.14M
                    lambda_recon,
6383
4.14M
                    ps_refine_prms->lambda_q_shift,
6384
4.14M
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385
4.14M
                    &ps_ctxt->ai2_ref_scf[0]);
6386
4.14M
            }
6387
2.07M
        }
6388
6389
648k
        for(i = 0; i < 4; i++)
6390
518k
        {
6391
518k
            search_results_t *ps_search_results;
6392
518k
            S32 pred_lx;
6393
518k
            ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394
6395
518k
            i4_x = (S32)gau1_encode_to_raster_x[i];
6396
518k
            i4_y = (S32)gau1_encode_to_raster_y[i];
6397
518k
            i4_x <<= 5;
6398
518k
            i4_y <<= 5;
6399
6400
518k
            hme_init_search_results(
6401
518k
                ps_search_results,
6402
518k
                i4_num_pred_dir,
6403
518k
                ps_refine_prms->i4_num_32x32_merge_results,
6404
518k
                ps_refine_prms->i4_num_results_per_part,
6405
518k
                BLK_32x32,
6406
518k
                i4_x,
6407
518k
                i4_y,
6408
518k
                &ps_ctxt->au1_is_past[0]);
6409
6410
1.55M
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411
1.03M
            {
6412
1.03M
                pred_ctxt_t *ps_pred_ctxt;
6413
6414
1.03M
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415
6416
1.03M
                hme_init_pred_ctxt_encode(
6417
1.03M
                    ps_pred_ctxt,
6418
1.03M
                    ps_search_results,
6419
1.03M
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420
1.03M
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421
1.03M
                    aps_mv_grid[pred_lx],
6422
1.03M
                    pred_lx,
6423
1.03M
                    lambda_recon,
6424
1.03M
                    ps_refine_prms->lambda_q_shift,
6425
1.03M
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426
1.03M
                    &ps_ctxt->ai2_ref_scf[0]);
6427
1.03M
            }
6428
518k
        }
6429
6430
129k
        {
6431
129k
            search_results_t *ps_search_results;
6432
129k
            S32 pred_lx;
6433
129k
            ps_search_results = &ps_ctxt->s_search_results_64x64;
6434
6435
129k
            hme_init_search_results(
6436
129k
                ps_search_results,
6437
129k
                i4_num_pred_dir,
6438
129k
                ps_refine_prms->i4_num_64x64_merge_results,
6439
129k
                ps_refine_prms->i4_num_results_per_part,
6440
129k
                BLK_64x64,
6441
129k
                0,
6442
129k
                0,
6443
129k
                &ps_ctxt->au1_is_past[0]);
6444
6445
389k
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446
259k
            {
6447
259k
                pred_ctxt_t *ps_pred_ctxt;
6448
6449
259k
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450
6451
259k
                hme_init_pred_ctxt_encode(
6452
259k
                    ps_pred_ctxt,
6453
259k
                    ps_search_results,
6454
259k
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455
259k
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456
259k
                    aps_mv_grid[pred_lx],
6457
259k
                    pred_lx,
6458
259k
                    lambda_recon,
6459
259k
                    ps_refine_prms->lambda_q_shift,
6460
259k
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461
259k
                    &ps_ctxt->ai2_ref_scf[0]);
6462
259k
            }
6463
129k
        }
6464
129k
    }
6465
6466
    /* Initialise the structure used in clustering  */
6467
129k
    if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468
27.5k
    {
6469
27.5k
        ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470
6471
27.5k
        ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472
27.5k
        ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473
27.5k
        ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474
27.5k
        ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475
27.5k
        ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476
27.5k
        ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477
27.5k
        ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478
27.5k
    }
6479
6480
    /*********************************************************************/
6481
    /* Initialize the dyn. search range params. for each reference index */
6482
    /* in current layer ctxt                                             */
6483
    /*********************************************************************/
6484
6485
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6486
129k
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487
98.7k
    {
6488
98.7k
        WORD32 ref_ctr;
6489
        /* set no. of act ref in L0 for further use at frame level */
6490
98.7k
        ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491
98.7k
            ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492
6493
281k
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494
182k
        {
6495
182k
            INIT_DYN_SEARCH_PRMS(
6496
182k
                &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497
182k
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498
182k
        }
6499
98.7k
    }
6500
    /*************************************************************************/
6501
    /* Now that the candidates have been ordered, to choose the right number */
6502
    /* of initial candidates.                                                */
6503
    /*************************************************************************/
6504
129k
    if(blk_4x4_to_16x16)
6505
59.0k
    {
6506
59.0k
        if(i4_num_ref_prev_layer > 2)
6507
27.2k
        {
6508
27.2k
            if(e_search_complexity == SEARCH_CX_LOW)
6509
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510
27.2k
            else if(e_search_complexity == SEARCH_CX_MED)
6511
27.2k
                num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6513
0
                num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514
0
            else
6515
0
                ASSERT(0);
6516
27.2k
        }
6517
31.7k
        else if(i4_num_ref_prev_layer == 2)
6518
21.4k
        {
6519
21.4k
            if(e_search_complexity == SEARCH_CX_LOW)
6520
0
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521
21.4k
            else if(e_search_complexity == SEARCH_CX_MED)
6522
21.4k
                num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6524
0
                num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525
0
            else
6526
0
                ASSERT(0);
6527
21.4k
        }
6528
10.2k
        else
6529
10.2k
        {
6530
10.2k
            if(e_search_complexity == SEARCH_CX_LOW)
6531
0
                num_init_candts = 5;
6532
10.2k
            else if(e_search_complexity == SEARCH_CX_MED)
6533
10.2k
                num_init_candts = 12;
6534
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6535
0
                num_init_candts = 19;
6536
0
            else
6537
0
                ASSERT(0);
6538
10.2k
        }
6539
59.0k
    }
6540
70.6k
    else
6541
70.6k
    {
6542
70.6k
        if(i4_num_ref_prev_layer > 2)
6543
123
        {
6544
123
            if(e_search_complexity == SEARCH_CX_LOW)
6545
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546
123
            else if(e_search_complexity == SEARCH_CX_MED)
6547
123
                num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6549
0
                num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550
0
            else
6551
0
                ASSERT(0);
6552
123
        }
6553
70.5k
        else if(i4_num_ref_prev_layer == 2)
6554
35.2k
        {
6555
35.2k
            if(e_search_complexity == SEARCH_CX_LOW)
6556
18.8k
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557
16.4k
            else if(e_search_complexity == SEARCH_CX_MED)
6558
16.4k
                num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6560
0
                num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561
0
            else
6562
0
                ASSERT(0);
6563
35.2k
        }
6564
35.2k
        else
6565
35.2k
        {
6566
35.2k
            if(e_search_complexity == SEARCH_CX_LOW)
6567
31.9k
                num_init_candts = 5;
6568
3.28k
            else if(e_search_complexity == SEARCH_CX_MED)
6569
3.28k
                num_init_candts = 11;
6570
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6571
0
                num_init_candts = 16;
6572
0
            else
6573
0
                ASSERT(0);
6574
35.2k
        }
6575
70.6k
    }
6576
6577
    /*************************************************************************/
6578
    /* The following search parameters are fixed throughout the search across*/
6579
    /* all blks. So these are configured outside processing loop             */
6580
    /*************************************************************************/
6581
129k
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582
129k
    s_search_prms_blk.i4_start_step = 1;
6583
129k
    s_search_prms_blk.i4_use_satd = 0;
6584
129k
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585
    /* we use recon only for encoded layers, otherwise it is not available */
6586
129k
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587
6588
129k
    s_search_prms_blk.ps_search_candts = ps_search_candts;
6589
129k
    if(s_search_prms_blk.i4_use_rec)
6590
129k
    {
6591
129k
        WORD32 ref_ctr;
6592
1.68M
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593
1.55M
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594
129k
    }
6595
0
    else
6596
0
    {
6597
0
        WORD32 ref_ctr;
6598
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599
0
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600
0
    }
6601
6602
    /*************************************************************************/
6603
    /* Initialize coordinates. Meaning as follows                            */
6604
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6605
    /* blk_y : same as above, y coord.                                       */
6606
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6607
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
6608
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6609
    /* corner of the picture. Always multiple of 64.                         */
6610
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6611
    /*************************************************************************/
6612
129k
    blk_y = 0;
6613
129k
    blk_id_in_ctb = 0;
6614
129k
    i4_ctb_y = 0;
6615
6616
    /*************************************************************************/
6617
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
6618
    /* every block given its coordinate. Note this assumes that the min amt  */
6619
    /* of padding to right of pic is equal to the blk size. If we go all the */
6620
    /* way upto 64x64, then the min padding on right size of picture should  */
6621
    /* be 64, and also on bottom side of picture.                            */
6622
    /*************************************************************************/
6623
129k
    SET_PIC_LIMIT(
6624
129k
        s_pic_limit_inp,
6625
129k
        ps_curr_layer->i4_pad_x_rec,
6626
129k
        ps_curr_layer->i4_pad_y_rec,
6627
129k
        ps_curr_layer->i4_wd,
6628
129k
        ps_curr_layer->i4_ht,
6629
129k
        s_search_prms_blk.i4_num_steps_post_refine);
6630
6631
129k
    SET_PIC_LIMIT(
6632
129k
        s_pic_limit_rec,
6633
129k
        ps_curr_layer->i4_pad_x_rec,
6634
129k
        ps_curr_layer->i4_pad_y_rec,
6635
129k
        ps_curr_layer->i4_wd,
6636
129k
        ps_curr_layer->i4_ht,
6637
129k
        s_search_prms_blk.i4_num_steps_post_refine);
6638
6639
    /*************************************************************************/
6640
    /* set the MV limit per ref. pic.                                        */
6641
    /*    - P pic. : Based on the config params.                             */
6642
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643
    /*************************************************************************/
6644
129k
    hme_set_mv_limit_using_dvsr_data(
6645
129k
        ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646
129k
    s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647
129k
    s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648
129k
    s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649
129k
    s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650
129k
    s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651
129k
    s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652
129k
    s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653
129k
    s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654
129k
    s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655
129k
    s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656
129k
    s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657
6658
395k
    while(0 == end_of_frame)
6659
265k
    {
6660
265k
        job_queue_t *ps_job;
6661
265k
        frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662
265k
        ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663
6664
265k
        WORD32 i4_max_mv_x_in_ctb;
6665
265k
        WORD32 i4_max_mv_y_in_ctb;
6666
265k
        void *pv_dep_mngr_encloop_dep_me;
6667
265k
        WORD32 offset_val, check_dep_pos, set_dep_pos;
6668
265k
        WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669
6670
265k
        pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671
6672
265k
        ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673
6674
        /* Get the current row from the job queue */
6675
265k
        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676
265k
            ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677
6678
        /* If all rows are done, set the end of process flag to 1, */
6679
        /* and the current row to -1 */
6680
265k
        if(NULL == ps_job)
6681
129k
        {
6682
129k
            blk_y = -1;
6683
129k
            i4_ctb_y = -1;
6684
129k
            tile_col_idx = -1;
6685
129k
            end_of_frame = 1;
6686
6687
129k
            continue;
6688
129k
        }
6689
6690
        /* set the output dependency after picking up the row */
6691
136k
        ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692
6693
        /* Obtain the current row's details from the job */
6694
136k
        {
6695
136k
            ihevce_tile_params_t *ps_col_tile_params;
6696
6697
136k
            i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698
            /* Obtain the current column tile index from the job */
6699
136k
            tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700
6701
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6702
            /* note if ctb is 32x32 then this calc needs to be changed */
6703
136k
            num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704
136k
                                    ps_ctxt->log_ctb_size;
6705
6706
            /* The tile parameter for the col. idx. Use only the properties
6707
            which are the same for all the bottom tiles like width, start_x, etc.
6708
            Don't use height, start_y, etc.                                  */
6709
136k
            ps_col_tile_params =
6710
136k
                ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6712
            /* note if ctb is 32x32 then this calc needs to be changed */
6713
136k
            num_sync_units_in_tile =
6714
136k
                (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715
136k
                ps_ctxt->log_ctb_size;
6716
6717
136k
            i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718
136k
            i4_ctb_x = i4_first_ctb_x;
6719
6720
136k
            if(!num_act_ref_pics)
6721
0
            {
6722
0
                for(i4_ctb_x = i4_first_ctb_x;
6723
0
                    i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724
0
                    i4_ctb_x++)
6725
0
                {
6726
0
                    S32 blk_i = 0, blk_j = 0;
6727
                    /* set the dependency for the corresponding row in enc loop */
6728
0
                    ihevce_dmgr_set_row_row_sync(
6729
0
                        pv_dep_mngr_encloop_dep_me,
6730
0
                        (i4_ctb_x + 1),
6731
0
                        i4_ctb_y,
6732
0
                        tile_col_idx /* Col Tile No. */);
6733
0
                }
6734
6735
0
                continue;
6736
0
            }
6737
6738
            /* increment the number of rows proc */
6739
136k
            num_rows_proc++;
6740
6741
            /* Set Variables for Dep. Checking and Setting */
6742
136k
            set_dep_pos = i4_ctb_y + 1;
6743
136k
            if(i4_ctb_y > 0)
6744
6.61k
            {
6745
6.61k
                offset_val = 2;
6746
6.61k
                check_dep_pos = i4_ctb_y - 1;
6747
6.61k
            }
6748
129k
            else
6749
129k
            {
6750
                /* First row should run without waiting */
6751
129k
                offset_val = -1;
6752
129k
                check_dep_pos = 0;
6753
129k
            }
6754
6755
            /* row ctb out pointer  */
6756
136k
            ps_ctxt->ps_ctb_analyse_curr_row =
6757
136k
                ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758
6759
            /* Row level CU Tree buffer */
6760
136k
            ps_ctxt->ps_cu_tree_curr_row =
6761
136k
                ps_ctxt->ps_cu_tree_base +
6762
136k
                i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763
6764
136k
            ps_ctxt->ps_me_ctb_data_curr_row =
6765
136k
                ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766
136k
        }
6767
6768
        /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769
0
        left_ctb_in_diff_tile = 1;
6770
6771
        /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                 */
6772
        /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6773
136k
        {
6774
136k
            S32 i4_ref_id, i4_bits_req;
6775
6776
410k
            for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777
410k
                                            ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778
273k
                i4_ref_id++)
6779
273k
            {
6780
273k
                GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781
6782
273k
                if(i4_bits_req > 12)
6783
0
                {
6784
0
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785
0
                }
6786
273k
                else
6787
273k
                {
6788
273k
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789
273k
                }
6790
273k
            }
6791
6792
136k
            s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793
136k
        }
6794
6795
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796
        /* loop over all the units in a row                        */
6797
283k
        for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798
146k
            i4_ctb_x++)
6799
146k
        {
6800
146k
            ihevce_ctb_noise_params *ps_ctb_noise_params =
6801
146k
                &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802
6803
146k
            s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804
146k
            s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805
6806
146k
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807
146k
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808
            /* Initialize ptr to current IPE CTB */
6809
146k
            ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810
146k
                             i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811
146k
            {
6812
146k
                ps_ctb_bound_attrs =
6813
146k
                    get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814
6815
146k
                en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816
146k
                num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817
146k
            }
6818
6819
            /* Block to initialise pointers to part_type_results_t */
6820
            /* in each size-specific inter_cu_results_t  */
6821
146k
            {
6822
146k
                WORD32 i;
6823
6824
9.54M
                for(i = 0; i < 64; i++)
6825
9.39M
                {
6826
9.39M
                    ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827
9.39M
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828
9.39M
                            .as_8x8_block_data[i]
6829
9.39M
                            .as_best_results;
6830
9.39M
                    ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831
9.39M
                }
6832
6833
2.49M
                for(i = 0; i < 16; i++)
6834
2.34M
                {
6835
2.34M
                    ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836
2.34M
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837
2.34M
                    ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838
2.34M
                }
6839
6840
733k
                for(i = 0; i < 4; i++)
6841
587k
                {
6842
587k
                    ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843
587k
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844
587k
                            .as_32x32_block_data[i]
6845
587k
                            .as_best_results;
6846
587k
                    ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847
587k
                }
6848
6849
146k
                ps_ctxt->s_cu64x64_results.ps_best_results =
6850
146k
                    ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851
146k
                ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852
146k
            }
6853
6854
146k
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855
32.5k
            {
6856
32.5k
                ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857
32.5k
                ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858
32.5k
                ps_ctb_cluster_info->ps_cu_tree_root =
6859
32.5k
                    ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860
32.5k
                ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861
32.5k
            }
6862
6863
146k
            if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864
114k
            {
6865
114k
                S32 i4_nodes_created_in_cu_tree = 1;
6866
6867
114k
                ihevce_cu_tree_init(
6868
114k
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869
114k
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870
114k
                    &i4_nodes_created_in_cu_tree,
6871
114k
                    0,
6872
114k
                    POS_NA,
6873
114k
                    POS_NA,
6874
114k
                    POS_NA);
6875
114k
            }
6876
6877
146k
            memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878
6879
146k
            if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880
40.1k
            {
6881
40.1k
                S32 j;
6882
6883
40.1k
                ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884
6885
40.1k
                ps_cur_ipe_ctb =
6886
40.1k
                    ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887
40.1k
                lambda_recon =
6888
40.1k
                    hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889
6890
40.1k
                lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891
6892
200k
                for(i = 0; i < 4; i++)
6893
160k
                {
6894
160k
                    ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895
6896
482k
                    for(j = 0; j < 2; j++)
6897
321k
                    {
6898
321k
                        ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899
321k
                    }
6900
160k
                }
6901
40.1k
                ps_search_results = &ps_ctxt->s_search_results_64x64;
6902
6903
120k
                for(j = 0; j < 2; j++)
6904
80.3k
                {
6905
80.3k
                    ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906
80.3k
                }
6907
6908
40.1k
                s_common_frm_prms.i4_lamda = lambda_recon;
6909
40.1k
            }
6910
106k
            else
6911
106k
            {
6912
106k
                lambda_recon = ps_refine_prms->lambda_recon;
6913
106k
            }
6914
6915
            /*********************************************************************/
6916
            /* replicate the inp buffer at blk or ctb level for each ref id,     */
6917
            /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918
            /* thereby avoiding a bloat up of memory. If we did all references   */
6919
            /* weighted pred, we will end up with a duplicate copy of each ref   */
6920
            /* at each layer, since we need to preserve the original reference.  */
6921
            /* ToDo: Need to observe performance with this mechanism and compare */
6922
            /* with case where ref is weighted.                                  */
6923
            /*********************************************************************/
6924
146k
            fp_get_wt_inp(
6925
146k
                ps_curr_layer,
6926
146k
                &ps_ctxt->s_wt_pred,
6927
146k
                unit_size,
6928
146k
                s_common_frm_prms.i4_ctb_x_off,
6929
146k
                s_common_frm_prms.i4_ctb_y_off,
6930
146k
                unit_size,
6931
146k
                ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932
146k
                ps_ctxt->i4_wt_pred_enable_flag);
6933
6934
146k
            if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935
0
            {
6936
0
#if TEMPORAL_NOISE_DETECT
6937
0
                {
6938
0
                    WORD32 had_block_size = 16;
6939
0
                    WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940
0
                                           ? 64
6941
0
                                           : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942
0
                    WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943
0
                                            ? 64
6944
0
                                            : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945
0
                    WORD32 num_pred_dir = i4_num_pred_dir;
6946
0
                    WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947
0
                    WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948
6949
0
                    WORD32 i;
6950
0
                    WORD32 noise_detected;
6951
0
                    WORD32 ctb_size;
6952
0
                    WORD32 num_comp_had_blocks;
6953
0
                    WORD32 noisy_block_cnt;
6954
0
                    WORD32 index_8x8_block;
6955
0
                    WORD32 num_8x8_in_ctb_row;
6956
6957
0
                    WORD32 ht_offset;
6958
0
                    WORD32 wd_offset;
6959
0
                    WORD32 block_ht;
6960
0
                    WORD32 block_wd;
6961
6962
0
                    WORD32 num_horz_blocks;
6963
0
                    WORD32 num_vert_blocks;
6964
6965
0
                    WORD32 mean;
6966
0
                    UWORD32 variance_8x8;
6967
6968
0
                    WORD32 hh_energy_percent;
6969
6970
                    /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6971
0
                    WORD32 min_noisy_block_cnt;
6972
0
                    WORD32 min_coeffs_above_avg;
6973
0
                    WORD32 min_coeff_avg_energy;
6974
6975
                    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
6976
0
                    WORD32 i4_cu_x_off, i4_cu_y_off;
6977
0
                    WORD32 is_noisy;
6978
6979
                    /* initialise the variables holding the constants */
6980
0
                    if(had_block_size == 8)
6981
0
                    {
6982
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6983
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985
0
                    }
6986
0
                    else
6987
0
                    {
6988
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6989
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991
0
                    }
6992
6993
                    /* initialize the variables */
6994
0
                    noise_detected = 0;
6995
0
                    noisy_block_cnt = 0;
6996
0
                    hh_energy_percent = 0;
6997
0
                    variance_8x8 = 0;
6998
0
                    block_ht = ctb_height;
6999
0
                    block_wd = ctb_width;
7000
7001
0
                    mean = 0;
7002
7003
0
                    ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
7004
0
                    num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005
7006
0
                    num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
7007
0
                    num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
7008
7009
0
                    ht_offset = -had_block_size;
7010
0
                    wd_offset = -had_block_size;
7011
7012
0
                    num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
7013
0
                    for(i = 0; i < num_comp_had_blocks; i++)
7014
0
                    {
7015
0
                        if(i % num_horz_blocks == 0)
7016
0
                        {
7017
0
                            wd_offset = -had_block_size;
7018
0
                            ht_offset += had_block_size;
7019
0
                        }
7020
0
                        wd_offset += had_block_size;
7021
7022
                        /* CU level offsets */
7023
0
                        i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7024
0
                        i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025
7026
                        /* if 50 % or more of the CU is noisy then the return value is 1 */
7027
0
                        is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028
0
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029
0
                            (i % 4) * 16,
7030
0
                            (i / 4) * 16,
7031
0
                            16);
7032
7033
                        /* the temporal noise detect call is made on the CU only if the CU is noisy */
7034
0
                        if(is_noisy)
7035
0
                        {
7036
0
                            index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037
0
                                              (i % num_horz_blocks) * 2;
7038
0
                            noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039
0
                                16,
7040
0
                                ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041
0
                                    ? 64
7042
0
                                    : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043
0
                                ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044
0
                                    ? 64
7045
0
                                    : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046
0
                                ps_ctb_noise_params,
7047
0
                                &s_srch_cand_init_data,
7048
0
                                &s_search_prms_blk,
7049
0
                                ps_ctxt,
7050
0
                                num_pred_dir,
7051
0
                                i4_num_act_ref_l0,
7052
0
                                i4_num_act_ref_l1,
7053
0
                                i4_cu_x_off,
7054
0
                                i4_cu_y_off,
7055
0
                                &ps_ctxt->s_wt_pred,
7056
0
                                unit_size,
7057
0
                                index_8x8_block,
7058
0
                                num_horz_blocks,
7059
0
                                /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7060
0
                                i);
7061
0
                        } /* if 16x16 is noisy */
7062
0
                    } /* loop over for all 16x16*/
7063
7064
0
                    if(noisy_block_cnt >= min_noisy_block_cnt)
7065
0
                    {
7066
0
                        noise_detected = 1;
7067
0
                    }
7068
7069
                    /* write back the noise presence detected for the current CTB to the structure */
7070
0
                    ps_ctb_noise_params->i4_noise_present = noise_detected;
7071
0
                }
7072
0
#endif
7073
7074
#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075
                if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076
                   ps_ctb_noise_params->i4_noise_present)
7077
                {
7078
                    memset(
7079
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080
                        1,
7081
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082
                }
7083
#endif
7084
7085
0
                for(i = 0; i < 16; i++)
7086
0
                {
7087
0
                    au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089
0
                }
7090
7091
0
                for(i = 0; i < 4; i++)
7092
0
                {
7093
0
                    au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095
0
                }
7096
7097
0
                for(i = 0; i < 1; i++)
7098
0
                {
7099
0
                    au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101
0
                }
7102
7103
0
                if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104
0
                   (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105
0
                    MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106
0
                {
7107
0
                    ps_ctb_noise_params->i4_noise_present = 0;
7108
0
                    memset(
7109
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110
0
                        0,
7111
0
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112
0
                }
7113
7114
0
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115
0
                for(i = 0; i < 4; i++)
7116
0
                {
7117
0
                    S32 j;
7118
0
                    S32 lambda;
7119
7120
0
                    if(au1_is_32x32Blk_noisy[i])
7121
0
                    {
7122
0
                        lambda = lambda_recon;
7123
0
                        lambda =
7124
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125
7126
0
                        ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127
7128
0
                        for(j = 0; j < 2; j++)
7129
0
                        {
7130
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131
0
                        }
7132
0
                    }
7133
0
                }
7134
7135
0
                {
7136
0
                    S32 j;
7137
0
                    S32 lambda;
7138
7139
0
                    if(au1_is_64x64Blk_noisy[0])
7140
0
                    {
7141
0
                        lambda = lambda_recon;
7142
0
                        lambda =
7143
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144
7145
0
                        ps_search_results = &ps_ctxt->s_search_results_64x64;
7146
7147
0
                        for(j = 0; j < 2; j++)
7148
0
                        {
7149
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150
0
                        }
7151
0
                    }
7152
0
                }
7153
0
#endif
7154
0
                if(au1_is_64x64Blk_noisy[0])
7155
0
                {
7156
0
                    U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157
0
                                                             (s_common_frm_prms.i4_ctb_y_off *
7158
0
                                                              ps_curr_layer->i4_inp_stride));
7159
7160
0
                    hme_compute_sigmaX_and_sigmaXSquared(
7161
0
                        pu1_inp,
7162
0
                        ps_curr_layer->i4_inp_stride,
7163
0
                        ps_ctxt->au4_4x4_src_sigmaX,
7164
0
                        ps_ctxt->au4_4x4_src_sigmaXSquared,
7165
0
                        4,
7166
0
                        4,
7167
0
                        64,
7168
0
                        64,
7169
0
                        1,
7170
0
                        16);
7171
0
                }
7172
0
                else
7173
0
                {
7174
0
                    for(i = 0; i < 4; i++)
7175
0
                    {
7176
0
                        if(au1_is_32x32Blk_noisy[i])
7177
0
                        {
7178
0
                            U08 *pu1_inp =
7179
0
                                ps_curr_layer->pu1_inp +
7180
0
                                (s_common_frm_prms.i4_ctb_x_off +
7181
0
                                 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182
7183
0
                            U08 u1_cu_size = 32;
7184
0
                            WORD32 i4_inp_buf_offset =
7185
0
                                (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186
0
                                 ((i % 2) * u1_cu_size));
7187
7188
0
                            U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189
0
                            U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190
0
                            S32 i4_sigma_arr_offset =
7191
0
                                (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192
0
                                 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193
7194
0
                            hme_compute_sigmaX_and_sigmaXSquared(
7195
0
                                pu1_inp + i4_inp_buf_offset,
7196
0
                                ps_curr_layer->i4_inp_stride,
7197
0
                                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198
0
                                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199
0
                                4,
7200
0
                                4,
7201
0
                                32,
7202
0
                                32,
7203
0
                                1,
7204
0
                                16);
7205
0
                        }
7206
0
                        else
7207
0
                        {
7208
0
                            S32 j;
7209
7210
0
                            U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211
0
                            U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212
0
                            S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213
0
                                (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214
0
                                 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215
7216
0
                            for(j = 0; j < 4; j++)
7217
0
                            {
7218
0
                                U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219
0
                                U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220
0
                                S32 i4_16x16_blk_index_in_ctb =
7221
0
                                    i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222
0
                                    ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223
0
                                    ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224
7225
                                //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226
7227
0
                                if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228
0
                                {
7229
0
                                    U08 *pu1_inp =
7230
0
                                        ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231
0
                                                                  (s_common_frm_prms.i4_ctb_y_off *
7232
0
                                                                   ps_curr_layer->i4_inp_stride));
7233
7234
0
                                    U08 u1_cu_size = 16;
7235
0
                                    WORD32 i4_inp_buf_offset =
7236
0
                                        (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7238
0
                                          (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239
7240
0
                                    U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241
0
                                    U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242
0
                                    S32 i4_sigma_arr_offset =
7243
0
                                        (((i4_16x16_blk_index_in_ctb % 4) *
7244
0
                                          u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7246
0
                                          u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247
7248
0
                                    hme_compute_sigmaX_and_sigmaXSquared(
7249
0
                                        pu1_inp + i4_inp_buf_offset,
7250
0
                                        ps_curr_layer->i4_inp_stride,
7251
0
                                        (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252
0
                                        (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253
0
                                        4,
7254
0
                                        4,
7255
0
                                        16,
7256
0
                                        16,
7257
0
                                        1,
7258
0
                                        16);
7259
0
                                }
7260
0
                            }
7261
0
                        }
7262
0
                    }
7263
0
                }
7264
0
            }
7265
146k
            else
7266
146k
            {
7267
146k
                memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268
7269
146k
                memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270
7271
146k
                memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272
146k
            }
7273
7274
2.41M
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275
2.27M
            {
7276
2.27M
                S32 ref_ctr;
7277
2.27M
                U08 au1_pred_dir_searched[2];
7278
2.27M
                U08 u1_is_cu_noisy;
7279
2.27M
                ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280
7281
2.27M
                {
7282
2.27M
                    blk_x = (i4_ctb_x << 2) +
7283
2.27M
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284
2.27M
                    blk_y = (i4_ctb_y << 2) +
7285
2.27M
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286
7287
2.27M
                    blk_id_in_full_ctb =
7288
2.27M
                        ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289
2.27M
                    blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290
2.27M
                    ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291
2.27M
                    s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292
2.27M
                    s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293
2.27M
                }
7294
7295
                /* get the current input blk point */
7296
2.27M
                pos_x = blk_x << blk_size_shift;
7297
2.27M
                pos_y = blk_y << blk_size_shift;
7298
2.27M
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299
7300
                /*********************************************************************/
7301
                /* For every blk in the picture, the search range needs to be derived*/
7302
                /* Any blk can have any mv, but practical search constraints are     */
7303
                /* imposed by the picture boundary and amt of padding.               */
7304
                /*********************************************************************/
7305
                /* MV limit is different based on ref. PIC */
7306
6.80M
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307
4.53M
                {
7308
4.53M
                    if(!s_search_prms_blk.i4_use_rec)
7309
0
                    {
7310
0
                        hme_derive_search_range(
7311
0
                            &as_range_prms_inp[ref_ctr],
7312
0
                            &s_pic_limit_inp,
7313
0
                            &as_mv_limit[ref_ctr],
7314
0
                            pos_x,
7315
0
                            pos_y,
7316
0
                            blk_wd,
7317
0
                            blk_ht);
7318
0
                    }
7319
4.53M
                    else
7320
4.53M
                    {
7321
4.53M
                        hme_derive_search_range(
7322
4.53M
                            &as_range_prms_rec[ref_ctr],
7323
4.53M
                            &s_pic_limit_rec,
7324
4.53M
                            &as_mv_limit[ref_ctr],
7325
4.53M
                            pos_x,
7326
4.53M
                            pos_y,
7327
4.53M
                            blk_wd,
7328
4.53M
                            blk_ht);
7329
4.53M
                    }
7330
4.53M
                }
7331
2.27M
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332
2.27M
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333
                /* Select search results from a suitable search result in the context */
7334
2.27M
                {
7335
2.27M
                    ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336
7337
2.27M
                    if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338
625k
                    {
7339
625k
                        S32 i;
7340
7341
1.87M
                        for(i = 0; i < 2; i++)
7342
1.25M
                        {
7343
1.25M
                            ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344
1.25M
                        }
7345
625k
                    }
7346
2.27M
                }
7347
7348
2.27M
                u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349
2.27M
                    [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350
7351
2.27M
                s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352
7353
2.27M
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354
2.27M
                if(u1_is_cu_noisy)
7355
0
                {
7356
0
                    S32 j;
7357
0
                    S32 lambda;
7358
7359
0
                    lambda = lambda_recon;
7360
0
                    lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361
7362
0
                    for(j = 0; j < 2; j++)
7363
0
                    {
7364
0
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365
0
                    }
7366
0
                }
7367
2.27M
                else
7368
2.27M
                {
7369
2.27M
                    S32 j;
7370
2.27M
                    S32 lambda;
7371
7372
2.27M
                    lambda = lambda_recon;
7373
7374
6.81M
                    for(j = 0; j < 2; j++)
7375
4.54M
                    {
7376
4.54M
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377
4.54M
                    }
7378
2.27M
                }
7379
2.27M
#endif
7380
7381
2.27M
                s_search_prms_blk.ps_search_results = ps_search_results;
7382
7383
2.27M
                s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384
2.27M
                    pu1_inp,
7385
2.27M
                    i4_inp_stride,
7386
2.27M
                    ps_refine_prms->limit_active_partitions,
7387
2.27M
                    ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388
2.27M
                    ps_ctxt->u1_is_curFrame_a_refFrame,
7389
2.27M
                    blk_8x8_mask,
7390
2.27M
                    e_me_quality_presets);
7391
7392
2.27M
                if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393
501k
                {
7394
501k
                    ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395
501k
                        s_search_prms_blk.i4_part_mask;
7396
501k
                }
7397
7398
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399
2.27M
                {
7400
                    /* Setting u1_num_active_ref to 2 (bidir enabled) or 1 (otherwise) */
7401
                    /* for the sole purpose of the */
7402
                    /* function called below */
7403
2.27M
                    ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404
7405
2.27M
                    hme_reset_search_results(
7406
2.27M
                        ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407
7408
2.27M
                    ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409
2.27M
                }
7410
7411
2.27M
                if(0 == blk_id_in_ctb)
7412
146k
                {
7413
146k
                    UWORD8 u1_ctr;
7414
439k
                    for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415
439k
                                              ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416
292k
                        u1_ctr++)
7417
292k
                    {
7418
292k
                        WORD32 i4_max_dep_ctb_y;
7419
292k
                        WORD32 i4_max_dep_ctb_x;
7420
7421
                        /* Set max mv in ctb units */
7422
292k
                        i4_max_mv_x_in_ctb =
7423
292k
                            (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424
292k
                            ps_ctxt->log_ctb_size;
7425
7426
292k
                        i4_max_mv_y_in_ctb =
7427
292k
                            (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428
292k
                            ps_ctxt->log_ctb_size;
7429
                        /********************************************************************/
7430
                        /* Set max ctb_x and ctb_y dependency on reference picture          */
7431
                        /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7432
                        /********************************************************************/
7433
292k
                        i4_max_dep_ctb_x = CLIP3(
7434
292k
                            (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435
292k
                            0,
7436
292k
                            ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437
292k
                        i4_max_dep_ctb_y = CLIP3(
7438
292k
                            (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439
292k
                            0,
7440
292k
                            ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441
7442
292k
                        ihevce_dmgr_map_chk_sync(
7443
292k
                            ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444
292k
                            ps_ctxt->thrd_id,
7445
292k
                            i4_ctb_x,
7446
292k
                            i4_ctb_y,
7447
292k
                            i4_max_mv_x_in_ctb,
7448
292k
                            i4_max_mv_y_in_ctb);
7449
292k
                    }
7450
146k
                }
7451
7452
                /* Loop across different Ref IDx */
7453
5.04M
                for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454
2.77M
                {
7455
2.77M
                    S32 resultid;
7456
2.77M
                    S08 u1_default_ref_id;
7457
2.77M
                    S32 i4_num_srch_cands = 0;
7458
2.77M
                    S32 i4_num_refinement_iterations;
7459
2.77M
                    S32 i4_refine_iter_ctr;
7460
7461
2.77M
                    if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462
2.77M
                       (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463
2.76M
                    {
7464
2.76M
                        u1_pred_dir = u1_pred_dir_ctr;
7465
2.76M
                    }
7466
11.8k
                    else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467
11.8k
                    {
7468
11.8k
                        u1_pred_dir = 1;
7469
11.8k
                    }
7470
7471
2.77M
                    u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472
2.77M
                                                           : ps_ctxt->ai1_future_list[0];
7473
2.77M
                    au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474
7475
2.77M
                    i4_num_srch_cands = 0;
7476
2.77M
                    resultid = 0;
7477
7478
                    /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479
2.77M
                    if(0 == blk_id_in_ctb)
7480
179k
                    {
7481
                        /*****************************************************************/
7482
                        /* Initialize the mv grid with results of neighbours for the next*/
7483
                        /* ctb.                                                          */
7484
                        /*****************************************************************/
7485
179k
                        hme_fill_ctb_neighbour_mvs(
7486
179k
                            ps_curr_layer,
7487
179k
                            blk_x,
7488
179k
                            blk_y,
7489
179k
                            aps_mv_grid[u1_pred_dir],
7490
179k
                            u1_pred_dir_ctr,
7491
179k
                            u1_default_ref_id,
7492
179k
                            ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493
179k
                    }
7494
7495
2.77M
                    s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496
7497
2.77M
                    {
7498
2.77M
                        if((blk_id_in_full_ctb % 4) == 0)
7499
701k
                        {
7500
701k
                            ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501
701k
                                .as_pred_ctxt[u1_pred_dir]
7502
701k
                                .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503
701k
                        }
7504
7505
2.77M
                        if(blk_id_in_full_ctb == 0)
7506
179k
                        {
7507
179k
                            ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508
179k
                        }
7509
7510
2.77M
                        ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511
2.77M
                            !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512
2.77M
                    }
7513
7514
2.77M
                    {
7515
2.77M
                        S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516
2.77M
                        S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517
2.77M
                        U08 u1_is_blk_at_ctb_boundary = !y;
7518
7519
2.77M
                        s_srch_cand_init_data.u1_is_left_available =
7520
2.77M
                            !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521
7522
2.77M
                        if(u1_is_blk_at_ctb_boundary)
7523
701k
                        {
7524
701k
                            s_srch_cand_init_data.u1_is_topRight_available = 0;
7525
701k
                            s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526
701k
                            s_srch_cand_init_data.u1_is_top_available = 0;
7527
701k
                        }
7528
2.07M
                        else
7529
2.07M
                        {
7530
2.07M
                            s_srch_cand_init_data.u1_is_topRight_available =
7531
2.07M
                                gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532
2.07M
                            s_srch_cand_init_data.u1_is_top_available = 1;
7533
2.07M
                            s_srch_cand_init_data.u1_is_topLeft_available =
7534
2.07M
                                s_srch_cand_init_data.u1_is_left_available;
7535
2.07M
                        }
7536
2.77M
                    }
7537
7538
2.77M
                    s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539
2.77M
                    s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540
2.77M
                    s_srch_cand_init_data.i4_pos_x = pos_x;
7541
2.77M
                    s_srch_cand_init_data.i4_pos_y = pos_y;
7542
2.77M
                    s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543
2.77M
                    s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544
2.77M
                    s_srch_cand_init_data.u1_search_candidate_list_index =
7545
2.77M
                        au1_search_candidate_list_index[u1_pred_dir];
7546
7547
2.77M
                    i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548
7549
                    /* Note this block also clips the MV range for all candidates */
7550
2.77M
                    {
7551
2.77M
                        S08 i1_check_for_mult_refs;
7552
7553
2.77M
                        i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554
2.77M
                                                             : (ps_ctxt->num_ref_past > 1);
7555
7556
2.77M
                        ps_me_optimised_function_list->pf_mv_clipper(
7557
2.77M
                            &s_search_prms_blk,
7558
2.77M
                            i4_num_srch_cands,
7559
2.77M
                            i1_check_for_mult_refs,
7560
2.77M
                            ps_refine_prms->i4_num_steps_fpel_refine,
7561
2.77M
                            ps_refine_prms->i4_num_steps_hpel_refine,
7562
2.77M
                            ps_refine_prms->i4_num_steps_qpel_refine);
7563
2.77M
                    }
7564
7565
2.77M
#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566
2.77M
                    i4_num_refinement_iterations =
7567
2.77M
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568
2.77M
                            ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569
2.77M
                            : 1;
7570
#else
7571
                    i4_num_refinement_iterations =
7572
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573
#endif
7574
7575
#if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576
                    if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577
                    {
7578
                        i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579
                                                                          : i4_num_act_ref_l1;
7580
                    }
7581
#endif
7582
7583
6.86M
                    for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584
4.08M
                        i4_refine_iter_ctr++)
7585
4.08M
                    {
7586
4.08M
                        S32 center_x;
7587
4.08M
                        S32 center_y;
7588
4.08M
                        S32 center_ref_idx;
7589
7590
4.08M
                        S08 *pi1_pred_dir_to_ref_idx =
7591
4.08M
                            (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592
7593
4.08M
                        {
7594
4.08M
                            WORD32 i4_i;
7595
7596
73.6M
                            for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597
69.5M
                            {
7598
69.5M
                                ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599
69.5M
                                ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600
69.5M
                                ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601
69.5M
                                    MAX_SIGNED_16BIT_VAL;
7602
69.5M
                                ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603
69.5M
                                ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604
69.5M
                                ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605
7606
69.5M
                                if(ps_refine_prms->i4_num_results_per_part == 2)
7607
0
                                {
7608
0
                                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609
0
                                        MAX_SIGNED_16BIT_VAL;
7610
0
                                    ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611
0
                                        MAX_SIGNED_16BIT_VAL;
7612
0
                                    ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613
0
                                        MAX_SIGNED_16BIT_VAL;
7614
0
                                    ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615
0
                                    ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616
0
                                    ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617
0
                                }
7618
69.5M
                            }
7619
7620
4.08M
                            s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621
4.08M
                            s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622
4.08M
                        }
7623
7624
4.08M
                        {
7625
4.08M
                            search_node_t *ps_coloc_node;
7626
7627
4.08M
                            S32 i = 0;
7628
7629
4.08M
                            if(i4_num_refinement_iterations > 1)
7630
2.25M
                            {
7631
7.60M
                                for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632
7.00M
                                {
7633
7.00M
                                    ps_coloc_node =
7634
7.00M
                                        s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635
7.00M
                                            .ps_search_node;
7636
7637
7.00M
                                    if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638
7.00M
                                       ps_coloc_node->i1_ref_idx)
7639
1.65M
                                    {
7640
1.65M
                                        break;
7641
1.65M
                                    }
7642
7.00M
                                }
7643
7644
2.25M
                                if(i == ai4_num_coloc_cands[u1_pred_dir])
7645
605k
                                {
7646
605k
                                    i = 0;
7647
605k
                                }
7648
2.25M
                            }
7649
1.83M
                            else
7650
1.83M
                            {
7651
1.83M
                                ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652
1.83M
                                                    .ps_search_node;
7653
1.83M
                            }
7654
7655
4.08M
                            hme_set_mvp_node(
7656
4.08M
                                ps_search_results,
7657
4.08M
                                ps_coloc_node,
7658
4.08M
                                u1_pred_dir,
7659
4.08M
                                (i4_num_refinement_iterations > 1)
7660
4.08M
                                    ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661
4.08M
                                    : u1_default_ref_id);
7662
7663
4.08M
                            center_x = ps_coloc_node->ps_mv->i2_mvx;
7664
4.08M
                            center_y = ps_coloc_node->ps_mv->i2_mvy;
7665
4.08M
                            center_ref_idx = ps_coloc_node->i1_ref_idx;
7666
4.08M
                        }
7667
7668
                        /* Full-Pel search */
7669
4.08M
                        {
7670
4.08M
                            S32 num_unique_nodes;
7671
7672
4.08M
                            memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673
7674
4.08M
                            num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675
4.08M
                                as_unique_search_nodes,
7676
4.08M
                                s_search_prms_blk.ps_search_candts,
7677
4.08M
                                au4_unique_node_map,
7678
4.08M
                                pi1_pred_dir_to_ref_idx,
7679
4.08M
                                i4_num_srch_cands,
7680
4.08M
                                s_search_prms_blk.i4_num_init_candts,
7681
4.08M
                                i4_refine_iter_ctr,
7682
4.08M
                                i4_num_refinement_iterations,
7683
4.08M
                                i4_num_act_ref_l0,
7684
4.08M
                                center_ref_idx,
7685
4.08M
                                center_x,
7686
4.08M
                                center_y,
7687
4.08M
                                ps_ctxt->s_frm_prms.bidir_enabled,
7688
4.08M
                                e_me_quality_presets);
7689
7690
                            /*************************************************************************/
7691
                            /* This array stores the ids of the partitions whose                     */
7692
                            /* SADs are updated. Since the partitions whose SADs are updated may not */
7693
                            /* be in contiguous order, we supply another level of indirection.       */
7694
                            /*************************************************************************/
7695
4.08M
                            ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696
4.08M
                                s_search_prms_blk.i4_part_mask,
7697
4.08M
                                &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698
7699
4.08M
                            if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700
0
                            {
7701
0
                                S32 i;
7702
                                /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703
0
                                S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704
0
                                                            (s_search_prms_blk.i4_cu_y_off * 4);
7705
7706
0
                                for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707
0
                                {
7708
0
                                    S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709
7710
0
                                    hme_compute_final_sigma_of_pu_from_base_blocks(
7711
0
                                        ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712
0
                                        ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713
0
                                        au8_final_src_sigmaX,
7714
0
                                        au8_final_src_sigmaXSquared,
7715
0
                                        16,
7716
0
                                        4,
7717
0
                                        i4_part_id,
7718
0
                                        16);
7719
0
                                }
7720
7721
0
                                s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722
0
                                s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723
0
                                    au8_final_src_sigmaXSquared;
7724
7725
0
                                s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726
0
                                s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727
0
                                    au8_final_src_sigmaXSquared;
7728
0
                            }
7729
7730
4.08M
                            if(0 == num_unique_nodes)
7731
99.1k
                            {
7732
99.1k
                                continue;
7733
99.1k
                            }
7734
7735
3.98M
                            if(num_unique_nodes >= 2)
7736
1.25M
                            {
7737
1.25M
                                s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738
1.25M
                                s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739
1.25M
                                if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740
361k
                                {
7741
361k
                                    if(ps_ctxt->i4_temporal_layer == 1)
7742
111k
                                    {
7743
111k
                                        hme_fullpel_cand_sifter(
7744
111k
                                            &s_search_prms_blk,
7745
111k
                                            ps_curr_layer,
7746
111k
                                            &ps_ctxt->s_wt_pred,
7747
111k
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7748
111k
                                            u1_is_cu_noisy,
7749
111k
                                            ps_me_optimised_function_list);
7750
111k
                                    }
7751
249k
                                    else
7752
249k
                                    {
7753
249k
                                        hme_fullpel_cand_sifter(
7754
249k
                                            &s_search_prms_blk,
7755
249k
                                            ps_curr_layer,
7756
249k
                                            &ps_ctxt->s_wt_pred,
7757
249k
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7758
249k
                                            u1_is_cu_noisy,
7759
249k
                                            ps_me_optimised_function_list);
7760
249k
                                    }
7761
361k
                                }
7762
898k
                                else
7763
898k
                                {
7764
898k
                                    hme_fullpel_cand_sifter(
7765
898k
                                        &s_search_prms_blk,
7766
898k
                                        ps_curr_layer,
7767
898k
                                        &ps_ctxt->s_wt_pred,
7768
898k
                                        ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769
898k
                                        u1_is_cu_noisy,
7770
898k
                                        ps_me_optimised_function_list);
7771
898k
                                }
7772
1.25M
                            }
7773
7774
3.98M
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775
7776
3.98M
                            hme_fullpel_refine(
7777
3.98M
                                ps_refine_prms,
7778
3.98M
                                &s_search_prms_blk,
7779
3.98M
                                ps_curr_layer,
7780
3.98M
                                &ps_ctxt->s_wt_pred,
7781
3.98M
                                au4_unique_node_map,
7782
3.98M
                                num_unique_nodes,
7783
3.98M
                                blk_8x8_mask,
7784
3.98M
                                center_x,
7785
3.98M
                                center_y,
7786
3.98M
                                center_ref_idx,
7787
3.98M
                                e_me_quality_presets,
7788
3.98M
                                ps_me_optimised_function_list);
7789
3.98M
                        }
7790
7791
                        /* Sub-Pel search */
7792
0
                        {
7793
3.98M
                            hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794
7795
3.98M
                            s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796
3.98M
                                &ps_ctxt->s_buf_mgr,
7797
3.98M
                                INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798
                            /* MV limit is different based on ref. PIC */
7799
13.4M
                            for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800
9.43M
                            {
7801
9.43M
                                SCALE_RANGE_PRMS(
7802
9.43M
                                    as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803
9.43M
                                SCALE_RANGE_PRMS(
7804
9.43M
                                    as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805
9.43M
                            }
7806
3.98M
                            s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807
3.98M
                            s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808
7809
3.98M
                            hme_subpel_refine_cu_hs(
7810
3.98M
                                &s_subpel_prms,
7811
3.98M
                                ps_curr_layer,
7812
3.98M
                                ps_search_results,
7813
3.98M
                                u1_pred_dir,
7814
3.98M
                                &ps_ctxt->s_wt_pred,
7815
3.98M
                                blk_8x8_mask,
7816
3.98M
                                ps_ctxt->ps_func_selector,
7817
3.98M
                                ps_cmn_utils_optimised_function_list,
7818
3.98M
                                ps_me_optimised_function_list);
7819
3.98M
                        }
7820
3.98M
                    }
7821
2.77M
                }
7822
                /* Populate the new PU struct with the results post subpel refinement*/
7823
2.27M
                {
7824
2.27M
                    inter_cu_results_t *ps_cu_results;
7825
2.27M
                    WORD32 best_inter_cost, intra_cost, posx, posy;
7826
7827
2.27M
                    UWORD8 intra_8x8_enabled = 0;
7828
7829
                    /*  cost of 16x16 cu parent  */
7830
2.27M
                    WORD32 parent_cost = MAX_32BIT_VAL;
7831
7832
                    /*  cost of 8x8 cu children  */
7833
                    /*********************************************************************/
7834
                    /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835
                    /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836
                    /* So, 4*lambda is extra for children cost.                          */
7837
                    /*********************************************************************/
7838
2.27M
                    WORD32 child_cost = 0;
7839
7840
2.27M
                    ps_cu_results = ps_search_results->ps_cu_results;
7841
7842
                    /* Initialize the pu_results pointers to the first struct in the stack array */
7843
2.27M
                    ps_pu_results = as_inter_pu_results;
7844
7845
2.27M
                    hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846
7847
2.27M
                    hme_populate_pus(
7848
2.27M
                        ps_thrd_ctxt,
7849
2.27M
                        ps_ctxt,
7850
2.27M
                        &s_subpel_prms,
7851
2.27M
                        ps_search_results,
7852
2.27M
                        ps_cu_results,
7853
2.27M
                        ps_pu_results,
7854
2.27M
                        &(as_pu_results[0][0][0]),
7855
2.27M
                        &s_common_frm_prms,
7856
2.27M
                        &ps_ctxt->s_wt_pred,
7857
2.27M
                        ps_curr_layer,
7858
2.27M
                        au1_pred_dir_searched,
7859
2.27M
                        i4_num_pred_dir);
7860
7861
2.27M
                    ps_cu_results->i4_inp_offset =
7862
2.27M
                        (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863
7864
2.27M
                    hme_decide_part_types(
7865
2.27M
                        ps_cu_results,
7866
2.27M
                        ps_pu_results,
7867
2.27M
                        &s_common_frm_prms,
7868
2.27M
                        ps_ctxt,
7869
2.27M
                        ps_cmn_utils_optimised_function_list,
7870
2.27M
                        ps_me_optimised_function_list
7871
7872
2.27M
                    );
7873
7874
                    /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
7875
                    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
7876
2.27M
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877
1.75M
                    {
7878
1.75M
                        WORD32 res_ctr;
7879
7880
4.48M
                        for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881
2.72M
                        {
7882
2.72M
                            WORD32 num_part = 2, part_ctr;
7883
2.72M
                            part_type_results_t *ps_best_results =
7884
2.72M
                                &ps_cu_results->ps_best_results[res_ctr];
7885
7886
2.72M
                            if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887
1.72M
                                num_part = 1;
7888
7889
6.45M
                            for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890
3.72M
                            {
7891
3.72M
                                pu_result_t *ps_pu_results =
7892
3.72M
                                    &ps_best_results->as_pu_results[part_ctr];
7893
7894
3.72M
                                ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895
7896
3.72M
                                hme_update_dynamic_search_params(
7897
3.72M
                                    &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898
3.72M
                                         .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899
3.72M
                                    ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900
7901
                                /* Sanity Check */
7902
3.72M
                                ASSERT(
7903
3.72M
                                    ps_pu_results->pu.mv.i1_l0_ref_idx <
7904
3.72M
                                    ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905
7906
                                /* No L1 for P Pic. */
7907
3.72M
                                ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908
                                /* No BI for P Pic. */
7909
3.72M
                                ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910
3.72M
                            }
7911
2.72M
                        }
7912
1.75M
                    }
7913
7914
                    /*****************************************************************/
7915
                    /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7916
                    /*****************************************************************/
7917
7918
2.27M
#if DISABLE_INTRA_IN_BPICS
7919
2.27M
                    if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920
2.27M
                             (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921
2.04M
#endif
7922
2.04M
                    {
7923
2.04M
                        if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924
2.04M
                        {
7925
2.04M
                            hme_insert_intra_nodes_post_bipred(
7926
2.04M
                                ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927
2.04M
                        }
7928
2.04M
                    }
7929
7930
2.27M
#if DISABLE_INTRA_IN_BPICS
7931
2.27M
                    if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932
2.27M
                       (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933
222k
                    {
7934
222k
                        intra_8x8_enabled = 0;
7935
222k
                    }
7936
2.04M
                    else
7937
2.04M
#endif
7938
2.04M
                    {
7939
                        /* TRAQO intra flag update */
7940
2.04M
                        if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941
18.5k
                        {
7942
18.5k
                            best_inter_cost =
7943
18.5k
                                ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944
18.5k
                            intra_cost =
7945
18.5k
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946
                            /*@16x16 level*/
7947
18.5k
                            posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948
18.5k
                                    << 2) >>
7949
18.5k
                                   4;
7950
18.5k
                            posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951
18.5k
                                    << 2) >>
7952
18.5k
                                   4;
7953
18.5k
                        }
7954
2.02M
                        else
7955
2.02M
                        {
7956
2.02M
                            best_inter_cost =
7957
2.02M
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958
2.02M
                            posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959
2.02M
                                    << 2) >>
7960
2.02M
                                   3;
7961
2.02M
                            posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962
2.02M
                                    << 2) >>
7963
2.02M
                                   3;
7964
2.02M
                        }
7965
7966
                        /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967
2.04M
                        if(ps_cur_ipe_ctb->u1_split_flag)
7968
2.02M
                        {
7969
                            /* Id of the 32x32 block, 16x16 block in a CTB */
7970
2.02M
                            WORD32 i4_32x32_id =
7971
2.02M
                                (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972
2.02M
                            WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973
2.02M
                                                 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974
7975
2.02M
                            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976
1.04M
                            {
7977
1.04M
                                if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978
1.04M
                                       .as_intra16_analyse[i4_16x16_id]
7979
1.04M
                                       .b1_split_flag)
7980
132k
                                {
7981
132k
                                    intra_8x8_enabled =
7982
132k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983
132k
                                            .as_intra16_analyse[i4_16x16_id]
7984
132k
                                            .as_intra8_analyse[0]
7985
132k
                                            .b1_valid_cu;
7986
132k
                                    intra_8x8_enabled &=
7987
132k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988
132k
                                            .as_intra16_analyse[i4_16x16_id]
7989
132k
                                            .as_intra8_analyse[1]
7990
132k
                                            .b1_valid_cu;
7991
132k
                                    intra_8x8_enabled &=
7992
132k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993
132k
                                            .as_intra16_analyse[i4_16x16_id]
7994
132k
                                            .as_intra8_analyse[2]
7995
132k
                                            .b1_valid_cu;
7996
132k
                                    intra_8x8_enabled &=
7997
132k
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998
132k
                                            .as_intra16_analyse[i4_16x16_id]
7999
132k
                                            .as_intra8_analyse[3]
8000
132k
                                            .b1_valid_cu;
8001
132k
                                }
8002
1.04M
                            }
8003
2.02M
                        }
8004
2.04M
                    }
8005
8006
2.27M
                    if(blk_8x8_mask == 0xf)
8007
2.24M
                    {
8008
2.24M
                        parent_cost =
8009
2.24M
                            ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010
2.24M
                        ps_search_results->u1_split_flag = 0;
8011
2.24M
                    }
8012
28.8k
                    else
8013
28.8k
                    {
8014
28.8k
                        ps_search_results->u1_split_flag = 1;
8015
28.8k
                    }
8016
8017
2.27M
                    ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018
8019
2.27M
                    if(s_common_frm_prms.u1_is_cu_noisy)
8020
0
                    {
8021
0
                        intra_8x8_enabled = 0;
8022
0
                    }
8023
8024
                    /* Evaluate 8x8 if NxN part id is enabled */
8025
2.27M
                    if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026
763k
                    {
8027
                        /* Populates the PU's for the 4 8x8's in one call */
8028
763k
                        hme_populate_pus_8x8_cu(
8029
763k
                            ps_thrd_ctxt,
8030
763k
                            ps_ctxt,
8031
763k
                            &s_subpel_prms,
8032
763k
                            ps_search_results,
8033
763k
                            ps_cu_results,
8034
763k
                            ps_pu_results,
8035
763k
                            &(as_pu_results[0][0][0]),
8036
763k
                            &s_common_frm_prms,
8037
763k
                            au1_pred_dir_searched,
8038
763k
                            i4_num_pred_dir,
8039
763k
                            blk_8x8_mask);
8040
8041
                        /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042
763k
                        ps_pu_results = as_inter_pu_results;
8043
8044
3.81M
                        for(i = 0; i < 4; i++)
8045
3.05M
                        {
8046
3.05M
                            if((blk_8x8_mask & (1 << i)))
8047
2.99M
                            {
8048
2.99M
                                if(ps_cu_results->i4_part_mask)
8049
2.91M
                                {
8050
2.91M
                                    hme_decide_part_types(
8051
2.91M
                                        ps_cu_results,
8052
2.91M
                                        ps_pu_results,
8053
2.91M
                                        &s_common_frm_prms,
8054
2.91M
                                        ps_ctxt,
8055
2.91M
                                        ps_cmn_utils_optimised_function_list,
8056
2.91M
                                        ps_me_optimised_function_list
8057
8058
2.91M
                                    );
8059
2.91M
                                }
8060
                                /*****************************************************************/
8061
                                /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8062
                                /*****************************************************************/
8063
2.99M
#if DISABLE_INTRA_IN_BPICS
8064
2.99M
                                if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065
2.99M
                                         (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066
58.4k
                                          TEMPORAL_LAYER_DISABLE)))
8067
2.99M
#endif
8068
2.99M
                                {
8069
2.99M
                                    if(!(DISABLE_INTRA_WHEN_NOISY &&
8070
2.99M
                                         s_common_frm_prms.u1_is_cu_noisy))
8071
2.99M
                                    {
8072
2.99M
                                        hme_insert_intra_nodes_post_bipred(
8073
2.99M
                                            ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074
2.99M
                                    }
8075
2.99M
                                }
8076
8077
2.99M
                                child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078
2.99M
                            }
8079
8080
3.05M
                            ps_cu_results++;
8081
3.05M
                            ps_pu_results++;
8082
3.05M
                        }
8083
8084
                        /* Compare 16x16 vs 8x8 cost */
8085
763k
                        if(child_cost < parent_cost)
8086
95.2k
                        {
8087
95.2k
                            ps_search_results->best_cu_cost = child_cost;
8088
95.2k
                            ps_search_results->u1_split_flag = 1;
8089
95.2k
                        }
8090
763k
                    }
8091
2.27M
                }
8092
8093
0
                hme_update_mv_bank_encode(
8094
2.27M
                    ps_search_results,
8095
2.27M
                    ps_curr_layer->ps_layer_mvbank,
8096
2.27M
                    blk_x,
8097
2.27M
                    blk_y,
8098
2.27M
                    &s_mv_update_prms,
8099
2.27M
                    au1_pred_dir_searched,
8100
2.27M
                    i4_num_act_ref_l0);
8101
8102
                /*********************************************************************/
8103
                /* Map the best results to an MV Grid. This is a 18x18 grid that is  */
8104
                /* useful for doing things like predictor for cost calculation or    */
8105
                /* also for merge calculations if need be.                           */
8106
                /*********************************************************************/
8107
2.27M
                hme_map_mvs_to_grid(
8108
2.27M
                    &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109
2.27M
            }
8110
8111
            /* Set the CU tree nodes appropriately */
8112
146k
            if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113
114k
            {
8114
114k
                WORD32 i, j;
8115
8116
1.94M
                for(i = 0; i < 16; i++)
8117
1.82M
                {
8118
1.82M
                    cur_ctb_cu_tree_t *ps_tree_node =
8119
1.82M
                        ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120
1.82M
                    search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121
8122
1.82M
                    switch(i >> 2)
8123
1.82M
                    {
8124
456k
                    case 0:
8125
456k
                    {
8126
456k
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8127
8128
456k
                        break;
8129
0
                    }
8130
456k
                    case 1:
8131
456k
                    {
8132
456k
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8133
8134
456k
                        break;
8135
0
                    }
8136
456k
                    case 2:
8137
456k
                    {
8138
456k
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8139
8140
456k
                        break;
8141
0
                    }
8142
456k
                    case 3:
8143
456k
                    {
8144
456k
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8145
8146
456k
                        break;
8147
0
                    }
8148
1.82M
                    }
8149
8150
1.82M
                    switch(i % 4)
8151
1.82M
                    {
8152
456k
                    case 0:
8153
456k
                    {
8154
456k
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8155
8156
456k
                        break;
8157
0
                    }
8158
456k
                    case 1:
8159
456k
                    {
8160
456k
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8161
8162
456k
                        break;
8163
0
                    }
8164
456k
                    case 2:
8165
456k
                    {
8166
456k
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8167
8168
456k
                        break;
8169
0
                    }
8170
456k
                    case 3:
8171
456k
                    {
8172
456k
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8173
8174
456k
                        break;
8175
0
                    }
8176
1.82M
                    }
8177
8178
1.82M
                    if(ai4_blk_8x8_mask[i] == 15)
8179
1.74M
                    {
8180
1.74M
                        if(!ps_results->u1_split_flag)
8181
1.71M
                        {
8182
1.71M
                            ps_tree_node->is_node_valid = 1;
8183
1.71M
                            NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184
1.71M
                        }
8185
32.9k
                        else
8186
32.9k
                        {
8187
32.9k
                            ps_tree_node->is_node_valid = 0;
8188
32.9k
                            ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189
32.9k
                        }
8190
1.74M
                    }
8191
81.2k
                    else
8192
81.2k
                    {
8193
81.2k
                        cur_ctb_cu_tree_t *ps_tree_child;
8194
8195
81.2k
                        ps_tree_node->is_node_valid = 0;
8196
8197
406k
                        for(j = 0; j < 4; j++)
8198
324k
                        {
8199
324k
                            switch(j)
8200
324k
                            {
8201
81.2k
                            case 0:
8202
81.2k
                            {
8203
81.2k
                                ps_tree_child = ps_tree_node->ps_child_node_tl;
8204
8205
81.2k
                                break;
8206
0
                            }
8207
81.2k
                            case 1:
8208
81.2k
                            {
8209
81.2k
                                ps_tree_child = ps_tree_node->ps_child_node_tr;
8210
8211
81.2k
                                break;
8212
0
                            }
8213
81.2k
                            case 2:
8214
81.2k
                            {
8215
81.2k
                                ps_tree_child = ps_tree_node->ps_child_node_bl;
8216
8217
81.2k
                                break;
8218
0
                            }
8219
81.2k
                            case 3:
8220
81.2k
                            {
8221
81.2k
                                ps_tree_child = ps_tree_node->ps_child_node_br;
8222
8223
81.2k
                                break;
8224
0
                            }
8225
324k
                            }
8226
8227
324k
                            ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228
324k
                        }
8229
81.2k
                    }
8230
1.82M
                }
8231
114k
            }
8232
8233
146k
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234
32.5k
            {
8235
32.5k
                cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236
8237
32.5k
                hme_analyse_mv_clustering(
8238
32.5k
                    ps_ctxt->as_search_results_16x16,
8239
32.5k
                    ps_ctxt->as_cu16x16_results,
8240
32.5k
                    ps_ctxt->as_cu8x8_results,
8241
32.5k
                    ps_ctxt->ps_ctb_cluster_info,
8242
32.5k
                    ps_ctxt->ai1_future_list,
8243
32.5k
                    ps_ctxt->ai1_past_list,
8244
32.5k
                    ps_ctxt->s_frm_prms.bidir_enabled,
8245
32.5k
                    e_me_quality_presets);
8246
8247
#if DISABLE_BLK_MERGE_WHEN_NOISY
8248
                ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249
                ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250
                ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251
                ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252
                ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253
                ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254
                ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255
                ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256
                ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257
                ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258
#endif
8259
8260
32.5k
                en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261
32.5k
                                 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262
32.5k
                                 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263
32.5k
                                 (ps_tree->ps_child_node_br->is_node_valid << 3);
8264
8265
32.5k
                en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266
32.5k
                                     (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267
32.5k
                                     (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268
32.5k
                                     (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269
32.5k
                                     (ps_tree->u1_inter_eval_enable << 4);
8270
32.5k
            }
8271
114k
            else
8272
114k
            {
8273
114k
                en_merge_execution = 0x1f;
8274
8275
#if DISABLE_BLK_MERGE_WHEN_NOISY
8276
                en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277
                                 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278
                                 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279
                                 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280
#endif
8281
114k
            }
8282
8283
            /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284
146k
            ps_pu_results = as_inter_pu_results;
8285
8286
146k
            {
8287
146k
                WORD32 ref_ctr;
8288
8289
146k
                s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290
146k
                s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291
8292
                /* MV limit is different based on ref. PIC */
8293
439k
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294
292k
                {
8295
292k
                    SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296
292k
                    SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297
292k
                }
8298
8299
146k
                e_merge_result = CU_SPLIT;
8300
146k
                merge_count_32x32 = 0;
8301
8302
146k
                if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303
140k
                {
8304
140k
                    range_prms_t *ps_pic_limit;
8305
140k
                    if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306
140k
                    {
8307
140k
                        ps_pic_limit = &s_pic_limit_rec;
8308
140k
                    }
8309
0
                    else
8310
0
                    {
8311
0
                        ps_pic_limit = &s_pic_limit_inp;
8312
0
                    }
8313
                    /* MV limit is different based on ref. PIC */
8314
422k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315
281k
                    {
8316
281k
                        hme_derive_search_range(
8317
281k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318
281k
                            ps_pic_limit,
8319
281k
                            &as_mv_limit[ref_ctr],
8320
281k
                            i4_ctb_x << 6,
8321
281k
                            i4_ctb_y << 6,
8322
281k
                            32,
8323
281k
                            32);
8324
8325
281k
                        SCALE_RANGE_PRMS_POINTERS(
8326
281k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327
281k
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328
281k
                            2);
8329
281k
                    }
8330
140k
                    s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331
140k
                    s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332
140k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333
8334
140k
                    e_merge_result = hme_try_merge_high_speed(
8335
140k
                        ps_thrd_ctxt,
8336
140k
                        ps_ctxt,
8337
140k
                        ps_cur_ipe_ctb,
8338
140k
                        &s_subpel_prms,
8339
140k
                        &s_merge_prms_32x32_tl,
8340
140k
                        ps_pu_results,
8341
140k
                        &as_pu_results[0][0][0]);
8342
8343
140k
                    if(e_merge_result == CU_MERGED)
8344
53.1k
                    {
8345
53.1k
                        inter_cu_results_t *ps_cu_results =
8346
53.1k
                            s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347
8348
53.1k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8349
53.1k
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350
52.8k
                        {
8351
52.8k
                            hme_map_mvs_to_grid(
8352
52.8k
                                &aps_mv_grid[0],
8353
52.8k
                                s_merge_prms_32x32_tl.ps_results_merge,
8354
52.8k
                                s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355
52.8k
                                s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356
52.8k
                        }
8357
8358
53.1k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359
40.6k
                        {
8360
40.6k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361
40.6k
                                .ps_child_node_tl->is_node_valid = 1;
8362
40.6k
                            NULLIFY_THE_CHILDREN_NODES(
8363
40.6k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364
40.6k
                                    .ps_child_node_tl);
8365
40.6k
                        }
8366
8367
53.1k
                        merge_count_32x32++;
8368
53.1k
                        e_merge_result = CU_SPLIT;
8369
53.1k
                    }
8370
87.6k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371
18.5k
                    {
8372
18.5k
#if ENABLE_CU_TREE_CULLING
8373
18.5k
                        cur_ctb_cu_tree_t *ps_tree =
8374
18.5k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375
8376
18.5k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377
18.5k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378
18.5k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8379
18.5k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380
18.5k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381
18.5k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382
18.5k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383
18.5k
#endif
8384
18.5k
                    }
8385
140k
                }
8386
6.01k
                else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387
224
                {
8388
224
#if ENABLE_CU_TREE_CULLING
8389
224
                    cur_ctb_cu_tree_t *ps_tree =
8390
224
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391
8392
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8393
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397
224
#endif
8398
8399
224
                    if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400
0
                    {
8401
0
                        ps_tree->is_node_valid = 0;
8402
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404
0
                    }
8405
224
                }
8406
8407
146k
                if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408
139k
                {
8409
139k
                    range_prms_t *ps_pic_limit;
8410
139k
                    if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411
139k
                    {
8412
139k
                        ps_pic_limit = &s_pic_limit_rec;
8413
139k
                    }
8414
0
                    else
8415
0
                    {
8416
0
                        ps_pic_limit = &s_pic_limit_inp;
8417
0
                    }
8418
                    /* MV limit is different based on ref. PIC */
8419
418k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420
279k
                    {
8421
279k
                        hme_derive_search_range(
8422
279k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423
279k
                            ps_pic_limit,
8424
279k
                            &as_mv_limit[ref_ctr],
8425
279k
                            (i4_ctb_x << 6) + 32,
8426
279k
                            i4_ctb_y << 6,
8427
279k
                            32,
8428
279k
                            32);
8429
279k
                        SCALE_RANGE_PRMS_POINTERS(
8430
279k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431
279k
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432
279k
                            2);
8433
279k
                    }
8434
139k
                    s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435
139k
                    s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436
139k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437
8438
139k
                    e_merge_result = hme_try_merge_high_speed(
8439
139k
                        ps_thrd_ctxt,
8440
139k
                        ps_ctxt,
8441
139k
                        ps_cur_ipe_ctb,
8442
139k
                        &s_subpel_prms,
8443
139k
                        &s_merge_prms_32x32_tr,
8444
139k
                        ps_pu_results,
8445
139k
                        &as_pu_results[0][0][0]);
8446
8447
139k
                    if(e_merge_result == CU_MERGED)
8448
130k
                    {
8449
130k
                        inter_cu_results_t *ps_cu_results =
8450
130k
                            s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451
8452
130k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8453
130k
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454
130k
                        {
8455
130k
                            hme_map_mvs_to_grid(
8456
130k
                                &aps_mv_grid[0],
8457
130k
                                s_merge_prms_32x32_tr.ps_results_merge,
8458
130k
                                s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459
130k
                                s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460
130k
                        }
8461
8462
130k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463
102k
                        {
8464
102k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465
102k
                                .ps_child_node_tr->is_node_valid = 1;
8466
102k
                            NULLIFY_THE_CHILDREN_NODES(
8467
102k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468
102k
                                    .ps_child_node_tr);
8469
102k
                        }
8470
8471
130k
                        merge_count_32x32++;
8472
130k
                        e_merge_result = CU_SPLIT;
8473
130k
                    }
8474
9.17k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475
2.90k
                    {
8476
2.90k
#if ENABLE_CU_TREE_CULLING
8477
2.90k
                        cur_ctb_cu_tree_t *ps_tree =
8478
2.90k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479
8480
2.90k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481
2.90k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482
2.90k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8483
2.90k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484
2.90k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485
2.90k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486
2.90k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487
2.90k
#endif
8488
2.90k
                    }
8489
139k
                }
8490
7.38k
                else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491
224
                {
8492
224
#if ENABLE_CU_TREE_CULLING
8493
224
                    cur_ctb_cu_tree_t *ps_tree =
8494
224
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495
8496
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8497
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500
224
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501
224
#endif
8502
8503
224
                    if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504
0
                    {
8505
0
                        ps_tree->is_node_valid = 0;
8506
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508
0
                    }
8509
224
                }
8510
8511
146k
                if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512
138k
                {
8513
138k
                    range_prms_t *ps_pic_limit;
8514
138k
                    if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515
138k
                    {
8516
138k
                        ps_pic_limit = &s_pic_limit_rec;
8517
138k
                    }
8518
0
                    else
8519
0
                    {
8520
0
                        ps_pic_limit = &s_pic_limit_inp;
8521
0
                    }
8522
                    /* MV limit is different based on ref. PIC */
8523
416k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524
277k
                    {
8525
277k
                        hme_derive_search_range(
8526
277k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527
277k
                            ps_pic_limit,
8528
277k
                            &as_mv_limit[ref_ctr],
8529
277k
                            i4_ctb_x << 6,
8530
277k
                            (i4_ctb_y << 6) + 32,
8531
277k
                            32,
8532
277k
                            32);
8533
277k
                        SCALE_RANGE_PRMS_POINTERS(
8534
277k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535
277k
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536
277k
                            2);
8537
277k
                    }
8538
138k
                    s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539
138k
                    s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540
138k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541
8542
138k
                    e_merge_result = hme_try_merge_high_speed(
8543
138k
                        ps_thrd_ctxt,
8544
138k
                        ps_ctxt,
8545
138k
                        ps_cur_ipe_ctb,
8546
138k
                        &s_subpel_prms,
8547
138k
                        &s_merge_prms_32x32_bl,
8548
138k
                        ps_pu_results,
8549
138k
                        &as_pu_results[0][0][0]);
8550
8551
138k
                    if(e_merge_result == CU_MERGED)
8552
129k
                    {
8553
129k
                        inter_cu_results_t *ps_cu_results =
8554
129k
                            s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555
8556
129k
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8557
129k
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558
129k
                        {
8559
129k
                            hme_map_mvs_to_grid(
8560
129k
                                &aps_mv_grid[0],
8561
129k
                                s_merge_prms_32x32_bl.ps_results_merge,
8562
129k
                                s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563
129k
                                s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564
129k
                        }
8565
8566
129k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567
102k
                        {
8568
102k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569
102k
                                .ps_child_node_bl->is_node_valid = 1;
8570
102k
                            NULLIFY_THE_CHILDREN_NODES(
8571
102k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572
102k
                                    .ps_child_node_bl);
8573
102k
                        }
8574
8575
129k
                        merge_count_32x32++;
8576
129k
                        e_merge_result = CU_SPLIT;
8577
129k
                    }
8578
9.25k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579
2.85k
                    {
8580
2.85k
#if ENABLE_CU_TREE_CULLING
8581
2.85k
                        cur_ctb_cu_tree_t *ps_tree =
8582
2.85k
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583
8584
2.85k
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585
2.85k
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586
2.85k
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8587
2.85k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588
2.85k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589
2.85k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590
2.85k
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591
2.85k
#endif
8592
2.85k
                    }
8593
138k
                }
8594
7.92k
                else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595
245
                {
8596
245
#if ENABLE_CU_TREE_CULLING
8597
245
                    cur_ctb_cu_tree_t *ps_tree =
8598
245
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599
8600
245
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8601
245
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602
245
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603
245
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604
245
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605
245
#endif
8606
8607
245
                    if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608
0
                    {
8609
0
                        ps_tree->is_node_valid = 0;
8610
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612
0
                    }
8613
245
                }
8614
8615
146k
                if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616
137k
                {
8617
137k
                    range_prms_t *ps_pic_limit;
8618
137k
                    if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619
137k
                    {
8620
137k
                        ps_pic_limit = &s_pic_limit_rec;
8621
137k
                    }
8622
0
                    else
8623
0
                    {
8624
0
                        ps_pic_limit = &s_pic_limit_inp;
8625
0
                    }
8626
                    /* MV limit is different based on ref. PIC */
8627
413k
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628
275k
                    {
8629
275k
                        hme_derive_search_range(
8630
275k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631
275k
                            ps_pic_limit,
8632
275k
                            &as_mv_limit[ref_ctr],
8633
275k
                            (i4_ctb_x << 6) + 32,
8634
275k
                            (i4_ctb_y << 6) + 32,
8635
275k
                            32,
8636
275k
                            32);
8637
8638
275k
                        SCALE_RANGE_PRMS_POINTERS(
8639
275k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640
275k
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641
275k
                            2);
8642
275k
                    }
8643
137k
                    s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644
137k
                    s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645
137k
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646
8647
137k
                    e_merge_result = hme_try_merge_high_speed(
8648
137k
                        ps_thrd_ctxt,
8649
137k
                        ps_ctxt,
8650
137k
                        ps_cur_ipe_ctb,
8651
137k
                        &s_subpel_prms,
8652
137k
                        &s_merge_prms_32x32_br,
8653
137k
                        ps_pu_results,
8654
137k
                        &as_pu_results[0][0][0]);
8655
8656
137k
                    if(e_merge_result == CU_MERGED)
8657
134k
                    {
8658
                        /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659
8660
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8661
                        (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662
                        {
8663
                        hme_map_mvs_to_grid
8664
                        (
8665
                        &aps_mv_grid[0],
8666
                        s_merge_prms_32x32_br.ps_results_merge,
8667
                        s_merge_prms_32x32_br.au1_pred_dir_searched,
8668
                        s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669
                        );
8670
                        }*/
8671
8672
134k
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673
105k
                        {
8674
105k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675
105k
                                .ps_child_node_br->is_node_valid = 1;
8676
105k
                            NULLIFY_THE_CHILDREN_NODES(
8677
105k
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678
105k
                                    .ps_child_node_br);
8679
105k
                        }
8680
8681
134k
                        merge_count_32x32++;
8682
134k
                        e_merge_result = CU_SPLIT;
8683
134k
                    }
8684
2.82k
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685
780
                    {
8686
780
#if ENABLE_CU_TREE_CULLING
8687
780
                        cur_ctb_cu_tree_t *ps_tree =
8688
780
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689
8690
780
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691
780
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692
780
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8693
780
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694
780
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695
780
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696
780
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697
780
#endif
8698
780
                    }
8699
137k
                }
8700
9.18k
                else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701
240
                {
8702
240
#if ENABLE_CU_TREE_CULLING
8703
240
                    cur_ctb_cu_tree_t *ps_tree =
8704
240
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705
8706
240
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8707
240
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708
240
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709
240
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710
240
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711
240
#endif
8712
8713
240
                    if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714
0
                    {
8715
0
                        ps_tree->is_node_valid = 0;
8716
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718
0
                    }
8719
240
                }
8720
8721
                /* Try merging all 32x32 to 64x64 candts */
8722
146k
                if(((en_merge_32x32 & 0xf) == 0xf) &&
8723
146k
                   (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724
137k
                    ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725
50.4k
                    if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726
50.4k
                         !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727
50.4k
                        (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728
35.0k
                    {
8729
35.0k
                        range_prms_t *ps_pic_limit;
8730
35.0k
                        if(s_merge_prms_64x64.i4_use_rec == 1)
8731
35.0k
                        {
8732
35.0k
                            ps_pic_limit = &s_pic_limit_rec;
8733
35.0k
                        }
8734
0
                        else
8735
0
                        {
8736
0
                            ps_pic_limit = &s_pic_limit_inp;
8737
0
                        }
8738
                        /* MV limit is different based on ref. PIC */
8739
123k
                        for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740
88.0k
                        {
8741
88.0k
                            hme_derive_search_range(
8742
88.0k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743
88.0k
                                ps_pic_limit,
8744
88.0k
                                &as_mv_limit[ref_ctr],
8745
88.0k
                                i4_ctb_x << 6,
8746
88.0k
                                i4_ctb_y << 6,
8747
88.0k
                                64,
8748
88.0k
                                64);
8749
8750
88.0k
                            SCALE_RANGE_PRMS_POINTERS(
8751
88.0k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752
88.0k
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753
88.0k
                                2);
8754
88.0k
                        }
8755
35.0k
                        s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756
35.0k
                        s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757
35.0k
                        s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758
8759
35.0k
                        e_merge_result = hme_try_merge_high_speed(
8760
35.0k
                            ps_thrd_ctxt,
8761
35.0k
                            ps_ctxt,
8762
35.0k
                            ps_cur_ipe_ctb,
8763
35.0k
                            &s_subpel_prms,
8764
35.0k
                            &s_merge_prms_64x64,
8765
35.0k
                            ps_pu_results,
8766
35.0k
                            &as_pu_results[0][0][0]);
8767
8768
35.0k
                        if((e_merge_result == CU_MERGED) &&
8769
35.0k
                           (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770
17.6k
                        {
8771
17.6k
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772
17.6k
                                .is_node_valid = 1;
8773
17.6k
                            NULLIFY_THE_CHILDREN_NODES(
8774
17.6k
                                ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775
17.6k
                        }
8776
17.4k
                        else if(
8777
17.4k
                            (e_merge_result == CU_SPLIT) &&
8778
17.4k
                            (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779
0
                        {
8780
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781
0
                                .is_node_valid = 0;
8782
0
                        }
8783
35.0k
                    }
8784
8785
                /*****************************************************************/
8786
                /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8787
                /*****************************************************************/
8788
146k
                pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789
8790
146k
                {
8791
#ifdef _DEBUG
8792
                    S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793
                                 ? 64
8794
                                 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795
                    S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796
                                 ? 64
8797
                                 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798
                    ASSERT(
8799
                        (wd * ht) ==
8800
                        ihevce_compute_area_of_valid_cus_in_ctb(
8801
                            &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802
#endif
8803
146k
                }
8804
146k
            }
8805
8806
            /* set the dependency for the corresponding row in enc loop */
8807
146k
            ihevce_dmgr_set_row_row_sync(
8808
146k
                pv_dep_mngr_encloop_dep_me,
8809
146k
                (i4_ctb_x + 1),
8810
146k
                i4_ctb_y,
8811
146k
                tile_col_idx /* Col Tile No. */);
8812
8813
146k
            left_ctb_in_diff_tile = 0;
8814
146k
        }
8815
136k
    }
8816
129k
}
8817
8818
/**
8819
********************************************************************************
8820
*  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8821
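*                       refine_prms_t *ps_refine_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)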
8822
*
8823
*  @brief  Top level entry point for refinement ME
8824
*
8825
*  @param[in,out]  ps_ctxt: ME Handle
8826
*
8827
*  @param[in]  ps_refine_prms : refinement layer prms
8828
*
8829
*  @return None
8830
********************************************************************************
8831
*/
8832
void hme_refine_no_encode(
8833
    coarse_me_ctxt_t *ps_ctxt,
8834
    refine_prms_t *ps_refine_prms,
8835
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836
    S32 lyr_job_type,
8837
    WORD32 i4_ping_pong,
8838
    void **ppv_dep_mngr_hme_sync)
8839
179k
{
8840
179k
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841
179k
    ME_QUALITY_PRESETS_T e_me_quality_presets =
8842
179k
        ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843
8844
    /*************************************************************************/
8845
    /* Complexity of search: Low to High                                     */
8846
    /*************************************************************************/
8847
179k
    SEARCH_COMPLEXITY_T e_search_complexity;
8848
8849
    /*************************************************************************/
8850
    /* Config parameter structures for various ME submodules                 */
8851
    /*************************************************************************/
8852
179k
    hme_search_prms_t s_search_prms_blk;
8853
179k
    mvbank_update_prms_t s_mv_update_prms;
8854
8855
    /*************************************************************************/
8856
    /* All types of search candidates for predictor based search.            */
8857
    /*************************************************************************/
8858
179k
    S32 num_init_candts = 0;
8859
179k
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860
179k
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861
179k
    search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862
179k
    search_node_t *ps_candt_l, *ps_candt_t;
8863
179k
    search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864
179k
    search_node_t *ps_candt_prj_bl[2];
8865
179k
    search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866
179k
    search_node_t *ps_candt_prj_coloc[2];
8867
8868
179k
    pf_get_wt_inp fp_get_wt_inp;
8869
8870
179k
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871
179k
    U32 au4_unique_node_map[MAP_X_MAX * 2];
8872
8873
    /*EIID */
8874
179k
    WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8875
179k
    WORD32 i4_num_comparisions = 0;  //debug code
8876
179k
    WORD32 i4_threshold_multiplier;
8877
179k
    WORD32 i4_threshold_divider;
8878
179k
    WORD32 i4_temporal_layer =
8879
179k
        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880
8881
    /*************************************************************************/
8882
    /* Points to the search results for the blk level search (8x8/16x16)     */
8883
    /*************************************************************************/
8884
179k
    search_results_t *ps_search_results;
8885
8886
    /*************************************************************************/
8887
    /* Coordinates                                                           */
8888
    /*************************************************************************/
8889
179k
    S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890
    //S32 i4_ctb_y;
8891
179k
    S32 pos_x, pos_y;
8892
179k
    S32 blk_id_in_full_ctb;
8893
179k
    S32 i4_num_srch_cands;
8894
8895
179k
    S32 blk_y;
8896
8897
    /*************************************************************************/
8898
    /* Related to dimensions of block being searched and pic dimensions      */
8899
    /*************************************************************************/
8900
179k
    S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901
179k
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902
179k
    S32 num_results_prev_layer;
8903
8904
    /*************************************************************************/
8905
    /* Size of a basic unit for this layer. For non encode layers, we search */
8906
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907
    /* basic unit size is the ctb size.                                      */
8908
    /*************************************************************************/
8909
179k
    S32 unit_size;
8910
8911
    /*************************************************************************/
8912
    /* Pointers to context in current and coarser layers                     */
8913
    /*************************************************************************/
8914
179k
    layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915
8916
    /*************************************************************************/
8917
    /* to store mv range per blk, and picture limit, allowed search range    */
8918
    /* range prms in hpel and qpel units as well                             */
8919
    /*************************************************************************/
8920
179k
    range_prms_t s_range_prms_inp, s_range_prms_rec;
8921
179k
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922
    /*************************************************************************/
8923
    /* These variables are used to track number of references at different   */
8924
    /* stages of ME.                                                         */
8925
    /*************************************************************************/
8926
179k
    S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927
179k
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928
179k
    S32 lambda_inp = ps_refine_prms->lambda_inp;
8929
8930
    /*************************************************************************/
8931
    /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932
    /* Explicit means it searches on all active ref idx.                     */
8933
    /*************************************************************************/
8934
179k
    S32 curr_layer_implicit, prev_layer_implicit;
8935
8936
    /*************************************************************************/
8937
    /* Variables for loop counts                                             */
8938
    /*************************************************************************/
8939
179k
    S32 id;
8940
179k
    S08 i1_ref_idx;
8941
8942
    /*************************************************************************/
8943
    /* Input pointer and stride                                              */
8944
    /*************************************************************************/
8945
179k
    U08 *pu1_inp;
8946
179k
    S32 i4_inp_stride;
8947
8948
179k
    S32 end_of_frame;
8949
8950
179k
    S32 num_sync_units_in_row;
8951
8952
179k
    PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953
179k
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954
8955
    /*************************************************************************/
8956
    /* Pointers to current and coarse layer are needed for projection */
8957
    /* Pointer to prev layer is needed for other candts like coloc    */
8958
    /*************************************************************************/
8959
179k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960
8961
179k
    ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962
8963
179k
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964
8965
    /* Function pointer is selected based on the C vs X86 macro */
8966
8967
179k
    fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968
179k
                        ->pf_get_wt_inp_8x8;
8969
8970
179k
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971
179k
    i4_pic_wd = ps_curr_layer->i4_wd;
8972
179k
    i4_pic_ht = ps_curr_layer->i4_ht;
8973
179k
    e_search_complexity = ps_refine_prms->e_search_complexity;
8974
8975
179k
    end_of_frame = 0;
8976
8977
    /* If the previous layer is non-encode layer, then use dyadic projection */
8978
179k
    if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979
179k
        pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980
0
    else
8981
0
        pf_hme_project_coloc_candt = hme_project_coloc_candt;
8982
8983
    /* This points to all the initial candts */
8984
179k
    ps_search_candts = &as_search_candts[0];
8985
8986
179k
    {
8987
179k
        e_search_blk_size = BLK_8x8;
8988
179k
        blk_wd = blk_ht = 8;
8989
179k
        blk_size_shift = 3;
8990
179k
        s_mv_update_prms.i4_shift = 0;
8991
        /*********************************************************************/
8992
        /* In case we do not encode this layer, we search 8x8 with or without*/
8993
        /* enable 4x4 SAD.                                                   */
8994
        /*********************************************************************/
8995
179k
        {
8996
179k
            S32 i4_mask = (ENABLE_2Nx2N);
8997
8998
179k
            e_result_blk_size = BLK_8x8;
8999
179k
            if(ps_refine_prms->i4_enable_4x4_part)
9000
77.4k
            {
9001
77.4k
                i4_mask |= (ENABLE_NxN);
9002
77.4k
                e_result_blk_size = BLK_4x4;
9003
77.4k
                s_mv_update_prms.i4_shift = 1;
9004
77.4k
            }
9005
9006
179k
            s_search_prms_blk.i4_part_mask = i4_mask;
9007
179k
        }
9008
9009
179k
        unit_size = blk_wd;
9010
179k
        s_search_prms_blk.i4_inp_stride = unit_size;
9011
179k
    }
9012
9013
    /* This is required to properly update the layer mv bank */
9014
179k
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015
179k
    s_search_prms_blk.e_blk_size = e_search_blk_size;
9016
9017
    /*************************************************************************/
9018
    /* If current layer is explicit, then the number of ref frames are to    */
9019
    /* be same as previous layer. Else it will be 2                          */
9020
    /*************************************************************************/
9021
179k
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022
179k
    if(ps_refine_prms->explicit_ref)
9023
179k
    {
9024
179k
        curr_layer_implicit = 0;
9025
179k
        i4_num_ref_fpel = i4_num_ref_prev_layer;
9026
        /* 100578 : Using same mv cost fun. for all presets. */
9027
179k
        s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028
179k
    }
9029
0
    else
9030
0
    {
9031
0
        i4_num_ref_fpel = 2;
9032
0
        curr_layer_implicit = 1;
9033
0
        {
9034
0
            if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035
0
            {
9036
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037
0
            }
9038
0
            else
9039
0
            {
9040
0
#if USE_MODIFIED == 1
9041
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042
#else
9043
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044
#endif
9045
0
            }
9046
0
        }
9047
0
    }
9048
9049
179k
    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050
179k
    if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051
179k
           IV_IDR_FRAME ||
9052
179k
       ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053
48.9k
    {
9054
48.9k
        i4_num_ref_fpel = 1;
9055
48.9k
    }
9056
179k
    if(i4_num_ref_prev_layer <= 2)
9057
151k
    {
9058
151k
        prev_layer_implicit = 1;
9059
151k
        curr_layer_implicit = 1;
9060
151k
        i4_num_ref_each_dir = 1;
9061
151k
    }
9062
27.5k
    else
9063
27.5k
    {
9064
        /* It is assumed that we have equal number of references in each dir */
9065
        //ASSERT(!(i4_num_ref_prev_layer & 1));
9066
27.5k
        prev_layer_implicit = 0;
9067
27.5k
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068
27.5k
    }
9069
179k
    s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070
179k
    s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071
179k
    s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072
9073
    /* this can be kept to 1 or 2 */
9074
179k
    i4_num_ref_before_merge = 2;
9075
179k
    i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076
9077
    /* Set up place holders to hold the search nodes of each initial candt */
9078
10.9M
    for(i = 0; i < MAX_INIT_CANDTS; i++)
9079
10.7M
    {
9080
10.7M
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081
10.7M
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082
10.7M
    }
9083
9084
    /* redundant, but doing it here since it is used in pred ctxt init */
9085
179k
    ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086
716k
    for(i = 0; i < 3; i++)
9087
537k
    {
9088
537k
        search_node_t *ps_search_node;
9089
537k
        ps_search_node = &as_left_neighbours[i];
9090
537k
        INIT_SEARCH_NODE(ps_search_node, 0);
9091
537k
        ps_search_node = &as_top_neighbours[i];
9092
537k
        INIT_SEARCH_NODE(ps_search_node, 0);
9093
537k
    }
9094
9095
179k
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096
    /* bottom left node always not available for the blk being searched */
9097
179k
    as_left_neighbours[2].u1_is_avail = 0;
9098
    /*************************************************************************/
9099
    /* Initialize all the search results structure here. We update all the   */
9100
    /* search results to default values, and configure things like blk sizes */
9101
    /*************************************************************************/
9102
179k
    if(ps_refine_prms->i4_encode == 0)
9103
179k
    {
9104
179k
        S32 pred_lx;
9105
179k
        search_results_t *ps_search_results;
9106
9107
179k
        ps_search_results = &ps_ctxt->s_search_results_8x8;
9108
179k
        hme_init_search_results(
9109
179k
            ps_search_results,
9110
179k
            i4_num_ref_fpel,
9111
179k
            ps_refine_prms->i4_num_fpel_results,
9112
179k
            ps_refine_prms->i4_num_results_per_part,
9113
179k
            e_search_blk_size,
9114
179k
            0,
9115
179k
            0,
9116
179k
            &ps_ctxt->au1_is_past[0]);
9117
537k
        for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118
358k
        {
9119
358k
            hme_init_pred_ctxt_no_encode(
9120
358k
                &ps_search_results->as_pred_ctxt[pred_lx],
9121
358k
                ps_search_results,
9122
358k
                &as_top_neighbours[0],
9123
358k
                &as_left_neighbours[0],
9124
358k
                &ps_candt_prj_coloc[0],
9125
358k
                ps_candt_zeromv,
9126
358k
                ps_candt_zeromv,
9127
358k
                pred_lx,
9128
358k
                lambda_inp,
9129
358k
                ps_refine_prms->lambda_q_shift,
9130
358k
                &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131
358k
                &ps_ctxt->ai2_ref_scf[0]);
9132
358k
        }
9133
179k
    }
9134
9135
    /*********************************************************************/
9136
    /* Initialize the dyn. search range params. for each reference index */
9137
    /* in current layer ctxt                                             */
9138
    /*********************************************************************/
9139
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9140
179k
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141
99.2k
    {
9142
99.2k
        WORD32 ref_ctr;
9143
9144
282k
        for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145
183k
        {
9146
183k
            INIT_DYN_SEARCH_PRMS(
9147
183k
                &ps_ctxt->s_coarse_dyn_range_prms
9148
183k
                     .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149
183k
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150
183k
        }
9151
99.2k
    }
9152
9153
    /* Next set up initial candidates according to a given set of rules.   */
9154
    /* The number of initial candidates affects the quality of ME in the   */
9155
    /* case of motion with multiple degrees of freedom. In case of simple  */
9156
    /* translational motion, a current and a few causal and non causal     */
9157
    /* candts would suffice. More candidates help to cover more complex    */
9158
    /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159
    /* where multiple ref helps etc.                                       */
9160
    /* The candidate choice also depends on the following parameters.      */
9161
    /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9162
    /* Whether we encode or not, and the type of search across reference   */
9163
    /* i.e. the previous layer may have been explicit/implicit and curr    */
9164
    /* layer may be explicit/implicit                                      */
9165
9166
    /* 0, 0, L, T, projected coloc best always present by default */
9167
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168
179k
    ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169
179k
    ps_search_candts[id].u1_num_steps_refine = 0;
9170
179k
    ps_candt_zeromv->s_mv.i2_mvx = 0;
9171
179k
    ps_candt_zeromv->s_mv.i2_mvy = 0;
9172
9173
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174
179k
    ps_candt_l = ps_search_candts[id].ps_search_node;
9175
179k
    ps_search_candts[id].u1_num_steps_refine = 0;
9176
9177
    /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178
    /* not at the CTB boundary use the causal T and */
9179
    /* not the projected T, although the candidate is */
9180
    /* still pointed to by ps_candt_prj_t[0] */
9181
179k
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182
126k
    {
9183
        /* Using Projected top to eliminate sync */
9184
126k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185
126k
            PROJECTED_TOP0, e_me_quality_presets);
9186
126k
        ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187
126k
        ps_search_candts[id].u1_num_steps_refine = 1;
9188
126k
    }
9189
52.9k
    else
9190
52.9k
    {
9191
52.9k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192
52.9k
            SPATIAL_TOP0, e_me_quality_presets);
9193
52.9k
        ps_candt_t = ps_search_candts[id].ps_search_node;
9194
52.9k
        ps_search_candts[id].u1_num_steps_refine = 0;
9195
52.9k
    }
9196
9197
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198
179k
        PROJECTED_COLOC0, e_me_quality_presets);
9199
179k
    ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9201
9202
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203
179k
        PROJECTED_COLOC1, e_me_quality_presets);
9204
179k
    ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9206
9207
179k
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208
126k
    {
9209
126k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210
126k
            PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211
126k
        ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212
126k
        ps_search_candts[id].u1_num_steps_refine = 1;
9213
9214
126k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215
126k
            PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216
126k
        ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217
126k
        ps_search_candts[id].u1_num_steps_refine = 1;
9218
126k
    }
9219
52.9k
    else
9220
52.9k
    {
9221
52.9k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222
52.9k
            SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223
52.9k
        ps_candt_tr = ps_search_candts[id].ps_search_node;
9224
52.9k
        ps_search_candts[id].u1_num_steps_refine = 0;
9225
9226
52.9k
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227
52.9k
            SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228
52.9k
        ps_candt_tl = ps_search_candts[id].ps_search_node;
9229
52.9k
        ps_search_candts[id].u1_num_steps_refine = 0;
9230
52.9k
    }
9231
9232
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233
179k
        PROJECTED_RIGHT0, e_me_quality_presets);
9234
179k
    ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9236
9237
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238
179k
        PROJECTED_BOTTOM0, e_me_quality_presets);
9239
179k
    ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9241
9242
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243
179k
        PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244
179k
    ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9246
9247
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248
179k
        PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249
179k
    ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9251
9252
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253
179k
        PROJECTED_RIGHT1, e_me_quality_presets);
9254
179k
    ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9256
9257
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258
179k
        PROJECTED_BOTTOM1, e_me_quality_presets);
9259
179k
    ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9261
9262
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263
179k
        PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264
179k
    ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9266
9267
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268
179k
        PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269
179k
    ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9271
9272
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273
179k
    ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9275
9276
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277
179k
        PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278
179k
    ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9280
9281
179k
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282
179k
        PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283
179k
    ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284
179k
    ps_search_candts[id].u1_num_steps_refine = 1;
9285
9286
    /*************************************************************************/
9287
    /* Now that the candidates have been ordered, to choose the right number */
9288
    /* of initial candidates.                                                */
9289
    /*************************************************************************/
9290
179k
    if(curr_layer_implicit && !prev_layer_implicit)
9291
0
    {
9292
0
        if(e_search_complexity == SEARCH_CX_LOW)
9293
0
            num_init_candts = 7;
9294
0
        else if(e_search_complexity == SEARCH_CX_MED)
9295
0
            num_init_candts = 13;
9296
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9297
0
            num_init_candts = 18;
9298
0
        else
9299
0
            ASSERT(0);
9300
0
    }
9301
179k
    else
9302
179k
    {
9303
179k
        if(e_search_complexity == SEARCH_CX_LOW)
9304
126k
            num_init_candts = 5;
9305
52.9k
        else if(e_search_complexity == SEARCH_CX_MED)
9306
52.9k
            num_init_candts = 11;
9307
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9308
0
            num_init_candts = 16;
9309
0
        else
9310
0
            ASSERT(0);
9311
179k
    }
9312
9313
179k
    if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314
73.2k
    {
9315
73.2k
        num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316
73.2k
    }
9317
9318
    /*************************************************************************/
9319
    /* The following search parameters are fixed throughout the search across*/
9320
    /* all blks. So these are configured outside processing loop             */
9321
    /*************************************************************************/
9322
179k
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323
179k
    s_search_prms_blk.i4_start_step = 1;
9324
179k
    s_search_prms_blk.i4_use_satd = 0;
9325
179k
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326
    /* we use recon only for encoded layers, otherwise it is not available */
9327
179k
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328
9329
179k
    s_search_prms_blk.ps_search_candts = ps_search_candts;
9330
    /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331
179k
    if(s_search_prms_blk.i4_use_rec)
9332
0
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333
179k
    else
9334
179k
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335
    /*************************************************************************/
9336
    /* Initialize coordinates. Meaning as follows                            */
9337
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9338
    /* blk_y : same as above, y coord.                                       */
9339
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9340
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9341
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9342
    /* corner of the picture. Always multiple of 64.                         */
9343
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9344
    /*************************************************************************/
9345
179k
    blk_y = 0;
9346
179k
    blk_id_in_ctb = 0;
9347
9348
179k
    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349
9350
    /* Get the number of sync units in a row based on encode/non encode layer */
9351
179k
    num_sync_units_in_row = num_blks_in_row;
9352
9353
    /*************************************************************************/
9354
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9355
    /* every block given its coordinate. Note this assumes that the min amt  */
9356
    /* of padding to right of pic is equal to the blk size. If we go all the */
9357
    /* way upto 64x64, then the min padding on right side of picture should  */
9358
    /* be 64, and also on bottom side of picture.                            */
9359
    /*************************************************************************/
9360
179k
    SET_PIC_LIMIT(
9361
179k
        s_pic_limit_inp,
9362
179k
        ps_curr_layer->i4_pad_x_inp,
9363
179k
        ps_curr_layer->i4_pad_y_inp,
9364
179k
        ps_curr_layer->i4_wd,
9365
179k
        ps_curr_layer->i4_ht,
9366
179k
        s_search_prms_blk.i4_num_steps_post_refine);
9367
9368
179k
    SET_PIC_LIMIT(
9369
179k
        s_pic_limit_rec,
9370
179k
        ps_curr_layer->i4_pad_x_rec,
9371
179k
        ps_curr_layer->i4_pad_y_rec,
9372
179k
        ps_curr_layer->i4_wd,
9373
179k
        ps_curr_layer->i4_ht,
9374
179k
        s_search_prms_blk.i4_num_steps_post_refine);
9375
9376
    /*************************************************************************/
9377
    /* set the MV limit per ref. pic.                                        */
9378
    /*    - P pic. : Based on the config params.                             */
9379
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380
    /*************************************************************************/
9381
179k
    {
9382
179k
        WORD32 ref_ctr;
9383
        /* Only for B/b pic. */
9384
179k
        if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385
30.9k
        {
9386
30.9k
            WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387
30.9k
            WORD32 cur_poc, ref_poc, abs_poc_diff;
9388
9389
30.9k
            cur_poc = ps_ctxt->i4_curr_poc;
9390
9391
            /* Get abs MAX for symmetric search */
9392
30.9k
            i2_mv_y_per_poc = MAX(
9393
30.9k
                ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394
30.9k
                (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395
30.9k
                         .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396
9397
109k
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398
78.7k
            {
9399
78.7k
                ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400
78.7k
                abs_poc_diff = ABS((cur_poc - ref_poc));
9401
                /* Get the cur. max MV based on POC distance */
9402
78.7k
                i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403
78.7k
                i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404
9405
78.7k
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406
78.7k
                as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407
78.7k
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408
78.7k
                as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409
78.7k
            }
9410
30.9k
        }
9411
148k
        else
9412
148k
        {
9413
            /* Set the Config. File Params for P pic. */
9414
380k
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415
232k
            {
9416
232k
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417
232k
                as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418
232k
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419
232k
                as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420
232k
            }
9421
148k
        }
9422
179k
    }
9423
9424
    /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425
179k
    if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426
25.5k
    {
9427
25.5k
        i4_threshold_multiplier = 1;
9428
25.5k
        i4_threshold_divider = 4;
9429
25.5k
    }
9430
153k
    else if(e_me_quality_presets == ME_HIGH_SPEED)
9431
10.7k
    {
9432
10.7k
        i4_threshold_multiplier = 1;
9433
10.7k
        i4_threshold_divider = 2;
9434
10.7k
    }
9435
142k
    else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436
89.8k
    {
9437
#if OLD_XTREME_SPEED
9438
        /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439
        i4_temporal_layer = 1;
9440
#endif
9441
89.8k
        if(i4_temporal_layer == 0)
9442
73.8k
        {
9443
73.8k
            i4_threshold_multiplier = 3;
9444
73.8k
            i4_threshold_divider = 4;
9445
73.8k
        }
9446
16.0k
        else if(i4_temporal_layer == 1)
9447
5.40k
        {
9448
5.40k
            i4_threshold_multiplier = 3;
9449
5.40k
            i4_threshold_divider = 4;
9450
5.40k
        }
9451
10.6k
        else if(i4_temporal_layer == 2)
9452
10.6k
        {
9453
10.6k
            i4_threshold_multiplier = 1;
9454
10.6k
            i4_threshold_divider = 1;
9455
10.6k
        }
9456
0
        else
9457
0
        {
9458
0
            i4_threshold_multiplier = 5;
9459
0
            i4_threshold_divider = 4;
9460
0
        }
9461
89.8k
    }
9462
52.9k
    else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463
16.3k
    {
9464
16.3k
        i4_threshold_multiplier = 1;
9465
16.3k
        i4_threshold_divider = 1;
9466
16.3k
    }
9467
9468
    /*************************************************************************/
9469
    /*************************************************************************/
9470
    /*************************************************************************/
9471
    /* START OF THE CORE LOOP                                                */
9472
    /* If Encode is 0, then we just loop over each blk                       */
9473
    /*************************************************************************/
9474
    /*************************************************************************/
9475
    /*************************************************************************/
9476
1.14M
    while(0 == end_of_frame)
9477
968k
    {
9478
968k
        job_queue_t *ps_job;
9479
968k
        ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9480
968k
        WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9481
968k
        WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9482
        //+3 to get ceil values when divided by 4
9483
968k
        WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484
968k
            8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9485
        //if there is variable for ctb size use that and this variable can be derived
9486
968k
        WORD32 offset_val, check_dep_pos, set_dep_pos;
9487
968k
        void *pv_hme_dep_mngr;
9488
968k
        ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489
9490
        /* Get the current layer HME Dep Mngr       */
9491
        /* Note : Use layer_id - 1 in HME layers    */
9492
9493
968k
        pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494
9495
        /* Get the current row from the job queue */
9496
968k
        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497
968k
            ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498
9499
        /* If all rows are done, set the end of process flag to 1, */
9500
        /* and the current row to -1 */
9501
968k
        if(NULL == ps_job)
9502
179k
        {
9503
179k
            blk_y = -1;
9504
179k
            end_of_frame = 1;
9505
9506
179k
            continue;
9507
179k
        }
9508
9509
789k
        if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510
245k
        {
9511
            /* set the output dependency of current row */
9512
245k
            ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513
245k
            continue;
9514
245k
        }
9515
9516
544k
        blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517
544k
        blk_x = 0;
9518
544k
        i4_ctb_x = 0;
9519
9520
        /* wait for Corresponding Pre intra Job to be completed */
9521
544k
        if(1 == ps_refine_prms->i4_layer_id)
9522
541k
        {
9523
541k
            volatile UWORD32 i4_l1_done;
9524
541k
            volatile UWORD32 *pi4_l1_done;
9525
541k
            pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526
541k
                              ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527
541k
            i4_l1_done = *pi4_l1_done;
9528
541k
            while(!i4_l1_done)
9529
0
            {
9530
0
                i4_l1_done = *pi4_l1_done;
9531
0
            }
9532
541k
        }
9533
        /* Set Variables for Dep. Checking and Setting */
9534
544k
        set_dep_pos = blk_y + 1;
9535
544k
        if(blk_y > 0)
9536
414k
        {
9537
414k
            offset_val = 2;
9538
414k
            check_dep_pos = blk_y - 1;
9539
414k
        }
9540
130k
        else
9541
130k
        {
9542
            /* First row should run without waiting */
9543
130k
            offset_val = -1;
9544
130k
            check_dep_pos = 0;
9545
130k
        }
9546
9547
        /* EIID: calculate ed_blk_ctxt pointer for current row */
9548
        /* valid for only layer-1. not verified and used for other layers */
9549
544k
        i4_ctb_row_ctr = blk_y / 4;
9550
544k
        ps_ed_blk_ctxt_curr_row =
9551
544k
            ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552
544k
                                  i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9553
544k
        ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9554
9555
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
9556
        /* loop over all the units in a row                        */
9557
2.86M
        for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558
2.31M
        {
9559
2.31M
            ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9560
2.31M
            ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561
2.31M
            WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562
9563
            /* Wait till top row block is processed   */
9564
            /* Currently checking till top right block*/
9565
9566
            /* Disabled since all candidates, except for */
9567
            /* L and C, are projected from the coarser layer, */
9568
            /* only in ME_HIGH_SPEED mode */
9569
2.31M
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570
719k
            {
9571
719k
                if(i4_ctb_x < (num_sync_units_in_row - 1))
9572
552k
                {
9573
552k
                    ihevce_dmgr_chk_row_row_sync(
9574
552k
                        pv_hme_dep_mngr,
9575
552k
                        i4_ctb_x,
9576
552k
                        offset_val,
9577
552k
                        check_dep_pos,
9578
552k
                        0, /* Col Tile No. : Not supported in PreEnc*/
9579
552k
                        ps_ctxt->thrd_id);
9580
552k
                }
9581
719k
            }
9582
9583
2.31M
            {
9584
                /* for non encoder layer only one block is processed */
9585
2.31M
                num_blks_in_this_ctb = 1;
9586
2.31M
            }
9587
9588
            /* EIID: derive ed_ctxt ptr for current CTB */
9589
2.31M
            ps_ed_blk_ctxt_curr_ctb =
9590
2.31M
                ps_ed_blk_ctxt_curr_row +
9591
2.31M
                (i4_ctb_blk_ctr *
9592
2.31M
                 i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9593
2.31M
            ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594
9595
            /* loop over all the blocks in CTB will always be 1 */
9596
4.63M
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597
2.31M
            {
9598
2.31M
                {
9599
                    /* non encode layer */
9600
2.31M
                    blk_x = i4_ctb_x;
9601
2.31M
                    blk_id_in_full_ctb = 0;
9602
2.31M
                    s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603
2.31M
                }
9604
9605
                /* get the current input blk point */
9606
2.31M
                pos_x = blk_x << blk_size_shift;
9607
2.31M
                pos_y = blk_y << blk_size_shift;
9608
2.31M
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609
9610
                /*********************************************************************/
9611
                /* replicate the inp buffer at blk or ctb level for each ref id,     */
9612
                /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613
                /* thereby avoiding a bloat up of memory. If we did all references   */
9614
                /* weighted pred, we will end up with a duplicate copy of each ref   */
9615
                /* at each layer, since we need to preserve the original reference.  */
9616
                /* ToDo: Need to observe performance with this mechanism and compare */
9617
                /* with case where ref is weighted.                                  */
9618
                /*********************************************************************/
9619
2.31M
                if(blk_id_in_ctb == 0)
9620
2.31M
                {
9621
2.31M
                    fp_get_wt_inp(
9622
2.31M
                        ps_curr_layer,
9623
2.31M
                        &ps_ctxt->s_wt_pred,
9624
2.31M
                        unit_size,
9625
2.31M
                        pos_x,
9626
2.31M
                        pos_y,
9627
2.31M
                        unit_size,
9628
2.31M
                        ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629
2.31M
                        ps_ctxt->i4_wt_pred_enable_flag);
9630
2.31M
                }
9631
9632
2.31M
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633
2.31M
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634
                /* Select search results from a suitable search result in the context */
9635
2.31M
                {
9636
2.31M
                    ps_search_results = &ps_ctxt->s_search_results_8x8;
9637
2.31M
                }
9638
9639
2.31M
                s_search_prms_blk.ps_search_results = ps_search_results;
9640
9641
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642
2.31M
                hme_reset_search_results(
9643
2.31M
                    ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644
9645
                /* Loop across different Ref IDx */
9646
6.93M
                for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647
4.61M
                {
9648
4.61M
                    S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649
4.61M
                    S32 prev_blk_offset = 6;
9650
4.61M
                    S32 resultid;
9651
9652
                    /*********************************************************************/
9653
                    /* For every blk in the picture, the search range needs to be derived*/
9654
                    /* Any blk can have any mv, but practical search constraints are     */
9655
                    /* imposed by the picture boundary and amt of padding.               */
9656
                    /*********************************************************************/
9657
                    /* MV limit is different based on ref. PIC */
9658
4.61M
                    hme_derive_search_range(
9659
4.61M
                        &s_range_prms_inp,
9660
4.61M
                        &s_pic_limit_inp,
9661
4.61M
                        &as_mv_limit[i1_ref_idx],
9662
4.61M
                        pos_x,
9663
4.61M
                        pos_y,
9664
4.61M
                        blk_wd,
9665
4.61M
                        blk_ht);
9666
4.61M
                    hme_derive_search_range(
9667
4.61M
                        &s_range_prms_rec,
9668
4.61M
                        &s_pic_limit_rec,
9669
4.61M
                        &as_mv_limit[i1_ref_idx],
9670
4.61M
                        pos_x,
9671
4.61M
                        pos_y,
9672
4.61M
                        blk_wd,
9673
4.61M
                        blk_ht);
9674
9675
4.61M
                    s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676
4.61M
                    ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677
9678
4.61M
                    i4_num_srch_cands = 1;
9679
9680
4.61M
                    if(1 != ps_refine_prms->i4_layer_id)
9681
34.0k
                    {
9682
34.0k
                        S32 x, y;
9683
34.0k
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684
34.0k
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685
9686
34.0k
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687
18.4k
                        {
9688
18.4k
                            hme_get_spatial_candt(
9689
18.4k
                                ps_curr_layer,
9690
18.4k
                                e_search_blk_size,
9691
18.4k
                                blk_x,
9692
18.4k
                                blk_y,
9693
18.4k
                                i1_ref_idx,
9694
18.4k
                                &as_top_neighbours[0],
9695
18.4k
                                &as_left_neighbours[0],
9696
18.4k
                                0,
9697
18.4k
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698
18.4k
                                0,
9699
18.4k
                                ps_refine_prms->i4_encode);
9700
9701
18.4k
                            *ps_candt_tr = as_top_neighbours[3];
9702
18.4k
                            *ps_candt_t = as_top_neighbours[1];
9703
18.4k
                            *ps_candt_tl = as_top_neighbours[0];
9704
18.4k
                            i4_num_srch_cands += 3;
9705
18.4k
                        }
9706
15.6k
                        else
9707
15.6k
                        {
9708
15.6k
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709
15.6k
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710
15.6k
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711
15.6k
                            search_node_t *ps_search_node;
9712
15.6k
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713
15.6k
                            hme_mv_t *ps_mv, *ps_mv_base;
9714
15.6k
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715
15.6k
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9716
15.6k
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717
9718
15.6k
                            if(i4_blk_size1 != i4_blk_size2)
9719
2.94k
                            {
9720
2.94k
                                blk_x_temp <<= 1;
9721
2.94k
                                blk_y_temp <<= 1;
9722
2.94k
                                jump = 2;
9723
2.94k
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9724
0
                                {
9725
0
                                    blk_x_temp <<= 1;
9726
0
                                    blk_y_temp <<= 1;
9727
0
                                    jump = 4;
9728
0
                                }
9729
2.94k
                            }
9730
9731
15.6k
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732
15.6k
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733
9734
                            /* Adjust the blk coord to point to top left locn */
9735
15.6k
                            blk_x_temp -= 1;
9736
15.6k
                            blk_y_temp -= 1;
9737
9738
                            /* Pick up the mvs from the location */
9739
15.6k
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740
15.6k
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741
9742
15.6k
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743
15.6k
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744
9745
15.6k
                            ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746
15.6k
                            pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747
9748
15.6k
                            ps_mv_base = ps_mv;
9749
15.6k
                            pi1_ref_idx_base = pi1_ref_idx;
9750
9751
15.6k
                            ps_search_node = &as_left_neighbours[0];
9752
15.6k
                            ps_mv = ps_mv_base + mvs_in_row;
9753
15.6k
                            pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754
15.6k
                            COPY_MV_TO_SEARCH_NODE(
9755
15.6k
                                ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756
9757
15.6k
                            i4_num_srch_cands++;
9758
15.6k
                        }
9759
34.0k
                    }
9760
4.58M
                    else
9761
4.58M
                    {
9762
4.58M
                        S32 x, y;
9763
4.58M
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764
4.58M
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765
9766
4.58M
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767
2.10M
                        {
9768
2.10M
                            hme_get_spatial_candt_in_l1_me(
9769
2.10M
                                ps_curr_layer,
9770
2.10M
                                e_search_blk_size,
9771
2.10M
                                blk_x,
9772
2.10M
                                blk_y,
9773
2.10M
                                i1_ref_idx,
9774
2.10M
                                !ps_search_results->pu1_is_past[i1_ref_idx],
9775
2.10M
                                &as_top_neighbours[0],
9776
2.10M
                                &as_left_neighbours[0],
9777
2.10M
                                0,
9778
2.10M
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779
2.10M
                                0,
9780
2.10M
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781
2.10M
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782
9783
2.10M
                            *ps_candt_tr = as_top_neighbours[3];
9784
2.10M
                            *ps_candt_t = as_top_neighbours[1];
9785
2.10M
                            *ps_candt_tl = as_top_neighbours[0];
9786
9787
2.10M
                            i4_num_srch_cands += 3;
9788
2.10M
                        }
9789
2.48M
                        else
9790
2.48M
                        {
9791
2.48M
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792
2.48M
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793
2.48M
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794
2.48M
                            S32 i4_mv_pos_in_implicit_array;
9795
2.48M
                            search_node_t *ps_search_node;
9796
2.48M
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797
2.48M
                            hme_mv_t *ps_mv, *ps_mv_base;
9798
2.48M
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799
2.48M
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9800
2.48M
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801
2.48M
                            U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802
2.48M
                            S32 i4_num_results_in_given_dir =
9803
2.48M
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804
307k
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805
2.48M
                                                    : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806
2.17M
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807
9808
2.48M
                            if(i4_blk_size1 != i4_blk_size2)
9809
639k
                            {
9810
639k
                                blk_x_temp <<= 1;
9811
639k
                                blk_y_temp <<= 1;
9812
639k
                                jump = 2;
9813
639k
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9814
0
                                {
9815
0
                                    blk_x_temp <<= 1;
9816
0
                                    blk_y_temp <<= 1;
9817
0
                                    jump = 4;
9818
0
                                }
9819
639k
                            }
9820
9821
2.48M
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822
2.48M
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823
9824
                            /* Adjust the blk coord to point to top left locn */
9825
2.48M
                            blk_x_temp -= 1;
9826
2.48M
                            blk_y_temp -= 1;
9827
9828
                            /* Pick up the mvs from the location */
9829
2.48M
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830
2.48M
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831
9832
2.48M
                            i4_offset +=
9833
2.48M
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834
307k
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835
2.48M
                                                    : 0);
9836
9837
2.48M
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838
2.48M
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839
9840
2.48M
                            ps_mv_base = ps_mv;
9841
2.48M
                            pi1_ref_idx_base = pi1_ref_idx;
9842
9843
2.48M
                            {
9844
                                /* ps_mv and pi1_ref_idx now point to the top left locn */
9845
2.48M
                                ps_search_node = &as_left_neighbours[0];
9846
2.48M
                                ps_mv = ps_mv_base + mvs_in_row;
9847
2.48M
                                pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848
9849
2.48M
                                i4_mv_pos_in_implicit_array =
9850
2.48M
                                    hme_find_pos_of_implicitly_stored_ref_id(
9851
2.48M
                                        pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852
9853
2.48M
                                if(-1 != i4_mv_pos_in_implicit_array)
9854
1.88M
                                {
9855
1.88M
                                    COPY_MV_TO_SEARCH_NODE(
9856
1.88M
                                        ps_search_node,
9857
1.88M
                                        &ps_mv[i4_mv_pos_in_implicit_array],
9858
1.88M
                                        &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859
1.88M
                                        i1_ref_idx,
9860
1.88M
                                        shift);
9861
1.88M
                                }
9862
593k
                                else
9863
593k
                                {
9864
593k
                                    ps_search_node->u1_is_avail = 0;
9865
593k
                                    ps_search_node->s_mv.i2_mvx = 0;
9866
593k
                                    ps_search_node->s_mv.i2_mvy = 0;
9867
593k
                                    ps_search_node->i1_ref_idx = i1_ref_idx;
9868
593k
                                }
9869
9870
2.48M
                                i4_num_srch_cands++;
9871
2.48M
                            }
9872
2.48M
                        }
9873
4.58M
                    }
9874
9875
4.61M
                    *ps_candt_l = as_left_neighbours[0];
9876
9877
                    /* when 16x16 is searched in an encode layer, and the prev layer */
9878
                    /* stores results for 4x4 blks, we project 5 candts corresponding */
9879
                    /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9880
                    /* However in other cases, only 2,2 best and 2nd best reqd */
9881
4.61M
                    resultid = 0;
9882
4.61M
                    pf_hme_project_coloc_candt(
9883
4.61M
                        ps_candt_prj_coloc[0],
9884
4.61M
                        ps_curr_layer,
9885
4.61M
                        ps_coarse_layer,
9886
4.61M
                        pos_x + 2,
9887
4.61M
                        pos_y + 2,
9888
4.61M
                        i1_ref_idx,
9889
4.61M
                        resultid);
9890
9891
4.61M
                    i4_num_srch_cands++;
9892
9893
4.61M
                    resultid = 1;
9894
4.61M
                    if(num_results_prev_layer > 1)
9895
4.56M
                    {
9896
4.56M
                        pf_hme_project_coloc_candt(
9897
4.56M
                            ps_candt_prj_coloc[1],
9898
4.56M
                            ps_curr_layer,
9899
4.56M
                            ps_coarse_layer,
9900
4.56M
                            pos_x + 2,
9901
4.56M
                            pos_y + 2,
9902
4.56M
                            i1_ref_idx,
9903
4.56M
                            resultid);
9904
9905
4.56M
                        i4_num_srch_cands++;
9906
4.56M
                    }
9907
9908
4.61M
                    resultid = 0;
9909
9910
4.61M
                    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911
2.49M
                    {
9912
2.49M
                        pf_hme_project_coloc_candt(
9913
2.49M
                            ps_candt_prj_t[0],
9914
2.49M
                            ps_curr_layer,
9915
2.49M
                            ps_coarse_layer,
9916
2.49M
                            pos_x,
9917
2.49M
                            pos_y - prev_blk_offset,
9918
2.49M
                            i1_ref_idx,
9919
2.49M
                            resultid);
9920
9921
2.49M
                        i4_num_srch_cands++;
9922
2.49M
                    }
9923
9924
4.61M
                    {
9925
4.61M
                        pf_hme_project_coloc_candt(
9926
4.61M
                            ps_candt_prj_br[0],
9927
4.61M
                            ps_curr_layer,
9928
4.61M
                            ps_coarse_layer,
9929
4.61M
                            pos_x + next_blk_offset,
9930
4.61M
                            pos_y + next_blk_offset,
9931
4.61M
                            i1_ref_idx,
9932
4.61M
                            resultid);
9933
4.61M
                        pf_hme_project_coloc_candt(
9934
4.61M
                            ps_candt_prj_bl[0],
9935
4.61M
                            ps_curr_layer,
9936
4.61M
                            ps_coarse_layer,
9937
4.61M
                            pos_x - prev_blk_offset,
9938
4.61M
                            pos_y + next_blk_offset,
9939
4.61M
                            i1_ref_idx,
9940
4.61M
                            resultid);
9941
4.61M
                        pf_hme_project_coloc_candt(
9942
4.61M
                            ps_candt_prj_r[0],
9943
4.61M
                            ps_curr_layer,
9944
4.61M
                            ps_coarse_layer,
9945
4.61M
                            pos_x + next_blk_offset,
9946
4.61M
                            pos_y,
9947
4.61M
                            i1_ref_idx,
9948
4.61M
                            resultid);
9949
4.61M
                        pf_hme_project_coloc_candt(
9950
4.61M
                            ps_candt_prj_b[0],
9951
4.61M
                            ps_curr_layer,
9952
4.61M
                            ps_coarse_layer,
9953
4.61M
                            pos_x,
9954
4.61M
                            pos_y + next_blk_offset,
9955
4.61M
                            i1_ref_idx,
9956
4.61M
                            resultid);
9957
9958
4.61M
                        i4_num_srch_cands += 4;
9959
9960
4.61M
                        if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961
2.49M
                        {
9962
2.49M
                            pf_hme_project_coloc_candt(
9963
2.49M
                                ps_candt_prj_tr[0],
9964
2.49M
                                ps_curr_layer,
9965
2.49M
                                ps_coarse_layer,
9966
2.49M
                                pos_x + next_blk_offset,
9967
2.49M
                                pos_y - prev_blk_offset,
9968
2.49M
                                i1_ref_idx,
9969
2.49M
                                resultid);
9970
2.49M
                            pf_hme_project_coloc_candt(
9971
2.49M
                                ps_candt_prj_tl[0],
9972
2.49M
                                ps_curr_layer,
9973
2.49M
                                ps_coarse_layer,
9974
2.49M
                                pos_x - prev_blk_offset,
9975
2.49M
                                pos_y - prev_blk_offset,
9976
2.49M
                                i1_ref_idx,
9977
2.49M
                                resultid);
9978
9979
2.49M
                            i4_num_srch_cands += 2;
9980
2.49M
                        }
9981
4.61M
                    }
9982
4.61M
                    if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983
2.12M
                    {
9984
2.12M
                        resultid = 1;
9985
2.12M
                        pf_hme_project_coloc_candt(
9986
2.12M
                            ps_candt_prj_br[1],
9987
2.12M
                            ps_curr_layer,
9988
2.12M
                            ps_coarse_layer,
9989
2.12M
                            pos_x + next_blk_offset,
9990
2.12M
                            pos_y + next_blk_offset,
9991
2.12M
                            i1_ref_idx,
9992
2.12M
                            resultid);
9993
2.12M
                        pf_hme_project_coloc_candt(
9994
2.12M
                            ps_candt_prj_bl[1],
9995
2.12M
                            ps_curr_layer,
9996
2.12M
                            ps_coarse_layer,
9997
2.12M
                            pos_x - prev_blk_offset,
9998
2.12M
                            pos_y + next_blk_offset,
9999
2.12M
                            i1_ref_idx,
10000
2.12M
                            resultid);
10001
2.12M
                        pf_hme_project_coloc_candt(
10002
2.12M
                            ps_candt_prj_r[1],
10003
2.12M
                            ps_curr_layer,
10004
2.12M
                            ps_coarse_layer,
10005
2.12M
                            pos_x + next_blk_offset,
10006
2.12M
                            pos_y,
10007
2.12M
                            i1_ref_idx,
10008
2.12M
                            resultid);
10009
2.12M
                        pf_hme_project_coloc_candt(
10010
2.12M
                            ps_candt_prj_b[1],
10011
2.12M
                            ps_curr_layer,
10012
2.12M
                            ps_coarse_layer,
10013
2.12M
                            pos_x,
10014
2.12M
                            pos_y + next_blk_offset,
10015
2.12M
                            i1_ref_idx,
10016
2.12M
                            resultid);
10017
10018
2.12M
                        i4_num_srch_cands += 4;
10019
10020
2.12M
                        pf_hme_project_coloc_candt(
10021
2.12M
                            ps_candt_prj_tr[1],
10022
2.12M
                            ps_curr_layer,
10023
2.12M
                            ps_coarse_layer,
10024
2.12M
                            pos_x + next_blk_offset,
10025
2.12M
                            pos_y - prev_blk_offset,
10026
2.12M
                            i1_ref_idx,
10027
2.12M
                            resultid);
10028
2.12M
                        pf_hme_project_coloc_candt(
10029
2.12M
                            ps_candt_prj_tl[1],
10030
2.12M
                            ps_curr_layer,
10031
2.12M
                            ps_coarse_layer,
10032
2.12M
                            pos_x - prev_blk_offset,
10033
2.12M
                            pos_y - prev_blk_offset,
10034
2.12M
                            i1_ref_idx,
10035
2.12M
                            resultid);
10036
2.12M
                        pf_hme_project_coloc_candt(
10037
2.12M
                            ps_candt_prj_t[1],
10038
2.12M
                            ps_curr_layer,
10039
2.12M
                            ps_coarse_layer,
10040
2.12M
                            pos_x,
10041
2.12M
                            pos_y - prev_blk_offset,
10042
2.12M
                            i1_ref_idx,
10043
2.12M
                            resultid);
10044
10045
2.12M
                        i4_num_srch_cands += 3;
10046
2.12M
                    }
10047
10048
                    /* Note this block also clips the MV range for all candidates */
10049
#ifdef _DEBUG
10050
                    {
10051
                        S32 candt;
10052
                        range_prms_t *ps_range_prms;
10053
10054
                        S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055
                        for(candt = 0; candt < i4_num_srch_cands; candt++)
10056
                        {
10057
                            search_node_t *ps_search_node;
10058
10059
                            ps_search_node =
10060
                                s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061
10062
                            ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063
10064
                            if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065
                               (ps_search_node->i1_ref_idx < 0))
10066
                            {
10067
                                ASSERT(0);
10068
                            }
10069
                        }
10070
                    }
10071
#endif
10072
10073
4.61M
                    {
10074
4.61M
                        S32 srch_cand;
10075
4.61M
                        S32 num_unique_nodes = 0;
10076
4.61M
                        S32 num_nodes_searched = 0;
10077
4.61M
                        S32 num_best_cand = 0;
10078
4.61M
                        S08 i1_grid_enable = 0;
10079
4.61M
                        search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080
                        /* has list of valid partition to search terminated by -1 */
10081
4.61M
                        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082
4.61M
                        S32 center_x;
10083
4.61M
                        S32 center_y;
10084
10085
                        /* indicates if the centre point of grid needs to be explicitly added for search */
10086
4.61M
                        S32 add_centre = 0;
10087
10088
4.61M
                        memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089
4.61M
                        center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090
4.61M
                        center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10091
10092
4.61M
                        for(srch_cand = 0;
10093
67.6M
                            (srch_cand < i4_num_srch_cands) &&
10094
67.6M
                            (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095
62.9M
                            srch_cand++)
10096
62.9M
                        {
10097
62.9M
                            search_node_t s_search_node_temp =
10098
62.9M
                                s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099
10100
62.9M
                            s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10101
10102
                            /* Clip the motion vectors as well here since after clipping
10103
                            two candidates can become same and they will be removed during deduplication */
10104
62.9M
                            CLIP_MV_WITHIN_RANGE(
10105
62.9M
                                s_search_node_temp.s_mv.i2_mvx,
10106
62.9M
                                s_search_node_temp.s_mv.i2_mvy,
10107
62.9M
                                s_search_prms_blk.aps_mv_range[0],
10108
62.9M
                                ps_refine_prms->i4_num_steps_fpel_refine,
10109
62.9M
                                ps_refine_prms->i4_num_steps_hpel_refine,
10110
62.9M
                                ps_refine_prms->i4_num_steps_qpel_refine);
10111
10112
                            /* PT_C */
10113
62.9M
                            INSERT_NEW_NODE(
10114
62.9M
                                as_unique_search_nodes,
10115
62.9M
                                num_unique_nodes,
10116
62.9M
                                s_search_node_temp,
10117
62.9M
                                0,
10118
62.9M
                                au4_unique_node_map,
10119
62.9M
                                center_x,
10120
62.9M
                                center_y,
10121
62.9M
                                1);
10122
10123
62.9M
                            num_nodes_searched += 1;
10124
62.9M
                        }
10125
4.61M
                        num_unique_nodes =
10126
4.61M
                            MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127
10128
                        /* If the number of unique candidates projected / to be refined is at least 2,
10129
                        then filter out and choose the best two here */
10130
4.61M
                        if(num_unique_nodes >= 2)
10131
1.05M
                        {
10132
1.05M
                            S32 num_results;
10133
1.05M
                            S32 cnt;
10134
1.05M
                            S32 *pi4_valid_part_ids;
10135
1.05M
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136
1.05M
                            s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137
1.05M
                            pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138
10139
                            /* pi4_valid_part_ids is updated inside */
10140
1.05M
                            hme_pred_search_no_encode(
10141
1.05M
                                &s_search_prms_blk,
10142
1.05M
                                ps_curr_layer,
10143
1.05M
                                &ps_ctxt->s_wt_pred,
10144
1.05M
                                pi4_valid_part_ids,
10145
1.05M
                                1,
10146
1.05M
                                e_me_quality_presets,
10147
1.05M
                                i1_grid_enable,
10148
1.05M
                                (ihevce_me_optimised_function_list_t *)
10149
1.05M
                                    ps_ctxt->pv_me_optimised_function_list
10150
10151
1.05M
                            );
10152
10153
1.05M
                            num_best_cand = 0;
10154
1.05M
                            cnt = 0;
10155
1.05M
                            num_results = ps_search_results->u1_num_results_per_part;
10156
10157
4.94M
                            while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158
3.88M
                            {
10159
3.88M
                                num_results =
10160
3.88M
                                    MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161
10162
8.07M
                                for(i = 0; i < num_results; i++)
10163
4.18M
                                {
10164
4.18M
                                    search_node_t s_search_node_temp;
10165
4.18M
                                    s_search_node_temp =
10166
4.18M
                                        *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167
4.18M
                                    if(s_search_node_temp.i1_ref_idx >= 0)
10168
4.18M
                                    {
10169
4.18M
                                        INSERT_NEW_NODE_NOMAP(
10170
4.18M
                                            as_best_two_proj_node,
10171
4.18M
                                            num_best_cand,
10172
4.18M
                                            s_search_node_temp,
10173
4.18M
                                            0);
10174
4.18M
                                    }
10175
4.18M
                                }
10176
3.88M
                            }
10177
1.05M
                        }
10178
3.55M
                        else
10179
3.55M
                        {
10180
3.55M
                            add_centre = 1;
10181
3.55M
                            num_best_cand = num_unique_nodes;
10182
3.55M
                            as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183
3.55M
                        }
10184
10185
4.61M
                        num_unique_nodes = 0;
10186
4.61M
                        num_nodes_searched = 0;
10187
10188
4.61M
                        if(1 == num_best_cand)
10189
4.24M
                        {
10190
4.24M
                            search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191
4.24M
                            S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192
4.24M
                            S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193
4.24M
                            S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194
10195
4.24M
                            i1_grid_enable = 1;
10196
10197
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200
10201
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204
10205
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208
10209
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212
10213
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216
10217
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220
10221
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224
10225
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226
4.24M
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227
4.24M
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228
10229
4.24M
                            if(add_centre)
10230
3.55M
                            {
10231
3.55M
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232
3.55M
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233
3.55M
                                as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234
3.55M
                            }
10235
4.24M
                        }
10236
372k
                        else
10237
372k
                        {
10238
                            /* For the candidates where refinement was required, choose the best two */
10239
1.16M
                            for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240
794k
                            {
10241
794k
                                search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242
794k
                                WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243
794k
                                WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244
10245
                                /* Because there may not be two best unique candidates (because of clipping),
10246
                                second best candidate can be uninitialized, ignore that */
10247
794k
                                if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248
794k
                                   s_search_node_temp.i1_ref_idx < 0)
10249
0
                                {
10250
0
                                    num_nodes_searched++;
10251
0
                                    continue;
10252
0
                                }
10253
10254
                                /* PT_C */
10255
                                /* Since the center point has already been evaluated and best results are persistent,
10256
                                it will not be evaluated again */
10257
794k
                                if(add_centre) /* centre point added explicitly again if search results is not updated */
10258
0
                                {
10259
0
                                    INSERT_NEW_NODE(
10260
0
                                        as_unique_search_nodes,
10261
0
                                        num_unique_nodes,
10262
0
                                        s_search_node_temp,
10263
0
                                        0,
10264
0
                                        au4_unique_node_map,
10265
0
                                        center_x,
10266
0
                                        center_y,
10267
0
                                        1);
10268
0
                                }
10269
10270
                                /* PT_L */
10271
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10273
794k
                                INSERT_NEW_NODE(
10274
794k
                                    as_unique_search_nodes,
10275
794k
                                    num_unique_nodes,
10276
794k
                                    s_search_node_temp,
10277
794k
                                    0,
10278
794k
                                    au4_unique_node_map,
10279
794k
                                    center_x,
10280
794k
                                    center_y,
10281
794k
                                    1);
10282
10283
                                /* PT_T */
10284
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10285
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286
794k
                                INSERT_NEW_NODE(
10287
794k
                                    as_unique_search_nodes,
10288
794k
                                    num_unique_nodes,
10289
794k
                                    s_search_node_temp,
10290
794k
                                    0,
10291
794k
                                    au4_unique_node_map,
10292
794k
                                    center_x,
10293
794k
                                    center_y,
10294
794k
                                    1);
10295
10296
                                /* PT_R */
10297
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10299
794k
                                INSERT_NEW_NODE(
10300
794k
                                    as_unique_search_nodes,
10301
794k
                                    num_unique_nodes,
10302
794k
                                    s_search_node_temp,
10303
794k
                                    0,
10304
794k
                                    au4_unique_node_map,
10305
794k
                                    center_x,
10306
794k
                                    center_y,
10307
794k
                                    1);
10308
10309
                                /* PT_B */
10310
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10311
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312
794k
                                INSERT_NEW_NODE(
10313
794k
                                    as_unique_search_nodes,
10314
794k
                                    num_unique_nodes,
10315
794k
                                    s_search_node_temp,
10316
794k
                                    0,
10317
794k
                                    au4_unique_node_map,
10318
794k
                                    center_x,
10319
794k
                                    center_y,
10320
794k
                                    1);
10321
10322
                                /* PT_TL */
10323
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325
794k
                                INSERT_NEW_NODE(
10326
794k
                                    as_unique_search_nodes,
10327
794k
                                    num_unique_nodes,
10328
794k
                                    s_search_node_temp,
10329
794k
                                    0,
10330
794k
                                    au4_unique_node_map,
10331
794k
                                    center_x,
10332
794k
                                    center_y,
10333
794k
                                    1);
10334
10335
                                /* PT_TR */
10336
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338
794k
                                INSERT_NEW_NODE(
10339
794k
                                    as_unique_search_nodes,
10340
794k
                                    num_unique_nodes,
10341
794k
                                    s_search_node_temp,
10342
794k
                                    0,
10343
794k
                                    au4_unique_node_map,
10344
794k
                                    center_x,
10345
794k
                                    center_y,
10346
794k
                                    1);
10347
10348
                                /* PT_BL */
10349
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351
794k
                                INSERT_NEW_NODE(
10352
794k
                                    as_unique_search_nodes,
10353
794k
                                    num_unique_nodes,
10354
794k
                                    s_search_node_temp,
10355
794k
                                    0,
10356
794k
                                    au4_unique_node_map,
10357
794k
                                    center_x,
10358
794k
                                    center_y,
10359
794k
                                    1);
10360
10361
                                /* PT_BR */
10362
794k
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363
794k
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364
794k
                                INSERT_NEW_NODE(
10365
794k
                                    as_unique_search_nodes,
10366
794k
                                    num_unique_nodes,
10367
794k
                                    s_search_node_temp,
10368
794k
                                    0,
10369
794k
                                    au4_unique_node_map,
10370
794k
                                    center_x,
10371
794k
                                    center_y,
10372
794k
                                    1);
10373
794k
                            }
10374
372k
                        }
10375
10376
4.61M
                        s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377
4.61M
                        s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378
10379
                        /*****************************************************************/
10380
                        /* Call the search algorithm, this includes:                     */
10381
                        /* Pre-Search-Refinement (for coarse candts)                     */
10382
                        /* Search on each candidate                                      */
10383
                        /* Post Search Refinement on winners/other new candidates        */
10384
                        /*****************************************************************/
10385
10386
4.61M
                        hme_pred_search_no_encode(
10387
4.61M
                            &s_search_prms_blk,
10388
4.61M
                            ps_curr_layer,
10389
4.61M
                            &ps_ctxt->s_wt_pred,
10390
4.61M
                            ai4_valid_part_ids,
10391
4.61M
                            0,
10392
4.61M
                            e_me_quality_presets,
10393
4.61M
                            i1_grid_enable,
10394
4.61M
                            (ihevce_me_optimised_function_list_t *)
10395
4.61M
                                ps_ctxt->pv_me_optimised_function_list);
10396
10397
4.61M
                        i1_grid_enable = 0;
10398
4.61M
                    }
10399
4.61M
                }
10400
10401
                /* for non encode layer update MV and end processing for block */
10402
2.31M
                {
10403
2.31M
                    WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404
2.31M
                    search_node_t *ps_search_node;
10405
                    /* now update the reqd results back to the layer mv bank. */
10406
2.31M
                    if(1 == ps_refine_prms->i4_layer_id)
10407
2.29M
                    {
10408
2.29M
                        hme_update_mv_bank_in_l1_me(
10409
2.29M
                            ps_search_results,
10410
2.29M
                            ps_curr_layer->ps_layer_mvbank,
10411
2.29M
                            blk_x,
10412
2.29M
                            blk_y,
10413
2.29M
                            &s_mv_update_prms);
10414
2.29M
                    }
10415
20.4k
                    else
10416
20.4k
                    {
10417
20.4k
                        hme_update_mv_bank_noencode(
10418
20.4k
                            ps_search_results,
10419
20.4k
                            ps_curr_layer->ps_layer_mvbank,
10420
20.4k
                            blk_x,
10421
20.4k
                            blk_y,
10422
20.4k
                            &s_mv_update_prms);
10423
20.4k
                    }
10424
10425
                    /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10426
                    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
10427
2.31M
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428
1.79M
                    {
10429
1.79M
                        WORD32 i4_j;
10430
1.79M
                        layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431
10432
                        //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433
                        /* Not considering this for Dyn. Search Update */
10434
1.79M
                        {
10435
5.08M
                            for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10436
3.29M
                                i4_ref_id++)
10437
3.29M
                            {
10438
3.29M
                                ps_search_node =
10439
3.29M
                                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10440
10441
7.74M
                                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10442
4.44M
                                {
10443
4.44M
                                    hme_update_dynamic_search_params(
10444
4.44M
                                        &ps_ctxt->s_coarse_dyn_range_prms
10445
4.44M
                                             .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10446
4.44M
                                                               [i4_ref_id],
10447
4.44M
                                        ps_search_node->s_mv.i2_mvy);
10448
10449
4.44M
                                    ps_search_node++;
10450
4.44M
                                }
10451
3.29M
                            }
10452
1.79M
                        }
10453
1.79M
                    }
10454
10455
2.31M
                    if(1 == ps_refine_prms->i4_layer_id)
10456
2.29M
                    {
10457
2.29M
                        WORD32 wt_pred_val, log_wt_pred_val;
10458
2.29M
                        WORD32 ref_id_of_nearest_poc = 0;
10459
2.29M
                        WORD32 max_val = 0x7fffffff;
10460
2.29M
                        WORD32 max_l0_val = 0x7fffffff;
10461
2.29M
                        WORD32 max_l1_val = 0x7fffffff;
10462
2.29M
                        WORD32 cur_val;
10463
2.29M
                        WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10464
10465
2.29M
                        WORD32 bestl0_sad = 0x7fffffff;
10466
2.29M
                        WORD32 bestl1_sad = 0x7fffffff;
10467
2.29M
                        search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10468
10469
6.87M
                        for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10470
4.58M
                            i4_ref_id++)
10471
4.58M
                        {
10472
4.58M
                            wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10473
4.58M
                            log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10474
10475
4.58M
                            ps_search_node =
10476
4.58M
                                ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10477
10478
4.58M
                            i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10479
4.58M
                                                     ((1 << log_wt_pred_val) >> 1)) >>
10480
4.58M
                                                    log_wt_pred_val;
10481
10482
4.58M
                            i4_local_cost_weighted_pred =
10483
4.58M
                                i4_local_weighted_sad +
10484
4.58M
                                (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10485
                            //the loop is redundant as the results are already sorted based on total cost
10486
                            //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10487
4.58M
                            {
10488
4.58M
                                if(i4_local_cost_weighted_pred < min_cost)
10489
2.38M
                                {
10490
2.38M
                                    min_cost = i4_local_cost_weighted_pred;
10491
2.38M
                                    min_sad = i4_local_weighted_sad;
10492
2.38M
                                }
10493
4.58M
                            }
10494
10495
                            /* For P frame, calculate the nearest poc which is either P or I frame*/
10496
4.58M
                            if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497
3.26M
                            {
10498
3.26M
                                if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10499
3.26M
                                {
10500
3.26M
                                    cur_val =
10501
3.26M
                                        ABS(ps_ctxt->i4_curr_poc -
10502
3.26M
                                            ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10503
3.26M
                                    if(cur_val < max_val)
10504
1.77M
                                    {
10505
1.77M
                                        max_val = cur_val;
10506
1.77M
                                        ref_id_of_nearest_poc = i4_ref_id;
10507
1.77M
                                    }
10508
3.26M
                                }
10509
3.26M
                            }
10510
4.58M
                        }
10511
                        /* Store the ME cost w.r.t. the past frame, only for P frames */
10512
2.29M
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10513
1.77M
                        {
10514
1.77M
                            if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10515
1.77M
                            {
10516
1.77M
                                WORD16 i2_mvx, i2_mvy;
10517
10518
1.77M
                                WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10519
1.77M
                                WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10520
1.77M
                                WORD32 z_scan_idx =
10521
1.77M
                                    gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10522
1.77M
                                WORD32 wt, log_wt;
10523
10524
                                /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10525
                                <= (1 + ps_ctxt->num_b_frms));*/
10526
10527
                                /*obtain mvx and mvy */
10528
1.77M
                                i2_mvx =
10529
1.77M
                                    ps_search_results
10530
1.77M
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10531
1.77M
                                        ->s_mv.i2_mvx;
10532
1.77M
                                i2_mvy =
10533
1.77M
                                    ps_search_results
10534
1.77M
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10535
1.77M
                                        ->s_mv.i2_mvy;
10536
10537
                                /* fetch the weight and rounding shift applied to the nearest reference's SAD */
10538
1.77M
                                wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10539
1.77M
                                log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10540
10541
                                /*register the min cost for l1 me in blk context */
10542
1.77M
                                ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10543
1.77M
                                    ((ps_search_results
10544
1.77M
                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10545
1.77M
                                          ->i4_sad *
10546
1.77M
                                      wt) +
10547
1.77M
                                     ((1 << log_wt) >> 1)) >>
10548
1.77M
                                    log_wt;
10549
1.77M
                                ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10550
1.77M
                                    ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10551
1.77M
                                    (ps_search_results
10552
1.77M
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10553
1.77M
                                         ->i4_tot_cost -
10554
1.77M
                                     ps_search_results
10555
1.77M
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10556
1.77M
                                         ->i4_sad);
10557
                                /*for complexity change detection*/
10558
1.77M
                                ps_ctxt->i4_num_blks++;
10559
1.77M
                                if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10560
1.77M
                                   (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10561
284k
                                {
10562
284k
                                    ps_ctxt->i4_num_blks_high_sad++;
10563
284k
                                }
10564
1.77M
                            }
10565
1.77M
                        }
10566
2.29M
                    }
10567
10568
                    /* EIID: Early inter intra decisions */
10569
                    /* tap L1 level SAD for inter intra decisions */
10570
2.31M
                    if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10571
2.31M
                       (!ps_ctxt->s_frm_prms
10572
1.59M
                             .is_i_pic))  //for high-quality preset->disable early decisions
10573
1.59M
                    {
10574
1.59M
                        if(1 == ps_refine_prms->i4_layer_id)
10575
1.58M
                        {
10576
1.58M
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10577
1.58M
                            ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10578
1.58M
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10579
1.58M
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10580
1.58M
                            WORD32 z_scan_idx =
10581
1.58M
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10582
1.58M
                            ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10583
10584
                            /*register the min cost for l1 me in blk context */
10585
1.58M
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10586
1.58M
                                i4_min_sad_cost_8x8_block;
10587
1.58M
                            i4_num_comparisions++;
10588
10589
                            /* take early inter-intra decision here */
10590
1.58M
                            ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10591
1.58M
#if DISABLE_INTRA_IN_BPICS
10592
1.58M
                            if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10593
1.58M
                               (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10594
222k
                            {
10595
222k
                                ps_curr_ed_blk_ctxt->intra_or_inter =
10596
222k
                                    2; /*eval only inter if inter cost is less */
10597
222k
                                i4_num_inter_wins++;
10598
222k
                            }
10599
1.36M
                            else
10600
1.36M
#endif
10601
1.36M
                            {
10602
1.36M
                                if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10603
1.36M
                                   ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10604
1.36M
                                     i4_threshold_multiplier) /
10605
1.36M
                                    i4_threshold_divider))
10606
605k
                                {
10607
605k
                                    ps_curr_ed_blk_ctxt->intra_or_inter =
10608
605k
                                        2; /*eval only inter if inter cost is less */
10609
605k
                                    i4_num_inter_wins++;
10610
605k
                                }
10611
1.36M
                            }
10612
10613
                            //{
10614
                            //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10615
                            //      blk_x,blk_y,
10616
                            //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10617
                            //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10618
                            //      i4_min_sad_cost_8x8_block
10619
                            //      );
10620
                            //}
10621
10622
1.58M
                        }  //end of layer-1
10623
1.59M
                    }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10624
719k
                    else
10625
719k
                    {
10626
719k
                        if(1 == ps_refine_prms->i4_layer_id)
10627
710k
                        {
10628
710k
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10629
710k
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10630
710k
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10631
710k
                            WORD32 z_scan_idx =
10632
710k
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10633
10634
                            /*register the min cost for l1 me in blk context */
10635
710k
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10636
710k
                                i4_min_sad_cost_8x8_block;
10637
710k
                        }
10638
719k
                    }
10639
2.31M
                    if(1 == ps_refine_prms->i4_layer_id)
10640
2.29M
                    {
10641
2.29M
                        WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10642
2.29M
                        WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10643
2.29M
                        WORD32 z_scan_idx =
10644
2.29M
                            gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10645
10646
2.29M
                        ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10647
2.29M
                            min_sad;
10648
10649
2.29M
                        if(min_cost <
10650
2.29M
                           ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10651
1.91M
                        {
10652
1.91M
                            ps_ctxt->i4_L1_hme_best_cost += min_cost;
10653
1.91M
                            ps_ctxt->i4_L1_hme_sad += min_sad;
10654
1.91M
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10655
1.91M
                        }
10656
377k
                        else
10657
377k
                        {
10658
377k
                            ps_ctxt->i4_L1_hme_best_cost +=
10659
377k
                                ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10660
377k
                            ps_ctxt->i4_L1_hme_sad +=
10661
377k
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10662
377k
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10663
377k
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10664
377k
                        }
10665
2.29M
                    }
10666
2.31M
                }
10667
2.31M
            }
10668
10669
            /* Update the number of blocks processed in the current row */
10670
2.31M
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
10671
719k
            {
10672
719k
                ihevce_dmgr_set_row_row_sync(
10673
719k
                    pv_hme_dep_mngr,
10674
719k
                    (i4_ctb_x + 1),
10675
719k
                    blk_y,
10676
719k
                    0 /* Col Tile No. : Not supported in PreEnc*/);
10677
719k
            }
10678
2.31M
        }
10679
10680
        /* set the output dependency after completion of row */
10681
544k
        ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10682
544k
    }
10683
179k
}