Coverage Report

Created: 2023-09-25 07:43

/src/libhevc/encoder/hme_refine.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
******************************************************************************
22
* @file hme_refine.c
23
*
24
* @brief
25
*    Contains the implementation of the refinement layer searches and related
26
*    functionality like CU merge.
27
*
28
* @author
29
*    Ittiam
30
*
31
*
32
* List of Functions
33
*
34
*
35
******************************************************************************
36
*/
37
38
/*****************************************************************************/
39
/* File Includes                                                             */
40
/*****************************************************************************/
41
/* System include files */
42
#include <stdio.h>
43
#include <string.h>
44
#include <stdlib.h>
45
#include <assert.h>
46
#include <stdarg.h>
47
#include <math.h>
48
#include <limits.h>
49
50
/* User include files */
51
#include "ihevc_typedefs.h"
52
#include "itt_video_api.h"
53
#include "ihevce_api.h"
54
55
#include "rc_cntrl_param.h"
56
#include "rc_frame_info_collector.h"
57
#include "rc_look_ahead_params.h"
58
59
#include "ihevc_defs.h"
60
#include "ihevc_structs.h"
61
#include "ihevc_platform_macros.h"
62
#include "ihevc_deblk.h"
63
#include "ihevc_itrans_recon.h"
64
#include "ihevc_chroma_itrans_recon.h"
65
#include "ihevc_chroma_intra_pred.h"
66
#include "ihevc_intra_pred.h"
67
#include "ihevc_inter_pred.h"
68
#include "ihevc_mem_fns.h"
69
#include "ihevc_padding.h"
70
#include "ihevc_weighted_pred.h"
71
#include "ihevc_sao.h"
72
#include "ihevc_resi_trans.h"
73
#include "ihevc_quant_iquant_ssd.h"
74
#include "ihevc_cabac_tables.h"
75
76
#include "ihevce_defs.h"
77
#include "ihevce_lap_enc_structs.h"
78
#include "ihevce_multi_thrd_structs.h"
79
#include "ihevce_multi_thrd_funcs.h"
80
#include "ihevce_me_common_defs.h"
81
#include "ihevce_had_satd.h"
82
#include "ihevce_error_codes.h"
83
#include "ihevce_bitstream.h"
84
#include "ihevce_cabac.h"
85
#include "ihevce_rdoq_macros.h"
86
#include "ihevce_function_selector.h"
87
#include "ihevce_enc_structs.h"
88
#include "ihevce_entropy_structs.h"
89
#include "ihevce_cmn_utils_instr_set_router.h"
90
#include "ihevce_enc_loop_structs.h"
91
#include "ihevce_bs_compute_ctb.h"
92
#include "ihevce_global_tables.h"
93
#include "ihevce_dep_mngr_interface.h"
94
#include "hme_datatype.h"
95
#include "hme_interface.h"
96
#include "hme_common_defs.h"
97
#include "hme_defs.h"
98
#include "ihevce_me_instr_set_router.h"
99
#include "hme_globals.h"
100
#include "hme_utils.h"
101
#include "hme_coarse.h"
102
#include "hme_fullpel.h"
103
#include "hme_subpel.h"
104
#include "hme_refine.h"
105
#include "hme_err_compute.h"
106
#include "hme_common_utils.h"
107
#include "hme_search_algo.h"
108
#include "ihevce_stasino_helpers.h"
109
#include "ihevce_common_utils.h"
110
111
/*****************************************************************************/
112
/* Globals                                                                   */
113
/*****************************************************************************/
114
115
/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
116
UWORD8 gau1_raster_scan_to_ctb[4][4] = {
117
    { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
118
};
119
120
/*****************************************************************************/
121
/* Extern Function declaration                                               */
122
/*****************************************************************************/
123
extern ctb_boundary_attrs_t *
124
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
125
126
/**
 * Pointer to a function returning void that receives a search node, the
 * current and coarse layer contexts, an x/y position, a reference id and a
 * result id.
 * NOTE(review): the name suggests the pointee projects a colocated candidate
 * from the coarse layer into ps_search_node; confirm against the functions
 * actually assigned to this type.
 */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
127
    search_node_t *ps_search_node,
128
    layer_ctxt_t *ps_curr_layer,
129
    layer_ctxt_t *ps_coarse_layer,
130
    S32 i4_pos_x,
131
    S32 i4_pos_y,
132
    S08 i1_ref_id,
133
    S32 i4_result_id);
134
135
/**
 * Pointer to a function returning void that receives a search node, the
 * current and coarse layer contexts, an x/y position, the number of active
 * L0 references, a prediction direction, a default reference id and a
 * result id.
 * NOTE(review): the name suggests this is the layer-0 ME variant of the
 * colocated-candidate projection routine; confirm against the functions
 * actually assigned to this type.
 */
typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
136
    search_node_t *ps_search_node,
137
    layer_ctxt_t *ps_curr_layer,
138
    layer_ctxt_t *ps_coarse_layer,
139
    S32 i4_pos_x,
140
    S32 i4_pos_y,
141
    S32 i4_num_act_ref_l0,
142
    U08 u1_pred_dir,
143
    U08 u1_default_ref_id,
144
    S32 i4_result_id);
145
146
/*****************************************************************************/
147
/* Function Definitions                                                      */
148
/*****************************************************************************/
149
150
void ihevce_no_wt_copy(
151
    coarse_me_ctxt_t *ps_ctxt,
152
    layer_ctxt_t *ps_curr_layer,
153
    pu_t *ps_pu,
154
    UWORD8 *pu1_temp_pred,
155
    WORD32 temp_stride,
156
    WORD32 blk_x,
157
    WORD32 blk_y)
158
0
{
159
0
    UWORD8 *pu1_ref;
160
0
    WORD32 ref_stride, ref_offset;
161
0
    WORD32 row, col, i4_tmp;
162
163
0
    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
164
165
0
    if(ps_pu->b2_pred_mode == PRED_L0)
166
0
    {
167
0
        WORD8 i1_ref_idx;
168
169
0
        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
170
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
171
172
0
        ref_stride = ps_curr_layer->i4_inp_stride;
173
174
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l0_mv.i2_mvy) * ref_stride;
175
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l0_mv.i2_mvx;
176
177
0
        pu1_ref += ref_offset;
178
179
0
        for(row = 0; row < temp_stride; row++)
180
0
        {
181
0
            for(col = 0; col < temp_stride; col++)
182
0
            {
183
0
                i4_tmp = pu1_ref[col];
184
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
185
0
            }
186
187
0
            pu1_ref += ref_stride;
188
0
            pu1_temp_pred += temp_stride;
189
0
        }
190
0
    }
191
0
    else
192
0
    {
193
0
        WORD8 i1_ref_idx;
194
195
0
        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
196
0
        pu1_ref = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
197
198
0
        ref_stride = ps_curr_layer->i4_inp_stride;
199
200
0
        ref_offset = ((blk_y << 3) + ps_pu->mv.s_l1_mv.i2_mvy) * ref_stride;
201
0
        ref_offset += (blk_x << 3) + ps_pu->mv.s_l1_mv.i2_mvx;
202
203
0
        pu1_ref += ref_offset;
204
205
0
        for(row = 0; row < temp_stride; row++)
206
0
        {
207
0
            for(col = 0; col < temp_stride; col++)
208
0
            {
209
0
                i4_tmp = pu1_ref[col];
210
0
                pu1_temp_pred[col] = CLIP_U8(i4_tmp);
211
0
            }
212
213
0
            pu1_ref += ref_stride;
214
0
            pu1_temp_pred += temp_stride;
215
0
        }
216
0
    }
217
0
}
218
219
static WORD32 hme_add_clustered_mvs_as_merge_cands(
220
    cluster_data_t *ps_cluster_base,
221
    search_node_t *ps_merge_cand,
222
    range_prms_t **pps_range_prms,
223
    U08 *pu1_refid_to_pred_dir_list,
224
    WORD32 i4_num_clusters,
225
    U08 u1_pred_dir)
226
0
{
227
0
    WORD32 i, j, k;
228
0
    WORD32 i4_num_cands_added = 0;
229
0
    WORD32 i4_num_mvs_in_cluster;
230
231
0
    for(i = 0; i < i4_num_clusters; i++)
232
0
    {
233
0
        cluster_data_t *ps_data = &ps_cluster_base[i];
234
235
0
        if(u1_pred_dir == !pu1_refid_to_pred_dir_list[ps_data->ref_id])
236
0
        {
237
0
            i4_num_mvs_in_cluster = ps_data->num_mvs;
238
239
0
            for(j = 0; j < i4_num_mvs_in_cluster; j++)
240
0
            {
241
0
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_data->as_mv[j].mvx;
242
0
                ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_data->as_mv[j].mvy;
243
0
                ps_merge_cand[i4_num_cands_added].i1_ref_idx = ps_data->ref_id;
244
245
0
                CLIP_MV_WITHIN_RANGE(
246
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
247
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
248
0
                    pps_range_prms[ps_data->ref_id],
249
0
                    0,
250
0
                    0,
251
0
                    0);
252
253
0
                for(k = 0; k < i4_num_cands_added; k++)
254
0
                {
255
0
                    if((ps_merge_cand[k].s_mv.i2_mvx == ps_data->as_mv[j].mvx) &&
256
0
                       (ps_merge_cand[k].s_mv.i2_mvy == ps_data->as_mv[j].mvy) &&
257
0
                       (ps_merge_cand[k].i1_ref_idx == ps_data->ref_id))
258
0
                    {
259
0
                        break;
260
0
                    }
261
0
                }
262
263
0
                if(k == i4_num_cands_added)
264
0
                {
265
0
                    i4_num_cands_added++;
266
0
                }
267
0
            }
268
0
        }
269
0
    }
270
271
0
    return i4_num_cands_added;
272
0
}
273
274
static WORD32 hme_add_me_best_as_merge_cands(
275
    search_results_t **pps_child_data_array,
276
    inter_cu_results_t *ps_8x8cu_results,
277
    search_node_t *ps_merge_cand,
278
    range_prms_t **pps_range_prms,
279
    U08 *pu1_refid_to_pred_dir_list,
280
    S08 *pi1_past_list,
281
    S08 *pi1_future_list,
282
    BLK_SIZE_T e_blk_size,
283
    ME_QUALITY_PRESETS_T e_quality_preset,
284
    S32 i4_num_cands_added,
285
    U08 u1_pred_dir)
286
0
{
287
0
    WORD32 i, j, k;
288
0
    WORD32 i4_max_cands_to_add;
289
290
0
    WORD32 i4_result_id = 0;
291
292
0
    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
293
0
    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
294
0
    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
295
0
    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
296
297
0
    switch(e_quality_preset)
298
0
    {
299
0
    case ME_PRISTINE_QUALITY:
300
0
    {
301
0
        i4_max_cands_to_add = MAX_MERGE_CANDTS;
302
303
0
        break;
304
0
    }
305
0
    case ME_HIGH_QUALITY:
306
0
    {
307
        /* All 4 children are split and each grandchild contributes an MV */
308
        /* and 2 best results per grandchild */
309
0
        i4_max_cands_to_add = 4 * 4 * 2;
310
311
0
        break;
312
0
    }
313
0
    case ME_MEDIUM_SPEED:
314
0
    {
315
0
        i4_max_cands_to_add = 4 * 2 * 2;
316
317
0
        break;
318
0
    }
319
0
    case ME_HIGH_SPEED:
320
0
    case ME_XTREME_SPEED:
321
0
    case ME_XTREME_SPEED_25:
322
0
    {
323
0
        i4_max_cands_to_add = 4 * 2 * 1;
324
325
0
        break;
326
0
    }
327
0
    }
328
329
0
    while(i4_result_id < 4)
330
0
    {
331
0
        for(i = 0; i < 4; i++)
332
0
        {
333
0
            inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
334
0
            inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
335
336
0
            if(!pps_child_data_array[i]->u1_split_flag)
337
0
            {
338
0
                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
339
340
0
                if(ps_child_data->u1_num_best_results <= i4_result_id)
341
0
                {
342
0
                    continue;
343
0
                }
344
345
0
                if(ps_data->as_pu_results->pu.b1_intra_flag)
346
0
                {
347
0
                    continue;
348
0
                }
349
350
0
                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
351
0
                {
352
0
                    mv_t *ps_mv;
353
354
0
                    S08 i1_ref_idx;
355
356
0
                    pu_t *ps_pu = &ps_data->as_pu_results[j].pu;
357
358
0
                    if(u1_pred_dir !=
359
0
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
360
0
                    {
361
0
                        continue;
362
0
                    }
363
364
0
                    if(u1_pred_dir)
365
0
                    {
366
0
                        ps_mv = &ps_pu->mv.s_l1_mv;
367
0
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
368
0
                    }
369
0
                    else
370
0
                    {
371
0
                        ps_mv = &ps_pu->mv.s_l0_mv;
372
0
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
373
0
                    }
374
375
0
                    if(-1 == i1_ref_idx)
376
0
                    {
377
0
                        continue;
378
0
                    }
379
380
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
381
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
382
0
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
383
384
0
                    CLIP_MV_WITHIN_RANGE(
385
0
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
386
0
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
387
0
                        pps_range_prms[i1_ref_idx],
388
0
                        0,
389
0
                        0,
390
0
                        0);
391
392
0
                    for(k = 0; k < i4_num_cands_added; k++)
393
0
                    {
394
0
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
395
0
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
396
0
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
397
0
                        {
398
0
                            break;
399
0
                        }
400
0
                    }
401
402
0
                    if(k == i4_num_cands_added)
403
0
                    {
404
0
                        i4_num_cands_added++;
405
406
0
                        if(i4_max_cands_to_add <= i4_num_cands_added)
407
0
                        {
408
0
                            return i4_num_cands_added;
409
0
                        }
410
0
                    }
411
0
                }
412
0
            }
413
0
            else
414
0
            {
415
0
                for(j = 0; j < 4; j++)
416
0
                {
417
0
                    mv_t *ps_mv;
418
419
0
                    S08 i1_ref_idx;
420
421
0
                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
422
0
                    pu_t *ps_pu = &ps_data->as_pu_results[0].pu;
423
424
0
                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
425
426
0
                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
427
0
                    {
428
0
                        continue;
429
0
                    }
430
431
0
                    if(ps_data->as_pu_results->pu.b1_intra_flag)
432
0
                    {
433
0
                        continue;
434
0
                    }
435
436
0
                    if(u1_pred_dir !=
437
0
                       ((ps_pu->b2_pred_mode == 2) ? u1_pred_dir : ps_pu->b2_pred_mode))
438
0
                    {
439
0
                        continue;
440
0
                    }
441
442
0
                    if(u1_pred_dir)
443
0
                    {
444
0
                        ps_mv = &ps_pu->mv.s_l1_mv;
445
0
                        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
446
0
                    }
447
0
                    else
448
0
                    {
449
0
                        ps_mv = &ps_pu->mv.s_l0_mv;
450
0
                        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
451
0
                    }
452
453
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx = ps_mv->i2_mvx;
454
0
                    ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy = ps_mv->i2_mvy;
455
0
                    ps_merge_cand[i4_num_cands_added].i1_ref_idx = i1_ref_idx;
456
457
0
                    CLIP_MV_WITHIN_RANGE(
458
0
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvx,
459
0
                        ps_merge_cand[i4_num_cands_added].s_mv.i2_mvy,
460
0
                        pps_range_prms[i1_ref_idx],
461
0
                        0,
462
0
                        0,
463
0
                        0);
464
465
0
                    for(k = 0; k < i4_num_cands_added; k++)
466
0
                    {
467
0
                        if((ps_merge_cand[k].s_mv.i2_mvx == ps_mv->i2_mvx) &&
468
0
                           (ps_merge_cand[k].s_mv.i2_mvy == ps_mv->i2_mvy) &&
469
0
                           (ps_merge_cand[k].i1_ref_idx == i1_ref_idx))
470
0
                        {
471
0
                            break;
472
0
                        }
473
0
                    }
474
475
0
                    if(k == i4_num_cands_added)
476
0
                    {
477
0
                        i4_num_cands_added++;
478
479
0
                        if(i4_max_cands_to_add <= i4_num_cands_added)
480
0
                        {
481
0
                            return i4_num_cands_added;
482
0
                        }
483
0
                    }
484
0
                }
485
0
            }
486
0
        }
487
488
0
        i4_result_id++;
489
0
    }
490
491
0
    return i4_num_cands_added;
492
0
}
493
494
WORD32 hme_add_cands_for_merge_eval(
495
    ctb_cluster_info_t *ps_cluster_info,
496
    search_results_t **pps_child_data_array,
497
    inter_cu_results_t *ps_8x8cu_results,
498
    range_prms_t **pps_range_prms,
499
    search_node_t *ps_merge_cand,
500
    U08 *pu1_refid_to_pred_dir_list,
501
    S08 *pi1_past_list,
502
    S08 *pi1_future_list,
503
    ME_QUALITY_PRESETS_T e_quality_preset,
504
    BLK_SIZE_T e_blk_size,
505
    U08 u1_pred_dir,
506
    U08 u1_blk_id)
507
0
{
508
0
    WORD32 i4_num_cands_added = 0;
509
510
0
    if(ME_PRISTINE_QUALITY == e_quality_preset)
511
0
    {
512
0
        cluster_data_t *ps_cluster_primo;
513
514
0
        WORD32 i4_num_clusters;
515
516
0
        if(BLK_32x32 == e_blk_size)
517
0
        {
518
0
            ps_cluster_primo = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
519
0
            i4_num_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
520
0
        }
521
0
        else
522
0
        {
523
0
            ps_cluster_primo = ps_cluster_info->ps_64x64_blk->as_cluster_data;
524
0
            i4_num_clusters = ps_cluster_info->ps_64x64_blk->num_clusters;
525
0
        }
526
527
0
        i4_num_cands_added = hme_add_clustered_mvs_as_merge_cands(
528
0
            ps_cluster_primo,
529
0
            ps_merge_cand,
530
0
            pps_range_prms,
531
0
            pu1_refid_to_pred_dir_list,
532
0
            i4_num_clusters,
533
0
            u1_pred_dir);
534
0
    }
535
536
0
    i4_num_cands_added = hme_add_me_best_as_merge_cands(
537
0
        pps_child_data_array,
538
0
        ps_8x8cu_results,
539
0
        ps_merge_cand,
540
0
        pps_range_prms,
541
0
        pu1_refid_to_pred_dir_list,
542
0
        pi1_past_list,
543
0
        pi1_future_list,
544
0
        e_blk_size,
545
0
        e_quality_preset,
546
0
        i4_num_cands_added,
547
0
        u1_pred_dir);
548
549
0
    return i4_num_cands_added;
550
0
}
551
552
/**
553
********************************************************************************
554
*  @fn   WORD32 hme_pick_eval_merge_candts(hme_merge_prms_t *ps_merge_prms,
555
*                                           S08 i1_ref_idx,
556
*                                           S32 i4_best_part_type,
557
*                                           S32 i4_is_vert)
558
*
559
*  @brief  Given a target partition orientation in the merged CU, and the
560
*          partition type of most likely partition this fxn picks up
561
*          candidates from the 4 constituent CUs and does refinement search
562
*          to identify best results for the merge CU across active partitions
563
*
564
*  @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
565
*                  these params, the search result structure is also derived and
566
*                 updated during the search
567
*
568
*  @param[in] i1_ref_idx : ID of the buffer within the search results to update.
569
*               Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
570
*
571
*  @param[in] i4_best_part_type : partition type of potential partition in the
572
*              merged CU, -1 if the merge process has not yet been able to
573
*              determine this.
574
*
575
*  @param[in] i4_is_vert : Whether target partition of merged CU is vertical
576
*             orientation or horizontal orientation.
577
*
578
*  @return Number of merge candidates
579
********************************************************************************
580
*/
581
WORD32 hme_pick_eval_merge_candts(
582
    hme_merge_prms_t *ps_merge_prms,
583
    hme_subpel_prms_t *ps_subpel_prms,
584
    S32 i4_search_idx,
585
    S32 i4_best_part_type,
586
    S32 i4_is_vert,
587
    wgt_pred_ctxt_t *ps_wt_inp_prms,
588
    S32 i4_frm_qstep,
589
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
590
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
591
0
{
592
0
    S32 x_off, y_off;
593
0
    search_node_t *ps_search_node;
594
0
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
595
0
    S32 i4_num_valid_parts;
596
0
    pred_ctxt_t *ps_pred_ctxt;
597
598
0
    search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
599
0
    S32 num_unique_nodes_cu_merge = 0;
600
601
0
    search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
602
0
    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
603
0
    S32 i4_part_mask = ps_search_results->i4_part_mask;
604
605
0
    search_results_t *aps_child_results[4];
606
0
    layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
607
608
0
    S32 i4_ref_stride, i, j;
609
0
    result_upd_prms_t s_result_prms;
610
611
0
    BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
612
0
    S32 i4_offset;
613
614
    /*************************************************************************/
615
    /* Function pointer for SAD/SATD, array and prms structure to pass to    */
616
    /* This function                                                         */
617
    /*************************************************************************/
618
0
    PF_SAD_FXN_T pf_err_compute;
619
0
    S32 ai4_sad_grid[9][17];
620
0
    err_prms_t s_err_prms;
621
622
    /*************************************************************************/
623
    /* Allowed MV RANGE                                                      */
624
    /*************************************************************************/
625
0
    range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
626
0
    PF_INTERP_FXN_T pf_qpel_interp;
627
0
    PF_MV_COST_FXN pf_mv_cost_compute;
628
0
    WORD32 pred_lx;
629
0
    U08 *apu1_hpel_ref[4];
630
631
0
    interp_prms_t s_interp_prms;
632
0
    S32 i4_interp_buf_id;
633
634
0
    S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
635
0
    S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
636
637
    /* Sanity checks */
638
0
    ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
639
640
0
    s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
641
642
    /* Initialize all the ptrs to child CUs for merge decision */
643
0
    aps_child_results[0] = ps_merge_prms->ps_results_tl;
644
0
    aps_child_results[1] = ps_merge_prms->ps_results_tr;
645
0
    aps_child_results[2] = ps_merge_prms->ps_results_bl;
646
0
    aps_child_results[3] = ps_merge_prms->ps_results_br;
647
648
0
    num_unique_nodes_cu_merge = 0;
649
650
0
    pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
651
652
0
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
653
0
    {
654
0
        num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
655
0
            ps_merge_prms->ps_cluster_info,
656
0
            aps_child_results,
657
0
            ps_merge_prms->ps_8x8_cu_results,
658
0
            pps_range_prms,
659
0
            as_merge_unique_node,
660
0
            ps_search_results->pu1_is_past,
661
0
            ps_merge_prms->pi1_past_list,
662
0
            ps_merge_prms->pi1_future_list,
663
0
            ps_merge_prms->e_quality_preset,
664
0
            e_blk_size,
665
0
            i4_search_idx,
666
0
            (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
667
0
                (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
668
0
    }
669
0
    else
670
0
    {
671
        /*************************************************************************/
672
        /* Populate the list of unique search nodes in the child CUs for merge   */
673
        /* evaluation                                                            */
674
        /*************************************************************************/
675
0
        for(i = 0; i < 4; i++)
676
0
        {
677
0
            search_node_t s_search_node;
678
679
0
            PART_TYPE_T e_part_type;
680
0
            PART_ID_T e_part_id;
681
682
0
            WORD32 part_num;
683
684
0
            search_results_t *ps_child = aps_child_results[i];
685
686
0
            if(ps_child->ps_cu_results->u1_num_best_results)
687
0
            {
688
0
                if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
689
0
                     (1 == ps_child->ps_cu_results->u1_num_best_results)))
690
0
                {
691
0
                    e_part_type =
692
0
                        (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
693
694
0
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
695
696
                    /* Insert mvs of NxN partitions. */
697
0
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
698
0
                        part_num++)
699
0
                    {
700
0
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
701
702
0
                        if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
703
0
                        {
704
0
                            s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
705
0
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
706
0
                            {
707
0
                                CLIP_MV_WITHIN_RANGE(
708
0
                                    s_search_node.s_mv.i2_mvx,
709
0
                                    s_search_node.s_mv.i2_mvy,
710
0
                                    pps_range_prms[s_search_node.i1_ref_idx],
711
0
                                    0,
712
0
                                    0,
713
0
                                    0);
714
715
0
                                INSERT_NEW_NODE_NOMAP(
716
0
                                    as_merge_unique_node,
717
0
                                    num_unique_nodes_cu_merge,
718
0
                                    s_search_node,
719
0
                                    1);
720
0
                            }
721
0
                        }
722
0
                    }
723
0
                }
724
0
            }
725
0
            else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
726
0
                           .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
727
0
                      (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
728
0
                                .ps_cu_results->u1_num_best_results)))
729
0
            {
730
0
                search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
731
732
0
                for(j = 0; j < 4; j++)
733
0
                {
734
0
                    e_part_type = (PART_TYPE_T)ps_results_root[j]
735
0
                                      .ps_cu_results->ps_best_results[0]
736
0
                                      .u1_part_type;
737
738
0
                    ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
739
740
                    /* Insert mvs of NxN partitions. */
741
0
                    for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
742
0
                        part_num++)
743
0
                    {
744
0
                        e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
745
746
0
                        if((ps_results_root[j]
747
0
                                .aps_part_results[i4_search_idx][e_part_id]
748
0
                                ->i1_ref_idx != -1) &&
749
0
                           (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
750
0
                                 .b1_intra_flag))
751
0
                        {
752
0
                            s_search_node =
753
0
                                *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
754
0
                            if(s_search_node.s_mv.i2_mvx != INTRA_MV)
755
0
                            {
756
0
                                CLIP_MV_WITHIN_RANGE(
757
0
                                    s_search_node.s_mv.i2_mvx,
758
0
                                    s_search_node.s_mv.i2_mvy,
759
0
                                    pps_range_prms[s_search_node.i1_ref_idx],
760
0
                                    0,
761
0
                                    0,
762
0
                                    0);
763
764
0
                                INSERT_NEW_NODE_NOMAP(
765
0
                                    as_merge_unique_node,
766
0
                                    num_unique_nodes_cu_merge,
767
0
                                    s_search_node,
768
0
                                    1);
769
0
                            }
770
0
                        }
771
0
                    }
772
0
                }
773
0
            }
774
0
        }
775
0
    }
776
777
0
    if(0 == num_unique_nodes_cu_merge)
778
0
    {
779
0
        return 0;
780
0
    }
781
782
    /*************************************************************************/
783
    /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
784
    /* fixed through this subpel refinement for this partition.              */
785
    /* Note, we do not enable grid sads since one pt is evaluated per node   */
786
    /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled.   */
787
    /*************************************************************************/
788
0
    i4_part_mask = ps_search_results->i4_part_mask;
789
790
    /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
791
0
    if(ps_subpel_prms->i4_use_satd)
792
0
    {
793
0
        if(BLK_32x32 == e_blk_size)
794
0
        {
795
0
            pf_err_compute = hme_evalsatd_pt_pu_32x32;
796
0
        }
797
0
        else
798
0
        {
799
0
            pf_err_compute = hme_evalsatd_pt_pu_64x64;
800
0
        }
801
0
    }
802
0
    else
803
0
    {
804
0
        pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
805
0
    }
806
807
0
    i4_ref_stride = ps_curr_layer->i4_rec_stride;
808
809
0
    x_off = ps_merge_prms->ps_results_tl->u1_x_off;
810
0
    y_off = ps_merge_prms->ps_results_tl->u1_y_off;
811
0
    i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
812
813
    /*************************************************************************/
814
    /* This array stores the ids of the partitions whose                     */
815
    /* SADs are updated. Since the partitions whose SADs are updated may not */
816
    /* be in contiguous order, we supply another level of indirection.       */
817
    /*************************************************************************/
818
0
    i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
819
820
    /* Initialize result params used for partition update */
821
0
    s_result_prms.pf_mv_cost_compute = NULL;
822
0
    s_result_prms.ps_search_results = ps_search_results;
823
0
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
824
0
    s_result_prms.i1_ref_idx = i4_search_idx;
825
0
    s_result_prms.i4_part_mask = i4_part_mask;
826
0
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
827
0
    s_result_prms.i4_grid_mask = 1;
828
829
    /* One time Initialization of error params used for SAD/SATD compute */
830
0
    s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
831
0
    s_err_prms.i4_ref_stride = i4_ref_stride;
832
0
    s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
833
0
    s_err_prms.i4_grid_mask = 1;
834
0
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
835
0
    s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
836
0
    s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
837
0
    s_err_prms.i4_step = 1;
838
839
    /*************************************************************************/
840
    /* One time preparation of non changing interpolation params.            */
841
    /*************************************************************************/
842
0
    s_interp_prms.i4_ref_stride = i4_ref_stride;
843
0
    s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
844
0
    s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
845
0
    s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
846
0
    s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
847
0
    i4_interp_buf_id = 0;
848
849
0
    pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
850
851
    /***************************************************************************/
852
    /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
853
    /* results                                                                 */
854
    /***************************************************************************/
855
0
    for(i = 0; i < num_unique_nodes_cu_merge; i++)
856
0
    {
857
0
        WORD8 i1_ref_idx;
858
0
        ps_search_node = &as_merge_unique_node[i];
859
860
        /*********************************************************************/
861
        /* Compute the base pointer for input, interpolated buffers          */
862
        /* The base pointers point as follows:                               */
863
        /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
864
        /* To these, we need to add the offset of the current node           */
865
        /*********************************************************************/
866
0
        i1_ref_idx = ps_search_node->i1_ref_idx;
867
0
        apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
868
0
        apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
869
0
        apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
870
0
        apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
871
872
0
        s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
873
874
0
        pf_qpel_interp(
875
0
            &s_interp_prms,
876
0
            ps_search_node->s_mv.i2_mvx,
877
0
            ps_search_node->s_mv.i2_mvy,
878
0
            i4_interp_buf_id);
879
880
0
        pred_lx = i4_search_idx;
881
0
        ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
882
883
0
        s_result_prms.u1_pred_lx = pred_lx;
884
0
        s_result_prms.ps_search_node_base = ps_search_node;
885
0
        s_err_prms.pu1_inp =
886
0
            ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
887
0
        s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
888
0
        s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
889
890
        /* Carry out the SAD/SATD. This call also does the TU RECURSION.
891
        Here the tu recursion logic is restricted with the size of the PU*/
892
0
        pf_err_compute(&s_err_prms);
893
894
0
        if(ps_subpel_prms->u1_is_cu_noisy &&
895
0
           ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
896
0
        {
897
0
            ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
898
0
                s_err_prms.pu1_ref,
899
0
                s_err_prms.i4_ref_stride,
900
0
                ai4_valid_part_ids,
901
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
902
0
                ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
903
0
                s_err_prms.pi4_sad_grid,
904
0
                ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
905
0
                ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
906
0
                ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
907
0
                i4_num_valid_parts,
908
0
                ps_wt_inp_prms->wpred_log_wdc,
909
0
                (BLK_32x32 == e_blk_size) ? 32 : 64);
910
0
        }
911
912
        /* Update the mv's */
913
0
        s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
914
0
        s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
915
916
        /* Update best results */
917
0
        hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
918
0
    }
919
920
    /************************************************************************/
921
    /* Update mv cost and total cost for each valid partition in the CU     */
922
    /************************************************************************/
923
0
    for(i = 0; i < TOT_NUM_PARTS; i++)
924
0
    {
925
0
        if(i4_part_mask & (1 << i))
926
0
        {
927
0
            WORD32 j;
928
0
            WORD32 i4_mv_cost;
929
930
0
            ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
931
932
0
            for(j = 0;
933
0
                j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
934
0
                j++)
935
0
            {
936
0
                if(ps_search_node->i1_ref_idx != -1)
937
0
                {
938
0
                    pred_lx = i4_search_idx;
939
0
                    ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
940
941
                    /* Prediction context should now deal with qpel units */
942
0
                    HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
943
944
0
                    ps_search_node->u1_subpel_done = 1;
945
0
                    ps_search_node->u1_is_avail = 1;
946
947
0
                    i4_mv_cost =
948
0
                        pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
949
950
0
                    ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
951
0
                    ps_search_node->i4_mv_cost = i4_mv_cost;
952
953
0
                    ps_search_node++;
954
0
                }
955
0
            }
956
0
        }
957
0
    }
958
959
0
    return num_unique_nodes_cu_merge;
960
0
}
961
962
0
/* Threshold on the number of intra parts counted in the children CUs (each  */
/* part is 1/16th of the CU being merged). hme_try_merge_high_speed compares */
/* its intra part count against this value when deciding whether to take the */
/* early intra-merge / CU_SPLIT exit instead of evaluating merge candidates. */
#define CU_MERGE_MAX_INTRA_PARTS 4
963
964
/**
965
********************************************************************************
966
*  @fn     hme_try_merge_high_speed
967
*
968
*  @brief  Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
969
*          entity or with partitions, for the high speed preset
970
*
971
*  @param[in,out]  ps_merge_prms : Params for CU merge (hme_merge_prms_t)
972
*
973
*  @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
974
********************************************************************************
975
*/
976
CU_MERGE_RESULT_T hme_try_merge_high_speed(
977
    me_ctxt_t *ps_thrd_ctxt,
978
    me_frm_ctxt_t *ps_ctxt,
979
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
980
    hme_subpel_prms_t *ps_subpel_prms,
981
    hme_merge_prms_t *ps_merge_prms,
982
    inter_pu_results_t *ps_pu_results,
983
    pu_result_t *ps_pu_result)
984
0
{
985
0
    search_results_t *ps_results_tl, *ps_results_tr;
986
0
    search_results_t *ps_results_bl, *ps_results_br;
987
988
0
    S32 i;
989
0
    S32 i4_search_idx;
990
0
    S32 i4_cost_parent;
991
0
    S32 intra_cu_size;
992
0
    ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
993
994
0
    search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
995
0
    wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
996
997
0
    S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN;
998
0
    S32 is_vert = 0, i4_best_part_type = -1;
999
0
    S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
1000
0
    S32 i4_cost_children = 0;
1001
0
    S32 i4_frm_qstep = ps_ctxt->frm_qstep;
1002
0
    S32 i4_num_merge_cands_evaluated = 0;
1003
0
    U08 u1_x_off = ps_results_merge->u1_x_off;
1004
0
    U08 u1_y_off = ps_results_merge->u1_y_off;
1005
0
    S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5);
1006
1007
0
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
1008
0
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
1009
0
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
1010
0
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
1011
0
    ps_results_tl = ps_merge_prms->ps_results_tl;
1012
0
    ps_results_tr = ps_merge_prms->ps_results_tr;
1013
0
    ps_results_bl = ps_merge_prms->ps_results_bl;
1014
0
    ps_results_br = ps_merge_prms->ps_results_br;
1015
1016
0
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
1017
0
    {
1018
0
        i4_part_mask &= ~ENABLE_AMP;
1019
0
    }
1020
1021
0
    if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
1022
0
    {
1023
0
        i4_part_mask &= ~ENABLE_AMP;
1024
1025
0
        i4_part_mask &= ~ENABLE_SMP;
1026
0
    }
1027
1028
0
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1029
1030
    /*************************************************************************/
1031
    /* The logic for High speed CU merge goes as follows:                    */
1032
    /*                                                                       */
1033
    /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
1034
    /*    exceed 7                                                           */
1035
    /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
1036
    /*    are identical                                                      */
1037
    /* 3. Find the all unique mvs of best partitions of children CUs and     */
1038
    /*    evaluate partial SATDs (all 17 partitions) for each unique mv. If  */
1039
    /*    best parent cost is lower than sum of the best children costs      */
1040
    /*    return CU_MERGE after seeding the best results else return CU_SPLIT*/
1041
    /*                                                                       */
1042
    /*************************************************************************/
1043
1044
    /* Count the number of best partitions in child CUs, early exit if > 7 */
1045
0
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1046
0
       (CU_32x32 == ps_results_merge->e_cu_size))
1047
0
    {
1048
0
        S32 num_parts_in_32x32 = 0;
1049
0
        WORD32 i4_part_type;
1050
1051
0
        if(ps_results_tl->u1_split_flag)
1052
0
        {
1053
0
            num_parts_in_32x32 += 4;
1054
1055
0
#define COST_INTERCHANGE 0
1056
0
            i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
1057
0
                               ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
1058
0
                               ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
1059
0
                               ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
1060
0
        }
1061
0
        else
1062
0
        {
1063
0
            i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
1064
0
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1065
0
            i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1066
0
        }
1067
1068
0
        if(ps_results_tr->u1_split_flag)
1069
0
        {
1070
0
            num_parts_in_32x32 += 4;
1071
1072
0
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
1073
0
                                ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
1074
0
                                ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
1075
0
                                ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
1076
0
        }
1077
0
        else
1078
0
        {
1079
0
            i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
1080
0
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1081
0
            i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
1082
0
        }
1083
1084
0
        if(ps_results_bl->u1_split_flag)
1085
0
        {
1086
0
            num_parts_in_32x32 += 4;
1087
1088
0
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
1089
0
                                ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
1090
0
                                ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
1091
0
                                ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
1092
0
        }
1093
0
        else
1094
0
        {
1095
0
            i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
1096
0
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1097
0
            i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
1098
0
        }
1099
1100
0
        if(ps_results_br->u1_split_flag)
1101
0
        {
1102
0
            num_parts_in_32x32 += 4;
1103
1104
0
            i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
1105
0
                                ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
1106
0
                                ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
1107
0
                                ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
1108
0
        }
1109
0
        else
1110
0
        {
1111
0
            i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
1112
0
            num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
1113
0
            i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
1114
0
        }
1115
1116
0
        if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
1117
0
        {
1118
0
            return CU_SPLIT;
1119
0
        }
1120
1121
0
        if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
1122
0
           (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
1123
0
        {
1124
0
            return CU_SPLIT;
1125
0
        }
1126
0
    }
1127
1128
    /* Accumulate intra percentage before merge for early CU_SPLIT decision     */
1129
    /* Note : Each intra part represent a NxN unit of the children CUs          */
1130
    /* This is essentially 1/16th of the CUsize under consideration for merge   */
1131
0
    if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
1132
0
    {
1133
0
        if(CU_64x64 == ps_results_merge->e_cu_size)
1134
0
        {
1135
0
            i4_intra_parts =
1136
0
                (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
1137
0
                    ? 16
1138
0
                    : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
1139
0
        }
1140
0
        else
1141
0
        {
1142
0
            switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
1143
0
            {
1144
0
            case 0:
1145
0
            {
1146
0
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
1147
0
                                       ->u1_inter_eval_enable)
1148
0
                                     ? 16
1149
0
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1150
0
                                            ->ps_child_node_tl->u1_intra_eval_enable);
1151
1152
0
                break;
1153
0
            }
1154
0
            case 1:
1155
0
            {
1156
0
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
1157
0
                                       ->u1_inter_eval_enable)
1158
0
                                     ? 16
1159
0
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1160
0
                                            ->ps_child_node_tr->u1_intra_eval_enable);
1161
1162
0
                break;
1163
0
            }
1164
0
            case 2:
1165
0
            {
1166
0
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
1167
0
                                       ->u1_inter_eval_enable)
1168
0
                                     ? 16
1169
0
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1170
0
                                            ->ps_child_node_bl->u1_intra_eval_enable);
1171
1172
0
                break;
1173
0
            }
1174
0
            case 3:
1175
0
            {
1176
0
                i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
1177
0
                                       ->u1_inter_eval_enable)
1178
0
                                     ? 16
1179
0
                                     : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
1180
0
                                            ->ps_child_node_br->u1_intra_eval_enable);
1181
1182
0
                break;
1183
0
            }
1184
0
            }
1185
0
        }
1186
0
    }
1187
0
    else
1188
0
    {
1189
0
        for(i = 0; i < 4; i++)
1190
0
        {
1191
0
            search_results_t *ps_results =
1192
0
                (i == 0) ? ps_results_tl
1193
0
                         : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
1194
1195
0
            part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
1196
1197
0
            if(ps_results->u1_split_flag)
1198
0
            {
1199
0
                U08 u1_x_off = ps_results->u1_x_off;
1200
0
                U08 u1_y_off = ps_results->u1_y_off;
1201
0
                U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
1202
0
                                      2;
1203
1204
                /* Special case to handle 8x8 CUs when 16x16 is split */
1205
0
                ASSERT(ps_results->e_cu_size == CU_16x16);
1206
1207
0
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
1208
1209
0
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1210
0
                    i4_intra_parts += 1;
1211
1212
0
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
1213
1214
0
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1215
0
                    i4_intra_parts += 1;
1216
1217
0
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
1218
1219
0
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1220
0
                    i4_intra_parts += 1;
1221
1222
0
                ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
1223
1224
0
                if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
1225
0
                    i4_intra_parts += 1;
1226
0
            }
1227
0
            else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
1228
0
            {
1229
0
                i4_intra_parts += 4;
1230
0
            }
1231
0
        }
1232
0
    }
1233
1234
    /* Determine the max intra CU size indicated by IPE */
1235
0
    intra_cu_size = CU_64x64;
1236
0
    if(ps_cur_ipe_ctb->u1_split_flag)
1237
0
    {
1238
0
        intra_cu_size = CU_32x32;
1239
0
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
1240
0
        {
1241
0
            intra_cu_size = CU_16x16;
1242
0
        }
1243
0
    }
1244
1245
0
    if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
1246
0
        (intra_cu_size < ps_results_merge->e_cu_size) &&
1247
0
        (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
1248
0
       (i4_intra_parts == 16))
1249
0
    {
1250
0
        S32 i4_merge_outcome;
1251
1252
0
        i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
1253
0
                               ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
1254
0
                                  ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
1255
0
                               : (!ps_cur_ipe_ctb->u1_split_flag);
1256
1257
0
        i4_merge_outcome = i4_merge_outcome ||
1258
0
                           (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
1259
1260
0
        i4_merge_outcome = i4_merge_outcome &&
1261
0
                           !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
1262
1263
0
        if(i4_merge_outcome)
1264
0
        {
1265
0
            inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1266
0
            part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
1267
0
            pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
1268
1269
0
            ps_cu_results->u1_num_best_results = 1;
1270
0
            ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
1271
0
            ps_cu_results->u1_x_off = u1_x_off;
1272
0
            ps_cu_results->u1_y_off = u1_y_off;
1273
1274
0
            ps_best_result->u1_part_type = PRT_2Nx2N;
1275
0
            ps_best_result->ai4_tu_split_flag[0] = 0;
1276
0
            ps_best_result->ai4_tu_split_flag[1] = 0;
1277
0
            ps_best_result->ai4_tu_split_flag[2] = 0;
1278
0
            ps_best_result->ai4_tu_split_flag[3] = 0;
1279
0
            ps_best_result->i4_tot_cost =
1280
0
                (CU_64x64 == ps_results_merge->e_cu_size)
1281
0
                    ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
1282
0
                    : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
1283
1284
0
            ps_pu->b1_intra_flag = 1;
1285
0
            ps_pu->b4_pos_x = u1_x_off >> 2;
1286
0
            ps_pu->b4_pos_y = u1_y_off >> 2;
1287
0
            ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
1288
0
            ps_pu->b4_ht = ps_pu->b4_wd;
1289
0
            ps_pu->mv.i1_l0_ref_idx = -1;
1290
0
            ps_pu->mv.i1_l1_ref_idx = -1;
1291
0
            ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
1292
0
            ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
1293
0
            ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
1294
0
            ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
1295
1296
0
            return CU_MERGED;
1297
0
        }
1298
0
        else
1299
0
        {
1300
0
            return CU_SPLIT;
1301
0
        }
1302
0
    }
1303
1304
0
    if(i4_intra_parts)
1305
0
    {
1306
0
        i4_part_mask = ENABLE_2Nx2N;
1307
0
    }
1308
1309
0
    ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1;
1310
1311
0
    hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
1312
1313
0
    ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref;
1314
0
    ps_merge_prms->i4_num_pred_dir_actual = 0;
1315
1316
0
    if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
1317
0
    {
1318
0
        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
1319
0
        S32 i4_num_valid_parts;
1320
0
        S32 i4_sigma_array_offset;
1321
1322
0
        i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
1323
1324
        /*********************************************************************************************************************************************/
1325
        /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values  */
1326
        /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
1327
        /* increment as there will be 256 4x4 blocks in a CTB                                                                                        */
1328
        /*********************************************************************************************************************************************/
1329
0
        i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
1330
0
                                (ps_merge_prms->ps_results_merge->u1_y_off * 4);
1331
1332
0
        for(i = 0; i < i4_num_valid_parts; i++)
1333
0
        {
1334
0
            S32 i4_part_id = ai4_valid_part_ids[i];
1335
1336
0
            hme_compute_final_sigma_of_pu_from_base_blocks(
1337
0
                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
1338
0
                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
1339
0
                au8_final_src_sigmaX,
1340
0
                au8_final_src_sigmaXSquared,
1341
0
                (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
1342
0
                4,
1343
0
                i4_part_id,
1344
0
                16);
1345
0
        }
1346
1347
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
1348
0
        ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
1349
0
    }
1350
1351
    /*************************************************************************/
1352
    /* Loop through all ref idx and pick the merge candts and refine based   */
1353
    /* on the active partitions. At this stage num ref will be 1 or 2        */
1354
    /*************************************************************************/
1355
0
    for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
1356
0
    {
1357
0
        S32 i4_cands;
1358
0
        U08 u1_pred_dir = 0;
1359
1360
0
        if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
1361
0
        {
1362
0
            u1_pred_dir = i4_search_idx;
1363
0
        }
1364
0
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
1365
0
        {
1366
0
            u1_pred_dir = 1;
1367
0
        }
1368
0
        else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
1369
0
        {
1370
0
            u1_pred_dir = 0;
1371
0
        }
1372
0
        else
1373
0
        {
1374
0
            ASSERT(0);
1375
0
        }
1376
1377
        /* call the function to pick and evaluate the merge candts, given */
1378
        /* a ref id and a part mask.                                      */
1379
0
        i4_cands = hme_pick_eval_merge_candts(
1380
0
            ps_merge_prms,
1381
0
            ps_subpel_prms,
1382
0
            u1_pred_dir,
1383
0
            i4_best_part_type,
1384
0
            is_vert,
1385
0
            ps_wt_inp_prms,
1386
0
            i4_frm_qstep,
1387
0
            ps_cmn_utils_optimised_function_list,
1388
0
            ps_me_optimised_function_list);
1389
1390
0
        if(i4_cands)
1391
0
        {
1392
0
            ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
1393
0
                u1_pred_dir;
1394
0
            ps_merge_prms->i4_num_pred_dir_actual++;
1395
0
        }
1396
1397
0
        i4_num_merge_cands_evaluated += i4_cands;
1398
0
    }
1399
1400
    /* Call the decide_part_types function here */
1401
    /* Populate the new PU struct with the results post subpel refinement*/
1402
0
    if(i4_num_merge_cands_evaluated)
1403
0
    {
1404
0
        inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
1405
1406
0
        hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
1407
1408
0
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
1409
0
        ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
1410
1411
0
        hme_populate_pus(
1412
0
            ps_thrd_ctxt,
1413
0
            ps_ctxt,
1414
0
            ps_subpel_prms,
1415
0
            ps_results_merge,
1416
0
            ps_cu_results,
1417
0
            ps_pu_results,
1418
0
            ps_pu_result,
1419
0
            ps_merge_prms->ps_inter_ctb_prms,
1420
0
            &ps_ctxt->s_wt_pred,
1421
0
            ps_merge_prms->ps_layer_ctxt,
1422
0
            ps_merge_prms->au1_pred_dir_searched,
1423
0
            ps_merge_prms->i4_num_pred_dir_actual);
1424
1425
0
        ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
1426
1427
0
        hme_decide_part_types(
1428
0
            ps_cu_results,
1429
0
            ps_pu_results,
1430
0
            ps_merge_prms->ps_inter_ctb_prms,
1431
0
            ps_ctxt,
1432
0
            ps_cmn_utils_optimised_function_list,
1433
0
            ps_me_optimised_function_list
1434
1435
0
        );
1436
1437
        /*****************************************************************/
1438
        /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL.                    */
1439
        /*****************************************************************/
1440
0
#if DISABLE_INTRA_IN_BPICS
1441
0
        if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
1442
0
                 (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
1443
0
#endif
1444
0
        {
1445
0
            if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
1446
0
            {
1447
0
                hme_insert_intra_nodes_post_bipred(
1448
0
                    ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
1449
0
            }
1450
0
        }
1451
0
    }
1452
0
    else
1453
0
    {
1454
0
        return CU_SPLIT;
1455
0
    }
1456
1457
    /* We check the best result of ref idx 0 and compare for parent vs child */
1458
0
    if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
1459
0
       (CU_32x32 == ps_results_merge->e_cu_size))
1460
0
    {
1461
0
        i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
1462
        /*********************************************************************/
1463
        /* Add the cost of signaling the CU tree bits.                       */
1464
        /* Assuming parent is not split, then we signal 1 bit for this parent*/
1465
        /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
1466
        /* So, 4*lambda is extra for children cost. :Lokesh                  */
1467
        /*********************************************************************/
1468
0
        {
1469
0
            pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
1470
1471
0
            i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
1472
0
        }
1473
1474
0
        if(i4_cost_parent < i4_cost_children)
1475
0
        {
1476
0
            return CU_MERGED;
1477
0
        }
1478
1479
0
        return CU_SPLIT;
1480
0
    }
1481
0
    else
1482
0
    {
1483
0
        return CU_MERGED;
1484
0
    }
1485
0
}
1486
1487
/* Copies one search result into the mv bank: the node's mv components are   */
/* right shifted by 'rshift' and stored in *ps_dst_mv, and the node's ref    */
/* idx is stored in *pi1_dst_ref_idx.                                         */
/* The expansion is a plain brace block and the mv / node arguments are      */
/* expanded more than once, so pass side-effect free expressions.            */
#define COPY_SEARCH_RESULT(ps_dst_mv, pi1_dst_ref_idx, ps_src_node, rshift)                        \
    {                                                                                              \
        (ps_dst_mv)->i2_mv_x = (ps_src_node)->s_mv.i2_mvx >> (rshift);                             \
        (ps_dst_mv)->i2_mv_y = (ps_src_node)->s_mv.i2_mvy >> (rshift);                             \
        *(pi1_dst_ref_idx) = (ps_src_node)->i1_ref_idx;                                            \
    }
1493
1494
/**
1495
********************************************************************************
1496
*  @fn     hme_update_mv_bank_noencode(search_results_t *ps_search_results,
1497
*                               layer_mv_t *ps_layer_mv,
1498
*                               S32 i4_search_blk_x,
1499
*                               S32 i4_search_blk_y,
1500
*                               mvbank_update_prms_t *ps_prms)
1501
*
1502
*  @brief  Updates the mv bank in case there is no further encoding to be done
1503
*
1504
*  @param[in]  ps_search_results: contains results for the block just searched
1505
*
1506
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1507
*
1508
*  @param[in] i4_search_blk_x  : col num of blk being searched
1509
*
1510
*  @param[in] i4_search_blk_y : row num of blk being searched
1511
*
1512
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
1513
*
1514
*  @return None
1515
********************************************************************************
1516
*/
1517
1518
void hme_update_mv_bank_noencode(
1519
    search_results_t *ps_search_results,
1520
    layer_mv_t *ps_layer_mv,
1521
    S32 i4_search_blk_x,
1522
    S32 i4_search_blk_y,
1523
    mvbank_update_prms_t *ps_prms)
1524
0
{
1525
0
    hme_mv_t *ps_mv;
1526
0
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1527
0
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1528
0
    S32 i4_blk_x, i4_blk_y, i4_offset;
1529
0
    S32 i4_j, i4_ref_id;
1530
0
    search_node_t *ps_search_node;
1531
0
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4_1;
1532
0
    search_node_t *ps_search_node_4x4_2, *ps_search_node_4x4_3;
1533
0
    search_node_t *ps_search_node_4x4_4;
1534
1535
0
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1536
0
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1537
0
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1538
1539
0
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1540
1541
    /* Identify the correct offset in the mvbank and the reference id buf */
1542
0
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1543
0
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1544
1545
    /*************************************************************************/
1546
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1547
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1548
    /* do a straightforward single update of results. This will have a 1-1   */
1549
    /* correspondence.                                                       */
1550
    /*************************************************************************/
1551
0
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1552
0
    {
1553
0
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1554
0
        {
1555
0
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1556
0
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1557
0
            {
1558
0
                COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, 0);
1559
0
                ps_mv++;
1560
0
                pi1_ref_idx++;
1561
0
                ps_search_node++;
1562
0
            }
1563
0
        }
1564
0
        return;
1565
0
    }
1566
1567
    /*************************************************************************/
1568
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1569
    /* case, we need to have NxN partitions enabled in search.               */
1570
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1571
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1572
    /*************************************************************************/
1573
0
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1574
0
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1575
0
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1576
1577
    /*************************************************************************/
1578
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1579
    /* hence the below check.                                                */
1580
    /*************************************************************************/
1581
0
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
1582
1583
0
    ps_mv1 = ps_mv;
1584
0
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1585
0
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1586
0
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1587
0
    pi1_ref_idx1 = pi1_ref_idx;
1588
0
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1589
0
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1590
0
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1591
1592
0
    for(i4_ref_id = 0; i4_ref_id < (S32)ps_search_results->u1_num_active_ref; i4_ref_id++)
1593
0
    {
1594
0
        ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1595
1596
0
        ps_search_node_4x4_1 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL];
1597
1598
0
        ps_search_node_4x4_2 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TR];
1599
1600
0
        ps_search_node_4x4_3 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BL];
1601
1602
0
        ps_search_node_4x4_4 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_BR];
1603
1604
0
        COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1605
0
        ps_mv1++;
1606
0
        pi1_ref_idx1++;
1607
0
        ps_search_node_4x4_1++;
1608
0
        COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1609
0
        ps_mv2++;
1610
0
        pi1_ref_idx2++;
1611
0
        ps_search_node_4x4_2++;
1612
0
        COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1613
0
        ps_mv3++;
1614
0
        pi1_ref_idx3++;
1615
0
        ps_search_node_4x4_3++;
1616
0
        COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1617
0
        ps_mv4++;
1618
0
        pi1_ref_idx4++;
1619
0
        ps_search_node_4x4_4++;
1620
1621
0
        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
1622
0
        {
1623
0
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_8x8, 0);
1624
0
            ps_mv1++;
1625
0
            pi1_ref_idx1++;
1626
0
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_8x8, 0);
1627
0
            ps_mv2++;
1628
0
            pi1_ref_idx2++;
1629
0
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_8x8, 0);
1630
0
            ps_mv3++;
1631
0
            pi1_ref_idx3++;
1632
0
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_8x8, 0);
1633
0
            ps_mv4++;
1634
0
            pi1_ref_idx4++;
1635
0
        }
1636
1637
0
        for(i4_j = 2; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1638
0
        {
1639
0
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_4x4_1, 0);
1640
0
            ps_mv1++;
1641
0
            pi1_ref_idx1++;
1642
0
            ps_search_node_4x4_1++;
1643
0
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_4x4_2, 0);
1644
0
            ps_mv2++;
1645
0
            pi1_ref_idx2++;
1646
0
            ps_search_node_4x4_2++;
1647
0
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_4x4_3, 0);
1648
0
            ps_mv3++;
1649
0
            pi1_ref_idx3++;
1650
0
            ps_search_node_4x4_3++;
1651
0
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_4x4_4, 0);
1652
0
            ps_mv4++;
1653
0
            pi1_ref_idx4++;
1654
0
            ps_search_node_4x4_4++;
1655
0
        }
1656
0
    }
1657
0
}
1658
1659
void hme_update_mv_bank_encode(
1660
    search_results_t *ps_search_results,
1661
    layer_mv_t *ps_layer_mv,
1662
    S32 i4_search_blk_x,
1663
    S32 i4_search_blk_y,
1664
    mvbank_update_prms_t *ps_prms,
1665
    U08 *pu1_pred_dir_searched,
1666
    S32 i4_num_act_ref_l0)
1667
0
{
1668
0
    hme_mv_t *ps_mv;
1669
0
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1670
0
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1671
0
    S32 i4_blk_x, i4_blk_y, i4_offset;
1672
0
    S32 j, i, num_parts;
1673
0
    search_node_t *ps_search_node_tl, *ps_search_node_tr;
1674
0
    search_node_t *ps_search_node_bl, *ps_search_node_br;
1675
0
    search_node_t s_zero_mv;
1676
0
    WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
1677
1678
0
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1679
0
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1680
0
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1681
1682
0
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1683
1684
    /* Identify the correct offset in the mvbank and the reference id buf */
1685
0
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1686
0
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1687
1688
0
    ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
1689
0
    ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
1690
1691
    /*************************************************************************/
1692
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
1693
    /* hence the below check.                                                */
1694
    /*************************************************************************/
1695
0
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
1696
1697
0
    ps_mv1 = ps_mv;
1698
0
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
1699
0
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
1700
0
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
1701
0
    pi1_ref_idx1 = pi1_ref_idx;
1702
0
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
1703
0
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
1704
0
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
1705
1706
    /* Initialize zero mv: default mv used for intra mvs */
1707
0
    s_zero_mv.s_mv.i2_mvx = 0;
1708
0
    s_zero_mv.s_mv.i2_mvy = 0;
1709
0
    s_zero_mv.i1_ref_idx = 0;
1710
1711
0
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
1712
0
       (ps_search_results->i4_part_mask & ENABLE_NxN))
1713
0
    {
1714
0
        i4_part_type = PRT_NxN;
1715
0
    }
1716
1717
0
    for(i = 0; i < ps_prms->i4_num_ref; i++)
1718
0
    {
1719
0
        for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
1720
0
        {
1721
0
            WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
1722
1723
0
            num_parts = gau1_num_parts_in_part_type[i4_part_type];
1724
1725
0
            ps_search_node_tl =
1726
0
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
1727
1728
0
            if(num_parts == 1)
1729
0
            {
1730
0
                ps_search_node_tr = ps_search_node_tl;
1731
0
                ps_search_node_bl = ps_search_node_tl;
1732
0
                ps_search_node_br = ps_search_node_tl;
1733
0
            }
1734
0
            else if(num_parts == 2)
1735
0
            {
1736
                /* For vertically oriented partitions, tl, bl pt to same result */
1737
                /* For horizontally oriented partition, tl, tr pt to same result */
1738
                /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1739
                /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1740
                /* and right 2 8x8 have 12x16R partition */
1741
0
                if(gau1_is_vert_part[i4_part_type])
1742
0
                {
1743
0
                    ps_search_node_tr =
1744
0
                        ps_search_results
1745
0
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1746
0
                    ps_search_node_bl = ps_search_node_tl;
1747
0
                }
1748
0
                else
1749
0
                {
1750
0
                    ps_search_node_tr = ps_search_node_tl;
1751
0
                    ps_search_node_bl =
1752
0
                        ps_search_results
1753
0
                            ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1754
0
                }
1755
0
                ps_search_node_br =
1756
0
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1757
0
            }
1758
0
            else
1759
0
            {
1760
                /* 4 unique results */
1761
0
                ps_search_node_tr =
1762
0
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
1763
0
                ps_search_node_bl =
1764
0
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
1765
0
                ps_search_node_br =
1766
0
                    ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
1767
0
            }
1768
1769
0
            if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1770
0
                ps_search_node_tl++;
1771
0
            if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1772
0
                ps_search_node_tr++;
1773
0
            if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1774
0
                ps_search_node_bl++;
1775
0
            if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1776
0
                ps_search_node_br++;
1777
1778
0
            COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1779
0
            ps_mv1++;
1780
0
            pi1_ref_idx1++;
1781
0
            COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1782
0
            ps_mv2++;
1783
0
            pi1_ref_idx2++;
1784
0
            COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1785
0
            ps_mv3++;
1786
0
            pi1_ref_idx3++;
1787
0
            COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1788
0
            ps_mv4++;
1789
0
            pi1_ref_idx4++;
1790
1791
0
            if(ps_prms->i4_num_results_to_store > 1)
1792
0
            {
1793
0
                ps_search_node_tl =
1794
0
                    &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
1795
1796
0
                if(num_parts == 1)
1797
0
                {
1798
0
                    ps_search_node_tr = ps_search_node_tl;
1799
0
                    ps_search_node_bl = ps_search_node_tl;
1800
0
                    ps_search_node_br = ps_search_node_tl;
1801
0
                }
1802
0
                else if(num_parts == 2)
1803
0
                {
1804
                    /* For vertically oriented partitions, tl, bl pt to same result */
1805
                    /* For horizontally oriented partition, tl, tr pt to same result */
1806
                    /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
1807
                    /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
1808
                    /* and right 2 8x8 have 12x16R partition */
1809
0
                    if(gau1_is_vert_part[i4_part_type])
1810
0
                    {
1811
0
                        ps_search_node_tr =
1812
0
                            &ps_search_results
1813
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1814
0
                        ps_search_node_bl = ps_search_node_tl;
1815
0
                    }
1816
0
                    else
1817
0
                    {
1818
0
                        ps_search_node_tr = ps_search_node_tl;
1819
0
                        ps_search_node_bl =
1820
0
                            &ps_search_results
1821
0
                                 ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1822
0
                    }
1823
0
                    ps_search_node_br =
1824
0
                        &ps_search_results
1825
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1826
0
                }
1827
0
                else
1828
0
                {
1829
                    /* 4 unique results */
1830
0
                    ps_search_node_tr =
1831
0
                        &ps_search_results
1832
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
1833
0
                    ps_search_node_bl =
1834
0
                        &ps_search_results
1835
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
1836
0
                    ps_search_node_br =
1837
0
                        &ps_search_results
1838
0
                             ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
1839
0
                }
1840
1841
0
                if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
1842
0
                    ps_search_node_tl++;
1843
0
                if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
1844
0
                    ps_search_node_tr++;
1845
0
                if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
1846
0
                    ps_search_node_bl++;
1847
0
                if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
1848
0
                    ps_search_node_br++;
1849
1850
0
                COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
1851
0
                ps_mv1++;
1852
0
                pi1_ref_idx1++;
1853
0
                COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
1854
0
                ps_mv2++;
1855
0
                pi1_ref_idx2++;
1856
0
                COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
1857
0
                ps_mv3++;
1858
0
                pi1_ref_idx3++;
1859
0
                COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
1860
0
                ps_mv4++;
1861
0
                pi1_ref_idx4++;
1862
0
            }
1863
0
        }
1864
0
    }
1865
0
}
1866
1867
/**
1868
********************************************************************************
1869
*  @fn     hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
1870
*                               layer_mv_t *ps_layer_mv,
1871
*                               S32 i4_search_blk_x,
1872
*                               S32 i4_search_blk_y,
1873
*                               mvbank_update_prms_t *ps_prms)
1874
*
1875
*  @brief  Updates the mv bank with results grouped by prediction direction (L0 entries first, then L1)
1876
*
1877
*  @param[in]  ps_search_results: contains results for the block just searched
1878
*
1879
*  @param[in,out]  ps_layer_mv : Has pointer to mv bank amongst other things
1880
*
1881
*  @param[in] i4_search_blk_x  : col num of blk being searched
1882
*
1883
*  @param[in] i4_search_blk_y : row num of blk being searched
1884
*
1885
*  @param[in] ps_prms : contains certain parameters which govern how the update is done
1886
*
1887
*  @return None
1888
********************************************************************************
1889
*/
1890
1891
void hme_update_mv_bank_in_l1_me(
1892
    search_results_t *ps_search_results,
1893
    layer_mv_t *ps_layer_mv,
1894
    S32 i4_search_blk_x,
1895
    S32 i4_search_blk_y,
1896
    mvbank_update_prms_t *ps_prms)
1897
0
{
1898
0
    hme_mv_t *ps_mv;
1899
0
    hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
1900
0
    S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
1901
0
    S32 i4_blk_x, i4_blk_y, i4_offset;
1902
0
    S32 i4_j, i4_ref_id;
1903
0
    search_node_t *ps_search_node;
1904
0
    search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
1905
1906
0
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
1907
0
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
1908
0
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
1909
1910
0
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
1911
1912
    /* Identify the correct offset in the mvbank and the reference id buf */
1913
0
    ps_mv = ps_layer_mv->ps_mv + i4_offset;
1914
0
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
1915
1916
    /*************************************************************************/
1917
    /* Supposing we store the mvs in the same blk size as we searched (e.g.  */
1918
    /* we searched 8x8 blks and store results for 8x8 blks), then we can     */
1919
    /* do a straightforward single update of results. This will have a 1-1   */
1920
    /* correspondence.                                                       */
1921
    /*************************************************************************/
1922
0
    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
1923
0
    {
1924
0
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
1925
1926
0
        hme_mv_t *ps_mv_l0_root = ps_mv;
1927
0
        hme_mv_t *ps_mv_l1_root =
1928
0
            ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1929
1930
0
        U32 u4_num_l0_results_updated = 0;
1931
0
        U32 u4_num_l1_results_updated = 0;
1932
1933
0
        S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
1934
0
        S08 *pi1_ref_idx_l1_root =
1935
0
            pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
1936
1937
0
        for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
1938
0
        {
1939
0
            U32 *pu4_num_results_updated;
1940
0
            search_node_t **pps_result_nodes;
1941
1942
0
            U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
1943
1944
0
            if(u1_pred_dir_of_cur_ref)
1945
0
            {
1946
0
                pu4_num_results_updated = &u4_num_l1_results_updated;
1947
0
                pps_result_nodes = &aps_result_nodes_sorted[1][0];
1948
0
            }
1949
0
            else
1950
0
            {
1951
0
                pu4_num_results_updated = &u4_num_l0_results_updated;
1952
0
                pps_result_nodes = &aps_result_nodes_sorted[0][0];
1953
0
            }
1954
1955
0
            ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
1956
1957
0
            for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
1958
0
            {
1959
0
                hme_add_new_node_to_a_sorted_array(
1960
0
                    &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
1961
1962
0
                ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
1963
0
                (*pu4_num_results_updated)++;
1964
0
            }
1965
0
        }
1966
1967
0
        for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
1968
0
        {
1969
0
            COPY_SEARCH_RESULT(
1970
0
                &ps_mv_l0_root[i4_j],
1971
0
                &pi1_ref_idx_l0_root[i4_j],
1972
0
                aps_result_nodes_sorted[0][i4_j],
1973
0
                0);
1974
0
        }
1975
1976
0
        for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
1977
0
        {
1978
0
            COPY_SEARCH_RESULT(
1979
0
                &ps_mv_l1_root[i4_j],
1980
0
                &pi1_ref_idx_l1_root[i4_j],
1981
0
                aps_result_nodes_sorted[1][i4_j],
1982
0
                0);
1983
0
        }
1984
1985
0
        return;
1986
0
    }
1987
1988
    /*************************************************************************/
1989
    /* Case where search blk size is 8x8, but we update 4x4 results. In this */
1990
    /* case, we need to have NxN partitions enabled in search.               */
1991
    /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
1992
    /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
1993
    /*************************************************************************/
1994
0
    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
1995
0
    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
1996
0
    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
1997
1998
    /*************************************************************************/
1999
    /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
2000
    /* hence the below check.                                                */
2001
    /*************************************************************************/
2002
0
    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
2003
2004
0
    ps_mv1 = ps_mv;
2005
0
    ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
2006
0
    ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
2007
0
    ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
2008
0
    pi1_ref_idx1 = pi1_ref_idx;
2009
0
    pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
2010
0
    pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
2011
0
    pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
2012
2013
0
    {
2014
        /* max ref frames * max results per partition * number of partitions (4x4, 8x8) */
2015
0
        search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2016
0
        U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * MAX_RESULTS_PER_PART * 2];
2017
2018
0
        S32 i;
2019
2020
0
        hme_mv_t *ps_mv1_l0_root = ps_mv1;
2021
0
        hme_mv_t *ps_mv1_l1_root =
2022
0
            ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2023
0
        hme_mv_t *ps_mv2_l0_root = ps_mv2;
2024
0
        hme_mv_t *ps_mv2_l1_root =
2025
0
            ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2026
0
        hme_mv_t *ps_mv3_l0_root = ps_mv3;
2027
0
        hme_mv_t *ps_mv3_l1_root =
2028
0
            ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2029
0
        hme_mv_t *ps_mv4_l0_root = ps_mv4;
2030
0
        hme_mv_t *ps_mv4_l1_root =
2031
0
            ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2032
2033
0
        U32 u4_num_l0_results_updated = 0;
2034
0
        U32 u4_num_l1_results_updated = 0;
2035
2036
0
        S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
2037
0
        S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
2038
0
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2039
0
        S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
2040
0
        S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
2041
0
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2042
0
        S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
2043
0
        S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
2044
0
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2045
0
        S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
2046
0
        S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
2047
0
                                                            ps_layer_mv->i4_num_mvs_per_ref);
2048
2049
0
        for(i = 0; i < 4; i++)
2050
0
        {
2051
0
            hme_mv_t *ps_mv_l0_root;
2052
0
            hme_mv_t *ps_mv_l1_root;
2053
2054
0
            S08 *pi1_ref_idx_l0_root;
2055
0
            S08 *pi1_ref_idx_l1_root;
2056
2057
0
            for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
2058
0
            {
2059
0
                U32 *pu4_num_results_updated;
2060
0
                search_node_t **pps_result_nodes;
2061
0
                U08 *pu1_cost_shifts_for_sorted_node;
2062
2063
0
                U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
2064
2065
0
                if(u1_pred_dir_of_cur_ref)
2066
0
                {
2067
0
                    pu4_num_results_updated = &u4_num_l1_results_updated;
2068
0
                    pps_result_nodes = &aps_result_nodes_sorted[1][0];
2069
0
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2070
0
                }
2071
0
                else
2072
0
                {
2073
0
                    pu4_num_results_updated = &u4_num_l0_results_updated;
2074
0
                    pps_result_nodes = &aps_result_nodes_sorted[0][0];
2075
0
                    pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
2076
0
                }
2077
2078
0
                ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
2079
2080
0
                ps_search_node_4x4 =
2081
0
                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
2082
2083
0
                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
2084
0
                {
2085
0
                    hme_add_new_node_to_a_sorted_array(
2086
0
                        &ps_search_node_4x4[i4_j],
2087
0
                        pps_result_nodes,
2088
0
                        pu1_cost_shifts_for_sorted_node,
2089
0
                        *pu4_num_results_updated,
2090
0
                        0);
2091
2092
0
                    (*pu4_num_results_updated)++;
2093
2094
0
                    hme_add_new_node_to_a_sorted_array(
2095
0
                        &ps_search_node_8x8[i4_j],
2096
0
                        pps_result_nodes,
2097
0
                        pu1_cost_shifts_for_sorted_node,
2098
0
                        *pu4_num_results_updated,
2099
0
                        2);
2100
2101
0
                    (*pu4_num_results_updated)++;
2102
0
                }
2103
0
            }
2104
2105
0
            switch(i)
2106
0
            {
2107
0
            case 0:
2108
0
            {
2109
0
                ps_mv_l0_root = ps_mv1_l0_root;
2110
0
                ps_mv_l1_root = ps_mv1_l1_root;
2111
2112
0
                pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
2113
0
                pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
2114
2115
0
                break;
2116
0
            }
2117
0
            case 1:
2118
0
            {
2119
0
                ps_mv_l0_root = ps_mv2_l0_root;
2120
0
                ps_mv_l1_root = ps_mv2_l1_root;
2121
2122
0
                pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
2123
0
                pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
2124
2125
0
                break;
2126
0
            }
2127
0
            case 2:
2128
0
            {
2129
0
                ps_mv_l0_root = ps_mv3_l0_root;
2130
0
                ps_mv_l1_root = ps_mv3_l1_root;
2131
2132
0
                pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
2133
0
                pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
2134
2135
0
                break;
2136
0
            }
2137
0
            case 3:
2138
0
            {
2139
0
                ps_mv_l0_root = ps_mv4_l0_root;
2140
0
                ps_mv_l1_root = ps_mv4_l1_root;
2141
2142
0
                pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
2143
0
                pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
2144
2145
0
                break;
2146
0
            }
2147
0
            }
2148
2149
0
            u4_num_l0_results_updated =
2150
0
                MIN((S32)u4_num_l0_results_updated,
2151
0
                    ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
2152
2153
0
            u4_num_l1_results_updated =
2154
0
                MIN((S32)u4_num_l1_results_updated,
2155
0
                    ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
2156
2157
0
            for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
2158
0
            {
2159
0
                COPY_SEARCH_RESULT(
2160
0
                    &ps_mv_l0_root[i4_j],
2161
0
                    &pi1_ref_idx_l0_root[i4_j],
2162
0
                    aps_result_nodes_sorted[0][i4_j],
2163
0
                    0);
2164
0
            }
2165
2166
0
            for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
2167
0
            {
2168
0
                COPY_SEARCH_RESULT(
2169
0
                    &ps_mv_l1_root[i4_j],
2170
0
                    &pi1_ref_idx_l1_root[i4_j],
2171
0
                    aps_result_nodes_sorted[1][i4_j],
2172
0
                    0);
2173
0
            }
2174
0
        }
2175
0
    }
2176
0
}
2177
2178
/**
2179
******************************************************************************
2180
*  @brief Scales motion vector component projected from a different layer in same
2181
*         picture (so no ref id related delta poc scaling required)
2182
******************************************************************************
2183
*/
2184
2185
#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p)                                                  \
2186
0
    ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
2187
/**
2188
********************************************************************************
2189
*  @fn     hme_project_coloc_candt(search_node_t *ps_search_node,
2190
*                                   layer_ctxt_t *ps_curr_layer,
2191
*                                   layer_ctxt_t *ps_coarse_layer,
2192
*                                   S32 i4_pos_x,
2193
*                                   S32 i4_pos_y,
2194
*                                   S08 i1_ref_id,
2195
*                                   S08 i1_result_id)
2196
*
2197
*  @brief  From a coarser layer, projects a candidated situated at "colocated"
2198
*          position in the picture (e.g. given x, y it will be x/2, y/2 dyadic
2199
*
2200
*  @param[out]  ps_search_node : contains the projected result
2201
*
2202
*  @param[in]   ps_curr_layer : current layer context
2203
*
2204
*  @param[in]   ps_coarse_layer  : coarser layer context
2205
*
2206
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2207
*
2208
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2209
*
2210
*  @param[in]   i1_ref_id : reference id for which the candidate required
2211
*
2212
*  @param[in]   i4_result_id : result id for which the candidate required
2213
*                              (0 : best result, 1 : next best)
2214
*
2215
*  @return None
2216
********************************************************************************
2217
*/
2218
2219
void hme_project_coloc_candt(
2220
    search_node_t *ps_search_node,
2221
    layer_ctxt_t *ps_curr_layer,
2222
    layer_ctxt_t *ps_coarse_layer,
2223
    S32 i4_pos_x,
2224
    S32 i4_pos_y,
2225
    S08 i1_ref_id,
2226
    S32 i4_result_id)
2227
0
{
2228
0
    S32 wd_c, ht_c, wd_p, ht_p;
2229
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2230
0
    layer_mv_t *ps_layer_mvbank;
2231
0
    hme_mv_t *ps_mv;
2232
0
    S08 *pi1_ref_idx;
2233
2234
    /* Width and ht of current and prev layers */
2235
0
    wd_c = ps_curr_layer->i4_wd;
2236
0
    ht_c = ps_curr_layer->i4_ht;
2237
0
    wd_p = ps_coarse_layer->i4_wd;
2238
0
    ht_p = ps_coarse_layer->i4_ht;
2239
2240
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2241
0
    blksize_p = (S32)gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
2242
2243
    /* Safety check to avoid uninitialized access across temporal layers */
2244
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2245
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2246
2247
    /* Project the positions to prev layer */
2248
    /* TODO: convert these to scale factors at pic level */
2249
0
    blk_x = (i4_pos_x * wd_p) / (wd_c * blksize_p);
2250
0
    blk_y = (i4_pos_y * ht_p) / (ht_c * blksize_p);
2251
2252
    /* Pick up the mvs from the location */
2253
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2254
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2255
2256
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2257
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2258
2259
0
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2260
0
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2261
2262
0
    ps_search_node->s_mv.i2_mvx = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_x, wd_c, wd_p);
2263
0
    ps_search_node->s_mv.i2_mvy = SCALE_MV_COMP_RES(ps_mv[i4_result_id].i2_mv_y, ht_c, ht_p);
2264
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2265
0
    ps_search_node->u1_subpel_done = 0;
2266
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2267
0
    {
2268
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2269
0
        ps_search_node->s_mv.i2_mvx = 0;
2270
0
        ps_search_node->s_mv.i2_mvy = 0;
2271
0
    }
2272
0
}
2273
2274
/**
2275
********************************************************************************
2276
*  @fn     hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
2277
*                                   layer_ctxt_t *ps_curr_layer,
2278
*                                   layer_ctxt_t *ps_coarse_layer,
2279
*                                   S32 i4_pos_x,
2280
*                                   S32 i4_pos_y,
2281
*                                   S08 i1_ref_id,
2282
*                                   S08 i1_result_id)
2283
*
2284
*  @brief  From a coarser layer, projects a candidated situated at "colocated"
2285
*          position in the picture when the ratios are dyadic
2286
*
2287
*  @param[out]  ps_search_node : contains the projected result
2288
*
2289
*  @param[in]   ps_curr_layer : current layer context
2290
*
2291
*  @param[in]   ps_coarse_layer  : coarser layer context
2292
*
2293
*  @param[in]   i4_pos_x  : x Position where mv is required (w.r.t. curr layer)
2294
*
2295
*  @param[in]   i4_pos_y  : y Position where mv is required (w.r.t. curr layer)
2296
*
2297
*  @param[in]   i1_ref_id : reference id for which the candidate required
2298
*
2299
*  @param[in]   i4_result_id : result id for which the candidate required
2300
*                              (0 : best result, 1 : next best)
2301
*
2302
*  @return None
2303
********************************************************************************
2304
*/
2305
2306
void hme_project_coloc_candt_dyadic(
2307
    search_node_t *ps_search_node,
2308
    layer_ctxt_t *ps_curr_layer,
2309
    layer_ctxt_t *ps_coarse_layer,
2310
    S32 i4_pos_x,
2311
    S32 i4_pos_y,
2312
    S08 i1_ref_id,
2313
    S32 i4_result_id)
2314
0
{
2315
0
    S32 wd_c, ht_c, wd_p, ht_p;
2316
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2317
0
    layer_mv_t *ps_layer_mvbank;
2318
0
    hme_mv_t *ps_mv;
2319
0
    S08 *pi1_ref_idx;
2320
2321
    /* Width and ht of current and prev layers */
2322
0
    wd_c = ps_curr_layer->i4_wd;
2323
0
    ht_c = ps_curr_layer->i4_ht;
2324
0
    wd_p = ps_coarse_layer->i4_wd;
2325
0
    ht_p = ps_coarse_layer->i4_ht;
2326
2327
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2328
    /* blksize_p = log2(wd) + 1 */
2329
0
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2330
2331
    /* ASSERT for valid sizes */
2332
0
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2333
2334
    /* Safety check to avoid uninitialized access across temporal layers */
2335
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2336
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2337
2338
    /* Project the positions to prev layer */
2339
    /* TODO: convert these to scale factors at pic level */
2340
0
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2341
0
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2342
2343
    /* Pick up the mvs from the location */
2344
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2345
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2346
2347
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2348
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2349
2350
0
    ps_mv += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2351
0
    pi1_ref_idx += (i1_ref_id * ps_layer_mvbank->i4_num_mvs_per_ref);
2352
2353
0
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2354
0
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2355
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2356
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2357
0
    {
2358
0
        ps_search_node->i1_ref_idx = i1_ref_id;
2359
0
        ps_search_node->s_mv.i2_mvx = 0;
2360
0
        ps_search_node->s_mv.i2_mvy = 0;
2361
0
    }
2362
0
}
2363
2364
void hme_project_coloc_candt_dyadic_implicit(
2365
    search_node_t *ps_search_node,
2366
    layer_ctxt_t *ps_curr_layer,
2367
    layer_ctxt_t *ps_coarse_layer,
2368
    S32 i4_pos_x,
2369
    S32 i4_pos_y,
2370
    S32 i4_num_act_ref_l0,
2371
    U08 u1_pred_dir,
2372
    U08 u1_default_ref_id,
2373
    S32 i4_result_id)
2374
0
{
2375
0
    S32 wd_c, ht_c, wd_p, ht_p;
2376
0
    S32 blksize_p, blk_x, blk_y, i4_offset;
2377
0
    layer_mv_t *ps_layer_mvbank;
2378
0
    hme_mv_t *ps_mv;
2379
0
    S08 *pi1_ref_idx;
2380
2381
    /* Width and ht of current and prev layers */
2382
0
    wd_c = ps_curr_layer->i4_wd;
2383
0
    ht_c = ps_curr_layer->i4_ht;
2384
0
    wd_p = ps_coarse_layer->i4_wd;
2385
0
    ht_p = ps_coarse_layer->i4_ht;
2386
2387
0
    ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
2388
0
    blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
2389
2390
    /* ASSERT for valid sizes */
2391
0
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
2392
2393
    /* Safety check to avoid uninitialized access across temporal layers */
2394
0
    i4_pos_x = CLIP3(i4_pos_x, 0, (wd_c - blksize_p));
2395
0
    i4_pos_y = CLIP3(i4_pos_y, 0, (ht_c - blksize_p));
2396
    /* Project the positions to prev layer */
2397
    /* TODO: convert these to scale factors at pic level */
2398
0
    blk_x = i4_pos_x >> blksize_p;  // (2 * blksize_p);
2399
0
    blk_y = i4_pos_y >> blksize_p;  // (2 * blksize_p);
2400
2401
    /* Pick up the mvs from the location */
2402
0
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
2403
0
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
2404
2405
0
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
2406
0
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
2407
2408
0
    if(u1_pred_dir == 1)
2409
0
    {
2410
0
        ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2411
0
        pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
2412
0
    }
2413
2414
0
    ps_search_node->s_mv.i2_mvx = ps_mv[i4_result_id].i2_mv_x << 1;
2415
0
    ps_search_node->s_mv.i2_mvy = ps_mv[i4_result_id].i2_mv_y << 1;
2416
0
    ps_search_node->i1_ref_idx = pi1_ref_idx[i4_result_id];
2417
0
    if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
2418
0
    {
2419
0
        ps_search_node->i1_ref_idx = u1_default_ref_id;
2420
0
        ps_search_node->s_mv.i2_mvx = 0;
2421
0
        ps_search_node->s_mv.i2_mvy = 0;
2422
0
    }
2423
0
}
2424
2425
/* Scales the four bounds (i2_min_x, i2_max_x, i2_min_y, i2_max_y) of the    */
/* range struct prm2 by 2^shift and stores them in the range struct prm1.    */
/* The bounds are multiplied by (1 << shift) rather than shifted themselves, */
/* since left shifting a negative bound is undefined behaviour in C. All     */
/* arguments are parenthesised so that expressions can be passed safely.     */
/* The plain brace block form is retained so that existing call sites        */
/* expand exactly as before.                                                 */
#define SCALE_RANGE_PRMS(prm1, prm2, shift)                                                        \
    {                                                                                              \
        (prm1).i2_min_x = (prm2).i2_min_x * (1 << (shift));                                        \
        (prm1).i2_max_x = (prm2).i2_max_x * (1 << (shift));                                        \
        (prm1).i2_min_y = (prm2).i2_min_y * (1 << (shift));                                        \
        (prm1).i2_max_y = (prm2).i2_max_y * (1 << (shift));                                        \
    }
2432
2433
/* Pointer flavour of SCALE_RANGE_PRMS: prm1 and prm2 are pointers to range  */
/* structs; the four bounds of *prm2 are scaled by 2^shift into *prm1.       */
/* The bounds are multiplied by (1 << shift) rather than shifted themselves, */
/* since left shifting a negative bound is undefined behaviour in C. All     */
/* arguments are parenthesised so that expressions such as &s_range can be   */
/* passed. The plain brace block form is retained so that existing call      */
/* sites expand exactly as before.                                           */
#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift)                                               \
    {                                                                                              \
        (prm1)->i2_min_x = (prm2)->i2_min_x * (1 << (shift));                                      \
        (prm1)->i2_max_x = (prm2)->i2_max_x * (1 << (shift));                                      \
        (prm1)->i2_min_y = (prm2)->i2_min_y * (1 << (shift));                                      \
        (prm1)->i2_max_y = (prm2)->i2_max_y * (1 << (shift));                                      \
    }
2440
2441
/**
2442
********************************************************************************
2443
*  @fn   void hme_refine_frm_init(me_ctxt_t *ps_ctxt,
2444
*                       refine_layer_prms_t *ps_refine_prms)
2445
*
2446
*  @brief  Frame init of refinemnet layers in ME
2447
*
2448
*  @param[in,out]  ps_ctxt: ME Handle
2449
*
2450
*  @param[in]  ps_refine_prms : refinement layer prms
2451
*
2452
*  @return None
2453
********************************************************************************
2454
*/
2455
void hme_refine_frm_init(
2456
    layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
2457
0
{
2458
    /* local variables */
2459
0
    BLK_SIZE_T e_result_blk_size = BLK_8x8;
2460
0
    S32 i4_num_ref_fpel, i4_num_ref_prev_layer;
2461
2462
0
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
2463
2464
0
    if(ps_refine_prms->explicit_ref)
2465
0
    {
2466
0
        i4_num_ref_fpel = i4_num_ref_prev_layer;
2467
0
    }
2468
0
    else
2469
0
    {
2470
0
        i4_num_ref_fpel = 2;
2471
0
    }
2472
2473
0
    if(ps_refine_prms->i4_enable_4x4_part)
2474
0
    {
2475
0
        e_result_blk_size = BLK_4x4;
2476
0
    }
2477
2478
0
    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
2479
2480
0
    hme_init_mv_bank(
2481
0
        ps_curr_layer,
2482
0
        e_result_blk_size,
2483
0
        i4_num_ref_fpel,
2484
0
        ps_refine_prms->i4_num_mvbank_results,
2485
0
        ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
2486
0
}
2487
2488
#if 1  //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
2489
/**
2490
********************************************************************************
2491
*  @fn   void hme_init_clusters_16x16
2492
*               (
2493
*                   cluster_16x16_blk_t *ps_cluster_blk_16x16
2494
*               )
2495
*
2496
*  @brief  Intialisations for the structs used in clustering algorithm
2497
*
2498
*  @param[in/out]  ps_cluster_blk_16x16: pointer to structure containing clusters
2499
*                                        of 16x16 block
2500
*
2501
*  @return None
2502
********************************************************************************
2503
*/
2504
static __inline void
2505
    hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
2506
0
{
2507
0
    S32 i;
2508
2509
0
    ps_cluster_blk_16x16->num_clusters = 0;
2510
0
    ps_cluster_blk_16x16->intra_mv_area = 0;
2511
0
    ps_cluster_blk_16x16->best_inter_cost = 0;
2512
2513
0
    for(i = 0; i < MAX_NUM_CLUSTERS_16x16; i++)
2514
0
    {
2515
0
        ps_cluster_blk_16x16->as_cluster_data[i].max_dist_from_centroid =
2516
0
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
2517
2518
0
        ps_cluster_blk_16x16->as_cluster_data[i].is_valid_cluster = 0;
2519
2520
0
        ps_cluster_blk_16x16->as_cluster_data[i].bi_mv_pixel_area = 0;
2521
0
        ps_cluster_blk_16x16->as_cluster_data[i].uni_mv_pixel_area = 0;
2522
0
    }
2523
0
    for(i = 0; i < MAX_NUM_REF; i++)
2524
0
    {
2525
0
        ps_cluster_blk_16x16->au1_num_clusters[i] = 0;
2526
0
    }
2527
0
}
2528
2529
/**
2530
********************************************************************************
2531
*  @fn   void hme_init_clusters_32x32
2532
*               (
2533
*                   cluster_32x32_blk_t *ps_cluster_blk_32x32
2534
*               )
2535
*
2536
*  @brief  Intialisations for the structs used in clustering algorithm
2537
*
2538
*  @param[in/out]  ps_cluster_blk_32x32: pointer to structure containing clusters
2539
*                                        of 32x32 block
2540
*
2541
*  @return None
2542
********************************************************************************
2543
*/
2544
static __inline void
2545
    hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
2546
0
{
2547
0
    S32 i;
2548
2549
0
    ps_cluster_blk_32x32->num_clusters = 0;
2550
0
    ps_cluster_blk_32x32->intra_mv_area = 0;
2551
0
    ps_cluster_blk_32x32->best_alt_ref = -1;
2552
0
    ps_cluster_blk_32x32->best_uni_ref = -1;
2553
0
    ps_cluster_blk_32x32->best_inter_cost = 0;
2554
0
    ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
2555
2556
0
    for(i = 0; i < MAX_NUM_CLUSTERS_32x32; i++)
2557
0
    {
2558
0
        ps_cluster_blk_32x32->as_cluster_data[i].max_dist_from_centroid =
2559
0
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
2560
0
        ps_cluster_blk_32x32->as_cluster_data[i].is_valid_cluster = 0;
2561
2562
0
        ps_cluster_blk_32x32->as_cluster_data[i].bi_mv_pixel_area = 0;
2563
0
        ps_cluster_blk_32x32->as_cluster_data[i].uni_mv_pixel_area = 0;
2564
0
    }
2565
0
    for(i = 0; i < MAX_NUM_REF; i++)
2566
0
    {
2567
0
        ps_cluster_blk_32x32->au1_num_clusters[i] = 0;
2568
0
    }
2569
0
}
2570
2571
/**
2572
********************************************************************************
2573
*  @fn   void hme_init_clusters_64x64
2574
*               (
2575
*                   cluster_64x64_blk_t *ps_cluster_blk_64x64
2576
*               )
2577
*
2578
*  @brief  Intialisations for the structs used in clustering algorithm
2579
*
2580
*  @param[in/out]  ps_cluster_blk_64x64: pointer to structure containing clusters
2581
*                                        of 64x64 block
2582
*
2583
*  @return None
2584
********************************************************************************
2585
*/
2586
static __inline void
2587
    hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
2588
0
{
2589
0
    S32 i;
2590
2591
0
    ps_cluster_blk_64x64->num_clusters = 0;
2592
0
    ps_cluster_blk_64x64->intra_mv_area = 0;
2593
0
    ps_cluster_blk_64x64->best_alt_ref = -1;
2594
0
    ps_cluster_blk_64x64->best_uni_ref = -1;
2595
0
    ps_cluster_blk_64x64->best_inter_cost = 0;
2596
2597
0
    for(i = 0; i < MAX_NUM_CLUSTERS_64x64; i++)
2598
0
    {
2599
0
        ps_cluster_blk_64x64->as_cluster_data[i].max_dist_from_centroid =
2600
0
            bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
2601
0
        ps_cluster_blk_64x64->as_cluster_data[i].is_valid_cluster = 0;
2602
2603
0
        ps_cluster_blk_64x64->as_cluster_data[i].bi_mv_pixel_area = 0;
2604
0
        ps_cluster_blk_64x64->as_cluster_data[i].uni_mv_pixel_area = 0;
2605
0
    }
2606
0
    for(i = 0; i < MAX_NUM_REF; i++)
2607
0
    {
2608
0
        ps_cluster_blk_64x64->au1_num_clusters[i] = 0;
2609
0
    }
2610
0
}
2611
2612
/**
2613
********************************************************************************
2614
*  @fn   void hme_sort_and_assign_top_ref_ids_areawise
2615
*               (
2616
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2617
*               )
2618
*
2619
*  @brief  Finds best_uni_ref and best_alt_ref
2620
*
2621
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2622
*
2623
*  @param[in]  bidir_enabled: flag that indicates whether or not bi-pred is
2624
*                             enabled
2625
*
2626
*  @param[in]  block_width: width of the block in pels
2627
*
2628
*  @param[in]  e_cu_pos: position of the block within the CTB
2629
*
2630
*  @return None
2631
********************************************************************************
2632
*/
2633
void hme_sort_and_assign_top_ref_ids_areawise(
2634
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
2635
0
{
2636
0
    cluster_32x32_blk_t *ps_32x32 = NULL;
2637
0
    cluster_64x64_blk_t *ps_64x64 = NULL;
2638
0
    cluster_data_t *ps_data;
2639
2640
0
    S32 j, k;
2641
2642
0
    S32 ai4_uni_area[MAX_NUM_REF];
2643
0
    S32 ai4_bi_area[MAX_NUM_REF];
2644
0
    S32 ai4_ref_id_found[MAX_NUM_REF];
2645
0
    S32 ai4_ref_id[MAX_NUM_REF];
2646
2647
0
    S32 best_uni_ref = -1, best_alt_ref = -1;
2648
0
    S32 num_clusters;
2649
0
    S32 num_ref = 0;
2650
0
    S32 num_clusters_evaluated = 0;
2651
0
    S32 is_cur_blk_valid;
2652
2653
0
    if(32 == block_width)
2654
0
    {
2655
0
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
2656
0
        ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
2657
0
        num_clusters = ps_32x32->num_clusters;
2658
0
        ps_data = &ps_32x32->as_cluster_data[0];
2659
0
    }
2660
0
    else
2661
0
    {
2662
0
        is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
2663
0
        ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
2664
0
        num_clusters = ps_64x64->num_clusters;
2665
0
        ps_data = &ps_64x64->as_cluster_data[0];
2666
0
    }
2667
2668
#if !ENABLE_4CTB_EVALUATION
2669
    if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
2670
    {
2671
        return;
2672
    }
2673
#endif
2674
0
    if(num_clusters == 0)
2675
0
    {
2676
0
        return;
2677
0
    }
2678
0
    else if(!is_cur_blk_valid)
2679
0
    {
2680
0
        return;
2681
0
    }
2682
2683
0
    memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
2684
0
    memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
2685
0
    memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
2686
0
    memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
2687
2688
0
    for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
2689
0
    {
2690
0
        S32 ref_id;
2691
2692
0
        if(!ps_data->is_valid_cluster)
2693
0
        {
2694
0
            continue;
2695
0
        }
2696
2697
0
        ref_id = ps_data->ref_id;
2698
2699
0
        num_clusters_evaluated++;
2700
2701
0
        ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
2702
0
        ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
2703
2704
0
        if(!ai4_ref_id_found[ref_id])
2705
0
        {
2706
0
            ai4_ref_id[ref_id] = ref_id;
2707
0
            ai4_ref_id_found[ref_id] = 1;
2708
0
            num_ref++;
2709
0
        }
2710
0
    }
2711
2712
0
    {
2713
0
        S32 ai4_ref_id_temp[MAX_NUM_REF];
2714
2715
0
        memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
2716
2717
0
        for(k = 1; k < MAX_NUM_REF; k++)
2718
0
        {
2719
0
            if(ai4_uni_area[k] > ai4_uni_area[0])
2720
0
            {
2721
0
                SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
2722
0
                SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
2723
0
            }
2724
0
        }
2725
2726
0
        best_uni_ref = ai4_ref_id_temp[0];
2727
0
    }
2728
2729
0
    if(bidir_enabled)
2730
0
    {
2731
0
        for(k = 1; k < MAX_NUM_REF; k++)
2732
0
        {
2733
0
            if(ai4_bi_area[k] > ai4_bi_area[0])
2734
0
            {
2735
0
                SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
2736
0
                SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
2737
0
            }
2738
0
        }
2739
2740
0
        if(!ai4_bi_area[0])
2741
0
        {
2742
0
            best_alt_ref = -1;
2743
2744
0
            if(32 == block_width)
2745
0
            {
2746
0
                SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2747
0
            }
2748
0
            else
2749
0
            {
2750
0
                SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2751
0
            }
2752
2753
0
            return;
2754
0
        }
2755
2756
0
        if(best_uni_ref == ai4_ref_id[0])
2757
0
        {
2758
0
            for(k = 2; k < MAX_NUM_REF; k++)
2759
0
            {
2760
0
                if(ai4_bi_area[k] > ai4_bi_area[1])
2761
0
                {
2762
0
                    SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
2763
0
                    SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
2764
0
                }
2765
0
            }
2766
2767
0
            best_alt_ref = ai4_ref_id[1];
2768
0
        }
2769
0
        else
2770
0
        {
2771
0
            best_alt_ref = ai4_ref_id[0];
2772
0
        }
2773
0
    }
2774
2775
0
    if(32 == block_width)
2776
0
    {
2777
0
        SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
2778
0
    }
2779
0
    else
2780
0
    {
2781
0
        SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
2782
0
    }
2783
0
}
2784
2785
/**
2786
********************************************************************************
2787
*  @fn   void hme_find_top_ref_ids
2788
*               (
2789
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2790
*               )
2791
*
2792
*  @brief  Finds best_uni_ref and best_alt_ref
2793
*
2794
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2795
*
2796
*  @return None
2797
********************************************************************************
2798
*/
2799
void hme_find_top_ref_ids(
2800
    ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
2801
0
{
2802
0
    S32 i;
2803
2804
0
    if(32 == block_width)
2805
0
    {
2806
0
        for(i = 0; i < 4; i++)
2807
0
        {
2808
0
            hme_sort_and_assign_top_ref_ids_areawise(
2809
0
                ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
2810
0
        }
2811
0
    }
2812
0
    else if(64 == block_width)
2813
0
    {
2814
0
        hme_sort_and_assign_top_ref_ids_areawise(
2815
0
            ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
2816
0
    }
2817
0
}
2818
2819
/**
2820
********************************************************************************
2821
*  @fn   void hme_boot_out_outlier
2822
*               (
2823
*                   ctb_cluster_info_t *ps_ctb_cluster_info
2824
*               )
2825
*
2826
*  @brief  Removes outlier clusters before CU tree population
2827
*
2828
*  @param[in/out]  ps_ctb_cluster_info: structure that points to ctb data
2829
*
2830
*  @return None
2831
********************************************************************************
2832
*/
2833
void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
2834
0
{
2835
0
    cluster_32x32_blk_t *ps_32x32;
2836
2837
0
    S32 i;
2838
2839
0
    cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
2840
2841
0
    S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
2842
2843
0
    if(32 == blk_width)
2844
0
    {
2845
        /* 32x32 clusters */
2846
0
        for(i = 0; i < 4; i++)
2847
0
        {
2848
0
            ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
2849
2850
0
            if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2851
0
            {
2852
0
                BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
2853
0
            }
2854
0
        }
2855
0
    }
2856
0
    else if(64 == blk_width)
2857
0
    {
2858
        /* 64x64 clusters */
2859
0
        if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
2860
0
        {
2861
0
            BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
2862
0
        }
2863
0
    }
2864
0
}
2865
2866
/**
2867
********************************************************************************
2868
*  @fn   void hme_update_cluster_attributes
2869
*               (
2870
*                   cluster_data_t *ps_cluster_data,
2871
*                   S32 mvx,
2872
*                   S32 mvy,
2873
*                   PART_ID_T e_part_id
2874
*               )
2875
*
2876
*  @brief  Implementation fo the clustering algorithm
2877
*
2878
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2879
*
2880
*  @param[in]  mvx : x co-ordinate of the motion vector
2881
*
2882
*  @param[in]  mvy : y co-ordinate of the motion vector
2883
*
2884
*  @param[in]  ref_idx : ref_id of the motion vector
2885
*
2886
*  @param[in]  e_part_id : partition id of the motion vector
2887
*
2888
*  @return None
2889
********************************************************************************
2890
*/
2891
static __inline void hme_update_cluster_attributes(
2892
    cluster_data_t *ps_cluster_data,
2893
    S32 mvx,
2894
    S32 mvy,
2895
    S32 mvdx,
2896
    S32 mvdy,
2897
    S32 ref_id,
2898
    S32 sdi,
2899
    U08 is_part_of_bi,
2900
    PART_ID_T e_part_id)
2901
0
{
2902
0
    LWORD64 i8_mvx_sum_q8;
2903
0
    LWORD64 i8_mvy_sum_q8;
2904
2905
0
    S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
2906
0
    S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
2907
2908
0
    if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
2909
0
    {
2910
0
        ps_cluster_data->min_x = mvx;
2911
0
    }
2912
0
    else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
2913
0
    {
2914
0
        ps_cluster_data->max_x = mvx;
2915
0
    }
2916
2917
0
    if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
2918
0
    {
2919
0
        ps_cluster_data->min_y = mvy;
2920
0
    }
2921
0
    else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
2922
0
    {
2923
0
        ps_cluster_data->max_y = mvy;
2924
0
    }
2925
2926
0
    {
2927
0
        S32 num_mvs = ps_cluster_data->num_mvs;
2928
2929
0
        ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
2930
0
        ps_cluster_data->as_mv[num_mvs].mvx = mvx;
2931
0
        ps_cluster_data->as_mv[num_mvs].mvy = mvy;
2932
2933
        /***************************/
2934
0
        ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
2935
0
        ps_cluster_data->as_mv[num_mvs].sdi = sdi;
2936
        /**************************/
2937
0
    }
2938
2939
    /* Updation of centroid */
2940
0
    {
2941
0
        i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
2942
0
        i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
2943
2944
0
        ps_cluster_data->num_mvs++;
2945
2946
0
        ps_cluster_data->s_centroid.i4_pos_x_q8 =
2947
0
            (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
2948
0
        ps_cluster_data->s_centroid.i4_pos_y_q8 =
2949
0
            (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
2950
0
    }
2951
2952
0
    ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
2953
2954
0
    if(is_part_of_bi)
2955
0
    {
2956
0
        ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
2957
0
    }
2958
0
    else
2959
0
    {
2960
0
        ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
2961
0
    }
2962
0
}
2963
2964
/**
2965
********************************************************************************
2966
*  @fn   void hme_try_cluster_merge
2967
*               (
2968
*                   cluster_data_t *ps_cluster_data,
2969
*                   S32 *pi4_num_clusters,
2970
*                   S32 idx_of_updated_cluster
2971
*               )
2972
*
2973
*  @brief  Implementation fo the clustering algorithm
2974
*
2975
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
2976
*
2977
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
2978
*
2979
*  @param[in]  idx_of_updated_cluster : index of the cluster most recently
2980
*                                       updated
2981
*
2982
*  @return Nothing
2983
********************************************************************************
2984
*/
2985
void hme_try_cluster_merge(
2986
    cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
2987
0
{
2988
0
    centroid_t *ps_centroid;
2989
2990
0
    S32 cur_pos_x_q8;
2991
0
    S32 cur_pos_y_q8;
2992
0
    S32 i;
2993
0
    S32 max_dist_from_centroid;
2994
0
    S32 mvd;
2995
0
    S32 mvdx_q8;
2996
0
    S32 mvdx;
2997
0
    S32 mvdy_q8;
2998
0
    S32 mvdy;
2999
0
    S32 num_clusters, num_clusters_evaluated;
3000
0
    S32 other_pos_x_q8;
3001
0
    S32 other_pos_y_q8;
3002
3003
0
    cluster_data_t *ps_root = ps_cluster_data;
3004
0
    cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
3005
0
    centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
3006
3007
    /* Merge is superfluous if num_clusters is 1 */
3008
0
    if(*pu1_num_clusters == 1)
3009
0
    {
3010
0
        return;
3011
0
    }
3012
3013
0
    cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
3014
0
    cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
3015
3016
0
    max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
3017
3018
0
    num_clusters = *pu1_num_clusters;
3019
0
    num_clusters_evaluated = 0;
3020
3021
0
    for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
3022
0
    {
3023
0
        if(!ps_cluster_data->is_valid_cluster)
3024
0
        {
3025
0
            continue;
3026
0
        }
3027
0
        if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
3028
0
        {
3029
0
            num_clusters_evaluated++;
3030
0
            continue;
3031
0
        }
3032
3033
0
        ps_centroid = &ps_cluster_data->s_centroid;
3034
3035
0
        other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
3036
0
        other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
3037
3038
0
        mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
3039
0
        mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
3040
0
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3041
0
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3042
3043
0
        mvd = ABS(mvdx) + ABS(mvdy);
3044
3045
0
        if(mvd <= (max_dist_from_centroid >> 1))
3046
0
        {
3047
            /* 0 => no updates */
3048
            /* 1 => min updated */
3049
            /* 2 => max updated */
3050
0
            S32 minmax_x_update_id;
3051
0
            S32 minmax_y_update_id;
3052
3053
0
            LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
3054
0
            LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
3055
0
            LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
3056
0
            LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
3057
3058
0
            (*pu1_num_clusters)--;
3059
3060
0
            ps_cluster_data->is_valid_cluster = 0;
3061
3062
0
            memcpy(
3063
0
                &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
3064
0
                ps_cluster_data->as_mv,
3065
0
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3066
3067
0
            ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
3068
0
            ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
3069
0
            ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3070
0
            ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3071
0
            i8_mv_x_sum_self += i8_mv_x_sum_cousin;
3072
0
            i8_mv_y_sum_self += i8_mv_y_sum_cousin;
3073
3074
0
            ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
3075
0
            ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
3076
3077
0
            minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
3078
0
                                     ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
3079
0
                                     : 1;
3080
0
            minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
3081
0
                                     ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
3082
0
                                     : 1;
3083
3084
            /* Updation of centroid spread */
3085
0
            switch(minmax_x_update_id + (minmax_y_update_id << 2))
3086
0
            {
3087
0
            case 1:
3088
0
            {
3089
0
                S32 mvd, mvd_q8;
3090
3091
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3092
3093
0
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3094
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3095
3096
0
                if(mvd > (max_dist_from_centroid))
3097
0
                {
3098
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3099
0
                }
3100
0
                break;
3101
0
            }
3102
0
            case 2:
3103
0
            {
3104
0
                S32 mvd, mvd_q8;
3105
3106
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3107
3108
0
                mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3109
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3110
3111
0
                if(mvd > (max_dist_from_centroid))
3112
0
                {
3113
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3114
0
                }
3115
0
                break;
3116
0
            }
3117
0
            case 4:
3118
0
            {
3119
0
                S32 mvd, mvd_q8;
3120
3121
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3122
3123
0
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3124
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3125
3126
0
                if(mvd > (max_dist_from_centroid))
3127
0
                {
3128
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3129
0
                }
3130
0
                break;
3131
0
            }
3132
0
            case 5:
3133
0
            {
3134
0
                S32 mvd;
3135
0
                S32 mvdx, mvdx_q8;
3136
0
                S32 mvdy, mvdy_q8;
3137
3138
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3139
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3140
3141
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3142
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3143
3144
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3145
3146
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3147
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3148
3149
0
                if(mvd > max_dist_from_centroid)
3150
0
                {
3151
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3152
0
                }
3153
0
                break;
3154
0
            }
3155
0
            case 6:
3156
0
            {
3157
0
                S32 mvd;
3158
0
                S32 mvdx, mvdx_q8;
3159
0
                S32 mvdy, mvdy_q8;
3160
3161
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
3162
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3163
3164
0
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3165
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3166
3167
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3168
3169
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3170
0
                ps_cur_cluster->min_y = ps_cluster_data->min_y;
3171
3172
0
                if(mvd > max_dist_from_centroid)
3173
0
                {
3174
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3175
0
                }
3176
0
                break;
3177
0
            }
3178
0
            case 8:
3179
0
            {
3180
0
                S32 mvd, mvd_q8;
3181
3182
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3183
3184
0
                mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3185
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3186
3187
0
                if(mvd > (max_dist_from_centroid))
3188
0
                {
3189
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3190
0
                }
3191
0
                break;
3192
0
            }
3193
0
            case 9:
3194
0
            {
3195
0
                S32 mvd;
3196
0
                S32 mvdx, mvdx_q8;
3197
0
                S32 mvdy, mvdy_q8;
3198
3199
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
3200
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3201
3202
0
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3203
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3204
3205
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3206
3207
0
                ps_cur_cluster->min_x = ps_cluster_data->min_x;
3208
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3209
3210
0
                if(mvd > max_dist_from_centroid)
3211
0
                {
3212
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3213
0
                }
3214
0
                break;
3215
0
            }
3216
0
            case 10:
3217
0
            {
3218
0
                S32 mvd;
3219
0
                S32 mvdx, mvdx_q8;
3220
0
                S32 mvdy, mvdy_q8;
3221
3222
0
                mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
3223
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3224
3225
0
                mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
3226
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3227
3228
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3229
3230
0
                ps_cur_cluster->max_x = ps_cluster_data->max_x;
3231
0
                ps_cur_cluster->max_y = ps_cluster_data->max_y;
3232
3233
0
                if(mvd > ps_cluster_data->max_dist_from_centroid)
3234
0
                {
3235
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
3236
0
                }
3237
0
                break;
3238
0
            }
3239
0
            default:
3240
0
            {
3241
0
                break;
3242
0
            }
3243
0
            }
3244
3245
0
            hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
3246
3247
0
            return;
3248
0
        }
3249
3250
0
        num_clusters_evaluated++;
3251
0
    }
3252
0
}
3253
3254
/**
3255
********************************************************************************
3256
*  @fn   void hme_find_and_update_clusters
3257
*               (
3258
*                   cluster_data_t *ps_cluster_data,
3259
*                   S32 *pi4_num_clusters,
3260
*                   S32 mvx,
3261
*                   S32 mvy,
3262
*                   S32 ref_idx,
3263
*                   PART_ID_T e_part_id
3264
*               )
3265
*
3266
*  @brief  Implementation fo the clustering algorithm
3267
*
3268
*  @param[in/out]  ps_cluster_data: pointer to cluster_data_t struct
3269
*
3270
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
3271
*
3272
*  @param[in]  mvx : x co-ordinate of the motion vector
3273
*
3274
*  @param[in]  mvy : y co-ordinate of the motion vector
3275
*
3276
*  @param[in]  ref_idx : ref_id of the motion vector
3277
*
3278
*  @param[in]  e_part_id : partition id of the motion vector
3279
*
3280
*  @return None
3281
********************************************************************************
3282
*/
3283
void hme_find_and_update_clusters(
3284
    cluster_data_t *ps_cluster_data,
3285
    U08 *pu1_num_clusters,
3286
    S16 i2_mv_x,
3287
    S16 i2_mv_y,
3288
    U08 i1_ref_idx,
3289
    S32 i4_sdi,
3290
    PART_ID_T e_part_id,
3291
    U08 is_part_of_bi)
3292
0
{
3293
0
    S32 i;
3294
0
    S32 min_mvd_cluster_id = -1;
3295
0
    S32 mvd, mvd_limit, mvdx, mvdy;
3296
0
    S32 min_mvdx, min_mvdy;
3297
3298
0
    S32 min_mvd = MAX_32BIT_VAL;
3299
0
    S32 num_clusters = *pu1_num_clusters;
3300
3301
0
    S32 mvx = i2_mv_x;
3302
0
    S32 mvy = i2_mv_y;
3303
0
    S32 ref_idx = i1_ref_idx;
3304
0
    S32 sdi = i4_sdi;
3305
0
    S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
3306
3307
0
    if(num_clusters == 0)
3308
0
    {
3309
0
        cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
3310
3311
0
        ps_data->num_mvs = 1;
3312
0
        ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3313
0
        ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3314
0
        ps_data->ref_id = ref_idx;
3315
0
        ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3316
0
        ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3317
0
        ps_data->as_mv[0].mvx = mvx;
3318
0
        ps_data->as_mv[0].mvy = mvy;
3319
3320
        /***************************/
3321
0
        ps_data->as_mv[0].is_uni = !is_part_of_bi;
3322
0
        ps_data->as_mv[0].sdi = sdi;
3323
0
        if(is_part_of_bi)
3324
0
        {
3325
0
            ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3326
0
        }
3327
0
        else
3328
0
        {
3329
0
            ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3330
0
        }
3331
        /**************************/
3332
0
        ps_data->max_x = mvx;
3333
0
        ps_data->min_x = mvx;
3334
0
        ps_data->max_y = mvy;
3335
0
        ps_data->min_y = mvy;
3336
3337
0
        ps_data->is_valid_cluster = 1;
3338
3339
0
        *pu1_num_clusters = 1;
3340
0
    }
3341
0
    else
3342
0
    {
3343
0
        S32 num_clusters_evaluated = 0;
3344
3345
0
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3346
0
        {
3347
0
            cluster_data_t *ps_data = &ps_cluster_data[i];
3348
3349
0
            centroid_t *ps_centroid;
3350
3351
0
            S32 mvx_q8;
3352
0
            S32 mvy_q8;
3353
0
            S32 posx_q8;
3354
0
            S32 posy_q8;
3355
0
            S32 mvdx_q8;
3356
0
            S32 mvdy_q8;
3357
3358
            /* In anticipation of a possible merging of clusters */
3359
0
            if(ps_data->is_valid_cluster == 0)
3360
0
            {
3361
0
                new_cluster_idx = i;
3362
0
                continue;
3363
0
            }
3364
3365
0
            if(ref_idx != ps_data->ref_id)
3366
0
            {
3367
0
                num_clusters_evaluated++;
3368
0
                continue;
3369
0
            }
3370
3371
0
            ps_centroid = &ps_data->s_centroid;
3372
0
            posx_q8 = ps_centroid->i4_pos_x_q8;
3373
0
            posy_q8 = ps_centroid->i4_pos_y_q8;
3374
3375
0
            mvx_q8 = mvx << 8;
3376
0
            mvy_q8 = mvy << 8;
3377
3378
0
            mvdx_q8 = posx_q8 - mvx_q8;
3379
0
            mvdy_q8 = posy_q8 - mvy_q8;
3380
3381
0
            mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
3382
0
            mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
3383
3384
0
            mvd = ABS(mvdx) + ABS(mvdy);
3385
3386
0
            if(mvd < min_mvd)
3387
0
            {
3388
0
                min_mvd = mvd;
3389
0
                min_mvdx = mvdx;
3390
0
                min_mvdy = mvdy;
3391
0
                min_mvd_cluster_id = i;
3392
0
            }
3393
3394
0
            num_clusters_evaluated++;
3395
0
        }
3396
3397
0
        mvd_limit = (min_mvd_cluster_id == -1)
3398
0
                        ? ps_cluster_data[0].max_dist_from_centroid
3399
0
                        : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
3400
3401
        /* This condition implies that min_mvd has been updated */
3402
0
        if(min_mvd <= mvd_limit)
3403
0
        {
3404
0
            hme_update_cluster_attributes(
3405
0
                &ps_cluster_data[min_mvd_cluster_id],
3406
0
                mvx,
3407
0
                mvy,
3408
0
                min_mvdx,
3409
0
                min_mvdy,
3410
0
                ref_idx,
3411
0
                sdi,
3412
0
                is_part_of_bi,
3413
0
                e_part_id);
3414
3415
0
            if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
3416
0
            {
3417
0
                hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
3418
0
            }
3419
0
        }
3420
0
        else
3421
0
        {
3422
0
            cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
3423
0
                                          ? &ps_cluster_data[num_clusters]
3424
0
                                          : &ps_cluster_data[new_cluster_idx];
3425
3426
0
            ps_data->num_mvs = 1;
3427
0
            ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
3428
0
            ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
3429
0
            ps_data->ref_id = ref_idx;
3430
0
            ps_data->area_in_pixels = gai4_partition_area[e_part_id];
3431
0
            ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
3432
0
            ps_data->as_mv[0].mvx = mvx;
3433
0
            ps_data->as_mv[0].mvy = mvy;
3434
3435
            /***************************/
3436
0
            ps_data->as_mv[0].is_uni = !is_part_of_bi;
3437
0
            ps_data->as_mv[0].sdi = sdi;
3438
0
            if(is_part_of_bi)
3439
0
            {
3440
0
                ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
3441
0
            }
3442
0
            else
3443
0
            {
3444
0
                ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
3445
0
            }
3446
            /**************************/
3447
0
            ps_data->max_x = mvx;
3448
0
            ps_data->min_x = mvx;
3449
0
            ps_data->max_y = mvy;
3450
0
            ps_data->min_y = mvy;
3451
3452
0
            ps_data->is_valid_cluster = 1;
3453
3454
0
            num_clusters++;
3455
0
            *pu1_num_clusters = num_clusters;
3456
0
        }
3457
0
    }
3458
0
}
3459
3460
/**
3461
********************************************************************************
3462
*  @fn   void hme_update_32x32_cluster_attributes
3463
*               (
3464
*                   cluster_32x32_blk_t *ps_blk_32x32,
3465
*                   cluster_data_t *ps_cluster_data
3466
*               )
3467
*
3468
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
3469
*          the constituent 16x16 clusters
3470
*
3471
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
3472
*
3473
*  @param[in]  ps_cluster_data : structure containing 16x16 block results
3474
*
3475
*  @return None
3476
********************************************************************************
3477
*/
3478
void hme_update_32x32_cluster_attributes(
3479
    cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
3480
0
{
3481
0
    cluster_data_t *ps_cur_cluster_32;
3482
3483
0
    S32 i;
3484
0
    S32 mvd_limit;
3485
3486
0
    S32 num_clusters = ps_blk_32x32->num_clusters;
3487
3488
0
    if(0 == num_clusters)
3489
0
    {
3490
0
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3491
3492
0
        ps_blk_32x32->num_clusters++;
3493
0
        ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3494
3495
0
        ps_cur_cluster_32->is_valid_cluster = 1;
3496
3497
0
        ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3498
0
        ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3499
0
        ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3500
3501
0
        memcpy(
3502
0
            ps_cur_cluster_32->as_mv,
3503
0
            ps_cluster_data->as_mv,
3504
0
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3505
3506
0
        ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3507
3508
0
        ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3509
3510
0
        ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3511
0
        ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3512
0
        ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3513
0
        ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3514
3515
0
        ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3516
0
    }
3517
0
    else
3518
0
    {
3519
0
        centroid_t *ps_centroid;
3520
3521
0
        S32 cur_posx_q8, cur_posy_q8;
3522
0
        S32 min_mvd_cluster_id = -1;
3523
0
        S32 mvd;
3524
0
        S32 mvdx;
3525
0
        S32 mvdy;
3526
0
        S32 mvdx_min;
3527
0
        S32 mvdy_min;
3528
0
        S32 mvdx_q8;
3529
0
        S32 mvdy_q8;
3530
3531
0
        S32 num_clusters_evaluated = 0;
3532
3533
0
        S32 mvd_min = MAX_32BIT_VAL;
3534
3535
0
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3536
0
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3537
3538
0
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3539
0
        {
3540
0
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
3541
3542
0
            if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
3543
0
            {
3544
0
                num_clusters_evaluated++;
3545
0
                continue;
3546
0
            }
3547
0
            if(!ps_cluster_data->is_valid_cluster)
3548
0
            {
3549
0
                continue;
3550
0
            }
3551
3552
0
            num_clusters_evaluated++;
3553
3554
0
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3555
3556
0
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3557
0
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3558
3559
0
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3560
0
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3561
3562
0
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3563
0
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3564
3565
0
            mvd = ABS(mvdx) + ABS(mvdy);
3566
3567
0
            if(mvd < mvd_min)
3568
0
            {
3569
0
                mvd_min = mvd;
3570
0
                mvdx_min = mvdx;
3571
0
                mvdy_min = mvdy;
3572
0
                min_mvd_cluster_id = i;
3573
0
            }
3574
0
        }
3575
3576
0
        ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
3577
3578
0
        mvd_limit = (min_mvd_cluster_id == -1)
3579
0
                        ? ps_cur_cluster_32[0].max_dist_from_centroid
3580
0
                        : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
3581
3582
0
        if(mvd_min <= mvd_limit)
3583
0
        {
3584
0
            LWORD64 i8_updated_posx;
3585
0
            LWORD64 i8_updated_posy;
3586
0
            WORD32 minmax_updated_x = 0;
3587
0
            WORD32 minmax_updated_y = 0;
3588
3589
0
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
3590
3591
0
            ps_centroid = &ps_cur_cluster_32->s_centroid;
3592
3593
0
            ps_cur_cluster_32->is_valid_cluster = 1;
3594
3595
0
            ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
3596
0
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3597
0
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3598
3599
0
            memcpy(
3600
0
                &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
3601
0
                ps_cluster_data->as_mv,
3602
0
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3603
3604
0
            if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
3605
0
            {
3606
0
                ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
3607
0
                minmax_updated_x = 1;
3608
0
            }
3609
0
            else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
3610
0
            {
3611
0
                ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3612
0
                minmax_updated_x = 2;
3613
0
            }
3614
3615
0
            if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
3616
0
            {
3617
0
                ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3618
0
                minmax_updated_y = 1;
3619
0
            }
3620
0
            else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
3621
0
            {
3622
0
                ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3623
0
                minmax_updated_y = 2;
3624
0
            }
3625
3626
0
            switch((minmax_updated_y << 2) + minmax_updated_x)
3627
0
            {
3628
0
            case 1:
3629
0
            {
3630
0
                S32 mvd, mvd_q8;
3631
3632
0
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3633
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3634
3635
0
                if(mvd > (mvd_limit))
3636
0
                {
3637
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3638
0
                }
3639
0
                break;
3640
0
            }
3641
0
            case 2:
3642
0
            {
3643
0
                S32 mvd, mvd_q8;
3644
3645
0
                mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3646
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3647
3648
0
                if(mvd > (mvd_limit))
3649
0
                {
3650
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3651
0
                }
3652
0
                break;
3653
0
            }
3654
0
            case 4:
3655
0
            {
3656
0
                S32 mvd, mvd_q8;
3657
3658
0
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3659
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3660
3661
0
                if(mvd > (mvd_limit))
3662
0
                {
3663
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3664
0
                }
3665
0
                break;
3666
0
            }
3667
0
            case 5:
3668
0
            {
3669
0
                S32 mvd;
3670
0
                S32 mvdx, mvdx_q8;
3671
0
                S32 mvdy, mvdy_q8;
3672
3673
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3674
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3675
3676
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3677
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3678
3679
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3680
3681
0
                if(mvd > mvd_limit)
3682
0
                {
3683
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3684
0
                }
3685
0
                break;
3686
0
            }
3687
0
            case 6:
3688
0
            {
3689
0
                S32 mvd;
3690
0
                S32 mvdx, mvdx_q8;
3691
0
                S32 mvdy, mvdy_q8;
3692
3693
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
3694
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3695
3696
0
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3697
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3698
3699
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3700
3701
0
                if(mvd > mvd_limit)
3702
0
                {
3703
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3704
0
                }
3705
0
                break;
3706
0
            }
3707
0
            case 8:
3708
0
            {
3709
0
                S32 mvd, mvd_q8;
3710
3711
0
                mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3712
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3713
3714
0
                if(mvd > (mvd_limit))
3715
0
                {
3716
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3717
0
                }
3718
0
                break;
3719
0
            }
3720
0
            case 9:
3721
0
            {
3722
0
                S32 mvd;
3723
0
                S32 mvdx, mvdx_q8;
3724
0
                S32 mvdy, mvdy_q8;
3725
3726
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
3727
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3728
3729
0
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3730
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3731
3732
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3733
3734
0
                if(mvd > mvd_limit)
3735
0
                {
3736
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3737
0
                }
3738
0
                break;
3739
0
            }
3740
0
            case 10:
3741
0
            {
3742
0
                S32 mvd;
3743
0
                S32 mvdx, mvdx_q8;
3744
0
                S32 mvdy, mvdy_q8;
3745
3746
0
                mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
3747
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3748
3749
0
                mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
3750
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3751
3752
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
3753
3754
0
                if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
3755
0
                {
3756
0
                    ps_cur_cluster_32->max_dist_from_centroid = mvd;
3757
0
                }
3758
0
                break;
3759
0
            }
3760
0
            default:
3761
0
            {
3762
0
                break;
3763
0
            }
3764
0
            }
3765
3766
0
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
3767
0
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
3768
0
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
3769
0
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
3770
3771
0
            ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
3772
3773
0
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
3774
0
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
3775
0
        }
3776
0
        else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
3777
0
        {
3778
0
            ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
3779
3780
0
            ps_blk_32x32->num_clusters++;
3781
0
            ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
3782
3783
0
            ps_cur_cluster_32->is_valid_cluster = 1;
3784
3785
0
            ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
3786
0
            ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3787
0
            ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3788
3789
0
            memcpy(
3790
0
                ps_cur_cluster_32->as_mv,
3791
0
                ps_cluster_data->as_mv,
3792
0
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3793
3794
0
            ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
3795
3796
0
            ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
3797
3798
0
            ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
3799
0
            ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
3800
0
            ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
3801
0
            ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
3802
3803
0
            ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
3804
0
        }
3805
0
    }
3806
0
}
3807
3808
/**
3809
********************************************************************************
3810
*  @fn   void hme_update_64x64_cluster_attributes
3811
*               (
3812
*                   cluster_64x64_blk_t *ps_blk_64x64,
3813
*                   cluster_data_t *ps_cluster_data
3814
*               )
3815
*
3816
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
3817
*          the constituent 16x16 clusters
3818
*
3819
*  @param[out]  ps_blk_64x64: structure containing 64x64 block results
3820
*
3821
*  @param[in]  ps_cluster_data : structure containing 32x32 block results
3822
*
3823
*  @return None
3824
********************************************************************************
3825
*/
3826
void hme_update_64x64_cluster_attributes(
3827
    cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
3828
0
{
3829
0
    cluster_data_t *ps_cur_cluster_64;
3830
3831
0
    S32 i;
3832
0
    S32 mvd_limit;
3833
3834
0
    S32 num_clusters = ps_blk_64x64->num_clusters;
3835
3836
0
    if(0 == num_clusters)
3837
0
    {
3838
0
        ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
3839
3840
0
        ps_blk_64x64->num_clusters++;
3841
0
        ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
3842
3843
0
        ps_cur_cluster_64->is_valid_cluster = 1;
3844
3845
0
        ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
3846
0
        ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3847
0
        ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3848
3849
0
        memcpy(
3850
0
            ps_cur_cluster_64->as_mv,
3851
0
            ps_cluster_data->as_mv,
3852
0
            sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3853
3854
0
        ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
3855
3856
0
        ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
3857
3858
0
        ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
3859
0
        ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
3860
0
        ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
3861
0
        ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
3862
3863
0
        ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
3864
0
    }
3865
0
    else
3866
0
    {
3867
0
        centroid_t *ps_centroid;
3868
3869
0
        S32 cur_posx_q8, cur_posy_q8;
3870
0
        S32 min_mvd_cluster_id = -1;
3871
0
        S32 mvd;
3872
0
        S32 mvdx;
3873
0
        S32 mvdy;
3874
0
        S32 mvdx_min;
3875
0
        S32 mvdy_min;
3876
0
        S32 mvdx_q8;
3877
0
        S32 mvdy_q8;
3878
3879
0
        S32 num_clusters_evaluated = 0;
3880
3881
0
        S32 mvd_min = MAX_32BIT_VAL;
3882
3883
0
        S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
3884
0
        S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
3885
3886
0
        for(i = 0; num_clusters_evaluated < num_clusters; i++)
3887
0
        {
3888
0
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
3889
3890
0
            if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
3891
0
            {
3892
0
                num_clusters_evaluated++;
3893
0
                continue;
3894
0
            }
3895
3896
0
            if(!ps_cur_cluster_64->is_valid_cluster)
3897
0
            {
3898
0
                continue;
3899
0
            }
3900
3901
0
            num_clusters_evaluated++;
3902
3903
0
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3904
3905
0
            cur_posx_q8 = ps_centroid->i4_pos_x_q8;
3906
0
            cur_posy_q8 = ps_centroid->i4_pos_y_q8;
3907
3908
0
            mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
3909
0
            mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
3910
3911
0
            mvdx = (mvdx_q8 + (1 << 7)) >> 8;
3912
0
            mvdy = (mvdy_q8 + (1 << 7)) >> 8;
3913
3914
0
            mvd = ABS(mvdx) + ABS(mvdy);
3915
3916
0
            if(mvd < mvd_min)
3917
0
            {
3918
0
                mvd_min = mvd;
3919
0
                mvdx_min = mvdx;
3920
0
                mvdy_min = mvdy;
3921
0
                min_mvd_cluster_id = i;
3922
0
            }
3923
0
        }
3924
3925
0
        ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
3926
3927
0
        mvd_limit = (min_mvd_cluster_id == -1)
3928
0
                        ? ps_cur_cluster_64[0].max_dist_from_centroid
3929
0
                        : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
3930
3931
0
        if(mvd_min <= mvd_limit)
3932
0
        {
3933
0
            LWORD64 i8_updated_posx;
3934
0
            LWORD64 i8_updated_posy;
3935
0
            WORD32 minmax_updated_x = 0;
3936
0
            WORD32 minmax_updated_y = 0;
3937
3938
0
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
3939
3940
0
            ps_centroid = &ps_cur_cluster_64->s_centroid;
3941
3942
0
            ps_cur_cluster_64->is_valid_cluster = 1;
3943
3944
0
            ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
3945
0
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
3946
0
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
3947
3948
0
            memcpy(
3949
0
                &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
3950
0
                ps_cluster_data->as_mv,
3951
0
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
3952
3953
0
            if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
3954
0
            {
3955
0
                ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3956
0
                minmax_updated_x = 1;
3957
0
            }
3958
0
            else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
3959
0
            {
3960
0
                ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
3961
0
                minmax_updated_x = 2;
3962
0
            }
3963
3964
0
            if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
3965
0
            {
3966
0
                ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3967
0
                minmax_updated_y = 1;
3968
0
            }
3969
0
            else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
3970
0
            {
3971
0
                ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
3972
0
                minmax_updated_y = 2;
3973
0
            }
3974
3975
0
            switch((minmax_updated_y << 2) + minmax_updated_x)
3976
0
            {
3977
0
            case 1:
3978
0
            {
3979
0
                S32 mvd, mvd_q8;
3980
3981
0
                mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
3982
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3983
3984
0
                if(mvd > (mvd_limit))
3985
0
                {
3986
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
3987
0
                }
3988
0
                break;
3989
0
            }
3990
0
            case 2:
3991
0
            {
3992
0
                S32 mvd, mvd_q8;
3993
3994
0
                mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
3995
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
3996
3997
0
                if(mvd > (mvd_limit))
3998
0
                {
3999
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4000
0
                }
4001
0
                break;
4002
0
            }
4003
0
            case 4:
4004
0
            {
4005
0
                S32 mvd, mvd_q8;
4006
4007
0
                mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4008
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4009
4010
0
                if(mvd > (mvd_limit))
4011
0
                {
4012
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4013
0
                }
4014
0
                break;
4015
0
            }
4016
0
            case 5:
4017
0
            {
4018
0
                S32 mvd;
4019
0
                S32 mvdx, mvdx_q8;
4020
0
                S32 mvdy, mvdy_q8;
4021
4022
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4023
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4024
4025
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4026
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4027
4028
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4029
4030
0
                if(mvd > mvd_limit)
4031
0
                {
4032
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4033
0
                }
4034
0
                break;
4035
0
            }
4036
0
            case 6:
4037
0
            {
4038
0
                S32 mvd;
4039
0
                S32 mvdx, mvdx_q8;
4040
0
                S32 mvdy, mvdy_q8;
4041
4042
0
                mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
4043
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4044
4045
0
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4046
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4047
4048
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4049
4050
0
                if(mvd > mvd_limit)
4051
0
                {
4052
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4053
0
                }
4054
0
                break;
4055
0
            }
4056
0
            case 8:
4057
0
            {
4058
0
                S32 mvd, mvd_q8;
4059
4060
0
                mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4061
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4062
4063
0
                if(mvd > (mvd_limit))
4064
0
                {
4065
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4066
0
                }
4067
0
                break;
4068
0
            }
4069
0
            case 9:
4070
0
            {
4071
0
                S32 mvd;
4072
0
                S32 mvdx, mvdx_q8;
4073
0
                S32 mvdy, mvdy_q8;
4074
4075
0
                mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
4076
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4077
4078
0
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4079
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4080
4081
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4082
4083
0
                if(mvd > mvd_limit)
4084
0
                {
4085
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4086
0
                }
4087
0
                break;
4088
0
            }
4089
0
            case 10:
4090
0
            {
4091
0
                S32 mvd;
4092
0
                S32 mvdx, mvdx_q8;
4093
0
                S32 mvdy, mvdy_q8;
4094
4095
0
                mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
4096
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4097
4098
0
                mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
4099
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4100
4101
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4102
4103
0
                if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
4104
0
                {
4105
0
                    ps_cur_cluster_64->max_dist_from_centroid = mvd;
4106
0
                }
4107
0
                break;
4108
0
            }
4109
0
            default:
4110
0
            {
4111
0
                break;
4112
0
            }
4113
0
            }
4114
4115
0
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
4116
0
                              ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
4117
0
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
4118
0
                              ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
4119
4120
0
            ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
4121
4122
0
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
4123
0
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
4124
0
        }
4125
0
        else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
4126
0
        {
4127
0
            ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
4128
4129
0
            ps_blk_64x64->num_clusters++;
4130
0
            ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
4131
4132
0
            ps_cur_cluster_64->is_valid_cluster = 1;
4133
4134
0
            ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
4135
0
            ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
4136
0
            ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
4137
4138
0
            memcpy(
4139
0
                &ps_cur_cluster_64->as_mv[0],
4140
0
                ps_cluster_data->as_mv,
4141
0
                sizeof(mv_data_t) * ps_cluster_data->num_mvs);
4142
4143
0
            ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
4144
4145
0
            ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
4146
4147
0
            ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
4148
0
            ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
4149
0
            ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
4150
0
            ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
4151
4152
0
            ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
4153
0
        }
4154
0
    }
4155
0
}
4156
4157
/**
4158
********************************************************************************
4159
*  @fn   void hme_update_32x32_clusters
4160
*               (
4161
*                   cluster_32x32_blk_t *ps_blk_32x32,
4162
*                   cluster_16x16_blk_t *ps_blk_16x16
4163
*               )
4164
*
4165
*  @brief  Updates attributes for 32x32 clusters based on the attributes of
4166
*          the constituent 16x16 clusters
4167
*
4168
*  @param[out]  ps_blk_32x32: structure containing 32x32 block results
4169
*
4170
*  @param[in]  ps_blk_16x16 : structure containing 16x16 block results
4171
*
4172
*  @return None
4173
********************************************************************************
4174
*/
4175
static __inline void
4176
    hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
4177
0
{
4178
0
    cluster_16x16_blk_t *ps_blk_16x16_cur;
4179
0
    cluster_data_t *ps_cur_cluster;
4180
4181
0
    S32 i, j;
4182
0
    S32 num_clusters_cur_16x16_blk;
4183
4184
0
    for(i = 0; i < 4; i++)
4185
0
    {
4186
0
        S32 num_clusters_evaluated = 0;
4187
4188
0
        ps_blk_16x16_cur = &ps_blk_16x16[i];
4189
4190
0
        num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
4191
4192
0
        ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
4193
4194
0
        ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
4195
4196
0
        for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++)
4197
0
        {
4198
0
            ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
4199
4200
0
            if(!ps_cur_cluster->is_valid_cluster)
4201
0
            {
4202
0
                continue;
4203
0
            }
4204
4205
0
            hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
4206
4207
0
            num_clusters_evaluated++;
4208
0
        }
4209
0
    }
4210
0
}
4211
4212
/**
4213
********************************************************************************
4214
*  @fn   void hme_update_64x64_clusters
4215
*               (
4216
*                   cluster_64x64_blk_t *ps_blk_64x64,
4217
*                   cluster_32x32_blk_t *ps_blk_32x32
4218
*               )
4219
*
4220
*  @brief  Updates attributes for 64x64 clusters based on the attributes of
4221
*          the constituent 16x16 clusters
4222
*
4223
*  @param[out]  ps_blk_64x64: structure containing 32x32 block results
4224
*
4225
*  @param[in]  ps_blk_32x32 : structure containing 16x16 block results
4226
*
4227
*  @return None
4228
********************************************************************************
4229
*/
4230
static __inline void
4231
    hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
4232
0
{
4233
0
    cluster_32x32_blk_t *ps_blk_32x32_cur;
4234
0
    cluster_data_t *ps_cur_cluster;
4235
4236
0
    S32 i, j;
4237
0
    S32 num_clusters_cur_32x32_blk;
4238
4239
0
    for(i = 0; i < 4; i++)
4240
0
    {
4241
0
        S32 num_clusters_evaluated = 0;
4242
4243
0
        ps_blk_32x32_cur = &ps_blk_32x32[i];
4244
4245
0
        num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
4246
4247
0
        ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
4248
0
        ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
4249
4250
0
        for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++)
4251
0
        {
4252
0
            ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
4253
4254
0
            if(!ps_cur_cluster->is_valid_cluster)
4255
0
            {
4256
0
                continue;
4257
0
            }
4258
4259
0
            hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
4260
4261
0
            num_clusters_evaluated++;
4262
0
        }
4263
0
    }
4264
0
}
4265
4266
/**
4267
********************************************************************************
4268
*  @fn   void hme_try_merge_clusters_blksize_gt_16
4269
*               (
4270
*                   cluster_data_t *ps_cluster_data,
4271
*                   S32 num_clusters
4272
*               )
4273
*
4274
*  @brief  Merging clusters from blocks of size 32x32 and greater
4275
*
4276
*  @param[in/out]  ps_cluster_data: structure containing cluster data
4277
*
4278
*  @param[in/out]  pi4_num_clusters : pointer to number of clusters
4279
*
4280
*  @return Success or failure
4281
********************************************************************************
4282
*/
4283
S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
4284
0
{
4285
0
    centroid_t *ps_cur_centroid;
4286
0
    cluster_data_t *ps_cur_cluster;
4287
4288
0
    S32 i, mvd;
4289
0
    S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
4290
4291
0
    centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
4292
4293
0
    S32 mvd_limit = ps_cluster_data->max_dist_from_centroid;
4294
0
    S32 ref_id = ps_cluster_data->ref_id;
4295
4296
0
    S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8;
4297
0
    S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
4298
0
    S32 num_clusters_evaluated = 1;
4299
0
    S32 ret_value = 0;
4300
4301
0
    if(1 >= num_clusters)
4302
0
    {
4303
0
        return ret_value;
4304
0
    }
4305
4306
0
    for(i = 1; num_clusters_evaluated < num_clusters; i++)
4307
0
    {
4308
0
        S32 cur_posx_q8;
4309
0
        S32 cur_posy_q8;
4310
4311
0
        ps_cur_cluster = &ps_cluster_data[i];
4312
4313
0
        if((ref_id != ps_cur_cluster->ref_id))
4314
0
        {
4315
0
            num_clusters_evaluated++;
4316
0
            continue;
4317
0
        }
4318
4319
0
        if((!ps_cur_cluster->is_valid_cluster))
4320
0
        {
4321
0
            continue;
4322
0
        }
4323
4324
0
        num_clusters_evaluated++;
4325
4326
0
        ps_cur_centroid = &ps_cur_cluster->s_centroid;
4327
4328
0
        cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
4329
0
        cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
4330
4331
0
        mvdx_q8 = cur_posx_q8 - node0_posx_q8;
4332
0
        mvdy_q8 = cur_posy_q8 - node0_posy_q8;
4333
4334
0
        mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4335
0
        mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4336
4337
0
        mvd = ABS(mvdx) + ABS(mvdy);
4338
4339
0
        if(mvd <= (mvd_limit >> 1))
4340
0
        {
4341
0
            LWORD64 i8_updated_posx;
4342
0
            LWORD64 i8_updated_posy;
4343
0
            WORD32 minmax_updated_x = 0;
4344
0
            WORD32 minmax_updated_y = 0;
4345
4346
0
            ps_cur_cluster->is_valid_cluster = 0;
4347
4348
0
            ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
4349
0
            ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
4350
0
            ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
4351
4352
0
            memcpy(
4353
0
                &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
4354
0
                ps_cur_cluster->as_mv,
4355
0
                sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
4356
4357
0
            if(mvdx > 0)
4358
0
            {
4359
0
                ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
4360
0
                minmax_updated_x = 1;
4361
0
            }
4362
0
            else
4363
0
            {
4364
0
                ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
4365
0
                minmax_updated_x = 2;
4366
0
            }
4367
4368
0
            if(mvdy > 0)
4369
0
            {
4370
0
                ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
4371
0
                minmax_updated_y = 1;
4372
0
            }
4373
0
            else
4374
0
            {
4375
0
                ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
4376
0
                minmax_updated_y = 2;
4377
0
            }
4378
4379
0
            switch((minmax_updated_y << 2) + minmax_updated_x)
4380
0
            {
4381
0
            case 1:
4382
0
            {
4383
0
                S32 mvd, mvd_q8;
4384
4385
0
                mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4386
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4387
4388
0
                if(mvd > (mvd_limit))
4389
0
                {
4390
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4391
0
                }
4392
0
                break;
4393
0
            }
4394
0
            case 2:
4395
0
            {
4396
0
                S32 mvd, mvd_q8;
4397
4398
0
                mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4399
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4400
4401
0
                if(mvd > (mvd_limit))
4402
0
                {
4403
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4404
0
                }
4405
0
                break;
4406
0
            }
4407
0
            case 4:
4408
0
            {
4409
0
                S32 mvd, mvd_q8;
4410
4411
0
                mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4412
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4413
4414
0
                if(mvd > (mvd_limit))
4415
0
                {
4416
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4417
0
                }
4418
0
                break;
4419
0
            }
4420
0
            case 5:
4421
0
            {
4422
0
                S32 mvd;
4423
0
                S32 mvdx, mvdx_q8;
4424
0
                S32 mvdy, mvdy_q8;
4425
4426
0
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4427
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4428
4429
0
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4430
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4431
4432
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4433
4434
0
                if(mvd > mvd_limit)
4435
0
                {
4436
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4437
0
                }
4438
0
                break;
4439
0
            }
4440
0
            case 6:
4441
0
            {
4442
0
                S32 mvd;
4443
0
                S32 mvdx, mvdx_q8;
4444
0
                S32 mvdy, mvdy_q8;
4445
4446
0
                mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
4447
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4448
4449
0
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4450
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4451
4452
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4453
4454
0
                if(mvd > mvd_limit)
4455
0
                {
4456
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4457
0
                }
4458
0
                break;
4459
0
            }
4460
0
            case 8:
4461
0
            {
4462
0
                S32 mvd, mvd_q8;
4463
4464
0
                mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4465
0
                mvd = (mvd_q8 + (1 << 7)) >> 8;
4466
4467
0
                if(mvd > (mvd_limit))
4468
0
                {
4469
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4470
0
                }
4471
0
                break;
4472
0
            }
4473
0
            case 9:
4474
0
            {
4475
0
                S32 mvd;
4476
0
                S32 mvdx, mvdx_q8;
4477
0
                S32 mvdy, mvdy_q8;
4478
4479
0
                mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
4480
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4481
4482
0
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4483
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4484
4485
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4486
4487
0
                if(mvd > mvd_limit)
4488
0
                {
4489
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4490
0
                }
4491
0
                break;
4492
0
            }
4493
0
            case 10:
4494
0
            {
4495
0
                S32 mvd;
4496
0
                S32 mvdx, mvdx_q8;
4497
0
                S32 mvdy, mvdy_q8;
4498
4499
0
                mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
4500
0
                mvdx = (mvdx_q8 + (1 << 7)) >> 8;
4501
4502
0
                mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
4503
0
                mvdy = (mvdy_q8 + (1 << 7)) >> 8;
4504
4505
0
                mvd = (mvdx > mvdy) ? mvdx : mvdy;
4506
4507
0
                if(mvd > ps_cluster_data->max_dist_from_centroid)
4508
0
                {
4509
0
                    ps_cluster_data->max_dist_from_centroid = mvd;
4510
0
                }
4511
0
                break;
4512
0
            }
4513
0
            default:
4514
0
            {
4515
0
                break;
4516
0
            }
4517
0
            }
4518
4519
0
            i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) +
4520
0
                              ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
4521
0
            i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
4522
0
                              ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
4523
4524
0
            ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
4525
4526
0
            ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs);
4527
0
            ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
4528
4529
0
            if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters)
4530
0
            {
4531
0
                num_clusters--;
4532
0
                num_clusters_evaluated = 1;
4533
0
                i = 0;
4534
0
                ret_value++;
4535
0
            }
4536
0
            else
4537
0
            {
4538
0
                ret_value++;
4539
4540
0
                return ret_value;
4541
0
            }
4542
0
        }
4543
0
    }
4544
4545
0
    if(ret_value)
4546
0
    {
4547
0
        for(i = 1; i < (num_clusters + ret_value); i++)
4548
0
        {
4549
0
            if(ps_cluster_data[i].is_valid_cluster)
4550
0
            {
4551
0
                break;
4552
0
            }
4553
0
        }
4554
0
        if(i == (num_clusters + ret_value))
4555
0
        {
4556
0
            return ret_value;
4557
0
        }
4558
0
    }
4559
0
    else
4560
0
    {
4561
0
        i = 1;
4562
0
    }
4563
4564
0
    return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
4565
0
           ret_value;
4566
0
}
4567
4568
/**
4569
********************************************************************************
4570
*  @fn   S32 hme_determine_validity_32x32
4571
*               (
4572
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4573
*               )
4574
*
4575
*  @brief  Determines whther current 32x32 block needs to be evaluated in enc_loop
4576
*           while recursing through the CU tree or not
4577
*
4578
*  @param[in]  ps_cluster_data: structure containing cluster data
4579
*
4580
*  @return Success or failure
4581
********************************************************************************
4582
*/
4583
__inline S32 hme_determine_validity_32x32(
4584
    ctb_cluster_info_t *ps_ctb_cluster_info,
4585
    S32 *pi4_children_nodes_required,
4586
    S32 blk_validity_wrt_pic_bndry,
4587
    S32 parent_blk_validity_wrt_pic_bndry)
4588
0
{
4589
0
    cluster_data_t *ps_data;
4590
0
4591
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4592
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4593
0
4594
0
    S32 num_clusters = ps_32x32_blk->num_clusters;
4595
0
    S32 num_clusters_parent = ps_64x64_blk->num_clusters;
4596
0
4597
0
    if(!blk_validity_wrt_pic_bndry)
4598
0
    {
4599
0
        *pi4_children_nodes_required = 1;
4600
0
        return 0;
4601
0
    }
4602
0
4603
0
    if(!parent_blk_validity_wrt_pic_bndry)
4604
0
    {
4605
0
        *pi4_children_nodes_required = 1;
4606
0
        return 1;
4607
0
    }
4608
0
4609
0
    if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4610
0
    {
4611
0
        *pi4_children_nodes_required = 1;
4612
0
        return 0;
4613
0
    }
4614
0
4615
0
    if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4616
0
    {
4617
0
        *pi4_children_nodes_required = 1;
4618
0
4619
0
        return 1;
4620
0
    }
4621
0
    else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
4622
0
    {
4623
0
        *pi4_children_nodes_required = 0;
4624
0
4625
0
        return 1;
4626
0
    }
4627
0
    else
4628
0
    {
4629
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4630
0
        {
4631
0
            *pi4_children_nodes_required = 0;
4632
0
            return 1;
4633
0
        }
4634
0
        else
4635
0
        {
4636
0
            S32 i;
4637
0
4638
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
4639
0
            S32 min_area = MAX_32BIT_VAL;
4640
0
            S32 num_clusters_evaluated = 0;
4641
0
4642
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4643
0
            {
4644
0
                ps_data = &ps_32x32_blk->as_cluster_data[i];
4645
0
4646
0
                if(!ps_data->is_valid_cluster)
4647
0
                {
4648
0
                    continue;
4649
0
                }
4650
0
4651
0
                num_clusters_evaluated++;
4652
0
4653
0
                if(ps_data->area_in_pixels < min_area)
4654
0
                {
4655
0
                    min_area = ps_data->area_in_pixels;
4656
0
                }
4657
0
            }
4658
0
4659
0
            if((min_area << 4) < area_of_parent)
4660
0
            {
4661
0
                *pi4_children_nodes_required = 1;
4662
0
                return 0;
4663
0
            }
4664
0
            else
4665
0
            {
4666
0
                *pi4_children_nodes_required = 0;
4667
0
                return 1;
4668
0
            }
4669
0
        }
4670
0
    }
4671
0
}
4672
4673
/**
4674
********************************************************************************
4675
*  @fn   S32 hme_determine_validity_16x16
4676
*               (
4677
*                   ctb_cluster_info_t *ps_ctb_cluster_info
4678
*               )
4679
*
4680
*  @brief  Determines whther current 16x16 block needs to be evaluated in enc_loop
4681
*           while recursing through the CU tree or not
4682
*
4683
*  @param[in]  ps_cluster_data: structure containing cluster data
4684
*
4685
*  @return Success or failure
4686
********************************************************************************
4687
*/
4688
__inline S32 hme_determine_validity_16x16(
4689
    ctb_cluster_info_t *ps_ctb_cluster_info,
4690
    S32 *pi4_children_nodes_required,
4691
    S32 blk_validity_wrt_pic_bndry,
4692
    S32 parent_blk_validity_wrt_pic_bndry)
4693
0
{
4694
0
    cluster_data_t *ps_data;
4695
0
4696
0
    cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
4697
0
    cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
4698
0
    cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
4699
0
4700
0
    S32 num_clusters = ps_16x16_blk->num_clusters;
4701
0
    S32 num_clusters_parent = ps_32x32_blk->num_clusters;
4702
0
    S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
4703
0
4704
0
    if(!blk_validity_wrt_pic_bndry)
4705
0
    {
4706
0
        *pi4_children_nodes_required = 1;
4707
0
        return 0;
4708
0
    }
4709
0
4710
0
    if(!parent_blk_validity_wrt_pic_bndry)
4711
0
    {
4712
0
        *pi4_children_nodes_required = 1;
4713
0
        return 1;
4714
0
    }
4715
0
4716
0
    if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
4717
0
       (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
4718
0
    {
4719
0
        *pi4_children_nodes_required = 1;
4720
0
        return 1;
4721
0
    }
4722
0
4723
0
    /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
4724
0
    /* implies nc_64 > 3 when num_clusters_parent < 3 & */
4725
0
    if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
4726
0
    {
4727
0
        if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4728
0
        {
4729
0
            *pi4_children_nodes_required = 0;
4730
0
4731
0
            return 1;
4732
0
        }
4733
0
        else
4734
0
        {
4735
0
            *pi4_children_nodes_required = 1;
4736
0
4737
0
            return 0;
4738
0
        }
4739
0
    }
4740
0
    /* Implies nc_64 >= 3 */
4741
0
    else
4742
0
    {
4743
0
        if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4744
0
        {
4745
0
            *pi4_children_nodes_required = 0;
4746
0
            return 1;
4747
0
        }
4748
0
        else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
4749
0
        {
4750
0
            *pi4_children_nodes_required = 1;
4751
0
            return 0;
4752
0
        }
4753
0
        else
4754
0
        {
4755
0
            S32 i;
4756
0
4757
0
            S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
4758
0
            S32 min_area = MAX_32BIT_VAL;
4759
0
            S32 num_clusters_evaluated = 0;
4760
0
4761
0
            for(i = 0; num_clusters_evaluated < num_clusters; i++)
4762
0
            {
4763
0
                ps_data = &ps_16x16_blk->as_cluster_data[i];
4764
0
4765
0
                if(!ps_data->is_valid_cluster)
4766
0
                {
4767
0
                    continue;
4768
0
                }
4769
0
4770
0
                num_clusters_evaluated++;
4771
0
4772
0
                if(ps_data->area_in_pixels < min_area)
4773
0
                {
4774
0
                    min_area = ps_data->area_in_pixels;
4775
0
                }
4776
0
            }
4777
0
4778
0
            if((min_area << 4) < area_of_parent)
4779
0
            {
4780
0
                *pi4_children_nodes_required = 1;
4781
0
                return 0;
4782
0
            }
4783
0
            else
4784
0
            {
4785
0
                *pi4_children_nodes_required = 0;
4786
0
                return 1;
4787
0
            }
4788
0
        }
4789
0
    }
4790
0
}
4791
4792
/**
4793
********************************************************************************
4794
*  @fn   void hme_build_cu_tree
4795
*               (
4796
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4797
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4798
*                   S32 tree_depth,
4799
*                   CU_POS_T e_grand_parent_blk_pos,
4800
*                   CU_POS_T e_parent_blk_pos,
4801
*                   CU_POS_T e_cur_blk_pos
4802
*               )
4803
*
4804
*  @brief  Recursive function for CU tree initialisation
4805
*
4806
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4807
*                                   corresponding to all block sizes from 64x64
4808
*                                   to 16x16
4809
*
4810
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4811
*                                applicable
4812
*
4813
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4814
*
4815
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4816
*
4817
*  @param[in]  tree_depth : specifies depth of the CU tree
4818
*
4819
*  @return Nothing
4820
********************************************************************************
4821
*/
4822
void hme_build_cu_tree(
4823
    ctb_cluster_info_t *ps_ctb_cluster_info,
4824
    cur_ctb_cu_tree_t *ps_cu_tree,
4825
    S32 tree_depth,
4826
    CU_POS_T e_grandparent_blk_pos,
4827
    CU_POS_T e_parent_blk_pos,
4828
    CU_POS_T e_cur_blk_pos)
4829
0
{
4830
0
    ihevce_cu_tree_init(
4831
0
        ps_cu_tree,
4832
0
        ps_ctb_cluster_info->ps_cu_tree_root,
4833
0
        &ps_ctb_cluster_info->nodes_created_in_cu_tree,
4834
0
        tree_depth,
4835
0
        e_grandparent_blk_pos,
4836
0
        e_parent_blk_pos,
4837
0
        e_cur_blk_pos);
4838
0
}
4839
4840
/**
4841
********************************************************************************
4842
*  @fn   S32 hme_sdi_based_cluster_spread_eligibility
4843
*               (
4844
*                   cluster_32x32_blk_t *ps_blk_32x32
4845
*               )
4846
*
4847
*  @brief  Determines whether the spread of high SDI MV's around each cluster
4848
*          center is below a pre-determined threshold
4849
*
4850
*  @param[in]  ps_blk_32x32: structure containing pointers to clusters
4851
*                                   corresponding to all block sizes from 64x64
4852
*                                   to 16x16
4853
*
4854
*  @return 1 if the spread is constrained, else 0
4855
********************************************************************************
4856
*/
4857
__inline S32
4858
    hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
4859
0
{
4860
0
    S32 cumulative_mv_distance;
4861
0
    S32 i, j;
4862
0
    S32 num_high_sdi_mvs;
4863
0
4864
0
    S32 num_clusters = ps_blk_32x32->num_clusters;
4865
0
4866
0
    for(i = 0; i < num_clusters; i++)
4867
0
    {
4868
0
        cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
4869
0
4870
0
        num_high_sdi_mvs = 0;
4871
0
        cumulative_mv_distance = 0;
4872
0
4873
0
        for(j = 0; j < ps_data->num_mvs; j++)
4874
0
        {
4875
0
            mv_data_t *ps_mv = &ps_data->as_mv[j];
4876
0
4877
0
            if(ps_mv->sdi >= sdi_threshold)
4878
0
            {
4879
0
                num_high_sdi_mvs++;
4880
0
4881
0
                COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance);
4882
0
            }
4883
0
        }
4884
0
4885
0
        if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs))
4886
0
        {
4887
0
            return 0;
4888
0
        }
4889
0
    }
4890
0
4891
0
    return 1;
4892
0
}
4893
4894
/**
4895
********************************************************************************
4896
*  @fn   S32 hme_populate_cu_tree
4897
*               (
4898
*                   ctb_cluster_info_t *ps_ctb_cluster_info,
4899
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
4900
*                   cur_ctb_cu_tree_t *ps_cu_tree,
4901
*                   S32 tree_depth,
4902
*                   CU_POS_T e_parent_blk_pos,
4903
*                   CU_POS_T e_cur_blk_pos
4904
*               )
4905
*
4906
*  @brief  Recursive function for CU tree population based on output of
4907
*          clustering algorithm
4908
*
4909
*  @param[in]  ps_ctb_cluster_info: structure containing pointers to clusters
4910
*                                   corresponding to all block sizes from 64x64
4911
*                                   to 16x16
4912
*
4913
*  @param[in]  e_parent_blk_pos: position of parent block wrt its parent, if
4914
applicable
4915
*
4916
*  @param[in]  e_cur_blk_pos: position of current block wrt parent
4917
*
4918
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
4919
*
4920
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
4921
*
4922
*  @param[in]  tree_depth : specifies depth of the CU tree
4923
*
4924
*  @param[in]  ipe_decision_precedence : specifies whether precedence should
4925
*               be given to decisions made either by IPE(1) or clustering algos.
4926
*
4927
*  @return 1 if re-evaluation of parent node's validity is not required,
4928
else 0
4929
********************************************************************************
4930
*/
4931
void hme_populate_cu_tree(
4932
    ctb_cluster_info_t *ps_ctb_cluster_info,
4933
    cur_ctb_cu_tree_t *ps_cu_tree,
4934
    S32 tree_depth,
4935
    ME_QUALITY_PRESETS_T e_quality_preset,
4936
    CU_POS_T e_grandparent_blk_pos,
4937
    CU_POS_T e_parent_blk_pos,
4938
    CU_POS_T e_cur_blk_pos)
4939
0
{
4940
0
    S32 area_of_cur_blk;
4941
0
    S32 area_limit_for_me_decision_precedence;
4942
0
    S32 children_nodes_required;
4943
0
    S32 intra_mv_area;
4944
0
    S32 intra_eval_enable;
4945
0
    S32 inter_eval_enable;
4946
0
    S32 ipe_decision_precedence;
4947
0
    S32 node_validity;
4948
0
    S32 num_clusters;
4949
4950
0
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
4951
4952
0
    if(NULL == ps_cu_tree)
4953
0
    {
4954
0
        return;
4955
0
    }
4956
4957
0
    switch(tree_depth)
4958
0
    {
4959
0
    case 0:
4960
0
    {
4961
        /* 64x64 block */
4962
0
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
4963
4964
0
        cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
4965
4966
0
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
4967
0
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
4968
0
        children_nodes_required = 0;
4969
0
        intra_mv_area = ps_blk_64x64->intra_mv_area;
4970
4971
0
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
4972
4973
0
        intra_eval_enable = ipe_decision_precedence;
4974
0
        inter_eval_enable = !!ps_blk_64x64->num_clusters;
4975
4976
0
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4977
0
        if(e_quality_preset >= ME_HIGH_QUALITY)
4978
0
        {
4979
0
            inter_eval_enable = 1;
4980
0
            node_validity = (blk_32x32_mask == 0xf);
4981
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
4982
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
4983
0
#endif
4984
0
            break;
4985
0
        }
4986
0
#endif
4987
4988
0
#if ENABLE_4CTB_EVALUATION
4989
0
        node_validity = (blk_32x32_mask == 0xf);
4990
4991
0
        break;
4992
#else
4993
        {
4994
            S32 i;
4995
4996
            num_clusters = ps_blk_64x64->num_clusters;
4997
4998
            node_validity = (ipe_decision_precedence)
4999
                                ? (!ps_cur_ipe_ctb->u1_split_flag)
5000
                                : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
5001
5002
            for(i = 0; i < MAX_NUM_REF; i++)
5003
            {
5004
                node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
5005
                                                  MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5006
            }
5007
5008
            node_validity = node_validity && (blk_32x32_mask == 0xf);
5009
        }
5010
        break;
5011
#endif
5012
0
    }
5013
0
    case 1:
5014
0
    {
5015
        /* 32x32 block */
5016
0
        S32 is_percent_intra_area_gt_threshold;
5017
5018
0
        cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
5019
5020
0
        S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
5021
5022
#if !ENABLE_4CTB_EVALUATION
5023
        S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
5024
        S32 best_intra_cost =
5025
            ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5026
              ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
5027
                  4) < 0)
5028
                ? MAX_32BIT_VAL
5029
                : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
5030
                   ps_ctb_cluster_info->i4_frame_qstep *
5031
                       ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
5032
        S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5033
        S32 cost_differential = (best_inter_cost - best_cost);
5034
#endif
5035
5036
0
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
5037
0
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5038
0
        intra_mv_area = ps_blk_32x32->intra_mv_area;
5039
0
        is_percent_intra_area_gt_threshold =
5040
0
            (intra_mv_area > area_limit_for_me_decision_precedence);
5041
0
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5042
5043
0
        intra_eval_enable = ipe_decision_precedence;
5044
0
        inter_eval_enable = !!ps_blk_32x32->num_clusters;
5045
0
        children_nodes_required = 1;
5046
5047
0
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5048
0
        if(e_quality_preset >= ME_HIGH_QUALITY)
5049
0
        {
5050
0
            inter_eval_enable = 1;
5051
0
            node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5052
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5053
0
            ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk);
5054
0
#endif
5055
0
            break;
5056
0
        }
5057
0
#endif
5058
5059
0
#if ENABLE_4CTB_EVALUATION
5060
0
        node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5061
5062
0
        break;
5063
#else
5064
        {
5065
            S32 i;
5066
            num_clusters = ps_blk_32x32->num_clusters;
5067
5068
            if(ipe_decision_precedence)
5069
            {
5070
                node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
5071
                node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5072
            }
5073
            else
5074
            {
5075
                node_validity =
5076
                    ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
5077
                    (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
5078
                    (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
5079
5080
                for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++)
5081
                {
5082
                    node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
5083
                                                      MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
5084
                }
5085
5086
                if(node_validity)
5087
                {
5088
                    node_validity = node_validity &&
5089
                                    hme_sdi_based_cluster_spread_eligibility(
5090
                                        ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
5091
                }
5092
            }
5093
        }
5094
5095
        break;
5096
#endif
5097
0
    }
5098
0
    case 2:
5099
0
    {
5100
0
        cluster_16x16_blk_t *ps_blk_16x16 =
5101
0
            &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
5102
5103
0
        S32 blk_8x8_mask =
5104
0
            ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5105
5106
0
        area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
5107
0
        area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
5108
0
        children_nodes_required = 1;
5109
0
        intra_mv_area = ps_blk_16x16->intra_mv_area;
5110
0
        ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
5111
0
        num_clusters = ps_blk_16x16->num_clusters;
5112
5113
0
        intra_eval_enable = ipe_decision_precedence;
5114
0
        inter_eval_enable = 1;
5115
5116
0
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5117
0
        if(e_quality_preset >= ME_HIGH_QUALITY)
5118
0
        {
5119
0
            node_validity =
5120
0
                !ps_ctb_cluster_info
5121
0
                     ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5122
0
            children_nodes_required = !node_validity;
5123
0
            break;
5124
0
        }
5125
0
#endif
5126
5127
0
#if ENABLE_4CTB_EVALUATION
5128
0
        node_validity = (blk_8x8_mask == 0xf);
5129
5130
0
#if ENABLE_CU_TREE_CULLING
5131
0
        {
5132
0
            cur_ctb_cu_tree_t *ps_32x32_root = NULL;
5133
5134
0
            switch(e_parent_blk_pos)
5135
0
            {
5136
0
            case POS_TL:
5137
0
            {
5138
0
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5139
5140
0
                break;
5141
0
            }
5142
0
            case POS_TR:
5143
0
            {
5144
0
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5145
5146
0
                break;
5147
0
            }
5148
0
            case POS_BL:
5149
0
            {
5150
0
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5151
5152
0
                break;
5153
0
            }
5154
0
            case POS_BR:
5155
0
            {
5156
0
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5157
5158
0
                break;
5159
0
            }
5160
0
            default:
5161
0
            {
5162
0
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5163
0
                break;
5164
0
            }
5165
0
            }
5166
5167
0
            if(ps_32x32_root->is_node_valid)
5168
0
            {
5169
0
                node_validity =
5170
0
                    node_validity &&
5171
0
                    !ps_ctb_cluster_info
5172
0
                         ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
5173
0
                children_nodes_required = !node_validity;
5174
0
            }
5175
0
        }
5176
0
#endif
5177
5178
0
        break;
5179
#else
5180
5181
        if(ipe_decision_precedence)
5182
        {
5183
            S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
5184
                                     .as_intra16_analyse[e_cur_blk_pos]
5185
                                     .b1_merge_flag);
5186
            S32 valid_flag = (blk_8x8_mask == 0xf);
5187
5188
            node_validity = merge_flag_16 && valid_flag;
5189
        }
5190
        else
5191
        {
5192
            node_validity = (blk_8x8_mask == 0xf);
5193
        }
5194
5195
        break;
5196
#endif
5197
0
    }
5198
0
    case 3:
5199
0
    {
5200
0
        S32 blk_8x8_mask =
5201
0
            ps_ctb_cluster_info
5202
0
                ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
5203
0
        S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
5204
0
                                 .as_intra16_analyse[e_parent_blk_pos]
5205
0
                                 .b1_merge_flag);
5206
0
        S32 merge_flag_32 =
5207
0
            (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
5208
5209
0
        intra_eval_enable = !merge_flag_16 || !merge_flag_32;
5210
0
        inter_eval_enable = 1;
5211
0
        children_nodes_required = 0;
5212
5213
0
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5214
0
        if(e_quality_preset >= ME_HIGH_QUALITY)
5215
0
        {
5216
0
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5217
0
            break;
5218
0
        }
5219
0
#endif
5220
5221
0
#if ENABLE_4CTB_EVALUATION
5222
0
        node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
5223
5224
0
        break;
5225
#else
5226
        {
5227
            cur_ctb_cu_tree_t *ps_32x32_root;
5228
            cur_ctb_cu_tree_t *ps_16x16_root;
5229
            cluster_32x32_blk_t *ps_32x32_blk;
5230
5231
            switch(e_grandparent_blk_pos)
5232
            {
5233
            case POS_TL:
5234
            {
5235
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
5236
5237
                break;
5238
            }
5239
            case POS_TR:
5240
            {
5241
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
5242
5243
                break;
5244
            }
5245
            case POS_BL:
5246
            {
5247
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
5248
5249
                break;
5250
            }
5251
            case POS_BR:
5252
            {
5253
                ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
5254
5255
                break;
5256
            }
5257
            default:
5258
            {
5259
                DBG_PRINTF("Invalid block position %d\n", e_grandparent_blk_pos);
5260
                break;
5261
            }
5262
            }
5263
5264
            switch(e_parent_blk_pos)
5265
            {
5266
            case POS_TL:
5267
            {
5268
                ps_16x16_root = ps_32x32_root->ps_child_node_tl;
5269
5270
                break;
5271
            }
5272
            case POS_TR:
5273
            {
5274
                ps_16x16_root = ps_32x32_root->ps_child_node_tr;
5275
5276
                break;
5277
            }
5278
            case POS_BL:
5279
            {
5280
                ps_16x16_root = ps_32x32_root->ps_child_node_bl;
5281
5282
                break;
5283
            }
5284
            case POS_BR:
5285
            {
5286
                ps_16x16_root = ps_32x32_root->ps_child_node_br;
5287
5288
                break;
5289
            }
5290
            default:
5291
            {
5292
                DBG_PRINTF("Invalid block position %d\n", e_parent_blk_pos);
5293
                break;
5294
            }
5295
            }
5296
5297
            ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
5298
5299
            node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
5300
                            ((!ps_32x32_root->is_node_valid) ||
5301
                             (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
5302
                             (!ps_16x16_root->is_node_valid));
5303
5304
            break;
5305
        }
5306
#endif
5307
0
    }
5308
0
    }
5309
5310
    /* Fill the current cu_tree node */
5311
0
    ps_cu_tree->is_node_valid = node_validity;
5312
0
    ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
5313
0
    ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
5314
5315
0
    if(children_nodes_required)
5316
0
    {
5317
0
        tree_depth++;
5318
5319
0
        hme_populate_cu_tree(
5320
0
            ps_ctb_cluster_info,
5321
0
            ps_cu_tree->ps_child_node_tl,
5322
0
            tree_depth,
5323
0
            e_quality_preset,
5324
0
            e_parent_blk_pos,
5325
0
            e_cur_blk_pos,
5326
0
            POS_TL);
5327
5328
0
        hme_populate_cu_tree(
5329
0
            ps_ctb_cluster_info,
5330
0
            ps_cu_tree->ps_child_node_tr,
5331
0
            tree_depth,
5332
0
            e_quality_preset,
5333
0
            e_parent_blk_pos,
5334
0
            e_cur_blk_pos,
5335
0
            POS_TR);
5336
5337
0
        hme_populate_cu_tree(
5338
0
            ps_ctb_cluster_info,
5339
0
            ps_cu_tree->ps_child_node_bl,
5340
0
            tree_depth,
5341
0
            e_quality_preset,
5342
0
            e_parent_blk_pos,
5343
0
            e_cur_blk_pos,
5344
0
            POS_BL);
5345
5346
0
        hme_populate_cu_tree(
5347
0
            ps_ctb_cluster_info,
5348
0
            ps_cu_tree->ps_child_node_br,
5349
0
            tree_depth,
5350
0
            e_quality_preset,
5351
0
            e_parent_blk_pos,
5352
0
            e_cur_blk_pos,
5353
0
            POS_BR);
5354
0
    }
5355
0
}
5356
5357
/**
5358
********************************************************************************
5359
*  @fn   void hme_analyse_mv_clustering
5360
*               (
5361
*                   search_results_t *ps_search_results,
5362
*                   ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
5363
*                   cur_ctb_cu_tree_t *ps_cu_tree
5364
*               )
5365
*
5366
*  @brief  Implementation for the clustering algorithm
5367
*
5368
*  @param[in]  ps_search_results: structure containing 16x16 block results
5369
*
5370
*  @param[in]  ps_cur_ipe_ctb : output container for ipe analyses
5371
*
5372
*  @param[out]  ps_cu_tree : represents CU tree used in CU recursion
5373
*
5374
*  @return None
5375
********************************************************************************
5376
*/
5377
void hme_analyse_mv_clustering(
5378
    search_results_t *ps_search_results,
5379
    inter_cu_results_t *ps_16x16_cu_results,
5380
    inter_cu_results_t *ps_8x8_cu_results,
5381
    ctb_cluster_info_t *ps_ctb_cluster_info,
5382
    S08 *pi1_future_list,
5383
    S08 *pi1_past_list,
5384
    S32 bidir_enabled,
5385
    ME_QUALITY_PRESETS_T e_quality_preset)
5386
0
{
5387
0
    cluster_16x16_blk_t *ps_blk_16x16;
5388
0
    cluster_32x32_blk_t *ps_blk_32x32;
5389
0
    cluster_64x64_blk_t *ps_blk_64x64;
5390
5391
0
    part_type_results_t *ps_best_result;
5392
0
    pu_result_t *aps_part_result[MAX_NUM_PARTS];
5393
0
    pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
5394
5395
0
    PART_ID_T e_part_id;
5396
0
    PART_TYPE_T e_part_type;
5397
5398
0
    S32 enable_64x64_merge;
5399
0
    S32 i, j, k;
5400
0
    S32 mvx, mvy;
5401
0
    S32 num_parts;
5402
0
    S32 ref_idx;
5403
0
    S32 ai4_pred_mode[MAX_NUM_PARTS];
5404
5405
0
    S32 num_32x32_merges = 0;
5406
5407
    /*****************************************/
5408
    /*****************************************/
5409
    /********* Enter ye who is HQ ************/
5410
    /*****************************************/
5411
    /*****************************************/
5412
5413
0
    ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
5414
5415
    /* Initialise data in each of the clusters */
5416
0
    for(i = 0; i < 16; i++)
5417
0
    {
5418
0
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5419
5420
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5421
0
        if(e_quality_preset < ME_HIGH_QUALITY)
5422
0
        {
5423
0
            hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5424
0
        }
5425
0
        else
5426
0
        {
5427
0
            ps_blk_16x16->best_inter_cost = 0;
5428
0
            ps_blk_16x16->intra_mv_area = 0;
5429
0
        }
5430
#else
5431
        hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
5432
#endif
5433
0
    }
5434
5435
0
    for(i = 0; i < 4; i++)
5436
0
    {
5437
0
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5438
5439
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5440
0
        if(e_quality_preset < ME_HIGH_QUALITY)
5441
0
        {
5442
0
            hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5443
0
        }
5444
0
        else
5445
0
        {
5446
0
            ps_blk_32x32->best_inter_cost = 0;
5447
0
            ps_blk_32x32->intra_mv_area = 0;
5448
0
        }
5449
#else
5450
        hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
5451
#endif
5452
0
    }
5453
5454
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5455
0
    if(e_quality_preset < ME_HIGH_QUALITY)
5456
0
    {
5457
0
        hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5458
0
    }
5459
0
    else
5460
0
    {
5461
0
        ps_blk_64x64->best_inter_cost = 0;
5462
0
        ps_blk_64x64->intra_mv_area = 0;
5463
0
    }
5464
#else
5465
    hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
5466
#endif
5467
5468
    /* Initialise data for all nodes in the CU tree */
5469
0
    hme_build_cu_tree(
5470
0
        ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
5471
5472
0
    if(e_quality_preset >= ME_HIGH_QUALITY)
5473
0
    {
5474
0
        memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
5475
0
    }
5476
5477
#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
5478
    return;
5479
#endif
5480
5481
0
    for(i = 0; i < 16; i++)
5482
0
    {
5483
0
        S32 blk_8x8_mask;
5484
0
        S32 is_16x16_blk_valid;
5485
0
        S32 num_clusters_updated;
5486
0
        S32 num_clusters;
5487
5488
0
        blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
5489
5490
0
        ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
5491
5492
0
        is_16x16_blk_valid = (blk_8x8_mask == 0xf);
5493
5494
0
        if(is_16x16_blk_valid)
5495
0
        {
5496
            /* Use 8x8 data when 16x16 CU is split */
5497
0
            if(ps_search_results[i].u1_split_flag)
5498
0
            {
5499
0
                S32 blk_8x8_idx = i << 2;
5500
5501
0
                num_parts = 4;
5502
0
                e_part_type = PRT_NxN;
5503
5504
0
                for(j = 0; j < num_parts; j++, blk_8x8_idx++)
5505
0
                {
5506
                    /* Only 2Nx2N partition supported for 8x8 block */
5507
0
                    ASSERT(
5508
0
                        ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
5509
0
                        ((PART_TYPE_T)PRT_2Nx2N));
5510
5511
0
                    aps_part_result[j] =
5512
0
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
5513
0
                    aps_inferior_parts[j] =
5514
0
                        &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
5515
0
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5516
0
                }
5517
0
            }
5518
0
            else
5519
0
            {
5520
0
                ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
5521
5522
0
                e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
5523
0
                num_parts = gau1_num_parts_in_part_type[e_part_type];
5524
5525
0
                for(j = 0; j < num_parts; j++)
5526
0
                {
5527
0
                    aps_part_result[j] = &ps_best_result->as_pu_results[j];
5528
0
                    aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
5529
0
                    ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
5530
0
                }
5531
5532
0
                ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
5533
0
            }
5534
5535
0
            for(j = 0; j < num_parts; j++)
5536
0
            {
5537
0
                pu_result_t *ps_part_result = aps_part_result[j];
5538
5539
0
                S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1);
5540
5541
0
                e_part_id = ge_part_type_to_part_id[e_part_type][j];
5542
5543
                /* Skip clustering if best mode is intra */
5544
0
                if((ps_part_result->pu.b1_intra_flag))
5545
0
                {
5546
0
                    ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
5547
0
                    ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
5548
0
                    continue;
5549
0
                }
5550
0
                else
5551
0
                {
5552
0
                    ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
5553
0
                }
5554
5555
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5556
0
                if(e_quality_preset >= ME_HIGH_QUALITY)
5557
0
                {
5558
0
                    continue;
5559
0
                }
5560
0
#endif
5561
5562
0
                for(k = 0; k < num_mvs; k++)
5563
0
                {
5564
0
                    mv_t *ps_mv;
5565
5566
0
                    pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
5567
5568
0
                    S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
5569
5570
0
                    ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
5571
5572
0
                    mvx = ps_mv->i2_mvx;
5573
0
                    mvy = ps_mv->i2_mvy;
5574
5575
0
                    ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
5576
0
                                         : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
5577
5578
0
                    num_clusters = ps_blk_16x16->num_clusters;
5579
5580
0
                    hme_find_and_update_clusters(
5581
0
                        ps_blk_16x16->as_cluster_data,
5582
0
                        &(ps_blk_16x16->num_clusters),
5583
0
                        mvx,
5584
0
                        mvy,
5585
0
                        ref_idx,
5586
0
                        ps_part_result->i4_sdi,
5587
0
                        e_part_id,
5588
0
                        (ai4_pred_mode[j] == 2));
5589
5590
0
                    num_clusters_updated = (ps_blk_16x16->num_clusters);
5591
5592
0
                    ps_blk_16x16->au1_num_clusters[ref_idx] +=
5593
0
                        (num_clusters_updated - num_clusters);
5594
0
                }
5595
0
            }
5596
0
        }
5597
0
    }
5598
5599
    /* Search for 32x32 clusters */
5600
0
    for(i = 0; i < 4; i++)
5601
0
    {
5602
0
        S32 num_clusters_merged;
5603
5604
0
        S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0;
5605
5606
0
        if(is_32x32_blk_valid)
5607
0
        {
5608
0
            ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
5609
0
            ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
5610
5611
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5612
0
            if(e_quality_preset >= ME_HIGH_QUALITY)
5613
0
            {
5614
0
                for(j = 0; j < 4; j++, ps_blk_16x16++)
5615
0
                {
5616
0
                    ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
5617
5618
0
                    ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
5619
0
                }
5620
0
                continue;
5621
0
            }
5622
0
#endif
5623
5624
0
            hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
5625
5626
0
            if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
5627
0
            {
5628
0
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5629
0
                    ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
5630
5631
0
                if(num_clusters_merged)
5632
0
                {
5633
0
                    ps_blk_32x32->num_clusters -= num_clusters_merged;
5634
5635
0
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
5636
0
                }
5637
0
            }
5638
0
        }
5639
0
    }
5640
5641
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5642
    /* Eliminate outlier 32x32 clusters */
5643
0
    if(e_quality_preset < ME_HIGH_QUALITY)
5644
0
#endif
5645
0
    {
5646
0
        hme_boot_out_outlier(ps_ctb_cluster_info, 32);
5647
5648
        /* Find best_uni_ref and best_alt_ref */
5649
0
        hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
5650
0
    }
5651
5652
    /* Populate the CU tree for depths 1 and higher */
5653
0
    {
5654
0
        cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
5655
0
        cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
5656
0
        cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
5657
0
        cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
5658
0
        cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
5659
5660
0
        hme_populate_cu_tree(
5661
0
            ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
5662
5663
0
        num_32x32_merges += (ps_tl->is_node_valid == 1);
5664
5665
0
        hme_populate_cu_tree(
5666
0
            ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
5667
5668
0
        num_32x32_merges += (ps_tr->is_node_valid == 1);
5669
5670
0
        hme_populate_cu_tree(
5671
0
            ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
5672
5673
0
        num_32x32_merges += (ps_bl->is_node_valid == 1);
5674
5675
0
        hme_populate_cu_tree(
5676
0
            ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
5677
5678
0
        num_32x32_merges += (ps_br->is_node_valid == 1);
5679
0
    }
5680
5681
#if !ENABLE_4CTB_EVALUATION
5682
    if(e_quality_preset < ME_HIGH_QUALITY)
5683
    {
5684
        enable_64x64_merge = (num_32x32_merges >= 3);
5685
    }
5686
#else
5687
0
    if(e_quality_preset < ME_HIGH_QUALITY)
5688
0
    {
5689
0
        enable_64x64_merge = 1;
5690
0
    }
5691
0
#endif
5692
5693
0
#if 1  //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
5694
0
    if(e_quality_preset >= ME_HIGH_QUALITY)
5695
0
    {
5696
0
        enable_64x64_merge = 1;
5697
0
    }
5698
#else
5699
    if(e_quality_preset >= ME_HIGH_QUALITY)
5700
    {
5701
        enable_64x64_merge = (num_32x32_merges >= 3);
5702
    }
5703
#endif
5704
5705
0
    if(enable_64x64_merge)
5706
0
    {
5707
0
        S32 num_clusters_merged;
5708
5709
0
        ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
5710
5711
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5712
0
        if(e_quality_preset >= ME_HIGH_QUALITY)
5713
0
        {
5714
0
            for(j = 0; j < 4; j++, ps_blk_32x32++)
5715
0
            {
5716
0
                ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
5717
5718
0
                ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
5719
0
            }
5720
0
        }
5721
0
        else
5722
0
#endif
5723
0
        {
5724
0
            hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
5725
5726
0
            if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
5727
0
            {
5728
0
                num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
5729
0
                    ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
5730
5731
0
                if(num_clusters_merged)
5732
0
                {
5733
0
                    ps_blk_64x64->num_clusters -= num_clusters_merged;
5734
5735
0
                    UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
5736
0
                }
5737
0
            }
5738
0
        }
5739
5740
#if !ENABLE_4CTB_EVALUATION
5741
        if(e_quality_preset < ME_HIGH_QUALITY)
5742
        {
5743
            S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
5744
            S32 best_intra_cost =
5745
                ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5746
                  ps_ctb_cluster_info->i4_frame_qstep *
5747
                      ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0)
5748
                    ? MAX_32BIT_VAL
5749
                    : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
5750
                       ps_ctb_cluster_info->i4_frame_qstep *
5751
                           ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
5752
            S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
5753
            S32 cost_differential = (best_inter_cost - best_cost);
5754
5755
            enable_64x64_merge =
5756
                ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential));
5757
        }
5758
#endif
5759
0
    }
5760
5761
0
    if(enable_64x64_merge)
5762
0
    {
5763
0
#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
5764
0
        if(e_quality_preset < ME_HIGH_QUALITY)
5765
0
#endif
5766
0
        {
5767
0
            hme_boot_out_outlier(ps_ctb_cluster_info, 64);
5768
5769
0
            hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
5770
0
        }
5771
5772
0
        hme_populate_cu_tree(
5773
0
            ps_ctb_cluster_info,
5774
0
            ps_ctb_cluster_info->ps_cu_tree_root,
5775
0
            0,
5776
0
            e_quality_preset,
5777
0
            POS_NA,
5778
0
            POS_NA,
5779
0
            POS_NA);
5780
0
    }
5781
0
}
5782
#endif
5783
5784
static __inline void hme_merge_prms_init(
5785
    hme_merge_prms_t *ps_prms,
5786
    layer_ctxt_t *ps_curr_layer,
5787
    refine_prms_t *ps_refine_prms,
5788
    me_frm_ctxt_t *ps_me_ctxt,
5789
    range_prms_t *ps_range_prms_rec,
5790
    range_prms_t *ps_range_prms_inp,
5791
    mv_grid_t **pps_mv_grid,
5792
    inter_ctb_prms_t *ps_inter_ctb_prms,
5793
    S32 i4_num_pred_dir,
5794
    S32 i4_32x32_id,
5795
    BLK_SIZE_T e_blk_size,
5796
    ME_QUALITY_PRESETS_T e_me_quality_presets)
5797
0
{
5798
0
    S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
5799
0
    S32 i4_cu_16x16 = (BLK_32x32 == e_blk_size) ? (i4_32x32_id << 2) : 0;
5800
5801
    /* Currently not enabling segmentation info from prev layers */
5802
0
    ps_prms->i4_seg_info_avail = 0;
5803
0
    ps_prms->i4_part_mask = 0;
5804
5805
    /* Number of reference pics in which to do merge */
5806
0
    ps_prms->i4_num_ref = i4_num_pred_dir;
5807
5808
    /* Layer ctxt info */
5809
0
    ps_prms->ps_layer_ctxt = ps_curr_layer;
5810
5811
0
    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
5812
5813
    /* Top left, top right, bottom left and bottom right 16x16 units */
5814
0
    if(BLK_32x32 == e_blk_size)
5815
0
    {
5816
0
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16];
5817
0
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 1];
5818
0
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 2];
5819
0
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_16x16[i4_cu_16x16 + 3];
5820
5821
        /* Merge results stored here */
5822
0
        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
5823
5824
        /* This could be lesser than the number of 16x16results generated*/
5825
        /* For now, keeping it to be same                                */
5826
0
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
5827
0
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
5828
0
        ps_prms->ps_results_grandchild = NULL;
5829
0
    }
5830
0
    else
5831
0
    {
5832
0
        ps_prms->ps_results_tl = &ps_me_ctxt->as_search_results_32x32[0];
5833
0
        ps_prms->ps_results_tr = &ps_me_ctxt->as_search_results_32x32[1];
5834
0
        ps_prms->ps_results_bl = &ps_me_ctxt->as_search_results_32x32[2];
5835
0
        ps_prms->ps_results_br = &ps_me_ctxt->as_search_results_32x32[3];
5836
5837
        /* Merge results stored here */
5838
0
        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
5839
5840
0
        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
5841
0
        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
5842
0
        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
5843
0
    }
5844
5845
0
    if(i4_use_rec)
5846
0
    {
5847
0
        WORD32 ref_ctr;
5848
5849
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5850
0
        {
5851
0
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_rec[ref_ctr];
5852
0
        }
5853
0
    }
5854
0
    else
5855
0
    {
5856
0
        WORD32 ref_ctr;
5857
5858
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
5859
0
        {
5860
0
            ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms_inp[ref_ctr];
5861
0
        }
5862
0
    }
5863
0
    ps_prms->i4_use_rec = i4_use_rec;
5864
5865
0
    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
5866
5867
0
    ps_prms->pps_mv_grid = pps_mv_grid;
5868
5869
0
    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
5870
5871
0
    ps_prms->e_quality_preset = e_me_quality_presets;
5872
0
    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
5873
0
    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
5874
0
    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
5875
0
}
5876
5877
/**
5878
********************************************************************************
5879
*  @fn   void hme_refine(me_ctxt_t *ps_ctxt,
5880
*                       refine_layer_prms_t *ps_refine_prms)
5881
*
5882
*  @brief  Top level entry point for refinement ME
5883
*
5884
*  @param[in,out]  ps_ctxt: ME Handle
5885
*
5886
*  @param[in]  ps_refine_prms : refinement layer prms
5887
*
5888
*  @return None
5889
********************************************************************************
5890
*/
5891
void hme_refine(
5892
    me_ctxt_t *ps_thrd_ctxt,
5893
    refine_prms_t *ps_refine_prms,
5894
    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
5895
    layer_ctxt_t *ps_coarse_layer,
5896
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
5897
    S32 lyr_job_type,
5898
    S32 thrd_id,
5899
    S32 me_frm_id,
5900
    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
5901
0
{
5902
0
    inter_ctb_prms_t s_common_frm_prms;
5903
5904
0
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
5905
0
    WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
5906
0
    me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
5907
0
    ME_QUALITY_PRESETS_T e_me_quality_presets =
5908
0
        ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
5909
5910
0
    WORD32 num_rows_proc = 0;
5911
0
    WORD32 num_act_ref_pics;
5912
0
    WORD16 i2_prev_enc_frm_max_mv_y;
5913
0
    WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
5914
5915
    /*************************************************************************/
5916
    /* Complexity of search: Low to High                                     */
5917
    /*************************************************************************/
5918
0
    SEARCH_COMPLEXITY_T e_search_complexity;
5919
5920
    /*************************************************************************/
5921
    /* to store the PU results which are passed to the decide_part_types     */
5922
    /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
5923
    /*************************************************************************/
5924
5925
0
    pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
5926
0
    inter_pu_results_t as_inter_pu_results[4];
5927
0
    inter_pu_results_t *ps_pu_results = as_inter_pu_results;
5928
5929
    /*************************************************************************/
5930
    /* Config parameter structures for various ME submodules                 */
5931
    /*************************************************************************/
5932
0
    hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
5933
0
    hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
5934
0
    hme_merge_prms_t s_merge_prms_64x64;
5935
0
    hme_search_prms_t s_search_prms_blk;
5936
0
    mvbank_update_prms_t s_mv_update_prms;
5937
0
    hme_ctb_prms_t s_ctb_prms;
5938
0
    hme_subpel_prms_t s_subpel_prms;
5939
0
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
5940
0
    ctb_cluster_info_t *ps_ctb_cluster_info;
5941
0
    fpel_srch_cand_init_data_t s_srch_cand_init_data;
5942
5943
    /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
5944
0
    S32 en_merge_32x32;
5945
    /* 5 lsb's specify whether or not merge algorithm is required */
5946
    /* to be executed or not. Relevant only in PQ. Ought to be */
5947
    /* used in conjunction with en_merge_32x32 and */
5948
    /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
5949
    /* required when all children are deemed to be intras */
5950
0
    S32 en_merge_execution;
5951
5952
    /*************************************************************************/
5953
    /* All types of search candidates for predictor based search.            */
5954
    /*************************************************************************/
5955
0
    S32 num_init_candts = 0;
5956
0
    S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
5957
0
    S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
5958
0
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
5959
0
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
5960
5961
0
    pf_get_wt_inp fp_get_wt_inp;
5962
5963
0
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
5964
0
    U32 au4_unique_node_map[MAP_X_MAX * 2];
5965
5966
    /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
5967
0
    ctb_boundary_attrs_t *ps_ctb_bound_attrs;
5968
5969
    /*************************************************************************/
5970
    /* Points to the search results for the blk level search (8x8/16x16)     */
5971
    /*************************************************************************/
5972
0
    search_results_t *ps_search_results;
5973
5974
    /*************************************************************************/
5975
    /* Coordinates                                                           */
5976
    /*************************************************************************/
5977
0
    S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
5978
0
    S32 pos_x, pos_y;
5979
0
    S32 blk_id_in_full_ctb;
5980
5981
    /*************************************************************************/
5982
    /* Related to dimensions of block being searched and pic dimensions      */
5983
    /*************************************************************************/
5984
0
    S32 blk_4x4_to_16x16;
5985
0
    S32 blk_wd, blk_ht, blk_size_shift;
5986
0
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
5987
0
    S32 num_results_prev_layer;
5988
5989
    /*************************************************************************/
5990
    /* Size of a basic unit for this layer. For non encode layers, we search */
5991
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
5992
    /* basic unit size is the ctb size.                                      */
5993
    /*************************************************************************/
5994
0
    S32 unit_size;
5995
5996
    /*************************************************************************/
5997
    /* Local variable storing results of any 4 CU merge to bigger CU         */
5998
    /*************************************************************************/
5999
0
    CU_MERGE_RESULT_T e_merge_result;
6000
6001
    /*************************************************************************/
6002
    /* This mv grid stores results during and after fpel search, during      */
6003
    /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
6004
    /* meant for the 2 directions of search (l0 and l1).                     */
6005
    /*************************************************************************/
6006
0
    mv_grid_t *aps_mv_grid[2];
6007
6008
    /*************************************************************************/
6009
    /* Pointers to context in current and coarser layers                     */
6010
    /*************************************************************************/
6011
0
    layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
6012
6013
    /*************************************************************************/
6014
    /* to store mv range per blk, and picture limit, allowed search range    */
6015
    /* range prms in hpel and qpel units as well                             */
6016
    /*************************************************************************/
6017
0
    range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
6018
0
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
6019
0
    range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
6020
6021
    /*************************************************************************/
6022
    /* These variables are used to track number of references at different   */
6023
    /* stages of ME.                                                         */
6024
    /*************************************************************************/
6025
0
    S32 i4_num_pred_dir;
6026
0
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
6027
0
    S32 lambda_recon = ps_refine_prms->lambda_recon;
6028
6029
    /* Counts successful merge to 32x32 every CTB (0-4) */
6030
0
    S32 merge_count_32x32;
6031
6032
0
    S32 ai4_id_coloc[14], ai4_id_Z[2];
6033
0
    U08 au1_search_candidate_list_index[2];
6034
0
    S32 ai4_num_coloc_cands[2];
6035
0
    U08 u1_pred_dir, u1_pred_dir_ctr;
6036
6037
    /*************************************************************************/
6038
    /* Input pointer and stride                                              */
6039
    /*************************************************************************/
6040
0
    U08 *pu1_inp;
6041
0
    S32 i4_inp_stride;
6042
0
    S32 end_of_frame;
6043
0
    S32 num_sync_units_in_row, num_sync_units_in_tile;
6044
6045
    /*************************************************************************/
6046
    /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
6047
    /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
6048
    /* we need to stop merges and force 8x8 CUs for that 16x16 blk           */
6049
    /*************************************************************************/
6050
0
    S32 blk_8x8_mask;
6051
0
    S32 ai4_blk_8x8_mask[16];
6052
0
    U08 au1_is_64x64Blk_noisy[1];
6053
0
    U08 au1_is_32x32Blk_noisy[4];
6054
0
    U08 au1_is_16x16Blk_noisy[16];
6055
6056
0
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
6057
0
        ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
6058
0
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
6059
0
        ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
6060
6061
0
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
6062
6063
    /*************************************************************************/
6064
    /* Pointers to current and coarse layer are needed for projection */
6065
    /* Pointer to prev layer are needed for other candts like coloc   */
6066
    /*************************************************************************/
6067
0
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
6068
6069
0
    ps_prev_layer = hme_get_past_layer_ctxt(
6070
0
        ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
6071
6072
0
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
6073
6074
    /* Function pointer is selected based on the C vc X86 macro */
6075
6076
0
    fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
6077
6078
0
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
6079
0
    i4_pic_wd = ps_curr_layer->i4_wd;
6080
0
    i4_pic_ht = ps_curr_layer->i4_ht;
6081
0
    e_search_complexity = ps_refine_prms->e_search_complexity;
6082
0
    end_of_frame = 0;
6083
6084
    /* This points to all the initial candts */
6085
0
    ps_search_candts = &as_search_candts[0];
6086
6087
    /* mv grid, being a huge structure, is part of the context */
6088
0
    aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
6089
0
    aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
6090
6091
    /*************************************************************************/
6092
    /* If the current layer is encoded (since it may be multicast or final   */
6093
    /* layer (finest)), then we use 16x16 blk size with some selected parts  */
6094
    /* If the current layer is not encoded, then we use 8x8 blk size, with   */
6095
    /* enable or disable of 4x4 partitions depending on the input prms       */
6096
    /*************************************************************************/
6097
0
    e_search_blk_size = BLK_16x16;
6098
0
    blk_wd = blk_ht = 16;
6099
0
    blk_size_shift = 4;
6100
0
    e_result_blk_size = BLK_8x8;
6101
0
    s_mv_update_prms.i4_shift = 1;
6102
6103
0
    if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
6104
0
    {
6105
0
        blk_4x4_to_16x16 = 1;
6106
0
    }
6107
0
    else
6108
0
    {
6109
0
        blk_4x4_to_16x16 = 0;
6110
0
    }
6111
6112
0
    unit_size = 1 << ps_ctxt->log_ctb_size;
6113
0
    s_search_prms_blk.i4_inp_stride = unit_size;
6114
6115
    /* This is required to properly update the layer mv bank */
6116
0
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
6117
0
    s_search_prms_blk.e_blk_size = e_search_blk_size;
6118
6119
    /*************************************************************************/
6120
    /* If current layer is explicit, then the number of ref frames are to    */
6121
    /* be same as previous layer. Else it will be 2                          */
6122
    /*************************************************************************/
6123
0
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
6124
0
    i4_num_pred_dir =
6125
0
        (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
6126
0
        1;
6127
6128
0
#if USE_MODIFIED == 1
6129
0
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6130
#else
6131
    s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6132
#endif
6133
6134
0
    i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
6135
0
    if(i4_num_ref_prev_layer <= 2)
6136
0
    {
6137
0
        i4_num_ref_each_dir = 1;
6138
0
    }
6139
0
    else
6140
0
    {
6141
0
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
6142
0
    }
6143
6144
0
    s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
6145
0
    s_mv_update_prms.i4_num_results_to_store =
6146
0
        MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
6147
0
                                                : (i4_num_act_ref_l0 > 1) + 1,
6148
0
            ps_refine_prms->i4_num_results_per_part);
6149
6150
    /*************************************************************************/
6151
    /* Initialization of merge params for 16x16 to 32x32 merge.              */
6152
    /* There are 4 32x32 units in a CTB, so 4 param structures initialized   */
6153
    /*************************************************************************/
6154
0
    {
6155
0
        hme_merge_prms_t *aps_merge_prms[4];
6156
0
        aps_merge_prms[0] = &s_merge_prms_32x32_tl;
6157
0
        aps_merge_prms[1] = &s_merge_prms_32x32_tr;
6158
0
        aps_merge_prms[2] = &s_merge_prms_32x32_bl;
6159
0
        aps_merge_prms[3] = &s_merge_prms_32x32_br;
6160
0
        for(i = 0; i < 4; i++)
6161
0
        {
6162
0
            hme_merge_prms_init(
6163
0
                aps_merge_prms[i],
6164
0
                ps_curr_layer,
6165
0
                ps_refine_prms,
6166
0
                ps_ctxt,
6167
0
                as_range_prms_rec,
6168
0
                as_range_prms_inp,
6169
0
                &aps_mv_grid[0],
6170
0
                &s_common_frm_prms,
6171
0
                i4_num_pred_dir,
6172
0
                i,
6173
0
                BLK_32x32,
6174
0
                e_me_quality_presets);
6175
0
        }
6176
0
    }
6177
6178
    /*************************************************************************/
6179
    /* Initialization of merge params for 32x32 to 64x64 merge.              */
6180
    /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB    */
6181
    /*************************************************************************/
6182
0
    {
6183
0
        hme_merge_prms_init(
6184
0
            &s_merge_prms_64x64,
6185
0
            ps_curr_layer,
6186
0
            ps_refine_prms,
6187
0
            ps_ctxt,
6188
0
            as_range_prms_rec,
6189
0
            as_range_prms_inp,
6190
0
            &aps_mv_grid[0],
6191
0
            &s_common_frm_prms,
6192
0
            i4_num_pred_dir,
6193
0
            0,
6194
0
            BLK_64x64,
6195
0
            e_me_quality_presets);
6196
0
    }
6197
6198
    /* Pointers to cu_results are initialised here */
6199
0
    {
6200
0
        WORD32 i;
6201
6202
0
        ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
6203
6204
0
        for(i = 0; i < 4; i++)
6205
0
        {
6206
0
            ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
6207
0
        }
6208
6209
0
        for(i = 0; i < 16; i++)
6210
0
        {
6211
0
            ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
6212
0
        }
6213
0
    }
6214
6215
    /*************************************************************************/
6216
    /* SUBPEL Params initialized here                                        */
6217
    /*************************************************************************/
6218
0
    {
6219
0
        s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
6220
0
        s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
6221
0
        s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
6222
6223
0
        s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
6224
0
        s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
6225
0
        s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
6226
6227
0
        s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
6228
0
        s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
6229
6230
0
        s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
6231
6232
0
        s_subpel_prms.i4_inp_stride = unit_size;
6233
6234
0
        s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
6235
0
        s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
6236
0
        s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
6237
6238
0
        s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
6239
6240
0
        {
6241
0
            WORD32 ref_ctr;
6242
0
            for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6243
0
            {
6244
0
                s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
6245
0
                s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
6246
0
            }
6247
0
        }
6248
0
        s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
6249
6250
#if USE_MODIFIED == 0
6251
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
6252
#else
6253
0
        s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
6254
0
#endif
6255
0
        s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
6256
6257
        /* BI Refinement done only if this field is 1 */
6258
0
        s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
6259
6260
0
        s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
6261
6262
0
        s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6263
0
        s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6264
0
        s_subpel_prms.u1_max_num_subpel_refine_centers =
6265
0
            ps_refine_prms->u1_max_num_subpel_refine_centers;
6266
0
    }
6267
6268
    /* inter_ctb_prms_t struct initialisation */
6269
0
    {
6270
0
        inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
6271
0
        hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
6272
6273
0
        ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
6274
0
        ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
6275
0
        ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
6276
0
        ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
6277
0
        ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
6278
0
        ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
6279
0
        ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
6280
0
        ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
6281
0
        ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
6282
0
        ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
6283
0
        ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6284
0
        ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6285
0
        ps_inter_ctb_prms->i4_lamda = lambda_recon;
6286
0
        ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
6287
0
        ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
6288
0
        ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
6289
0
        ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
6290
0
        ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
6291
0
        ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
6292
0
        ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
6293
0
            ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
6294
0
    }
6295
6296
0
    for(i = 0; i < MAX_INIT_CANDTS; i++)
6297
0
    {
6298
0
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
6299
0
        ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
6300
6301
0
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
6302
0
    }
6303
0
    num_act_ref_pics =
6304
0
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6305
6306
0
    if(num_act_ref_pics)
6307
0
    {
6308
0
        hme_search_cand_data_init(
6309
0
            ai4_id_Z,
6310
0
            ai4_id_coloc,
6311
0
            ai4_num_coloc_cands,
6312
0
            au1_search_candidate_list_index,
6313
0
            i4_num_act_ref_l0,
6314
0
            i4_num_act_ref_l1,
6315
0
            ps_ctxt->s_frm_prms.bidir_enabled,
6316
0
            blk_4x4_to_16x16);
6317
0
    }
6318
6319
0
    if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
6320
0
    {
6321
0
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6322
0
        ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
6323
0
    }
6324
0
    else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
6325
0
    {
6326
0
        ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
6327
0
    }
6328
6329
0
    for(i = 0; i < 3; i++)
6330
0
    {
6331
0
        search_node_t *ps_search_node;
6332
0
        ps_search_node = &as_left_neighbours[i];
6333
0
        INIT_SEARCH_NODE(ps_search_node, 0);
6334
0
        ps_search_node = &as_top_neighbours[i];
6335
0
        INIT_SEARCH_NODE(ps_search_node, 0);
6336
0
    }
6337
6338
0
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
6339
0
    as_left_neighbours[2].u1_is_avail = 0;
6340
6341
    /*************************************************************************/
6342
    /* Initialize all the search results structure here. We update all the   */
6343
    /* search results to default values, and configure things like blk sizes */
6344
    /*************************************************************************/
6345
0
    if(num_act_ref_pics)
6346
0
    {
6347
0
        S32 i4_x, i4_y;
6348
        /* 16x16 results */
6349
0
        for(i = 0; i < 16; i++)
6350
0
        {
6351
0
            search_results_t *ps_search_results;
6352
0
            S32 pred_lx;
6353
0
            ps_search_results = &ps_ctxt->as_search_results_16x16[i];
6354
0
            i4_x = (S32)gau1_encode_to_raster_x[i];
6355
0
            i4_y = (S32)gau1_encode_to_raster_y[i];
6356
0
            i4_x <<= 4;
6357
0
            i4_y <<= 4;
6358
6359
0
            hme_init_search_results(
6360
0
                ps_search_results,
6361
0
                i4_num_pred_dir,
6362
0
                ps_refine_prms->i4_num_fpel_results,
6363
0
                ps_refine_prms->i4_num_results_per_part,
6364
0
                e_search_blk_size,
6365
0
                i4_x,
6366
0
                i4_y,
6367
0
                &ps_ctxt->au1_is_past[0]);
6368
6369
0
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6370
0
            {
6371
0
                pred_ctxt_t *ps_pred_ctxt;
6372
6373
0
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6374
6375
0
                hme_init_pred_ctxt_encode(
6376
0
                    ps_pred_ctxt,
6377
0
                    ps_search_results,
6378
0
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6379
0
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6380
0
                    aps_mv_grid[pred_lx],
6381
0
                    pred_lx,
6382
0
                    lambda_recon,
6383
0
                    ps_refine_prms->lambda_q_shift,
6384
0
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6385
0
                    &ps_ctxt->ai2_ref_scf[0]);
6386
0
            }
6387
0
        }
6388
6389
0
        for(i = 0; i < 4; i++)
6390
0
        {
6391
0
            search_results_t *ps_search_results;
6392
0
            S32 pred_lx;
6393
0
            ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6394
6395
0
            i4_x = (S32)gau1_encode_to_raster_x[i];
6396
0
            i4_y = (S32)gau1_encode_to_raster_y[i];
6397
0
            i4_x <<= 5;
6398
0
            i4_y <<= 5;
6399
6400
0
            hme_init_search_results(
6401
0
                ps_search_results,
6402
0
                i4_num_pred_dir,
6403
0
                ps_refine_prms->i4_num_32x32_merge_results,
6404
0
                ps_refine_prms->i4_num_results_per_part,
6405
0
                BLK_32x32,
6406
0
                i4_x,
6407
0
                i4_y,
6408
0
                &ps_ctxt->au1_is_past[0]);
6409
6410
0
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6411
0
            {
6412
0
                pred_ctxt_t *ps_pred_ctxt;
6413
6414
0
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6415
6416
0
                hme_init_pred_ctxt_encode(
6417
0
                    ps_pred_ctxt,
6418
0
                    ps_search_results,
6419
0
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6420
0
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6421
0
                    aps_mv_grid[pred_lx],
6422
0
                    pred_lx,
6423
0
                    lambda_recon,
6424
0
                    ps_refine_prms->lambda_q_shift,
6425
0
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6426
0
                    &ps_ctxt->ai2_ref_scf[0]);
6427
0
            }
6428
0
        }
6429
6430
0
        {
6431
0
            search_results_t *ps_search_results;
6432
0
            S32 pred_lx;
6433
0
            ps_search_results = &ps_ctxt->s_search_results_64x64;
6434
6435
0
            hme_init_search_results(
6436
0
                ps_search_results,
6437
0
                i4_num_pred_dir,
6438
0
                ps_refine_prms->i4_num_64x64_merge_results,
6439
0
                ps_refine_prms->i4_num_results_per_part,
6440
0
                BLK_64x64,
6441
0
                0,
6442
0
                0,
6443
0
                &ps_ctxt->au1_is_past[0]);
6444
6445
0
            for(pred_lx = 0; pred_lx < 2; pred_lx++)
6446
0
            {
6447
0
                pred_ctxt_t *ps_pred_ctxt;
6448
6449
0
                ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
6450
6451
0
                hme_init_pred_ctxt_encode(
6452
0
                    ps_pred_ctxt,
6453
0
                    ps_search_results,
6454
0
                    ps_search_candts[ai4_id_coloc[0]].ps_search_node,
6455
0
                    ps_search_candts[ai4_id_Z[0]].ps_search_node,
6456
0
                    aps_mv_grid[pred_lx],
6457
0
                    pred_lx,
6458
0
                    lambda_recon,
6459
0
                    ps_refine_prms->lambda_q_shift,
6460
0
                    &ps_ctxt->apu1_ref_bits_tlu_lc[0],
6461
0
                    &ps_ctxt->ai2_ref_scf[0]);
6462
0
            }
6463
0
        }
6464
0
    }
6465
6466
    /* Initialise the structure used in clustering  */
6467
0
    if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6468
0
    {
6469
0
        ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
6470
6471
0
        ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
6472
0
        ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
6473
0
        ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
6474
0
        ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
6475
0
        ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
6476
0
        ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
6477
0
        ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
6478
0
    }
6479
6480
    /*********************************************************************/
6481
    /* Initialize the dyn. search range params. for each reference index */
6482
    /* in current layer ctxt                                             */
6483
    /*********************************************************************/
6484
6485
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
6486
0
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
6487
0
    {
6488
0
        WORD32 ref_ctr;
6489
        /* set no. of act ref in L0 for further use at frame level */
6490
0
        ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
6491
0
            ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6492
6493
0
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
6494
0
        {
6495
0
            INIT_DYN_SEARCH_PRMS(
6496
0
                &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
6497
0
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
6498
0
        }
6499
0
    }
6500
    /*************************************************************************/
6501
    /* Now that the candidates have been ordered, to choose the right number */
6502
    /* of initial candidates.                                                */
6503
    /*************************************************************************/
6504
0
    if(blk_4x4_to_16x16)
6505
0
    {
6506
0
        if(i4_num_ref_prev_layer > 2)
6507
0
        {
6508
0
            if(e_search_complexity == SEARCH_CX_LOW)
6509
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6510
0
            else if(e_search_complexity == SEARCH_CX_MED)
6511
0
                num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6512
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6513
0
                num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6514
0
            else
6515
0
                ASSERT(0);
6516
0
        }
6517
0
        else if(i4_num_ref_prev_layer == 2)
6518
0
        {
6519
0
            if(e_search_complexity == SEARCH_CX_LOW)
6520
0
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6521
0
            else if(e_search_complexity == SEARCH_CX_MED)
6522
0
                num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6523
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6524
0
                num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6525
0
            else
6526
0
                ASSERT(0);
6527
0
        }
6528
0
        else
6529
0
        {
6530
0
            if(e_search_complexity == SEARCH_CX_LOW)
6531
0
                num_init_candts = 5;
6532
0
            else if(e_search_complexity == SEARCH_CX_MED)
6533
0
                num_init_candts = 12;
6534
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6535
0
                num_init_candts = 19;
6536
0
            else
6537
0
                ASSERT(0);
6538
0
        }
6539
0
    }
6540
0
    else
6541
0
    {
6542
0
        if(i4_num_ref_prev_layer > 2)
6543
0
        {
6544
0
            if(e_search_complexity == SEARCH_CX_LOW)
6545
0
                num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6546
0
            else if(e_search_complexity == SEARCH_CX_MED)
6547
0
                num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6548
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6549
0
                num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6550
0
            else
6551
0
                ASSERT(0);
6552
0
        }
6553
0
        else if(i4_num_ref_prev_layer == 2)
6554
0
        {
6555
0
            if(e_search_complexity == SEARCH_CX_LOW)
6556
0
                num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6557
0
            else if(e_search_complexity == SEARCH_CX_MED)
6558
0
                num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6559
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6560
0
                num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
6561
0
            else
6562
0
                ASSERT(0);
6563
0
        }
6564
0
        else
6565
0
        {
6566
0
            if(e_search_complexity == SEARCH_CX_LOW)
6567
0
                num_init_candts = 5;
6568
0
            else if(e_search_complexity == SEARCH_CX_MED)
6569
0
                num_init_candts = 11;
6570
0
            else if(e_search_complexity == SEARCH_CX_HIGH)
6571
0
                num_init_candts = 16;
6572
0
            else
6573
0
                ASSERT(0);
6574
0
        }
6575
0
    }
6576
6577
    /*************************************************************************/
6578
    /* The following search parameters are fixed throughout the search across*/
6579
    /* all blks. So these are configured outside processing loop             */
6580
    /*************************************************************************/
6581
0
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
6582
0
    s_search_prms_blk.i4_start_step = 1;
6583
0
    s_search_prms_blk.i4_use_satd = 0;
6584
0
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
6585
    /* we use recon only for encoded layers, otherwise it is not available */
6586
0
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
6587
6588
0
    s_search_prms_blk.ps_search_candts = ps_search_candts;
6589
0
    if(s_search_prms_blk.i4_use_rec)
6590
0
    {
6591
0
        WORD32 ref_ctr;
6592
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6593
0
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
6594
0
    }
6595
0
    else
6596
0
    {
6597
0
        WORD32 ref_ctr;
6598
0
        for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
6599
0
            s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
6600
0
    }
6601
6602
    /*************************************************************************/
6603
    /* Initialize coordinates. Meaning as follows                            */
6604
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
6605
    /* blk_y : same as above, y coord.                                       */
6606
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
6607
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
6608
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
6609
    /* corner of the picture. Always multiple of 64.                         */
6610
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
6611
    /*************************************************************************/
6612
0
    blk_y = 0;
6613
0
    blk_id_in_ctb = 0;
6614
0
    i4_ctb_y = 0;
6615
6616
    /*************************************************************************/
6617
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
6618
    /* every block given its coordinate. Note this assumes that the min amt  */
6619
    /* of padding to right of pic is equal to the blk size. If we go all the */
6620
    /* way upto 64x64, then the min padding on right side of picture should  */
6621
    /* be 64, and also on bottom side of picture.                            */
6622
    /*************************************************************************/
6623
0
    SET_PIC_LIMIT(
6624
0
        s_pic_limit_inp,
6625
0
        ps_curr_layer->i4_pad_x_rec,
6626
0
        ps_curr_layer->i4_pad_y_rec,
6627
0
        ps_curr_layer->i4_wd,
6628
0
        ps_curr_layer->i4_ht,
6629
0
        s_search_prms_blk.i4_num_steps_post_refine);
6630
6631
0
    SET_PIC_LIMIT(
6632
0
        s_pic_limit_rec,
6633
0
        ps_curr_layer->i4_pad_x_rec,
6634
0
        ps_curr_layer->i4_pad_y_rec,
6635
0
        ps_curr_layer->i4_wd,
6636
0
        ps_curr_layer->i4_ht,
6637
0
        s_search_prms_blk.i4_num_steps_post_refine);
6638
6639
    /*************************************************************************/
6640
    /* set the MV limit per ref. pic.                                        */
6641
    /*    - P pic. : Based on the config params.                             */
6642
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
6643
    /*************************************************************************/
6644
0
    hme_set_mv_limit_using_dvsr_data(
6645
0
        ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
6646
0
    s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
6647
0
    s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
6648
0
    s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
6649
0
    s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
6650
0
    s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
6651
0
    s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
6652
0
    s_srch_cand_init_data.ps_search_cands = ps_search_candts;
6653
0
    s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
6654
0
    s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
6655
0
    s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
6656
0
    s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
6657
6658
0
    while(0 == end_of_frame)
6659
0
    {
6660
0
        job_queue_t *ps_job;
6661
0
        frm_ctb_ctxt_t *ps_frm_ctb_prms;
6662
0
        ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6663
6664
0
        WORD32 i4_max_mv_x_in_ctb;
6665
0
        WORD32 i4_max_mv_y_in_ctb;
6666
0
        void *pv_dep_mngr_encloop_dep_me;
6667
0
        WORD32 offset_val, check_dep_pos, set_dep_pos;
6668
0
        WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
6669
6670
0
        pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
6671
6672
0
        ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
6673
6674
        /* Get the current row from the job queue */
6675
0
        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
6676
0
            ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
6677
6678
        /* If all rows are done, set the end of process flag to 1, */
6679
        /* and the current row to -1 */
6680
0
        if(NULL == ps_job)
6681
0
        {
6682
0
            blk_y = -1;
6683
0
            i4_ctb_y = -1;
6684
0
            tile_col_idx = -1;
6685
0
            end_of_frame = 1;
6686
6687
0
            continue;
6688
0
        }
6689
6690
        /* set the output dependency after picking up the row */
6691
0
        ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
6692
6693
        /* Obtain the current row's details from the job */
6694
0
        {
6695
0
            ihevce_tile_params_t *ps_col_tile_params;
6696
6697
0
            i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
6698
            /* Obtain the current column tile index from the job */
6699
0
            tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
6700
6701
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6702
            /* note if ctb is 32x32 then this calc needs to be changed */
6703
0
            num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6704
0
                                    ps_ctxt->log_ctb_size;
6705
6706
            /* The tile parameter for the col. idx. Use only the properties
6707
            which is same for all the bottom tiles like width, start_x, etc.
6708
            Don't use height, start_y, etc.                                  */
6709
0
            ps_col_tile_params =
6710
0
                ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
6711
            /* in encode layer block are 16x16 and CTB is 64 x 64 */
6712
            /* note if ctb is 32x32 then this calc needs to be changed */
6713
0
            num_sync_units_in_tile =
6714
0
                (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
6715
0
                ps_ctxt->log_ctb_size;
6716
6717
0
            i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
6718
0
            i4_ctb_x = i4_first_ctb_x;
6719
6720
0
            if(!num_act_ref_pics)
6721
0
            {
6722
0
                for(i4_ctb_x = i4_first_ctb_x;
6723
0
                    i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
6724
0
                    i4_ctb_x++)
6725
0
                {
6726
0
                    S32 blk_i = 0, blk_j = 0;
6727
                    /* set the dependency for the corresponding row in enc loop */
6728
0
                    ihevce_dmgr_set_row_row_sync(
6729
0
                        pv_dep_mngr_encloop_dep_me,
6730
0
                        (i4_ctb_x + 1),
6731
0
                        i4_ctb_y,
6732
0
                        tile_col_idx /* Col Tile No. */);
6733
0
                }
6734
6735
0
                continue;
6736
0
            }
6737
6738
            /* increment the number of rows proc */
6739
0
            num_rows_proc++;
6740
6741
            /* Set Variables for Dep. Checking and Setting */
6742
0
            set_dep_pos = i4_ctb_y + 1;
6743
0
            if(i4_ctb_y > 0)
6744
0
            {
6745
0
                offset_val = 2;
6746
0
                check_dep_pos = i4_ctb_y - 1;
6747
0
            }
6748
0
            else
6749
0
            {
6750
                /* First row should run without waiting */
6751
0
                offset_val = -1;
6752
0
                check_dep_pos = 0;
6753
0
            }
6754
6755
            /* row ctb out pointer  */
6756
0
            ps_ctxt->ps_ctb_analyse_curr_row =
6757
0
                ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6758
6759
            /* Row level CU Tree buffer */
6760
0
            ps_ctxt->ps_cu_tree_curr_row =
6761
0
                ps_ctxt->ps_cu_tree_base +
6762
0
                i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
6763
6764
0
            ps_ctxt->ps_me_ctb_data_curr_row =
6765
0
                ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6766
0
        }
6767
6768
        /* This flag says the CTB under processing is at the start of tile in horz dir.*/
6769
0
        left_ctb_in_diff_tile = 1;
6770
6771
        /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var,                                 */
6772
        /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
6773
0
        {
6774
0
            S32 i4_ref_id, i4_bits_req;
6775
6776
0
            for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
6777
0
                                            ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
6778
0
                i4_ref_id++)
6779
0
            {
6780
0
                GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
6781
6782
0
                if(i4_bits_req > 12)
6783
0
                {
6784
0
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
6785
0
                }
6786
0
                else
6787
0
                {
6788
0
                    ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
6789
0
                }
6790
0
            }
6791
6792
0
            s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
6793
0
        }
6794
6795
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
6796
            /* loop over all the units in a row                        */
6797
0
        for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
6798
0
            i4_ctb_x++)
6799
0
        {
6800
0
            ihevce_ctb_noise_params *ps_ctb_noise_params =
6801
0
                &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
6802
6803
0
            s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
6804
0
            s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
6805
6806
0
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
6807
0
            ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
6808
            /* Initialize ptr to current IPE CTB */
6809
0
            ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
6810
0
                             i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
6811
0
            {
6812
0
                ps_ctb_bound_attrs =
6813
0
                    get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
6814
6815
0
                en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
6816
0
                num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
6817
0
            }
6818
6819
            /* Block to initialise pointers to part_type_results_t */
6820
            /* in each size-specific inter_cu_results_t  */
6821
0
            {
6822
0
                WORD32 i;
6823
6824
0
                for(i = 0; i < 64; i++)
6825
0
                {
6826
0
                    ps_ctxt->as_cu8x8_results[i].ps_best_results =
6827
0
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6828
0
                            .as_8x8_block_data[i]
6829
0
                            .as_best_results;
6830
0
                    ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
6831
0
                }
6832
6833
0
                for(i = 0; i < 16; i++)
6834
0
                {
6835
0
                    ps_ctxt->as_cu16x16_results[i].ps_best_results =
6836
0
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
6837
0
                    ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
6838
0
                }
6839
6840
0
                for(i = 0; i < 4; i++)
6841
0
                {
6842
0
                    ps_ctxt->as_cu32x32_results[i].ps_best_results =
6843
0
                        ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
6844
0
                            .as_32x32_block_data[i]
6845
0
                            .as_best_results;
6846
0
                    ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
6847
0
                }
6848
6849
0
                ps_ctxt->s_cu64x64_results.ps_best_results =
6850
0
                    ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
6851
0
                ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
6852
0
            }
6853
6854
0
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
6855
0
            {
6856
0
                ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
6857
0
                ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
6858
0
                ps_ctb_cluster_info->ps_cu_tree_root =
6859
0
                    ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
6860
0
                ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
6861
0
            }
6862
6863
0
            if(ME_PRISTINE_QUALITY != e_me_quality_presets)
6864
0
            {
6865
0
                S32 i4_nodes_created_in_cu_tree = 1;
6866
6867
0
                ihevce_cu_tree_init(
6868
0
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6869
0
                    (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
6870
0
                    &i4_nodes_created_in_cu_tree,
6871
0
                    0,
6872
0
                    POS_NA,
6873
0
                    POS_NA,
6874
0
                    POS_NA);
6875
0
            }
6876
6877
0
            memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
6878
6879
0
            if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
6880
0
            {
6881
0
                S32 j;
6882
6883
0
                ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
6884
6885
0
                ps_cur_ipe_ctb =
6886
0
                    ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
6887
0
                lambda_recon =
6888
0
                    hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
6889
6890
0
                lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
6891
6892
0
                for(i = 0; i < 4; i++)
6893
0
                {
6894
0
                    ps_search_results = &ps_ctxt->as_search_results_32x32[i];
6895
6896
0
                    for(j = 0; j < 2; j++)
6897
0
                    {
6898
0
                        ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6899
0
                    }
6900
0
                }
6901
0
                ps_search_results = &ps_ctxt->s_search_results_64x64;
6902
6903
0
                for(j = 0; j < 2; j++)
6904
0
                {
6905
0
                    ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
6906
0
                }
6907
6908
0
                s_common_frm_prms.i4_lamda = lambda_recon;
6909
0
            }
6910
0
            else
6911
0
            {
6912
0
                lambda_recon = ps_refine_prms->lambda_recon;
6913
0
            }
6914
6915
            /*********************************************************************/
6916
            /* replicate the inp buffer at blk or ctb level for each ref id,     */
6917
            /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
6918
            /* thereby avoiding a bloat up of memory. If we did all references   */
6919
            /* weighted pred, we will end up with a duplicate copy of each ref   */
6920
            /* at each layer, since we need to preserve the original reference.  */
6921
            /* ToDo: Need to observe performance with this mechanism and compare */
6922
            /* with case where ref is weighted.                                  */
6923
            /*********************************************************************/
6924
0
            fp_get_wt_inp(
6925
0
                ps_curr_layer,
6926
0
                &ps_ctxt->s_wt_pred,
6927
0
                unit_size,
6928
0
                s_common_frm_prms.i4_ctb_x_off,
6929
0
                s_common_frm_prms.i4_ctb_y_off,
6930
0
                unit_size,
6931
0
                ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
6932
0
                ps_ctxt->i4_wt_pred_enable_flag);
6933
6934
0
            if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
6935
0
            {
6936
0
#if TEMPORAL_NOISE_DETECT
6937
0
                {
6938
0
                    WORD32 had_block_size = 16;
6939
0
                    WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
6940
0
                                           ? 64
6941
0
                                           : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
6942
0
                    WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
6943
0
                                            ? 64
6944
0
                                            : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
6945
0
                    WORD32 num_pred_dir = i4_num_pred_dir;
6946
0
                    WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
6947
0
                    WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
6948
6949
0
                    WORD32 i;
6950
0
                    WORD32 noise_detected;
6951
0
                    WORD32 ctb_size;
6952
0
                    WORD32 num_comp_had_blocks;
6953
0
                    WORD32 noisy_block_cnt;
6954
0
                    WORD32 index_8x8_block;
6955
0
                    WORD32 num_8x8_in_ctb_row;
6956
6957
0
                    WORD32 ht_offset;
6958
0
                    WORD32 wd_offset;
6959
0
                    WORD32 block_ht;
6960
0
                    WORD32 block_wd;
6961
6962
0
                    WORD32 num_horz_blocks;
6963
0
                    WORD32 num_vert_blocks;
6964
6965
0
                    WORD32 mean;
6966
0
                    UWORD32 variance_8x8;
6967
6968
0
                    WORD32 hh_energy_percent;
6969
6970
                    /* variables to hold the constant values. The variable values held are decided by the HAD block size */
6971
0
                    WORD32 min_noisy_block_cnt;
6972
0
                    WORD32 min_coeffs_above_avg;
6973
0
                    WORD32 min_coeff_avg_energy;
6974
6975
                    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. */
6976
0
                    WORD32 i4_cu_x_off, i4_cu_y_off;
6977
0
                    WORD32 is_noisy;
6978
6979
                    /* initialise the variables holding the constants */
6980
0
                    if(had_block_size == 8)
6981
0
                    {
6982
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8;  //6;//
6983
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
6984
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
6985
0
                    }
6986
0
                    else
6987
0
                    {
6988
0
                        min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16;  //7;//
6989
0
                        min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
6990
0
                        min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
6991
0
                    }
6992
6993
                    /* initialize the variables */
6994
0
                    noise_detected = 0;
6995
0
                    noisy_block_cnt = 0;
6996
0
                    hh_energy_percent = 0;
6997
0
                    variance_8x8 = 0;
6998
0
                    block_ht = ctb_height;
6999
0
                    block_wd = ctb_width;
7000
7001
0
                    mean = 0;
7002
7003
0
                    ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
7004
0
                    num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
7005
7006
0
                    num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
7007
0
                    num_vert_blocks = block_ht / had_block_size;  //ctb_height / had_block_size;
7008
7009
0
                    ht_offset = -had_block_size;
7010
0
                    wd_offset = -had_block_size;
7011
7012
0
                    num_8x8_in_ctb_row = block_wd / 8;  // number of 8x8 in this ctb
7013
0
                    for(i = 0; i < num_comp_had_blocks; i++)
7014
0
                    {
7015
0
                        if(i % num_horz_blocks == 0)
7016
0
                        {
7017
0
                            wd_offset = -had_block_size;
7018
0
                            ht_offset += had_block_size;
7019
0
                        }
7020
0
                        wd_offset += had_block_size;
7021
7022
                        /* CU level offsets */
7023
0
                        i4_cu_x_off = i4_x_off + (i % 4) * 16;  //+ (i % 4) * 16
7024
0
                        i4_cu_y_off = i4_y_off + (i / 4) * 16;
7025
7026
                        /* if 50 % or more of the CU is noisy then the return value is 1 */
7027
0
                        is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7028
0
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7029
0
                            (i % 4) * 16,
7030
0
                            (i / 4) * 16,
7031
0
                            16);
7032
7033
                        /* the temporal noise detect call is made on the CU only if the CU is noisy */
7034
0
                        if(is_noisy)
7035
0
                        {
7036
0
                            index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
7037
0
                                              (i % num_horz_blocks) * 2;
7038
0
                            noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
7039
0
                                16,
7040
0
                                ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
7041
0
                                    ? 64
7042
0
                                    : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
7043
0
                                ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
7044
0
                                    ? 64
7045
0
                                    : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
7046
0
                                ps_ctb_noise_params,
7047
0
                                &s_srch_cand_init_data,
7048
0
                                &s_search_prms_blk,
7049
0
                                ps_ctxt,
7050
0
                                num_pred_dir,
7051
0
                                i4_num_act_ref_l0,
7052
0
                                i4_num_act_ref_l1,
7053
0
                                i4_cu_x_off,
7054
0
                                i4_cu_y_off,
7055
0
                                &ps_ctxt->s_wt_pred,
7056
0
                                unit_size,
7057
0
                                index_8x8_block,
7058
0
                                num_horz_blocks,
7059
0
                                /*num_8x8_in_ctb_row*/ 8,  // this should be a variable extra
7060
0
                                i);
7061
0
                        } /* if 16x16 is noisy */
7062
0
                    } /* loop over for all 16x16*/
7063
7064
0
                    if(noisy_block_cnt >= min_noisy_block_cnt)
7065
0
                    {
7066
0
                        noise_detected = 1;
7067
0
                    }
7068
7069
                    /* write back the noise presence detected for the current CTB to the structure */
7070
0
                    ps_ctb_noise_params->i4_noise_present = noise_detected;
7071
0
                }
7072
0
#endif
7073
7074
#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
7075
                if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
7076
                   ps_ctb_noise_params->i4_noise_present)
7077
                {
7078
                    memset(
7079
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7080
                        1,
7081
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7082
                }
7083
#endif
7084
7085
0
                for(i = 0; i < 16; i++)
7086
0
                {
7087
0
                    au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7088
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
7089
0
                }
7090
7091
0
                for(i = 0; i < 4; i++)
7092
0
                {
7093
0
                    au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7094
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
7095
0
                }
7096
7097
0
                for(i = 0; i < 1; i++)
7098
0
                {
7099
0
                    au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
7100
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
7101
0
                }
7102
7103
0
                if(ps_ctxt->s_frm_prms.bidir_enabled &&
7104
0
                   (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
7105
0
                    MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
7106
0
                {
7107
0
                    ps_ctb_noise_params->i4_noise_present = 0;
7108
0
                    memset(
7109
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy,
7110
0
                        0,
7111
0
                        sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
7112
0
                }
7113
7114
0
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7115
0
                for(i = 0; i < 4; i++)
7116
0
                {
7117
0
                    S32 j;
7118
0
                    S32 lambda;
7119
7120
0
                    if(au1_is_32x32Blk_noisy[i])
7121
0
                    {
7122
0
                        lambda = lambda_recon;
7123
0
                        lambda =
7124
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7125
7126
0
                        ps_search_results = &ps_ctxt->as_search_results_32x32[i];
7127
7128
0
                        for(j = 0; j < 2; j++)
7129
0
                        {
7130
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7131
0
                        }
7132
0
                    }
7133
0
                }
7134
7135
0
                {
7136
0
                    S32 j;
7137
0
                    S32 lambda;
7138
7139
0
                    if(au1_is_64x64Blk_noisy[0])
7140
0
                    {
7141
0
                        lambda = lambda_recon;
7142
0
                        lambda =
7143
0
                            ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7144
7145
0
                        ps_search_results = &ps_ctxt->s_search_results_64x64;
7146
7147
0
                        for(j = 0; j < 2; j++)
7148
0
                        {
7149
0
                            ps_search_results->as_pred_ctxt[j].lambda = lambda;
7150
0
                        }
7151
0
                    }
7152
0
                }
7153
0
#endif
7154
0
                if(au1_is_64x64Blk_noisy[0])
7155
0
                {
7156
0
                    U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7157
0
                                                             (s_common_frm_prms.i4_ctb_y_off *
7158
0
                                                              ps_curr_layer->i4_inp_stride));
7159
7160
0
                    hme_compute_sigmaX_and_sigmaXSquared(
7161
0
                        pu1_inp,
7162
0
                        ps_curr_layer->i4_inp_stride,
7163
0
                        ps_ctxt->au4_4x4_src_sigmaX,
7164
0
                        ps_ctxt->au4_4x4_src_sigmaXSquared,
7165
0
                        4,
7166
0
                        4,
7167
0
                        64,
7168
0
                        64,
7169
0
                        1,
7170
0
                        16);
7171
0
                }
7172
0
                else
7173
0
                {
7174
0
                    for(i = 0; i < 4; i++)
7175
0
                    {
7176
0
                        if(au1_is_32x32Blk_noisy[i])
7177
0
                        {
7178
0
                            U08 *pu1_inp =
7179
0
                                ps_curr_layer->pu1_inp +
7180
0
                                (s_common_frm_prms.i4_ctb_x_off +
7181
0
                                 (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
7182
7183
0
                            U08 u1_cu_size = 32;
7184
0
                            WORD32 i4_inp_buf_offset =
7185
0
                                (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
7186
0
                                 ((i % 2) * u1_cu_size));
7187
7188
0
                            U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
7189
0
                            U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
7190
0
                            S32 i4_sigma_arr_offset =
7191
0
                                (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
7192
0
                                 ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
7193
7194
0
                            hme_compute_sigmaX_and_sigmaXSquared(
7195
0
                                pu1_inp + i4_inp_buf_offset,
7196
0
                                ps_curr_layer->i4_inp_stride,
7197
0
                                ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
7198
0
                                ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
7199
0
                                4,
7200
0
                                4,
7201
0
                                32,
7202
0
                                32,
7203
0
                                1,
7204
0
                                16);
7205
0
                        }
7206
0
                        else
7207
0
                        {
7208
0
                            S32 j;
7209
7210
0
                            U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
7211
0
                            U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
7212
0
                            S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
7213
0
                                (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
7214
0
                                 ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
7215
7216
0
                            for(j = 0; j < 4; j++)
7217
0
                            {
7218
0
                                U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
7219
0
                                U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
7220
0
                                S32 i4_16x16_blk_index_in_ctb =
7221
0
                                    i4_16x16_blk_start_index_in_i_th_32x32_blk +
7222
0
                                    ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
7223
0
                                    ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
7224
7225
                                //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
7226
7227
0
                                if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
7228
0
                                {
7229
0
                                    U08 *pu1_inp =
7230
0
                                        ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
7231
0
                                                                  (s_common_frm_prms.i4_ctb_y_off *
7232
0
                                                                   ps_curr_layer->i4_inp_stride));
7233
7234
0
                                    U08 u1_cu_size = 16;
7235
0
                                    WORD32 i4_inp_buf_offset =
7236
0
                                        (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
7237
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7238
0
                                          (u1_cu_size * ps_curr_layer->i4_inp_stride)));
7239
7240
0
                                    U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
7241
0
                                    U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
7242
0
                                    S32 i4_sigma_arr_offset =
7243
0
                                        (((i4_16x16_blk_index_in_ctb % 4) *
7244
0
                                          u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
7245
0
                                         ((i4_16x16_blk_index_in_ctb / 4) *
7246
0
                                          u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
7247
7248
0
                                    hme_compute_sigmaX_and_sigmaXSquared(
7249
0
                                        pu1_inp + i4_inp_buf_offset,
7250
0
                                        ps_curr_layer->i4_inp_stride,
7251
0
                                        (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
7252
0
                                        (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
7253
0
                                        4,
7254
0
                                        4,
7255
0
                                        16,
7256
0
                                        16,
7257
0
                                        1,
7258
0
                                        16);
7259
0
                                }
7260
0
                            }
7261
0
                        }
7262
0
                    }
7263
0
                }
7264
0
            }
7265
0
            else
7266
0
            {
7267
0
                memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
7268
7269
0
                memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
7270
7271
0
                memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
7272
0
            }
7273
7274
0
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
7275
0
            {
7276
0
                S32 ref_ctr;
7277
0
                U08 au1_pred_dir_searched[2];
7278
0
                U08 u1_is_cu_noisy;
7279
0
                ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
7280
7281
0
                {
7282
0
                    blk_x = (i4_ctb_x << 2) +
7283
0
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
7284
0
                    blk_y = (i4_ctb_y << 2) +
7285
0
                            (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
7286
7287
0
                    blk_id_in_full_ctb =
7288
0
                        ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
7289
0
                    blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
7290
0
                    ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
7291
0
                    s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
7292
0
                    s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
7293
0
                }
7294
7295
                /* get the current input blk point */
7296
0
                pos_x = blk_x << blk_size_shift;
7297
0
                pos_y = blk_y << blk_size_shift;
7298
0
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
7299
7300
                /*********************************************************************/
7301
                /* For every blk in the picture, the search range needs to be derived*/
7302
                /* Any blk can have any mv, but practical search constraints are     */
7303
                /* imposed by the picture boundary and amt of padding.               */
7304
                /*********************************************************************/
7305
                /* MV limit is different based on ref. PIC */
7306
0
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7307
0
                {
7308
0
                    if(!s_search_prms_blk.i4_use_rec)
7309
0
                    {
7310
0
                        hme_derive_search_range(
7311
0
                            &as_range_prms_inp[ref_ctr],
7312
0
                            &s_pic_limit_inp,
7313
0
                            &as_mv_limit[ref_ctr],
7314
0
                            pos_x,
7315
0
                            pos_y,
7316
0
                            blk_wd,
7317
0
                            blk_ht);
7318
0
                    }
7319
0
                    else
7320
0
                    {
7321
0
                        hme_derive_search_range(
7322
0
                            &as_range_prms_rec[ref_ctr],
7323
0
                            &s_pic_limit_rec,
7324
0
                            &as_mv_limit[ref_ctr],
7325
0
                            pos_x,
7326
0
                            pos_y,
7327
0
                            blk_wd,
7328
0
                            blk_ht);
7329
0
                    }
7330
0
                }
7331
0
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
7332
0
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
7333
                /* Select search results from a suitable search result in the context */
7334
0
                {
7335
0
                    ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
7336
7337
0
                    if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
7338
0
                    {
7339
0
                        S32 i;
7340
7341
0
                        for(i = 0; i < 2; i++)
7342
0
                        {
7343
0
                            ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
7344
0
                        }
7345
0
                    }
7346
0
                }
7347
7348
0
                u1_is_cu_noisy = au1_is_16x16Blk_noisy
7349
0
                    [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
7350
7351
0
                s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
7352
7353
0
#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
7354
0
                if(u1_is_cu_noisy)
7355
0
                {
7356
0
                    S32 j;
7357
0
                    S32 lambda;
7358
7359
0
                    lambda = lambda_recon;
7360
0
                    lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
7361
7362
0
                    for(j = 0; j < 2; j++)
7363
0
                    {
7364
0
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7365
0
                    }
7366
0
                }
7367
0
                else
7368
0
                {
7369
0
                    S32 j;
7370
0
                    S32 lambda;
7371
7372
0
                    lambda = lambda_recon;
7373
7374
0
                    for(j = 0; j < 2; j++)
7375
0
                    {
7376
0
                        ps_search_results->as_pred_ctxt[j].lambda = lambda;
7377
0
                    }
7378
0
                }
7379
0
#endif
7380
7381
0
                s_search_prms_blk.ps_search_results = ps_search_results;
7382
7383
0
                s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
7384
0
                    pu1_inp,
7385
0
                    i4_inp_stride,
7386
0
                    ps_refine_prms->limit_active_partitions,
7387
0
                    ps_ctxt->ps_hme_frm_prms->bidir_enabled,
7388
0
                    ps_ctxt->u1_is_curFrame_a_refFrame,
7389
0
                    blk_8x8_mask,
7390
0
                    e_me_quality_presets);
7391
7392
0
                if(ME_PRISTINE_QUALITY == e_me_quality_presets)
7393
0
                {
7394
0
                    ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
7395
0
                        s_search_prms_blk.i4_part_mask;
7396
0
                }
7397
7398
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
7399
0
                {
7400
                    /* Setting u1_num_active_refs to 2 */
7401
                    /* for the sole purpose of the */
7402
                    /* function called below */
7403
0
                    ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
7404
7405
0
                    hme_reset_search_results(
7406
0
                        ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
7407
7408
0
                    ps_search_results->u1_num_active_ref = i4_num_pred_dir;
7409
0
                }
7410
7411
0
                if(0 == blk_id_in_ctb)
7412
0
                {
7413
0
                    UWORD8 u1_ctr;
7414
0
                    for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
7415
0
                                              ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
7416
0
                        u1_ctr++)
7417
0
                    {
7418
0
                        WORD32 i4_max_dep_ctb_y;
7419
0
                        WORD32 i4_max_dep_ctb_x;
7420
7421
                        /* Set max mv in ctb units */
7422
0
                        i4_max_mv_x_in_ctb =
7423
0
                            (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7424
0
                            ps_ctxt->log_ctb_size;
7425
7426
0
                        i4_max_mv_y_in_ctb =
7427
0
                            (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
7428
0
                            ps_ctxt->log_ctb_size;
7429
                        /********************************************************************/
7430
                        /* Set max ctb_x and ctb_y dependency on reference picture          */
7431
                        /* Note +1 is due to delayed deblock, SAO, subpel plan dependency   */
7432
                        /********************************************************************/
7433
0
                        i4_max_dep_ctb_x = CLIP3(
7434
0
                            (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
7435
0
                            0,
7436
0
                            ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
7437
0
                        i4_max_dep_ctb_y = CLIP3(
7438
0
                            (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
7439
0
                            0,
7440
0
                            ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
7441
7442
0
                        ihevce_dmgr_map_chk_sync(
7443
0
                            ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
7444
0
                            ps_ctxt->thrd_id,
7445
0
                            i4_ctb_x,
7446
0
                            i4_ctb_y,
7447
0
                            i4_max_mv_x_in_ctb,
7448
0
                            i4_max_mv_y_in_ctb);
7449
0
                    }
7450
0
                }
7451
7452
                /* Loop across different Ref IDx */
7453
0
                for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
7454
0
                {
7455
0
                    S32 resultid;
7456
0
                    S08 u1_default_ref_id;
7457
0
                    S32 i4_num_srch_cands = 0;
7458
0
                    S32 i4_num_refinement_iterations;
7459
0
                    S32 i4_refine_iter_ctr;
7460
7461
0
                    if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
7462
0
                       (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
7463
0
                    {
7464
0
                        u1_pred_dir = u1_pred_dir_ctr;
7465
0
                    }
7466
0
                    else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
7467
0
                    {
7468
0
                        u1_pred_dir = 1;
7469
0
                    }
7470
7471
0
                    u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
7472
0
                                                           : ps_ctxt->ai1_future_list[0];
7473
0
                    au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
7474
7475
0
                    i4_num_srch_cands = 0;
7476
0
                    resultid = 0;
7477
7478
                    /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
7479
0
                    if(0 == blk_id_in_ctb)
7480
0
                    {
7481
                        /*****************************************************************/
7482
                        /* Initialize the mv grid with results of neighbours for the next*/
7483
                        /* ctb.                                                          */
7484
                        /*****************************************************************/
7485
0
                        hme_fill_ctb_neighbour_mvs(
7486
0
                            ps_curr_layer,
7487
0
                            blk_x,
7488
0
                            blk_y,
7489
0
                            aps_mv_grid[u1_pred_dir],
7490
0
                            u1_pred_dir_ctr,
7491
0
                            u1_default_ref_id,
7492
0
                            ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7493
0
                    }
7494
7495
0
                    s_search_prms_blk.i1_ref_idx = u1_pred_dir;
7496
7497
0
                    {
7498
0
                        if((blk_id_in_full_ctb % 4) == 0)
7499
0
                        {
7500
0
                            ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
7501
0
                                .as_pred_ctxt[u1_pred_dir]
7502
0
                                .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
7503
0
                        }
7504
7505
0
                        if(blk_id_in_full_ctb == 0)
7506
0
                        {
7507
0
                            ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
7508
0
                        }
7509
7510
0
                        ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
7511
0
                            !gau1_encode_to_raster_y[blk_id_in_full_ctb];
7512
0
                    }
7513
7514
0
                    {
7515
0
                        S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
7516
0
                        S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
7517
0
                        U08 u1_is_blk_at_ctb_boundary = !y;
7518
7519
0
                        s_srch_cand_init_data.u1_is_left_available =
7520
0
                            !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
7521
7522
0
                        if(u1_is_blk_at_ctb_boundary)
7523
0
                        {
7524
0
                            s_srch_cand_init_data.u1_is_topRight_available = 0;
7525
0
                            s_srch_cand_init_data.u1_is_topLeft_available = 0;
7526
0
                            s_srch_cand_init_data.u1_is_top_available = 0;
7527
0
                        }
7528
0
                        else
7529
0
                        {
7530
0
                            s_srch_cand_init_data.u1_is_topRight_available =
7531
0
                                gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
7532
0
                            s_srch_cand_init_data.u1_is_top_available = 1;
7533
0
                            s_srch_cand_init_data.u1_is_topLeft_available =
7534
0
                                s_srch_cand_init_data.u1_is_left_available;
7535
0
                        }
7536
0
                    }
7537
7538
0
                    s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
7539
0
                    s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
7540
0
                    s_srch_cand_init_data.i4_pos_x = pos_x;
7541
0
                    s_srch_cand_init_data.i4_pos_y = pos_y;
7542
0
                    s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
7543
0
                    s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
7544
0
                    s_srch_cand_init_data.u1_search_candidate_list_index =
7545
0
                        au1_search_candidate_list_index[u1_pred_dir];
7546
7547
0
                    i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
7548
7549
                    /* Note this block also clips the MV range for all candidates */
7550
0
                    {
7551
0
                        S08 i1_check_for_mult_refs;
7552
7553
0
                        i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
7554
0
                                                             : (ps_ctxt->num_ref_past > 1);
7555
7556
0
                        ps_me_optimised_function_list->pf_mv_clipper(
7557
0
                            &s_search_prms_blk,
7558
0
                            i4_num_srch_cands,
7559
0
                            i1_check_for_mult_refs,
7560
0
                            ps_refine_prms->i4_num_steps_fpel_refine,
7561
0
                            ps_refine_prms->i4_num_steps_hpel_refine,
7562
0
                            ps_refine_prms->i4_num_steps_qpel_refine);
7563
0
                    }
7564
7565
0
#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
7566
0
                    i4_num_refinement_iterations =
7567
0
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
7568
0
                            ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
7569
0
                            : 1;
7570
#else
7571
                    i4_num_refinement_iterations =
7572
                        ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
7573
#endif
7574
7575
#if ENABLE_EXPLICIT_SEARCH_IN_PQ
7576
                    if(e_me_quality_presets == ME_PRISTINE_QUALITY)
7577
                    {
7578
                        i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
7579
                                                                          : i4_num_act_ref_l1;
7580
                    }
7581
#endif
7582
7583
0
                    for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
7584
0
                        i4_refine_iter_ctr++)
7585
0
                    {
7586
0
                        S32 center_x;
7587
0
                        S32 center_y;
7588
0
                        S32 center_ref_idx;
7589
7590
0
                        S08 *pi1_pred_dir_to_ref_idx =
7591
0
                            (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
7592
7593
0
                        {
7594
0
                            WORD32 i4_i;
7595
7596
0
                            for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
7597
0
                            {
7598
0
                                ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7599
0
                                ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
7600
0
                                ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
7601
0
                                    MAX_SIGNED_16BIT_VAL;
7602
0
                                ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
7603
0
                                ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
7604
0
                                ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
7605
7606
0
                                if(ps_refine_prms->i4_num_results_per_part == 2)
7607
0
                                {
7608
0
                                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
7609
0
                                        MAX_SIGNED_16BIT_VAL;
7610
0
                                    ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
7611
0
                                        MAX_SIGNED_16BIT_VAL;
7612
0
                                    ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
7613
0
                                        MAX_SIGNED_16BIT_VAL;
7614
0
                                    ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
7615
0
                                    ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
7616
0
                                    ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
7617
0
                                }
7618
0
                            }
7619
7620
0
                            s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
7621
0
                            s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
7622
0
                        }
7623
7624
0
                        {
7625
0
                            search_node_t *ps_coloc_node;
7626
7627
0
                            S32 i = 0;
7628
7629
0
                            if(i4_num_refinement_iterations > 1)
7630
0
                            {
7631
0
                                for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
7632
0
                                {
7633
0
                                    ps_coloc_node =
7634
0
                                        s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
7635
0
                                            .ps_search_node;
7636
7637
0
                                    if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
7638
0
                                       ps_coloc_node->i1_ref_idx)
7639
0
                                    {
7640
0
                                        break;
7641
0
                                    }
7642
0
                                }
7643
7644
0
                                if(i == ai4_num_coloc_cands[u1_pred_dir])
7645
0
                                {
7646
0
                                    i = 0;
7647
0
                                }
7648
0
                            }
7649
0
                            else
7650
0
                            {
7651
0
                                ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
7652
0
                                                    .ps_search_node;
7653
0
                            }
7654
7655
0
                            hme_set_mvp_node(
7656
0
                                ps_search_results,
7657
0
                                ps_coloc_node,
7658
0
                                u1_pred_dir,
7659
0
                                (i4_num_refinement_iterations > 1)
7660
0
                                    ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
7661
0
                                    : u1_default_ref_id);
7662
7663
0
                            center_x = ps_coloc_node->ps_mv->i2_mvx;
7664
0
                            center_y = ps_coloc_node->ps_mv->i2_mvy;
7665
0
                            center_ref_idx = ps_coloc_node->i1_ref_idx;
7666
0
                        }
7667
7668
                        /* Full-Pel search */
7669
0
                        {
7670
0
                            S32 num_unique_nodes;
7671
7672
0
                            memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
7673
7674
0
                            num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
7675
0
                                as_unique_search_nodes,
7676
0
                                s_search_prms_blk.ps_search_candts,
7677
0
                                au4_unique_node_map,
7678
0
                                pi1_pred_dir_to_ref_idx,
7679
0
                                i4_num_srch_cands,
7680
0
                                s_search_prms_blk.i4_num_init_candts,
7681
0
                                i4_refine_iter_ctr,
7682
0
                                i4_num_refinement_iterations,
7683
0
                                i4_num_act_ref_l0,
7684
0
                                center_ref_idx,
7685
0
                                center_x,
7686
0
                                center_y,
7687
0
                                ps_ctxt->s_frm_prms.bidir_enabled,
7688
0
                                e_me_quality_presets);
7689
7690
                            /*************************************************************************/
7691
                            /* This array stores the ids of the partitions whose                     */
7692
                            /* SADs are updated. Since the partitions whose SADs are updated may not */
7693
                            /* be in contiguous order, we supply another level of indirection.       */
7694
                            /*************************************************************************/
7695
0
                            ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
7696
0
                                s_search_prms_blk.i4_part_mask,
7697
0
                                &ps_fullpel_refine_ctxt->ai4_part_id[0]);
7698
7699
0
                            if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
7700
0
                            {
7701
0
                                S32 i;
7702
                                /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
7703
0
                                S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
7704
0
                                                            (s_search_prms_blk.i4_cu_y_off * 4);
7705
7706
0
                                for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
7707
0
                                {
7708
0
                                    S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
7709
7710
0
                                    hme_compute_final_sigma_of_pu_from_base_blocks(
7711
0
                                        ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
7712
0
                                        ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
7713
0
                                        au8_final_src_sigmaX,
7714
0
                                        au8_final_src_sigmaXSquared,
7715
0
                                        16,
7716
0
                                        4,
7717
0
                                        i4_part_id,
7718
0
                                        16);
7719
0
                                }
7720
7721
0
                                s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7722
0
                                s_common_frm_prms.pu8_part_src_sigmaXSquared =
7723
0
                                    au8_final_src_sigmaXSquared;
7724
7725
0
                                s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
7726
0
                                s_search_prms_blk.pu8_part_src_sigmaXSquared =
7727
0
                                    au8_final_src_sigmaXSquared;
7728
0
                            }
7729
7730
0
                            if(0 == num_unique_nodes)
7731
0
                            {
7732
0
                                continue;
7733
0
                            }
7734
7735
0
                            if(num_unique_nodes >= 2)
7736
0
                            {
7737
0
                                s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7738
0
                                s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
7739
0
                                if(ps_ctxt->i4_pic_type != IV_P_FRAME)
7740
0
                                {
7741
0
                                    if(ps_ctxt->i4_temporal_layer == 1)
7742
0
                                    {
7743
0
                                        hme_fullpel_cand_sifter(
7744
0
                                            &s_search_prms_blk,
7745
0
                                            ps_curr_layer,
7746
0
                                            &ps_ctxt->s_wt_pred,
7747
0
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7748
0
                                            u1_is_cu_noisy,
7749
0
                                            ps_me_optimised_function_list);
7750
0
                                    }
7751
0
                                    else
7752
0
                                    {
7753
0
                                        hme_fullpel_cand_sifter(
7754
0
                                            &s_search_prms_blk,
7755
0
                                            ps_curr_layer,
7756
0
                                            &ps_ctxt->s_wt_pred,
7757
0
                                            ALPHA_FOR_NOISE_TERM_IN_ME,
7758
0
                                            u1_is_cu_noisy,
7759
0
                                            ps_me_optimised_function_list);
7760
0
                                    }
7761
0
                                }
7762
0
                                else
7763
0
                                {
7764
0
                                    hme_fullpel_cand_sifter(
7765
0
                                        &s_search_prms_blk,
7766
0
                                        ps_curr_layer,
7767
0
                                        &ps_ctxt->s_wt_pred,
7768
0
                                        ALPHA_FOR_NOISE_TERM_IN_ME_P,
7769
0
                                        u1_is_cu_noisy,
7770
0
                                        ps_me_optimised_function_list);
7771
0
                                }
7772
0
                            }
7773
7774
0
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
7775
7776
0
                            hme_fullpel_refine(
7777
0
                                ps_refine_prms,
7778
0
                                &s_search_prms_blk,
7779
0
                                ps_curr_layer,
7780
0
                                &ps_ctxt->s_wt_pred,
7781
0
                                au4_unique_node_map,
7782
0
                                num_unique_nodes,
7783
0
                                blk_8x8_mask,
7784
0
                                center_x,
7785
0
                                center_y,
7786
0
                                center_ref_idx,
7787
0
                                e_me_quality_presets,
7788
0
                                ps_me_optimised_function_list);
7789
0
                        }
7790
7791
                        /* Sub-Pel search */
7792
0
                        {
7793
0
                            hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7794
7795
0
                            s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
7796
0
                                &ps_ctxt->s_buf_mgr,
7797
0
                                INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
7798
                            /* MV limit is different based on ref. PIC */
7799
0
                            for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
7800
0
                            {
7801
0
                                SCALE_RANGE_PRMS(
7802
0
                                    as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
7803
0
                                SCALE_RANGE_PRMS(
7804
0
                                    as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
7805
0
                            }
7806
0
                            s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
7807
0
                            s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
7808
7809
0
                            hme_subpel_refine_cu_hs(
7810
0
                                &s_subpel_prms,
7811
0
                                ps_curr_layer,
7812
0
                                ps_search_results,
7813
0
                                u1_pred_dir,
7814
0
                                &ps_ctxt->s_wt_pred,
7815
0
                                blk_8x8_mask,
7816
0
                                ps_ctxt->ps_func_selector,
7817
0
                                ps_cmn_utils_optimised_function_list,
7818
0
                                ps_me_optimised_function_list);
7819
0
                        }
7820
0
                    }
7821
0
                }
7822
                /* Populate the new PU struct with the results post subpel refinement*/
7823
0
                {
7824
0
                    inter_cu_results_t *ps_cu_results;
7825
0
                    WORD32 best_inter_cost, intra_cost, posx, posy;
7826
7827
0
                    UWORD8 intra_8x8_enabled = 0;
7828
7829
                    /*  cost of 16x16 cu parent  */
7830
0
                    WORD32 parent_cost = MAX_32BIT_VAL;
7831
7832
                    /*  cost of 8x8 cu children  */
7833
                    /*********************************************************************/
7834
                    /* Assuming parent is not split, then we signal 1 bit for this parent*/
7835
                    /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
7836
                    /* So, 4*lambda is extra for children cost.                          */
7837
                    /*********************************************************************/
7838
0
                    WORD32 child_cost = 0;
7839
7840
0
                    ps_cu_results = ps_search_results->ps_cu_results;
7841
7842
                    /* Initialize the pu_results pointers to the first struct in the stack array */
7843
0
                    ps_pu_results = as_inter_pu_results;
7844
7845
0
                    hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
7846
7847
0
                    hme_populate_pus(
7848
0
                        ps_thrd_ctxt,
7849
0
                        ps_ctxt,
7850
0
                        &s_subpel_prms,
7851
0
                        ps_search_results,
7852
0
                        ps_cu_results,
7853
0
                        ps_pu_results,
7854
0
                        &(as_pu_results[0][0][0]),
7855
0
                        &s_common_frm_prms,
7856
0
                        &ps_ctxt->s_wt_pred,
7857
0
                        ps_curr_layer,
7858
0
                        au1_pred_dir_searched,
7859
0
                        i4_num_pred_dir);
7860
7861
0
                    ps_cu_results->i4_inp_offset =
7862
0
                        (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
7863
7864
0
                    hme_decide_part_types(
7865
0
                        ps_cu_results,
7866
0
                        ps_pu_results,
7867
0
                        &s_common_frm_prms,
7868
0
                        ps_ctxt,
7869
0
                        ps_cmn_utils_optimised_function_list,
7870
0
                        ps_me_optimised_function_list
7871
7872
0
                    );
7873
7874
                    /* Update the MIN and MAX MVs for Dynamic Search Range for each ref. pic. */
7875
                    /* Only for P pic: is_i_pic and bidir_enabled are both 0 for P; for I and B they are mutually exclusive */
7876
0
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
7877
0
                    {
7878
0
                        WORD32 res_ctr;
7879
7880
0
                        for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
7881
0
                        {
7882
0
                            WORD32 num_part = 2, part_ctr;
7883
0
                            part_type_results_t *ps_best_results =
7884
0
                                &ps_cu_results->ps_best_results[res_ctr];
7885
7886
0
                            if(PRT_2Nx2N == ps_best_results->u1_part_type)
7887
0
                                num_part = 1;
7888
7889
0
                            for(part_ctr = 0; part_ctr < num_part; part_ctr++)
7890
0
                            {
7891
0
                                pu_result_t *ps_pu_results =
7892
0
                                    &ps_best_results->as_pu_results[part_ctr];
7893
7894
0
                                ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
7895
7896
0
                                hme_update_dynamic_search_params(
7897
0
                                    &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
7898
0
                                         .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
7899
0
                                    ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
7900
7901
                                /* Sanity Check */
7902
0
                                ASSERT(
7903
0
                                    ps_pu_results->pu.mv.i1_l0_ref_idx <
7904
0
                                    ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
7905
7906
                                /* No L1 for P Pic. */
7907
0
                                ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
7908
                                /* No BI for P Pic. */
7909
0
                                ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
7910
0
                            }
7911
0
                        }
7912
0
                    }
7913
7914
                    /*****************************************************************/
7915
                    /* INSERT INTRA RESULTS AT 16x16 LEVEL.                          */
7916
                    /*****************************************************************/
7917
7918
0
#if DISABLE_INTRA_IN_BPICS
7919
0
                    if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7920
0
                             (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
7921
0
#endif
7922
0
                    {
7923
0
                        if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
7924
0
                        {
7925
0
                            hme_insert_intra_nodes_post_bipred(
7926
0
                                ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
7927
0
                        }
7928
0
                    }
7929
7930
0
#if DISABLE_INTRA_IN_BPICS
7931
0
                    if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
7932
0
                       (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
7933
0
                    {
7934
0
                        intra_8x8_enabled = 0;
7935
0
                    }
7936
0
                    else
7937
0
#endif
7938
0
                    {
7939
                        /*TRAQO intra flag updation*/
7940
0
                        if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
7941
0
                        {
7942
0
                            best_inter_cost =
7943
0
                                ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
7944
0
                            intra_cost =
7945
0
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7946
                            /*@16x16 level*/
7947
0
                            posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
7948
0
                                    << 2) >>
7949
0
                                   4;
7950
0
                            posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
7951
0
                                    << 2) >>
7952
0
                                   4;
7953
0
                        }
7954
0
                        else
7955
0
                        {
7956
0
                            best_inter_cost =
7957
0
                                ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
7958
0
                            posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
7959
0
                                    << 2) >>
7960
0
                                   3;
7961
0
                            posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
7962
0
                                    << 2) >>
7963
0
                                   3;
7964
0
                        }
7965
7966
                        /* Disable intra16/32/64 flags based on split flags recommended by IPE */
7967
0
                        if(ps_cur_ipe_ctb->u1_split_flag)
7968
0
                        {
7969
                            /* Id of the 32x32 block, 16x16 block in a CTB */
7970
0
                            WORD32 i4_32x32_id =
7971
0
                                (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
7972
0
                            WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
7973
0
                                                 ((ps_cu_results->u1_x_off >> 4) & 0x1);
7974
7975
0
                            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
7976
0
                            {
7977
0
                                if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7978
0
                                       .as_intra16_analyse[i4_16x16_id]
7979
0
                                       .b1_split_flag)
7980
0
                                {
7981
0
                                    intra_8x8_enabled =
7982
0
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7983
0
                                            .as_intra16_analyse[i4_16x16_id]
7984
0
                                            .as_intra8_analyse[0]
7985
0
                                            .b1_valid_cu;
7986
0
                                    intra_8x8_enabled &=
7987
0
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7988
0
                                            .as_intra16_analyse[i4_16x16_id]
7989
0
                                            .as_intra8_analyse[1]
7990
0
                                            .b1_valid_cu;
7991
0
                                    intra_8x8_enabled &=
7992
0
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7993
0
                                            .as_intra16_analyse[i4_16x16_id]
7994
0
                                            .as_intra8_analyse[2]
7995
0
                                            .b1_valid_cu;
7996
0
                                    intra_8x8_enabled &=
7997
0
                                        ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
7998
0
                                            .as_intra16_analyse[i4_16x16_id]
7999
0
                                            .as_intra8_analyse[3]
8000
0
                                            .b1_valid_cu;
8001
0
                                }
8002
0
                            }
8003
0
                        }
8004
0
                    }
8005
8006
0
                    if(blk_8x8_mask == 0xf)
8007
0
                    {
8008
0
                        parent_cost =
8009
0
                            ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
8010
0
                        ps_search_results->u1_split_flag = 0;
8011
0
                    }
8012
0
                    else
8013
0
                    {
8014
0
                        ps_search_results->u1_split_flag = 1;
8015
0
                    }
8016
8017
0
                    ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
8018
8019
0
                    if(s_common_frm_prms.u1_is_cu_noisy)
8020
0
                    {
8021
0
                        intra_8x8_enabled = 0;
8022
0
                    }
8023
8024
                    /* Evaluate 8x8 if NxN part id is enabled */
8025
0
                    if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
8026
0
                    {
8027
                        /* Populates the PU's for the 4 8x8's in one call */
8028
0
                        hme_populate_pus_8x8_cu(
8029
0
                            ps_thrd_ctxt,
8030
0
                            ps_ctxt,
8031
0
                            &s_subpel_prms,
8032
0
                            ps_search_results,
8033
0
                            ps_cu_results,
8034
0
                            ps_pu_results,
8035
0
                            &(as_pu_results[0][0][0]),
8036
0
                            &s_common_frm_prms,
8037
0
                            au1_pred_dir_searched,
8038
0
                            i4_num_pred_dir,
8039
0
                            blk_8x8_mask);
8040
8041
                        /* Re-initialize the pu_results pointers to the first struct in the stack array */
8042
0
                        ps_pu_results = as_inter_pu_results;
8043
8044
0
                        for(i = 0; i < 4; i++)
8045
0
                        {
8046
0
                            if((blk_8x8_mask & (1 << i)))
8047
0
                            {
8048
0
                                if(ps_cu_results->i4_part_mask)
8049
0
                                {
8050
0
                                    hme_decide_part_types(
8051
0
                                        ps_cu_results,
8052
0
                                        ps_pu_results,
8053
0
                                        &s_common_frm_prms,
8054
0
                                        ps_ctxt,
8055
0
                                        ps_cmn_utils_optimised_function_list,
8056
0
                                        ps_me_optimised_function_list
8057
8058
0
                                    );
8059
0
                                }
8060
                                /*****************************************************************/
8061
                                /* INSERT INTRA RESULTS AT 8x8 LEVEL.                          */
8062
                                /*****************************************************************/
8063
0
#if DISABLE_INTRA_IN_BPICS
8064
0
                                if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
8065
0
                                         (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
8066
0
                                          TEMPORAL_LAYER_DISABLE)))
8067
0
#endif
8068
0
                                {
8069
0
                                    if(!(DISABLE_INTRA_WHEN_NOISY &&
8070
0
                                         s_common_frm_prms.u1_is_cu_noisy))
8071
0
                                    {
8072
0
                                        hme_insert_intra_nodes_post_bipred(
8073
0
                                            ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
8074
0
                                    }
8075
0
                                }
8076
8077
0
                                child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
8078
0
                            }
8079
8080
0
                            ps_cu_results++;
8081
0
                            ps_pu_results++;
8082
0
                        }
8083
8084
                        /* Compare 16x16 vs 8x8 cost */
8085
0
                        if(child_cost < parent_cost)
8086
0
                        {
8087
0
                            ps_search_results->best_cu_cost = child_cost;
8088
0
                            ps_search_results->u1_split_flag = 1;
8089
0
                        }
8090
0
                    }
8091
0
                }
8092
8093
0
                hme_update_mv_bank_encode(
8094
0
                    ps_search_results,
8095
0
                    ps_curr_layer->ps_layer_mvbank,
8096
0
                    blk_x,
8097
0
                    blk_y,
8098
0
                    &s_mv_update_prms,
8099
0
                    au1_pred_dir_searched,
8100
0
                    i4_num_act_ref_l0);
8101
8102
                /*********************************************************************/
8103
                /* Map the best results to an MV Grid. This is an 18x18 grid that is */
8104
                /* useful for doing things like predictor for cost calculation or    */
8105
                /* also for merge calculations if need be.                           */
8106
                /*********************************************************************/
8107
0
                hme_map_mvs_to_grid(
8108
0
                    &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
8109
0
            }
8110
8111
            /* Set the CU tree nodes appropriately */
8112
0
            if(e_me_quality_presets != ME_PRISTINE_QUALITY)
8113
0
            {
8114
0
                WORD32 i, j;
8115
8116
0
                for(i = 0; i < 16; i++)
8117
0
                {
8118
0
                    cur_ctb_cu_tree_t *ps_tree_node =
8119
0
                        ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
8120
0
                    search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
8121
8122
0
                    switch(i >> 2)
8123
0
                    {
8124
0
                    case 0:
8125
0
                    {
8126
0
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8127
8128
0
                        break;
8129
0
                    }
8130
0
                    case 1:
8131
0
                    {
8132
0
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8133
8134
0
                        break;
8135
0
                    }
8136
0
                    case 2:
8137
0
                    {
8138
0
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8139
8140
0
                        break;
8141
0
                    }
8142
0
                    case 3:
8143
0
                    {
8144
0
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8145
8146
0
                        break;
8147
0
                    }
8148
0
                    }
8149
8150
0
                    switch(i % 4)
8151
0
                    {
8152
0
                    case 0:
8153
0
                    {
8154
0
                        ps_tree_node = ps_tree_node->ps_child_node_tl;
8155
8156
0
                        break;
8157
0
                    }
8158
0
                    case 1:
8159
0
                    {
8160
0
                        ps_tree_node = ps_tree_node->ps_child_node_tr;
8161
8162
0
                        break;
8163
0
                    }
8164
0
                    case 2:
8165
0
                    {
8166
0
                        ps_tree_node = ps_tree_node->ps_child_node_bl;
8167
8168
0
                        break;
8169
0
                    }
8170
0
                    case 3:
8171
0
                    {
8172
0
                        ps_tree_node = ps_tree_node->ps_child_node_br;
8173
8174
0
                        break;
8175
0
                    }
8176
0
                    }
8177
8178
0
                    if(ai4_blk_8x8_mask[i] == 15)
8179
0
                    {
8180
0
                        if(!ps_results->u1_split_flag)
8181
0
                        {
8182
0
                            ps_tree_node->is_node_valid = 1;
8183
0
                            NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
8184
0
                        }
8185
0
                        else
8186
0
                        {
8187
0
                            ps_tree_node->is_node_valid = 0;
8188
0
                            ENABLE_THE_CHILDREN_NODES(ps_tree_node);
8189
0
                        }
8190
0
                    }
8191
0
                    else
8192
0
                    {
8193
0
                        cur_ctb_cu_tree_t *ps_tree_child;
8194
8195
0
                        ps_tree_node->is_node_valid = 0;
8196
8197
0
                        for(j = 0; j < 4; j++)
8198
0
                        {
8199
0
                            switch(j)
8200
0
                            {
8201
0
                            case 0:
8202
0
                            {
8203
0
                                ps_tree_child = ps_tree_node->ps_child_node_tl;
8204
8205
0
                                break;
8206
0
                            }
8207
0
                            case 1:
8208
0
                            {
8209
0
                                ps_tree_child = ps_tree_node->ps_child_node_tr;
8210
8211
0
                                break;
8212
0
                            }
8213
0
                            case 2:
8214
0
                            {
8215
0
                                ps_tree_child = ps_tree_node->ps_child_node_bl;
8216
8217
0
                                break;
8218
0
                            }
8219
0
                            case 3:
8220
0
                            {
8221
0
                                ps_tree_child = ps_tree_node->ps_child_node_br;
8222
8223
0
                                break;
8224
0
                            }
8225
0
                            }
8226
8227
0
                            ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
8228
0
                        }
8229
0
                    }
8230
0
                }
8231
0
            }
8232
8233
0
            if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8234
0
            {
8235
0
                cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
8236
8237
0
                hme_analyse_mv_clustering(
8238
0
                    ps_ctxt->as_search_results_16x16,
8239
0
                    ps_ctxt->as_cu16x16_results,
8240
0
                    ps_ctxt->as_cu8x8_results,
8241
0
                    ps_ctxt->ps_ctb_cluster_info,
8242
0
                    ps_ctxt->ai1_future_list,
8243
0
                    ps_ctxt->ai1_past_list,
8244
0
                    ps_ctxt->s_frm_prms.bidir_enabled,
8245
0
                    e_me_quality_presets);
8246
8247
#if DISABLE_BLK_MERGE_WHEN_NOISY
8248
                ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
8249
                ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
8250
                ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
8251
                ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
8252
                ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
8253
                ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
8254
                ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
8255
                ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
8256
                ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
8257
                ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
8258
#endif
8259
8260
0
                en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
8261
0
                                 (ps_tree->ps_child_node_tr->is_node_valid << 1) |
8262
0
                                 (ps_tree->ps_child_node_bl->is_node_valid << 2) |
8263
0
                                 (ps_tree->ps_child_node_br->is_node_valid << 3);
8264
8265
0
                en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
8266
0
                                     (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
8267
0
                                     (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
8268
0
                                     (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
8269
0
                                     (ps_tree->u1_inter_eval_enable << 4);
8270
0
            }
8271
0
            else
8272
0
            {
8273
0
                en_merge_execution = 0x1f;
8274
8275
#if DISABLE_BLK_MERGE_WHEN_NOISY
8276
                en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
8277
                                 ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
8278
                                 ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
8279
                                 ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
8280
#endif
8281
0
            }
8282
8283
            /* Re-initialize the pu_results pointers to the first struct in the stack array */
8284
0
            ps_pu_results = as_inter_pu_results;
8285
8286
0
            {
8287
0
                WORD32 ref_ctr;
8288
8289
0
                s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
8290
0
                s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
8291
8292
                /* MV limit is different based on ref. PIC */
8293
0
                for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8294
0
                {
8295
0
                    SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
8296
0
                    SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
8297
0
                }
8298
8299
0
                e_merge_result = CU_SPLIT;
8300
0
                merge_count_32x32 = 0;
8301
8302
0
                if((en_merge_32x32 & 1) && (en_merge_execution & 1))
8303
0
                {
8304
0
                    range_prms_t *ps_pic_limit;
8305
0
                    if(s_merge_prms_32x32_tl.i4_use_rec == 1)
8306
0
                    {
8307
0
                        ps_pic_limit = &s_pic_limit_rec;
8308
0
                    }
8309
0
                    else
8310
0
                    {
8311
0
                        ps_pic_limit = &s_pic_limit_inp;
8312
0
                    }
8313
                    /* MV limit is different based on ref. PIC */
8314
0
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8315
0
                    {
8316
0
                        hme_derive_search_range(
8317
0
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8318
0
                            ps_pic_limit,
8319
0
                            &as_mv_limit[ref_ctr],
8320
0
                            i4_ctb_x << 6,
8321
0
                            i4_ctb_y << 6,
8322
0
                            32,
8323
0
                            32);
8324
8325
0
                        SCALE_RANGE_PRMS_POINTERS(
8326
0
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8327
0
                            s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
8328
0
                            2);
8329
0
                    }
8330
0
                    s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
8331
0
                    s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
8332
0
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
8333
8334
0
                    e_merge_result = hme_try_merge_high_speed(
8335
0
                        ps_thrd_ctxt,
8336
0
                        ps_ctxt,
8337
0
                        ps_cur_ipe_ctb,
8338
0
                        &s_subpel_prms,
8339
0
                        &s_merge_prms_32x32_tl,
8340
0
                        ps_pu_results,
8341
0
                        &as_pu_results[0][0][0]);
8342
8343
0
                    if(e_merge_result == CU_MERGED)
8344
0
                    {
8345
0
                        inter_cu_results_t *ps_cu_results =
8346
0
                            s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
8347
8348
0
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8349
0
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8350
0
                        {
8351
0
                            hme_map_mvs_to_grid(
8352
0
                                &aps_mv_grid[0],
8353
0
                                s_merge_prms_32x32_tl.ps_results_merge,
8354
0
                                s_merge_prms_32x32_tl.au1_pred_dir_searched,
8355
0
                                s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
8356
0
                        }
8357
8358
0
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8359
0
                        {
8360
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8361
0
                                .ps_child_node_tl->is_node_valid = 1;
8362
0
                            NULLIFY_THE_CHILDREN_NODES(
8363
0
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8364
0
                                    .ps_child_node_tl);
8365
0
                        }
8366
8367
0
                        merge_count_32x32++;
8368
0
                        e_merge_result = CU_SPLIT;
8369
0
                    }
8370
0
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8371
0
                    {
8372
0
#if ENABLE_CU_TREE_CULLING
8373
0
                        cur_ctb_cu_tree_t *ps_tree =
8374
0
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8375
8376
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8377
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8378
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8379
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8380
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8381
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8382
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8383
0
#endif
8384
0
                    }
8385
0
                }
8386
0
                else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
8387
0
                {
8388
0
#if ENABLE_CU_TREE_CULLING
8389
0
                    cur_ctb_cu_tree_t *ps_tree =
8390
0
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
8391
8392
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8393
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8394
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8395
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8396
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8397
0
#endif
8398
8399
0
                    if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
8400
0
                    {
8401
0
                        ps_tree->is_node_valid = 0;
8402
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8403
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8404
0
                    }
8405
0
                }
8406
8407
0
                if((en_merge_32x32 & 2) && (en_merge_execution & 2))
8408
0
                {
8409
0
                    range_prms_t *ps_pic_limit;
8410
0
                    if(s_merge_prms_32x32_tr.i4_use_rec == 1)
8411
0
                    {
8412
0
                        ps_pic_limit = &s_pic_limit_rec;
8413
0
                    }
8414
0
                    else
8415
0
                    {
8416
0
                        ps_pic_limit = &s_pic_limit_inp;
8417
0
                    }
8418
                    /* MV limit is different based on ref. PIC */
8419
0
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8420
0
                    {
8421
0
                        hme_derive_search_range(
8422
0
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8423
0
                            ps_pic_limit,
8424
0
                            &as_mv_limit[ref_ctr],
8425
0
                            (i4_ctb_x << 6) + 32,
8426
0
                            i4_ctb_y << 6,
8427
0
                            32,
8428
0
                            32);
8429
0
                        SCALE_RANGE_PRMS_POINTERS(
8430
0
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8431
0
                            s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
8432
0
                            2);
8433
0
                    }
8434
0
                    s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
8435
0
                    s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
8436
0
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
8437
8438
0
                    e_merge_result = hme_try_merge_high_speed(
8439
0
                        ps_thrd_ctxt,
8440
0
                        ps_ctxt,
8441
0
                        ps_cur_ipe_ctb,
8442
0
                        &s_subpel_prms,
8443
0
                        &s_merge_prms_32x32_tr,
8444
0
                        ps_pu_results,
8445
0
                        &as_pu_results[0][0][0]);
8446
8447
0
                    if(e_merge_result == CU_MERGED)
8448
0
                    {
8449
0
                        inter_cu_results_t *ps_cu_results =
8450
0
                            s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
8451
8452
0
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8453
0
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8454
0
                        {
8455
0
                            hme_map_mvs_to_grid(
8456
0
                                &aps_mv_grid[0],
8457
0
                                s_merge_prms_32x32_tr.ps_results_merge,
8458
0
                                s_merge_prms_32x32_tr.au1_pred_dir_searched,
8459
0
                                s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
8460
0
                        }
8461
8462
0
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8463
0
                        {
8464
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8465
0
                                .ps_child_node_tr->is_node_valid = 1;
8466
0
                            NULLIFY_THE_CHILDREN_NODES(
8467
0
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8468
0
                                    .ps_child_node_tr);
8469
0
                        }
8470
8471
0
                        merge_count_32x32++;
8472
0
                        e_merge_result = CU_SPLIT;
8473
0
                    }
8474
0
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8475
0
                    {
8476
0
#if ENABLE_CU_TREE_CULLING
8477
0
                        cur_ctb_cu_tree_t *ps_tree =
8478
0
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8479
8480
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8481
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8482
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8483
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8484
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8485
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8486
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8487
0
#endif
8488
0
                    }
8489
0
                }
8490
0
                else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
8491
0
                {
8492
0
#if ENABLE_CU_TREE_CULLING
8493
0
                    cur_ctb_cu_tree_t *ps_tree =
8494
0
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
8495
8496
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8497
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8498
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8499
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8500
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8501
0
#endif
8502
8503
0
                    if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
8504
0
                    {
8505
0
                        ps_tree->is_node_valid = 0;
8506
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8507
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8508
0
                    }
8509
0
                }
8510
8511
0
                if((en_merge_32x32 & 4) && (en_merge_execution & 4))
8512
0
                {
8513
0
                    range_prms_t *ps_pic_limit;
8514
0
                    if(s_merge_prms_32x32_bl.i4_use_rec == 1)
8515
0
                    {
8516
0
                        ps_pic_limit = &s_pic_limit_rec;
8517
0
                    }
8518
0
                    else
8519
0
                    {
8520
0
                        ps_pic_limit = &s_pic_limit_inp;
8521
0
                    }
8522
                    /* MV limit is different based on ref. PIC */
8523
0
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8524
0
                    {
8525
0
                        hme_derive_search_range(
8526
0
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8527
0
                            ps_pic_limit,
8528
0
                            &as_mv_limit[ref_ctr],
8529
0
                            i4_ctb_x << 6,
8530
0
                            (i4_ctb_y << 6) + 32,
8531
0
                            32,
8532
0
                            32);
8533
0
                        SCALE_RANGE_PRMS_POINTERS(
8534
0
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8535
0
                            s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
8536
0
                            2);
8537
0
                    }
8538
0
                    s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
8539
0
                    s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
8540
0
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
8541
8542
0
                    e_merge_result = hme_try_merge_high_speed(
8543
0
                        ps_thrd_ctxt,
8544
0
                        ps_ctxt,
8545
0
                        ps_cur_ipe_ctb,
8546
0
                        &s_subpel_prms,
8547
0
                        &s_merge_prms_32x32_bl,
8548
0
                        ps_pu_results,
8549
0
                        &as_pu_results[0][0][0]);
8550
8551
0
                    if(e_merge_result == CU_MERGED)
8552
0
                    {
8553
0
                        inter_cu_results_t *ps_cu_results =
8554
0
                            s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
8555
8556
0
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8557
0
                             (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8558
0
                        {
8559
0
                            hme_map_mvs_to_grid(
8560
0
                                &aps_mv_grid[0],
8561
0
                                s_merge_prms_32x32_bl.ps_results_merge,
8562
0
                                s_merge_prms_32x32_bl.au1_pred_dir_searched,
8563
0
                                s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
8564
0
                        }
8565
8566
0
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8567
0
                        {
8568
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8569
0
                                .ps_child_node_bl->is_node_valid = 1;
8570
0
                            NULLIFY_THE_CHILDREN_NODES(
8571
0
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8572
0
                                    .ps_child_node_bl);
8573
0
                        }
8574
8575
0
                        merge_count_32x32++;
8576
0
                        e_merge_result = CU_SPLIT;
8577
0
                    }
8578
0
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8579
0
                    {
8580
0
#if ENABLE_CU_TREE_CULLING
8581
0
                        cur_ctb_cu_tree_t *ps_tree =
8582
0
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8583
8584
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8585
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8586
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8587
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8588
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8589
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8590
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8591
0
#endif
8592
0
                    }
8593
0
                }
8594
0
                else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
8595
0
                {
8596
0
#if ENABLE_CU_TREE_CULLING
8597
0
                    cur_ctb_cu_tree_t *ps_tree =
8598
0
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
8599
8600
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8601
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8602
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8603
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8604
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8605
0
#endif
8606
8607
0
                    if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
8608
0
                    {
8609
0
                        ps_tree->is_node_valid = 0;
8610
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8611
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8612
0
                    }
8613
0
                }
8614
8615
0
                if((en_merge_32x32 & 8) && (en_merge_execution & 8))
8616
0
                {
8617
0
                    range_prms_t *ps_pic_limit;
8618
0
                    if(s_merge_prms_32x32_br.i4_use_rec == 1)
8619
0
                    {
8620
0
                        ps_pic_limit = &s_pic_limit_rec;
8621
0
                    }
8622
0
                    else
8623
0
                    {
8624
0
                        ps_pic_limit = &s_pic_limit_inp;
8625
0
                    }
8626
                    /* MV limit is different based on ref. PIC */
8627
0
                    for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8628
0
                    {
8629
0
                        hme_derive_search_range(
8630
0
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8631
0
                            ps_pic_limit,
8632
0
                            &as_mv_limit[ref_ctr],
8633
0
                            (i4_ctb_x << 6) + 32,
8634
0
                            (i4_ctb_y << 6) + 32,
8635
0
                            32,
8636
0
                            32);
8637
8638
0
                        SCALE_RANGE_PRMS_POINTERS(
8639
0
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8640
0
                            s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
8641
0
                            2);
8642
0
                    }
8643
0
                    s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
8644
0
                    s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
8645
0
                    s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
8646
8647
0
                    e_merge_result = hme_try_merge_high_speed(
8648
0
                        ps_thrd_ctxt,
8649
0
                        ps_ctxt,
8650
0
                        ps_cur_ipe_ctb,
8651
0
                        &s_subpel_prms,
8652
0
                        &s_merge_prms_32x32_br,
8653
0
                        ps_pu_results,
8654
0
                        &as_pu_results[0][0][0]);
8655
8656
0
                    if(e_merge_result == CU_MERGED)
8657
0
                    {
8658
                        /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
8659
8660
                        if(!((ps_cu_results->u1_num_best_results == 1) &&
8661
                        (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
8662
                        {
8663
                        hme_map_mvs_to_grid
8664
                        (
8665
                        &aps_mv_grid[0],
8666
                        s_merge_prms_32x32_br.ps_results_merge,
8667
                        s_merge_prms_32x32_br.au1_pred_dir_searched,
8668
                        s_merge_prms_32x32_br.i4_num_pred_dir_actual
8669
                        );
8670
                        }*/
8671
8672
0
                        if(ME_PRISTINE_QUALITY != e_me_quality_presets)
8673
0
                        {
8674
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8675
0
                                .ps_child_node_br->is_node_valid = 1;
8676
0
                            NULLIFY_THE_CHILDREN_NODES(
8677
0
                                ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8678
0
                                    .ps_child_node_br);
8679
0
                        }
8680
8681
0
                        merge_count_32x32++;
8682
0
                        e_merge_result = CU_SPLIT;
8683
0
                    }
8684
0
                    else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
8685
0
                    {
8686
0
#if ENABLE_CU_TREE_CULLING
8687
0
                        cur_ctb_cu_tree_t *ps_tree =
8688
0
                            ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8689
8690
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8691
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8692
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree);
8693
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8694
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8695
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8696
0
                        ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8697
0
#endif
8698
0
                    }
8699
0
                }
8700
0
                else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
8701
0
                {
8702
0
#if ENABLE_CU_TREE_CULLING
8703
0
                    cur_ctb_cu_tree_t *ps_tree =
8704
0
                        ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
8705
8706
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree);
8707
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
8708
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
8709
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
8710
0
                    ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
8711
0
#endif
8712
8713
0
                    if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
8714
0
                    {
8715
0
                        ps_tree->is_node_valid = 0;
8716
0
                        ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
8717
0
                        en_merge_execution = (en_merge_execution & (~(1 << 4)));
8718
0
                    }
8719
0
                }
8720
8721
                /* Try merging all 32x32 to 64x64 candts */
8722
0
                if(((en_merge_32x32 & 0xf) == 0xf) &&
8723
0
                   (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
8724
0
                    ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
8725
0
                    if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
8726
0
                         !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
8727
0
                        (e_me_quality_presets != ME_XTREME_SPEED_25)))
8728
0
                    {
8729
0
                        range_prms_t *ps_pic_limit;
8730
0
                        if(s_merge_prms_64x64.i4_use_rec == 1)
8731
0
                        {
8732
0
                            ps_pic_limit = &s_pic_limit_rec;
8733
0
                        }
8734
0
                        else
8735
0
                        {
8736
0
                            ps_pic_limit = &s_pic_limit_inp;
8737
0
                        }
8738
                        /* MV limit is different based on ref. PIC */
8739
0
                        for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
8740
0
                        {
8741
0
                            hme_derive_search_range(
8742
0
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8743
0
                                ps_pic_limit,
8744
0
                                &as_mv_limit[ref_ctr],
8745
0
                                i4_ctb_x << 6,
8746
0
                                i4_ctb_y << 6,
8747
0
                                64,
8748
0
                                64);
8749
8750
0
                            SCALE_RANGE_PRMS_POINTERS(
8751
0
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8752
0
                                s_merge_prms_64x64.aps_mv_range[ref_ctr],
8753
0
                                2);
8754
0
                        }
8755
0
                        s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
8756
0
                        s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
8757
0
                        s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
8758
8759
0
                        e_merge_result = hme_try_merge_high_speed(
8760
0
                            ps_thrd_ctxt,
8761
0
                            ps_ctxt,
8762
0
                            ps_cur_ipe_ctb,
8763
0
                            &s_subpel_prms,
8764
0
                            &s_merge_prms_64x64,
8765
0
                            ps_pu_results,
8766
0
                            &as_pu_results[0][0][0]);
8767
8768
0
                        if((e_merge_result == CU_MERGED) &&
8769
0
                           (ME_PRISTINE_QUALITY != e_me_quality_presets))
8770
0
                        {
8771
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8772
0
                                .is_node_valid = 1;
8773
0
                            NULLIFY_THE_CHILDREN_NODES(
8774
0
                                ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
8775
0
                        }
8776
0
                        else if(
8777
0
                            (e_merge_result == CU_SPLIT) &&
8778
0
                            (ME_PRISTINE_QUALITY == e_me_quality_presets))
8779
0
                        {
8780
0
                            ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
8781
0
                                .is_node_valid = 0;
8782
0
                        }
8783
0
                    }
8784
8785
                /*****************************************************************/
8786
                /* UPDATION OF RESULT TO EXTERNAL STRUCTURES                     */
8787
                /*****************************************************************/
8788
0
                pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
8789
8790
0
                {
8791
#ifdef _DEBUG
8792
                    S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
8793
                                 ? 64
8794
                                 : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
8795
                    S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
8796
                                 ? 64
8797
                                 : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
8798
                    ASSERT(
8799
                        (wd * ht) ==
8800
                        ihevce_compute_area_of_valid_cus_in_ctb(
8801
                            &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
8802
#endif
8803
0
                }
8804
0
            }
8805
8806
            /* set the dependency for the corresponding row in enc loop */
8807
0
            ihevce_dmgr_set_row_row_sync(
8808
0
                pv_dep_mngr_encloop_dep_me,
8809
0
                (i4_ctb_x + 1),
8810
0
                i4_ctb_y,
8811
0
                tile_col_idx /* Col Tile No. */);
8812
8813
0
            left_ctb_in_diff_tile = 0;
8814
0
        }
8815
0
    }
8816
0
}
8817
8818
/**
8819
********************************************************************************
8820
*  @fn   void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt,
8821
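*                       refine_prms_t *ps_refine_prms,
*                       multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                       S32 lyr_job_type,
*                       WORD32 i4_ping_pong,
*                       void **ppv_dep_mngr_hme_sync)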
8822
*
8823
*  @brief  Top level entry point for refinement ME
8824
*
8825
*  @param[in,out]  ps_ctxt: ME Handle
8826
*
8827
*  @param[in]  ps_refine_prms : refinement layer prms
8828
*
8829
*  @return None
8830
********************************************************************************
8831
*/
8832
void hme_refine_no_encode(
8833
    coarse_me_ctxt_t *ps_ctxt,
8834
    refine_prms_t *ps_refine_prms,
8835
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
8836
    S32 lyr_job_type,
8837
    WORD32 i4_ping_pong,
8838
    void **ppv_dep_mngr_hme_sync)
8839
0
{
8840
0
    BLK_SIZE_T e_search_blk_size, e_result_blk_size;
8841
0
    ME_QUALITY_PRESETS_T e_me_quality_presets =
8842
0
        ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
8843
8844
    /*************************************************************************/
8845
    /* Complexity of search: Low to High                                     */
8846
    /*************************************************************************/
8847
0
    SEARCH_COMPLEXITY_T e_search_complexity;
8848
8849
    /*************************************************************************/
8850
    /* Config parameter structures for various ME submodules                 */
8851
    /*************************************************************************/
8852
0
    hme_search_prms_t s_search_prms_blk;
8853
0
    mvbank_update_prms_t s_mv_update_prms;
8854
8855
    /*************************************************************************/
8856
    /* All types of search candidates for predictor based search.            */
8857
    /*************************************************************************/
8858
0
    S32 num_init_candts = 0;
8859
0
    search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
8860
0
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
8861
0
    search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
8862
0
    search_node_t *ps_candt_l, *ps_candt_t;
8863
0
    search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
8864
0
    search_node_t *ps_candt_prj_bl[2];
8865
0
    search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
8866
0
    search_node_t *ps_candt_prj_coloc[2];
8867
8868
0
    pf_get_wt_inp fp_get_wt_inp;
8869
8870
0
    search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
8871
0
    U32 au4_unique_node_map[MAP_X_MAX * 2];
8872
8873
    /*EIID */
8874
0
    WORD32 i4_num_inter_wins = 0;  //debug code to find stat of
8875
0
    WORD32 i4_num_comparisions = 0;  //debug code
8876
0
    WORD32 i4_threshold_multiplier;
8877
0
    WORD32 i4_threshold_divider;
8878
0
    WORD32 i4_temporal_layer =
8879
0
        ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
8880
8881
    /*************************************************************************/
8882
    /* points to the search results for the blk level search (8x8/16x16)     */
8883
    /*************************************************************************/
8884
0
    search_results_t *ps_search_results;
8885
8886
    /*************************************************************************/
8887
    /* Coordinates                                                           */
8888
    /*************************************************************************/
8889
0
    S32 blk_x, i4_ctb_x, blk_id_in_ctb;
8890
    //S32 i4_ctb_y;
8891
0
    S32 pos_x, pos_y;
8892
0
    S32 blk_id_in_full_ctb;
8893
0
    S32 i4_num_srch_cands;
8894
8895
0
    S32 blk_y;
8896
8897
    /*************************************************************************/
8898
    /* Related to dimensions of block being searched and pic dimensions      */
8899
    /*************************************************************************/
8900
0
    S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
8901
0
    S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
8902
0
    S32 num_results_prev_layer;
8903
8904
    /*************************************************************************/
8905
    /* Size of a basic unit for this layer. For non encode layers, we search */
8906
    /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
8907
    /* basic unit size is the ctb size.                                      */
8908
    /*************************************************************************/
8909
0
    S32 unit_size;
8910
8911
    /*************************************************************************/
8912
    /* Pointers to context in current and coarser layers                     */
8913
    /*************************************************************************/
8914
0
    layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
8915
8916
    /*************************************************************************/
8917
    /* to store mv range per blk, and picture limit, allowed search range    */
8918
    /* range prms in hpel and qpel units as well                             */
8919
    /*************************************************************************/
8920
0
    range_prms_t s_range_prms_inp, s_range_prms_rec;
8921
0
    range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
8922
    /*************************************************************************/
8923
    /* These variables are used to track number of references at different   */
8924
    /* stages of ME.                                                         */
8925
    /*************************************************************************/
8926
0
    S32 i4_num_ref_fpel, i4_num_ref_before_merge;
8927
0
    S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
8928
0
    S32 lambda_inp = ps_refine_prms->lambda_inp;
8929
8930
    /*************************************************************************/
8931
    /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
8932
    /* Explicit means it searches on all active ref idx.                     */
8933
    /*************************************************************************/
8934
0
    S32 curr_layer_implicit, prev_layer_implicit;
8935
8936
    /*************************************************************************/
8937
    /* Variables for loop counts                                             */
8938
    /*************************************************************************/
8939
0
    S32 id;
8940
0
    S08 i1_ref_idx;
8941
8942
    /*************************************************************************/
8943
    /* Input pointer and stride                                              */
8944
    /*************************************************************************/
8945
0
    U08 *pu1_inp;
8946
0
    S32 i4_inp_stride;
8947
8948
0
    S32 end_of_frame;
8949
8950
0
    S32 num_sync_units_in_row;
8951
8952
0
    PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
8953
0
    ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
8954
8955
    /*************************************************************************/
8956
    /* Pointers to current and coarse layer are needed for projection */
8957
    /* Pointer to prev layer are needed for other candts like coloc   */
8958
    /*************************************************************************/
8959
0
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
8960
8961
0
    ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
8962
8963
0
    num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
8964
8965
    /* Function pointer is selected based on the C vs X86 macro */
8966
8967
0
    fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
8968
0
                        ->pf_get_wt_inp_8x8;
8969
8970
0
    i4_inp_stride = ps_curr_layer->i4_inp_stride;
8971
0
    i4_pic_wd = ps_curr_layer->i4_wd;
8972
0
    i4_pic_ht = ps_curr_layer->i4_ht;
8973
0
    e_search_complexity = ps_refine_prms->e_search_complexity;
8974
8975
0
    end_of_frame = 0;
8976
8977
    /* If the previous layer is non-encode layer, then use dyadic projection */
8978
0
    if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
8979
0
        pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
8980
0
    else
8981
0
        pf_hme_project_coloc_candt = hme_project_coloc_candt;
8982
8983
    /* This points to all the initial candts */
8984
0
    ps_search_candts = &as_search_candts[0];
8985
8986
0
    {
8987
0
        e_search_blk_size = BLK_8x8;
8988
0
        blk_wd = blk_ht = 8;
8989
0
        blk_size_shift = 3;
8990
0
        s_mv_update_prms.i4_shift = 0;
8991
        /*********************************************************************/
8992
        /* In case we do not encode this layer, we search 8x8 with or without*/
8993
        /* enable 4x4 SAD.                                                   */
8994
        /*********************************************************************/
8995
0
        {
8996
0
            S32 i4_mask = (ENABLE_2Nx2N);
8997
8998
0
            e_result_blk_size = BLK_8x8;
8999
0
            if(ps_refine_prms->i4_enable_4x4_part)
9000
0
            {
9001
0
                i4_mask |= (ENABLE_NxN);
9002
0
                e_result_blk_size = BLK_4x4;
9003
0
                s_mv_update_prms.i4_shift = 1;
9004
0
            }
9005
9006
0
            s_search_prms_blk.i4_part_mask = i4_mask;
9007
0
        }
9008
9009
0
        unit_size = blk_wd;
9010
0
        s_search_prms_blk.i4_inp_stride = unit_size;
9011
0
    }
9012
9013
    /* This is required to properly update the layer mv bank */
9014
0
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
9015
0
    s_search_prms_blk.e_blk_size = e_search_blk_size;
9016
9017
    /*************************************************************************/
9018
    /* If current layer is explicit, then the number of ref frames are to    */
9019
    /* be same as previous layer. Else it will be 2                          */
9020
    /*************************************************************************/
9021
0
    i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
9022
0
    if(ps_refine_prms->explicit_ref)
9023
0
    {
9024
0
        curr_layer_implicit = 0;
9025
0
        i4_num_ref_fpel = i4_num_ref_prev_layer;
9026
        /* 100578 : Using same mv cost fun. for all presets. */
9027
0
        s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
9028
0
    }
9029
0
    else
9030
0
    {
9031
0
        i4_num_ref_fpel = 2;
9032
0
        curr_layer_implicit = 1;
9033
0
        {
9034
0
            if(ME_MEDIUM_SPEED > e_me_quality_presets)
9035
0
            {
9036
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
9037
0
            }
9038
0
            else
9039
0
            {
9040
0
#if USE_MODIFIED == 1
9041
0
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
9042
#else
9043
                s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
9044
#endif
9045
0
            }
9046
0
        }
9047
0
    }
9048
9049
0
    i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
9050
0
    if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
9051
0
           IV_IDR_FRAME ||
9052
0
       ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
9053
0
    {
9054
0
        i4_num_ref_fpel = 1;
9055
0
    }
9056
0
    if(i4_num_ref_prev_layer <= 2)
9057
0
    {
9058
0
        prev_layer_implicit = 1;
9059
0
        curr_layer_implicit = 1;
9060
0
        i4_num_ref_each_dir = 1;
9061
0
    }
9062
0
    else
9063
0
    {
9064
        /* It is assumed that we have equal number of references in each dir */
9065
        //ASSERT(!(i4_num_ref_prev_layer & 1));
9066
0
        prev_layer_implicit = 0;
9067
0
        i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
9068
0
    }
9069
0
    s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
9070
0
    s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
9071
0
    s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
9072
9073
    /* this can be kept to 1 or 2 */
9074
0
    i4_num_ref_before_merge = 2;
9075
0
    i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
9076
9077
    /* Set up place holders to hold the search nodes of each initial candt */
9078
0
    for(i = 0; i < MAX_INIT_CANDTS; i++)
9079
0
    {
9080
0
        ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
9081
0
        INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
9082
0
    }
9083
9084
    /* redundant, but doing it here since it is used in pred ctxt init */
9085
0
    ps_candt_zeromv = ps_search_candts[0].ps_search_node;
9086
0
    for(i = 0; i < 3; i++)
9087
0
    {
9088
0
        search_node_t *ps_search_node;
9089
0
        ps_search_node = &as_left_neighbours[i];
9090
0
        INIT_SEARCH_NODE(ps_search_node, 0);
9091
0
        ps_search_node = &as_top_neighbours[i];
9092
0
        INIT_SEARCH_NODE(ps_search_node, 0);
9093
0
    }
9094
9095
0
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
9096
    /* bottom left node always not available for the blk being searched */
9097
0
    as_left_neighbours[2].u1_is_avail = 0;
9098
    /*************************************************************************/
9099
    /* Initialize all the search results structure here. We update all the   */
9100
    /* search results to default values, and configure things like blk sizes */
9101
    /*************************************************************************/
9102
0
    if(ps_refine_prms->i4_encode == 0)
9103
0
    {
9104
0
        S32 pred_lx;
9105
0
        search_results_t *ps_search_results;
9106
9107
0
        ps_search_results = &ps_ctxt->s_search_results_8x8;
9108
0
        hme_init_search_results(
9109
0
            ps_search_results,
9110
0
            i4_num_ref_fpel,
9111
0
            ps_refine_prms->i4_num_fpel_results,
9112
0
            ps_refine_prms->i4_num_results_per_part,
9113
0
            e_search_blk_size,
9114
0
            0,
9115
0
            0,
9116
0
            &ps_ctxt->au1_is_past[0]);
9117
0
        for(pred_lx = 0; pred_lx < 2; pred_lx++)
9118
0
        {
9119
0
            hme_init_pred_ctxt_no_encode(
9120
0
                &ps_search_results->as_pred_ctxt[pred_lx],
9121
0
                ps_search_results,
9122
0
                &as_top_neighbours[0],
9123
0
                &as_left_neighbours[0],
9124
0
                &ps_candt_prj_coloc[0],
9125
0
                ps_candt_zeromv,
9126
0
                ps_candt_zeromv,
9127
0
                pred_lx,
9128
0
                lambda_inp,
9129
0
                ps_refine_prms->lambda_q_shift,
9130
0
                &ps_ctxt->apu1_ref_bits_tlu_lc[0],
9131
0
                &ps_ctxt->ai2_ref_scf[0]);
9132
0
        }
9133
0
    }
9134
9135
    /*********************************************************************/
9136
    /* Initialize the dyn. search range params. for each reference index */
9137
    /* in current layer ctxt                                             */
9138
    /*********************************************************************/
9139
    /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
9140
0
    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
9141
0
    {
9142
0
        WORD32 ref_ctr;
9143
9144
0
        for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
9145
0
        {
9146
0
            INIT_DYN_SEARCH_PRMS(
9147
0
                &ps_ctxt->s_coarse_dyn_range_prms
9148
0
                     .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
9149
0
                ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
9150
0
        }
9151
0
    }
9152
9153
    /* Next set up initial candidates according to a given set of rules.   */
9154
    /* The number of initial candidates affects the quality of ME in the   */
9155
    /* case of motion with multiple degrees of freedom. In case of simple  */
9156
    /* translational motion, a current and a few causal and non causal     */
9157
    /* candts would suffice. More candidates help to cover more complex    */
9158
    /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
9159
    /* where multiple ref helps etc.                                       */
9160
    /* The candidate choice also depends on the following parameters.      */
9161
    /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH         */
9162
    /* Whether we encode or not, and the type of search across reference   */
9163
    /* i.e. the previous layer may have been explicit/implicit and curr    */
9164
    /* layer may be explicit/implicit                                      */
9165
9166
    /* 0, 0, L, T, projected coloc best always present by default */
9167
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
9168
0
    ps_candt_zeromv = ps_search_candts[id].ps_search_node;
9169
0
    ps_search_candts[id].u1_num_steps_refine = 0;
9170
0
    ps_candt_zeromv->s_mv.i2_mvx = 0;
9171
0
    ps_candt_zeromv->s_mv.i2_mvy = 0;
9172
9173
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
9174
0
    ps_candt_l = ps_search_candts[id].ps_search_node;
9175
0
    ps_search_candts[id].u1_num_steps_refine = 0;
9176
9177
    /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
9178
    /* not at the CTB boundary use the causal T and */
9179
    /* not the projected T, although the candidate is */
9180
    /* still pointed to by ps_candt_prj_t[0] */
9181
0
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9182
0
    {
9183
        /* Using Projected top to eliminate sync */
9184
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9185
0
            PROJECTED_TOP0, e_me_quality_presets);
9186
0
        ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
9187
0
        ps_search_candts[id].u1_num_steps_refine = 1;
9188
0
    }
9189
0
    else
9190
0
    {
9191
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9192
0
            SPATIAL_TOP0, e_me_quality_presets);
9193
0
        ps_candt_t = ps_search_candts[id].ps_search_node;
9194
0
        ps_search_candts[id].u1_num_steps_refine = 0;
9195
0
    }
9196
9197
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9198
0
        PROJECTED_COLOC0, e_me_quality_presets);
9199
0
    ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
9200
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9201
9202
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9203
0
        PROJECTED_COLOC1, e_me_quality_presets);
9204
0
    ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
9205
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9206
9207
0
    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9208
0
    {
9209
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9210
0
            PROJECTED_TOP_RIGHT0, e_me_quality_presets);
9211
0
        ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
9212
0
        ps_search_candts[id].u1_num_steps_refine = 1;
9213
9214
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9215
0
            PROJECTED_TOP_LEFT0, e_me_quality_presets);
9216
0
        ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
9217
0
        ps_search_candts[id].u1_num_steps_refine = 1;
9218
0
    }
9219
0
    else
9220
0
    {
9221
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9222
0
            SPATIAL_TOP_RIGHT0, e_me_quality_presets);
9223
0
        ps_candt_tr = ps_search_candts[id].ps_search_node;
9224
0
        ps_search_candts[id].u1_num_steps_refine = 0;
9225
9226
0
        id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9227
0
            SPATIAL_TOP_LEFT0, e_me_quality_presets);
9228
0
        ps_candt_tl = ps_search_candts[id].ps_search_node;
9229
0
        ps_search_candts[id].u1_num_steps_refine = 0;
9230
0
    }
9231
9232
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9233
0
        PROJECTED_RIGHT0, e_me_quality_presets);
9234
0
    ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
9235
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9236
9237
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9238
0
        PROJECTED_BOTTOM0, e_me_quality_presets);
9239
0
    ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
9240
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9241
9242
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9243
0
        PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
9244
0
    ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
9245
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9246
9247
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9248
0
        PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
9249
0
    ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
9250
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9251
9252
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9253
0
        PROJECTED_RIGHT1, e_me_quality_presets);
9254
0
    ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
9255
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9256
9257
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9258
0
        PROJECTED_BOTTOM1, e_me_quality_presets);
9259
0
    ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
9260
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9261
9262
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9263
0
        PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
9264
0
    ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
9265
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9266
9267
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9268
0
        PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
9269
0
    ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
9270
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9271
9272
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
9273
0
    ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
9274
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9275
9276
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9277
0
        PROJECTED_TOP_RIGHT1, e_me_quality_presets);
9278
0
    ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
9279
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9280
9281
0
    id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
9282
0
        PROJECTED_TOP_LEFT1, e_me_quality_presets);
9283
0
    ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
9284
0
    ps_search_candts[id].u1_num_steps_refine = 1;
9285
9286
    /*************************************************************************/
9287
    /* Now that the candidates have been ordered, to choose the right number */
9288
    /* of initial candidates.                                                */
9289
    /*************************************************************************/
9290
0
    if(curr_layer_implicit && !prev_layer_implicit)
9291
0
    {
9292
0
        if(e_search_complexity == SEARCH_CX_LOW)
9293
0
            num_init_candts = 7;
9294
0
        else if(e_search_complexity == SEARCH_CX_MED)
9295
0
            num_init_candts = 13;
9296
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9297
0
            num_init_candts = 18;
9298
0
        else
9299
0
            ASSERT(0);
9300
0
    }
9301
0
    else
9302
0
    {
9303
0
        if(e_search_complexity == SEARCH_CX_LOW)
9304
0
            num_init_candts = 5;
9305
0
        else if(e_search_complexity == SEARCH_CX_MED)
9306
0
            num_init_candts = 11;
9307
0
        else if(e_search_complexity == SEARCH_CX_HIGH)
9308
0
            num_init_candts = 16;
9309
0
        else
9310
0
            ASSERT(0);
9311
0
    }
9312
9313
0
    if(ME_XTREME_SPEED_25 == e_me_quality_presets)
9314
0
    {
9315
0
        num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
9316
0
    }
9317
9318
    /*************************************************************************/
9319
    /* The following search parameters are fixed throughout the search across*/
9320
    /* all blks. So these are configured outside processing loop             */
9321
    /*************************************************************************/
9322
0
    s_search_prms_blk.i4_num_init_candts = num_init_candts;
9323
0
    s_search_prms_blk.i4_start_step = 1;
9324
0
    s_search_prms_blk.i4_use_satd = 0;
9325
0
    s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
9326
    /* we use recon only for encoded layers, otherwise it is not available */
9327
0
    s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
9328
9329
0
    s_search_prms_blk.ps_search_candts = ps_search_candts;
9330
    /* We use the same mv_range for all ref. pic. So assign to member 0 */
9331
0
    if(s_search_prms_blk.i4_use_rec)
9332
0
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
9333
0
    else
9334
0
        s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
9335
    /*************************************************************************/
9336
    /* Initialize coordinates. Meaning as follows                            */
9337
    /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks     */
9338
    /* blk_y : same as above, y coord.                                       */
9339
    /* num_blks_in_this_ctb : number of blks in this given ctb that starts   */
9340
    /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries.      */
9341
    /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left  */
9342
    /* corner of the picture. Always multiple of 64.                         */
9343
    /* blk_id_in_ctb : encode order id of the blk in the ctb.                */
9344
    /*************************************************************************/
9345
0
    blk_y = 0;
9346
0
    blk_id_in_ctb = 0;
9347
9348
0
    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
9349
9350
    /* Get the number of sync units in a row based on encode/non encode layer */
9351
0
    num_sync_units_in_row = num_blks_in_row;
9352
9353
    /*************************************************************************/
9354
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
9355
    /* every block given its coordinate. Note this assumes that the min amt  */
9356
    /* of padding to right of pic is equal to the blk size. If we go all the */
9357
    /* way up to 64x64, then the min padding on right side of picture should */
9358
    /* be 64, and also on bottom side of picture.                            */
9359
    /*************************************************************************/
9360
0
    SET_PIC_LIMIT(
9361
0
        s_pic_limit_inp,
9362
0
        ps_curr_layer->i4_pad_x_inp,
9363
0
        ps_curr_layer->i4_pad_y_inp,
9364
0
        ps_curr_layer->i4_wd,
9365
0
        ps_curr_layer->i4_ht,
9366
0
        s_search_prms_blk.i4_num_steps_post_refine);
9367
9368
0
    SET_PIC_LIMIT(
9369
0
        s_pic_limit_rec,
9370
0
        ps_curr_layer->i4_pad_x_rec,
9371
0
        ps_curr_layer->i4_pad_y_rec,
9372
0
        ps_curr_layer->i4_wd,
9373
0
        ps_curr_layer->i4_ht,
9374
0
        s_search_prms_blk.i4_num_steps_post_refine);
9375
9376
    /*************************************************************************/
9377
    /* set the MV limit per ref. pic.                                        */
9378
    /*    - P pic. : Based on the config params.                             */
9379
    /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
9380
    /*************************************************************************/
9381
0
    {
9382
0
        WORD32 ref_ctr;
9383
        /* Only for B/b pic. */
9384
0
        if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
9385
0
        {
9386
0
            WORD16 i2_mv_y_per_poc, i2_max_mv_y;
9387
0
            WORD32 cur_poc, ref_poc, abs_poc_diff;
9388
9389
0
            cur_poc = ps_ctxt->i4_curr_poc;
9390
9391
            /* Get abs MAX for symmetric search */
9392
0
            i2_mv_y_per_poc = MAX(
9393
0
                ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
9394
0
                (ABS(ps_ctxt->s_coarse_dyn_range_prms
9395
0
                         .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
9396
9397
0
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9398
0
            {
9399
0
                ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
9400
0
                abs_poc_diff = ABS((cur_poc - ref_poc));
9401
                /* Get the cur. max MV based on POC distance */
9402
0
                i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
9403
0
                i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
9404
9405
0
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9406
0
                as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
9407
0
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9408
0
                as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
9409
0
            }
9410
0
        }
9411
0
        else
9412
0
        {
9413
            /* Set the Config. File Params for P pic. */
9414
0
            for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
9415
0
            {
9416
0
                as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
9417
0
                as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
9418
0
                as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
9419
0
                as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
9420
0
            }
9421
0
        }
9422
0
    }
9423
9424
    /* EIID: Calculate threshold based on quality preset and/or temporal layers */
9425
0
    if(e_me_quality_presets == ME_MEDIUM_SPEED)
9426
0
    {
9427
0
        i4_threshold_multiplier = 1;
9428
0
        i4_threshold_divider = 4;
9429
0
    }
9430
0
    else if(e_me_quality_presets == ME_HIGH_SPEED)
9431
0
    {
9432
0
        i4_threshold_multiplier = 1;
9433
0
        i4_threshold_divider = 2;
9434
0
    }
9435
0
    else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
9436
0
    {
9437
#if OLD_XTREME_SPEED
9438
        /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
9439
        i4_temporal_layer = 1;
9440
#endif
9441
0
        if(i4_temporal_layer == 0)
9442
0
        {
9443
0
            i4_threshold_multiplier = 3;
9444
0
            i4_threshold_divider = 4;
9445
0
        }
9446
0
        else if(i4_temporal_layer == 1)
9447
0
        {
9448
0
            i4_threshold_multiplier = 3;
9449
0
            i4_threshold_divider = 4;
9450
0
        }
9451
0
        else if(i4_temporal_layer == 2)
9452
0
        {
9453
0
            i4_threshold_multiplier = 1;
9454
0
            i4_threshold_divider = 1;
9455
0
        }
9456
0
        else
9457
0
        {
9458
0
            i4_threshold_multiplier = 5;
9459
0
            i4_threshold_divider = 4;
9460
0
        }
9461
0
    }
9462
0
    else if(e_me_quality_presets == ME_HIGH_QUALITY)
9463
0
    {
9464
0
        i4_threshold_multiplier = 1;
9465
0
        i4_threshold_divider = 1;
9466
0
    }
9467
9468
    /*************************************************************************/
9469
    /*************************************************************************/
9470
    /*************************************************************************/
9471
    /* START OF THE CORE LOOP                                                */
9472
    /* If Encode is 0, then we just loop over each blk                       */
9473
    /*************************************************************************/
9474
    /*************************************************************************/
9475
    /*************************************************************************/
9476
0
    while(0 == end_of_frame)
9477
0
    {
9478
0
        job_queue_t *ps_job;
9479
0
        ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row;  //EIID
9480
0
        WORD32 i4_ctb_row_ctr;  //counter to calculate CTB row counter. It's (row_ctr /4)
9481
0
        WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4;  //calculations verified for L1 only
9482
        //+3 to get ceil values when divided by 4
9483
0
        WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
9484
0
            8 * 8;  //considering CTB size 32x32 at L1. hardcoded for now
9485
        //if there is variable for ctb size use that and this variable can be derived
9486
0
        WORD32 offset_val, check_dep_pos, set_dep_pos;
9487
0
        void *pv_hme_dep_mngr;
9488
0
        ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
9489
9490
        /* Get the current layer HME Dep Mngr       */
9491
        /* Note : Use layer_id - 1 in HME layers    */
9492
9493
0
        pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
9494
9495
        /* Get the current row from the job queue */
9496
0
        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
9497
0
            ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
9498
9499
        /* If all rows are done, set the end of process flag to 1, */
9500
        /* and the current row to -1 */
9501
0
        if(NULL == ps_job)
9502
0
        {
9503
0
            blk_y = -1;
9504
0
            end_of_frame = 1;
9505
9506
0
            continue;
9507
0
        }
9508
9509
0
        if(1 == ps_ctxt->s_frm_prms.is_i_pic)
9510
0
        {
9511
            /* set the output dependency of current row */
9512
0
            ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
9513
0
            continue;
9514
0
        }
9515
9516
0
        blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
9517
0
        blk_x = 0;
9518
0
        i4_ctb_x = 0;
9519
9520
        /* wait for Corresponding Pre intra Job to be completed */
9521
0
        if(1 == ps_refine_prms->i4_layer_id)
9522
0
        {
9523
0
            volatile UWORD32 i4_l1_done;
9524
0
            volatile UWORD32 *pi4_l1_done;
9525
0
            pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
9526
0
                              ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
9527
0
            i4_l1_done = *pi4_l1_done;
9528
0
            while(!i4_l1_done)
9529
0
            {
9530
0
                i4_l1_done = *pi4_l1_done;
9531
0
            }
9532
0
        }
9533
        /* Set Variables for Dep. Checking and Setting */
9534
0
        set_dep_pos = blk_y + 1;
9535
0
        if(blk_y > 0)
9536
0
        {
9537
0
            offset_val = 2;
9538
0
            check_dep_pos = blk_y - 1;
9539
0
        }
9540
0
        else
9541
0
        {
9542
            /* First row should run without waiting */
9543
0
            offset_val = -1;
9544
0
            check_dep_pos = 0;
9545
0
        }
9546
9547
        /* EIID: calculate ed_blk_ctxt pointer for current row */
9548
        /* valid for only layer-1. not verified and used for other layers */
9549
0
        i4_ctb_row_ctr = blk_y / 4;
9550
0
        ps_ed_blk_ctxt_curr_row =
9551
0
            ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
9552
0
                                  i4_num_4x4_blocks_in_ctb_at_l1);  //valid for L1 only
9553
0
        ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
9554
9555
        /* if non-encode layer then i4_ctb_x will be same as blk_x */
9556
        /* loop over all the units in a row                        */
9557
0
        for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
9558
0
        {
9559
0
            ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb;  //EIDD
9560
0
            ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
9561
0
            WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
9562
9563
            /* Wait till top row block is processed   */
9564
            /* Currently checking till top right block*/
9565
9566
            /* Disabled since all candidates, except for */
9567
            /* L and C, are projected from the coarser layer, */
9568
            /* only in ME_HIGH_SPEED mode */
9569
0
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
9570
0
            {
9571
0
                if(i4_ctb_x < (num_sync_units_in_row - 1))
9572
0
                {
9573
0
                    ihevce_dmgr_chk_row_row_sync(
9574
0
                        pv_hme_dep_mngr,
9575
0
                        i4_ctb_x,
9576
0
                        offset_val,
9577
0
                        check_dep_pos,
9578
0
                        0, /* Col Tile No. : Not supported in PreEnc*/
9579
0
                        ps_ctxt->thrd_id);
9580
0
                }
9581
0
            }
9582
9583
0
            {
9584
                /* for non encoder layer only one block is processed */
9585
0
                num_blks_in_this_ctb = 1;
9586
0
            }
9587
9588
            /* EIID: derive ed_ctxt ptr for current CTB */
9589
0
            ps_ed_blk_ctxt_curr_ctb =
9590
0
                ps_ed_blk_ctxt_curr_row +
9591
0
                (i4_ctb_blk_ctr *
9592
0
                 i4_num_4x4_blocks_in_ctb_at_l1);  //currently valid for l1 layer only
9593
0
            ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
9594
9595
            /* loop over all the blocks in CTB will always be 1 */
9596
0
            for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
9597
0
            {
9598
0
                {
9599
                    /* non encode layer */
9600
0
                    blk_x = i4_ctb_x;
9601
0
                    blk_id_in_full_ctb = 0;
9602
0
                    s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
9603
0
                }
9604
9605
                /* get the current input blk point */
9606
0
                pos_x = blk_x << blk_size_shift;
9607
0
                pos_y = blk_y << blk_size_shift;
9608
0
                pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
9609
9610
                /*********************************************************************/
9611
                /* replicate the inp buffer at blk or ctb level for each ref id,     */
9612
                /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
9613
                /* thereby avoiding a bloat up of memory. If we did all references   */
9614
                /* weighted pred, we will end up with a duplicate copy of each ref   */
9615
                /* at each layer, since we need to preserve the original reference.  */
9616
                /* ToDo: Need to observe performance with this mechanism and compare */
9617
                /* with case where ref is weighted.                                  */
9618
                /*********************************************************************/
9619
0
                if(blk_id_in_ctb == 0)
9620
0
                {
9621
0
                    fp_get_wt_inp(
9622
0
                        ps_curr_layer,
9623
0
                        &ps_ctxt->s_wt_pred,
9624
0
                        unit_size,
9625
0
                        pos_x,
9626
0
                        pos_y,
9627
0
                        unit_size,
9628
0
                        ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
9629
0
                        ps_ctxt->i4_wt_pred_enable_flag);
9630
0
                }
9631
9632
0
                s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
9633
0
                s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
9634
                /* Select search results from a suitable search result in the context */
9635
0
                {
9636
0
                    ps_search_results = &ps_ctxt->s_search_results_8x8;
9637
0
                }
9638
9639
0
                s_search_prms_blk.ps_search_results = ps_search_results;
9640
9641
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
9642
0
                hme_reset_search_results(
9643
0
                    ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
9644
9645
                /* Loop across different Ref IDx */
9646
0
                for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
9647
0
                {
9648
0
                    S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
9649
0
                    S32 prev_blk_offset = 6;
9650
0
                    S32 resultid;
9651
9652
                    /*********************************************************************/
9653
                    /* For every blk in the picture, the search range needs to be derived*/
9654
                    /* Any blk can have any mv, but practical search constraints are     */
9655
                    /* imposed by the picture boundary and amt of padding.               */
9656
                    /*********************************************************************/
9657
                    /* MV limit is different based on ref. PIC */
9658
0
                    hme_derive_search_range(
9659
0
                        &s_range_prms_inp,
9660
0
                        &s_pic_limit_inp,
9661
0
                        &as_mv_limit[i1_ref_idx],
9662
0
                        pos_x,
9663
0
                        pos_y,
9664
0
                        blk_wd,
9665
0
                        blk_ht);
9666
0
                    hme_derive_search_range(
9667
0
                        &s_range_prms_rec,
9668
0
                        &s_pic_limit_rec,
9669
0
                        &as_mv_limit[i1_ref_idx],
9670
0
                        pos_x,
9671
0
                        pos_y,
9672
0
                        blk_wd,
9673
0
                        blk_ht);
9674
9675
0
                    s_search_prms_blk.i1_ref_idx = i1_ref_idx;
9676
0
                    ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
9677
9678
0
                    i4_num_srch_cands = 1;
9679
9680
0
                    if(1 != ps_refine_prms->i4_layer_id)
9681
0
                    {
9682
0
                        S32 x, y;
9683
0
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9684
0
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9685
9686
0
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9687
0
                        {
9688
0
                            hme_get_spatial_candt(
9689
0
                                ps_curr_layer,
9690
0
                                e_search_blk_size,
9691
0
                                blk_x,
9692
0
                                blk_y,
9693
0
                                i1_ref_idx,
9694
0
                                &as_top_neighbours[0],
9695
0
                                &as_left_neighbours[0],
9696
0
                                0,
9697
0
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9698
0
                                0,
9699
0
                                ps_refine_prms->i4_encode);
9700
9701
0
                            *ps_candt_tr = as_top_neighbours[3];
9702
0
                            *ps_candt_t = as_top_neighbours[1];
9703
0
                            *ps_candt_tl = as_top_neighbours[0];
9704
0
                            i4_num_srch_cands += 3;
9705
0
                        }
9706
0
                        else
9707
0
                        {
9708
0
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9709
0
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9710
0
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9711
0
                            search_node_t *ps_search_node;
9712
0
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9713
0
                            hme_mv_t *ps_mv, *ps_mv_base;
9714
0
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9715
0
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9716
0
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9717
9718
0
                            if(i4_blk_size1 != i4_blk_size2)
9719
0
                            {
9720
0
                                blk_x_temp <<= 1;
9721
0
                                blk_y_temp <<= 1;
9722
0
                                jump = 2;
9723
0
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9724
0
                                {
9725
0
                                    blk_x_temp <<= 1;
9726
0
                                    blk_y_temp <<= 1;
9727
0
                                    jump = 4;
9728
0
                                }
9729
0
                            }
9730
9731
0
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9732
0
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9733
9734
                            /* Adjust the blk coord to point to top left locn */
9735
0
                            blk_x_temp -= 1;
9736
0
                            blk_y_temp -= 1;
9737
9738
                            /* Pick up the mvs from the location */
9739
0
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9740
0
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9741
9742
0
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9743
0
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9744
9745
0
                            ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9746
0
                            pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
9747
9748
0
                            ps_mv_base = ps_mv;
9749
0
                            pi1_ref_idx_base = pi1_ref_idx;
9750
9751
0
                            ps_search_node = &as_left_neighbours[0];
9752
0
                            ps_mv = ps_mv_base + mvs_in_row;
9753
0
                            pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9754
0
                            COPY_MV_TO_SEARCH_NODE(
9755
0
                                ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
9756
9757
0
                            i4_num_srch_cands++;
9758
0
                        }
9759
0
                    }
9760
0
                    else
9761
0
                    {
9762
0
                        S32 x, y;
9763
0
                        x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
9764
0
                        y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
9765
9766
0
                        if(ME_MEDIUM_SPEED > e_me_quality_presets)
9767
0
                        {
9768
0
                            hme_get_spatial_candt_in_l1_me(
9769
0
                                ps_curr_layer,
9770
0
                                e_search_blk_size,
9771
0
                                blk_x,
9772
0
                                blk_y,
9773
0
                                i1_ref_idx,
9774
0
                                !ps_search_results->pu1_is_past[i1_ref_idx],
9775
0
                                &as_top_neighbours[0],
9776
0
                                &as_left_neighbours[0],
9777
0
                                0,
9778
0
                                ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
9779
0
                                0,
9780
0
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
9781
0
                                ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
9782
9783
0
                            *ps_candt_tr = as_top_neighbours[3];
9784
0
                            *ps_candt_t = as_top_neighbours[1];
9785
0
                            *ps_candt_tl = as_top_neighbours[0];
9786
9787
0
                            i4_num_srch_cands += 3;
9788
0
                        }
9789
0
                        else
9790
0
                        {
9791
0
                            layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
9792
0
                            S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
9793
0
                            S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
9794
0
                            S32 i4_mv_pos_in_implicit_array;
9795
0
                            search_node_t *ps_search_node;
9796
0
                            S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
9797
0
                            hme_mv_t *ps_mv, *ps_mv_base;
9798
0
                            S08 *pi1_ref_idx, *pi1_ref_idx_base;
9799
0
                            S32 jump = 1, mvs_in_blk, mvs_in_row;
9800
0
                            S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
9801
0
                            U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
9802
0
                            S32 i4_num_results_in_given_dir =
9803
0
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9804
0
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
9805
0
                                                    : (ps_layer_mvbank->i4_num_mvs_per_ref *
9806
0
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
9807
9808
0
                            if(i4_blk_size1 != i4_blk_size2)
9809
0
                            {
9810
0
                                blk_x_temp <<= 1;
9811
0
                                blk_y_temp <<= 1;
9812
0
                                jump = 2;
9813
0
                                if((i4_blk_size1 << 2) == i4_blk_size2)
9814
0
                                {
9815
0
                                    blk_x_temp <<= 1;
9816
0
                                    blk_y_temp <<= 1;
9817
0
                                    jump = 4;
9818
0
                                }
9819
0
                            }
9820
9821
0
                            mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
9822
0
                            mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
9823
9824
                            /* Adjust the blk coord to point to top left locn */
9825
0
                            blk_x_temp -= 1;
9826
0
                            blk_y_temp -= 1;
9827
9828
                            /* Pick up the mvs from the location */
9829
0
                            i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
9830
0
                            i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
9831
9832
0
                            i4_offset +=
9833
0
                                ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
9834
0
                                                       ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
9835
0
                                                    : 0);
9836
9837
0
                            ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
9838
0
                            pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
9839
9840
0
                            ps_mv_base = ps_mv;
9841
0
                            pi1_ref_idx_base = pi1_ref_idx;
9842
9843
0
                            {
9844
                                /* ps_mv and pi1_ref_idx now point to the top left locn */
9845
0
                                ps_search_node = &as_left_neighbours[0];
9846
0
                                ps_mv = ps_mv_base + mvs_in_row;
9847
0
                                pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
9848
9849
0
                                i4_mv_pos_in_implicit_array =
9850
0
                                    hme_find_pos_of_implicitly_stored_ref_id(
9851
0
                                        pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
9852
9853
0
                                if(-1 != i4_mv_pos_in_implicit_array)
9854
0
                                {
9855
0
                                    COPY_MV_TO_SEARCH_NODE(
9856
0
                                        ps_search_node,
9857
0
                                        &ps_mv[i4_mv_pos_in_implicit_array],
9858
0
                                        &pi1_ref_idx[i4_mv_pos_in_implicit_array],
9859
0
                                        i1_ref_idx,
9860
0
                                        shift);
9861
0
                                }
9862
0
                                else
9863
0
                                {
9864
0
                                    ps_search_node->u1_is_avail = 0;
9865
0
                                    ps_search_node->s_mv.i2_mvx = 0;
9866
0
                                    ps_search_node->s_mv.i2_mvy = 0;
9867
0
                                    ps_search_node->i1_ref_idx = i1_ref_idx;
9868
0
                                }
9869
9870
0
                                i4_num_srch_cands++;
9871
0
                            }
9872
0
                        }
9873
0
                    }
9874
9875
0
                    *ps_candt_l = as_left_neighbours[0];
9876
9877
                    /* when 16x16 is searched in an encode layer, and the prev layer */
9878
                    /* stores results for 4x4 blks, we project 5 candts corresponding */
9879
                    /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
9880
                    /* However in other cases, only 2,2 best and 2nd best reqd */
9881
0
                    resultid = 0;
9882
0
                    pf_hme_project_coloc_candt(
9883
0
                        ps_candt_prj_coloc[0],
9884
0
                        ps_curr_layer,
9885
0
                        ps_coarse_layer,
9886
0
                        pos_x + 2,
9887
0
                        pos_y + 2,
9888
0
                        i1_ref_idx,
9889
0
                        resultid);
9890
9891
0
                    i4_num_srch_cands++;
9892
9893
0
                    resultid = 1;
9894
0
                    if(num_results_prev_layer > 1)
9895
0
                    {
9896
0
                        pf_hme_project_coloc_candt(
9897
0
                            ps_candt_prj_coloc[1],
9898
0
                            ps_curr_layer,
9899
0
                            ps_coarse_layer,
9900
0
                            pos_x + 2,
9901
0
                            pos_y + 2,
9902
0
                            i1_ref_idx,
9903
0
                            resultid);
9904
9905
0
                        i4_num_srch_cands++;
9906
0
                    }
9907
9908
0
                    resultid = 0;
9909
9910
0
                    if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9911
0
                    {
9912
0
                        pf_hme_project_coloc_candt(
9913
0
                            ps_candt_prj_t[0],
9914
0
                            ps_curr_layer,
9915
0
                            ps_coarse_layer,
9916
0
                            pos_x,
9917
0
                            pos_y - prev_blk_offset,
9918
0
                            i1_ref_idx,
9919
0
                            resultid);
9920
9921
0
                        i4_num_srch_cands++;
9922
0
                    }
9923
9924
0
                    {
9925
0
                        pf_hme_project_coloc_candt(
9926
0
                            ps_candt_prj_br[0],
9927
0
                            ps_curr_layer,
9928
0
                            ps_coarse_layer,
9929
0
                            pos_x + next_blk_offset,
9930
0
                            pos_y + next_blk_offset,
9931
0
                            i1_ref_idx,
9932
0
                            resultid);
9933
0
                        pf_hme_project_coloc_candt(
9934
0
                            ps_candt_prj_bl[0],
9935
0
                            ps_curr_layer,
9936
0
                            ps_coarse_layer,
9937
0
                            pos_x - prev_blk_offset,
9938
0
                            pos_y + next_blk_offset,
9939
0
                            i1_ref_idx,
9940
0
                            resultid);
9941
0
                        pf_hme_project_coloc_candt(
9942
0
                            ps_candt_prj_r[0],
9943
0
                            ps_curr_layer,
9944
0
                            ps_coarse_layer,
9945
0
                            pos_x + next_blk_offset,
9946
0
                            pos_y,
9947
0
                            i1_ref_idx,
9948
0
                            resultid);
9949
0
                        pf_hme_project_coloc_candt(
9950
0
                            ps_candt_prj_b[0],
9951
0
                            ps_curr_layer,
9952
0
                            ps_coarse_layer,
9953
0
                            pos_x,
9954
0
                            pos_y + next_blk_offset,
9955
0
                            i1_ref_idx,
9956
0
                            resultid);
9957
9958
0
                        i4_num_srch_cands += 4;
9959
9960
0
                        if(ME_MEDIUM_SPEED <= e_me_quality_presets)
9961
0
                        {
9962
0
                            pf_hme_project_coloc_candt(
9963
0
                                ps_candt_prj_tr[0],
9964
0
                                ps_curr_layer,
9965
0
                                ps_coarse_layer,
9966
0
                                pos_x + next_blk_offset,
9967
0
                                pos_y - prev_blk_offset,
9968
0
                                i1_ref_idx,
9969
0
                                resultid);
9970
0
                            pf_hme_project_coloc_candt(
9971
0
                                ps_candt_prj_tl[0],
9972
0
                                ps_curr_layer,
9973
0
                                ps_coarse_layer,
9974
0
                                pos_x - prev_blk_offset,
9975
0
                                pos_y - prev_blk_offset,
9976
0
                                i1_ref_idx,
9977
0
                                resultid);
9978
9979
0
                            i4_num_srch_cands += 2;
9980
0
                        }
9981
0
                    }
9982
0
                    if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
9983
0
                    {
9984
0
                        resultid = 1;
9985
0
                        pf_hme_project_coloc_candt(
9986
0
                            ps_candt_prj_br[1],
9987
0
                            ps_curr_layer,
9988
0
                            ps_coarse_layer,
9989
0
                            pos_x + next_blk_offset,
9990
0
                            pos_y + next_blk_offset,
9991
0
                            i1_ref_idx,
9992
0
                            resultid);
9993
0
                        pf_hme_project_coloc_candt(
9994
0
                            ps_candt_prj_bl[1],
9995
0
                            ps_curr_layer,
9996
0
                            ps_coarse_layer,
9997
0
                            pos_x - prev_blk_offset,
9998
0
                            pos_y + next_blk_offset,
9999
0
                            i1_ref_idx,
10000
0
                            resultid);
10001
0
                        pf_hme_project_coloc_candt(
10002
0
                            ps_candt_prj_r[1],
10003
0
                            ps_curr_layer,
10004
0
                            ps_coarse_layer,
10005
0
                            pos_x + next_blk_offset,
10006
0
                            pos_y,
10007
0
                            i1_ref_idx,
10008
0
                            resultid);
10009
0
                        pf_hme_project_coloc_candt(
10010
0
                            ps_candt_prj_b[1],
10011
0
                            ps_curr_layer,
10012
0
                            ps_coarse_layer,
10013
0
                            pos_x,
10014
0
                            pos_y + next_blk_offset,
10015
0
                            i1_ref_idx,
10016
0
                            resultid);
10017
10018
0
                        i4_num_srch_cands += 4;
10019
10020
0
                        pf_hme_project_coloc_candt(
10021
0
                            ps_candt_prj_tr[1],
10022
0
                            ps_curr_layer,
10023
0
                            ps_coarse_layer,
10024
0
                            pos_x + next_blk_offset,
10025
0
                            pos_y - prev_blk_offset,
10026
0
                            i1_ref_idx,
10027
0
                            resultid);
10028
0
                        pf_hme_project_coloc_candt(
10029
0
                            ps_candt_prj_tl[1],
10030
0
                            ps_curr_layer,
10031
0
                            ps_coarse_layer,
10032
0
                            pos_x - prev_blk_offset,
10033
0
                            pos_y - prev_blk_offset,
10034
0
                            i1_ref_idx,
10035
0
                            resultid);
10036
0
                        pf_hme_project_coloc_candt(
10037
0
                            ps_candt_prj_t[1],
10038
0
                            ps_curr_layer,
10039
0
                            ps_coarse_layer,
10040
0
                            pos_x,
10041
0
                            pos_y - prev_blk_offset,
10042
0
                            i1_ref_idx,
10043
0
                            resultid);
10044
10045
0
                        i4_num_srch_cands += 3;
10046
0
                    }
10047
10048
                    /* Note this block also clips the MV range for all candidates */
10049
#ifdef _DEBUG
10050
                    {
10051
                        S32 candt;
10052
                        range_prms_t *ps_range_prms;
10053
10054
                        S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
10055
                        for(candt = 0; candt < i4_num_srch_cands; candt++)
10056
                        {
10057
                            search_node_t *ps_search_node;
10058
10059
                            ps_search_node =
10060
                                s_search_prms_blk.ps_search_candts[candt].ps_search_node;
10061
10062
                            ps_range_prms = s_search_prms_blk.aps_mv_range[0];
10063
10064
                            if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
10065
                               (ps_search_node->i1_ref_idx < 0))
10066
                            {
10067
                                ASSERT(0);
10068
                            }
10069
                        }
10070
                    }
10071
#endif
10072
10073
0
                    {
10074
0
                        S32 srch_cand;
10075
0
                        S32 num_unique_nodes = 0;
10076
0
                        S32 num_nodes_searched = 0;
10077
0
                        S32 num_best_cand = 0;
10078
0
                        S08 i1_grid_enable = 0;
10079
0
                        search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
10080
                        /* has list of valid partition to search terminated by -1 */
10081
0
                        S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
10082
0
                        S32 center_x;
10083
0
                        S32 center_y;
10084
10085
                        /* indicates if the centre point of grid needs to be explicitly added for search */
10086
0
                        S32 add_centre = 0;
10087
10088
0
                        memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
10089
0
                        center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
10090
0
                        center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
10091
10092
0
                        for(srch_cand = 0;
10093
0
                            (srch_cand < i4_num_srch_cands) &&
10094
0
                            (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
10095
0
                            srch_cand++)
10096
0
                        {
10097
0
                            search_node_t s_search_node_temp =
10098
0
                                s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
10099
10100
0
                            s_search_node_temp.i1_ref_idx = i1_ref_idx;  //TEMP FIX;
10101
10102
                            /* Clip the motion vectors as well here since after clipping
10103
                            two candidates can become same and they will be removed during deduplication */
10104
0
                            CLIP_MV_WITHIN_RANGE(
10105
0
                                s_search_node_temp.s_mv.i2_mvx,
10106
0
                                s_search_node_temp.s_mv.i2_mvy,
10107
0
                                s_search_prms_blk.aps_mv_range[0],
10108
0
                                ps_refine_prms->i4_num_steps_fpel_refine,
10109
0
                                ps_refine_prms->i4_num_steps_hpel_refine,
10110
0
                                ps_refine_prms->i4_num_steps_qpel_refine);
10111
10112
                            /* PT_C */
10113
0
                            INSERT_NEW_NODE(
10114
0
                                as_unique_search_nodes,
10115
0
                                num_unique_nodes,
10116
0
                                s_search_node_temp,
10117
0
                                0,
10118
0
                                au4_unique_node_map,
10119
0
                                center_x,
10120
0
                                center_y,
10121
0
                                1);
10122
10123
0
                            num_nodes_searched += 1;
10124
0
                        }
10125
0
                        num_unique_nodes =
10126
0
                            MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
10127
10128
                        /* If the number of candidates projected/number of candidates to be refined is 2 or more,
10129
                        then filter out and choose the best two here */
10130
0
                        if(num_unique_nodes >= 2)
10131
0
                        {
10132
0
                            S32 num_results;
10133
0
                            S32 cnt;
10134
0
                            S32 *pi4_valid_part_ids;
10135
0
                            s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10136
0
                            s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10137
0
                            pi4_valid_part_ids = &ai4_valid_part_ids[0];
10138
10139
                            /* pi4_valid_part_ids is updated inside */
10140
0
                            hme_pred_search_no_encode(
10141
0
                                &s_search_prms_blk,
10142
0
                                ps_curr_layer,
10143
0
                                &ps_ctxt->s_wt_pred,
10144
0
                                pi4_valid_part_ids,
10145
0
                                1,
10146
0
                                e_me_quality_presets,
10147
0
                                i1_grid_enable,
10148
0
                                (ihevce_me_optimised_function_list_t *)
10149
0
                                    ps_ctxt->pv_me_optimised_function_list
10150
10151
0
                            );
10152
10153
0
                            num_best_cand = 0;
10154
0
                            cnt = 0;
10155
0
                            num_results = ps_search_results->u1_num_results_per_part;
10156
10157
0
                            while((id = pi4_valid_part_ids[cnt++]) >= 0)
10158
0
                            {
10159
0
                                num_results =
10160
0
                                    MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
10161
10162
0
                                for(i = 0; i < num_results; i++)
10163
0
                                {
10164
0
                                    search_node_t s_search_node_temp;
10165
0
                                    s_search_node_temp =
10166
0
                                        *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
10167
0
                                    if(s_search_node_temp.i1_ref_idx >= 0)
10168
0
                                    {
10169
0
                                        INSERT_NEW_NODE_NOMAP(
10170
0
                                            as_best_two_proj_node,
10171
0
                                            num_best_cand,
10172
0
                                            s_search_node_temp,
10173
0
                                            0);
10174
0
                                    }
10175
0
                                }
10176
0
                            }
10177
0
                        }
10178
0
                        else
10179
0
                        {
10180
0
                            add_centre = 1;
10181
0
                            num_best_cand = num_unique_nodes;
10182
0
                            as_best_two_proj_node[0] = as_unique_search_nodes[0];
10183
0
                        }
10184
10185
0
                        num_unique_nodes = 0;
10186
0
                        num_nodes_searched = 0;
10187
10188
0
                        if(1 == num_best_cand)
10189
0
                        {
10190
0
                            search_node_t s_search_node_temp = as_best_two_proj_node[0];
10191
0
                            S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
10192
0
                            S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
10193
0
                            S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
10194
10195
0
                            i1_grid_enable = 1;
10196
10197
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10198
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10199
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10200
10201
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10202
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10203
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10204
10205
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10206
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
10207
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10208
10209
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10210
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10211
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10212
10213
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10214
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10215
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10216
10217
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
10218
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10219
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10220
10221
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10222
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10223
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10224
10225
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
10226
0
                            as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
10227
0
                            as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10228
10229
0
                            if(add_centre)
10230
0
                            {
10231
0
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
10232
0
                                as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
10233
0
                                as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
10234
0
                            }
10235
0
                        }
10236
0
                        else
10237
0
                        {
10238
                            /* For the candidates where refinement was required, choose the best two */
10239
0
                            for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
10240
0
                            {
10241
0
                                search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
10242
0
                                WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
10243
0
                                WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
10244
10245
                                /* Because there may not be two best unique candidates (because of clipping),
10246
                                second best candidate can be uninitialized, ignore that */
10247
0
                                if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
10248
0
                                   s_search_node_temp.i1_ref_idx < 0)
10249
0
                                {
10250
0
                                    num_nodes_searched++;
10251
0
                                    continue;
10252
0
                                }
10253
10254
                                /* PT_C */
10255
                                /* Since the center point has already been evaluated and best results are persistent,
10256
                                it will not be evaluated again */
10257
0
                                if(add_centre) /* centre point added explicitly again if search results is not updated */
10258
0
                                {
10259
0
                                    INSERT_NEW_NODE(
10260
0
                                        as_unique_search_nodes,
10261
0
                                        num_unique_nodes,
10262
0
                                        s_search_node_temp,
10263
0
                                        0,
10264
0
                                        au4_unique_node_map,
10265
0
                                        center_x,
10266
0
                                        center_y,
10267
0
                                        1);
10268
0
                                }
10269
10270
                                /* PT_L */
10271
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10272
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10273
0
                                INSERT_NEW_NODE(
10274
0
                                    as_unique_search_nodes,
10275
0
                                    num_unique_nodes,
10276
0
                                    s_search_node_temp,
10277
0
                                    0,
10278
0
                                    au4_unique_node_map,
10279
0
                                    center_x,
10280
0
                                    center_y,
10281
0
                                    1);
10282
10283
                                /* PT_T */
10284
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10285
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10286
0
                                INSERT_NEW_NODE(
10287
0
                                    as_unique_search_nodes,
10288
0
                                    num_unique_nodes,
10289
0
                                    s_search_node_temp,
10290
0
                                    0,
10291
0
                                    au4_unique_node_map,
10292
0
                                    center_x,
10293
0
                                    center_y,
10294
0
                                    1);
10295
10296
                                /* PT_R */
10297
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10298
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y;
10299
0
                                INSERT_NEW_NODE(
10300
0
                                    as_unique_search_nodes,
10301
0
                                    num_unique_nodes,
10302
0
                                    s_search_node_temp,
10303
0
                                    0,
10304
0
                                    au4_unique_node_map,
10305
0
                                    center_x,
10306
0
                                    center_y,
10307
0
                                    1);
10308
10309
                                /* PT_B */
10310
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x;
10311
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10312
0
                                INSERT_NEW_NODE(
10313
0
                                    as_unique_search_nodes,
10314
0
                                    num_unique_nodes,
10315
0
                                    s_search_node_temp,
10316
0
                                    0,
10317
0
                                    au4_unique_node_map,
10318
0
                                    center_x,
10319
0
                                    center_y,
10320
0
                                    1);
10321
10322
                                /* PT_TL */
10323
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10324
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10325
0
                                INSERT_NEW_NODE(
10326
0
                                    as_unique_search_nodes,
10327
0
                                    num_unique_nodes,
10328
0
                                    s_search_node_temp,
10329
0
                                    0,
10330
0
                                    au4_unique_node_map,
10331
0
                                    center_x,
10332
0
                                    center_y,
10333
0
                                    1);
10334
10335
                                /* PT_TR */
10336
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10337
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
10338
0
                                INSERT_NEW_NODE(
10339
0
                                    as_unique_search_nodes,
10340
0
                                    num_unique_nodes,
10341
0
                                    s_search_node_temp,
10342
0
                                    0,
10343
0
                                    au4_unique_node_map,
10344
0
                                    center_x,
10345
0
                                    center_y,
10346
0
                                    1);
10347
10348
                                /* PT_BL */
10349
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
10350
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10351
0
                                INSERT_NEW_NODE(
10352
0
                                    as_unique_search_nodes,
10353
0
                                    num_unique_nodes,
10354
0
                                    s_search_node_temp,
10355
0
                                    0,
10356
0
                                    au4_unique_node_map,
10357
0
                                    center_x,
10358
0
                                    center_y,
10359
0
                                    1);
10360
10361
                                /* PT_BR */
10362
0
                                s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
10363
0
                                s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
10364
0
                                INSERT_NEW_NODE(
10365
0
                                    as_unique_search_nodes,
10366
0
                                    num_unique_nodes,
10367
0
                                    s_search_node_temp,
10368
0
                                    0,
10369
0
                                    au4_unique_node_map,
10370
0
                                    center_x,
10371
0
                                    center_y,
10372
0
                                    1);
10373
0
                            }
10374
0
                        }
10375
10376
0
                        s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
10377
0
                        s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
10378
10379
                        /*****************************************************************/
10380
                        /* Call the search algorithm, this includes:                     */
10381
                        /* Pre-Search-Refinement (for coarse candts)                     */
10382
                        /* Search on each candidate                                      */
10383
                        /* Post Search Refinement on winners/other new candidates        */
10384
                        /*****************************************************************/
10385
10386
0
                        hme_pred_search_no_encode(
10387
0
                            &s_search_prms_blk,
10388
0
                            ps_curr_layer,
10389
0
                            &ps_ctxt->s_wt_pred,
10390
0
                            ai4_valid_part_ids,
10391
0
                            0,
10392
0
                            e_me_quality_presets,
10393
0
                            i1_grid_enable,
10394
0
                            (ihevce_me_optimised_function_list_t *)
10395
0
                                ps_ctxt->pv_me_optimised_function_list);
10396
10397
0
                        i1_grid_enable = 0;
10398
0
                    }
10399
0
                }
10400
10401
                /* for non encode layer update MV and end processing for block */
10402
0
                {
10403
0
                    WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
10404
0
                    search_node_t *ps_search_node;
10405
                    /* now update the reqd results back to the layer mv bank. */
10406
0
                    if(1 == ps_refine_prms->i4_layer_id)
10407
0
                    {
10408
0
                        hme_update_mv_bank_in_l1_me(
10409
0
                            ps_search_results,
10410
0
                            ps_curr_layer->ps_layer_mvbank,
10411
0
                            blk_x,
10412
0
                            blk_y,
10413
0
                            &s_mv_update_prms);
10414
0
                    }
10415
0
                    else
10416
0
                    {
10417
0
                        hme_update_mv_bank_noencode(
10418
0
                            ps_search_results,
10419
0
                            ps_curr_layer->ps_layer_mvbank,
10420
0
                            blk_x,
10421
0
                            blk_y,
10422
0
                            &s_mv_update_prms);
10423
0
                    }
10424
10425
                    /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
10426
                    /* Only for P pic. For P, is_i_pic and bidir_enabled are both 0; for I and B pics they are mutually exclusive */
10427
0
                    if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10428
0
                    {
10429
0
                        WORD32 i4_j;
10430
0
                        layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
10431
10432
                        //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
10433
                        /* Not considering this for Dyn. Search Update */
10434
0
                        {
10435
0
                            for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10436
0
                                i4_ref_id++)
10437
0
                            {
10438
0
                                ps_search_node =
10439
0
                                    ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10440
10441
0
                                for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
10442
0
                                {
10443
0
                                    hme_update_dynamic_search_params(
10444
0
                                        &ps_ctxt->s_coarse_dyn_range_prms
10445
0
                                             .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
10446
0
                                                               [i4_ref_id],
10447
0
                                        ps_search_node->s_mv.i2_mvy);
10448
10449
0
                                    ps_search_node++;
10450
0
                                }
10451
0
                            }
10452
0
                        }
10453
0
                    }
10454
10455
0
                    if(1 == ps_refine_prms->i4_layer_id)
10456
0
                    {
10457
0
                        WORD32 wt_pred_val, log_wt_pred_val;
10458
0
                        WORD32 ref_id_of_nearest_poc = 0;
10459
0
                        WORD32 max_val = 0x7fffffff;
10460
0
                        WORD32 max_l0_val = 0x7fffffff;
10461
0
                        WORD32 max_l1_val = 0x7fffffff;
10462
0
                        WORD32 cur_val;
10463
0
                        WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
10464
10465
0
                        WORD32 bestl0_sad = 0x7fffffff;
10466
0
                        WORD32 bestl1_sad = 0x7fffffff;
10467
0
                        search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
10468
10469
0
                        for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
10470
0
                            i4_ref_id++)
10471
0
                        {
10472
0
                            wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
10473
0
                            log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
10474
10475
0
                            ps_search_node =
10476
0
                                ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
10477
10478
0
                            i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
10479
0
                                                     ((1 << log_wt_pred_val) >> 1)) >>
10480
0
                                                    log_wt_pred_val;
10481
10482
0
                            i4_local_cost_weighted_pred =
10483
0
                                i4_local_weighted_sad +
10484
0
                                (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
10485
                            //the loop is redundant as the results are already sorted based on total cost
10486
                            //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
10487
0
                            {
10488
0
                                if(i4_local_cost_weighted_pred < min_cost)
10489
0
                                {
10490
0
                                    min_cost = i4_local_cost_weighted_pred;
10491
0
                                    min_sad = i4_local_weighted_sad;
10492
0
                                }
10493
0
                            }
10494
10495
                            /* For P frame, calculate the nearest poc which is either P or I frame*/
10496
0
                            if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10497
0
                            {
10498
0
                                if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
10499
0
                                {
10500
0
                                    cur_val =
10501
0
                                        ABS(ps_ctxt->i4_curr_poc -
10502
0
                                            ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
10503
0
                                    if(cur_val < max_val)
10504
0
                                    {
10505
0
                                        max_val = cur_val;
10506
0
                                        ref_id_of_nearest_poc = i4_ref_id;
10507
0
                                    }
10508
0
                                }
10509
0
                            }
10510
0
                        }
10511
                        /* Store the ME cost with respect to the past frame; done only for P frames */
10512
0
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
10513
0
                        {
10514
0
                            if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10515
0
                            {
10516
0
                                WORD16 i2_mvx, i2_mvy;
10517
10518
0
                                WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10519
0
                                WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10520
0
                                WORD32 z_scan_idx =
10521
0
                                    gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10522
0
                                WORD32 wt, log_wt;
10523
10524
                                /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
10525
                                <= (1 + ps_ctxt->num_b_frms));*/
10526
10527
                                /*obtain mvx and mvy */
10528
0
                                i2_mvx =
10529
0
                                    ps_search_results
10530
0
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10531
0
                                        ->s_mv.i2_mvx;
10532
0
                                i2_mvy =
10533
0
                                    ps_search_results
10534
0
                                        ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10535
0
                                        ->s_mv.i2_mvy;
10536
10537
                                /* fetch the prediction weight and the rounding-shift amount used to scale the SAD of the nearest-POC reference */
10538
0
                                wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
10539
0
                                log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
10540
10541
                                /*register the min cost for l1 me in blk context */
10542
0
                                ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
10543
0
                                    ((ps_search_results
10544
0
                                          ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10545
0
                                          ->i4_sad *
10546
0
                                      wt) +
10547
0
                                     ((1 << log_wt) >> 1)) >>
10548
0
                                    log_wt;
10549
0
                                ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
10550
0
                                    ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
10551
0
                                    (ps_search_results
10552
0
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10553
0
                                         ->i4_tot_cost -
10554
0
                                     ps_search_results
10555
0
                                         ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
10556
0
                                         ->i4_sad);
10557
                                /*for complexity change detection*/
10558
0
                                ps_ctxt->i4_num_blks++;
10559
0
                                if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
10560
0
                                   (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
10561
0
                                {
10562
0
                                    ps_ctxt->i4_num_blks_high_sad++;
10563
0
                                }
10564
0
                            }
10565
0
                        }
10566
0
                    }
10567
10568
                    /* EIID: Early inter intra decisions */
10569
                    /* tap L1 level SAD for inter intra decisions */
10570
0
                    if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
10571
0
                       (!ps_ctxt->s_frm_prms
10572
0
                             .is_i_pic))  //for high-quality preset->disable early decisions
10573
0
                    {
10574
0
                        if(1 == ps_refine_prms->i4_layer_id)
10575
0
                        {
10576
0
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10577
0
                            ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
10578
0
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10579
0
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10580
0
                            WORD32 z_scan_idx =
10581
0
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10582
0
                            ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
10583
10584
                            /*register the min cost for l1 me in blk context */
10585
0
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10586
0
                                i4_min_sad_cost_8x8_block;
10587
0
                            i4_num_comparisions++;
10588
10589
                            /* take early inter-intra decision here */
10590
0
                            ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
10591
0
#if DISABLE_INTRA_IN_BPICS
10592
0
                            if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
10593
0
                               (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
10594
0
                            {
10595
0
                                ps_curr_ed_blk_ctxt->intra_or_inter =
10596
0
                                    2; /*eval only inter if inter cost is less */
10597
0
                                i4_num_inter_wins++;
10598
0
                            }
10599
0
                            else
10600
0
#endif
10601
0
                            {
10602
0
                                if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
10603
0
                                   ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
10604
0
                                     i4_threshold_multiplier) /
10605
0
                                    i4_threshold_divider))
10606
0
                                {
10607
0
                                    ps_curr_ed_blk_ctxt->intra_or_inter =
10608
0
                                        2; /*eval only inter if inter cost is less */
10609
0
                                    i4_num_inter_wins++;
10610
0
                                }
10611
0
                            }
10612
10613
                            //{
10614
                            //  DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
10615
                            //      blk_x,blk_y,
10616
                            //      i4_ctb_blk_ctr, i4_ctb_row_ctr,
10617
                            //      ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
10618
                            //      i4_min_sad_cost_8x8_block
10619
                            //      );
10620
                            //}
10621
10622
0
                        }  //end of layer-1
10623
0
                    }  //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
10624
0
                    else
10625
0
                    {
10626
0
                        if(1 == ps_refine_prms->i4_layer_id)
10627
0
                        {
10628
0
                            WORD32 i4_min_sad_cost_8x8_block = min_cost;
10629
0
                            WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10630
0
                            WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10631
0
                            WORD32 z_scan_idx =
10632
0
                                gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10633
10634
                            /*register the min cost for l1 me in blk context */
10635
0
                            ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
10636
0
                                i4_min_sad_cost_8x8_block;
10637
0
                        }
10638
0
                    }
10639
0
                    if(1 == ps_refine_prms->i4_layer_id)
10640
0
                    {
10641
0
                        WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
10642
0
                        WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
10643
0
                        WORD32 z_scan_idx =
10644
0
                            gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
10645
10646
0
                        ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
10647
0
                            min_sad;
10648
10649
0
                        if(min_cost <
10650
0
                           ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
10651
0
                        {
10652
0
                            ps_ctxt->i4_L1_hme_best_cost += min_cost;
10653
0
                            ps_ctxt->i4_L1_hme_sad += min_sad;
10654
0
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
10655
0
                        }
10656
0
                        else
10657
0
                        {
10658
0
                            ps_ctxt->i4_L1_hme_best_cost +=
10659
0
                                ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
10660
0
                            ps_ctxt->i4_L1_hme_sad +=
10661
0
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10662
0
                            ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
10663
0
                                ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
10664
0
                        }
10665
0
                    }
10666
0
                }
10667
0
            }
10668
10669
            /* Update the number of blocks processed in the current row */
10670
0
            if((ME_MEDIUM_SPEED > e_me_quality_presets))
10671
0
            {
10672
0
                ihevce_dmgr_set_row_row_sync(
10673
0
                    pv_hme_dep_mngr,
10674
0
                    (i4_ctb_x + 1),
10675
0
                    blk_y,
10676
0
                    0 /* Col Tile No. : Not supported in PreEnc*/);
10677
0
            }
10678
0
        }
10679
10680
        /* set the output dependency after completion of row */
10681
0
        ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
10682
0
    }
10683
0
}