Coverage Report

Created: 2025-08-26 06:31

/src/libhevc/encoder/hme_utils.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*****************************************************************************/
22
/* File Includes                                                             */
23
/*****************************************************************************/
24
/* System include files */
25
#include <stdio.h>
26
#include <string.h>
27
#include <stdlib.h>
28
#include <assert.h>
29
#include <stdarg.h>
30
#include <math.h>
31
#include <limits.h>
32
33
/* User include files */
34
#include "ihevc_typedefs.h"
35
#include "itt_video_api.h"
36
#include "ihevce_api.h"
37
38
#include "rc_cntrl_param.h"
39
#include "rc_frame_info_collector.h"
40
#include "rc_look_ahead_params.h"
41
42
#include "ihevc_defs.h"
43
#include "ihevc_structs.h"
44
#include "ihevc_platform_macros.h"
45
#include "ihevc_deblk.h"
46
#include "ihevc_itrans_recon.h"
47
#include "ihevc_chroma_itrans_recon.h"
48
#include "ihevc_chroma_intra_pred.h"
49
#include "ihevc_intra_pred.h"
50
#include "ihevc_inter_pred.h"
51
#include "ihevc_mem_fns.h"
52
#include "ihevc_padding.h"
53
#include "ihevc_weighted_pred.h"
54
#include "ihevc_sao.h"
55
#include "ihevc_resi_trans.h"
56
#include "ihevc_quant_iquant_ssd.h"
57
#include "ihevc_cabac_tables.h"
58
59
#include "ihevce_defs.h"
60
#include "ihevce_lap_enc_structs.h"
61
#include "ihevce_multi_thrd_structs.h"
62
#include "ihevce_multi_thrd_funcs.h"
63
#include "ihevce_me_common_defs.h"
64
#include "ihevce_had_satd.h"
65
#include "ihevce_error_codes.h"
66
#include "ihevce_bitstream.h"
67
#include "ihevce_cabac.h"
68
#include "ihevce_rdoq_macros.h"
69
#include "ihevce_function_selector.h"
70
#include "ihevce_enc_structs.h"
71
#include "ihevce_entropy_structs.h"
72
#include "ihevce_cmn_utils_instr_set_router.h"
73
#include "ihevce_enc_loop_structs.h"
74
#include "ihevce_inter_pred.h"
75
#include "ihevce_global_tables.h"
76
#include "ihevce_dep_mngr_interface.h"
77
#include "hme_datatype.h"
78
#include "hme_interface.h"
79
#include "hme_common_defs.h"
80
#include "hme_defs.h"
81
#include "ihevce_me_instr_set_router.h"
82
#include "hme_globals.h"
83
#include "hme_utils.h"
84
#include "hme_coarse.h"
85
#include "hme_fullpel.h"
86
#include "hme_subpel.h"
87
#include "hme_refine.h"
88
#include "hme_err_compute.h"
89
#include "hme_common_utils.h"
90
#include "hme_search_algo.h"
91
#include "ihevce_stasino_helpers.h"
92
#include "ihevce_common_utils.h"
93
94
/*****************************************************************************/
95
/* Macros                                                                    */
96
/*****************************************************************************/
97
#define UNI_SATD_SCALE 1
98
99
/*****************************************************************************/
100
/* Function Definitions                                                      */
101
/*****************************************************************************/
102
void ihevce_open_loop_pred_data(
103
    me_frm_ctxt_t *ps_ctxt,
104
    inter_pu_results_t *ps_pu_results,
105
    U08 *pu1_src,
106
    U08 *pu1_temp_pred,
107
    S32 stride,
108
    S32 src_strd,
109
    UWORD8 e_part_id)
110
0
{
111
0
    S32 best_sad_l0 = -1, best_sad_l1 = -1;
112
0
    S32 sad_diff, status;
113
0
    inter_pred_me_ctxt_t *ps_inter_pred_me_ctxt;
114
0
    U08 enable_bi = 0;
115
0
    pu_t s_pu;
116
117
0
    ps_inter_pred_me_ctxt = &ps_ctxt->s_mc_ctxt;
118
0
    ps_ctxt->i4_count++;
119
    /* L0*/
120
0
    if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
121
0
    {
122
0
        pu_result_t *ps_best_l0_pu;
123
0
        ps_best_l0_pu = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];
124
0
        best_sad_l0 = ps_best_l0_pu->i4_tot_cost - ps_best_l0_pu->i4_mv_cost;
125
0
        s_pu.b2_pred_mode = PRED_L0;
126
0
        s_pu.b4_ht = ps_best_l0_pu->pu.b4_ht;
127
0
        s_pu.b4_wd = ps_best_l0_pu->pu.b4_wd;
128
0
        s_pu.b4_pos_x = ps_best_l0_pu->pu.b4_pos_x;
129
0
        s_pu.b4_pos_y = ps_best_l0_pu->pu.b4_pos_y;
130
0
        s_pu.b1_intra_flag = 0;
131
0
        s_pu.mv.s_l0_mv.i2_mvx = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvx;
132
0
        s_pu.mv.s_l0_mv.i2_mvy = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvy;
133
0
        s_pu.mv.i1_l0_ref_idx = ps_best_l0_pu->pu.mv.i1_l0_ref_idx;
134
0
    }
135
    /*L1*/
136
0
    if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
137
0
    {
138
0
        pu_result_t *ps_best_l1_pu;
139
0
        ps_best_l1_pu = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];
140
0
        best_sad_l1 = ps_best_l1_pu->i4_tot_cost - ps_best_l1_pu->i4_mv_cost;
141
0
        s_pu.b2_pred_mode = PRED_L1;
142
0
        s_pu.b4_ht = ps_best_l1_pu->pu.b4_ht;
143
0
        s_pu.b4_wd = ps_best_l1_pu->pu.b4_wd;
144
0
        s_pu.b4_pos_x = ps_best_l1_pu->pu.b4_pos_x;
145
0
        s_pu.b4_pos_y = ps_best_l1_pu->pu.b4_pos_y;
146
0
        s_pu.b1_intra_flag = 0;
147
0
        s_pu.mv.s_l1_mv.i2_mvx = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvx;
148
0
        s_pu.mv.s_l1_mv.i2_mvy = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvy;
149
0
        s_pu.mv.i1_l1_ref_idx = ps_best_l1_pu->pu.mv.i1_l1_ref_idx;
150
0
    }
151
0
    ASSERT((best_sad_l0 != -1) || (best_sad_l1 != -1));
152
    /*bi selection*/
153
0
    if((best_sad_l0 != -1) && (best_sad_l1 != -1))
154
0
    {
155
0
        sad_diff = abs(best_sad_l0 - best_sad_l1);
156
0
        if((sad_diff < (best_sad_l0 * 0.15)) && (sad_diff < (best_sad_l1 * 0.15)))
157
0
        {
158
0
            enable_bi = 1;
159
0
            s_pu.b2_pred_mode = PRED_BI;
160
0
        }
161
0
        if(!enable_bi)
162
0
        {
163
0
            if(best_sad_l0 < best_sad_l1)
164
0
            {
165
0
                s_pu.b2_pred_mode = PRED_L0;
166
0
            }
167
0
            else
168
0
            {
169
0
                s_pu.b2_pred_mode = PRED_L1;
170
0
            }
171
0
        }
172
0
    }
173
0
    status = ihevce_luma_inter_pred_pu(ps_inter_pred_me_ctxt, &s_pu, pu1_temp_pred, stride, 1);
174
0
    if(status == -1)
175
0
    {
176
0
        ASSERT(0);
177
0
    }
178
0
}
179
180
/**
181
********************************************************************************
182
*  @fn     void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
183
*
184
*  @brief  Allocates a block of size = i4_size from working memory and returns
185
*
186
*  @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
187
*
188
*  @param[in]  i4_size : size required
189
*
190
*  @return void pointer to allocated memory, NULL if failure
191
********************************************************************************
192
*/
193
void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
194
4.81M
{
195
4.81M
    U08 *pu1_mem;
196
197
4.81M
    if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
198
0
        return NULL;
199
200
4.81M
    pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
201
4.81M
    ps_buf_mgr->i4_used += i4_size;
202
203
4.81M
    return ((void *)pu1_mem);
204
4.81M
}
205
206
/**
207
********************************************************************************
208
*  @fn     hme_init_histogram(
209
*
210
*  @brief  Top level entry point for Coarse ME. Runs across blocks and does the
211
*          needful by calling other low level routines.
212
*
213
*  @param[in,out]  ps_hist : the histogram structure
214
*
215
*  @param[in]  i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
216
*
217
*  @param[in]  i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
218
*
219
*  @return None
220
********************************************************************************
221
*/
222
223
void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
224
150k
{
225
150k
    S32 i4_num_bins, i4_num_cols, i4_num_rows;
226
150k
    S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;
227
228
    /*************************************************************************/
229
    /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
230
    /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is  */
231
    /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
232
    /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
233
    /* if above quantity is negative, then we make it zero.                  */
234
    /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */
235
    /*************************************************************************/
236
150k
    i4_val = i4_max_mv_x * i4_max_mv_y * 4;
237
150k
    i4_range = (hme_get_range(i4_val - 1)) + 1;
238
150k
    if(i4_range > LOG_MAX_NUM_BINS)
239
125k
    {
240
125k
        i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
241
125k
        i4_shift_x = (i4_shift_y + 1) >> 1;
242
125k
        i4_shift_y >>= 1;
243
125k
    }
244
25.6k
    else
245
25.6k
    {
246
25.6k
        i4_shift_y = 0;
247
25.6k
        i4_shift_x = 0;
248
25.6k
    }
249
250
    /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
251
    /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
252
    /* this helps us compute num bins that are active for this histo session */
253
150k
    i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
254
150k
    i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
255
150k
    i4_num_bins = i4_num_rows * i4_num_cols;
256
257
150k
    ASSERT(i4_num_bins <= MAX_NUM_BINS);
258
259
150k
    ps_hist->i4_num_rows = i4_num_rows;
260
150k
    ps_hist->i4_num_cols = i4_num_cols;
261
150k
    ps_hist->i4_min_x = -i4_max_mv_x;
262
150k
    ps_hist->i4_min_y = -i4_max_mv_y;
263
150k
    ps_hist->i4_shift_x = i4_shift_x;
264
150k
    ps_hist->i4_shift_y = i4_shift_y;
265
150k
    ps_hist->i4_lobe1_size = 5;
266
150k
    ps_hist->i4_lobe2_size = 3;
267
268
150k
    ps_hist->i4_num_bins = i4_num_bins;
269
270
136M
    for(i = 0; i < i4_num_bins; i++)
271
136M
    {
272
136M
        ps_hist->ai4_bin_count[i] = 0;
273
136M
    }
274
150k
}
275
276
/**
277
********************************************************************************
278
*  @fn     hme_update_histogram(
279
*
280
*  @brief  Updates the histogram given an mv entry
281
*
282
*  @param[in,out]  ps_hist : the histogram structure
283
*
284
*  @param[in]  i4_mv_x : x component of the mv (fpel units)
285
*
286
*  @param[in]  i4_mv_y : y component of the mv (fpel units)
287
*
288
*  @return None
289
********************************************************************************
290
*/
291
void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
292
5.33M
{
293
5.33M
    S32 i4_bin_index, i4_col, i4_row;
294
295
5.33M
    i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
296
5.33M
    i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
297
298
5.33M
    i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
299
    /* Sanity Check */
300
5.33M
    ASSERT(i4_bin_index < MAX_NUM_BINS);
301
302
5.33M
    ps_hist->ai4_bin_count[i4_bin_index]++;
303
5.33M
}
304
305
/**
306
********************************************************************************
307
*  @fn     hme_get_global_mv(
308
*
309
*  @brief  returns the global mv of a previous picture. Accounts for the fact
310
*          that the delta poc of the previous picture may have been different
311
*          from delta poc of current picture. Delta poc is POC difference
312
*          between a picture and its reference.
313
*
314
*  @param[out]  ps_mv: mv_t structure where the motion vector is returned
315
*
316
*  @param[in]  i4_delta_poc: the delta poc for the current pic w.r.t. reference
317
*
318
*  @return None
319
********************************************************************************
320
*/
321
void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
322
150k
{
323
150k
    S16 i2_mv_x, i2_mv_y;
324
150k
    S32 i4_delta_poc_prev;
325
150k
    S32 i4_poc_prev = ps_prev_layer->i4_poc;
326
150k
    S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
327
328
150k
    i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
329
150k
    i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
330
150k
    i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
331
332
150k
    i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
333
150k
    i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);
334
335
150k
    ps_mv->i2_mv_x = i2_mv_x;
336
150k
    ps_mv->i2_mv_y = i2_mv_y;
337
150k
}
338
339
/**
340
********************************************************************************
341
*  @fn     hme_calculate_global_mv(
342
*
343
*  @brief  Calculates global mv for a given histogram
344
*
345
*  @param[in]  ps_hist : the histogram structure
346
*
347
*  @param[in]  ps_mv : used to return the global mv
348
*
349
*  @param[in]  e_lobe_type : refer to GMV_MVTYPE_T
350
*
351
*  @return None
352
********************************************************************************
353
*/
354
void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
355
141k
{
356
141k
    S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
357
141k
    S32 i4_max_sum = -1;
358
141k
    S32 i4_max_x = 0, i4_max_y = 0;
359
360
141k
    if(e_lobe_type == GMV_THICK_LOBE)
361
141k
        i4_lobe_size = ps_hist->i4_lobe1_size;
362
0
    else
363
0
        i4_lobe_size = ps_hist->i4_lobe2_size;
364
365
141k
    i4_offset = i4_lobe_size >> 1;
366
2.99M
    for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
367
2.85M
    {
368
91.1M
        for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
369
88.2M
        {
370
88.2M
            S32 i4_bin_id, i4_sum;
371
88.2M
            i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);
372
373
88.2M
            pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
374
88.2M
            i4_sum = hme_compute_2d_sum_unsigned(
375
88.2M
                (void *)pi4_bin_count,
376
88.2M
                i4_lobe_size,
377
88.2M
                i4_lobe_size,
378
88.2M
                ps_hist->i4_num_cols,
379
88.2M
                sizeof(U32));
380
381
88.2M
            if(i4_sum > i4_max_sum)
382
373k
            {
383
373k
                i4_max_x = i4_x;
384
373k
                i4_max_y = i4_y;
385
373k
                i4_max_sum = i4_sum;
386
373k
            }
387
88.2M
        }
388
2.85M
    }
389
390
141k
    ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
391
141k
    ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
392
141k
}
393
394
/**
395
********************************************************************************
396
*  @fn    ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
397
*
398
*  @brief  returns a new ctb node usable for creating a new ctb candidate
399
*
400
*  @param[in] ps_mem_mgr : memory manager holding all ctb nodes
401
*
402
*  @return NULL if no free nodes, else ptr to the new ctb node
403
********************************************************************************
404
*/
405
ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
406
0
{
407
0
    U08 *pu1_ret;
408
0
    if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
409
0
        return (NULL);
410
0
    pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
411
0
    ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
412
0
    return ((ctb_node_t *)pu1_ret);
413
0
}
414
415
/**
416
********************************************************************************
417
*  @fn     hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
418
search_results_t *ps_search_results, S32 i4_num_ref)
419
*
420
*  @brief  For a given CU whose results are in ps_search_results, the 17x17
421
*          mv grid is updated for future use within the CTB
422
*
423
*  @param[in] ps_search_results : Search results data structure
424
*
425
*  @param[out] pps_mv_grid: The mv grid (as many as num ref)
426
*
427
*  @param[in]  i4_num_ref: nuber of search iterations to update
428
*
429
*  @return None
430
********************************************************************************
431
*/
432
void hme_map_mvs_to_grid(
433
    mv_grid_t **pps_mv_grid,
434
    search_results_t *ps_search_results,
435
    U08 *pu1_pred_dir_searched,
436
    S32 i4_num_pred_dir)
437
1.78M
{
438
1.78M
    S32 i4_cu_start_offset;
439
    /*************************************************************************/
440
    /* Start x, y offset of CU relative to CTB. To update the mv grid which  */
441
    /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset        */
442
    /*************************************************************************/
443
1.78M
    S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
444
1.78M
    S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
445
446
    /* Controls the attribute of a given partition within CU   */
447
    /* , i.e. start locn, size                                 */
448
1.78M
    part_attr_t *ps_part_attr;
449
450
1.78M
    S32 i4_part, i4_part_id, num_parts, i4_stride;
451
1.78M
    S16 i2_mv_x, i2_mv_y;
452
1.78M
    S08 i1_ref_idx;
453
454
    /* Per partition, attributes w.r.t. CU start */
455
1.78M
    S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
456
1.78M
    PART_TYPE_T e_part_type;
457
458
    /* Points to exact mv structures within the grid to be udpated */
459
1.78M
    search_node_t *ps_grid_node, *ps_grid_node_tmp;
460
461
    /* points to exact mv grid (based on search iteration) to be updated */
462
1.78M
    mv_grid_t *ps_mv_grid;
463
464
1.78M
    search_node_t *ps_search_node;
465
466
1.78M
    S32 shift, i, mv_shift = 2;
467
    /* Proportional to the size of CU, controls the number of 4x4 blks */
468
    /* to be updated                                                   */
469
1.78M
    shift = ps_search_results->e_cu_size;
470
1.78M
    ASSERT(i4_num_pred_dir <= 2);
471
472
1.78M
    e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
473
474
1.78M
    if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
475
1.78M
       (ps_search_results->i4_part_mask & ENABLE_NxN))
476
84.4k
    {
477
84.4k
        e_part_type = PRT_NxN;
478
84.4k
    }
479
480
3.91M
    for(i = 0; i < i4_num_pred_dir; i++)
481
2.13M
    {
482
2.13M
        num_parts = gau1_num_parts_in_part_type[e_part_type];
483
2.13M
        ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
484
2.13M
        i4_stride = ps_mv_grid->i4_stride;
485
486
2.13M
        i4_cu_start_offset =
487
2.13M
            i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
488
489
        /* Move to the appropriate 2d locn of CU start within Grid */
490
2.13M
        ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
491
492
4.59M
        for(i4_part = 0; i4_part < num_parts; i4_part++)
493
2.45M
        {
494
2.45M
            i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
495
496
            /* Pick the mvx and y and ref id corresponding to this partition */
497
2.45M
            ps_search_node =
498
2.45M
                ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
499
500
2.45M
            i2_mv_x = ps_search_node->s_mv.i2_mvx;
501
2.45M
            i2_mv_y = ps_search_node->s_mv.i2_mvy;
502
2.45M
            i1_ref_idx = ps_search_node->i1_ref_idx;
503
504
            /* Move to the appropriate location within the CU */
505
2.45M
            ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
506
2.45M
            x_start = ps_part_attr->u1_x_start;
507
2.45M
            x_end = x_start + ps_part_attr->u1_x_count;
508
2.45M
            y_start = ps_part_attr->u1_y_start;
509
2.45M
            y_end = y_start + ps_part_attr->u1_y_count;
510
511
            /* Convert attributes from 8x8 CU size to given CU size */
512
2.45M
            x_start = (x_start << shift) >> mv_shift;
513
2.45M
            x_end = (x_end << shift) >> mv_shift;
514
2.45M
            y_start = (y_start << shift) >> mv_shift;
515
2.45M
            y_end = (y_end << shift) >> mv_shift;
516
517
2.45M
            ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
518
519
            /* Update all 4x4 blk mvs with the part mv */
520
            /* For e.g. we update 4 units in case of NxN for 16x16 CU */
521
12.4M
            for(i4_y = y_start; i4_y < y_end; i4_y++)
522
9.95M
            {
523
55.4M
                for(i4_x = x_start; i4_x < x_end; i4_x++)
524
45.5M
                {
525
45.5M
                    ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
526
45.5M
                    ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
527
45.5M
                    ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
528
45.5M
                    ps_grid_node_tmp[i4_x].u1_subpel_done = 1;
529
45.5M
                }
530
9.95M
                ps_grid_node_tmp += i4_stride;
531
9.95M
            }
532
2.45M
        }
533
2.13M
    }
534
1.78M
}
535
536
/**
********************************************************************************
*  @fn     hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0,
*                                U08 *pu1_pred1, S32 i4_stride)
*
*  @brief  Stores the two prediction buffer pointers and their stride in a
*          ctb node and, when the node has children, recursively does the
*          same for the top-left, top-right, bottom-left and bottom-right
*          children with pointers offset to the respective quadrant
*
*  @param[in,out] ps_parent : node to update (children are updated as well)
*
*  @param[in] pu1_pred0 : first prediction buffer for this node
*
*  @param[in] pu1_pred1 : second prediction buffer for this node
*
*  @param[in] i4_stride : stride stored for this node; each child level is
*             given half of it
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
{
    ps_parent->apu1_pred[0] = pu1_pred0;
    ps_parent->apu1_pred[1] = pu1_pred1;
    ps_parent->i4_pred_stride = i4_stride;

    /* A non-NULL top-left child is taken to mean that all four exist */
    if(ps_parent->ps_tl != NULL)
    {
        /* Child width = x distance between the parent and its top-right child */
        S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off;
        blk_wd -= (S32)ps_parent->u1_x_off;

        /* Top left: same origin as the parent */
        hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1);

        /* Top right: blk_wd to the right */
        hme_set_ctb_pred_attr(
            ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1);

        /* Bottom left: blk_wd rows down */
        hme_set_ctb_pred_attr(
            ps_parent->ps_bl,
            pu1_pred0 + (blk_wd * i4_stride),
            pu1_pred1 + (blk_wd * i4_stride),
            i4_stride >> 1);

        /* Bottom right: blk_wd rows down and blk_wd to the right. This has */
        /* to go to the bottom-right child; passing the top-right child     */
        /* again would overwrite it and leave bottom-right unset.           */
        hme_set_ctb_pred_attr(
            ps_parent->ps_br,
            pu1_pred0 + (blk_wd * (1 + i4_stride)),
            pu1_pred1 + (blk_wd * (1 + i4_stride)),
            i4_stride >> 1);
    }
}
564
565
/**
566
********************************************************************************
567
*  @fn     hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
568
*
569
*  @brief  Expands the part mask to a list of valid part ids terminated by -1
570
*
571
*  @param[in] i4_part_mask : bit mask of active partitino ids
572
*
573
*  @param[out] pi4_valid_part_ids : array, each entry has one valid part id
574
*               Terminated by -1 to signal end.
575
*
576
*  @return number of partitions
577
********************************************************************************
578
*/
579
S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
580
14.3M
{
581
14.3M
    S32 id = 0, i;
582
258M
    for(i = 0; i < TOT_NUM_PARTS; i++)
583
244M
    {
584
244M
        if(i4_part_mask & (1 << i))
585
49.3M
        {
586
49.3M
            pi4_valid_part_ids[id] = i;
587
49.3M
            id++;
588
49.3M
        }
589
244M
    }
590
14.3M
    pi4_valid_part_ids[id] = -1;
591
592
14.3M
    return id;
593
14.3M
}
594
595
ctb_boundary_attrs_t *
596
    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
597
104k
{
598
104k
    S32 horz_crop, vert_crop;
599
104k
    ctb_boundary_attrs_t *ps_attrs;
600
601
104k
    horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
602
104k
    vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
603
104k
    switch(horz_crop + vert_crop)
604
104k
    {
605
94.4k
    case 0:
606
94.4k
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
607
94.4k
        break;
608
4.60k
    case 1:
609
4.60k
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
610
4.60k
        break;
611
4.89k
    case 2:
612
4.89k
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
613
4.89k
        break;
614
437
    case 3:
615
437
        ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
616
437
        break;
617
104k
    }
618
104k
    return (ps_attrs);
619
104k
}
620
621
/**
622
********************************************************************************
623
*  @fn     hevc_avg_2d(U08 *pu1_src1,
624
*                   U08 *pu1_src2,
625
*                   S32 i4_src1_stride,
626
*                   S32 i4_src2_stride,
627
*                   S32 i4_blk_wd,
628
*                   S32 i4_blk_ht,
629
*                   U08 *pu1_dst,
630
*                   S32 i4_dst_stride)
631
*
632
*
633
*  @brief  point wise average of two buffers into a third buffer
634
*
635
*  @param[in] pu1_src1 : first source buffer
636
*
637
*  @param[in] pu1_src2 : 2nd source buffer
638
*
639
*  @param[in] i4_src1_stride : stride of source 1 buffer
640
*
641
*  @param[in] i4_src2_stride : stride of source 2 buffer
642
*
643
*  @param[in] i4_blk_wd : block width
644
*
645
*  @param[in] i4_blk_ht : block height
646
*
647
*  @param[out] pu1_dst : destination buffer
648
*
649
*  @param[in] i4_dst_stride : stride of the destination buffer
650
*
651
*  @return void
652
********************************************************************************
653
*/
654
void hevc_avg_2d(
655
    U08 *pu1_src1,
656
    U08 *pu1_src2,
657
    S32 i4_src1_stride,
658
    S32 i4_src2_stride,
659
    S32 i4_blk_wd,
660
    S32 i4_blk_ht,
661
    U08 *pu1_dst,
662
    S32 i4_dst_stride)
663
12.9M
{
664
12.9M
    S32 i, j;
665
666
222M
    for(i = 0; i < i4_blk_ht; i++)
667
209M
    {
668
3.67G
        for(j = 0; j < i4_blk_wd; j++)
669
3.46G
        {
670
3.46G
            pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
671
3.46G
        }
672
209M
        pu1_src1 += i4_src1_stride;
673
209M
        pu1_src2 += i4_src2_stride;
674
209M
        pu1_dst += i4_dst_stride;
675
209M
    }
676
12.9M
}
677
/**
678
********************************************************************************
679
*  @fn     hme_pick_back_search_node(search_results_t *ps_search_results,
680
*                                   search_node_t *ps_search_node_fwd,
681
*                                   S32 i4_part_idx,
682
*                                   layer_ctxt_t *ps_curr_layer)
683
*
684
*
685
*  @brief  returns the search node corresponding to a ref idx in same or
686
*          opp direction. Preference is given to opp direction, but if that
687
*          does not yield results, same direction is attempted.
688
*
689
*  @param[in] ps_search_results: search results overall
690
*
691
*  @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
692
*
693
*  @param[in] i4_part_idx : partition id
694
*
695
*  @param[in] ps_curr_layer : layer context for current layer.
696
*
697
*  @return search node corresponding to hte "other direction"
698
********************************************************************************
699
*/
700
//#define PICK_L1_REF_SAME_DIR
701
search_node_t *hme_pick_back_search_node(
702
    search_results_t *ps_search_results,
703
    search_node_t *ps_search_node_fwd,
704
    S32 i4_part_idx,
705
    layer_ctxt_t *ps_curr_layer)
706
0
{
707
0
    S32 is_past_l0, is_past_l1, id, i, i4_poc;
708
0
    S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
709
    //ref_attr_t *ps_ref_attr_lc;
710
0
    S08 i1_ref_idx_fwd;
711
0
    S16 i2_mv_x, i2_mv_y;
712
0
    search_node_t *ps_search_node;
713
714
0
    i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
715
0
    i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
716
0
    i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
717
0
    i4_poc = ps_curr_layer->i4_poc;
718
719
    //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
720
    /* If the ref id already picked up maps to a past pic, then we pick */
721
    /* a result corresponding to future pic. If such a result is not    */
722
    /* to be found, then we pick a result corresponding to a past pic   */
723
    //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
724
0
    is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;
725
726
0
    ASSERT(ps_search_results->u1_num_active_ref <= 2);
727
728
    /* pick the right iteration of search nodes to pick up */
729
#ifdef PICK_L1_REF_SAME_DIR
730
    if(ps_search_results->u1_num_active_ref == 2)
731
        id = !is_past_l0;
732
#else
733
0
    if(ps_search_results->u1_num_active_ref == 2)
734
0
        id = is_past_l0;
735
0
#endif
736
0
    else
737
0
        id = 0;
738
739
0
    ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
740
741
0
    for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
742
0
    {
743
0
        S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
744
0
        is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
745
        //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
746
#ifdef PICK_L1_REF_SAME_DIR
747
        if(is_past_l1 == is_past_l0)
748
#else
749
0
        if(is_past_l1 != is_past_l0)
750
0
#endif
751
0
        {
752
            /* belongs to same direction as the ref idx passed, so continue */
753
0
            return (ps_search_node + i);
754
0
        }
755
0
    }
756
757
    /* Unable to find best result in opp direction, so try same direction */
758
    /* However we need to ensure that we do not pick up same result       */
759
0
    for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
760
0
    {
761
0
        if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
762
0
           (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
763
0
        {
764
0
            return (ps_search_node);
765
0
        }
766
0
        ps_search_node++;
767
0
    }
768
769
    //ASSERT(0);
770
0
    return (ps_search_results->aps_part_results[id][i4_part_idx]);
771
772
    //return (NULL);
773
0
}
774
775
/**
776
********************************************************************************
777
*  @fn     hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride)
778
*
779
*
780
*  @brief  Examines input 16x16 for possible edges and orientations of those,
781
*          and returns a bit mask of partitions that should be searched for
782
*
783
*  @param[in] pu1_inp : input buffer
784
*
785
*  @param[in] i4_inp_stride: input stride
786
*
787
*  @return part mask (bit mask of active partitions to search)
788
********************************************************************************
789
*/
790
791
S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
792
1.49M
{
793
1.49M
    S32 i4_rsum[16], i4_csum[16];
794
1.49M
    U08 *pu1_tmp, u1_tmp;
795
1.49M
    S32 i4_max_ridx, i4_max_cidx, i4_tmp;
796
1.49M
    S32 i, j, i4_ret;
797
1.49M
    S32 i4_max_rp[4], i4_max_cp[4];
798
1.49M
    S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
799
1.49M
    S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
800
1.89M
#define EDGE_THR (15 * 16)
801
17.0M
#define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
802
803
1.49M
    if(0 == limit_active_partitions)
804
544k
    {
805
        /*********************************************************************/
806
        /* In this case, we do not optimize on active partitions and search  */
807
        /* brute force. This way, 17 partitinos would be enabled.            */
808
        /*********************************************************************/
809
544k
        return (ENABLE_ALL_PARTS);
810
544k
    }
811
812
    /*************************************************************************/
813
    /* Control passes below in case we wish to optimize on active partitions.*/
814
    /* This is based on input characteristics, check how an edge passes along*/
815
    /* an input 16x16 area, if at all, and decide active partitinos.         */
816
    /*************************************************************************/
817
818
    /* Initialize row and col sums */
819
16.0M
    for(i = 0; i < 16; i++)
820
15.1M
    {
821
15.1M
        i4_rsum[i] = 0;
822
15.1M
        i4_csum[i] = 0;
823
15.1M
    }
824
947k
    pu1_tmp = pu1_inp;
825
16.0M
    for(i = 0; i < 16; i++)
826
15.1M
    {
827
257M
        for(j = 0; j < 16; j++)
828
242M
        {
829
242M
            u1_tmp = *pu1_tmp++;
830
242M
            i4_rsum[i] += u1_tmp;
831
242M
            i4_csum[j] += u1_tmp;
832
242M
        }
833
15.1M
        pu1_tmp += (i4_inp_stride - 16);
834
15.1M
    }
835
836
    /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
837
947k
    i4_max_rp[0] = 0;
838
947k
    i4_max_cp[0] = 0;
839
947k
    i4_max_rp[1] = 0;
840
947k
    i4_max_cp[1] = 0;
841
947k
    i4_max_rp[2] = 0;
842
947k
    i4_max_cp[2] = 0;
843
947k
    i4_max_rp[3] = 0;
844
947k
    i4_max_cp[3] = 0;
845
846
    /* Get Max edge strength across (2,3) (3,4) (4,5) */
847
3.78M
    for(i = 3; i < 6; i++)
848
2.84M
    {
849
        /* Run [-1 -2 2 1] filter through rsum/csum */
850
2.84M
        i4_tmp = HI_PASS(i4_rsum, i);
851
2.84M
        if(ABS(i4_tmp) > i4_max_rp[1])
852
226k
            i4_max_rp[1] = i4_tmp;
853
854
2.84M
        i4_tmp = HI_PASS(i4_csum, i);
855
2.84M
        if(ABS(i4_tmp) > i4_max_cp[1])
856
245k
            i4_max_cp[1] = i4_tmp;
857
2.84M
    }
858
859
    /* Get Max edge strength across (6,7) (7,8) (8,9) */
860
3.78M
    for(i = 7; i < 10; i++)
861
2.84M
    {
862
        /* Run [-1 -2 2 1] filter through rsum/csum */
863
2.84M
        i4_tmp = HI_PASS(i4_rsum, i);
864
2.84M
        if(ABS(i4_tmp) > i4_max_rp[2])
865
127k
            i4_max_rp[2] = i4_tmp;
866
867
2.84M
        i4_tmp = HI_PASS(i4_csum, i);
868
2.84M
        if(ABS(i4_tmp) > i4_max_cp[2])
869
157k
            i4_max_cp[2] = i4_tmp;
870
2.84M
    }
871
872
    /* Get Max edge strength across (10,11) (11,12) (12,13) */
873
3.78M
    for(i = 11; i < 14; i++)
874
2.84M
    {
875
        /* Run [-1 -2 2 1] filter through rsum/csum */
876
2.84M
        i4_tmp = HI_PASS(i4_rsum, i);
877
2.84M
        if(ABS(i4_tmp) > i4_max_rp[3])
878
113k
            i4_max_rp[3] = i4_tmp;
879
880
2.84M
        i4_tmp = HI_PASS(i4_csum, i);
881
2.84M
        if(ABS(i4_tmp) > i4_max_cp[3])
882
106k
            i4_max_cp[3] = i4_tmp;
883
2.84M
    }
884
885
    /* Find the maximum across the 3 and see whether the strength qualifies as edge */
886
947k
    i4_max_ridx = 1;
887
947k
    i4_max_cidx = 1;
888
2.84M
    for(i = 2; i <= 3; i++)
889
1.89M
    {
890
1.89M
        if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
891
42.6k
            i4_max_ridx = i;
892
893
1.89M
        if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
894
54.9k
            i4_max_cidx = i;
895
1.89M
    }
896
897
947k
    if(EDGE_THR > i4_max_rp[i4_max_ridx])
898
902k
    {
899
902k
        i4_max_ridx = 0;
900
902k
    }
901
902
947k
    if(EDGE_THR > i4_max_cp[i4_max_cidx])
903
907k
    {
904
907k
        i4_max_cidx = 0;
905
907k
    }
906
907
947k
    i4_ret = ENABLE_2Nx2N;
908
909
    /* If only vertical discontinuity, go with one of 2Nx? */
910
947k
    if(0 == (i4_max_ridx + i4_max_cidx))
911
884k
    {
912
        //num_me_parts++;
913
884k
        return i4_ret;
914
884k
    }
915
916
62.0k
    if(i4_max_ridx && (i4_max_cidx == 0))
917
22.6k
    {
918
        //num_me_parts += 3;
919
22.6k
        return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
920
22.6k
    }
921
922
    /* If only horizontal discontinuity, go with one of ?x2N */
923
39.4k
    if(i4_max_cidx && (i4_max_ridx == 0))
924
17.5k
    {
925
        //num_me_parts += 3;
926
17.5k
        return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
927
17.5k
    }
928
929
    /* If middle is dominant in both directions, go with NxN */
930
21.8k
    if((2 == i4_max_cidx) && (2 == i4_max_ridx))
931
2.20k
    {
932
        //num_me_parts += 5;
933
2.20k
        return ((i4_ret | ENABLE_NxN));
934
2.20k
    }
935
936
    /* Otherwise, conservatively, enable NxN and the 2 AMPs */
937
    //num_me_parts += 9;
938
19.6k
    return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
939
21.8k
}
940
941
/**
942
********************************************************************************
943
*  @fn     hme_init_search_results(search_results_t *ps_search_results,
944
*                           S32 i4_num_ref,
945
*                           S32 i4_num_best_results,
946
*                           S32 i4_num_results_per_part,
947
*                           BLK_SIZE_T e_blk_size,
948
*                           S32 i4_x_off,
949
*                           S32 i4_y_off)
950
*
951
*  @brief  Initializes the search results structure with some key attributes
952
*
953
*  @param[out] ps_search_results : search results structure to initialise
954
*
955
*  @param[in] i4_num_Ref: corresponds to the number of ref ids searched
956
*
957
*  @param[in] i4_num_best_results: Number of best results for the CU to
958
*               be maintained in the result structure
959
*
960
*  @param[in] i4_num_results_per_part: Per active partition the number of best
961
*               results to be maintained
962
*
963
*  @param[in] e_blk_size: blk size of the CU for which this structure used
964
*
965
*  @param[in] i4_x_off: x offset of the top left of CU from CTB top left
966
*
967
*  @param[in] i4_y_off: y offset of the top left of CU from CTB top left
968
*
969
*  @param[in] pu1_is_past : points ot an array that tells whether a given ref id
970
*              has prominence in L0 or in L1 list (past or future )
971
*
972
*  @return void
973
********************************************************************************
974
*/
975
void hme_init_search_results(
976
    search_results_t *ps_search_results,
977
    S32 i4_num_ref,
978
    S32 i4_num_best_results,
979
    S32 i4_num_results_per_part,
980
    BLK_SIZE_T e_blk_size,
981
    S32 i4_x_off,
982
    S32 i4_y_off,
983
    U08 *pu1_is_past)
984
2.05M
{
985
2.05M
    CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
986
987
2.05M
    ASSERT(e_cu_size != -1);
988
2.05M
    ps_search_results->e_cu_size = e_cu_size;
989
2.05M
    ps_search_results->u1_x_off = (U08)i4_x_off;
990
2.05M
    ps_search_results->u1_y_off = (U08)i4_y_off;
991
2.05M
    ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
992
2.05M
    ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
993
2.05M
    ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
994
2.05M
    ps_search_results->pu1_is_past = pu1_is_past;
995
2.05M
    ps_search_results->u1_split_flag = 0;
996
2.05M
    ps_search_results->best_cu_cost = MAX_32BIT_VAL;
997
2.05M
}
998
999
/**
1000
********************************************************************************
1001
*  @fn     hme_reset_search_results((search_results_t *ps_search_results,
1002
*                               S32 i4_part_mask)
1003
*
1004
*
1005
*  @brief  Resets the best results to maximum values, so as to allow search
1006
*          for the new CU's partitions. The existing results may be from an
1007
*          older CU using same structure.
1008
*
1009
*  @param[in] ps_search_results: search results structure
1010
*
1011
*  @param[in] i4_part_mask : bit mask of active partitions
1012
*
1013
*  @return part mask (bit mask of active partitions to search)
1014
********************************************************************************
1015
*/
1016
void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
1017
6.04M
{
1018
6.04M
    S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
1019
6.04M
    S08 i1_ref_idx;
1020
6.04M
    S32 i, j;
1021
6.04M
    search_node_t *ps_search_node;
1022
1023
    /* store this for future use */
1024
6.04M
    ps_search_results->i4_part_mask = i4_part_mask;
1025
1026
    /* Reset the spli_flag to zero */
1027
6.04M
    ps_search_results->u1_split_flag = 0;
1028
1029
6.04M
    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
1030
6.04M
    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
1031
1032
16.8M
    for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1033
10.7M
    {
1034
        /* Reset the individual partitino results */
1035
193M
        for(i = 0; i < TOT_NUM_PARTS; i++)
1036
183M
        {
1037
183M
            if(!(i4_part_mask & (1 << i)))
1038
144M
                continue;
1039
1040
39.0M
            ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];
1041
1042
80.9M
            for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
1043
41.8M
            {
1044
41.8M
                ps_search_node[j].s_mv.i2_mvx = 0;
1045
41.8M
                ps_search_node[j].s_mv.i2_mvy = 0;
1046
41.8M
                ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
1047
41.8M
                ps_search_node[j].i4_sad = MAX_32BIT_VAL;
1048
41.8M
                ps_search_node[j].i4_sdi = 0;
1049
41.8M
                ps_search_node[j].i1_ref_idx = -1;
1050
41.8M
                ps_search_node[j].u1_subpel_done = 0;
1051
41.8M
                ps_search_node[j].u1_is_avail = 1;
1052
41.8M
                ps_search_node[j].i4_mv_cost = 0;
1053
41.8M
            }
1054
39.0M
        }
1055
10.7M
    }
1056
6.04M
}
1057
/**
1058
********************************************************************************
1059
*  @fn     hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
1060
*                               S32 i4_step,
1061
*                               range_prms_t *ps_mvrange)
1062
*
1063
*  @brief  Given a central pt within mv range, and a grid of points surrounding
1064
*           this pt, this function returns a grid mask of pts within search rng
1065
*
1066
*  @param[in] ps_search_node: the centre pt of the grid
1067
*
1068
*  @param[in] i4_step: step size of grid
1069
*
1070
*  @param[in] ps_mvrange: structure containing the current mv range
1071
*
1072
*  @return bitmask of the  pts in grid within search range
1073
********************************************************************************
1074
*/
1075
S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
1076
40.5M
{
1077
40.5M
    S32 i4_mask = GRID_ALL_PTS_VALID;
1078
40.5M
    if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
1079
8.11k
    {
1080
8.11k
        i4_mask &= (GRID_RT_3_INVALID);
1081
8.11k
    }
1082
40.5M
    if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
1083
347k
    {
1084
347k
        i4_mask &= (GRID_LT_3_INVALID);
1085
347k
    }
1086
40.5M
    if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
1087
186k
    {
1088
186k
        i4_mask &= (GRID_BOT_3_INVALID);
1089
186k
    }
1090
40.5M
    if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
1091
377k
    {
1092
377k
        i4_mask &= (GRID_TOP_3_INVALID);
1093
377k
    }
1094
40.5M
    return i4_mask;
1095
40.5M
}
1096
1097
/**
1098
********************************************************************************
1099
*  @fn    layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1100
S32 i4_layer_id)
1101
*
1102
*  @brief  returns the layer ctxt of the layer with given id from the temporally
1103
*          previous frame
1104
*
1105
*  @param[in] ps_ctxt : ME context
1106
*
1107
*  @param[in] i4_layer_id : id of layer required
1108
*
1109
*  @return layer ctxt of given layer id in temporally previous frame
1110
********************************************************************************
1111
*/
1112
layer_ctxt_t *hme_get_past_layer_ctxt(
1113
    me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
1114
85.3k
{
1115
85.3k
    S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
1116
85.3k
    S32 i;
1117
85.3k
    layers_descr_t *ps_desc;
1118
1119
238k
    for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1120
238k
    {
1121
238k
        ps_desc = &ps_ctxt->as_ref_descr[i];
1122
238k
        if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1123
85.3k
            return (ps_desc->aps_layers[i4_layer_id]);
1124
238k
    }
1125
0
    return NULL;
1126
85.3k
}
1127
1128
/**
1129
********************************************************************************
1130
*  @fn    layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1131
S32 i4_layer_id)
1132
*
1133
*  @brief  returns the layer ctxt of the layer with given id from the temporally
1134
*          previous frame
1135
*
1136
*  @param[in] ps_ctxt : ME context
1137
*
1138
*  @param[in] i4_layer_id : id of layer required
1139
*
1140
*  @return layer ctxt of given layer id in temporally previous frame
1141
********************************************************************************
1142
*/
1143
layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
1144
128k
{
1145
128k
    S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
1146
128k
    S32 i;
1147
128k
    layers_descr_t *ps_desc;
1148
1149
415k
    for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1150
400k
    {
1151
400k
        ps_desc = &ps_ctxt->as_ref_descr[i];
1152
400k
        if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1153
113k
            return (ps_desc->aps_layers[i4_layer_id]);
1154
400k
    }
1155
14.6k
    return NULL;
1156
128k
}
1157
1158
/**
1159
********************************************************************************
1160
*  @fn    void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
1161
BLK_SIZE_T e_blk_size,
1162
S32 i4_num_ref,
1163
S32 i4_num_results_per_part)
1164
*
1165
*  @brief  Given a blk size to be used for this layer, this function initialize
1166
*          the mv bank to make it ready to store and return results.
1167
*
1168
*  @param[in, out] ps_layer_ctxt: pointer to layer ctxt
1169
*
1170
*  @param[in] e_blk_size : resolution at which mvs are stored
1171
*
1172
*  @param[in] i4_num_ref: number of reference frames corresponding to which
1173
*              results are stored.
1174
*
1175
*  @param[in] e_blk_size : resolution at which mvs are stored
1176
*
1177
*  @param[in] i4_num_results_per_part : Number of results to be stored per
1178
*               ref idx. So these many best results stored
1179
*
1180
*  @return void
1181
********************************************************************************
1182
*/
1183
void hme_init_mv_bank(
1184
    layer_ctxt_t *ps_layer_ctxt,
1185
    BLK_SIZE_T e_blk_size,
1186
    S32 i4_num_ref,
1187
    S32 i4_num_results_per_part,
1188
    U08 u1_enc)
1189
385k
{
1190
385k
    layer_mv_t *ps_mv_bank;
1191
385k
    hme_mv_t *ps_mv1, *ps_mv2;
1192
385k
    S08 *pi1_ref_id1, *pi1_ref_id2;
1193
385k
    S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col;
1194
385k
    S32 i4_i, i4_j, blk_ht;
1195
1196
385k
    ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
1197
385k
    ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
1198
385k
    ps_mv_bank->i4_num_ref = i4_num_ref;
1199
385k
    mvs_in_blk = i4_num_ref * i4_num_results_per_part;
1200
385k
    ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk;
1201
1202
    /*************************************************************************/
1203
    /* Store blk size, from blk size derive blk width and use this to compute*/
1204
    /* number of blocks every row. We also pad to left and top by 1, to      */
1205
    /* support the prediction mechanism.                                     */
1206
    /*************************************************************************/
1207
385k
    ps_mv_bank->e_blk_size = e_blk_size;
1208
385k
    blk_wd = gau1_blk_size_to_wd[e_blk_size];
1209
385k
    blk_ht = gau1_blk_size_to_ht[e_blk_size];
1210
1211
385k
    blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd;
1212
385k
    blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht;
1213
1214
385k
    if(u1_enc)
1215
128k
    {
1216
        /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
1217
128k
        WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
1218
128k
        WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
1219
1220
128k
        blks_in_row = (num_ctb_cols << 3);
1221
128k
        blks_in_col = (num_ctb_rows << 3);
1222
128k
    }
1223
1224
385k
    blks_in_row += 2;
1225
385k
    mvs_in_row = blks_in_row * mvs_in_blk;
1226
1227
385k
    ps_mv_bank->i4_num_blks_per_row = blks_in_row;
1228
385k
    ps_mv_bank->i4_num_mvs_per_row = mvs_in_row;
1229
1230
    /* To ensure run time requirements fall within allocation time request */
1231
385k
    ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row);
1232
1233
    /*************************************************************************/
1234
    /* Increment by one full row at top for padding and one column in left   */
1235
    /* this gives us the actual start of mv for 0,0 blk                      */
1236
    /*************************************************************************/
1237
385k
    ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk;
1238
385k
    ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk;
1239
1240
385k
    memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t));
1241
385k
    memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08));
1242
1243
    /*************************************************************************/
1244
    /* Initialize top row, left col and right col with zeros since these are */
1245
    /* used as candidates during searches.                                   */
1246
    /*************************************************************************/
1247
385k
    ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row;
1248
385k
    ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk;
1249
385k
    pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row;
1250
385k
    pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk;
1251
3.01M
    for(i4_i = 0; i4_i < blks_in_col; i4_i++)
1252
2.62M
    {
1253
10.4M
        for(i4_j = 0; i4_j < mvs_in_blk; i4_j++)
1254
7.84M
        {
1255
7.84M
            ps_mv1[i4_j].i2_mv_x = 0;
1256
7.84M
            ps_mv1[i4_j].i2_mv_y = 0;
1257
7.84M
            ps_mv2[i4_j].i2_mv_x = 0;
1258
7.84M
            ps_mv2[i4_j].i2_mv_y = 0;
1259
7.84M
            pi1_ref_id1[i4_j] = -1;
1260
7.84M
            pi1_ref_id2[i4_j] = -1;
1261
7.84M
        }
1262
2.62M
        ps_mv1 += mvs_in_row;
1263
2.62M
        ps_mv2 += mvs_in_row;
1264
2.62M
        pi1_ref_id1 += mvs_in_row;
1265
2.62M
        pi1_ref_id2 += mvs_in_row;
1266
2.62M
    }
1267
385k
}
1268
/**
********************************************************************************
*  @fn    void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
*
*  @brief  Marks entries of the layer's mv bank as intra: for every blk row of
*          the layer, mv x and y are set to INTRA_MV and the ref idx to -1
*
*  @param[in, out] ps_layer_ctxt: pointer to layer ctxt; its mv bank must
*              already carry the blk size, the mv/ref idx start pointers and
*              the mvs per row stride that this function reads
*
*  @return void
********************************************************************************
*/
void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
{
    layer_mv_t *ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
    BLK_SIZE_T e_blk_size = ps_mv_bank->e_blk_size;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_id;
    S32 blk_wd, blk_ht, blks_in_row, blks_in_col;
    S32 i, j;

    /*************************************************************************/
    /* From the stored blk size derive blk width and height, and from those  */
    /* the number of whole blks along each dimension of the layer.           */
    /*************************************************************************/
    blk_wd = gau1_blk_size_to_wd[e_blk_size];
    /* Height comes from the height table, as in hme_init_mv_bank */
    blk_ht = gau1_blk_size_to_ht[e_blk_size];
    blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
    blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;

    /* Start at the stored mv / ref idx pointers of the bank */
    ps_mv = ps_mv_bank->ps_mv;
    pi1_ref_id = ps_mv_bank->pi1_ref_idx;

    for(i = 0; i < blks_in_col; i++)
    {
        /* NOTE(review): only the first blks_in_row entries of each row are  */
        /* written, not blks_in_row * i4_num_mvs_per_blk. Confirm against    */
        /* the callers that this is the intended coverage.                   */
        for(j = 0; j < blks_in_row; j++)
        {
            ps_mv[j].i2_mv_x = INTRA_MV;
            ps_mv[j].i2_mv_y = INTRA_MV;
            pi1_ref_id[j] = -1;
        }
        ps_mv += ps_mv_bank->i4_num_mvs_per_row;
        pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
    }
}
1310
1311
/**
1312
********************************************************************************
1313
*  @fn    void hme_derive_search_range(range_prms_t *ps_range,
1314
*                                   range_prms_t *ps_pic_limit,
1315
*                                   range_prms_t *ps_mv_limit,
1316
*                                   S32 i4_x,
1317
*                                   S32 i4_y,
1318
*                                   S32 blk_wd,
1319
*                                   S32 blk_ht)
1320
*
1321
*  @brief  given picture limits and blk dimensions and mv search limits, obtains
1322
*          teh valid search range such that the blk stays within pic boundaries,
1323
*          where picture boundaries include padded portions of picture
1324
*
1325
*  @param[out] ps_range: updated with actual search range
1326
*
1327
*  @param[in] ps_pic_limit : picture boundaries
1328
*
1329
*  @param[in] ps_mv_limit: Search range limits for the mvs
1330
*
1331
*  @param[in] i4_x : x coordinate of the blk
1332
*
1333
*  @param[in] i4_y : y coordinate of the blk
1334
*
1335
*  @param[in] blk_wd : blk width
1336
*
1337
*  @param[in] blk_ht : blk height
1338
*
1339
*  @return void
1340
********************************************************************************
1341
*/
1342
void hme_derive_search_range(
1343
    range_prms_t *ps_range,
1344
    range_prms_t *ps_pic_limit,
1345
    range_prms_t *ps_mv_limit,
1346
    S32 i4_x,
1347
    S32 i4_y,
1348
    S32 blk_wd,
1349
    S32 blk_ht)
1350
14.0M
{
1351
14.0M
    ps_range->i2_max_x =
1352
14.0M
        MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x);
1353
14.0M
    ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
1354
14.0M
    ps_range->i2_max_y =
1355
14.0M
        MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y);
1356
14.0M
    ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
1357
14.0M
}
1358
1359
/**
1360
********************************************************************************
1361
*  @fn    void hme_get_spatial_candt(search_node_t *ps_search_node,
1362
*                                   layer_ctxt_t *ps_curr_layer,
1363
*                                   S32 i4_blk_x,
1364
*                                   S32 i4_blk_y,
1365
*                                   S08 i1_ref_id,
1366
*                                   S32 i4_result_id)
1367
*
1368
*  @brief  obtains a candt from the same mv bank as the current one, its called
1369
*          spatial candt as it does not require scaling for temporal distances
1370
*
1371
*  @param[out] ps_search_node: mv and ref id updated here of the candt
1372
*
1373
*  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1374
*
1375
*  @param[in] i4_blk_x : x coordinate of the block in mv bank
1376
*
1377
*  @param[in] i4_blk_y : y coordinate of the block in mv bank
1378
*
1379
*  @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv
1380
*              results, useful if multiple ref idx candts maintained separately.
1381
*
1382
*  @param[in] i4_result_id : If multiple results stored per ref idx, this
1383
*              pts to the id of the result
1384
*
1385
*  @param[in] tr_avail : top right availability of the block
1386
*
1387
*  @param[in] bl_avail : bottom left availability of the block
1388
*
1389
*  @return void
1390
********************************************************************************
1391
*/
1392
void hme_get_spatial_candt(
1393
    layer_ctxt_t *ps_curr_layer,
1394
    BLK_SIZE_T e_search_blk_size,
1395
    S32 i4_blk_x,
1396
    S32 i4_blk_y,
1397
    S08 i1_ref_idx,
1398
    search_node_t *ps_top_neighbours,
1399
    search_node_t *ps_left_neighbours,
1400
    S32 i4_result_id,
1401
    S32 tr_avail,
1402
    S32 bl_avail,
1403
    S32 encode)
1404
1405
3.33M
{
1406
3.33M
    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1407
3.33M
    S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1408
3.33M
    S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1409
3.33M
    search_node_t *ps_search_node;
1410
3.33M
    S32 i4_offset;
1411
3.33M
    hme_mv_t *ps_mv, *ps_mv_base;
1412
3.33M
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
1413
3.33M
    S32 jump = 1, mvs_in_blk, mvs_in_row;
1414
3.33M
    S32 shift = (encode ? 2 : 0);
1415
1416
3.33M
    if(i4_blk_size1 != i4_blk_size2)
1417
17.6k
    {
1418
17.6k
        i4_blk_x <<= 1;
1419
17.6k
        i4_blk_y <<= 1;
1420
17.6k
        jump = 2;
1421
17.6k
        if((i4_blk_size1 << 2) == i4_blk_size2)
1422
0
        {
1423
0
            i4_blk_x <<= 1;
1424
0
            i4_blk_y <<= 1;
1425
0
            jump = 4;
1426
0
        }
1427
17.6k
    }
1428
1429
3.33M
    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1430
3.33M
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1431
1432
    /* Adjust teh blk coord to point to top left locn */
1433
3.33M
    i4_blk_x -= 1;
1434
3.33M
    i4_blk_y -= 1;
1435
    /* Pick up the mvs from the location */
1436
3.33M
    i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1437
3.33M
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1438
1439
3.33M
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1440
3.33M
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1441
1442
3.33M
    ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1443
3.33M
    pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1444
1445
3.33M
    ps_mv_base = ps_mv;
1446
3.33M
    pi1_ref_idx_base = pi1_ref_idx;
1447
1448
    /* ps_mv and pi1_ref_idx now point to the top left locn */
1449
    /* Get 4 mvs as follows:                                */
1450
3.33M
    ps_search_node = ps_top_neighbours;
1451
3.33M
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1452
1453
    /* Move to top */
1454
3.33M
    ps_search_node++;
1455
3.33M
    ps_mv += mvs_in_blk;
1456
3.33M
    pi1_ref_idx += mvs_in_blk;
1457
3.33M
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1458
1459
    /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1460
3.33M
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1461
3.33M
    {
1462
3.33M
        ps_search_node++;
1463
3.33M
        ps_mv += (mvs_in_blk * (jump >> 1));
1464
3.33M
        pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1465
3.33M
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1466
3.33M
    }
1467
0
    else
1468
0
    {
1469
0
        ps_search_node++;
1470
0
        ps_search_node->s_mv.i2_mvx = 0;
1471
0
        ps_search_node->s_mv.i2_mvy = 0;
1472
0
        ps_search_node->i1_ref_idx = i1_ref_idx;
1473
0
        ps_search_node->u1_is_avail = 0;
1474
0
        ps_search_node->u1_subpel_done = 0;
1475
0
    }
1476
1477
    /* Move to tr: this will be tr w.r.t. the blk being searched */
1478
3.33M
    ps_search_node++;
1479
3.33M
    if(tr_avail == 0)
1480
0
    {
1481
0
        ps_search_node->s_mv.i2_mvx = 0;
1482
0
        ps_search_node->s_mv.i2_mvy = 0;
1483
0
        ps_search_node->i1_ref_idx = i1_ref_idx;
1484
0
        ps_search_node->u1_is_avail = 0;
1485
0
        ps_search_node->u1_subpel_done = 0;
1486
0
    }
1487
3.33M
    else
1488
3.33M
    {
1489
3.33M
        ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1490
3.33M
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1491
3.33M
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1492
3.33M
    }
1493
1494
    /* Move to left */
1495
3.33M
    ps_search_node = ps_left_neighbours;
1496
3.33M
    ps_mv = ps_mv_base + mvs_in_row;
1497
3.33M
    pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1498
3.33M
    COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1499
1500
    /* Move to l1 */
1501
3.33M
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1502
3.33M
    {
1503
3.33M
        ps_search_node++;
1504
3.33M
        ps_mv += (mvs_in_row * (jump >> 1));
1505
3.33M
        pi1_ref_idx += (mvs_in_row * (jump >> 1));
1506
3.33M
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1507
3.33M
    }
1508
0
    else
1509
0
    {
1510
0
        ps_search_node++;
1511
0
        ps_search_node->s_mv.i2_mvx = 0;
1512
0
        ps_search_node->s_mv.i2_mvy = 0;
1513
0
        ps_search_node->i1_ref_idx = i1_ref_idx;
1514
0
        ps_search_node->u1_is_avail = 0;
1515
0
        ps_search_node->u1_subpel_done = 0;
1516
0
    }
1517
1518
    /* Move to bl */
1519
3.33M
    ps_search_node++;
1520
3.33M
    if(bl_avail == 0)
1521
3.33M
    {
1522
3.33M
        ps_search_node->s_mv.i2_mvx = 0;
1523
3.33M
        ps_search_node->s_mv.i2_mvy = 0;
1524
3.33M
        ps_search_node->i1_ref_idx = i1_ref_idx;
1525
3.33M
        ps_search_node->u1_is_avail = 0;
1526
3.33M
    }
1527
0
    else
1528
0
    {
1529
0
        ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1530
0
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1531
0
        COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1532
0
    }
1533
3.33M
}
1534
1535
void hme_get_spatial_candt_in_l1_me(
1536
    layer_ctxt_t *ps_curr_layer,
1537
    BLK_SIZE_T e_search_blk_size,
1538
    S32 i4_blk_x,
1539
    S32 i4_blk_y,
1540
    S08 i1_ref_idx,
1541
    U08 u1_pred_dir,
1542
    search_node_t *ps_top_neighbours,
1543
    search_node_t *ps_left_neighbours,
1544
    S32 i4_result_id,
1545
    S32 tr_avail,
1546
    S32 bl_avail,
1547
    S32 i4_num_act_ref_l0,
1548
    S32 i4_num_act_ref_l1)
1549
1.63M
{
1550
1.63M
    search_node_t *ps_search_node;
1551
1.63M
    hme_mv_t *ps_mv, *ps_mv_base;
1552
1553
1.63M
    S32 i4_offset;
1554
1.63M
    S32 mvs_in_blk, mvs_in_row;
1555
1.63M
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
1556
1.63M
    S32 i4_mv_pos_in_implicit_array;
1557
1558
1.63M
    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1559
1560
1.63M
    S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1561
1.63M
    S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1562
1.63M
    S32 jump = 1;
1563
1.63M
    S32 shift = 0;
1564
1.63M
    S32 i4_num_results_in_given_dir =
1565
1.63M
        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
1566
1.63M
                            : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
1567
1568
1.63M
    if(i4_blk_size1 != i4_blk_size2)
1569
1.58M
    {
1570
1.58M
        i4_blk_x <<= 1;
1571
1.58M
        i4_blk_y <<= 1;
1572
1.58M
        jump = 2;
1573
1.58M
        if((i4_blk_size1 << 2) == i4_blk_size2)
1574
0
        {
1575
0
            i4_blk_x <<= 1;
1576
0
            i4_blk_y <<= 1;
1577
0
            jump = 4;
1578
0
        }
1579
1.58M
    }
1580
1581
1.63M
    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1582
1.63M
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1583
1584
    /* Adjust the blk coord to point to top left locn */
1585
1.63M
    i4_blk_x -= 1;
1586
1.63M
    i4_blk_y -= 1;
1587
    /* Pick up the mvs from the location */
1588
1.63M
    i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1589
1.63M
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1590
1591
1.63M
    i4_offset +=
1592
1.63M
        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
1593
1594
1.63M
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1595
1.63M
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1596
1597
1.63M
    ps_mv_base = ps_mv;
1598
1.63M
    pi1_ref_idx_base = pi1_ref_idx;
1599
1600
    /* TL */
1601
1.63M
    {
1602
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1603
1.63M
        ps_search_node = ps_top_neighbours;
1604
1605
1.63M
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1606
1.63M
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1607
1608
1.63M
        if(-1 != i4_mv_pos_in_implicit_array)
1609
714k
        {
1610
714k
            COPY_MV_TO_SEARCH_NODE(
1611
714k
                ps_search_node,
1612
714k
                &ps_mv[i4_mv_pos_in_implicit_array],
1613
714k
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1614
714k
                i1_ref_idx,
1615
714k
                shift);
1616
714k
        }
1617
921k
        else
1618
921k
        {
1619
921k
            ps_search_node->u1_is_avail = 0;
1620
921k
            ps_search_node->s_mv.i2_mvx = 0;
1621
921k
            ps_search_node->s_mv.i2_mvy = 0;
1622
921k
            ps_search_node->i1_ref_idx = i1_ref_idx;
1623
921k
        }
1624
1.63M
    }
1625
1626
    /* Move to top */
1627
1.63M
    {
1628
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1629
1.63M
        ps_search_node++;
1630
1.63M
        ps_mv += mvs_in_blk;
1631
1.63M
        pi1_ref_idx += mvs_in_blk;
1632
1633
1.63M
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1634
1.63M
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1635
1636
1.63M
        if(-1 != i4_mv_pos_in_implicit_array)
1637
1.00M
        {
1638
1.00M
            COPY_MV_TO_SEARCH_NODE(
1639
1.00M
                ps_search_node,
1640
1.00M
                &ps_mv[i4_mv_pos_in_implicit_array],
1641
1.00M
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1642
1.00M
                i1_ref_idx,
1643
1.00M
                shift);
1644
1.00M
        }
1645
631k
        else
1646
631k
        {
1647
631k
            ps_search_node->u1_is_avail = 0;
1648
631k
            ps_search_node->s_mv.i2_mvx = 0;
1649
631k
            ps_search_node->s_mv.i2_mvy = 0;
1650
631k
            ps_search_node->i1_ref_idx = i1_ref_idx;
1651
631k
        }
1652
1.63M
    }
1653
1654
    /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1655
1.63M
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1656
0
    {
1657
0
        ps_search_node++;
1658
0
        ps_mv += (mvs_in_blk * (jump >> 1));
1659
0
        pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1660
1661
0
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1662
0
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1663
1664
0
        if(-1 != i4_mv_pos_in_implicit_array)
1665
0
        {
1666
0
            COPY_MV_TO_SEARCH_NODE(
1667
0
                ps_search_node,
1668
0
                &ps_mv[i4_mv_pos_in_implicit_array],
1669
0
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1670
0
                i1_ref_idx,
1671
0
                shift);
1672
0
        }
1673
0
        else
1674
0
        {
1675
0
            ps_search_node->u1_is_avail = 0;
1676
0
            ps_search_node->s_mv.i2_mvx = 0;
1677
0
            ps_search_node->s_mv.i2_mvy = 0;
1678
0
            ps_search_node->i1_ref_idx = i1_ref_idx;
1679
0
        }
1680
0
    }
1681
1.63M
    else
1682
1.63M
    {
1683
1.63M
        ps_search_node++;
1684
1.63M
        ps_search_node->u1_is_avail = 0;
1685
1.63M
        ps_search_node->s_mv.i2_mvx = 0;
1686
1.63M
        ps_search_node->s_mv.i2_mvy = 0;
1687
1.63M
        ps_search_node->i1_ref_idx = i1_ref_idx;
1688
1.63M
    }
1689
1690
    /* Move to tr: this will be tr w.r.t. the blk being searched */
1691
1.63M
    ps_search_node++;
1692
1.63M
    if(tr_avail == 0)
1693
0
    {
1694
0
        ps_search_node->s_mv.i2_mvx = 0;
1695
0
        ps_search_node->s_mv.i2_mvy = 0;
1696
0
        ps_search_node->i1_ref_idx = i1_ref_idx;
1697
0
        ps_search_node->u1_is_avail = 0;
1698
0
        ps_search_node->u1_subpel_done = 0;
1699
0
    }
1700
1.63M
    else
1701
1.63M
    {
1702
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1703
1.63M
        ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1704
1.63M
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1705
1706
1.63M
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1707
1.63M
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1708
1709
1.63M
        if(-1 != i4_mv_pos_in_implicit_array)
1710
769k
        {
1711
769k
            COPY_MV_TO_SEARCH_NODE(
1712
769k
                ps_search_node,
1713
769k
                &ps_mv[i4_mv_pos_in_implicit_array],
1714
769k
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1715
769k
                i1_ref_idx,
1716
769k
                shift);
1717
769k
        }
1718
866k
        else
1719
866k
        {
1720
866k
            ps_search_node->u1_is_avail = 0;
1721
866k
            ps_search_node->s_mv.i2_mvx = 0;
1722
866k
            ps_search_node->s_mv.i2_mvy = 0;
1723
866k
            ps_search_node->i1_ref_idx = i1_ref_idx;
1724
866k
        }
1725
1.63M
    }
1726
1727
    /* Move to left */
1728
1.63M
    {
1729
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1730
1.63M
        ps_search_node = ps_left_neighbours;
1731
1.63M
        ps_mv = ps_mv_base + mvs_in_row;
1732
1.63M
        pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1733
1734
1.63M
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1735
1.63M
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1736
1737
1.63M
        if(-1 != i4_mv_pos_in_implicit_array)
1738
1.04M
        {
1739
1.04M
            COPY_MV_TO_SEARCH_NODE(
1740
1.04M
                ps_search_node,
1741
1.04M
                &ps_mv[i4_mv_pos_in_implicit_array],
1742
1.04M
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1743
1.04M
                i1_ref_idx,
1744
1.04M
                shift);
1745
1.04M
        }
1746
588k
        else
1747
588k
        {
1748
588k
            ps_search_node->u1_is_avail = 0;
1749
588k
            ps_search_node->s_mv.i2_mvx = 0;
1750
588k
            ps_search_node->s_mv.i2_mvy = 0;
1751
588k
            ps_search_node->i1_ref_idx = i1_ref_idx;
1752
588k
        }
1753
1.63M
    }
1754
1755
    /* Move to l1 */
1756
1.63M
    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1757
0
    {
1758
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1759
0
        ps_search_node++;
1760
0
        ps_mv += (mvs_in_row * (jump >> 1));
1761
0
        pi1_ref_idx += (mvs_in_row * (jump >> 1));
1762
1763
0
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1764
0
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1765
1766
0
        if(-1 != i4_mv_pos_in_implicit_array)
1767
0
        {
1768
0
            COPY_MV_TO_SEARCH_NODE(
1769
0
                ps_search_node,
1770
0
                &ps_mv[i4_mv_pos_in_implicit_array],
1771
0
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1772
0
                i1_ref_idx,
1773
0
                shift);
1774
0
        }
1775
0
        else
1776
0
        {
1777
0
            ps_search_node->u1_is_avail = 0;
1778
0
            ps_search_node->s_mv.i2_mvx = 0;
1779
0
            ps_search_node->s_mv.i2_mvy = 0;
1780
0
            ps_search_node->i1_ref_idx = i1_ref_idx;
1781
0
        }
1782
0
    }
1783
1.63M
    else
1784
1.63M
    {
1785
1.63M
        ps_search_node++;
1786
1.63M
        ps_search_node->u1_is_avail = 0;
1787
1.63M
        ps_search_node->s_mv.i2_mvx = 0;
1788
1.63M
        ps_search_node->s_mv.i2_mvy = 0;
1789
1.63M
        ps_search_node->i1_ref_idx = i1_ref_idx;
1790
1.63M
    }
1791
1792
    /* Move to bl */
1793
1.63M
    ps_search_node++;
1794
1.63M
    if(bl_avail == 0)
1795
1.63M
    {
1796
1.63M
        ps_search_node->s_mv.i2_mvx = 0;
1797
1.63M
        ps_search_node->s_mv.i2_mvy = 0;
1798
1.63M
        ps_search_node->i1_ref_idx = i1_ref_idx;
1799
1.63M
        ps_search_node->u1_is_avail = 0;
1800
1.63M
    }
1801
0
    else
1802
0
    {
1803
        /* ps_mv and pi1_ref_idx now point to the top left locn */
1804
0
        ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1805
0
        pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1806
1807
0
        i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1808
0
            pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1809
1810
0
        if(-1 != i4_mv_pos_in_implicit_array)
1811
0
        {
1812
0
            COPY_MV_TO_SEARCH_NODE(
1813
0
                ps_search_node,
1814
0
                &ps_mv[i4_mv_pos_in_implicit_array],
1815
0
                &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1816
0
                i1_ref_idx,
1817
0
                shift);
1818
0
        }
1819
0
        else
1820
0
        {
1821
0
            ps_search_node->u1_is_avail = 0;
1822
0
            ps_search_node->s_mv.i2_mvx = 0;
1823
0
            ps_search_node->s_mv.i2_mvy = 0;
1824
0
            ps_search_node->i1_ref_idx = i1_ref_idx;
1825
0
        }
1826
0
    }
1827
1.63M
}
1828
1829
/**
1830
********************************************************************************
1831
*  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
1832
*                                   S32 i4_blk_x,
1833
*                                   S32 i4_blk_y,
1834
*                                   mvgrid_t *ps_mv_grid ,
1835
*                                   S32 i1_ref_id)
1836
*
1837
*  @brief  The 18x18 MV grid for a ctb, is filled in first row and 1st col
1838
*          this corresponds to neighbours (TL, T, TR, L, BL)
1839
*
1840
*  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1841
*
1842
*  @param[in] blk_x : x coordinate of the block in mv bank
1843
*
1844
*  @param[in] blk_y : y coordinate of the block in mv bank
1845
*
1846
*  @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
1847
*
1848
*  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
1849
*              results, useful if multiple ref idx candts maintained separately.
1850
*
1851
*  @return void
1852
********************************************************************************
1853
*/
1854
void hme_fill_ctb_neighbour_mvs(
1855
    layer_ctxt_t *ps_curr_layer,
1856
    S32 blk_x,
1857
    S32 blk_y,
1858
    mv_grid_t *ps_mv_grid,
1859
    U08 u1_pred_dir_ctr,
1860
    U08 u1_default_ref_id,
1861
    S32 i4_num_act_ref_l0)
1862
124k
{
1863
124k
    search_node_t *ps_grid_node;
1864
124k
    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1865
124k
    S32 i4_offset;
1866
124k
    hme_mv_t *ps_mv, *ps_mv_base;
1867
124k
    S08 *pi1_ref_idx, *pi1_ref_idx_base;
1868
124k
    S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;
1869
1870
124k
    if(ps_layer_mvbank->e_blk_size == BLK_4x4)
1871
0
    {
1872
        /* searching 16x16, mvs are for 4x4 */
1873
0
        jump = 1;
1874
0
        blk_x <<= 2;
1875
0
        blk_y <<= 2;
1876
0
    }
1877
124k
    else
1878
124k
    {
1879
        /* Searching 16x16, mvs are for 8x8 */
1880
124k
        blk_x <<= 1;
1881
124k
        blk_y <<= 1;
1882
124k
    }
1883
124k
    ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
1884
1885
124k
    mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1886
124k
    mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1887
1888
    /* Adjust the blk coord to point to top left locn */
1889
124k
    blk_x -= 1;
1890
124k
    blk_y -= 1;
1891
1892
    /* Pick up the mvs from the location */
1893
124k
    i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1894
124k
    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
1895
1896
124k
    i4_offset += (u1_pred_dir_ctr == 1);
1897
1898
124k
    ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1899
124k
    pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1900
1901
124k
    ps_mv_base = ps_mv;
1902
124k
    pi1_ref_idx_base = pi1_ref_idx;
1903
1904
    /* the 0, 0 entry of the grid pts to top left for the ctb */
1905
124k
    ps_grid_node = &ps_mv_grid->as_node[0];
1906
1907
    /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
1908
2.37M
    for(i = 0; i < 18; i++)
1909
2.24M
    {
1910
2.24M
        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1911
2.24M
        ps_grid_node++;
1912
2.24M
        inc = 1;
1913
        /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1914
2.24M
        if(i & 1)
1915
1.12M
            inc = jump;
1916
1917
2.24M
        ps_mv += (mvs_in_blk * inc);
1918
2.24M
        pi1_ref_idx += (mvs_in_blk * inc);
1919
2.24M
    }
1920
1921
124k
    ps_mv = ps_mv_base + mvs_in_row;
1922
124k
    pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1923
1924
    /* now copy left 16 left mvs */
1925
124k
    ps_grid_node = &ps_mv_grid->as_node[0];
1926
124k
    ps_grid_node += (ps_mv_grid->i4_stride);
1927
2.12M
    for(i = 0; i < 16; i++)
1928
1.99M
    {
1929
1.99M
        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1930
1.99M
        ps_grid_node += ps_mv_grid->i4_stride;
1931
1.99M
        inc = 1;
1932
        /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1933
1.99M
        if(!(i & 1))
1934
999k
            inc = jump;
1935
1936
1.99M
        ps_mv += (mvs_in_row * inc);
1937
1.99M
        pi1_ref_idx += (mvs_in_row * inc);
1938
1.99M
    }
1939
    /* last one set to invalid as bottom left not yet encoded */
1940
124k
    ps_grid_node->u1_is_avail = 0;
1941
124k
}
1942
1943
/**
********************************************************************************
*  @fn    void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
*
*  @brief  Resets the usage counter (i4_used) of the working memory manager
*          to 0. The memory pointer and the total size are not touched
*
*  @param[in,out] ps_buf_mgr : working memory manager to be reset
*
*  @return void
********************************************************************************
*/
void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
{
    ps_buf_mgr->i4_used = 0;
}
1947
/**
********************************************************************************
*  @fn    void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
*
*  @brief  Attaches a memory region to the working memory manager and resets
*          its usage counter
*
*  @param[out] ps_buf_mgr : working memory manager to be initialised
*
*  @param[in] pu1_mem : start of the memory region handed to the manager
*
*  @param[in] size : stored as the total size of the region
*
*  @return void
********************************************************************************
*/
void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
{
    /* Nothing is in use yet */
    hme_reset_wkg_mem(ps_buf_mgr);

    ps_buf_mgr->i4_total = size;
    ps_buf_mgr->pu1_wkg_mem = pu1_mem;
}
1953
1954
/**
********************************************************************************
*  @fn    void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
*
*  @brief  Sets the stride and the start offset of the ctb mv grid and marks
*          the 16x16 nodes that begin at the start offset as available
*
*  @param[out] ps_mv_grid : grid to be initialised
*
*  @return void
********************************************************************************
*/
void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
{
    S32 i4_row;
    search_node_t *ps_row_start;

    /*************************************************************************/
    /* Worst case is a 64x64 CTB, i.e. 16x16 MVs at 4x4 level. One neighbour */
    /* on each side makes it an 18x18 MV grid. The boundary nodes are the    */
    /* neighbours; the left and top edges and the 16x16 centre are filled at */
    /* run time. Only the availability of the centre is set here, and it is  */
    /* always set to available.                                              */
    /*************************************************************************/
    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;

    ps_row_start = &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset];
    for(i4_row = 0; i4_row < 16; i4_row++)
    {
        search_node_t *ps_node = ps_row_start;
        search_node_t *ps_row_end = ps_row_start + 16;

        while(ps_node < ps_row_end)
        {
            ps_node->u1_is_avail = 1;
            ps_node++;
        }

        ps_row_start += ps_mv_grid->i4_stride;
    }
}
1979
/**
1980
********************************************************************************
1981
*  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
1982
*
1983
*  @brief  Pads horizontally to left side. Each pixel replicated across a line
1984
*
1985
*  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
1986
*
1987
*  @param[in] stride : stride of destination buffer
1988
*
1989
*  @param[in] pad_wd : Amt of horizontal padding to be done
1990
*
1991
*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
1992
*
1993
*  @return void
1994
********************************************************************************
1995
*/
1996
void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
1997
0
{
1998
0
    S32 i, j;
1999
0
    U08 u1_val;
2000
0
    for(i = 0; i < pad_ht; i++)
2001
0
    {
2002
0
        u1_val = pu1_dst[0];
2003
0
        for(j = -pad_wd; j < 0; j++)
2004
0
            pu1_dst[j] = u1_val;
2005
2006
0
        pu1_dst += stride;
2007
0
    }
2008
0
}
2009
/**
2010
********************************************************************************
2011
*  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
2012
*
2013
*  @brief  Pads horizontally to rt side. Each pixel replicated across a line
2014
*
2015
*  @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
2016
*
2017
*  @param[in] stride : stride of destination buffer
2018
*
2019
*  @param[in] pad_wd : Amt of horizontal padding to be done
2020
*
2021
*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
2022
*
2023
*  @return void
2024
********************************************************************************
2025
*/
2026
void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
2027
2.84M
{
2028
2.84M
    S32 i, j;
2029
2.84M
    U08 u1_val;
2030
24.2M
    for(i = 0; i < pad_ht; i++)
2031
21.4M
    {
2032
21.4M
        u1_val = pu1_dst[0];
2033
179M
        for(j = 1; j <= pad_wd; j++)
2034
158M
            pu1_dst[j] = u1_val;
2035
2036
21.4M
        pu1_dst += stride;
2037
21.4M
    }
2038
2.84M
}
2039
/**
2040
********************************************************************************
2041
*  @fn    void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2042
*
2043
*  @brief  Pads vertically on the top. Repeats the top line for top padding
2044
*
2045
*  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
2046
*
2047
*  @param[in] stride : stride of destination buffer
2048
*
2049
*  @param[in] pad_ht : Amt of vertical padding to be done
2050
*
2051
*  @param[in] pad_wd : Number of columns for which vertical padding to be done
2052
*
2053
*  @return void
2054
********************************************************************************
2055
*/
2056
void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2057
0
{
2058
0
    S32 i;
2059
0
    for(i = 1; i <= pad_ht; i++)
2060
0
        memcpy(pu1_dst - (i * stride), pu1_dst, pad_wd);
2061
0
}
2062
/**
2063
********************************************************************************
2064
*  @fn    void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2065
*
2066
*  @brief  Pads vertically on the bot. Repeats the top line for top padding
2067
*
2068
*  @param[in] pu1_dst : destination pointer. Points to the line to be repeated
2069
*
2070
*  @param[in] stride : stride of destination buffer
2071
*
2072
*  @param[in] pad_ht : Amt of vertical padding to be done
2073
*
2074
*  @param[in] pad_wd : Number of columns for which vertical padding to be done
2075
*
2076
*  @return void
2077
********************************************************************************
2078
*/
2079
void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
2080
1.41M
{
2081
1.41M
    S32 i;
2082
7.58M
    for(i = 1; i <= pad_ht; i++)
2083
6.17M
        memcpy(pu1_dst + (i * stride), pu1_dst, pad_wd);
2084
1.41M
}
2085
2086
/**
2087
********************************************************************************
2088
*  @fn    void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer,  S32 pos_x,
2089
*                           S32 pos_y, S32 size)
2090
*
2091
*  @brief  Does weighting of the input in case the search needs to happen
2092
*          with reference frames weighted
2093
*
2094
*  @param[in] ps_curr_layer: layer ctxt
2095
*
2096
*  @param[in] pos_x : x coordinate of the input blk in the picture
2097
*
2098
*  @param[in] pos_y : y coordinate of hte input blk in the picture
2099
*
2100
*  @param[in] size : size of the input block
2101
*
2102
*  @param[in] num_ref : Number of reference frames
2103
*
2104
*  @return void
2105
********************************************************************************
2106
*/
2107
void hme_get_wt_inp(
2108
    layer_ctxt_t *ps_curr_layer,
2109
    wgt_pred_ctxt_t *ps_wt_inp_prms,
2110
    S32 dst_stride,
2111
    S32 pos_x,
2112
    S32 pos_y,
2113
    S32 size,
2114
    S32 num_ref,
2115
    U08 u1_is_wt_pred_on)
2116
4.17M
{
2117
4.17M
    S32 ref, i, j;
2118
4.17M
    U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
2119
4.17M
    S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
2120
4.17M
    S32 x_count, y_count;
2121
2122
    /* Fixed source */
2123
4.17M
    pu1_src = ps_curr_layer->pu1_inp;
2124
2125
    /* Make sure the start positions of block are inside frame limits */
2126
4.17M
    pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
2127
4.17M
    pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
2128
2129
4.17M
    pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
2130
2131
    /* In case we handle imcomplete CTBs, we copy only as much as reqd */
2132
    /* from input buffers to prevent out of bound accesses. In this    */
2133
    /* case, we do padding in x or y or both dirns */
2134
4.17M
    x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
2135
4.17M
    y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
2136
2137
16.9M
    for(i = 0; i < num_ref + 1; i++)
2138
12.7M
    {
2139
12.7M
        ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
2140
12.7M
    }
2141
2142
    /* Run thro all ref ids */
2143
16.9M
    for(ref = 0; ref < num_ref + 1; ref++)
2144
12.7M
    {
2145
12.7M
        S32 wt, off;
2146
12.7M
        S32 inv_wt;
2147
2148
12.7M
        pu1_src_tmp = pu1_src;
2149
2150
        /* Each ref id may have differnet wt/offset. */
2151
        /* So we have unique inp buf for each ref id */
2152
12.7M
        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2153
2154
12.7M
        if(ref == num_ref)
2155
4.17M
        {
2156
            /* last ref will be non weighted input */
2157
41.4M
            for(i = 0; i < y_count; i++)
2158
37.2M
            {
2159
646M
                for(j = 0; j < x_count; j++)
2160
609M
                {
2161
609M
                    pu1_dst[j] = pu1_src_tmp[j];
2162
609M
                }
2163
37.2M
                pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2164
37.2M
                pu1_dst += dst_stride;
2165
37.2M
            }
2166
4.17M
        }
2167
8.59M
        else
2168
8.59M
        {
2169
            /* Wt and off specific to this ref id */
2170
8.59M
            wt = ps_wt_inp_prms->a_wpred_wt[ref];
2171
8.59M
            inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
2172
8.59M
            off = ps_wt_inp_prms->a_wpred_off[ref];
2173
2174
            /* Generate size*size worth of modified input samples */
2175
85.1M
            for(i = 0; i < y_count; i++)
2176
76.5M
            {
2177
1.32G
                for(j = 0; j < x_count; j++)
2178
1.25G
                {
2179
1.25G
                    S32 tmp;
2180
2181
                    /* Since we scale input, we use inverse transform of wt pred */
2182
                    //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
2183
1.25G
                    tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
2184
1.25G
                    pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
2185
1.25G
                }
2186
76.5M
                pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2187
76.5M
                pu1_dst += dst_stride;
2188
76.5M
            }
2189
8.59M
        }
2190
2191
        /* Check and do padding in right direction if need be */
2192
12.7M
        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2193
12.7M
        if(x_count != size)
2194
2.84M
        {
2195
2.84M
            hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
2196
2.84M
        }
2197
2198
        /* Check and do padding in bottom directino if need be */
2199
12.7M
        if(y_count != size)
2200
1.41M
        {
2201
1.41M
            hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
2202
1.41M
        }
2203
12.7M
    }
2204
4.17M
}
2205
/**
2206
****************************************************************************************
2207
*  @fn     hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
2208
*                                pu_result_t *ps_pu_results_inp,
2209
*                                UWORD8 u1_num_results_per_part,
2210
*                                UWORD8 u1_num_best_cand)
2211
*
2212
*  @brief  Does the candidate evaluation across all the current candidates and returns
2213
*           the best two or one candidates across given lists
2214
*
2215
*  @param[in]  - ps_pu_results_inp : Pointer to the input candidates
2216
*              - u1_num_results_per_part: Number of available candidates
2217
*
2218
*  @param[out] - ps_pu_results_dst : Pointer to best PU results
2219
*
2220
****************************************************************************************
2221
*/
2222
void hme_pick_best_pu_cand(
2223
    pu_result_t *ps_pu_results_dst,
2224
    pu_result_t *ps_pu_results_list0,
2225
    pu_result_t *ps_pu_results_list1,
2226
    UWORD8 u1_num_results_per_part_l0,
2227
    UWORD8 u1_num_results_per_part_l1,
2228
    UWORD8 u1_candidate_rank)
2229
14.5M
{
2230
14.5M
    struct cand_pos_data
2231
14.5M
    {
2232
14.5M
        U08 u1_cand_list_id;
2233
2234
14.5M
        U08 u1_cand_id_in_cand_list;
2235
14.5M
    } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2236
2237
14.5M
    S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2238
14.5M
    U08 i, j;
2239
2240
28.9M
    for(i = 0; i < u1_num_results_per_part_l0; i++)
2241
14.4M
    {
2242
14.4M
        ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
2243
14.4M
        as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
2244
14.4M
        as_cand_pos_data[i].u1_cand_list_id = 0;
2245
14.4M
    }
2246
2247
18.4M
    for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++)
2248
3.94M
    {
2249
3.94M
        ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
2250
3.94M
        as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
2251
3.94M
        as_cand_pos_data[j].u1_cand_list_id = 1;
2252
3.94M
    }
2253
2254
14.5M
    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
2255
14.5M
        ai4_costs,
2256
14.5M
        as_cand_pos_data,
2257
14.5M
        u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
2258
14.5M
        struct cand_pos_data);
2259
2260
14.5M
    if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id)
2261
933k
    {
2262
933k
        ps_pu_results_dst[0] =
2263
933k
            ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2264
933k
    }
2265
13.5M
    else
2266
13.5M
    {
2267
13.5M
        ps_pu_results_dst[0] =
2268
13.5M
            ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2269
13.5M
    }
2270
14.5M
}
2271
2272
/* Returns the number of candidates */
2273
static S32 hme_tu_recur_cand_harvester(
2274
    part_type_results_t *ps_cand_container,
2275
    inter_pu_results_t *ps_pu_data,
2276
    inter_ctb_prms_t *ps_inter_ctb_prms,
2277
    S32 i4_part_mask)
2278
4.30M
{
2279
4.30M
    part_type_results_t s_cand_data;
2280
2281
4.30M
    U08 i, j;
2282
4.30M
    PART_ID_T e_part_id;
2283
2284
4.30M
    S32 i4_num_cands = 0;
2285
2286
    /* 2Nx2N part_type decision part */
2287
4.30M
    if(i4_part_mask & ENABLE_2Nx2N)
2288
4.26M
    {
2289
4.26M
        U08 u1_num_candt_to_pick;
2290
2291
4.26M
        e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
2292
2293
4.26M
        ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
2294
2295
4.26M
        if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N))
2296
4.04M
        {
2297
4.04M
            u1_num_candt_to_pick =
2298
4.04M
                MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
2299
4.04M
                    ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2300
4.04M
                        ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2301
4.04M
        }
2302
229k
        else
2303
229k
        {
2304
229k
            u1_num_candt_to_pick =
2305
229k
                MIN(1,
2306
229k
                    ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2307
229k
                        ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2308
229k
        }
2309
2310
4.26M
        if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
2311
614k
        {
2312
614k
            u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
2313
614k
        }
2314
2315
9.15M
        for(i = 0; i < u1_num_candt_to_pick; i++)
2316
4.88M
        {
2317
            /* Picks the best two candidates of all the available ones */
2318
4.88M
            hme_pick_best_pu_cand(
2319
4.88M
                ps_cand_container[i4_num_cands].as_pu_results,
2320
4.88M
                ps_pu_data->aps_pu_results[0][e_part_id],
2321
4.88M
                ps_pu_data->aps_pu_results[1][e_part_id],
2322
4.88M
                ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2323
4.88M
                ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2324
4.88M
                i);
2325
2326
            /* Update the other params part_type and total_cost in part_type_results */
2327
4.88M
            ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
2328
4.88M
            ps_cand_container[i4_num_cands].i4_tot_cost =
2329
4.88M
                ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
2330
2331
4.88M
            i4_num_cands++;
2332
4.88M
        }
2333
4.26M
    }
2334
2335
    /* SMP */
2336
4.30M
    {
2337
4.30M
        S32 i4_total_cost;
2338
2339
4.30M
        S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
2340
4.30M
        S32 start_part_type = PRT_2NxN;
2341
4.30M
        S32 best_cost = MAX_32BIT_VAL;
2342
4.30M
        S32 part_type_cnt = 0;
2343
2344
12.9M
        for(j = 0; j < num_part_types; j++)
2345
8.60M
        {
2346
8.60M
            if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2347
6.94M
            {
2348
6.94M
                continue;
2349
6.94M
            }
2350
2351
4.97M
            for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2352
3.31M
            {
2353
3.31M
                e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2354
2355
                /* Pick the best candidate for the partition acroos lists */
2356
3.31M
                hme_pick_best_pu_cand(
2357
3.31M
                    &s_cand_data.as_pu_results[i],
2358
3.31M
                    ps_pu_data->aps_pu_results[0][e_part_id],
2359
3.31M
                    ps_pu_data->aps_pu_results[1][e_part_id],
2360
3.31M
                    ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2361
3.31M
                    ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2362
3.31M
                    0);
2363
3.31M
            }
2364
2365
1.65M
            i4_total_cost =
2366
1.65M
                s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2367
2368
1.65M
            if(i4_total_cost < best_cost)
2369
922k
            {
2370
                /* Stores the index of the best part_type in the sub-catoegory */
2371
922k
                best_cost = i4_total_cost;
2372
2373
922k
                ps_cand_container[i4_num_cands] = s_cand_data;
2374
2375
922k
                ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2376
922k
                ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2377
922k
            }
2378
2379
1.65M
            part_type_cnt++;
2380
1.65M
        }
2381
2382
4.30M
        i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2383
4.30M
    }
2384
2385
    /* AMP */
2386
4.30M
    {
2387
4.30M
        S32 i4_total_cost;
2388
2389
4.30M
        S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
2390
4.30M
        S32 start_part_type = PRT_2NxnU;
2391
4.30M
        S32 best_cost = MAX_32BIT_VAL;
2392
4.30M
        S32 part_type_cnt = 0;
2393
2394
21.5M
        for(j = 0; j < num_part_types; j++)
2395
17.2M
        {
2396
17.2M
            if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2397
14.0M
            {
2398
14.0M
                continue;
2399
14.0M
            }
2400
2401
9.44M
            for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2402
6.29M
            {
2403
6.29M
                e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2404
2405
                /* Pick the best candidate for the partition acroos lists */
2406
6.29M
                hme_pick_best_pu_cand(
2407
6.29M
                    &s_cand_data.as_pu_results[i],
2408
6.29M
                    ps_pu_data->aps_pu_results[0][e_part_id],
2409
6.29M
                    ps_pu_data->aps_pu_results[1][e_part_id],
2410
6.29M
                    ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2411
6.29M
                    ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2412
6.29M
                    0);
2413
6.29M
            }
2414
2415
3.14M
            i4_total_cost =
2416
3.14M
                s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2417
2418
3.14M
            if(i4_total_cost < best_cost)
2419
932k
            {
2420
                /* Stores the index of the best part_type in the sub-catoegory */
2421
932k
                best_cost = i4_total_cost;
2422
2423
932k
                ps_cand_container[i4_num_cands] = s_cand_data;
2424
2425
932k
                ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2426
932k
                ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2427
932k
            }
2428
2429
3.14M
            part_type_cnt++;
2430
3.14M
        }
2431
2432
4.30M
        i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2433
4.30M
    }
2434
2435
4.30M
    return i4_num_cands;
2436
4.30M
}
2437
2438
/**
*****************************************************************************
*  @fn     hme_decide_part_types(inter_cu_results_t *ps_cu_results,
*                                inter_pu_results_t *ps_pu_results,
*                                inter_ctb_prms_t *ps_inter_ctb_prms,
*                                me_frm_ctxt_t *ps_ctxt,
*                                ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
*                                ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
*
*  @brief  Does uni/bi evaluation across various partition types,
*          decides best inter partition types for the CU, compares
*          intra cost and decides the best K results for the CU
*
*          This is called post subpel refinement for 16x16s, 8x8s and
*          for post merge evaluation for 32x32,64x64 CUs
*
*  @param[in,out] ps_cu_results : CU level results
*                 - In : part mask, CU size, input offset, x/y offsets
*                 - Out: u1_num_best_results and ps_best_results[], filled
*                        in ascending order of cost
*
*  @param[in]     ps_pu_results : PU level results of both lists; handed to
*                 the candidate harvester and to the uni/bi evaluation
*
*  @param[in,out] ps_inter_ctb_prms : Inter CTB params (lambda, weighted
*                 inputs, weights, noise controls, pred buffer manager).
*                 The pred buffer usage indicator is updated here.
*
*  @param[in]     ps_ctxt : ME frame context; only ps_func_selector is read
*
*  @param[in]     ps_cmn_utils_optimised_function_list : common utility
*                 functions; pf_copy_2d is used here and the list is also
*                 forwarded to the uni/bi evaluation
*
*  @param[in]     ps_me_optimised_function_list : ME functions; forwarded to
*                 the uni/bi evaluation
*
*  @par Description
*    --------------------------------------------------------------------------------
*     Flow:
*            for each category (SMP,AMP,2Nx2N based on part mask)
*            {
*                for each part_type
*                {
*                    for each part
*                        pick best candidate from each list
*                    combine uni part type
*                    update best results for part type
*                }
*                pick the best part type for given category (for SMP & AMP)
*            }
*                    ||
*                    ||
*                    \/
*           Bi-Pred evaluation:
*            for upto 4 best part types
*            {
*                for each part
*                {
*                    compute fixed size had for all uni and remember coeffs
*                    compute bisatd
*                    uni vs bi and gives upto two results
*                    also gives the pt level pred buffer
*                }
*             }
*                    ||
*                    ||
*                    \/
*            select X candidates for tu recursion as per the Note below
*               tu_rec_on_part_type (reuse transform coeffs)
*                    ||
*                    ||
*                    \/
*            insert intra nodes at appropriate result id
*                    ||
*                    ||
*                    \/
*            populate y best results for rdo based on preset
*
*     Note :
*     number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
*     number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
*
*     NOTE(review): the body of this function does not reference any intra
*     cost or intra node; the "compares intra cost" / "insert intra nodes"
*     steps above are presumably done elsewhere - confirm.
*     --------------------------------------------------------------------------------
*
*  @return None
********************************************************************************
*/
2506
void hme_decide_part_types(
2507
    inter_cu_results_t *ps_cu_results,
2508
    inter_pu_results_t *ps_pu_results,
2509
    inter_ctb_prms_t *ps_inter_ctb_prms,
2510
    me_frm_ctxt_t *ps_ctxt,
2511
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
2512
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
2513
2514
)
2515
4.30M
{
2516
4.30M
    S32 i, j;
2517
4.30M
    S32 i4_part_mask;
2518
4.30M
    ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2519
4.30M
    ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2520
4.30M
    S32 i4_noise_term;
2521
4.30M
    WORD32 e_part_id;
2522
2523
4.30M
    PF_SAD_FXN_TU_REC apf_err_compute[4];
2524
2525
4.30M
    part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
2526
4.30M
    part_type_results_t *ps_part_type_results;
2527
2528
4.30M
    S32 num_best_cand = 0;
2529
4.30M
    const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
2530
2531
4.30M
    i4_part_mask = ps_cu_results->i4_part_mask;
2532
    /* Collect the candidates allowed by the part mask into the local array; */
    /* the return value is the number of entries that were filled            */
2533
4.30M
    num_best_cand = hme_tu_recur_cand_harvester(
2534
4.30M
        as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
2535
2536
    /* Partition ID for the current PU */
2537
4.30M
    e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
2538
2539
4.30M
    ps_part_type_results = as_part_type_results;
    /* Uni/bi evaluation of every harvested candidate. The callee is not     */
    /* visible here; presumably it fills pu1_pred, i4_pred_stride and the    */
    /* two pred sigma arrays that are read further below - TODO confirm      */
2540
10.8M
    for(i = 0; i < num_best_cand; i++)
2541
6.52M
    {
2542
6.52M
        hme_compute_pred_and_evaluate_bi(
2543
6.52M
            ps_cu_results,
2544
6.52M
            ps_pu_results,
2545
6.52M
            ps_inter_ctb_prms,
2546
6.52M
            &(ps_part_type_results[i]),
2547
6.52M
            au8_pred_sigmaXSquare[i],
2548
6.52M
            au8_pred_sigmaX[i],
2549
6.52M
            ps_cmn_utils_optimised_function_list,
2550
6.52M
            ps_me_optimised_function_list
2551
2552
6.52M
        );
2553
6.52M
    }
2554
    /* Perform TU_REC on the best candidates selected */
2555
4.30M
    {
2556
4.30M
        WORD32 i4_sad_grid;
2557
4.30M
        WORD32 ai4_tu_split_flag[4];
2558
4.30M
        WORD32 ai4_tu_early_cbf[4];
2559
2560
4.30M
        WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
2561
4.30M
        WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
2562
4.30M
        WORD16 i2_wght;
2563
4.30M
        WORD32 i4_satd;
2564
2565
4.30M
        err_prms_t s_err_prms;
2566
4.30M
        err_prms_t *ps_err_prms = &s_err_prms;
2567
2568
        /* Default cost and final idx initialization */
2569
10.8M
        for(i = 0; i < num_best_cand; i++)
2570
6.52M
        {
2571
6.52M
            best_cost[i] = MAX_32BIT_VAL;
2572
6.52M
            ai4_final_idx[i] = -1;
2573
6.52M
        }
2574
2575
        /* Assign the satd function of each CU size to the err_compute function
        pointer table; all four CU sizes (64x64, 32x32, 16x16, 8x8) get an entry */
2577
4.30M
        apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
2578
4.30M
        apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
2579
4.30M
        apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
2580
4.30M
        apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
2581
        /* Error params that stay the same for every candidate */
2582
4.30M
        ps_err_prms->pi4_sad_grid = &i4_sad_grid;
2583
4.30M
        ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
2584
4.30M
        ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
2585
4.30M
        ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
2586
4.30M
        ps_err_prms->i4_grid_mask = 1;
2587
4.30M
        ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
2588
4.30M
        ps_err_prms->u1_max_tr_size = 32;
2589
        /* Noisy CUs override the default max transform size of 32 */
2590
4.30M
        if(ps_inter_ctb_prms->u1_is_cu_noisy)
2591
0
        {
2592
0
            ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
2593
0
        }
2594
2595
        /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
        are disabled by Part_mask) */
2597
10.8M
        for(i = 0; i < num_best_cand; i++)
2598
6.52M
        {
2599
6.52M
            part_type_results_t *ps_best_results;
2600
6.52M
            pu_result_t *ps_pu_result;
2601
6.52M
            WORD32 part_type_cost;
2602
6.52M
            WORD32 cand_idx;
2603
2604
6.52M
            WORD32 pred_dir;
2605
6.52M
            S32 i4_inp_off;
2606
2607
6.52M
            S32 lambda;
2608
6.52M
            U08 lambda_qshift;
2609
6.52M
            U08 *apu1_inp[MAX_NUM_INTER_PARTS];
2610
6.52M
            S16 ai2_wt[MAX_NUM_INTER_PARTS];
2611
6.52M
            S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
2612
6.52M
            S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
2613
2614
6.52M
            WORD32 part_type = ps_part_type_results[i].u1_part_type;
2615
6.52M
            WORD32 e_cu_size = ps_cu_results->u1_cu_size;
2616
6.52M
            WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
2617
6.52M
            U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
            /* UCHAR_MAX marks "no scratch pred buffer acquired"; checked before freeing below */
2618
6.52M
            U08 u1_inp_buf_idx = UCHAR_MAX;
2619
2620
6.52M
            ps_err_prms->i4_part_mask = i4_part_mask;
2621
6.52M
            ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
2622
6.52M
            ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
2623
6.52M
            ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
2624
6.52M
            ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
2625
2626
            /* Current offset for the present part type */
2627
6.52M
            i4_inp_off = ps_cu_results->i4_inp_offset;
2628
2629
6.52M
            ps_best_results = &(ps_part_type_results[i]);
2630
2631
6.52M
            part_type_cost = 0;
2632
6.52M
            lambda = ps_inter_ctb_prms->i4_lamda;
2633
6.52M
            lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
2634
            /* Per partition: choose the input buffer, weight and inverse weight */
            /* from the prediction direction, and accumulate the mv cost         */
2635
14.6M
            for(j = 0; j < u1_num_parts; j++)
2636
8.17M
            {
2637
8.17M
                ps_pu_result = &(ps_best_results->as_pu_results[j]);
2638
2639
8.17M
                pred_dir = ps_pu_result->pu.b2_pred_mode;
2640
2641
8.17M
                if(PRED_L0 == pred_dir)
2642
7.36M
                {
2643
7.36M
                    apu1_inp[j] =
2644
7.36M
                        ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
2645
7.36M
                        i4_inp_off;
2646
7.36M
                    ai2_wt[j] =
2647
7.36M
                        ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
2648
7.36M
                            ->s_weight_offset.i2_luma_weight;
2649
7.36M
                    ai4_inv_wt[j] =
2650
7.36M
                        ps_inter_ctb_prms->pi4_inv_wt
2651
7.36M
                            [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2652
7.36M
                    ai4_inv_wt_shift_val[j] =
2653
7.36M
                        ps_inter_ctb_prms->pi4_inv_wt_shift_val
2654
7.36M
                            [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2655
7.36M
                }
2656
810k
                else if(PRED_L1 == pred_dir)
2657
744k
                {
2658
744k
                    apu1_inp[j] =
2659
744k
                        ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
2660
744k
                        i4_inp_off;
2661
744k
                    ai2_wt[j] =
2662
744k
                        ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
2663
744k
                            ->s_weight_offset.i2_luma_weight;
2664
744k
                    ai4_inv_wt[j] =
2665
744k
                        ps_inter_ctb_prms->pi4_inv_wt
2666
744k
                            [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2667
744k
                    ai4_inv_wt_shift_val[j] =
2668
744k
                        ps_inter_ctb_prms->pi4_inv_wt_shift_val
2669
744k
                            [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2670
744k
                }
2671
66.1k
                else if(PRED_BI == pred_dir)
2672
66.1k
                {
                    /* Bi-pred uses the non-weighted input with default weights */
2673
66.1k
                    apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
2674
66.1k
                    ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
2675
66.1k
                    ai4_inv_wt[j] = i4_default_src_wt;
2676
66.1k
                    ai4_inv_wt_shift_val[j] = 0;
2677
66.1k
                }
2678
0
                else
2679
0
                {
                    /* NOTE(review): entry j of the four per-partition arrays is  */
                    /* left unset here; if ASSERT compiles to nothing they would  */
                    /* be read uninitialised below - confirm ASSERT behaviour     */
2680
0
                    ASSERT(0);
2681
0
                }
2682
2683
8.17M
                part_type_cost += ps_pu_result->i4_mv_cost;
2684
8.17M
            }
2685
            /* The source can be used in place when there is one partition or  */
            /* both partitions have the same weight. The left operand is       */
            /* tested first, so ai2_wt[1] is not read for a single partition.  */
2686
6.52M
            if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1]))
2687
6.52M
            {
2688
6.52M
                ps_err_prms->pu1_inp = apu1_inp[0];
2689
6.52M
                ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
2690
6.52M
                i2_wght = ai2_wt[0];
2691
6.52M
            }
2692
0
            else
2693
0
            {
                /* The two partitions use differently weighted inputs: acquire  */
                /* one scratch pred buffer and copy each partition's region     */
                /* from its own input into it                                   */
2694
0
                if(1 != ihevce_get_free_pred_buf_indices(
2695
0
                            &u1_inp_buf_idx,
2696
0
                            &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2697
0
                            1))
2698
0
                {
                    /* NOTE(review): pu1_inp, i4_inp_stride and i2_wght are not  */
                    /* set on this path - confirm ASSERT never compiles out      */
2699
0
                    ASSERT(0);
2700
0
                }
2701
0
                else
2702
0
                {
2703
0
                    U08 *pu1_dst =
2704
0
                        ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2705
0
                    U08 *pu1_src = apu1_inp[0];
                    /* PU width and height computed as (b4_wd/b4_ht + 1) << 2 */
2706
0
                    U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2;
2707
0
                    U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
2708
0
                    U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
2709
0
                    U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
2710
                    /* First partition, copied from its own input */
2711
0
                    ps_cmn_utils_optimised_function_list->pf_copy_2d(
2712
0
                        pu1_dst,
2713
0
                        MAX_CU_SIZE,
2714
0
                        pu1_src,
2715
0
                        ps_inter_ctb_prms->i4_inp_stride,
2716
0
                        u1_pu1_wd,
2717
0
                        u1_pu1_ht);
2718
                    /* Advance past the first partition: by its height in rows   */
                    /* when gai1_is_part_vertical is set for it, otherwise by    */
                    /* its width in columns                                      */
2719
0
                    pu1_dst +=
2720
0
                        (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2721
0
                             ? u1_pu1_ht * MAX_CU_SIZE
2722
0
                             : u1_pu1_wd);
2723
0
                    pu1_src =
2724
0
                        apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2725
0
                                           ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
2726
0
                                           : u1_pu1_wd);
2727
                    /* Second partition, copied from the second input */
2728
0
                    ps_cmn_utils_optimised_function_list->pf_copy_2d(
2729
0
                        pu1_dst,
2730
0
                        MAX_CU_SIZE,
2731
0
                        pu1_src,
2732
0
                        ps_inter_ctb_prms->i4_inp_stride,
2733
0
                        u1_pu2_wd,
2734
0
                        u1_pu2_ht);
2735
2736
0
                    ps_err_prms->pu1_inp =
2737
0
                        ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2738
0
                    ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
2739
0
                    i2_wght = ai2_wt[1];
2740
0
                }
2741
0
            }
2742
            /* SATD with TU recursion, using the function of the current CU size */
2743
6.52M
#if !DISABLE_TU_RECURSION
2744
6.52M
            i4_satd = apf_err_compute[e_cu_size](
2745
6.52M
                ps_err_prms,
2746
6.52M
                lambda,
2747
6.52M
                lambda_qshift,
2748
6.52M
                ps_inter_ctb_prms->i4_qstep_ls8,
2749
6.52M
                ps_ctxt->ps_func_selector);
2750
#else
            /* NOTE(review): compiled only when DISABLE_TU_RECURSION is non-zero. */
            /* pf_err_compute is not declared in this function - confirm that it  */
            /* exists at file scope. Both arms of the if/else below are identical */
2751
            ps_err_prms->pi4_sad_grid = &i4_satd;
2752
2753
            pf_err_compute(ps_err_prms);
2754
2755
            if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64))
2756
            {
2757
                ai4_tu_split_flag[0] = 1;
2758
                ai4_tu_split_flag[1] = 1;
2759
                ai4_tu_split_flag[2] = 1;
2760
                ai4_tu_split_flag[3] = 1;
2761
2762
                ps_err_prms->i4_tu_split_cost = 0;
2763
            }
2764
            else
2765
            {
2766
                ai4_tu_split_flag[0] = 1;
2767
                ai4_tu_split_flag[1] = 1;
2768
                ai4_tu_split_flag[2] = 1;
2769
                ai4_tu_split_flag[3] = 1;
2770
2771
                ps_err_prms->i4_tu_split_cost = 0;
2772
            }
2773
#endif
2774
            /* Scale the satd by the weight chosen above */
2775
6.52M
#if UNI_SATD_SCALE
2776
6.52M
            i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
2777
6.52M
#endif
2778
            /* Noise handling: a term derived from the source and pred variances */
            /* is multiplied by the alpha multiplier and used to scale the satd  */
2779
6.52M
            if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
2780
0
            {
2781
0
                ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
2782
0
                ULWORD64 u8_src_variance, u8_pred_variance;
2783
0
                unsigned long u4_shift_val;
2784
0
                S32 i4_bits_req;
2785
0
                S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
2786
2787
0
                if(1 == u1_num_parts)
2788
0
                {
2789
0
                    u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
2790
0
                    u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
2791
2792
0
                    if(e_cu_size == CU_8x8)
2793
0
                    {
                        /* This local e_part_id shadows the function level one: */
                        /* an NxN part id selected by the low bit of the x and  */
                        /* y offsets of the CU                                  */
2794
0
                        PART_ID_T e_part_id = (PART_ID_T)(
2795
0
                            (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
2796
0
                            ((ps_cu_results->u1_y_off & 1) << 1));
2797
2798
0
                        u4_shift_val = ihevce_calc_stim_injected_variance(
2799
0
                            ps_inter_ctb_prms->pu8_part_src_sigmaX,
2800
0
                            ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2801
0
                            &u8_src_variance,
2802
0
                            ai4_inv_wt[0],
2803
0
                            ai4_inv_wt_shift_val[0],
2804
0
                            ps_inter_ctb_prms->wpred_log_wdc,
2805
0
                            e_part_id);
2806
0
                    }
2807
0
                    else
2808
0
                    {
                        /* Here e_part_id is the function level 2Nx2N part id */
2809
0
                        u4_shift_val = ihevce_calc_stim_injected_variance(
2810
0
                            ps_inter_ctb_prms->pu8_part_src_sigmaX,
2811
0
                            ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2812
0
                            &u8_src_variance,
2813
0
                            ai4_inv_wt[0],
2814
0
                            ai4_inv_wt_shift_val[0],
2815
0
                            ps_inter_ctb_prms->wpred_log_wdc,
2816
0
                            e_part_id);
2817
0
                    }
2818
2819
0
                    u8_pred_variance = u8_pred_variance >> u4_shift_val;
2820
2821
0
                    GETRANGE64(i4_bits_req, u8_pred_variance);
2822
                    /* Shift both variances down by the same amount when more  */
                    /* than 27 bits are required                               */
2823
0
                    if(i4_bits_req > 27)
2824
0
                    {
2825
0
                        u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2826
0
                        u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2827
0
                    }
2828
                    /* Equal variances give 1 << STIM_Q_FORMAT directly; this  */
                    /* also keeps the division below away from the case where  */
                    /* both variances are zero                                 */
2829
0
                    if(u8_src_variance == u8_pred_variance)
2830
0
                    {
2831
0
                        u8_temp_var = (1 << STIM_Q_FORMAT);
2832
0
                    }
2833
0
                    else
2834
0
                    {
                        /* Rounded (2 * src * pred << STIM_Q_FORMAT) / (src^2 + pred^2) */
2835
0
                        u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2836
0
                        u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2837
0
                        u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2838
0
                                       (u8_pred_variance * u8_pred_variance);
2839
0
                        u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2840
0
                        u8_temp_var = (u8_temp_var / u8_temp_var1);
2841
0
                    }
2842
2843
0
                    i4_noise_term = (UWORD32)u8_temp_var;
2844
2845
0
                    ASSERT(i4_noise_term >= 0);
2846
2847
0
                    i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2848
                    /* Rounded satd * ((1 << q_level) - noise_term) >> q_level */
2849
0
                    u8_temp_var = i4_satd;
2850
0
                    u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2851
0
                    u8_temp_var += (1 << ((i4_q_level)-1));
2852
0
                    i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2853
0
                }
2854
0
                else /*if(e_cu_size <= CU_16x16)*/
2855
0
                {
2856
0
                    unsigned long temp_shift_val;
2857
0
                    PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
2858
0
                        ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
2859
0
                    };
2860
                    /* Source variance across both partitions */
2861
0
                    u4_shift_val = ihevce_calc_variance_for_diff_weights(
2862
0
                        ps_inter_ctb_prms->pu8_part_src_sigmaX,
2863
0
                        ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2864
0
                        &u8_src_variance,
2865
0
                        ai4_inv_wt,
2866
0
                        ai4_inv_wt_shift_val,
2867
0
                        ps_best_results->as_pu_results,
2868
0
                        ps_inter_ctb_prms->wpred_log_wdc,
2869
0
                        ae_part_id,
2870
0
                        gau1_blk_size_to_wd[e_blk_size],
2871
0
                        u1_num_parts,
2872
0
                        1);
2873
2874
0
                    temp_shift_val = u4_shift_val;
2875
                    /* Pred variance; the shift returned by this call is not  */
                    /* used, the one saved from the source call is applied    */
2876
0
                    u4_shift_val = ihevce_calc_variance_for_diff_weights(
2877
0
                        au8_pred_sigmaX[i],
2878
0
                        au8_pred_sigmaXSquare[i],
2879
0
                        &u8_pred_variance,
2880
0
                        ai4_inv_wt,
2881
0
                        ai4_inv_wt_shift_val,
2882
0
                        ps_best_results->as_pu_results,
2883
0
                        0,
2884
0
                        ae_part_id,
2885
0
                        gau1_blk_size_to_wd[e_blk_size],
2886
0
                        u1_num_parts,
2887
0
                        0);
2888
2889
0
                    u8_pred_variance = u8_pred_variance >> temp_shift_val;
2890
2891
0
                    GETRANGE64(i4_bits_req, u8_pred_variance);
2892
2893
0
                    if(i4_bits_req > 27)
2894
0
                    {
2895
0
                        u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2896
0
                        u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2897
0
                    }
2898
2899
0
                    if(u8_src_variance == u8_pred_variance)
2900
0
                    {
2901
0
                        u8_temp_var = (1 << STIM_Q_FORMAT);
2902
0
                    }
2903
0
                    else
2904
0
                    {
2905
0
                        u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2906
0
                        u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2907
0
                        u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2908
0
                                       (u8_pred_variance * u8_pred_variance);
2909
0
                        u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2910
0
                        u8_temp_var = (u8_temp_var / u8_temp_var1);
2911
0
                    }
2912
2913
0
                    i4_noise_term = (UWORD32)u8_temp_var;
2914
2915
0
                    ASSERT(i4_noise_term >= 0);
2916
0
                    ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
2917
2918
0
                    i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2919
2920
0
                    u8_temp_var = i4_satd;
2921
0
                    u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2922
0
                    u8_temp_var += (1 << ((i4_q_level)-1));
2923
0
                    i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2924
2925
0
                    ASSERT(i4_satd >= 0);
2926
0
                }
2927
0
            }
2928
            /* Release the scratch pred buffer if one was acquired above */
2929
6.52M
            if(u1_inp_buf_idx != UCHAR_MAX)
2930
0
            {
2931
0
                ihevce_set_pred_buf_as_free(
2932
0
                    &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2933
0
                    u1_inp_buf_idx);
2934
0
            }
2935
            /* Candidate cost = sum of partition mv costs + satd */
2936
6.52M
            part_type_cost += i4_satd;
2937
2938
            /*Update the best results with the new results */
2939
6.52M
            ps_best_results->i4_tot_cost = part_type_cost;
2940
2941
6.52M
            ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost;
2942
2943
6.52M
            ASSERT(ai4_tu_split_flag[0] >= 0);
            /* All four split / early cbf entries are stored for a 64x64 CU; */
            /* only entry 0 is stored for the other CU sizes                 */
2944
6.52M
            if(e_cu_size == CU_64x64)
2945
72.5k
            {
2946
72.5k
                ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2947
72.5k
                ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
2948
72.5k
                ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
2949
72.5k
                ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
2950
2951
                /* Update the TU early cbf flags into the best results structure */
2952
72.5k
                ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2953
72.5k
                ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
2954
72.5k
                ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
2955
72.5k
                ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
2956
72.5k
            }
2957
6.45M
            else
2958
6.45M
            {
2959
6.45M
                ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2960
6.45M
                ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2961
6.45M
            }
2962
            /* Insertion sort step: entries 0..i-1 of best_cost/ai4_final_idx */
            /* are already in ascending cost order. The last slot still holds */
            /* MAX_32BIT_VAL here, so the test fails only for a cost that is  */
            /* not below MAX_32BIT_VAL.                                       */
            /* NOTE(review): in that case a slot of ai4_final_idx stays -1    */
            /* and the matching ps_best_results entry is not written below    */
2963
6.52M
            if(part_type_cost < best_cost[num_best_cand - 1])
2964
6.52M
            {
2965
                /* Push and sort current part type if it is one of the num_best_cand */
2966
8.17M
                for(cand_idx = 0; cand_idx < i; cand_idx++)
2967
3.01M
                {
2968
3.01M
                    if(part_type_cost <= best_cost[cand_idx])
2969
1.37M
                    {
                        /* Shift entries cand_idx..i-1 down by one to open the slot */
2970
1.37M
                        memmove(
2971
1.37M
                            &ai4_final_idx[cand_idx + 1],
2972
1.37M
                            &ai4_final_idx[cand_idx],
2973
1.37M
                            sizeof(WORD32) * (i - cand_idx));
2974
1.37M
                        memmove(
2975
1.37M
                            &best_cost[cand_idx + 1],
2976
1.37M
                            &best_cost[cand_idx],
2977
1.37M
                            sizeof(WORD32) * (i - cand_idx));
2978
1.37M
                        break;
2979
1.37M
                    }
2980
3.01M
                }
2981
                /* cand_idx is the insertion point, or i when no earlier entry is costlier */
2982
6.52M
                ai4_final_idx[cand_idx] = i;
2983
6.52M
                best_cost[cand_idx] = part_type_cost;
2984
6.52M
            }
2985
6.52M
        }
2986
2987
4.30M
        ps_cu_results->u1_num_best_results = num_best_cand;
2988
        /* Copy the candidates to the CU results in ascending order of cost */
2989
10.8M
        for(i = 0; i < num_best_cand; i++)
2990
6.52M
        {
2991
6.52M
            ASSERT(ai4_final_idx[i] < num_best_cand);
2992
2993
6.52M
            if(ai4_final_idx[i] != -1)
2994
6.52M
            {
2995
6.52M
                memcpy(
2996
6.52M
                    &(ps_cu_results->ps_best_results[i]),
2997
6.52M
                    &(ps_part_type_results[ai4_final_idx[i]]),
2998
6.52M
                    sizeof(part_type_results_t));
2999
6.52M
            }
3000
6.52M
        }
3001
4.30M
    }
3002
    /* Mark pred buffers 0 .. MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 3 as free */
3003
30.1M
    for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++)
3004
25.8M
    {
3005
25.8M
        ihevce_set_pred_buf_as_free(
3006
25.8M
            &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
3007
25.8M
    }
3008
4.30M
}
3009
3010
/**
3011
**************************************************************************************************
3012
*  @fn     hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3013
*
3014
*  @brief Does the population of the inter_cu_results structure with the results after the
3015
*           subpel refinement
3016
*
3017
*          This is called post subpel refinement for 16x16s, 8x8s and
3018
*          for post merge evaluation for 32x32,64x64 CUs
3019
*
3020
*  @param[in,out] ps_search_results : Search results data structure
3021
*                 - ps_cu_results : cu_results data structure
3022
*                   ps_pu_result  : Pointer to the memory for storing PU's
3023
*
3024
****************************************************************************************************
3025
*/
3026
void hme_populate_pus(
3027
    me_ctxt_t *ps_thrd_ctxt,
3028
    me_frm_ctxt_t *ps_ctxt,
3029
    hme_subpel_prms_t *ps_subpel_prms,
3030
    search_results_t *ps_search_results,
3031
    inter_cu_results_t *ps_cu_results,
3032
    inter_pu_results_t *ps_pu_results,
3033
    pu_result_t *ps_pu_result,
3034
    inter_ctb_prms_t *ps_inter_ctb_prms,
3035
    wgt_pred_ctxt_t *ps_wt_prms,
3036
    layer_ctxt_t *ps_curr_layer,
3037
    U08 *pu1_pred_dir_searched,
3038
    WORD32 i4_num_active_ref)
3039
1.97M
{
3040
1.97M
    WORD32 i, j, k;
3041
1.97M
    WORD32 i4_part_mask;
3042
1.97M
    WORD32 i4_ref;
3043
1.97M
    UWORD8 e_part_id;
3044
1.97M
    pu_result_t *ps_curr_pu;
3045
1.97M
    search_node_t *ps_search_node;
3046
1.97M
    part_attr_t *ps_part_attr;
3047
1.97M
    UWORD8 e_cu_size = ps_search_results->e_cu_size;
3048
1.97M
    WORD32 num_results_per_part_l0 = 0;
3049
1.97M
    WORD32 num_results_per_part_l1 = 0;
3050
1.97M
    WORD32 i4_ref_id;
3051
1.97M
    WORD32 i4_total_act_ref;
3052
3053
1.97M
    i4_part_mask = ps_search_results->i4_part_mask;
3054
3055
    /* pred_buf_mngr init */
3056
1.97M
    {
3057
1.97M
        hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);
3058
3059
1.97M
        ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;
3060
3061
13.7M
        for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++)
3062
11.8M
        {
3063
11.8M
            ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] =
3064
11.8M
                ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
3065
11.8M
            ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i);
3066
11.8M
        }
3067
3068
1.97M
        ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
3069
1.97M
    }
3070
3071
1.97M
    ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
3072
1.97M
    ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
3073
1.97M
    ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;
3074
3075
    /* Populate the CU level parameters */
3076
1.97M
    ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
3077
1.97M
    ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
3078
1.97M
    ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
3079
1.97M
    ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
3080
1.97M
    ps_cu_results->u1_y_off = ps_search_results->u1_y_off;
3081
3082
1.97M
    i4_total_act_ref =
3083
1.97M
        ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
3084
    /*Populate the partition results
3085
    Loop across all the active references that are enabled right now */
3086
17.7M
    for(i = 0; i < MAX_PART_TYPES; i++)
3087
15.7M
    {
3088
15.7M
        if(!(i4_part_mask & gai4_part_type_to_part_mask[i]))
3089
8.42M
        {
3090
8.42M
            continue;
3091
8.42M
        }
3092
3093
21.2M
        for(j = 0; j < gau1_num_parts_in_part_type[i]; j++)
3094
13.9M
        {
3095
            /* Partition ID for the current PU */
3096
13.9M
            e_part_id = (UWORD8)ge_part_type_to_part_id[i][j];
3097
13.9M
            ps_part_attr = &gas_part_attr_in_cu[e_part_id];
3098
3099
13.9M
            num_results_per_part_l0 = 0;
3100
13.9M
            num_results_per_part_l1 = 0;
3101
3102
13.9M
            ps_pu_results->aps_pu_results[0][e_part_id] =
3103
13.9M
                ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
3104
13.9M
            ps_pu_results->aps_pu_results[1][e_part_id] =
3105
13.9M
                ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
3106
3107
31.1M
            for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
3108
17.1M
            {
3109
17.1M
                U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
3110
3111
34.3M
                for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
3112
17.1M
                {
3113
17.1M
                    ps_search_node =
3114
17.1M
                        &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k];
3115
3116
                    /* If subpel is done then the node is a valid candidate else break the loop */
3117
17.1M
                    if(ps_search_node->u1_subpel_done)
3118
17.1M
                    {
3119
17.1M
                        i4_ref_id = ps_search_node->i1_ref_idx;
3120
3121
17.1M
                        ASSERT(i4_ref_id >= 0);
3122
3123
                        /* Check whether current ref_id is past or future and assign the pointers to L0 or L1 list accordingly */
3124
17.1M
                        if(!u1_pred_dir)
3125
13.8M
                        {
3126
13.8M
                            ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] +
3127
13.8M
                                         num_results_per_part_l0;
3128
3129
13.8M
                            ASSERT(
3130
13.8M
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
3131
13.8M
                                ps_inter_ctb_prms->u1_num_active_ref_l0);
3132
3133
                            /* Always populate the ref_idx value in l0_ref_idx */
3134
13.8M
                            ps_curr_pu->pu.mv.i1_l0_ref_idx =
3135
13.8M
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
3136
13.8M
                            ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
3137
13.8M
                            ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
3138
13.8M
                            ps_curr_pu->pu.b2_pred_mode = PRED_L0;
3139
3140
13.8M
                            ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] =
3141
13.8M
                                ps_wt_prms->apu1_wt_inp[i4_ref_id];
3142
3143
13.8M
                            num_results_per_part_l0++;
3144
13.8M
                        }
3145
3.34M
                        else
3146
3.34M
                        {
3147
3.34M
                            ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] +
3148
3.34M
                                         num_results_per_part_l1;
3149
3150
3.34M
                            ASSERT(
3151
3.34M
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
3152
3.34M
                                ps_inter_ctb_prms->u1_num_active_ref_l1);
3153
3154
                            /* populate the ref_idx value in l1_ref_idx */
3155
3.34M
                            ps_curr_pu->pu.mv.i1_l1_ref_idx =
3156
3.34M
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
3157
3.34M
                            ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
3158
3.34M
                            ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
3159
3.34M
                            ps_curr_pu->pu.b2_pred_mode = PRED_L1;
3160
3161
                            /* Copy the values from weighted params to common_frm_aprams */
3162
3.34M
                            ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] =
3163
3.34M
                                ps_wt_prms->apu1_wt_inp[i4_ref_id];
3164
3165
3.34M
                            num_results_per_part_l1++;
3166
3.34M
                        }
3167
17.1M
                        ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
3168
17.1M
                        ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
3169
3170
17.1M
#if UNI_SATD_SCALE
3171
                        /*SATD is scaled by weight. Hence rescale the SATD */
3172
17.1M
                        ps_curr_pu->i4_tot_cost =
3173
17.1M
                            ((ps_search_node->i4_sad *
3174
17.1M
                                  ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
3175
17.1M
                              (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
3176
17.1M
                             ps_inter_ctb_prms->wpred_log_wdc) +
3177
17.1M
                            ps_search_node->i4_mv_cost;
3178
17.1M
#endif
3179
3180
                        /* Packed format of the width and height */
3181
17.1M
                        ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1;
3182
17.1M
                        ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1;
3183
3184
17.1M
                        ps_curr_pu->pu.b4_pos_x =
3185
17.1M
                            (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >>
3186
17.1M
                             2);
3187
17.1M
                        ps_curr_pu->pu.b4_pos_y =
3188
17.1M
                            (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >>
3189
17.1M
                             2);
3190
3191
17.1M
                        ps_curr_pu->pu.b1_intra_flag = 0;
3192
3193
                        /* Unweighted input */
3194
17.1M
                        ps_inter_ctb_prms->pu1_non_wt_inp =
3195
17.1M
                            ps_wt_prms->apu1_wt_inp[i4_total_act_ref];
3196
3197
17.1M
                        ps_search_node++;
3198
17.1M
                    }
3199
0
                    else
3200
0
                    {
3201
0
                        break;
3202
0
                    }
3203
17.1M
                }
3204
17.1M
            }
3205
3206
13.9M
            ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0;
3207
13.9M
            ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1;
3208
13.9M
        }
3209
7.34M
    }
3210
1.97M
}
3211
3212
/**
3213
*********************************************************************************************************
3214
*  @fn     hme_populate_pus_8x8_cu(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3215
*
3216
*  @brief Does the population of the inter_cu_results structure with the results after the
3217
*           subpel refinement
3218
*
3219
*          This is called post subpel refinmenent for 16x16s, 8x8s and
3220
*          for post merge evaluation for 32x32,64x64 CUs
3221
*
3222
*  @param[in,out] ps_search_results : Search results data structure
3223
*                 - ps_cu_results : cu_results data structure
3224
*                   ps_pu_results : Pointer for the PU's
3225
*                   ps_pu_result  : Pointer to the memory for storing PU's
3226
*
3227
*********************************************************************************************************
3228
*/
3229
void hme_populate_pus_8x8_cu(
3230
    me_ctxt_t *ps_thrd_ctxt,
3231
    me_frm_ctxt_t *ps_ctxt,
3232
    hme_subpel_prms_t *ps_subpel_prms,
3233
    search_results_t *ps_search_results,
3234
    inter_cu_results_t *ps_cu_results,
3235
    inter_pu_results_t *ps_pu_results,
3236
    pu_result_t *ps_pu_result,
3237
    inter_ctb_prms_t *ps_inter_ctb_prms,
3238
    U08 *pu1_pred_dir_searched,
3239
    WORD32 i4_num_active_ref,
3240
    U08 u1_blk_8x8_mask)
3241
621k
{
3242
621k
    WORD32 i, k;
3243
621k
    WORD32 i4_part_mask;
3244
621k
    WORD32 i4_ref;
3245
621k
    pu_result_t *ps_curr_pu;
3246
621k
    search_node_t *ps_search_node;
3247
621k
    WORD32 i4_ref_id;
3248
621k
    WORD32 x_off, y_off;
3249
3250
    /* Make part mask available as only 2Nx2N
3251
    Later support for 4x8 and 8x4 needs to be added */
3252
621k
    i4_part_mask = ENABLE_2Nx2N;
3253
3254
621k
    x_off = ps_search_results->u1_x_off;
3255
621k
    y_off = ps_search_results->u1_y_off;
3256
3257
3.10M
    for(i = 0; i < 4; i++)
3258
2.48M
    {
3259
2.48M
        if(u1_blk_8x8_mask & (1 << i))
3260
2.42M
        {
3261
2.42M
            UWORD8 u1_x_pos, u1_y_pos;
3262
3263
2.42M
            WORD32 num_results_per_part_l0 = 0;
3264
2.42M
            WORD32 num_results_per_part_l1 = 0;
3265
3266
2.42M
            ps_cu_results->u1_cu_size = CU_8x8;
3267
2.42M
            ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
3268
2.42M
            ps_cu_results->i4_part_mask = i4_part_mask;
3269
2.42M
            ps_cu_results->u1_x_off = x_off + (i & 1) * 8;
3270
2.42M
            ps_cu_results->u1_y_off = y_off + (i >> 1) * 8;
3271
2.42M
            ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);
3272
3273
2.42M
            ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
3274
2.42M
            ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3275
3276
2.42M
            u1_x_pos = ps_cu_results->u1_x_off >> 2;
3277
2.42M
            u1_y_pos = ps_cu_results->u1_y_off >> 2;
3278
3279
2.42M
            if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
3280
91.9k
            {
3281
91.9k
                ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];
3282
3283
91.9k
                ps_cu_results->i4_part_mask = 0;
3284
91.9k
                ps_cu_results->u1_num_best_results = 0;
3285
3286
91.9k
                ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3287
3288
91.9k
                ps_curr_pu->pu.b4_wd = 1;
3289
91.9k
                ps_curr_pu->pu.b4_ht = 1;
3290
91.9k
                ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3291
91.9k
                ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3292
91.9k
                ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3293
3294
91.9k
                ps_cu_results++;
3295
91.9k
                ps_pu_results++;
3296
3297
91.9k
                continue;
3298
91.9k
            }
3299
3300
2.33M
            ps_pu_results->aps_pu_results[0][0] =
3301
2.33M
                ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
3302
2.33M
            ps_pu_results->aps_pu_results[1][0] =
3303
2.33M
                ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
3304
3305
5.29M
            for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
3306
2.96M
            {
3307
2.96M
                U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
3308
3309
                /* Select the NxN partition node for the current ref_idx in the search results*/
3310
2.96M
                ps_search_node =
3311
2.96M
                    ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];
3312
3313
5.92M
                for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
3314
2.96M
                {
3315
                    /* If subpel is done then the node is a valid candidate else break the loop */
3316
2.96M
                    if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
3317
2.96M
                    {
3318
2.96M
                        i4_ref_id = ps_search_node->i1_ref_idx;
3319
3320
2.96M
                        ASSERT(i4_ref_id >= 0);
3321
3322
2.96M
                        if(!u1_pred_dir)
3323
2.31M
                        {
3324
2.31M
                            ps_curr_pu =
3325
2.31M
                                ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3326
3327
2.31M
                            ASSERT(
3328
2.31M
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
3329
2.31M
                                ps_inter_ctb_prms->u1_num_active_ref_l0);
3330
3331
2.31M
                            ps_curr_pu->pu.mv.i1_l0_ref_idx =
3332
2.31M
                                ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
3333
2.31M
                            ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
3334
2.31M
                            ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
3335
2.31M
                            ps_curr_pu->pu.b2_pred_mode = PRED_L0;
3336
3337
2.31M
                            num_results_per_part_l0++;
3338
2.31M
                        }
3339
646k
                        else
3340
646k
                        {
3341
646k
                            ps_curr_pu =
3342
646k
                                ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3343
3344
646k
                            ASSERT(
3345
646k
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
3346
646k
                                ps_inter_ctb_prms->u1_num_active_ref_l1);
3347
3348
646k
                            ps_curr_pu->pu.mv.i1_l1_ref_idx =
3349
646k
                                ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
3350
646k
                            ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
3351
646k
                            ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
3352
646k
                            ps_curr_pu->pu.b2_pred_mode = PRED_L1;
3353
3354
646k
                            num_results_per_part_l1++;
3355
646k
                        }
3356
2.96M
                        ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
3357
2.96M
                        ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
3358
3359
2.96M
#if UNI_SATD_SCALE
3360
                        /*SATD is scaled by weight. Hence rescale the SATD */
3361
2.96M
                        ps_curr_pu->i4_tot_cost =
3362
2.96M
                            ((ps_search_node->i4_sad *
3363
2.96M
                                  ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
3364
2.96M
                              (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
3365
2.96M
                             ps_inter_ctb_prms->wpred_log_wdc) +
3366
2.96M
                            ps_search_node->i4_mv_cost;
3367
2.96M
#endif
3368
3369
2.96M
                        ps_curr_pu->pu.b4_wd = 1;
3370
2.96M
                        ps_curr_pu->pu.b4_ht = 1;
3371
2.96M
                        ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3372
2.96M
                        ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3373
2.96M
                        ps_curr_pu->pu.b1_intra_flag = 0;
3374
3375
2.96M
                        ps_search_node++;
3376
2.96M
                    }
3377
0
                    else
3378
0
                    {
3379
                        /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
3380
                        to remove 8x8's as possible candidates during evaluation */
3381
3382
0
                        ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3383
3384
0
                        ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3385
3386
0
                        ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3387
3388
0
                        ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3389
3390
0
                        break;
3391
0
                    }
3392
2.96M
                }
3393
2.96M
            }
3394
3395
            /* Update the num_results per_part across lists L0 and L1 */
3396
2.33M
            ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
3397
2.33M
            ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
3398
2.33M
        }
3399
2.39M
        ps_cu_results++;
3400
2.39M
        ps_pu_results++;
3401
2.39M
    }
3402
621k
}
3403
3404
/**
3405
********************************************************************************
3406
*  @fn     hme_insert_intra_nodes_post_bipred
3407
*
3408
*  @brief  Compares intra costs (populated by IPE) with the best inter costs
3409
*          (populated after evaluating bi-pred) and updates the best results
3410
*          if intra cost is better
3411
*
3412
*  @param[in,out]  ps_cu_results    [inout] : Best results structure of CU
3413
*                  ps_cur_ipe_ctb   [in]    : intra results for the current CTB
3414
*                  i4_frm_qstep     [in]    : current frame quantizer(qscale)*
3415
*
3416
*  @return None
3417
********************************************************************************
3418
*/
3419
void hme_insert_intra_nodes_post_bipred(
3420
    inter_cu_results_t *ps_cu_results,
3421
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
3422
    WORD32 i4_frm_qstep)
3423
4.29M
{
3424
4.29M
    WORD32 i;
3425
4.29M
    WORD32 num_results;
3426
4.29M
    WORD32 cu_size = ps_cu_results->u1_cu_size;
3427
4.29M
    UWORD8 u1_x_off = ps_cu_results->u1_x_off;
3428
4.29M
    UWORD8 u1_y_off = ps_cu_results->u1_y_off;
3429
3430
    /* Id of the 32x32 block, 16x16 block in a CTB */
3431
4.29M
    WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
3432
4.29M
    WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
3433
3434
    /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
3435
4.29M
    WORD32 disable_intra64 = 0;
3436
4.29M
    WORD32 disable_intra32 = 0;
3437
4.29M
    WORD32 disable_intra16 = 0;
3438
3439
4.29M
    S32 i4_intra_2nx2n_cost;
3440
3441
    /* ME final results for this CU (post seeding of best uni/bi pred results) */
3442
4.29M
    part_type_results_t *ps_best_result;
3443
3444
4.29M
    i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;
3445
3446
    /*If inter candidates are enabled then enter the for loop to update the intra candidate */
3447
3448
4.29M
    if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
3449
91.9k
    {
3450
91.9k
        ps_cu_results->u1_num_best_results = 1;
3451
91.9k
    }
3452
3453
4.29M
    num_results = ps_cu_results->u1_num_best_results;
3454
3455
4.29M
    ps_best_result = &ps_cu_results->ps_best_results[0];
3456
3457
    /* Disable intra16/32/64 flags based on split flags recommended by IPE */
3458
4.29M
    if(ps_cur_ipe_ctb->u1_split_flag)
3459
4.20M
    {
3460
4.20M
        disable_intra64 = 1;
3461
4.20M
        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
3462
1.84M
        {
3463
1.84M
            disable_intra32 = 1;
3464
3465
1.84M
            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
3466
1.84M
                   .as_intra16_analyse[i4_16x16_id]
3467
1.84M
                   .b1_split_flag)
3468
581k
            {
3469
581k
                disable_intra16 = 1;
3470
581k
            }
3471
1.84M
        }
3472
4.20M
    }
3473
3474
    /* Derive the intra cost based on current cu size and offset */
3475
4.29M
    switch(cu_size)
3476
4.29M
    {
3477
2.42M
    case CU_8x8:
3478
2.42M
    {
3479
2.42M
        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];
3480
3481
        /* Accounting for coding noise in the open loop IPE cost */
3482
2.42M
        i4_intra_2nx2n_cost +=
3483
2.42M
            ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3484
3485
2.42M
        break;
3486
0
    }
3487
3488
1.49M
    case CU_16x16:
3489
1.49M
    {
3490
1.49M
        i4_intra_2nx2n_cost =
3491
1.49M
            ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
3492
3493
        /* Accounting for coding noise in the open loop IPE cost */
3494
1.49M
        i4_intra_2nx2n_cost +=
3495
1.49M
            ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */
3496
3497
1.49M
        if(disable_intra16)
3498
139k
        {
3499
            /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
3500
139k
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3501
139k
        }
3502
1.49M
        break;
3503
0
    }
3504
3505
348k
    case CU_32x32:
3506
348k
    {
3507
348k
        i4_intra_2nx2n_cost =
3508
348k
            ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
3509
3510
        /* Accounting for coding noise in the open loop IPE cost */
3511
348k
        i4_intra_2nx2n_cost +=
3512
348k
            (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3513
3514
348k
        if(disable_intra32)
3515
160k
        {
3516
            /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
3517
160k
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3518
160k
        }
3519
348k
        break;
3520
0
    }
3521
3522
25.4k
    case CU_64x64:
3523
25.4k
    {
3524
25.4k
        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
3525
3526
        /* Accounting for coding noise in the open loop IPE cost */
3527
25.4k
        i4_intra_2nx2n_cost +=
3528
25.4k
            (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3529
3530
25.4k
        if(disable_intra64)
3531
24.5k
        {
3532
            /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
3533
24.5k
            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3534
24.5k
        }
3535
25.4k
        break;
3536
0
    }
3537
3538
0
    default:
3539
0
        ASSERT(0);
3540
4.29M
    }
3541
3542
4.29M
    {
3543
        /*****************************************************************/
3544
        /* Intra / Inter cost comparison for  2Nx2N : cu size 8/16/32/64 */
3545
        /* Identify where the current result isto be placed. Basically   */
3546
        /* find the node which has cost just higher than node under test */
3547
        /*****************************************************************/
3548
10.5M
        for(i = 0; i < num_results; i++)
3549
6.50M
        {
3550
            /* Subtrqact the tu_spli_flag_cost from total_inter_cost for fair comparision */
3551
6.50M
            WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
3552
3553
6.50M
            if(i4_intra_2nx2n_cost < inter_cost)
3554
216k
            {
3555
216k
                if(i < (num_results - 1))
3556
12.4k
                {
3557
12.4k
                    memmove(
3558
12.4k
                        ps_best_result + i + 1,
3559
12.4k
                        ps_best_result + i,
3560
12.4k
                        sizeof(ps_best_result[0]) * (num_results - 1 - i));
3561
12.4k
                }
3562
3563
                /* Insert the intra node result */
3564
216k
                ps_best_result[i].u1_part_type = PRT_2Nx2N;
3565
216k
                ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
3566
216k
                ps_best_result[i].ai4_tu_split_flag[0] = 0;
3567
216k
                ps_best_result[i].ai4_tu_split_flag[1] = 0;
3568
216k
                ps_best_result[i].ai4_tu_split_flag[2] = 0;
3569
216k
                ps_best_result[i].ai4_tu_split_flag[3] = 0;
3570
3571
                /* Populate intra flag, cost and default mvs, refidx for intra pu */
3572
216k
                ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
3573
                //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
3574
216k
                ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
3575
216k
                ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
3576
216k
                ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
3577
216k
                ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
3578
216k
                ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
3579
216k
                ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
3580
216k
                ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
3581
216k
                ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
3582
3583
216k
                break;
3584
216k
            }
3585
6.50M
        }
3586
4.29M
    }
3587
4.29M
}
3588
3589
S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
3590
    me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
3591
33.6k
{
3592
33.6k
    double lambda;
3593
33.6k
    double lambda_modifier;
3594
33.6k
    WORD32 i4_cu_qp;
3595
33.6k
    frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
3596
    //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
3597
33.6k
    WORD32 i4_frame_qp;
3598
33.6k
    rc_quant_t *ps_rc_quant_ctxt;
3599
33.6k
    WORD32 i4_is_bpic;
3600
3601
33.6k
    ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
3602
    //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
3603
33.6k
    i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
3604
33.6k
    ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
3605
33.6k
    i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
3606
3607
33.6k
    i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];
3608
3609
33.6k
    {
3610
33.6k
        if(ps_ctxt->i4_l0me_qp_mod)
3611
33.6k
        {
3612
33.6k
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
3613
#if LAMDA_BASED_ON_QUANT
3614
            WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
3615
#else
3616
33.6k
            WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
3617
33.6k
#endif
3618
33.6k
            i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
3619
33.6k
                       QP_LEVEL_MOD_ACT_FACTOR;
3620
3621
33.6k
#endif
3622
33.6k
        }
3623
33.6k
        if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
3624
1.50k
            i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
3625
32.1k
        else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
3626
0
            i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
3627
3628
33.6k
        i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
3629
33.6k
    }
3630
3631
33.6k
    if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
3632
0
        i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
3633
33.6k
    else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
3634
149
        i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
3635
3636
33.6k
    lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
3637
3638
33.6k
    lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
3639
3640
33.6k
    if(i4_is_bpic)
3641
8.52k
    {
3642
8.52k
        lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3643
8.52k
    }
3644
33.6k
    if(ps_ctxt->i4_use_const_lamda_modifier)
3645
0
    {
3646
0
        if(ps_ctxt->s_frm_prms.is_i_pic)
3647
0
        {
3648
0
            lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3649
0
        }
3650
0
        else
3651
0
        {
3652
0
            lambda_modifier = CONST_LAMDA_MOD_VAL;
3653
0
        }
3654
0
    }
3655
33.6k
    lambda *= lambda_modifier;
3656
3657
33.6k
    return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
3658
33.6k
}
3659
3660
/**
3661
********************************************************************************
3662
*  @fn     hme_update_dynamic_search_params
3663
*
3664
*  @brief  Update the Dynamic search params based on the current MVs
3665
*
3666
*  @param[in,out]  ps_dyn_range_prms    [inout] : Dyn. Range Param str.
3667
*                  i2_mvy               [in]    : current MV y comp.
3668
*
3669
*  @return None
3670
********************************************************************************
3671
*/
3672
void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
3673
15.8M
{
3674
    /* If MV is up large, update i2_dyn_max_y */
3675
15.8M
    if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
3676
120k
        ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
3677
    /* If MV is down large, update i2_dyn_min_y */
3678
15.8M
    if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
3679
126k
        ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
3680
15.8M
}
3681
3682
void hme_add_new_node_to_a_sorted_array(
3683
    search_node_t *ps_result_node,
3684
    search_node_t **pps_sorted_array,
3685
    U08 *pu1_shifts,
3686
    U32 u4_num_results_updated,
3687
    U08 u1_shift)
3688
21.1M
{
3689
21.1M
    U32 i;
3690
3691
21.1M
    if(NULL == pu1_shifts)
3692
1.92M
    {
3693
1.92M
        S32 i4_cur_node_cost = ps_result_node->i4_tot_cost;
3694
3695
3.25M
        for(i = 0; i < u4_num_results_updated; i++)
3696
1.54M
        {
3697
1.54M
            if(i4_cur_node_cost < pps_sorted_array[i]->i4_tot_cost)
3698
215k
            {
3699
215k
                memmove(
3700
215k
                    &pps_sorted_array[i + 1],
3701
215k
                    &pps_sorted_array[i],
3702
215k
                    (u4_num_results_updated - i) * sizeof(search_node_t *));
3703
3704
215k
                break;
3705
215k
            }
3706
1.54M
        }
3707
1.92M
    }
3708
19.2M
    else
3709
19.2M
    {
3710
19.2M
        S32 i4_cur_node_cost =
3711
19.2M
            (u1_shift == 0) ? ps_result_node->i4_tot_cost
3712
19.2M
                            : (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
3713
3714
91.3M
        for(i = 0; i < u4_num_results_updated; i++)
3715
82.5M
        {
3716
82.5M
            S32 i4_prev_node_cost = (pu1_shifts[i] == 0) ? pps_sorted_array[i]->i4_tot_cost
3717
82.5M
                                                         : (pps_sorted_array[i]->i4_tot_cost +
3718
60.2M
                                                            (1 << (pu1_shifts[i] - 1))) >>
3719
60.2M
                                                               pu1_shifts[i];
3720
3721
82.5M
            if(i4_cur_node_cost < i4_prev_node_cost)
3722
10.3M
            {
3723
10.3M
                memmove(
3724
10.3M
                    &pps_sorted_array[i + 1],
3725
10.3M
                    &pps_sorted_array[i],
3726
10.3M
                    (u4_num_results_updated - i) * sizeof(search_node_t *));
3727
10.3M
                memmove(
3728
10.3M
                    &pu1_shifts[i + 1], &pu1_shifts[i], (u4_num_results_updated - i) * sizeof(U08));
3729
3730
10.3M
                break;
3731
10.3M
            }
3732
82.5M
        }
3733
3734
19.2M
        pu1_shifts[i] = u1_shift;
3735
19.2M
    }
3736
3737
21.1M
    pps_sorted_array[i] = ps_result_node;
3738
21.1M
}
3739
3740
/**
********************************************************************************
*  @fn     hme_find_pos_of_implicitly_stored_ref_id
*
*  @brief  Scans an array of reference indices and returns the position of
*          the (i4_result_id + 1)-th entry that equals i1_ref_idx
*
*  @param[in]  pi1_ref_idx    : array of reference indices to be scanned
*
*  @param[in]  i1_ref_idx     : reference index being looked for
*
*  @param[in]  i4_result_id   : number of matching entries to skip before
*                               a match is reported (0 => first match)
*
*  @param[in]  i4_num_results : number of entries in pi1_ref_idx
*
*  @return Position of the requested match, or -1 when the array does not
*          contain enough matching entries
********************************************************************************
*/
S32 hme_find_pos_of_implicitly_stored_ref_id(
    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
{
    S32 i4_pos;

    for(i4_pos = 0; i4_pos < i4_num_results; i4_pos++)
    {
        if(pi1_ref_idx[i4_pos] != i1_ref_idx)
        {
            continue;
        }

        /* A match: either it is the one asked for, or one more to skip */
        if(0 == i4_result_id)
        {
            return i4_pos;
        }

        i4_result_id--;
    }

    return -1;
}
3762
3763
static __inline void hme_search_node_populator(
3764
    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
3765
39.7M
{
3766
39.7M
    ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
3767
39.7M
    ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
3768
39.7M
    ps_search_node->i1_ref_idx = i1_ref_idx;
3769
39.7M
    ps_search_node->u1_is_avail = 1;
3770
39.7M
    ps_search_node->u1_subpel_done = 0;
3771
39.7M
}
3772
3773
/**
********************************************************************************
*  @fn     hme_populate_search_candidates
*
*  @brief  Fills the search nodes of ps_ctxt->ps_search_cands for one block.
*          Each candidate is one of three kinds:
*            - zero MV candidate, using the default or alternate default
*              reference id
*            - spatial candidate, read from the current layer's MV bank at
*              the LEFT / TOPLEFT / TOP / TOPRIGHT neighbour (shift of -2),
*              or the zero MV with the default reference id when that
*              neighbour is flagged as unavailable
*            - projected candidate, read from the coarse layer's MV bank
*              (shift of 1)
*          The kind and location of every candidate come from the
*          gae_search_cand_* / gai1_search_cand_* lookup tables
*
*  @param[in,out]  ps_ctxt : candidate initialisation context; the search
*                            nodes reachable through ps_search_cands are
*                            written
*
*  @return Number of candidates populated, i.e. the smaller of the table
*          driven maximum and twice ps_ctxt->i4_max_num_init_cands
********************************************************************************
*/
S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
{
    hme_mv_t *ps_mv;

    S32 wd_c, ht_c;
    S32 blksize_p, blksize_c;
    S32 i;
    S08 *pi1_ref_idx;
    /* Cache for storing offsets */
    S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];

    layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
    layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
    layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
    layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
    search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
    hme_mv_t s_zero_mv = { 0 };

    S32 i4_pos_x = ps_ctxt->i4_pos_x;
    S32 i4_pos_y = ps_ctxt->i4_pos_y;
    S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
    S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
    U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
    U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
    U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
    U08 u1_num_results_in_coarse_mvbank =
        (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
                           : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
    /* Within a block of the coarse MV bank, the entries of the second */
    /* prediction direction follow all entries of the first one        */
    S32 i4_init_offset_projected =
        (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
    S32 i4_init_offset_spatial =
        (u1_pred_dir_ctr == 1)
            ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
            : 0;
    U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
    U08 u1_max_num_search_cands =
        gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
    S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
    /* Bit 'loc' is set once ai4_cand_offsets[loc] has been computed */
    U16 u2_is_offset_available = 0;
    U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;

    /* Width and ht of the current layer */
    wd_c = ps_curr_layer->i4_wd;
    ht_c = ps_curr_layer->i4_ht;

    blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
    blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];

    /* ASSERT for valid sizes */
    ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));

    /* Offset of the current block inside the current layer's MV bank */
    {
        S32 x = i4_pos_x >> 4;
        S32 y = i4_pos_y >> 4;

        /* When the MV bank block size differs from the search block size, */
        /* one search block spans two MV bank blocks in each direction     */
        if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
        {
            x *= 2;
            y *= 2;

            u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
        }

        i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
                                  ps_curr_layer_mvbank->i4_num_mvs_per_blk;
    }

    for(i = 0; i < i4_num_srch_cands; i++)
    {
        SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
            gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
                                                                     [i];
        SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
            gae_search_cand_type_to_location_map[e_search_cand_type];
        S08 i1_result_id = MIN(
            gai1_search_cand_type_to_result_id_map[e_search_cand_type],
            (e_search_cand_loc < 0 ? 0
                                   : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
        U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
        U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
        U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
                                (ZERO_MV_ALTREF == e_search_cand_type);

        /* When spatial candidates are available, use them, else use the projected candidates */
        /* This is required since some blocks will never have certain spatial candidates, and in order */
        /* to accommodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */
        /* all candidates apart from the 'LEFT' have been marked as projected */
        if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
            (e_search_cand_loc == TOPRIGHT)) &&
           (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
        {
            if(e_search_cand_loc == TOPLEFT)
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
                                     !ps_ctxt->u1_is_left_available;
            }
            else if(e_search_cand_loc == TOPRIGHT)
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
            }
            else
            {
                u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
            }

            u1_is_proj_cand = !u1_is_spatial_cand;
        }

        /* bit 0 : zero MV, bit 1 : spatial, bit 2 : projected. */
        /* Exactly one of the three is expected to be set       */
        switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
        {
        case 1:
        {
            hme_search_node_populator(
                ps_search_cands[i].ps_search_node,
                &s_zero_mv,
                (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
                                                : ps_ctxt->i1_alt_default_ref_id,
                0);

            break;
        }
        case 2:
        {
            S08 i1_mv_magnitude_shift = 0;

            S32 i4_offset = i4_init_offset_spatial;

            i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
            i4_offset += i1_result_id;

            switch(e_search_cand_loc)
            {
            case LEFT:
            {
                if(ps_ctxt->u1_is_left_available)
                {
                    i1_mv_magnitude_shift = -2;

                    i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;

                    ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                    pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                }
                else
                {
                    i1_mv_magnitude_shift = 0;

                    ps_mv = &s_zero_mv;
                    pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                }

                break;
            }
            case TOPLEFT:
            {
                if(ps_ctxt->u1_is_topLeft_available)
                {
                    i1_mv_magnitude_shift = -2;

                    i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
                    i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                    ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                    pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                }
                else
                {
                    i1_mv_magnitude_shift = 0;

                    ps_mv = &s_zero_mv;
                    pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                }

                break;
            }
            case TOP:
            {
                if(ps_ctxt->u1_is_top_available)
                {
                    i1_mv_magnitude_shift = -2;

                    i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                    ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                    pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                }
                else
                {
                    i1_mv_magnitude_shift = 0;

                    ps_mv = &s_zero_mv;
                    pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                }

                break;
            }
            case TOPRIGHT:
            {
                if(ps_ctxt->u1_is_topRight_available)
                {
                    i1_mv_magnitude_shift = -2;

                    i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
                                 u1_search_blk_to_spatial_mvbank_blk_size_factor;
                    i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;

                    ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
                    pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
                }
                else
                {
                    i1_mv_magnitude_shift = 0;
                    ps_mv = &s_zero_mv;
                    pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
                }

                break;
            }
            default:
            {
                /* Unexpected location for a spatial candidate */
                ASSERT(0);

                /* If ASSERT compiles to nothing, do not dereference         */
                /* uninitialised or stale pointers below; use the same       */
                /* fallback as an unavailable neighbour                      */
                i1_mv_magnitude_shift = 0;

                ps_mv = &s_zero_mv;
                pi1_ref_idx = &ps_ctxt->i1_default_ref_id;

                break;
            }
            }

            hme_search_node_populator(
                ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);

            break;
        }
        case 4:
        {
            ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
            ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);

            i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);

            /* The coarse layer offset of a location is computed only once */
            /* per call and then reused for further candidates at the same */
            /* location                                                    */
            if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
            {
                S32 x, y;

                x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
                y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];

                /* Safety check to avoid uninitialized access across temporal layers */
                x = CLIP3(x, 0, (wd_c - blksize_p));
                y = CLIP3(y, 0, (ht_c - blksize_p));

                /* Project the positions to prev layer */
                x = x >> blksize_p;
                y = y >> blksize_p;

                ai4_cand_offsets[e_search_cand_loc] =
                    (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
                ai4_cand_offsets[e_search_cand_loc] +=
                    (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
                ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;

                u2_is_offset_available |= (1 << e_search_cand_loc);
            }

            ps_mv =
                ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
            pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
                          ai4_cand_offsets[e_search_cand_loc] + i1_result_id;

            hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);

            break;
        }
        default:
        {
            /* Candidate is not exactly one of zero MV / spatial / projected */
            ASSERT(0);

            /* If ASSERT compiles to nothing, do not leave the node with */
            /* whatever it held before; fall back to the zero MV         */
            hme_search_node_populator(
                ps_search_cands[i].ps_search_node, &s_zero_mv, ps_ctxt->i1_default_ref_id, 0);

            break;
        }
        }

        ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
        ASSERT(
            !u1_pred_dir
                ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
                   i4_num_act_ref_l0)
                : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
                   ps_ctxt->i4_num_act_ref_l1));
    }

    return i4_num_srch_cands;
}
4063
4064
void hme_mv_clipper(
4065
    hme_search_prms_t *ps_search_prms_blk,
4066
    S32 i4_num_srch_cands,
4067
    S08 i1_check_for_mult_refs,
4068
    U08 u1_fpel_refine_extent,
4069
    U08 u1_hpel_refine_extent,
4070
    U08 u1_qpel_refine_extent)
4071
1.89M
{
4072
1.89M
    S32 candt;
4073
1.89M
    range_prms_t *ps_range_prms;
4074
4075
41.6M
    for(candt = 0; candt < i4_num_srch_cands; candt++)
4076
39.7M
    {
4077
39.7M
        search_node_t *ps_search_node;
4078
4079
39.7M
        ps_search_node = ps_search_prms_blk->ps_search_candts[candt].ps_search_node;
4080
39.7M
        ps_range_prms = ps_search_prms_blk->aps_mv_range[ps_search_node->i1_ref_idx];
4081
4082
        /* Clip the motion vectors as well here since after clipping
4083
        two candidates can become same and they will be removed during deduplication */
4084
39.7M
        CLIP_MV_WITHIN_RANGE(
4085
39.7M
            ps_search_node->ps_mv->i2_mvx,
4086
39.7M
            ps_search_node->ps_mv->i2_mvy,
4087
39.7M
            ps_range_prms,
4088
39.7M
            u1_fpel_refine_extent,
4089
39.7M
            u1_hpel_refine_extent,
4090
39.7M
            u1_qpel_refine_extent);
4091
39.7M
    }
4092
1.89M
}
4093
4094
void hme_init_pred_buf_info(
4095
    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
4096
    hme_pred_buf_mngr_t *ps_buf_mngr,
4097
    U08 u1_pu1_wd,
4098
    U08 u1_pu1_ht,
4099
    PART_TYPE_T e_part_type)
4100
19.5M
{
4101
19.5M
    U08 u1_pred_buf_array_id;
4102
4103
19.5M
    if(1 != ihevce_get_free_pred_buf_indices(
4104
19.5M
                &u1_pred_buf_array_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
4105
0
    {
4106
0
        ASSERT(0);
4107
0
    }
4108
19.5M
    else
4109
19.5M
    {
4110
19.5M
        ps_info[0][0].i4_pred_stride = MAX_CU_SIZE;
4111
19.5M
        ps_info[0][0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id];
4112
19.5M
        ps_info[0][0].u1_pred_buf_array_id = u1_pred_buf_array_id;
4113
4114
19.5M
        if(PRT_2Nx2N != e_part_type)
4115
4.92M
        {
4116
4.92M
            ps_info[0][1].i4_pred_stride = MAX_CU_SIZE;
4117
4.92M
            ps_info[0][1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id] +
4118
4.92M
                                     (gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]]
4119
4.92M
                                          ? u1_pu1_ht * ps_info[0][1].i4_pred_stride
4120
4.92M
                                          : u1_pu1_wd);
4121
4.92M
            ps_info[0][1].u1_pred_buf_array_id = u1_pred_buf_array_id;
4122
4.92M
        }
4123
19.5M
    }
4124
19.5M
}
4125
4126
void hme_debrief_bipred_eval(
4127
    part_type_results_t *ps_part_type_result,
4128
    hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
4129
    hme_pred_buf_mngr_t *ps_pred_buf_mngr,
4130
    U08 *pu1_allocated_pred_buf_array_indixes,
4131
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list
4132
4133
)
4134
6.52M
{
4135
6.52M
    PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;
4136
4137
6.52M
    U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
4138
6.52M
    U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];
4139
4140
6.52M
    if(0 == ps_part_type_result->u1_part_type)
4141
4.88M
    {
4142
4.88M
        if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
4143
43.5k
        {
4144
43.5k
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
4145
4146
43.5k
            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
4147
43.5k
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
4148
4149
43.5k
            ihevce_set_pred_buf_as_free(
4150
43.5k
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
4151
4152
43.5k
            ihevce_set_pred_buf_as_free(
4153
43.5k
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
4154
43.5k
        }
4155
4.84M
        else
4156
4.84M
        {
4157
4.84M
            ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
4158
4.84M
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
4159
4160
4.84M
            ihevce_set_pred_buf_as_free(
4161
4.84M
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
4162
4163
4.84M
            ihevce_set_pred_buf_as_free(
4164
4.84M
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
4165
4166
4.84M
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
4167
4.66M
            {
4168
4.66M
                ihevce_set_pred_buf_as_free(
4169
4.66M
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
4170
4.66M
            }
4171
4.84M
        }
4172
4.88M
    }
4173
1.64M
    else
4174
1.64M
    {
4175
1.64M
        U08 *pu1_src_pred;
4176
1.64M
        U08 *pu1_dst_pred;
4177
1.64M
        S32 i4_src_pred_stride;
4178
1.64M
        S32 i4_dst_pred_stride;
4179
4180
1.64M
        U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
4181
1.64M
        U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
4182
1.64M
        U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
4183
1.64M
        U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;
4184
4185
1.64M
        U08 u1_condition_for_switch =
4186
1.64M
            (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
4187
1.64M
            ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);
4188
4189
1.64M
        switch(u1_condition_for_switch)
4190
1.64M
        {
4191
1.62M
        case 0:
4192
1.62M
        {
4193
1.62M
            ps_part_type_result->pu1_pred =
4194
1.62M
                ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
4195
1.62M
            ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;
4196
4197
1.62M
            ihevce_set_pred_buf_as_free(
4198
1.62M
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
4199
4200
1.62M
            ihevce_set_pred_buf_as_free(
4201
1.62M
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
4202
4203
1.62M
            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
4204
1.52M
            {
4205
1.52M
                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
4206
1.52M
                pu1_dst_pred = ps_part_type_result->pu1_pred;
4207
1.52M
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
4208
1.52M
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4209
4210
1.52M
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4211
1.52M
                    pu1_dst_pred,
4212
1.52M
                    i4_dst_pred_stride,
4213
1.52M
                    pu1_src_pred,
4214
1.52M
                    i4_src_pred_stride,
4215
1.52M
                    u1_pu1_wd,
4216
1.52M
                    u1_pu1_ht);
4217
1.52M
            }
4218
4219
1.62M
            if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
4220
1.55M
            {
4221
1.55M
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
4222
1.55M
                pu1_dst_pred = ps_part_type_result->pu1_pred +
4223
1.55M
                               (u1_is_part_vertical
4224
1.55M
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
4225
1.55M
                                    : u1_pu1_wd);
4226
1.55M
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
4227
1.55M
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4228
4229
1.55M
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4230
1.55M
                    pu1_dst_pred,
4231
1.55M
                    i4_dst_pred_stride,
4232
1.55M
                    pu1_src_pred,
4233
1.55M
                    i4_src_pred_stride,
4234
1.55M
                    u1_pu2_wd,
4235
1.55M
                    u1_pu2_ht);
4236
1.55M
            }
4237
4238
1.62M
            break;
4239
0
        }
4240
9.54k
        case 1:
4241
9.54k
        {
4242
9.54k
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
4243
4244
9.54k
            ihevce_set_pred_buf_as_free(
4245
9.54k
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
4246
4247
            /* Copy PU1 pred into PU2's pred buf */
4248
9.54k
            if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
4249
9.54k
               (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
4250
1.31k
            {
4251
1.31k
                ps_part_type_result->pu1_pred =
4252
1.31k
                    ps_pred_buf_info[0][1].pu1_pred -
4253
1.31k
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
4254
1.31k
                                         : u1_pu1_wd);
4255
1.31k
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
4256
4257
1.31k
                ihevce_set_pred_buf_as_free(
4258
1.31k
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
4259
4260
1.31k
                pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
4261
1.31k
                pu1_dst_pred = ps_part_type_result->pu1_pred;
4262
1.31k
                i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
4263
1.31k
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4264
4265
1.31k
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4266
1.31k
                    pu1_dst_pred,
4267
1.31k
                    i4_dst_pred_stride,
4268
1.31k
                    pu1_src_pred,
4269
1.31k
                    i4_src_pred_stride,
4270
1.31k
                    u1_pu1_wd,
4271
1.31k
                    u1_pu1_ht);
4272
1.31k
            }
4273
8.23k
            else
4274
8.23k
            {
4275
8.23k
                ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
4276
8.23k
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
4277
4278
8.23k
                ihevce_set_pred_buf_as_free(
4279
8.23k
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
4280
4281
8.23k
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
4282
8.23k
                pu1_dst_pred = ps_part_type_result->pu1_pred;
4283
8.23k
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
4284
8.23k
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4285
4286
8.23k
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4287
8.23k
                    pu1_dst_pred,
4288
8.23k
                    i4_dst_pred_stride,
4289
8.23k
                    pu1_src_pred,
4290
8.23k
                    i4_src_pred_stride,
4291
8.23k
                    u1_pu2_wd,
4292
8.23k
                    u1_pu2_ht);
4293
8.23k
            }
4294
4295
9.54k
            break;
4296
9.54k
        }
4297
3.12k
        case 2:
4298
3.12k
        {
4299
3.12k
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
4300
4301
3.12k
            ihevce_set_pred_buf_as_free(
4302
3.12k
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
4303
4304
            /* Copy PU2 pred into PU1's pred buf */
4305
3.12k
            if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
4306
3.12k
               (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
4307
151
            {
4308
151
                ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
4309
151
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
4310
4311
151
                ihevce_set_pred_buf_as_free(
4312
151
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
4313
4314
151
                pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
4315
151
                pu1_dst_pred = ps_part_type_result->pu1_pred +
4316
151
                               (u1_is_part_vertical
4317
151
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
4318
151
                                    : u1_pu1_wd);
4319
151
                i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
4320
151
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4321
4322
151
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4323
151
                    pu1_dst_pred,
4324
151
                    i4_dst_pred_stride,
4325
151
                    pu1_src_pred,
4326
151
                    i4_src_pred_stride,
4327
151
                    u1_pu2_wd,
4328
151
                    u1_pu2_ht);
4329
151
            }
4330
2.97k
            else
4331
2.97k
            {
4332
2.97k
                ps_part_type_result->pu1_pred =
4333
2.97k
                    ps_pred_buf_info[2][1].pu1_pred -
4334
2.97k
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
4335
2.97k
                                         : u1_pu1_wd);
4336
2.97k
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
4337
4338
2.97k
                ihevce_set_pred_buf_as_free(
4339
2.97k
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
4340
4341
2.97k
                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
4342
2.97k
                pu1_dst_pred = ps_part_type_result->pu1_pred;
4343
2.97k
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
4344
2.97k
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
4345
4346
2.97k
                ps_cmn_utils_optimised_function_list->pf_copy_2d(
4347
2.97k
                    pu1_dst_pred,
4348
2.97k
                    i4_dst_pred_stride,
4349
2.97k
                    pu1_src_pred,
4350
2.97k
                    i4_src_pred_stride,
4351
2.97k
                    u1_pu1_wd,
4352
2.97k
                    u1_pu1_ht);
4353
2.97k
            }
4354
4355
3.12k
            break;
4356
3.12k
        }
4357
4.97k
        case 3:
4358
4.97k
        {
4359
4.97k
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
4360
4.97k
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
4361
4.97k
            ASSERT(
4362
4.97k
                ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
4363
4.97k
                ps_pred_buf_info[2][0].u1_pred_buf_array_id);
4364
4365
4.97k
            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
4366
4.97k
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
4367
4368
4.97k
            ihevce_set_pred_buf_as_free(
4369
4.97k
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
4370
4371
4.97k
            break;
4372
4.97k
        }
4373
1.64M
        }
4374
1.64M
    }
4375
6.52M
}
4376
4377
U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
4378
    SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
4379
2.32M
{
4380
2.32M
    U08 u1_priority_val =
4381
2.32M
        gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type];
4382
4383
2.32M
    if(UCHAR_MAX == u1_priority_val)
4384
0
    {
4385
0
        ASSERT(0);
4386
0
    }
4387
4388
2.32M
    ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
4389
4390
2.32M
    return u1_priority_val;
4391
2.32M
}
4392
4393
/**
********************************************************************************
*  @fn     hme_decide_search_candidate_priority_in_l0_me
*
*  @brief  Looks up the priority of a search candidate type in
*          gau1_search_cand_priority_in_l0_me
*
*  @param[in]  e_cand_type : search candidate type (column of the table)
*
*  @param[in]  u1_index    : search candidate list index (row of the table)
*
*  @return Priority value read from the table. A value of UCHAR_MAX, or one
*          above MAX_INIT_CANDTS, trips an ASSERT
********************************************************************************
*/
U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
{
    const U08 u1_priority = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];

    /* UCHAR_MAX in the table is not a usable priority */
    if(u1_priority == UCHAR_MAX)
    {
        ASSERT(0);
    }

    ASSERT(u1_priority <= MAX_INIT_CANDTS);

    return u1_priority;
}
4406
4407
void hme_search_cand_data_init(
4408
    S32 *pi4_id_Z,
4409
    S32 *pi4_id_coloc,
4410
    S32 *pi4_num_coloc_cands,
4411
    U08 *pu1_search_candidate_list_index,
4412
    S32 i4_num_act_ref_l0,
4413
    S32 i4_num_act_ref_l1,
4414
    U08 u1_is_bidir_enabled,
4415
    U08 u1_4x4_blk_in_l1me)
4416
85.3k
{
4417
85.3k
    S32 i, j;
4418
85.3k
    S32 i4_num_coloc_cands;
4419
4420
85.3k
    U08 u1_search_candidate_list_index;
4421
4422
85.3k
    if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me)
4423
35.3k
    {
4424
35.3k
        S32 i;
4425
4426
35.3k
        u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2;
4427
35.3k
        i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4428
4429
35.3k
        switch(i4_num_act_ref_l0)
4430
35.3k
        {
4431
19.3k
        case 1:
4432
19.3k
        {
4433
58.0k
            for(i = 0; i < 2; i++)
4434
38.7k
            {
4435
38.7k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4436
38.7k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4437
38.7k
                    u1_search_candidate_list_index);
4438
38.7k
            }
4439
4440
19.3k
            break;
4441
0
        }
4442
15.8k
        case 2:
4443
15.8k
        {
4444
79.4k
            for(i = 0; i < 4; i++)
4445
63.5k
            {
4446
63.5k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4447
63.5k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4448
63.5k
                    u1_search_candidate_list_index);
4449
63.5k
            }
4450
4451
15.8k
            break;
4452
0
        }
4453
35
        case 3:
4454
35
        {
4455
245
            for(i = 0; i < 6; i++)
4456
210
            {
4457
210
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4458
210
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4459
210
                    u1_search_candidate_list_index);
4460
210
            }
4461
4462
35
            break;
4463
0
        }
4464
113
        case 4:
4465
113
        {
4466
1.01k
            for(i = 0; i < 8; i++)
4467
904
            {
4468
904
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4469
904
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4470
904
                    u1_search_candidate_list_index);
4471
904
            }
4472
4473
113
            break;
4474
0
        }
4475
0
        default:
4476
0
        {
4477
0
            ASSERT(0);
4478
0
        }
4479
35.3k
        }
4480
4481
35.3k
        *pi4_num_coloc_cands = i4_num_coloc_cands;
4482
35.3k
        *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4483
35.3k
    }
4484
49.9k
    else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me)
4485
31.4k
    {
4486
31.4k
        S32 i;
4487
4488
31.4k
        i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4489
31.4k
        u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1;
4490
4491
31.4k
        switch(i4_num_act_ref_l0)
4492
31.4k
        {
4493
7.65k
        case 1:
4494
7.65k
        {
4495
22.9k
            for(i = 0; i < 2; i++)
4496
15.3k
            {
4497
15.3k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4498
15.3k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4499
15.3k
                    u1_search_candidate_list_index);
4500
15.3k
            }
4501
4502
7.65k
            pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4503
7.65k
                PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4504
4505
7.65k
            pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4506
7.65k
                PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4507
4508
7.65k
            pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4509
7.65k
                PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4510
4511
7.65k
            i4_num_coloc_cands += 3;
4512
4513
7.65k
            break;
4514
0
        }
4515
10.7k
        case 2:
4516
10.7k
        {
4517
53.8k
            for(i = 0; i < 4; i++)
4518
43.1k
            {
4519
43.1k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4520
43.1k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4521
43.1k
                    u1_search_candidate_list_index);
4522
43.1k
            }
4523
4524
10.7k
            pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4525
10.7k
                PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4526
4527
10.7k
            pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4528
10.7k
                PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4529
4530
10.7k
            pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4531
10.7k
                PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4532
4533
10.7k
            pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4534
10.7k
                PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4535
4536
10.7k
            pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4537
10.7k
                PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4538
4539
10.7k
            pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4540
10.7k
                PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4541
4542
10.7k
            i4_num_coloc_cands += 6;
4543
4544
10.7k
            break;
4545
0
        }
4546
3.05k
        case 3:
4547
3.05k
        {
4548
21.3k
            for(i = 0; i < 6; i++)
4549
18.3k
            {
4550
18.3k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4551
18.3k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4552
18.3k
                    u1_search_candidate_list_index);
4553
18.3k
            }
4554
4555
3.05k
            pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4556
3.05k
                PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4557
4558
3.05k
            pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4559
3.05k
                PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4560
4561
3.05k
            pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4562
3.05k
                PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4563
4564
3.05k
            pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4565
3.05k
                PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4566
4567
3.05k
            pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4568
3.05k
                PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4569
4570
3.05k
            pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4571
3.05k
                PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4572
4573
3.05k
            i4_num_coloc_cands += 6;
4574
4575
3.05k
            break;
4576
0
        }
4577
9.93k
        case 4:
4578
9.93k
        {
4579
89.4k
            for(i = 0; i < 8; i++)
4580
79.4k
            {
4581
79.4k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4582
79.4k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4583
79.4k
                    u1_search_candidate_list_index);
4584
79.4k
            }
4585
4586
9.93k
            pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4587
9.93k
                PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4588
4589
9.93k
            pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4590
9.93k
                PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4591
4592
9.93k
            pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4593
9.93k
                PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4594
4595
9.93k
            pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4596
9.93k
                PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4597
4598
9.93k
            pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4599
9.93k
                PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4600
4601
9.93k
            pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4602
9.93k
                PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4603
4604
9.93k
            i4_num_coloc_cands += 6;
4605
4606
9.93k
            break;
4607
0
        }
4608
0
        default:
4609
0
        {
4610
0
            ASSERT(0);
4611
0
        }
4612
31.4k
        }
4613
4614
31.4k
        *pi4_num_coloc_cands = i4_num_coloc_cands;
4615
31.4k
        *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4616
31.4k
    }
4617
18.5k
    else
4618
18.5k
    {
4619
        /* The variable 'u1_search_candidate_list_index' is hardcoded */
4620
        /* to 10 and 11 respectively. But, these values are not returned */
4621
        /* by this function since the actual values are dependent on */
4622
        /* the number of refs in L0 and L1 respectively */
4623
        /* Hence, the actual return values are being recomputed */
4624
        /* in the latter part of this block */
4625
4626
18.5k
        if(!u1_4x4_blk_in_l1me)
4627
8.17k
        {
4628
8.17k
            u1_search_candidate_list_index = 10;
4629
4630
8.17k
            i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4631
4632
24.5k
            for(i = 0; i < i4_num_coloc_cands; i++)
4633
16.3k
            {
4634
16.3k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4635
16.3k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4636
16.3k
                    u1_search_candidate_list_index);
4637
16.3k
            }
4638
8.17k
        }
4639
10.3k
        else
4640
10.3k
        {
4641
10.3k
            u1_search_candidate_list_index = 11;
4642
4643
10.3k
            i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4644
4645
45.7k
            for(i = 0; i < i4_num_coloc_cands; i++)
4646
35.3k
            {
4647
35.3k
                pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4648
35.3k
                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4649
35.3k
                    u1_search_candidate_list_index);
4650
35.3k
            }
4651
4652
10.3k
            pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4653
10.3k
                PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4654
4655
10.3k
            pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4656
10.3k
                PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4657
4658
10.3k
            pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4659
10.3k
                PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4660
10.3k
        }
4661
4662
55.6k
        for(j = 0; j < 2; j++)
4663
37.1k
        {
4664
37.1k
            if(0 == j)
4665
18.5k
            {
4666
18.5k
                pu1_search_candidate_list_index[j] =
4667
18.5k
                    8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
4668
18.5k
                pi4_num_coloc_cands[j] =
4669
18.5k
                    (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);
4670
18.5k
            }
4671
18.5k
            else
4672
18.5k
            {
4673
18.5k
                pu1_search_candidate_list_index[j] =
4674
18.5k
                    8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
4675
18.5k
                pi4_num_coloc_cands[j] =
4676
18.5k
                    (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
4677
18.5k
            }
4678
37.1k
        }
4679
18.5k
    }
4680
4681
85.3k
    if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
4682
85.3k
    {
4683
85.3k
        pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
4684
85.3k
            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
4685
85.3k
    }
4686
4687
85.3k
    if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
4688
39.8k
    {
4689
39.8k
        pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
4690
39.8k
            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
4691
39.8k
    }
4692
85.3k
}
4693
4694
static U08
4695
    hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
4696
0
{
4697
0
    ASSERT(i4_num_valid_parts > 0);
4698
4699
0
    if(1 == i4_num_valid_parts)
4700
0
    {
4701
0
        ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N);
4702
4703
0
        return u1_cu_size;
4704
0
    }
4705
0
    else
4706
0
    {
4707
0
        if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR)
4708
0
        {
4709
0
            return u1_cu_size / 2;
4710
0
        }
4711
0
        else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R)
4712
0
        {
4713
0
            return u1_cu_size / 4;
4714
0
        }
4715
0
    }
4716
4717
0
    return u1_cu_size / 4;
4718
0
}
4719
4720
static U32 hme_compute_variance_of_pu_from_base_blocks(
4721
    ULWORD64 *pu8_SigmaX,
4722
    ULWORD64 *pu8_SigmaXSquared,
4723
    U08 u1_cu_size,
4724
    U08 u1_base_block_size,
4725
    S32 i4_part_id)
4726
0
{
4727
0
    U08 i, j;
4728
0
    ULWORD64 u8_final_variance;
4729
4730
0
    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
4731
0
    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
4732
0
    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
4733
0
    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
4734
0
    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
4735
0
    U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
4736
0
    U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
4737
0
    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
4738
0
    ULWORD64 u8_final_SigmaXSquared = 0;
4739
0
    ULWORD64 u8_final_SigmaX = 0;
4740
4741
0
    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
4742
0
    {
4743
0
        U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
4744
0
                                        ? (gai1_is_part_vertical[i4_part_id]
4745
0
                                               ? 0
4746
0
                                               : (u1_cu_size - i4_part_wd) / u1_base_block_size)
4747
0
                                        : 0;
4748
0
        U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
4749
0
                                     ? (gai1_is_part_vertical[i4_part_id]
4750
0
                                            ? (u1_cu_size - i4_part_ht) / u1_base_block_size
4751
0
                                            : 0)
4752
0
                                     : 0;
4753
0
        U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4754
0
        U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4755
4756
0
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
4757
0
        {
4758
0
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
4759
0
            {
4760
0
                u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4761
0
                u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4762
0
            }
4763
0
        }
4764
4765
0
        u8_final_variance =
4766
0
            u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4767
0
        u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4768
0
        u8_final_variance +=
4769
0
            ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4770
0
             (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4771
0
        u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4772
0
                             (u1_num_base_blocks * u4_num_pixels_in_base_block);
4773
4774
0
        ASSERT(u8_final_variance <= UINT_MAX);
4775
0
    }
4776
0
    else
4777
0
    {
4778
0
        U08 u1_row_start_index;
4779
0
        U08 u1_column_start_index;
4780
0
        U08 u1_row_end_index;
4781
0
        U08 u1_column_end_index;
4782
4783
0
        switch(gau1_part_id_to_part_num[i4_part_id])
4784
0
        {
4785
0
        case 0:
4786
0
        {
4787
0
            u1_row_start_index = 0;
4788
0
            u1_column_start_index = 0;
4789
4790
0
            break;
4791
0
        }
4792
0
        case 1:
4793
0
        {
4794
0
            u1_row_start_index = 0;
4795
0
            u1_column_start_index = u1_num_base_blocks_in_pu_row;
4796
4797
0
            break;
4798
0
        }
4799
0
        case 2:
4800
0
        {
4801
0
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
4802
0
            u1_column_start_index = 0;
4803
4804
0
            break;
4805
0
        }
4806
0
        case 3:
4807
0
        {
4808
0
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
4809
0
            u1_column_start_index = u1_num_base_blocks_in_pu_row;
4810
4811
0
            break;
4812
0
        }
4813
0
        }
4814
4815
0
        u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4816
0
        u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4817
4818
0
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
4819
0
        {
4820
0
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
4821
0
            {
4822
0
                u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4823
0
                u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4824
0
            }
4825
0
        }
4826
4827
0
        u8_final_variance =
4828
0
            u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4829
0
        u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4830
0
        u8_final_variance +=
4831
0
            ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4832
0
             (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4833
0
        u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4834
0
                             (u1_num_base_blocks * u4_num_pixels_in_base_block);
4835
4836
0
        ASSERT(u8_final_variance <= UINT_MAX);
4837
0
    }
4838
4839
0
    return u8_final_variance;
4840
0
}
4841
4842
void hme_compute_variance_for_all_parts(
4843
    U08 *pu1_data,
4844
    S32 i4_data_stride,
4845
    S32 *pi4_valid_part_array,
4846
    U32 *pu4_variance,
4847
    S32 i4_num_valid_parts,
4848
    U08 u1_cu_size)
4849
0
{
4850
0
    ULWORD64 au8_SigmaX[16];
4851
0
    ULWORD64 au8_SigmaXSquared[16];
4852
0
    U08 i, j, k, l;
4853
0
    U08 u1_base_block_size;
4854
0
    U08 u1_num_base_blocks_in_cu_row;
4855
0
    U08 u1_num_base_blocks_in_cu_column;
4856
4857
0
    u1_base_block_size =
4858
0
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
4859
4860
0
    u1_num_base_blocks_in_cu_row = u1_num_base_blocks_in_cu_column =
4861
0
        u1_cu_size / u1_base_block_size;
4862
4863
0
    ASSERT(u1_num_base_blocks_in_cu_row <= 4);
4864
4865
0
    for(i = 0; i < u1_num_base_blocks_in_cu_column; i++)
4866
0
    {
4867
0
        for(j = 0; j < u1_num_base_blocks_in_cu_row; j++)
4868
0
        {
4869
0
            U08 *pu1_buf =
4870
0
                pu1_data + (u1_base_block_size * j) + (u1_base_block_size * i * i4_data_stride);
4871
4872
0
            au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] = 0;
4873
0
            au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] = 0;
4874
4875
0
            for(k = 0; k < u1_base_block_size; k++)
4876
0
            {
4877
0
                for(l = 0; l < u1_base_block_size; l++)
4878
0
                {
4879
0
                    au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] +=
4880
0
                        pu1_buf[l + k * i4_data_stride];
4881
0
                    au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] +=
4882
0
                        pu1_buf[l + k * i4_data_stride] * pu1_buf[l + k * i4_data_stride];
4883
0
                }
4884
0
            }
4885
0
        }
4886
0
    }
4887
4888
0
    for(i = 0; i < i4_num_valid_parts; i++)
4889
0
    {
4890
0
        pu4_variance[pi4_valid_part_array[i]] = hme_compute_variance_of_pu_from_base_blocks(
4891
0
            au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, pi4_valid_part_array[i]);
4892
0
    }
4893
0
}
4894
4895
void hme_compute_final_sigma_of_pu_from_base_blocks(
4896
    U32 *pu4_SigmaX,
4897
    U32 *pu4_SigmaXSquared,
4898
    ULWORD64 *pu8_final_sigmaX,
4899
    ULWORD64 *pu8_final_sigmaX_Squared,
4900
    U08 u1_cu_size,
4901
    U08 u1_base_block_size,
4902
    S32 i4_part_id,
4903
    U08 u1_base_blk_array_stride)
4904
0
{
4905
0
    U08 i, j;
4906
    //U08 u1_num_base_blocks_in_cu_row;
4907
4908
0
    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
4909
0
    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
4910
0
    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
4911
0
    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
4912
0
    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
4913
0
    U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
4914
0
    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
4915
0
    U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);
4916
4917
    /*if (u1_is_for_src)
4918
    {
4919
    u1_num_base_blocks_in_cu_row = 16;
4920
    }
4921
    else
4922
    {
4923
    u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
4924
    }*/
4925
4926
0
    pu8_final_sigmaX[i4_part_id] = 0;
4927
0
    pu8_final_sigmaX_Squared[i4_part_id] = 0;
4928
4929
0
    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
4930
0
    {
4931
0
        U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
4932
0
                                        ? (gai1_is_part_vertical[i4_part_id]
4933
0
                                               ? 0
4934
0
                                               : (u1_cu_size - i4_part_wd) / u1_base_block_size)
4935
0
                                        : 0;
4936
0
        U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
4937
0
                                     ? (gai1_is_part_vertical[i4_part_id]
4938
0
                                            ? (u1_cu_size - i4_part_ht) / u1_base_block_size
4939
0
                                            : 0)
4940
0
                                     : 0;
4941
0
        U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4942
0
        U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4943
4944
0
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
4945
0
        {
4946
0
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
4947
0
            {
4948
0
                pu8_final_sigmaX_Squared[i4_part_id] +=
4949
0
                    pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
4950
0
                pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
4951
0
            }
4952
0
        }
4953
0
    }
4954
0
    else
4955
0
    {
4956
0
        U08 u1_row_start_index;
4957
0
        U08 u1_column_start_index;
4958
0
        U08 u1_row_end_index;
4959
0
        U08 u1_column_end_index;
4960
4961
0
        switch(gau1_part_id_to_part_num[i4_part_id])
4962
0
        {
4963
0
        case 0:
4964
0
        {
4965
0
            u1_row_start_index = 0;
4966
0
            u1_column_start_index = 0;
4967
4968
0
            break;
4969
0
        }
4970
0
        case 1:
4971
0
        {
4972
0
            u1_row_start_index = 0;
4973
0
            u1_column_start_index = u1_num_base_blocks_in_pu_row;
4974
4975
0
            break;
4976
0
        }
4977
0
        case 2:
4978
0
        {
4979
0
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
4980
0
            u1_column_start_index = 0;
4981
4982
0
            break;
4983
0
        }
4984
0
        case 3:
4985
0
        {
4986
0
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
4987
0
            u1_column_start_index = u1_num_base_blocks_in_pu_row;
4988
4989
0
            break;
4990
0
        }
4991
0
        }
4992
4993
0
        u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4994
0
        u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4995
4996
0
        for(i = u1_row_start_index; i < u1_row_end_index; i++)
4997
0
        {
4998
0
            for(j = u1_column_start_index; j < u1_column_end_index; j++)
4999
0
            {
5000
0
                pu8_final_sigmaX_Squared[i4_part_id] +=
5001
0
                    pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
5002
0
                pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
5003
0
            }
5004
0
        }
5005
0
    }
5006
5007
0
    pu8_final_sigmaX_Squared[i4_part_id] *= u4_N;
5008
0
}
5009
5010
void hme_compute_stim_injected_distortion_for_all_parts(
5011
    U08 *pu1_pred,
5012
    S32 i4_pred_stride,
5013
    S32 *pi4_valid_part_array,
5014
    ULWORD64 *pu8_src_sigmaX,
5015
    ULWORD64 *pu8_src_sigmaXSquared,
5016
    S32 *pi4_sad_array,
5017
    S32 i4_alpha_stim_multiplier,
5018
    S32 i4_inv_wt,
5019
    S32 i4_inv_wt_shift_val,
5020
    S32 i4_num_valid_parts,
5021
    S32 i4_wpred_log_wdc,
5022
    U08 u1_cu_size)
5023
0
{
5024
0
    U32 au4_sigmaX[16], au4_sigmaXSquared[16];
5025
0
    ULWORD64 au8_final_ref_sigmaX[17], au8_final_ref_sigmaXSquared[17];
5026
0
    S32 i4_noise_term;
5027
0
    U16 i2_count;
5028
5029
0
    ULWORD64 u8_temp_var, u8_temp_var1, u8_pure_dist;
5030
0
    ULWORD64 u8_ref_X_Square, u8_src_var, u8_ref_var;
5031
5032
0
    U08 u1_base_block_size;
5033
5034
0
    WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
5035
5036
0
    u1_base_block_size =
5037
0
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
5038
5039
0
    ASSERT(u1_cu_size >= 16);
5040
5041
0
    hme_compute_sigmaX_and_sigmaXSquared(
5042
0
        pu1_pred,
5043
0
        i4_pred_stride,
5044
0
        au4_sigmaX,
5045
0
        au4_sigmaXSquared,
5046
0
        u1_base_block_size,
5047
0
        u1_base_block_size,
5048
0
        u1_cu_size,
5049
0
        u1_cu_size,
5050
0
        1,
5051
0
        u1_cu_size / u1_base_block_size);
5052
5053
    /* Noise Term Computation */
5054
0
    for(i2_count = 0; i2_count < i4_num_valid_parts; i2_count++)
5055
0
    {
5056
0
        unsigned long u4_shift_val;
5057
0
        S32 i4_bits_req;
5058
0
        S32 part_id = pi4_valid_part_array[i2_count];
5059
5060
0
        if(i4_alpha_stim_multiplier)
5061
0
        {
5062
            /* Final SigmaX and SigmaX-Squared Calculation */
5063
0
            hme_compute_final_sigma_of_pu_from_base_blocks(
5064
0
                au4_sigmaX,
5065
0
                au4_sigmaXSquared,
5066
0
                au8_final_ref_sigmaX,
5067
0
                au8_final_ref_sigmaXSquared,
5068
0
                u1_cu_size,
5069
0
                u1_base_block_size,
5070
0
                part_id,
5071
0
                (u1_cu_size / u1_base_block_size));
5072
5073
0
            u8_ref_X_Square = (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
5074
0
            u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);
5075
5076
0
            u4_shift_val = ihevce_calc_stim_injected_variance(
5077
0
                pu8_src_sigmaX,
5078
0
                pu8_src_sigmaXSquared,
5079
0
                &u8_src_var,
5080
0
                i4_inv_wt,
5081
0
                i4_inv_wt_shift_val,
5082
0
                i4_wpred_log_wdc,
5083
0
                part_id);
5084
5085
0
            u8_ref_var = u8_ref_var >> u4_shift_val;
5086
5087
0
            GETRANGE64(i4_bits_req, u8_ref_var);
5088
5089
0
            if(i4_bits_req > 27)
5090
0
            {
5091
0
                u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
5092
0
                u8_src_var = u8_src_var >> (i4_bits_req - 27);
5093
0
            }
5094
5095
0
            if(u8_src_var == u8_ref_var)
5096
0
            {
5097
0
                u8_temp_var = (1 << STIM_Q_FORMAT);
5098
0
            }
5099
0
            else
5100
0
            {
5101
0
                u8_temp_var = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
5102
0
                u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
5103
0
                u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
5104
0
                u8_temp_var = (u8_temp_var / u8_temp_var1);
5105
0
                u8_temp_var = (2 * u8_temp_var);
5106
0
            }
5107
5108
0
            i4_noise_term = (UWORD32)u8_temp_var;
5109
5110
0
            ASSERT(i4_noise_term >= 0);
5111
5112
0
            i4_noise_term *= i4_alpha_stim_multiplier;
5113
0
        }
5114
0
        else
5115
0
        {
5116
0
            i4_noise_term = 0;
5117
0
        }
5118
5119
0
        u8_pure_dist = pi4_sad_array[part_id];
5120
0
        u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
5121
0
        u8_pure_dist += (1 << ((i4_q_level)-1));
5122
0
        pi4_sad_array[part_id] = (UWORD32)(u8_pure_dist >> (i4_q_level));
5123
0
    }
5124
0
}
5125
5126
void hme_compute_sigmaX_and_sigmaXSquared(
5127
    U08 *pu1_data,
5128
    S32 i4_buf_stride,
5129
    void *pv_sigmaX,
5130
    void *pv_sigmaXSquared,
5131
    U08 u1_base_blk_wd,
5132
    U08 u1_base_blk_ht,
5133
    U08 u1_blk_wd,
5134
    U08 u1_blk_ht,
5135
    U08 u1_is_sigma_pointer_size_32_bit,
5136
    U08 u1_array_stride)
5137
0
{
5138
0
    U08 i, j, k, l;
5139
0
    U08 u1_num_base_blks_in_row;
5140
0
    U08 u1_num_base_blks_in_column;
5141
5142
0
    u1_num_base_blks_in_row = u1_blk_wd / u1_base_blk_wd;
5143
0
    u1_num_base_blks_in_column = u1_blk_ht / u1_base_blk_ht;
5144
5145
0
    if(u1_is_sigma_pointer_size_32_bit)
5146
0
    {
5147
0
        U32 *sigmaX, *sigmaXSquared;
5148
5149
0
        sigmaX = (U32 *)pv_sigmaX;
5150
0
        sigmaXSquared = (U32 *)pv_sigmaXSquared;
5151
5152
        /* Loop to compute the sigma_X and sigma_X_Squared */
5153
0
        for(i = 0; i < u1_num_base_blks_in_column; i++)
5154
0
        {
5155
0
            for(j = 0; j < u1_num_base_blks_in_row; j++)
5156
0
            {
5157
0
                U32 u4_sigmaX = 0, u4_sigmaXSquared = 0;
5158
0
                U08 *pu1_buf =
5159
0
                    pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);
5160
5161
0
                for(k = 0; k < u1_base_blk_ht; k++)
5162
0
                {
5163
0
                    for(l = 0; l < u1_base_blk_wd; l++)
5164
0
                    {
5165
0
                        u4_sigmaX += pu1_buf[l + k * i4_buf_stride];
5166
0
                        u4_sigmaXSquared +=
5167
0
                            (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
5168
0
                    }
5169
0
                }
5170
5171
0
                sigmaX[j + i * u1_array_stride] = u4_sigmaX;
5172
0
                sigmaXSquared[j + i * u1_array_stride] = u4_sigmaXSquared;
5173
0
            }
5174
0
        }
5175
0
    }
5176
0
    else
5177
0
    {
5178
0
        ULWORD64 *sigmaX, *sigmaXSquared;
5179
5180
0
        sigmaX = (ULWORD64 *)pv_sigmaX;
5181
0
        sigmaXSquared = (ULWORD64 *)pv_sigmaXSquared;
5182
5183
        /* Loop to compute the sigma_X and sigma_X_Squared */
5184
0
        for(i = 0; i < u1_num_base_blks_in_column; i++)
5185
0
        {
5186
0
            for(j = 0; j < u1_num_base_blks_in_row; j++)
5187
0
            {
5188
0
                ULWORD64 u8_sigmaX = 0, u8_sigmaXSquared = 0;
5189
0
                U08 *pu1_buf =
5190
0
                    pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride);
5191
5192
0
                for(k = 0; k < u1_base_blk_ht; k++)
5193
0
                {
5194
0
                    for(l = 0; l < u1_base_blk_wd; l++)
5195
0
                    {
5196
0
                        u8_sigmaX += pu1_buf[l + k * i4_buf_stride];
5197
0
                        u8_sigmaXSquared +=
5198
0
                            (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]);
5199
0
                    }
5200
0
                }
5201
5202
0
                u8_sigmaXSquared = u8_sigmaXSquared * u1_blk_wd * u1_blk_ht;
5203
5204
0
                sigmaX[j + i * u1_array_stride] = u8_sigmaX;
5205
0
                sigmaXSquared[j + i * u1_array_stride] = u8_sigmaXSquared;
5206
0
            }
5207
0
        }
5208
0
    }
5209
0
}
5210
5211
#if TEMPORAL_NOISE_DETECT
5212
WORD32 ihevce_16x16block_temporal_noise_detect(
5213
    WORD32 had_block_size,
5214
    WORD32 ctb_width,
5215
    WORD32 ctb_height,
5216
    ihevce_ctb_noise_params *ps_ctb_noise_params,
5217
    fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
5218
    hme_search_prms_t *s_search_prms_blk,
5219
    me_frm_ctxt_t *ps_ctxt,
5220
    WORD32 num_pred_dir,
5221
    WORD32 i4_num_act_ref_l0,
5222
    WORD32 i4_num_act_ref_l1,
5223
    WORD32 i4_cu_x_off,
5224
    WORD32 i4_cu_y_off,
5225
    wgt_pred_ctxt_t *ps_wt_inp_prms,
5226
    WORD32 input_stride,
5227
    WORD32 index_8x8_block,
5228
    WORD32 num_horz_blocks,
5229
    WORD32 num_8x8_in_ctb_row,
5230
    WORD32 i4_16x16_index)
5231
0
{
5232
0
    WORD32 i;
5233
0
    WORD32 noise_detected;
5234
5235
0
    UWORD8 *pu1_l0_block;
5236
0
    UWORD8 *pu1_l1_block;
5237
5238
0
    WORD32 mean;
5239
0
    UWORD32 variance_8x8;
5240
5241
    /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
5242
0
    WORD16 pi2_residue_16x16[256];
5243
0
    WORD32 mean_16x16;
5244
0
    UWORD32 variance_16x16[2];
5245
5246
    /* throw errors in case of un- supported arguments */
5247
    /* assumptions size is 8 or 16 or 32 */
5248
0
    assert(
5249
0
        (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32));  //ihevc_assert
5250
5251
    /* initialize the variables */
5252
0
    noise_detected = 0;
5253
0
    variance_8x8 = 0;
5254
5255
0
    mean = 0;
5256
5257
0
    {
5258
0
        i = 0;
5259
        /* get the ref/pred and source using the MV of both directions */
5260
        /* pick the best candidates in each direction */
5261
        /* Colocated cands */
5262
0
        {
5263
            // steps to be done
5264
            /* pick the candidates */
5265
            /* do motion compoensation using the candidates got from prev step : pick from the offset */
5266
            /* get the ref or the pred from the offset*/
5267
            /* get the source data */
5268
            /* send the pred - source to noise detect */
5269
            /* do noise detect on the residue of source and pred */
5270
5271
0
            layer_mv_t *ps_layer_mvbank;
5272
0
            hme_mv_t *ps_mv;
5273
5274
            //S32 i;
5275
0
            S32 wd_c, ht_c, wd_p, ht_p;
5276
0
            S32 blksize_p, blk_x, blk_y, i4_offset;
5277
0
            S08 *pi1_ref_idx;
5278
0
            fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data;
5279
0
            layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer;
5280
0
            layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer;
5281
0
            err_prms_t s_err_prms;
5282
0
            S32 i4_blk_wd;
5283
0
            S32 i4_blk_ht;
5284
0
            BLK_SIZE_T e_blk_size;
5285
0
            hme_search_prms_t *ps_search_prms;
5286
0
            S32 i4_part_mask;
5287
0
            S32 *pi4_valid_part_ids;
5288
5289
            /* has list of valid partition to search terminated by -1 */
5290
0
            S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
5291
5292
            /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/
5293
5294
0
            S32 i4_pos_x;
5295
0
            S32 i4_pos_y;
5296
0
            U08 u1_pred_dir;  // = ps_ctxt_2->u1_pred_dir;
5297
0
            U08 u1_default_ref_id = 0;  //ps_ctxt_2->u1_default_ref_id;
5298
0
            S32 i4_inp_off, i4_ref_offset, i4_ref_stride;
5299
5300
            /* The reference is actually an array of ptrs since there are several    */
5301
            /* reference id. So an array gets passed form calling function           */
5302
0
            U08 **ppu1_ref;
5303
5304
            /* Atributes of input candidates */
5305
0
            search_node_t as_search_node[2];
5306
0
            wgt_pred_ctxt_t *ps_wt_inp_prms;
5307
5308
0
            S32 posx;
5309
0
            S32 posy;
5310
0
            S32 i4_num_results_to_proj;
5311
0
            S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
5312
0
            S32 i4_inp_stride;
5313
5314
            /* intialize variables */
5315
            /* Width and ht of current and prev layers */
5316
0
            wd_c = ps_curr_layer->i4_wd;
5317
0
            ht_c = ps_curr_layer->i4_ht;
5318
0
            wd_p = ps_coarse_layer->i4_wd;
5319
0
            ht_p = ps_coarse_layer->i4_ht;
5320
5321
0
            ps_search_prms = s_search_prms_blk;
5322
5323
0
            ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
5324
0
            e_blk_size = ps_search_prms->e_blk_size;
5325
0
            i4_part_mask = ps_search_prms->i4_part_mask;
5326
5327
0
            i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
5328
0
            i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
5329
5330
0
            ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
5331
0
            blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
5332
5333
            /* ASSERT for valid sizes */
5334
0
            ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
5335
5336
0
            i4_pos_x = i4_cu_x_off;
5337
0
            i4_pos_y = i4_cu_y_off;
5338
0
            posx = i4_pos_x + 2;
5339
0
            posy = i4_pos_y + 2;
5340
5341
0
            i4_inp_stride = ps_search_prms->i4_inp_stride;
5342
            /* Move to the location of the search blk in inp buffer */
5343
            //i4_inp_off = i4_cu_x_off;
5344
            //i4_inp_off += i4_cu_y_off * i4_inp_stride;
5345
0
            i4_inp_off = (i4_16x16_index % 4) * 16;
5346
0
            i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;
5347
5348
            /***********pick the candidates**************************************/
5349
0
            for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
5350
0
            {
5351
0
                WORD32 actual_pred_dir = 0;
5352
5353
0
                if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0)
5354
0
                {
5355
0
                    actual_pred_dir = 1;
5356
0
                }
5357
0
                else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0)
5358
0
                {
5359
0
                    actual_pred_dir = 0;
5360
0
                }
5361
0
                else if(u1_pred_dir == 1)
5362
0
                {
5363
0
                    actual_pred_dir = 1;
5364
0
                }
5365
5366
0
                i4_num_results_to_proj = 1;  // only the best proj
5367
5368
                /* Safety check to avoid uninitialized access across temporal layers */
5369
0
                posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position withing frAME */
5370
0
                posy = CLIP3(posy, 0, (ht_c - blksize_p));
5371
5372
                /* Project the positions to prev layer */
5373
0
                blk_x = posx >> blksize_p;
5374
0
                blk_y = posy >> blksize_p;
5375
5376
                /* Pick up the mvs from the location */
5377
0
                i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
5378
0
                i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
5379
5380
0
                ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
5381
0
                pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
5382
5383
0
                if(actual_pred_dir == 1)
5384
0
                {
5385
0
                    ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
5386
0
                    pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
5387
0
                }
5388
5389
0
                {
5390
0
                    as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1;
5391
0
                    as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1;
5392
0
                    as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0];
5393
5394
0
                    if((as_search_node[actual_pred_dir].i1_ref_idx < 0) ||
5395
0
                       (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV))
5396
0
                    {
5397
0
                        as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id;
5398
0
                        as_search_node[actual_pred_dir].s_mv.i2_mvx = 0;
5399
0
                        as_search_node[actual_pred_dir].s_mv.i2_mvy = 0;
5400
0
                    }
5401
0
                }
5402
5403
                /********************************************************************************************/
5404
0
                {
5405
                    /* declare the variables */
5406
                    //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
5407
5408
0
                    pi4_valid_part_ids = ai4_valid_part_ids;
5409
0
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
5410
0
                    s_err_prms.i4_inp_stride = i4_inp_stride;
5411
0
                    s_err_prms.i4_ref_stride = i4_ref_stride;
5412
0
                    s_err_prms.i4_part_mask = i4_part_mask;
5413
0
                    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
5414
0
                    s_err_prms.i4_blk_wd = i4_blk_wd;
5415
0
                    s_err_prms.i4_blk_ht = i4_blk_ht;
5416
0
                    s_err_prms.i4_step = 1;
5417
0
                    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
5418
                    //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
5419
5420
                    /*************************************************************************/
5421
                    /* Depending on flag i4_use_rec, we use either input of previously       */
5422
                    /* encoded pictures or we use recon of previously encoded pictures.      */
5423
0
                    i4_ref_stride = ps_curr_layer->i4_rec_stride;
5424
0
                    ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy;  // pointer to the pred
5425
5426
0
                    i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off;  //i4_x_off;
5427
5428
0
                    s_err_prms.pu1_ref =
5429
0
                        ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset;
5430
0
                    s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx;
5431
0
                    s_err_prms.pu1_ref +=
5432
0
                        as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride;
5433
5434
                    /*get the source */
5435
0
                    s_err_prms.pu1_inp =
5436
0
                        ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] +
5437
0
                        i4_inp_off;  //pu1_src_input + i4_inp_off;//ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + i4_inp_off;
5438
5439
                    /* send the pred - source to noise detect */
5440
                    // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref);
5441
0
                }
5442
                /* change the l0/l1 blcok pointer names accrodingle */
5443
5444
                /* get memory pointers the input and the reference */
5445
0
                pu1_l0_block = s_err_prms.pu1_inp;
5446
0
                pu1_l1_block = s_err_prms.pu1_ref;
5447
5448
0
                {
5449
0
                    WORD32 i2, j2;
5450
0
                    WORD32 dim = 16;
5451
0
                    UWORD8 *buf1;
5452
0
                    UWORD8 *buf2;
5453
0
                    for(i2 = 0; i2 < dim; i2++)
5454
0
                    {
5455
0
                        buf1 = pu1_l0_block + i2 * i4_inp_stride;
5456
0
                        buf2 = pu1_l1_block + i2 * i4_ref_stride;
5457
5458
0
                        for(j2 = 0; j2 < dim; j2++)
5459
0
                        {
5460
0
                            pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]);
5461
0
                        }
5462
0
                    }
5463
5464
0
                    ihevce_calc_variance_signed(
5465
0
                        pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);
5466
5467
                    /* compare the source and residue variance for this block ps_ctb_noise_params->i4_variance_src_16x16 */
5468
0
                    if(variance_16x16[u1_pred_dir] >
5469
0
                       ((TEMPORAL_VARIANCE_FACTOR *
5470
0
                         ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
5471
0
                        Q_TEMPORAL_VARIANCE_FACTOR))
5472
0
                    {
5473
                        /* update noisy block count only if all  best MV in diff directions indicates noise */
5474
0
                        if(u1_pred_dir == num_pred_dir - 1)
5475
0
                        {
5476
0
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
5477
0
                            ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
5478
0
                            ps_ctb_noise_params
5479
0
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
5480
0
                            ps_ctb_noise_params
5481
0
                                ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1;
5482
0
                            noise_detected = 1;
5483
0
                        }
5484
0
                    }
5485
0
                    else /* if any one of the direction mv says it as non noise then dont check for the other directions MV , move for next block*/
5486
0
                    {
5487
0
                        noise_detected = 0;
5488
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
5489
0
                        ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
5490
0
                        ps_ctb_noise_params
5491
0
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
5492
0
                        ps_ctb_noise_params
5493
0
                            ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0;
5494
0
                        break;
5495
0
                    }
5496
0
                }  // variance analysis and calculation
5497
0
            }  // for each direction
5498
0
        }  // HME code
5499
5500
0
    }  // for each 16x16 block
5501
5502
0
    return (noise_detected);
5503
0
}
5504
#endif
5505
5506
void hme_qpel_interp_avg_1pt(
5507
    interp_prms_t *ps_prms,
5508
    S32 i4_mv_x,
5509
    S32 i4_mv_y,
5510
    S32 i4_buf_id,
5511
    U08 **ppu1_final,
5512
    S32 *pi4_final_stride)
5513
12.3M
{
5514
12.3M
    U08 *pu1_src1, *pu1_src2, *pu1_dst;
5515
12.3M
    qpel_input_buf_cfg_t *ps_inp_cfg;
5516
12.3M
    S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
5517
5518
    /*************************************************************************/
5519
    /* For a given QPEL pt, we need to determine the 2 source pts that are   */
5520
    /* needed to do the QPEL averaging. The logic to do this is as follows   */
5521
    /* i4_mv_x and i4_mv_y are the motion vectors in QPEL units that are     */
5522
    /* pointing to the pt of interest. Obviously, they are w.r.t. the 0,0    */
5523
    /* pt of th reference blk that is colocated to the inp blk.              */
5524
    /*    A j E k B                                                          */
5525
    /*    l m n o p                                                          */
5526
    /*    F q G r H                                                          */
5527
    /*    s t u v w                                                          */
5528
    /*    C x I y D                                                          */
5529
    /* In above diagram, A. B, C, D are full pts at offsets (0,0),(1,0),(0,1)*/
5530
    /* and (1,1) respectively in the fpel buffer (id = 0)                    */
5531
    /* E and I are hxfy pts in offsets (0,0),(0,1) respectively in hxfy buf  */
5532
    /* F and H are fxhy pts in offsets (0,0),(1,0) respectively in fxhy buf  */
5533
    /* G is hxhy pt in offset 0,0 in hxhy buf                                */
5534
    /* All above offsets are computed w.r.t. motion displaced pt in          */
5535
    /* respective bufs. This means that A corresponds to (i4_mv_x >> 2) and  */
5536
    /* (i4_mv_y >> 2) in fxfy buf. Ditto with E, F and G                     */
5537
    /* fxfy buf is buf id 0, hxfy is buf id 1, fxhy is buf id 2, hxhy is 3   */
5538
    /* If we consider pt v to be derived. v has a fractional comp of 3, 3    */
5539
    /* v is avg of H and I. So the table look up of v should give following  */
5540
    /* buf 1 (H) : offset = (1, 0) buf id = 2.                               */
5541
    /* buf 2 (I) : offset = 0 , 1) buf id = 1.                               */
5542
    /* NOTE: For pts that are fxfy/hxfy/fxhy/hxhy, bufid 1 will be -1.       */
5543
    /*************************************************************************/
5544
12.3M
    i4_mv_x_frac = i4_mv_x & 3;
5545
12.3M
    i4_mv_y_frac = i4_mv_y & 3;
5546
5547
12.3M
    i4_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride;
5548
5549
    /* Derive the descriptor that has all offset and size info */
5550
12.3M
    ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
5551
5552
12.3M
    pu1_src1 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
5553
12.3M
    pu1_src1 += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
5554
12.3M
    pu1_src1 += (ps_inp_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride);
5555
5556
12.3M
    pu1_src2 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id2];
5557
12.3M
    pu1_src2 += ps_inp_cfg->i1_buf_xoff2 + i4_offset;
5558
12.3M
    pu1_src2 += (ps_inp_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride);
5559
5560
12.3M
    pu1_dst = ps_prms->apu1_interp_out[i4_buf_id];
5561
12.3M
    hevc_avg_2d(
5562
12.3M
        pu1_src1,
5563
12.3M
        pu1_src2,
5564
12.3M
        ps_prms->i4_ref_stride,
5565
12.3M
        ps_prms->i4_ref_stride,
5566
12.3M
        ps_prms->i4_blk_wd,
5567
12.3M
        ps_prms->i4_blk_ht,
5568
12.3M
        pu1_dst,
5569
12.3M
        ps_prms->i4_out_stride);
5570
12.3M
    ppu1_final[i4_buf_id] = pu1_dst;
5571
12.3M
    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
5572
12.3M
}
5573
5574
void hme_qpel_interp_avg_2pt_vert_with_reuse(
5575
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
5576
3.03M
{
5577
3.03M
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride);
5578
5579
3.03M
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride);
5580
3.03M
}
5581
5582
void hme_qpel_interp_avg_2pt_horz_with_reuse(
5583
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
5584
3.04M
{
5585
3.04M
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride);
5586
5587
3.04M
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
5588
3.04M
}
5589
5590
void hme_set_mv_limit_using_dvsr_data(
5591
    me_frm_ctxt_t *ps_ctxt,
5592
    layer_ctxt_t *ps_curr_layer,
5593
    range_prms_t *ps_mv_limit,
5594
    S16 *pi2_prev_enc_frm_max_mv_y,
5595
    U08 u1_num_act_ref_pics)
5596
85.3k
{
5597
85.3k
    WORD32 ref_ctr;
5598
5599
    /* Only for B/b pic. */
5600
85.3k
    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
5601
18.5k
    {
5602
18.5k
        WORD16 i2_mv_y_per_poc, i2_max_mv_y;
5603
18.5k
        WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff;
5604
18.5k
        WORD32 prev_poc_count = 0;
5605
18.5k
        WORD32 i4_p_idx;
5606
5607
18.5k
        pi2_prev_enc_frm_max_mv_y[0] = 0;
5608
5609
18.5k
        cur_poc = ps_ctxt->i4_curr_poc;
5610
5611
18.5k
        i4_p_idx = 0;
5612
5613
        /* Get abs MAX for symmetric search */
5614
18.5k
        i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
5615
        /* Assuming P to P distance as 4 */
5616
18.5k
        i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;
5617
5618
66.7k
        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
5619
48.2k
        {
5620
            /* Get the prev. encoded frame POC */
5621
48.2k
            prev_poc = ps_ctxt->i4_prev_poc;
5622
5623
48.2k
            ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
5624
48.2k
            abs_poc_diff = ABS((cur_poc - ref_poc));
5625
            /* Get the cur. max MV based on POC distance */
5626
48.2k
            i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
5627
48.2k
            i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
5628
5629
48.2k
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
5630
48.2k
            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
5631
48.2k
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
5632
48.2k
            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
5633
5634
            /* Find the MAX MV for the prev. encoded frame to optimize */
5635
            /* the reverse dependency of ME on Enc.Loop                */
5636
48.2k
            if(ref_poc == prev_poc)
5637
12.7k
            {
5638
                /* TO DO : Same thing for horz. search also */
5639
12.7k
                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
5640
12.7k
                prev_poc_count++;
5641
12.7k
            }
5642
48.2k
        }
5643
18.5k
    }
5644
66.8k
    else
5645
66.8k
    {
5646
66.8k
        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
5647
5648
        /* Set the Config. File Params for P pic. */
5649
196k
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
5650
129k
        {
5651
129k
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
5652
129k
            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
5653
129k
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
5654
129k
            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
5655
129k
        }
5656
5657
        /* For P PIC., go with  Config. File Params */
5658
66.8k
        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
5659
66.8k
    }
5660
85.3k
}
5661
5662
S32 hme_part_mask_populator(
5663
    U08 *pu1_inp,
5664
    S32 i4_inp_stride,
5665
    U08 u1_limit_active_partitions,
5666
    U08 u1_is_bPic,
5667
    U08 u1_is_refPic,
5668
    U08 u1_blk_8x8_mask,
5669
    ME_QUALITY_PRESETS_T e_me_quality_preset)
5670
1.57M
{
5671
1.57M
    if(15 != u1_blk_8x8_mask)
5672
32.2k
    {
5673
32.2k
        return ENABLE_NxN;
5674
32.2k
    }
5675
1.54M
    else
5676
1.54M
    {
5677
1.54M
        U08 u1_call_inp_segmentation_based_part_mask_populator =
5678
1.54M
            (ME_XTREME_SPEED_25 != e_me_quality_preset) ||
5679
1.54M
            (!u1_is_bPic && !DISABLE_8X8CUS_IN_PPICS_IN_P6) ||
5680
1.54M
            (u1_is_bPic && u1_is_refPic && !DISABLE_8X8CUS_IN_REFBPICS_IN_P6) ||
5681
1.54M
            (u1_is_bPic && !u1_is_refPic && !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6);
5682
5683
1.54M
        if(u1_call_inp_segmentation_based_part_mask_populator)
5684
1.49M
        {
5685
1.49M
            S32 i4_part_mask =
5686
1.49M
                hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);
5687
5688
1.49M
            if(e_me_quality_preset == ME_XTREME_SPEED)
5689
163k
            {
5690
163k
                i4_part_mask &= ~ENABLE_AMP;
5691
163k
            }
5692
5693
1.49M
            if(e_me_quality_preset == ME_XTREME_SPEED_25)
5694
409k
            {
5695
409k
                i4_part_mask &= ~ENABLE_AMP;
5696
5697
409k
                i4_part_mask &= ~ENABLE_SMP;
5698
409k
            }
5699
5700
1.49M
            return i4_part_mask;
5701
1.49M
        }
5702
53.0k
        else
5703
53.0k
        {
5704
53.0k
            return ENABLE_2Nx2N;
5705
53.0k
        }
5706
1.54M
    }
5707
1.57M
}