Coverage Report

Created: 2025-08-26 06:31

/src/libhevc/encoder/hme_search_algo.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
******************************************************************************
22
* @file hme_search_algo.c
23
*
24
* @brief
25
*    Contains various search algorithms to be used by coarse/refinement layers
26
*
27
* @author
28
*    Ittiam
29
*
30
*
31
* List of Functions
32
* hme_compute_grid_results_step_gt_1()
33
* hme_compute_grid_results_step_1()
34
* hme_pred_search_square_stepn()
35
*
36
******************************************************************************
37
*/
38
39
/*****************************************************************************/
40
/* File Includes                                                             */
41
/*****************************************************************************/
42
/* System include files */
43
#include <stdio.h>
44
#include <string.h>
45
#include <stdlib.h>
46
#include <assert.h>
47
#include <stdarg.h>
48
#include <math.h>
49
#include <limits.h>
50
51
/* User include files */
52
#include "ihevc_typedefs.h"
53
#include "itt_video_api.h"
54
#include "ihevce_api.h"
55
56
#include "rc_cntrl_param.h"
57
#include "rc_frame_info_collector.h"
58
#include "rc_look_ahead_params.h"
59
60
#include "ihevc_defs.h"
61
#include "ihevc_structs.h"
62
#include "ihevc_platform_macros.h"
63
#include "ihevc_deblk.h"
64
#include "ihevc_itrans_recon.h"
65
#include "ihevc_chroma_itrans_recon.h"
66
#include "ihevc_chroma_intra_pred.h"
67
#include "ihevc_intra_pred.h"
68
#include "ihevc_inter_pred.h"
69
#include "ihevc_mem_fns.h"
70
#include "ihevc_padding.h"
71
#include "ihevc_weighted_pred.h"
72
#include "ihevc_sao.h"
73
#include "ihevc_resi_trans.h"
74
#include "ihevc_quant_iquant_ssd.h"
75
#include "ihevc_cabac_tables.h"
76
77
#include "ihevce_defs.h"
78
#include "ihevce_lap_enc_structs.h"
79
#include "ihevce_multi_thrd_structs.h"
80
#include "ihevce_multi_thrd_funcs.h"
81
#include "ihevce_me_common_defs.h"
82
#include "ihevce_had_satd.h"
83
#include "ihevce_error_codes.h"
84
#include "ihevce_bitstream.h"
85
#include "ihevce_cabac.h"
86
#include "ihevce_rdoq_macros.h"
87
#include "ihevce_function_selector.h"
88
#include "ihevce_enc_structs.h"
89
#include "ihevce_entropy_structs.h"
90
#include "ihevce_cmn_utils_instr_set_router.h"
91
#include "ihevce_enc_loop_structs.h"
92
#include "ihevce_bs_compute_ctb.h"
93
#include "ihevce_global_tables.h"
94
#include "ihevce_dep_mngr_interface.h"
95
#include "hme_datatype.h"
96
#include "hme_interface.h"
97
#include "hme_common_defs.h"
98
#include "hme_defs.h"
99
#include "ihevce_me_instr_set_router.h"
100
#include "hme_globals.h"
101
#include "hme_utils.h"
102
#include "hme_coarse.h"
103
#include "hme_fullpel.h"
104
#include "hme_subpel.h"
105
#include "hme_refine.h"
106
#include "hme_err_compute.h"
107
#include "hme_common_utils.h"
108
#include "hme_search_algo.h"
109
#include "ihevce_stasino_helpers.h"
110
#include "ihevce_common_utils.h"
111
112
/*****************************************************************************/
113
/* Function Definitions                                                      */
114
/*****************************************************************************/
115
116
/**
117
********************************************************************************
118
*  @fn     void hme_compute_grid_results_step_1(err_prms_t *ps_err_prms,
119
result_upd_prms_t *ps_result_prms,
120
BLK_SIZE_T e_blk_size)
121
*
122
*  @brief  Updates results for a grid of step = 1
123
*
124
*  @param[in] ps_err_prms: Various parameters to this function
125
*
126
*  @param[in] ps_result_prms : Parameters pertaining to result updation
127
*
128
*  @param[out] e_blk_size: Block size of the blk being searched for
129
*
130
*  @return none
131
********************************************************************************
132
*/
133
void hme_compute_grid_results(
134
    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
135
14.7M
{
136
14.7M
    PF_RESULT_FXN_T pf_hme_result_fxn;
137
14.7M
    PF_SAD_FXN_T pf_sad_fxn;
138
14.7M
    S32 i4_num_results;
139
14.7M
    S32 part_id;
140
141
14.7M
    part_id = ps_result_prms->pi4_valid_part_ids[0];
142
143
14.7M
    i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;
144
145
14.7M
    pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);
146
147
14.7M
    pf_hme_result_fxn =
148
14.7M
        hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);
149
150
14.7M
    pf_sad_fxn(ps_err_prms);
151
14.7M
    pf_hme_result_fxn(ps_result_prms);
152
14.7M
}
153
154
/**
155
********************************************************************************
156
*  @fn     void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
157
*                                   layer_ctxt_t *ps_layer_ctxt)
158
*
159
*  @brief  Implements predictive search, with square grid refinement. In this
160
*          case, we start with a bigger step size, like 4, refining upto a
161
*          variable number of pts, till we hit end of search range or hit a
162
*          minima. Then we refine using smaller steps. The bigger step size
163
*          like 4 or 2, do not use optimized SAD functions, they evaluate
164
*          SAD for each individual pt.
165
*
166
*  @param[in,out]  ps_search_prms: All the params to this function
167
*
168
*  @param[in] ps_layer_ctxt: Context for the layer
169
*
170
*  @return None
171
********************************************************************************
172
*/
173
void hme_pred_search_square_stepn(
174
    hme_search_prms_t *ps_search_prms,
175
    layer_ctxt_t *ps_layer_ctxt,
176
    wgt_pred_ctxt_t *ps_wt_inp_prms,
177
    ME_QUALITY_PRESETS_T e_me_quality_preset,
178
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
179
180
)
181
6.64M
{
182
    /* Stores the SAD for all parts at each pt in the grid */
183
6.64M
    S32 ai4_sad_grid[9][TOT_NUM_PARTS];
184
185
6.64M
    S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
186
187
    /* Atributes of input candidates */
188
6.64M
    search_candt_t *ps_search_candts;
189
6.64M
    search_node_t s_search_node;
190
191
    /* Number of candidates to search */
192
6.64M
    S32 i4_num_candts, max_num_iters, i4_num_results;
193
194
    /* Input and reference attributes */
195
6.64M
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
196
197
    /* The reference is actually an array of ptrs since there are several    */
198
    /* reference id. So an array gets passed form calling function           */
199
6.64M
    U08 **ppu1_ref;
200
201
    /* Holds the search results at the end of this fxn */
202
6.64M
    search_results_t *ps_search_results;
203
204
    /* These control number of parts and number of pts in grid to search */
205
6.64M
    S32 i4_part_mask, i4_grid_mask;
206
207
    /* Blk width, blk height and blk size are derived from input params */
208
6.64M
    BLK_SIZE_T e_blk_size;
209
6.64M
    CU_SIZE_T e_cu_size;
210
6.64M
    S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
211
6.64M
    S32 i4_inp_off;
212
6.64M
    S32 i4_min_id;
213
    /* Points to the range limits for mv */
214
6.64M
    range_prms_t *ps_range_prms;
215
216
    /*************************************************************************/
217
    /* These functions pointers for calculating Err and the result update    */
218
    /* Each carries its own parameters structure, which is generated on the  */
219
    /* fly in this function                                                  */
220
    /*************************************************************************/
221
6.64M
    err_prms_t s_err_prms;
222
6.64M
    result_upd_prms_t s_result_prms;
223
224
6.64M
    max_num_iters = ps_search_prms->i4_max_iters;
225
    /* Using the member 0 to store for all ref. idx., see in coarsest */
226
6.64M
    ps_range_prms = ps_search_prms->aps_mv_range[0];
227
6.64M
    i4_inp_stride = ps_search_prms->i4_inp_stride;
228
    /* Move to the location of the search blk in inp buffer */
229
6.64M
    i4_inp_off = ps_search_prms->i4_cu_x_off;
230
6.64M
    i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);
231
232
6.64M
    ps_search_results = ps_search_prms->ps_search_results;
233
234
    /*************************************************************************/
235
    /* Depending on flag i4_use_rec, we use either input of previously       */
236
    /* encoded pictures or we use recon of previously encoded pictures.      */
237
    /*************************************************************************/
238
6.64M
    if(ps_search_prms->i4_use_rec == 1)
239
0
    {
240
0
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
241
0
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
242
0
    }
243
6.64M
    else
244
6.64M
    {
245
6.64M
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
246
6.64M
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
247
6.64M
    }
248
6.64M
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
249
250
    /*************************************************************************/
251
    /* Obtain the blk size of the search blk. Assumed here that the search   */
252
    /* is done on a CU size, rather than any arbitrary blk size.             */
253
    /*************************************************************************/
254
6.64M
    ps_search_results = ps_search_prms->ps_search_results;
255
6.64M
    e_blk_size = ps_search_prms->e_blk_size;
256
6.64M
    i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
257
6.64M
    i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
258
6.64M
    e_cu_size = ps_search_results->e_cu_size;
259
6.64M
    i4_num_results = (S32)ps_search_results->u1_num_results_per_part;
260
261
6.64M
    ps_search_candts = ps_search_prms->ps_search_candts;
262
6.64M
    i4_num_candts = ps_search_prms->i4_num_init_candts;
263
6.64M
    i4_part_mask = ps_search_prms->i4_part_mask;
264
265
    /*************************************************************************/
266
    /* This array stores the ids of the partitions whose                     */
267
    /* SADs are updated. Since the partitions whose SADs are updated may not */
268
    /* be in contiguous order, we supply another level of indirection.       */
269
    /*************************************************************************/
270
6.64M
    hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
271
272
    /* Update the parameters used to pass to SAD */
273
    /* input ptr, strides, SAD Grid, part mask, blk width and ht */
274
    /* The above are fixed ptrs, only pu1_ref and grid mask are  */
275
    /* varying params which are updated just before calling fxn  */
276
6.64M
    s_err_prms.i4_inp_stride = i4_inp_stride;
277
6.64M
    s_err_prms.i4_ref_stride = i4_ref_stride;
278
6.64M
    s_err_prms.i4_part_mask = i4_part_mask;
279
6.64M
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
280
6.64M
    s_err_prms.i4_blk_wd = i4_blk_wd;
281
6.64M
    s_err_prms.i4_blk_ht = i4_blk_ht;
282
6.64M
    s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;
283
284
6.64M
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
285
6.64M
    s_result_prms.ps_search_results = ps_search_results;
286
6.64M
    s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
287
6.64M
    s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
288
6.64M
    s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
289
6.64M
    s_result_prms.ps_search_node_base = &s_search_node;
290
6.64M
    s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
291
292
    /* Run through each of the candts in a loop */
293
34.6M
    for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
294
27.9M
    {
295
27.9M
        S32 i4_num_refine;
296
297
27.9M
        i4_step = ps_search_prms->i4_start_step;
298
299
27.9M
        s_search_node = *(ps_search_candts->ps_search_node);
300
301
        /* initialize minimum cost for this candidate. As we search around */
302
        /* this candidate, this is used to check early exit, when in any   */
303
        /* given iteration, the center pt of the grid is lowest value      */
304
27.9M
        s_result_prms.i4_min_cost = MAX_32BIT_VAL;
305
306
        /* If we need to do refinements, then we need to evaluate */
307
        /* neighbouring pts. Before doing so, we have to do       */
308
        /* basic range checks against max allowed mvs             */
309
27.9M
        i4_num_refine = ps_search_candts->u1_num_steps_refine;
310
311
27.9M
        CLIP_MV_WITHIN_RANGE(
312
27.9M
            s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);
313
314
        /* The first time, we search all 8 pts around init candt plus the init candt */
315
27.9M
        i4_grid_mask = 0x1ff;
316
27.9M
        s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;
317
318
31.9M
        for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
319
27.9M
        {
320
27.9M
            i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
321
322
27.9M
            s_err_prms.i4_grid_mask = i4_grid_mask;
323
27.9M
            s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
324
27.9M
            s_err_prms.pu1_ref +=
325
27.9M
                (s_search_node.s_mv.i2_mvx +
326
27.9M
                 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
327
328
27.9M
            s_result_prms.i4_step = i4_step;
329
27.9M
            s_err_prms.i4_step = i4_step;
330
27.9M
            s_result_prms.i4_grid_mask = i4_grid_mask;
331
332
            /* For Top,TopLeft and Left cand., get only center point SAD    */
333
            /* and do early exit                                            */
334
27.9M
            if(0 == i4_num_refine)
335
16.0M
            {
336
16.0M
                s_err_prms.i4_grid_mask = 0x1;
337
16.0M
                s_result_prms.i4_grid_mask = 0x1;
338
339
                /* sad pt fun. populates sad to 0th location, whereas update */
340
                /* fun. takes it based on part. id                           */
341
16.0M
                s_err_prms.pi4_sad_grid =
342
16.0M
                    s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);
343
344
16.0M
                ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);
345
346
16.0M
                s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;
347
348
16.0M
                if(ME_XTREME_SPEED_25 == e_me_quality_preset)
349
0
                    hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
350
16.0M
                else
351
16.0M
                    hme_update_results_grid_pu_bestn(&s_result_prms);
352
353
16.0M
                i4_min_id = (S32)PT_C; /* Center Point         */
354
16.0M
                i4_step = 0; /* No further refinment */
355
16.0M
                s_result_prms.i4_step = i4_step;
356
16.0M
                s_err_prms.i4_step = i4_step;
357
16.0M
            }
358
11.9M
            else
359
11.9M
            {
360
11.9M
                if(ME_XTREME_SPEED_25 == e_me_quality_preset)
361
1.30M
                {
362
1.30M
                    err_prms_t *ps_err_prms = &s_err_prms;
363
1.30M
                    ASSERT(ps_err_prms->i4_grid_mask != 1);
364
1.30M
                    ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
365
366
                    /*****************************************************************/
367
                    /* In this case, there are no partial updates. The blk can be    */
368
                    /* of any type and need not be a CU. The only thing that matters */
369
                    /* here is the width of the blk, 4/8/(>=16)                      */
370
                    /*****************************************************************/
371
1.30M
                    ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
372
373
1.30M
                    hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
374
1.30M
                }
375
10.6M
                else
376
10.6M
                {
377
                    /* Obtain SAD for all 9 pts in grid*/
378
10.6M
                    hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
379
10.6M
                }
380
381
                /* Early exit in case of centre being local minima */
382
11.9M
                i4_min_id = s_result_prms.i4_min_id;
383
11.9M
            }
384
385
27.9M
            i4_grid_mask = gai4_opt_grid_mask[i4_min_id];
386
387
27.9M
            s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
388
27.9M
            s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
389
27.9M
            if(i4_min_id == (S32)PT_C)
390
24.0M
                break;
391
27.9M
        }
392
393
        /* Next keep reducing stepsize by factor of 2 */
394
27.9M
        i4_step >>= 1;
395
33.4M
        while(i4_step)
396
5.43M
        {
397
5.43M
            i4_grid_mask = 0x1fe &
398
5.43M
                           hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
399
            //i4_grid_mask &= 0x1fe;
400
401
5.43M
            s_err_prms.i4_grid_mask = i4_grid_mask;
402
5.43M
            s_result_prms.i4_grid_mask = i4_grid_mask;
403
5.43M
            s_err_prms.i4_step = i4_step;
404
5.43M
            s_result_prms.i4_step = i4_step;
405
5.43M
            s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
406
5.43M
            s_err_prms.pu1_ref +=
407
5.43M
                (s_search_node.s_mv.i2_mvx +
408
5.43M
                 (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
409
5.43M
            if(ME_XTREME_SPEED_25 == e_me_quality_preset)
410
1.30M
            {
411
1.30M
                err_prms_t *ps_err_prms = &s_err_prms;
412
1.30M
                ASSERT(ps_err_prms->i4_grid_mask != 1);
413
1.30M
                ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
414
415
                /*****************************************************************/
416
                /* In this case, there are no partial updates. The blk can be    */
417
                /* of any type and need not be a CU. The only thing that matters */
418
                /* here is the width of the blk, 4/8/(>=16)                      */
419
                /*****************************************************************/
420
1.30M
                ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
421
422
1.30M
                hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
423
1.30M
            }
424
4.12M
            else
425
4.12M
            {
426
4.12M
                hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
427
4.12M
            }
428
429
5.43M
            i4_min_id = s_result_prms.i4_min_id;
430
431
5.43M
            s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
432
5.43M
            s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
433
434
5.43M
            i4_step >>= 1;
435
5.43M
        }
436
437
27.9M
        ps_search_candts++;
438
27.9M
    }
439
6.64M
}
440
441
/**
442
********************************************************************************
443
*  @fn     hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
444
*                               layer_ctxt_t *ps_layer_ctxt)
445
*
446
*  @brief  Implements predictive search with square grid refinement. In this
447
*           case, the square grid is of step 1 always. since this is considered
448
*           to be more of a refinement search
449
*
450
*  @param[in,out]  ps_search_prms: All the params to this function
451
*
452
*  @param[in] ps_layer_ctxt: All info about this layer
453
*
454
*  @return None
455
********************************************************************************
456
*/
457
/**
458
********************************************************************************
459
*  @fn     hme_pred_search(hme_search_prms_t *ps_search_prms,
460
*                               layer_ctxt_t *ps_layer_ctxt)
461
*
462
*  @brief  Implements predictive search after removing duplicate candidates
463
*          from initial list. Each square grid (of step 1) is expanded
464
*          to nine search pts before the dedeuplication process. one point
465
*          cost is then evaluated for each unique node after the deduplication
466
*          process
467
*
468
*  @param[in,out]  ps_search_prms: All the params to this function
469
*
470
*  @param[in] ps_layer_ctxt: All info about this layer
471
*
472
*  @return None
473
********************************************************************************
474
*/
475
void hme_pred_search(
476
    hme_search_prms_t *ps_search_prms,
477
    layer_ctxt_t *ps_layer_ctxt,
478
    wgt_pred_ctxt_t *ps_wt_inp_prms,
479
    S08 i1_grid_flag,
480
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
481
482
)
483
3.88M
{
484
    /* Stores the SAD for all parts at each pt in the grid */
485
3.88M
    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
486
487
    /* Atributes of input candidates */
488
3.88M
    search_node_t *ps_search_node;
489
490
3.88M
    search_results_t *ps_search_results;
491
3.88M
    S32 i4_num_nodes, i4_candt;
492
493
    /* Input and reference attributes */
494
3.88M
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
495
496
    /* The reference is actually an array of ptrs since there are several    */
497
    /* reference id. So an array gets passed form calling function           */
498
3.88M
    U08 **ppu1_ref;
499
500
    /* These control number of parts and number of pts in grid to search */
501
3.88M
    S32 i4_part_mask, i4_grid_mask;
502
503
3.88M
    S32 shift_for_cu_size;
504
505
    /* Blk width, blk height and blk size are derived from input params */
506
3.88M
    BLK_SIZE_T e_blk_size;
507
3.88M
    CU_SIZE_T e_cu_size;
508
3.88M
    S32 i4_blk_wd, i4_blk_ht;
509
510
    /*************************************************************************/
511
    /* These functions pointers for calculating Err and the result update    */
512
    /* Each carries its own parameters structure, which is generated on the  */
513
    /* fly in this function                                                  */
514
    /*************************************************************************/
515
3.88M
    PF_RESULT_FXN_T pf_hme_result_fxn;
516
3.88M
    PF_SAD_FXN_T pf_sad_fxn;
517
3.88M
    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
518
3.88M
    err_prms_t s_err_prms;
519
3.88M
    result_upd_prms_t s_result_prms;
520
3.88M
    S32 i4_num_results;
521
3.88M
    S32 i4_inp_off;
522
3.88M
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
523
524
3.88M
    i4_inp_stride = ps_search_prms->i4_inp_stride;
525
526
    /* Move to the location of the search blk in inp buffer */
527
3.88M
    i4_inp_off = ps_search_prms->i4_cu_x_off;
528
3.88M
    i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
529
530
    /*************************************************************************/
531
    /* Depending on flag i4_use_rec, we use either input of previously       */
532
    /* encoded pictures or we use recon of previously encoded pictures.      */
533
    /*************************************************************************/
534
3.88M
    if(ps_search_prms->i4_use_rec == 1)
535
3.88M
    {
536
3.88M
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
537
3.88M
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
538
3.88M
    }
539
0
    else
540
0
    {
541
0
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
542
0
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
543
0
    }
544
3.88M
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
545
    /* Obtain the blk size of the search blk. Assumed here that the search   */
546
    /* is done on a CU size, rather than any arbitrary blk size.             */
547
3.88M
    ps_search_results = ps_search_prms->ps_search_results;
548
3.88M
    e_blk_size = ps_search_prms->e_blk_size;
549
3.88M
    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
550
3.88M
    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
551
3.88M
    e_cu_size = ps_search_results->e_cu_size;
552
553
    /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
554
    /* This will also set the shift w.r.t. the base cu size of 8x8 */
555
3.88M
    shift_for_cu_size = e_cu_size;
556
557
3.88M
    ps_search_node = ps_search_prms->ps_search_nodes;
558
3.88M
    i4_num_nodes = ps_search_prms->i4_num_search_nodes;
559
3.88M
    i4_part_mask = ps_search_prms->i4_part_mask;
560
561
    /* Update the parameters used to pass to SAD */
562
    /* input ptr, strides, SAD Grid, part mask, blk width and ht */
563
    /* The above are fixed ptrs, only pu1_ref and grid mask are  */
564
    /* varying params which are updated just before calling fxn  */
565
3.88M
    s_err_prms.i4_inp_stride = i4_inp_stride;
566
3.88M
    s_err_prms.i4_ref_stride = i4_ref_stride;
567
3.88M
    s_err_prms.i4_part_mask = i4_part_mask;
568
3.88M
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
569
3.88M
    s_err_prms.i4_blk_wd = i4_blk_wd;
570
3.88M
    s_err_prms.i4_blk_ht = i4_blk_ht;
571
3.88M
    s_err_prms.i4_step = 1;
572
3.88M
    s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
573
574
3.88M
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
575
3.88M
    s_result_prms.ps_search_results = ps_search_results;
576
3.88M
    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
577
3.88M
    s_result_prms.pi4_sad_grid = ai4_sad_grid;
578
3.88M
    s_result_prms.i4_part_mask = i4_part_mask;
579
3.88M
    s_result_prms.i4_step = 1;
580
3.88M
    pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
581
3.88M
        i1_grid_flag,
582
3.88M
        ps_search_prms->u1_is_cu_noisy,
583
3.88M
        i4_part_mask,
584
3.88M
        ps_fullpel_refine_ctxt->i4_num_valid_parts,
585
3.88M
        ps_search_results->u1_num_results_per_part);
586
587
3.88M
    pf_calc_sad_and_result(
588
3.88M
        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
589
3.88M
}
590
591
static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
592
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
593
    S32 i4_part_mask,
594
    S32 i4_num_partitions,
595
    S08 i1_grid_enable,
596
    U08 u1_num_results_per_part)
597
4.34M
{
598
4.34M
    FT_CALC_SAD_AND_RESULT *pf_func = NULL;
599
600
4.34M
    if(2 == u1_num_results_per_part)
601
1.33M
    {
602
1.33M
        if(i4_part_mask == 1)
603
800k
        {
604
800k
            ASSERT(i4_num_partitions == 1);
605
606
800k
            if(i1_grid_enable == 0)
607
384k
            {
608
384k
                pf_func =
609
384k
                    ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8;
610
384k
            }
611
415k
            else
612
415k
            {
613
415k
                pf_func = ps_me_optimised_function_list
614
415k
                              ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
615
415k
            }
616
800k
        }
617
535k
        else
618
535k
        {
619
535k
            ASSERT(i4_num_partitions == 5);
620
621
535k
            pf_func =
622
535k
                ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
623
535k
        }
624
1.33M
    }
625
3.00M
    else if(1 == u1_num_results_per_part)
626
3.00M
    {
627
3.00M
        if(i4_part_mask == 1)
628
887k
        {
629
887k
            ASSERT(i4_num_partitions == 1);
630
631
887k
            if(i1_grid_enable == 0)
632
170k
            {
633
170k
                pf_func =
634
170k
                    ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8;
635
170k
            }
636
717k
            else
637
717k
            {
638
717k
                pf_func = ps_me_optimised_function_list
639
717k
                              ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
640
717k
            }
641
887k
        }
642
2.11M
        else
643
2.11M
        {
644
2.11M
            ASSERT(i4_num_partitions == 5);
645
646
2.11M
            pf_func =
647
2.11M
                ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
648
2.11M
        }
649
3.00M
    }
650
651
4.34M
    return pf_func;
652
4.34M
}
653
654
/**
655
********************************************************************************
656
*  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
657
*                                         layer_ctxt_t *ps_layer_ctxt,
658
*                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
659
*                                         S32 *pi4_valid_part_ids,
660
*                                         S32 disable_refine,
661
*                                         ME_QUALITY_PRESETS_T e_me_quality_preset)
662
*
663
*  @brief  Implements predictive search after removing duplicate candidates
664
*          from initial list. Each square grid (of step 1) is expanded
665
*          to nine search pts before the deduplication process. One point
666
*          cost is then evaluated for each unique node after the deduplication
667
*          process
668
*
669
*  @param[in,out]  ps_search_prms: All the params to this function
670
*
671
*  @param[in] ps_layer_ctxt: All info about this layer
672
*
673
*  @return None
674
********************************************************************************
675
*/
676
void hme_pred_search_no_encode(
677
    hme_search_prms_t *ps_search_prms,
678
    layer_ctxt_t *ps_layer_ctxt,
679
    wgt_pred_ctxt_t *ps_wt_inp_prms,
680
    S32 *pi4_valid_part_ids,
681
    S32 disable_refine,
682
    ME_QUALITY_PRESETS_T e_me_quality_preset,
683
    S08 i1_grid_enable,
684
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
685
4.34M
{
686
    /* Stores the SAD for all parts at each pt in the grid */
687
4.34M
    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
688
689
    /* Atributes of input candidates */
690
4.34M
    search_node_t *ps_search_node;
691
4.34M
    search_results_t *ps_search_results;
692
4.34M
    S32 i4_num_nodes;
693
694
    /* Input and reference attributes */
695
4.34M
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
696
697
    /* The reference is actually an array of ptrs since there are several    */
698
    /* reference id. So an array gets passed form calling function           */
699
4.34M
    U08 **ppu1_ref;
700
701
    /* These control number of parts and number of pts in grid to search */
702
4.34M
    S32 i4_part_mask;  // i4_grid_mask;
703
704
4.34M
    S32 shift_for_cu_size;
705
    /* Blk width, blk height and blk size are derived from input params */
706
4.34M
    BLK_SIZE_T e_blk_size;
707
4.34M
    CU_SIZE_T e_cu_size;
708
4.34M
    S32 i4_blk_wd, i4_blk_ht;
709
710
    /*************************************************************************/
711
    /* These functions pointers for calculating Err and the result update    */
712
    /* Each carries its own parameters structure, which is generated on the  */
713
    /* fly in this function                                                  */
714
    /*************************************************************************/
715
4.34M
    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
716
4.34M
    err_prms_t s_err_prms;
717
4.34M
    result_upd_prms_t s_result_prms;
718
4.34M
    S32 i4_num_results;
719
4.34M
    S32 i4_search_idx = ps_search_prms->i1_ref_idx;
720
4.34M
    S32 i4_inp_off;
721
4.34M
    S32 i4_num_partitions;
722
723
4.34M
    i4_inp_stride = ps_search_prms->i4_inp_stride;
724
725
    /* Move to the location of the search blk in inp buffer */
726
4.34M
    i4_inp_off = ps_search_prms->i4_cu_x_off;
727
4.34M
    i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
728
729
    /*************************************************************************/
730
    /* Depending on flag i4_use_rec, we use either input of previously       */
731
    /* encoded pictures or we use recon of previously encoded pictures.      */
732
    /*************************************************************************/
733
4.34M
    if(ps_search_prms->i4_use_rec == 1)
734
0
    {
735
0
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
736
0
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
737
0
    }
738
4.34M
    else
739
4.34M
    {
740
4.34M
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
741
4.34M
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
742
4.34M
    }
743
4.34M
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
744
    /* Obtain the blk size of the search blk. Assumed here that the search   */
745
    /* is done on a CU size, rather than any arbitrary blk size.             */
746
4.34M
    ps_search_results = ps_search_prms->ps_search_results;
747
4.34M
    e_blk_size = ps_search_prms->e_blk_size;
748
4.34M
    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
749
4.34M
    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
750
4.34M
    e_cu_size = ps_search_results->e_cu_size;
751
752
    /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
753
    /* This will also set the shift w.r.t. the base cu size of 8x8 */
754
4.34M
    shift_for_cu_size = e_cu_size;
755
756
4.34M
    ps_search_node = ps_search_prms->ps_search_nodes;
757
4.34M
    i4_num_nodes = ps_search_prms->i4_num_search_nodes;
758
4.34M
    i4_part_mask = ps_search_prms->i4_part_mask;
759
760
    /*************************************************************************/
761
    /* This array stores the ids of the partitions whose                     */
762
    /* SADs are updated. Since the partitions whose SADs are updated may not */
763
    /* be in contiguous order, we supply another level of indirection.       */
764
    /*************************************************************************/
765
4.34M
    i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);
766
767
    /* Update the parameters used to pass to SAD */
768
    /* input ptr, strides, SAD Grid, part mask, blk width and ht */
769
    /* The above are fixed ptrs, only pu1_ref and grid mask are  */
770
    /* varying params which are updated just before calling fxn  */
771
4.34M
    s_err_prms.i4_inp_stride = i4_inp_stride;
772
4.34M
    s_err_prms.i4_ref_stride = i4_ref_stride;
773
4.34M
    s_err_prms.i4_part_mask = i4_part_mask;
774
4.34M
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
775
4.34M
    s_err_prms.i4_blk_wd = i4_blk_wd;
776
4.34M
    s_err_prms.i4_blk_ht = i4_blk_ht;
777
4.34M
    s_err_prms.i4_step = 1;
778
4.34M
    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
779
4.34M
    s_err_prms.i4_num_partitions = i4_num_partitions;
780
781
4.34M
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
782
4.34M
    s_result_prms.ps_search_results = ps_search_results;
783
4.34M
    s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
784
4.34M
    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
785
4.34M
    s_result_prms.pi4_sad_grid = ai4_sad_grid;
786
4.34M
    s_result_prms.i4_part_mask = i4_part_mask;
787
4.34M
    s_result_prms.i4_step = 1;
788
789
4.34M
    pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
790
4.34M
        ps_me_optimised_function_list,
791
4.34M
        i4_part_mask,
792
4.34M
        i4_num_partitions,
793
4.34M
        i1_grid_enable,
794
4.34M
        ps_search_results->u1_num_results_per_part);
795
796
4.34M
    pf_calc_sad_and_result(
797
4.34M
        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
798
4.34M
}