Coverage Report

Created: 2026-05-24 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/hme_coarse.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
******************************************************************************
23
* @file hme_coarse.c
24
*
25
* @brief
26
*    Contains ME algorithm for the coarse layer.
27
*
28
* @author
29
*    Ittiam
30
*
31
*
32
* List of Functions
33
* hme_update_mv_bank_coarse()
34
* hme_coarse()
35
******************************************************************************
36
*/
37
38
/*****************************************************************************/
39
/* File Includes                                                             */
40
/*****************************************************************************/
41
/* System include files */
42
#include <stdio.h>
43
#include <string.h>
44
#include <stdlib.h>
45
#include <assert.h>
46
#include <stdarg.h>
47
#include <math.h>
48
#include <limits.h>
49
50
/* User include files */
51
#include "ihevc_typedefs.h"
52
#include "itt_video_api.h"
53
#include "ihevce_api.h"
54
55
#include "rc_cntrl_param.h"
56
#include "rc_frame_info_collector.h"
57
#include "rc_look_ahead_params.h"
58
59
#include "ihevc_defs.h"
60
#include "ihevc_structs.h"
61
#include "ihevc_platform_macros.h"
62
#include "ihevc_deblk.h"
63
#include "ihevc_itrans_recon.h"
64
#include "ihevc_chroma_itrans_recon.h"
65
#include "ihevc_chroma_intra_pred.h"
66
#include "ihevc_intra_pred.h"
67
#include "ihevc_inter_pred.h"
68
#include "ihevc_mem_fns.h"
69
#include "ihevc_padding.h"
70
#include "ihevc_weighted_pred.h"
71
#include "ihevc_sao.h"
72
#include "ihevc_resi_trans.h"
73
#include "ihevc_quant_iquant_ssd.h"
74
#include "ihevc_cabac_tables.h"
75
76
#include "ihevce_defs.h"
77
#include "ihevce_lap_enc_structs.h"
78
#include "ihevce_multi_thrd_structs.h"
79
#include "ihevce_multi_thrd_funcs.h"
80
#include "ihevce_me_common_defs.h"
81
#include "ihevce_had_satd.h"
82
#include "ihevce_error_codes.h"
83
#include "ihevce_bitstream.h"
84
#include "ihevce_cabac.h"
85
#include "ihevce_rdoq_macros.h"
86
#include "ihevce_function_selector.h"
87
#include "ihevce_enc_structs.h"
88
#include "ihevce_entropy_structs.h"
89
#include "ihevce_cmn_utils_instr_set_router.h"
90
#include "ihevce_enc_loop_structs.h"
91
#include "ihevce_bs_compute_ctb.h"
92
#include "ihevce_global_tables.h"
93
#include "ihevce_dep_mngr_interface.h"
94
#include "hme_datatype.h"
95
#include "hme_interface.h"
96
#include "hme_common_defs.h"
97
#include "hme_defs.h"
98
#include "ihevce_me_instr_set_router.h"
99
#include "hme_globals.h"
100
#include "hme_utils.h"
101
#include "hme_coarse.h"
102
#include "hme_refine.h"
103
#include "hme_err_compute.h"
104
#include "hme_common_utils.h"
105
#include "hme_search_algo.h"
106
107
/*******************************************************************************
108
*                             MACROS
109
*******************************************************************************/
110
/**
 * Copies the motion vector and the reference index held by a search node into
 * an MV bank slot.
 *
 *   ps_mv          : destination; its i2_mv_x and i2_mv_y members are written
 *   pi1_ref_idx    : destination that receives the node's i1_ref_idx
 *   ps_search_node : source; s_mv.i2_mvx, s_mv.i2_mvy and i1_ref_idx are read
 *   shift          : right shift applied to both mv components before storing
 *
 * The body is wrapped in do { } while(0) so that an invocation followed by ';'
 * forms exactly one statement and can be used in an unbraced if / else.
 * Every argument is parenthesised so that expressions such as "&s_mv" or
 * "ps_mv + 1" expand as intended.
 *
 * Note: ps_mv is evaluated twice and ps_search_node three times, so arguments
 * must not have side effects.
 */
#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift)                              \
    do                                                                                             \
    {                                                                                              \
        (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift);                               \
        (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift);                               \
        *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx;                                             \
    } while(0)
116
117
/*****************************************************************************/
118
/* Function Definitions                                                      */
119
/*****************************************************************************/
120
121
/**
122
********************************************************************************
123
*  @fn     void hme_update_mv_bank_coarse(search_results_t *ps_search_results,
124
*                                   layer_mv_t *ps_layer_mv,
125
*                                   S32 i4_blk_x,
126
*                                   S32 i4_blk_y,
127
*                                   search_node_t *ps_search_node_4x8_l,
128
*                                   search_node_t *ps_search_node_8x4_t,
129
*                                   S08 i1_ref_idx,
130
*                                   mvbank_update_prms_t *ps_prms
131
*
132
*  @brief  Updates the coarse layer MV Bank for a given ref id and blk pos
133
*
134
*  @param[in]  ps_search_results: Search results data structure
135
*
136
*  @param[in, out]  ps_layer_mv : MV Bank for this layer
137
*
138
*  @param[in]  i4_search_blk_x: column number of the 4x4 blk searched
139
*
140
*  @param[in]  i4_search_blk_y: row number of the 4x4 blk searched
141
*
142
*  @param[in]  ps_search_node_4x8_t: Best MV of the 4x8T blk
143
*
144
*  @param[in]  ps_search_node_8x4_l: Best MV of the 8x4L blk
145
*
146
*  @param[in]  i1_ref_idx : Reference ID that has been searched
147
*
148
*  @param[in]  ps_prms : Parameters pertaining to the MV Bank update
149
*
150
*  @return None
151
********************************************************************************
152
*/
153
void hme_update_mv_bank_coarse(
154
    search_results_t *ps_search_results,
155
    layer_mv_t *ps_layer_mv,
156
    S32 i4_search_blk_x,
157
    S32 i4_search_blk_y,
158
    search_node_t *ps_search_node_4x8_t,
159
    search_node_t *ps_search_node_8x4_l,
160
    S08 i1_ref_idx,
161
    mvbank_update_prms_t *ps_prms)
162
139k
{
163
    /* These point to the MV and ref idx posn to be udpated */
164
139k
    hme_mv_t *ps_mv;
165
139k
    S08 *pi1_ref_idx;
166
167
    /* Offset within the bank */
168
139k
    S32 i4_offset;
169
170
139k
    S32 i, j, i4_blk_x, i4_blk_y;
171
172
    /* Best results for 8x4R and 4x8B blocks */
173
139k
    search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
174
175
    /* Number of MVs in a block */
176
139k
    S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref;
177
178
139k
    search_node_t *aps_search_nodes[4];
179
180
    /* The search blk may be different in size from the blk used to hold MV */
181
139k
    i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
182
139k
    i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
183
184
    /* Compute the offset in the MV bank */
185
139k
    i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
186
139k
    i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
187
188
    /* Identify the correct offset in the mvbank and the reference id buf */
189
139k
    ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx));
190
139k
    pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx));
191
192
    /*************************************************************************/
193
    /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
194
    /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
195
    /* If number of results to be stored is 4, then we store all these 4     */
196
    /* results, else we pick best ones                                       */
197
    /*************************************************************************/
198
139k
    ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
199
139k
    ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
200
201
139k
    ASSERT(num_mvs <= 4);
202
203
    /* Doing this to sort best results */
204
139k
    aps_search_nodes[0] = ps_search_node_8x4_r;
205
139k
    aps_search_nodes[1] = ps_search_node_4x8_b;
206
139k
    aps_search_nodes[2] = ps_search_node_8x4_l;
207
139k
    aps_search_nodes[3] = ps_search_node_4x8_t;
208
139k
    if(num_mvs == 4)
209
139k
    {
210
139k
        COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0);
211
139k
        ps_mv++;
212
139k
        pi1_ref_idx++;
213
139k
        COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0);
214
139k
        ps_mv++;
215
139k
        pi1_ref_idx++;
216
139k
        COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0);
217
139k
        ps_mv++;
218
139k
        pi1_ref_idx++;
219
139k
        COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0);
220
139k
        ps_mv++;
221
139k
        pi1_ref_idx++;
222
139k
        return;
223
139k
    }
224
225
    /* Run through the results, store them in best to worst order */
226
0
    for(i = 0; i < num_mvs; i++)
227
0
    {
228
0
        for(j = i + 1; j < 4; j++)
229
0
        {
230
0
            if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost)
231
0
            {
232
0
                SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *);
233
0
            }
234
0
        }
235
0
        COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0);
236
0
        ps_mv++;
237
0
        pi1_ref_idx++;
238
0
    }
239
0
}
240
241
/**
242
********************************************************************************
243
*  @fn     void hme_coarse_frm_init(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
244
*
245
*  @brief  Frame init entry point Coarse ME.
246
*
247
*  @param[in,out]  ps_ctxt: ME Handle
248
*
249
*  @param[in]  ps_coarse_prms : Coarse layer config params
250
*
251
*  @return None
252
********************************************************************************
253
*/
254
void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
255
2.03k
{
256
2.03k
    layer_ctxt_t *ps_curr_layer;
257
258
2.03k
    S32 i4_pic_wd, i4_pic_ht;
259
260
2.03k
    S32 num_blks_in_pic, num_blks_in_row;
261
262
2.03k
    BLK_SIZE_T e_search_blk_size = BLK_4x4;
263
264
2.03k
    S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
265
266
    /* Number of references to search */
267
2.03k
    S32 i4_num_ref;
268
269
2.03k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
270
2.03k
    i4_num_ref = ps_coarse_prms->i4_num_ref;
271
272
2.03k
    i4_pic_wd = ps_curr_layer->i4_wd;
273
2.03k
    i4_pic_ht = ps_curr_layer->i4_ht;
274
    /* Macro updates num_blks_in_pic and num_blks_in_row*/
275
2.03k
    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
276
277
    /************************************************************************/
278
    /* Initialize the mv bank that holds results of this layer.             */
279
    /************************************************************************/
280
2.03k
    hme_init_mv_bank(
281
2.03k
        ps_curr_layer,
282
2.03k
        BLK_4x4,
283
2.03k
        i4_num_ref,
284
2.03k
        ps_coarse_prms->num_results,
285
2.03k
        ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]);
286
287
2.03k
    return;
288
2.03k
}
289
290
/**
291
********************************************************************************
292
*  @fn    void hme_derive_worst_case_search_range(range_prms_t *ps_range,
293
*                                   range_prms_t *ps_pic_limit,
294
*                                   range_prms_t *ps_mv_limit,
295
*                                   S32 i4_x,
296
*                                   S32 i4_y,
297
*                                   S32 blk_wd,
298
*                                   S32 blk_ht)
299
*
300
*  @brief  given picture limits and blk dimensions and mv search limits, obtains
301
*          teh valid search range such that the blk stays within pic boundaries,
302
*          where picture boundaries include padded portions of picture
303
*
304
*  @param[out] ps_range: updated with actual search range
305
*
306
*  @param[in] ps_pic_limit : picture boundaries
307
*
308
*  @param[in] ps_mv_limit: Search range limits for the mvs
309
*
310
*  @param[in] i4_x : x coordinate of the blk
311
*
312
*  @param[in] i4_y : y coordinate of the blk
313
*
314
*  @param[in] blk_wd : blk width
315
*
316
*  @param[in] blk_ht : blk height
317
*
318
*  @return void
319
********************************************************************************
320
*/
321
void hme_derive_worst_case_search_range(
322
    range_prms_t *ps_range,
323
    range_prms_t *ps_pic_limit,
324
    range_prms_t *ps_mv_limit,
325
    S32 i4_x,
326
    S32 i4_y,
327
    S32 blk_wd,
328
    S32 blk_ht)
329
195k
{
330
    /* Taking max x of left block, min x of current block */
331
195k
    ps_range->i2_max_x =
332
195k
        MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x);
333
195k
    ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
334
    /* Taking max y of top block, min y of current block */
335
195k
    ps_range->i2_max_y =
336
195k
        MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y);
337
195k
    ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
338
195k
}
339
340
/**
341
********************************************************************************
342
* @fn void hme_combine_4x4_sads_and_compute_cost(S08 i1_ref_idx,
343
*                                           range_prms_t *ps_mv_range,
344
*                                           range_prms_t *ps_mv_limit,
345
*                                           hme_mv_t *ps_best_mv_4x8,
346
*                                           hme_mv_t *ps_best_mv_8x4,
347
*                                           pred_ctxt_t *ps_pred_ctxt,
348
*                                           PF_MV_COST_FXN pf_mv_cost_compute,
349
*                                           ME_QUALITY_PRESETS_T e_me_quality_preset,
350
*                                           S16 *pi2_sads_4x4_current,
351
*                                           S16 *pi2_sads_4x4_east,
352
*                                           S16 *pi2_sads_4x4_south,
353
*                                           FILE *fp_dump_sad)
354
*
355
*  @brief  Does a full search on entire srch window with a given step size in coarse layer
356
*
357
*  @param[in] i1_ref_idx : Cur ref idx
358
*
359
*  @param[in] ps_layer_ctxt: All info about this layer
360
*
361
*  @param[out] ps_best_mv  : type hme_mv_t contains best mv x and y
362
*
363
*  @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
364
*
365
*  @param[in] pf_mv_cost_compute : mv cost computation function
366
*
367
*  @return void
368
********************************************************************************
369
*/
370
void hme_combine_4x4_sads_and_compute_cost_high_quality(
371
    S08 i1_ref_idx,
372
    range_prms_t *ps_mv_range,
373
    range_prms_t *ps_mv_limit,
374
    hme_mv_t *ps_best_mv_4x8,
375
    hme_mv_t *ps_best_mv_8x4,
376
    pred_ctxt_t *ps_pred_ctxt,
377
    PF_MV_COST_FXN pf_mv_cost_compute,
378
    S16 *pi2_sads_4x4_current,
379
    S16 *pi2_sads_4x4_east,
380
    S16 *pi2_sads_4x4_south)
381
46.4k
{
382
    /* These control number of parts and number of pts in grid to search */
383
46.4k
    S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
384
46.4k
    S32 step_shift_x, step_shift_y;
385
46.4k
    S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
386
387
46.4k
    S32 min_cost_4x8 = MAX_32BIT_VAL;
388
46.4k
    S32 min_cost_8x4 = MAX_32BIT_VAL;
389
390
46.4k
    search_node_t s_search_node;
391
46.4k
    s_search_node.i1_ref_idx = i1_ref_idx;
392
393
46.4k
    stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
394
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
395
46.4k
    step_shift_x = step_shift_y = 1;
396
397
46.4k
    mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
398
46.4k
    mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
399
46.4k
    mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
400
46.4k
    mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
401
402
    /* Run 2loops to sweep over the reference area */
403
990k
    for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
404
943k
    {
405
23.8M
        for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
406
22.8M
        {
407
22.8M
            S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
408
22.8M
            S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
409
22.8M
                          ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
410
411
            /* Get SAD by adding SAD for current and neighbour S  */
412
22.8M
            sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
413
22.8M
            sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
414
415
            //          fprintf(fp_dump_sad,"%d\t",sad);
416
22.8M
            s_search_node.s_mv.i2_mvx = mvx;
417
22.8M
            s_search_node.s_mv.i2_mvy = mvy;
418
419
22.8M
            cost_4x8 = cost_8x4 =
420
22.8M
                pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL);
421
422
22.8M
            cost_4x8 += sad_4x8;
423
22.8M
            cost_8x4 += sad_8x4;
424
425
22.8M
            if(cost_4x8 < min_cost_4x8)
426
483k
            {
427
483k
                best_mv_x_4x8 = mvx;
428
483k
                best_mv_y_4x8 = mvy;
429
483k
                min_cost_4x8 = cost_4x8;
430
483k
            }
431
22.8M
            if(cost_8x4 < min_cost_8x4)
432
502k
            {
433
502k
                best_mv_x_8x4 = mvx;
434
502k
                best_mv_y_8x4 = mvy;
435
502k
                min_cost_8x4 = cost_8x4;
436
502k
            }
437
22.8M
        }
438
943k
    }
439
440
46.4k
    ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
441
46.4k
    ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
442
443
46.4k
    ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
444
46.4k
    ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
445
46.4k
}
446
447
void hme_combine_4x4_sads_and_compute_cost_high_speed(
448
    S08 i1_ref_idx,
449
    range_prms_t *ps_mv_range,
450
    range_prms_t *ps_mv_limit,
451
    hme_mv_t *ps_best_mv_4x8,
452
    hme_mv_t *ps_best_mv_8x4,
453
    pred_ctxt_t *ps_pred_ctxt,
454
    PF_MV_COST_FXN pf_mv_cost_compute,
455
    S16 *pi2_sads_4x4_current,
456
    S16 *pi2_sads_4x4_east,
457
    S16 *pi2_sads_4x4_south)
458
92.7k
{
459
    /* These control number of parts and number of pts in grid to search */
460
92.7k
    S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4;
461
92.7k
    S32 step_shift_x, step_shift_y;
462
92.7k
    S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
463
464
92.7k
    S32 rnd, lambda, lambda_q_shift;
465
466
92.7k
    S32 min_cost_4x8 = MAX_32BIT_VAL;
467
92.7k
    S32 min_cost_8x4 = MAX_32BIT_VAL;
468
469
92.7k
    (void)pf_mv_cost_compute;
470
92.7k
    stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
471
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
472
92.7k
    step_shift_x = step_shift_y = 2;
473
474
92.7k
    mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x);
475
92.7k
    mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
476
92.7k
    mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
477
92.7k
    mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
478
479
92.7k
    lambda = ps_pred_ctxt->lambda;
480
92.7k
    lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
481
92.7k
    rnd = 1 << (lambda_q_shift - 1);
482
483
92.7k
    ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
484
92.7k
    ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
485
486
    /* Run 2loops to sweep over the reference area */
487
964k
    for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
488
871k
    {
489
13.3M
        for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
490
12.4M
        {
491
12.4M
            S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
492
493
12.4M
            S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
494
12.4M
                          ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
495
496
            /* Get SAD by adding SAD for current and neighbour S  */
497
12.4M
            sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
498
12.4M
            sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
499
500
            //          fprintf(fp_dump_sad,"%d\t",sad);
501
502
12.4M
            cost_4x8 = cost_8x4 =
503
12.4M
                (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx;
504
505
12.4M
            cost_4x8 += (mvx != 0) ? 1 : 0;
506
12.4M
            cost_4x8 += (mvy != 0) ? 1 : 0;
507
12.4M
            cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift;
508
509
12.4M
            cost_8x4 += (mvx != 0) ? 1 : 0;
510
12.4M
            cost_8x4 += (mvy != 0) ? 1 : 0;
511
12.4M
            cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift;
512
513
12.4M
            cost_4x8 += sad_4x8;
514
12.4M
            cost_8x4 += sad_8x4;
515
516
12.4M
            if(cost_4x8 < min_cost_4x8)
517
803k
            {
518
803k
                best_mv_x_4x8 = mvx;
519
803k
                best_mv_y_4x8 = mvy;
520
803k
                min_cost_4x8 = cost_4x8;
521
803k
            }
522
12.4M
            if(cost_8x4 < min_cost_8x4)
523
778k
            {
524
778k
                best_mv_x_8x4 = mvx;
525
778k
                best_mv_y_8x4 = mvy;
526
778k
                min_cost_8x4 = cost_8x4;
527
778k
            }
528
12.4M
        }
529
871k
    }
530
531
92.7k
    ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
532
92.7k
    ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
533
534
92.7k
    ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
535
92.7k
    ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
536
92.7k
}
537
538
/**
539
********************************************************************************
540
*  @fn     hme_store_4x4_sads(hme_search_prms_t *ps_search_prms,
541
*                               layer_ctxt_t *ps_layer_ctxt)
542
*
543
*  @brief  Does a 4x4 sad computation on a given range and stores it in memory
544
*
545
*  @param[in] ps_search_prms : Search prms structure containing info like
546
*               blk dimensions, search range etc
547
*
548
*  @param[in] ps_layer_ctxt: All info about this layer
549
*
550
*  @param[in] ps_wt_inp_prms: All info about weighted input
551
*
552
*  @param[in] e_me_quality_preset: motion estimation quality preset
553
*
554
*  @param[in] pi2_sads_4x4: Memory to store all 4x4 SADs for given range
555
*
556
*  @return void
557
********************************************************************************
558
*/
559
560
void hme_store_4x4_sads_high_quality(
561
    hme_search_prms_t *ps_search_prms,
562
    layer_ctxt_t *ps_layer_ctxt,
563
    range_prms_t *ps_mv_limit,
564
    wgt_pred_ctxt_t *ps_wt_inp_prms,
565
    S16 *pi2_sads_4x4)
566
66.9k
{
567
66.9k
    S32 sad, i, j;
568
569
    /* Input and reference attributes */
570
66.9k
    U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
571
66.9k
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
572
573
    /* The reference is actually an array of ptrs since there are several    */
574
    /* reference id. So an array gets passed form calling function           */
575
66.9k
    U08 **ppu1_ref, *pu1_ref_coloc;
576
577
66.9k
    S32 stepy, stepx, step_shift_x, step_shift_y;
578
66.9k
    S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
579
580
    /* Points to the range limits for mv */
581
66.9k
    range_prms_t *ps_range_prms;
582
583
    /* Reference index to be searched */
584
66.9k
    S32 i4_search_idx = ps_search_prms->i1_ref_idx;
585
    /* Using the member 0 to store for all ref. idx. */
586
66.9k
    ps_range_prms = ps_search_prms->aps_mv_range[0];
587
66.9k
    pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
588
66.9k
    i4_inp_stride = ps_search_prms->i4_inp_stride;
589
590
    /* Move to the location of the search blk in inp buffer */
591
66.9k
    pu1_inp_orig += ps_search_prms->i4_cu_x_off;
592
66.9k
    pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
593
594
    /*************************************************************************/
595
    /* we use either input of previously encoded pictures as reference       */
596
    /* in coarse layer                                                       */
597
    /*************************************************************************/
598
66.9k
    i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
599
66.9k
    ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
600
601
    /* colocated position in reference picture */
602
66.9k
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
603
66.9k
    pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
604
605
66.9k
    stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
606
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
607
66.9k
    step_shift_x = step_shift_y = 1;
608
609
66.9k
    mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
610
66.9k
    mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
611
66.9k
    mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
612
66.9k
    mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
613
614
    /* Run 2loops to sweep over the reference area */
615
1.48M
    for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
616
1.41M
    {
617
38.1M
        for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
618
36.7M
        {
619
            /* Set up the reference and inp ptr */
620
36.7M
            pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
621
36.7M
            pu1_inp = pu1_inp_orig;
622
            /* SAD computation */
623
36.7M
            {
624
36.7M
                sad = 0;
625
183M
                for(i = 0; i < 4; i++)
626
147M
                {
627
735M
                    for(j = 0; j < 4; j++)
628
588M
                    {
629
588M
                        sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
630
588M
                    }
631
147M
                    pu1_inp += i4_inp_stride;
632
147M
                    pu1_ref += i4_ref_stride;
633
147M
                }
634
36.7M
            }
635
636
36.7M
            pi2_sads_4x4
637
36.7M
                [((mvx >> step_shift_x) + mv_x_offset) +
638
36.7M
                 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
639
36.7M
        }
640
1.41M
    }
641
66.9k
}
642
643
void hme_store_4x4_sads_high_speed(
644
    hme_search_prms_t *ps_search_prms,
645
    layer_ctxt_t *ps_layer_ctxt,
646
    range_prms_t *ps_mv_limit,
647
    wgt_pred_ctxt_t *ps_wt_inp_prms,
648
    S16 *pi2_sads_4x4)
649
128k
{
650
128k
    S32 sad, i, j;
651
652
    /* Input and reference attributes */
653
128k
    U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
654
128k
    S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
655
656
    /* The reference is actually an array of ptrs since there are several    */
657
    /* reference id. So an array gets passed form calling function           */
658
128k
    U08 **ppu1_ref, *pu1_ref_coloc;
659
660
128k
    S32 stepy, stepx, step_shift_x, step_shift_y;
661
128k
    S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
662
663
    /* Points to the range limits for mv */
664
128k
    range_prms_t *ps_range_prms;
665
666
    /* Reference index to be searched */
667
128k
    S32 i4_search_idx = ps_search_prms->i1_ref_idx;
668
669
    /* Using the member 0 for all ref. idx */
670
128k
    ps_range_prms = ps_search_prms->aps_mv_range[0];
671
128k
    pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
672
128k
    i4_inp_stride = ps_search_prms->i4_inp_stride;
673
674
    /* Move to the location of the search blk in inp buffer */
675
128k
    pu1_inp_orig += ps_search_prms->i4_cu_x_off;
676
128k
    pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
677
678
    /*************************************************************************/
679
    /* we use either input of previously encoded pictures as reference       */
680
    /* in coarse layer                                                       */
681
    /*************************************************************************/
682
128k
    i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
683
128k
    ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
684
685
    /* colocated position in reference picture */
686
128k
    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
687
128k
    pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
688
689
128k
    stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
690
    /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
691
128k
    step_shift_x = step_shift_y = 2;
692
693
128k
    mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
694
128k
    mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
695
128k
    mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
696
128k
    mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
697
698
    /* Run 2loops to sweep over the reference area */
699
1.38M
    for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
700
1.25M
    {
701
20.0M
        for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
702
18.8M
        {
703
            /* Set up the reference and inp ptr */
704
18.8M
            pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
705
18.8M
            pu1_inp = pu1_inp_orig;
706
            /* SAD computation */
707
18.8M
            {
708
18.8M
                sad = 0;
709
94.1M
                for(i = 0; i < 4; i++)
710
75.2M
                {
711
376M
                    for(j = 0; j < 4; j++)
712
301M
                    {
713
301M
                        sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
714
301M
                    }
715
75.2M
                    pu1_inp += i4_inp_stride;
716
75.2M
                    pu1_ref += i4_ref_stride;
717
75.2M
                }
718
18.8M
            }
719
720
18.8M
            pi2_sads_4x4
721
18.8M
                [((mvx >> step_shift_x) + mv_x_offset) +
722
18.8M
                 ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
723
18.8M
        }
724
1.25M
    }
725
128k
}
726
/**
727
********************************************************************************
728
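*  @fn     void hme_coarsest(coarse_me_ctxt_t *ps_ctxt,
*                            coarse_prms_t *ps_coarse_prms,
*                            multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
*                            WORD32 i4_ping_pong,
*                            void **ppv_dep_mngr_hme_sync)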
729
*
730
*  @brief  Top level entry point for Coarse ME. Runs across blks and searches
731
*          at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
732
*
733
*  @param[in,out]  ps_ctxt: ME Handle
734
*
735
*  @param[in]  ps_coarse_prms : Coarse layer config params
736
*
737
*  @param[in]  ps_multi_thrd_ctxt : Multi thread context
738
*
739
*  @return None
740
********************************************************************************
741
*/
742
void hme_coarsest(
743
    coarse_me_ctxt_t *ps_ctxt,
744
    coarse_prms_t *ps_coarse_prms,
745
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
746
    WORD32 i4_ping_pong,
747
    void **ppv_dep_mngr_hme_sync)
748
2.59k
{
749
2.59k
    S16 *pi2_cur_ref_sads_4x4;
750
2.59k
    S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF];
751
2.59k
    S32 num_rows_coarse;
752
2.59k
    S32 sad_top_offset, sad_current_offset;
753
2.59k
    S32 search_node_top_offset, search_node_left_offset;
754
755
2.59k
    ME_QUALITY_PRESETS_T e_me_quality_preset =
756
2.59k
        ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
757
758
2.59k
    search_results_t *ps_search_results;
759
2.59k
    mvbank_update_prms_t s_mv_update_prms;
760
2.59k
    BLK_SIZE_T e_search_blk_size = BLK_4x4;
761
2.59k
    hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4;
762
763
2.59k
    S32 global_id_8x4, global_id_4x8;
764
765
    /*************************************************************************/
766
    /* These directly point to the best search result nodes that will be     */
767
    /* updated by the search algorithm, rather than have to go through an    */
768
    /* elaborate structure                                                   */
769
    /*************************************************************************/
770
2.59k
    search_node_t *aps_best_search_node_8x4[MAX_NUM_REF];
771
2.59k
    search_node_t *aps_best_search_node_4x8[MAX_NUM_REF];
772
773
    /* These point to various spatial candts */
774
2.59k
    search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl;
775
2.59k
    search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl;
776
2.59k
    search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8;
777
2.59k
    search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8;
778
2.59k
    search_node_t as_top_neighbours[4], as_left_neighbours[3];
779
780
    /* Holds the global mv for a given ref index */
781
2.59k
    search_node_t s_candt_global[MAX_NUM_REF];
782
783
    /* All the search candidates */
784
2.59k
    search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS];
785
2.59k
    search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS];
786
2.59k
    search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8;
787
788
    /* Actual range per blk and the pic level boundaries */
789
2.59k
    range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF];
790
791
    /* Current and prev pic layer ctxt at the coarsest layer */
792
2.59k
    layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
793
794
    /* best mv of full search */
795
2.59k
    hme_mv_t best_mv_4x8, best_mv_8x4;
796
797
    /* Book keeping at blk level */
798
2.59k
    S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row;
799
800
2.59k
    S32 blk_y;
801
802
    /* Block dimensions */
803
2.59k
    S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
804
805
2.59k
    S32 lambda = ps_coarse_prms->lambda;
806
807
    /* Number of references to search */
808
2.59k
    S32 i4_num_ref;
809
810
2.59k
    S32 i4_i, id, i;
811
2.59k
    S08 i1_ref_idx;
812
813
2.59k
    S32 i4_pic_wd, i4_pic_ht;
814
2.59k
    S32 i4_layer_id;
815
816
2.59k
    S32 end_of_frame;
817
818
2.59k
    pf_get_wt_inp fp_get_wt_inp;
819
820
    /* Maximum search iterations around any candidate */
821
2.59k
    S32 i4_max_iters = ps_coarse_prms->i4_max_iters;
822
823
2.59k
    ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
824
2.59k
    ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id);
825
826
    /* We need only one instance of search results structure */
827
2.59k
    ps_search_results = &ps_ctxt->s_search_results_8x8;
828
829
2.59k
    ps_search_candts_8x4 = &as_search_candts_8x4[0];
830
2.59k
    ps_search_candts_4x8 = &as_search_candts_4x8[0];
831
832
2.59k
    end_of_frame = 0;
833
834
2.59k
    i4_pic_wd = ps_curr_layer->i4_wd;
835
2.59k
    i4_pic_ht = ps_curr_layer->i4_ht;
836
837
2.59k
    fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
838
2.59k
                        ->pf_get_wt_inp_8x8;
839
840
2.59k
    num_rows_coarse = ps_ctxt->i4_num_row_bufs;
841
842
    /*************************************************************************/
843
    /* Coarse Layer always does explicit search. Number of reference frames  */
844
    /* to search is a configurable parameter supplied by the application     */
845
    /*************************************************************************/
846
2.59k
    i4_num_ref = ps_coarse_prms->i4_num_ref;
847
2.59k
    i4_layer_id = ps_coarse_prms->i4_layer_id;
848
849
    /*************************************************************************/
850
    /*  The search algorithm goes as follows:                                */
851
    /*                                                                       */
852
    /*          ___                                                          */
853
    /*         | e |                                                         */
854
    /*      ___|___|___                                                      */
855
    /*     | c | a | b |                                                     */
856
    /*     |___|___|___|                                                     */
857
    /*         | d |                                                         */
858
    /*         |___|                                                         */
859
    /*                                                                       */
860
    /* For the target block a, we collect best results from 2 8x4 blks       */
861
    /* These are c-a and a-b. The 4x8 blks are e-a and a-d                   */
862
    /* c-a result is already available from results of blk c. a-b is         */
863
    /* evaluated in this blk. Likewise e-a result is stored in a row buffer  */
864
    /* a-d is evaluated this blk                                             */
865
    /* So we store a row buffer which stores best 4x8 results of all top blk */
866
    /*************************************************************************/
867
868
    /************************************************************************/
869
    /* Initialize the pointers to the best node.                            */
870
    /************************************************************************/
871
7.30k
    for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
872
4.70k
    {
873
4.70k
        aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B];
874
4.70k
        aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R];
875
4.70k
    }
876
877
    /************************************************************************/
878
    /* Initialize the "searchresults" structure. This will set up the number*/
879
    /* of search types, result updates etc                                  */
880
    /************************************************************************/
881
2.59k
    {
882
2.59k
        S32 num_results_per_part;
883
        /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and     */
884
        /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to   */
885
        /* only evaluate 1 result per part. In the coarse layer, we are      */
886
        /* limited to 2 results max per part, and max of 8 results.          */
887
2.59k
        num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2;
888
2.59k
        hme_init_search_results(
889
2.59k
            ps_search_results,
890
2.59k
            i4_num_ref,
891
2.59k
            ps_coarse_prms->num_results,
892
2.59k
            num_results_per_part,
893
2.59k
            BLK_8x8,
894
2.59k
            0,
895
2.59k
            0,
896
2.59k
            ps_ctxt->au1_is_past);
897
2.59k
    }
898
899
    /* Macro updates num_blks_in_pic and num_blks_in_row*/
900
2.59k
    GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
901
902
2.59k
    num_4x4_blks_in_row = num_blks_in_row + 1;
903
904
2.59k
    s_mv_update_prms.e_search_blk_size = e_search_blk_size;
905
2.59k
    s_mv_update_prms.i4_num_ref = i4_num_ref;
906
2.59k
    s_mv_update_prms.i4_shift = 0;
907
908
    /* For full search, support 2 or 4 step size */
909
2.59k
    if(ps_coarse_prms->do_full_search)
910
2.59k
    {
911
2.59k
        ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4));
912
2.59k
    }
913
914
7.30k
    for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
915
4.70k
    {
916
4.70k
        S32 blk, delta_poc;
917
4.70k
        S32 mv_x_clip, mv_y_clip;
918
        /* Initialize only the first row */
919
32.5k
        for(blk = 0; blk < num_blks_in_row; blk++)
920
27.8k
        {
921
27.8k
            INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i);
922
27.8k
        }
923
924
4.70k
        delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]);
925
926
        /* Setting search range for different references based on the delta poc */
927
        /*************************************************************************/
928
        /* set the MV limit per ref. pic.                                        */
929
        /*    - P pic. : Based on the config params.                             */
930
        /*    - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
931
        /*************************************************************************/
932
4.70k
        {
933
            /* TO DO : Remove hard coding of P-P dist. of 4 */
934
4.70k
            mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4;
935
936
            /* Only for B/b pic. */
937
4.70k
            if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
938
83
            {
939
83
                WORD16 i2_mv_y_per_poc;
940
941
                /* Get abs MAX for symmetric search */
942
83
                i2_mv_y_per_poc =
943
83
                    MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
944
83
                        (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id])));
945
946
83
                mv_y_clip = i2_mv_y_per_poc * delta_poc;
947
83
            }
948
            /* Set the Config. File Params for P pic. */
949
4.61k
            else
950
4.61k
            {
951
                /* TO DO : Remove hard coding of P-P dist. of 4 */
952
4.61k
                mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4;
953
4.61k
            }
954
955
            /* Making mv_x and mv_y range multiple of 4 */
956
4.70k
            mv_x_clip = (((mv_x_clip + 3) >> 2) << 2);
957
4.70k
            mv_y_clip = (((mv_y_clip + 3) >> 2) << 2);
958
            /* Clipping the range of mv_x and mv_y */
959
4.70k
            mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER);
960
4.70k
            mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER);
961
962
4.70k
            as_mv_limit[i4_i].i2_min_x = -mv_x_clip;
963
4.70k
            as_mv_limit[i4_i].i2_min_y = -mv_y_clip;
964
4.70k
            as_mv_limit[i4_i].i2_max_x = mv_x_clip;
965
4.70k
            as_mv_limit[i4_i].i2_max_y = mv_y_clip;
966
4.70k
        }
967
        /*Populating SAD block size based on search range */
968
4.70k
        ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) *
969
4.70k
                                       ((2 * mv_y_clip) / ps_coarse_prms->full_search_step);
970
4.70k
        ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i];
971
4.70k
    }
972
973
314k
    for(i = 0; i < 2 * MAX_INIT_CANDTS; i++)
974
311k
    {
975
311k
        search_node_t *ps_search_node;
976
311k
        ps_search_node = &ps_ctxt->s_init_search_node[i];
977
311k
        INIT_SEARCH_NODE(ps_search_node, 0);
978
311k
    }
979
10.3k
    for(i = 0; i < 3; i++)
980
7.79k
    {
981
7.79k
        search_node_t *ps_search_node;
982
7.79k
        ps_search_node = &as_left_neighbours[i];
983
7.79k
        INIT_SEARCH_NODE(ps_search_node, 0);
984
7.79k
        ps_search_node = &as_top_neighbours[i];
985
7.79k
        INIT_SEARCH_NODE(ps_search_node, 0);
986
7.79k
    }
987
2.59k
    INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
988
    /* Set up place holders to hold the search nodes of each initial candt */
989
158k
    for(i = 0; i < MAX_INIT_CANDTS; i++)
990
155k
    {
991
155k
        ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
992
993
155k
        ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i];
994
995
155k
        ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters;
996
155k
        ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters;
997
155k
    }
998
999
    /* For Top,TopLeft and Left cand., no need for refinement */
1000
2.59k
    id = 0;
1001
2.59k
    if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1002
327
    {
1003
        /* This search candt has the full search result */
1004
327
        ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1005
327
        id++;
1006
327
    }
1007
1008
2.59k
    ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node;
1009
2.59k
    ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1010
2.59k
    id++;
1011
2.59k
    ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node;
1012
2.59k
    ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1013
2.59k
    id++;
1014
2.59k
    ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node;
1015
2.59k
    ps_search_candts_8x4[id].u1_num_steps_refine = 0;
1016
2.59k
    id++;
1017
    /* This search candt stores the global candt */
1018
2.59k
    global_id_8x4 = id;
1019
2.59k
    id++;
1020
1021
2.59k
    if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1022
2.27k
    {
1023
        /* This search candt has the full search result */
1024
2.27k
        ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
1025
2.27k
        id++;
1026
2.27k
    }
1027
    /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1028
    /* the pointer for hme_init_pred_ctxt_no_encode()                       */
1029
2.59k
    ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node;
1030
1031
    /* For Top,TopLeft and Left cand., no need for refinement */
1032
2.59k
    id = 0;
1033
2.59k
    if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
1034
327
    {
1035
        /* This search candt has the full search result */
1036
327
        ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1037
327
        id++;
1038
327
    }
1039
1040
2.59k
    ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node;
1041
2.59k
    ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1042
2.59k
    id++;
1043
2.59k
    ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node;
1044
2.59k
    ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1045
2.59k
    id++;
1046
2.59k
    ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node;
1047
2.59k
    ps_search_candts_4x8[id].u1_num_steps_refine = 0;
1048
2.59k
    id++;
1049
    /* This search candt stores the global candt */
1050
2.59k
    global_id_4x8 = id;
1051
2.59k
    id++;
1052
2.59k
    if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
1053
2.27k
    {
1054
        /* This search candt has the full search result */
1055
2.27k
        ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
1056
2.27k
        id++;
1057
2.27k
    }
1058
    /* Don't increment id as (0,0) is removed from cand. list. Initializing */
1059
    /* the pointer for hme_init_pred_ctxt_no_encode()                       */
1060
2.59k
    ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node;
1061
1062
    /* Zero mv always has 0 mvx and y component, ref idx initialized inside */
1063
2.59k
    ps_candt_zeromv_8x4->s_mv.i2_mvx = 0;
1064
2.59k
    ps_candt_zeromv_8x4->s_mv.i2_mvy = 0;
1065
2.59k
    ps_candt_zeromv_4x8->s_mv.i2_mvx = 0;
1066
2.59k
    ps_candt_zeromv_4x8->s_mv.i2_mvy = 0;
1067
1068
    /* SET UP THE PRED CTXT FOR L0 AND L1 */
1069
2.59k
    {
1070
2.59k
        S32 pred_lx;
1071
1072
        /* Bottom left always not available */
1073
2.59k
        as_left_neighbours[2].u1_is_avail = 0;
1074
1075
7.79k
        for(pred_lx = 0; pred_lx < 2; pred_lx++)
1076
5.19k
        {
1077
5.19k
            pred_ctxt_t *ps_pred_ctxt;
1078
1079
5.19k
            ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1080
5.19k
            hme_init_pred_ctxt_no_encode(
1081
5.19k
                ps_pred_ctxt,
1082
5.19k
                ps_search_results,
1083
5.19k
                as_top_neighbours,
1084
5.19k
                as_left_neighbours,
1085
5.19k
                NULL,
1086
5.19k
                ps_candt_zeromv_8x4,
1087
5.19k
                ps_candt_zeromv_8x4,
1088
5.19k
                pred_lx,
1089
5.19k
                lambda,
1090
5.19k
                ps_coarse_prms->lambda_q_shift,
1091
5.19k
                ps_ctxt->apu1_ref_bits_tlu_lc,
1092
5.19k
                ps_ctxt->ai2_ref_scf);
1093
5.19k
        }
1094
2.59k
    }
1095
1096
    /*************************************************************************/
1097
    /* Initialize the search parameters for search algo with the following   */
1098
    /* parameters: No SATD, calculated number of initial candidates,         */
1099
    /* No post refinement, initial step size and number of iterations as     */
1100
    /* passed by the calling function.                                       */
1101
    /* Also, we use input for this layer search, and not recon.              */
1102
    /*************************************************************************/
1103
2.59k
    if(e_me_quality_preset == ME_XTREME_SPEED_25)
1104
327
        s_search_prms_8x4.i4_num_init_candts = 1;
1105
2.27k
    else
1106
2.27k
        s_search_prms_8x4.i4_num_init_candts = id;
1107
2.59k
    s_search_prms_8x4.i4_use_satd = 0;
1108
2.59k
    s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step;
1109
2.59k
    s_search_prms_8x4.i4_num_steps_post_refine = 0;
1110
2.59k
    s_search_prms_8x4.i4_use_rec = 0;
1111
2.59k
    s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4;
1112
2.59k
    s_search_prms_8x4.e_blk_size = BLK_8x4;
1113
2.59k
    s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters;
1114
    /* Coarse layer is always explicit */
1115
2.59k
    if(ME_MEDIUM_SPEED > e_me_quality_preset)
1116
1.10k
    {
1117
1.10k
        s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse;
1118
1.10k
    }
1119
1.49k
    else
1120
1.49k
    {
1121
1.49k
        s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1122
1.49k
    }
1123
1124
2.59k
    s_search_prms_8x4.i4_inp_stride = 8;
1125
2.59k
    s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0;
1126
2.59k
    if(ps_coarse_prms->do_full_search)
1127
2.59k
        s_search_prms_8x4.i4_max_iters = 1;
1128
2.59k
    s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B);
1129
    /* Using the member 0 to store for all ref. idx. */
1130
2.59k
    s_search_prms_8x4.aps_mv_range[0] = &s_range_prms;
1131
2.59k
    s_search_prms_8x4.ps_search_results = ps_search_results;
1132
2.59k
    s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step;
1133
1134
2.59k
    s_search_prms_4x8 = s_search_prms_8x4;
1135
2.59k
    s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8;
1136
2.59k
    s_search_prms_4x8.e_blk_size = BLK_4x8;
1137
2.59k
    s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R);
1138
1139
2.59k
    s_search_prms_4x4 = s_search_prms_8x4;
1140
    /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */
1141
2.59k
    s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8;
1142
2.59k
    s_search_prms_4x4.e_blk_size = BLK_4x4;
1143
2.59k
    s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N);
1144
    /*************************************************************************/
1145
    /* Picture limit on all 4 sides. This will be used to set mv limits for  */
1146
    /* every block given its coordinate.                                     */
1147
    /*************************************************************************/
1148
2.59k
    SET_PIC_LIMIT(
1149
2.59k
        s_pic_limit,
1150
2.59k
        ps_curr_layer->i4_pad_x_inp,
1151
2.59k
        ps_curr_layer->i4_pad_y_inp,
1152
2.59k
        ps_curr_layer->i4_wd,
1153
2.59k
        ps_curr_layer->i4_ht,
1154
2.59k
        s_search_prms_4x4.i4_num_steps_post_refine);
1155
1156
    /* Pick the global mv from previous reference */
1157
7.30k
    for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1158
4.70k
    {
1159
4.70k
        if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1160
4.35k
        {
1161
            /* Distance of current pic from reference */
1162
4.35k
            S32 i4_delta_poc;
1163
1164
4.35k
            hme_mv_t s_mv;
1165
4.35k
            i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx];
1166
1167
4.35k
            hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc);
1168
1169
4.35k
            s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x;
1170
4.35k
            s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y;
1171
4.35k
            s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx;
1172
1173
            /*********************************************************************/
1174
            /* Initialize the histogram for each reference index in current      */
1175
            /* layer ctxt                                                        */
1176
            /*********************************************************************/
1177
4.35k
            hme_init_histogram(
1178
4.35k
                ps_ctxt->aps_mv_hist[i1_ref_idx],
1179
4.35k
                (S32)as_mv_limit[i1_ref_idx].i2_max_x,
1180
4.35k
                (S32)as_mv_limit[i1_ref_idx].i2_max_y);
1181
4.35k
        }
1182
1183
        /*********************************************************************/
1184
        /* Initialize the dyn. search range params. for each reference index */
1185
        /* in current layer ctxt                                             */
1186
        /*********************************************************************/
1187
        /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1188
4.70k
        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1189
4.39k
        {
1190
4.39k
            INIT_DYN_SEARCH_PRMS(
1191
4.39k
                &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1192
4.39k
                ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]);
1193
4.39k
        }
1194
4.70k
    }
1195
1196
    /*************************************************************************/
1197
    /* if exhaustive algorithm then we use only 1 candt 0, 0                 */
1198
    /* else we use a lot of causal and non causal candts                     */
1199
    /* finally set number to the configured number of candts                 */
1200
    /*************************************************************************/
1201
1202
    /* Loop in raster order over each 4x4 blk in a given row till end of frame */
1203
23.9k
    while(0 == end_of_frame)
1204
21.3k
    {
1205
21.3k
        job_queue_t *ps_job;
1206
21.3k
        void *pv_hme_dep_mngr;
1207
21.3k
        WORD32 offset_val, check_dep_pos, set_dep_pos;
1208
1209
        /* Get the current layer HME Dep Mngr       */
1210
        /* Note : Use layer_id - 1 in HME layers    */
1211
21.3k
        pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1];
1212
1213
        /* Get the current row from the job queue */
1214
21.3k
        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
1215
21.3k
            ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong);
1216
1217
        /* If all rows are done, set the end of process flag to 1, */
1218
        /* and the current row to -1 */
1219
21.3k
        if(NULL == ps_job)
1220
2.59k
        {
1221
2.59k
            blk_y = -1;
1222
2.59k
            end_of_frame = 1;
1223
2.59k
        }
1224
18.7k
        else
1225
18.7k
        {
1226
18.7k
            ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type);
1227
1228
            /* Obtain the current row's details from the job */
1229
18.7k
            blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
1230
1231
18.7k
            if(1 == ps_ctxt->s_frm_prms.is_i_pic)
1232
5.28k
            {
1233
                /* set the output dependency of current row */
1234
5.28k
                ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1235
5.28k
                continue;
1236
5.28k
            }
1237
1238
            /* Set Variables for Dep. Checking and Setting */
1239
13.4k
            set_dep_pos = blk_y + 1;
1240
13.4k
            if(blk_y > 0)
1241
11.4k
            {
1242
11.4k
                offset_val = 2;
1243
11.4k
                check_dep_pos = blk_y - 1;
1244
11.4k
            }
1245
2.03k
            else
1246
2.03k
            {
1247
                /* First row should run without waiting */
1248
2.03k
                offset_val = -1;
1249
2.03k
                check_dep_pos = 0;
1250
2.03k
            }
1251
1252
            /* Loop over all the blocks in current row */
1253
            /* One block extra, since the last block in a row needs East block */
1254
109k
            for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++)
1255
96.3k
            {
1256
                /* Wait till top row block is processed   */
1257
                /* Currently checking till top right block*/
1258
96.3k
                if(blk_x < (num_blks_in_row))
1259
82.8k
                {
1260
82.8k
                    ihevce_dmgr_chk_row_row_sync(
1261
82.8k
                        pv_hme_dep_mngr,
1262
82.8k
                        blk_x,
1263
82.8k
                        offset_val,
1264
82.8k
                        check_dep_pos,
1265
82.8k
                        0, /* Col Tile No. : Not supported in PreEnc*/
1266
82.8k
                        ps_ctxt->thrd_id);
1267
82.8k
                }
1268
1269
                /***************************************************************/
1270
                /* Get Weighted input for all references                       */
1271
                /***************************************************************/
1272
96.3k
                fp_get_wt_inp(
1273
96.3k
                    ps_curr_layer,
1274
96.3k
                    &ps_ctxt->s_wt_pred,
1275
96.3k
                    1 << (blk_size_shift + 1),
1276
96.3k
                    blk_x << blk_size_shift,
1277
96.3k
                    (blk_y - 1) << blk_size_shift,
1278
96.3k
                    1 << (blk_size_shift + 1),
1279
96.3k
                    i4_num_ref,
1280
96.3k
                    ps_ctxt->i4_wt_pred_enable_flag);
1281
1282
                /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
1283
96.3k
                hme_reset_search_results(
1284
96.3k
                    ps_search_results,
1285
96.3k
                    s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask,
1286
96.3k
                    MV_RES_FPEL);
1287
1288
                /* Compute the search node offsets */
1289
                /* MAX is used to clip when left and top neighbours are not available at coarse boundaries  */
1290
96.3k
                search_node_top_offset =
1291
96.3k
                    blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row;
1292
96.3k
                search_node_left_offset =
1293
96.3k
                    MAX((blk_x - 1), 0) +
1294
96.3k
                    ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row;
1295
1296
                /* Input offset: wrt CU start. Offset for South block */
1297
96.3k
                s_search_prms_4x4.i4_cu_x_off = 0;
1298
96.3k
                s_search_prms_4x4.i4_cu_y_off = 4;
1299
96.3k
                s_search_prms_4x4.i4_inp_stride = 8;
1300
96.3k
                s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift;
1301
96.3k
                s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift;
1302
1303
96.3k
                s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift;
1304
96.3k
                s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1)
1305
96.3k
                                                                          << blk_size_shift;
1306
1307
                /* This layer will always use explicit ME */
1308
                /* Loop across different Ref IDx */
1309
291k
                for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1310
195k
                {
1311
195k
                    sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1312
195k
                                     ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] *
1313
195k
                                         ai4_sad_4x4_block_stride[i1_ref_idx];
1314
195k
                    sad_current_offset =
1315
195k
                        (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
1316
195k
                        ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx];
1317
1318
                    /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */
1319
195k
                    if(0 == blk_x)
1320
28.9k
                        INIT_SEARCH_NODE(
1321
195k
                            &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x],
1322
195k
                            i1_ref_idx);
1323
1324
195k
                    pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx];
1325
1326
                    /* Initialize changing params here */
1327
195k
                    s_search_prms_8x4.i1_ref_idx = i1_ref_idx;
1328
195k
                    s_search_prms_4x8.i1_ref_idx = i1_ref_idx;
1329
195k
                    s_search_prms_4x4.i1_ref_idx = i1_ref_idx;
1330
1331
195k
                    if(num_blks_in_row == blk_x)
1332
28.9k
                    {
1333
28.9k
                        S16 *pi2_sads_4x4_current;
1334
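                        /* The current 4x4 block will be a padded region, which may not match with any of the reference, so its SAD storage is cleared first. NOTE(review): memset length is in bytes while the block size is used as an S16 element count elsewhere - confirm */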
1335
28.9k
                        pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1336
1337
28.9k
                        memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1338
28.9k
                    }
1339
1340
                    /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/
1341
195k
                    if((0 == blk_y) || (num_blks_in_row == blk_x))
1342
55.8k
                    {
1343
55.8k
                        S16 *pi2_sads_4x4_current;
1344
                        /* Compute 4x4 SADs for current block */
1345
                        /* Pointer to store SADs */
1346
55.8k
                        pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1347
1348
55.8k
                        hme_derive_worst_case_search_range(
1349
55.8k
                            &s_range_prms,
1350
55.8k
                            &s_pic_limit,
1351
55.8k
                            &as_mv_limit[i1_ref_idx],
1352
55.8k
                            blk_x << blk_size_shift,
1353
55.8k
                            blk_y << blk_size_shift,
1354
55.8k
                            blk_wd,
1355
55.8k
                            blk_ht);
1356
1357
55.8k
                        if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1358
20.4k
                        {
1359
20.4k
                            ((ihevce_me_optimised_function_list_t *)
1360
20.4k
                                 ps_ctxt->pv_me_optimised_function_list)
1361
20.4k
                                ->pf_store_4x4_sads_high_quality(
1362
20.4k
                                    &s_search_prms_4x4,
1363
20.4k
                                    ps_curr_layer,
1364
20.4k
                                    &as_mv_limit[i1_ref_idx],
1365
20.4k
                                    &ps_ctxt->s_wt_pred,
1366
20.4k
                                    pi2_sads_4x4_current);
1367
20.4k
                        }
1368
35.4k
                        else
1369
35.4k
                        {
1370
35.4k
                            ((ihevce_me_optimised_function_list_t *)
1371
35.4k
                                 ps_ctxt->pv_me_optimised_function_list)
1372
35.4k
                                ->pf_store_4x4_sads_high_speed(
1373
35.4k
                                    &s_search_prms_4x4,
1374
35.4k
                                    ps_curr_layer,
1375
35.4k
                                    &as_mv_limit[i1_ref_idx],
1376
35.4k
                                    &ps_ctxt->s_wt_pred,
1377
35.4k
                                    pi2_sads_4x4_current);
1378
35.4k
                        }
1379
55.8k
                    }
1380
139k
                    else
1381
139k
                    {
1382
                        /* For the zero mv candt, the ref idx to be modified */
1383
139k
                        ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx;
1384
139k
                        ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx;
1385
1386
139k
                        if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1387
128k
                        {
1388
                            /* For the global mvs alone, the search node points to a local variable */
1389
128k
                            ps_search_candts_8x4[global_id_8x4].ps_search_node =
1390
128k
                                &s_candt_global[i1_ref_idx];
1391
128k
                            ps_search_candts_4x8[global_id_4x8].ps_search_node =
1392
128k
                                &s_candt_global[i1_ref_idx];
1393
128k
                        }
1394
1395
139k
                        hme_get_spatial_candt(
1396
139k
                            ps_curr_layer,
1397
139k
                            BLK_4x4,
1398
139k
                            blk_x,
1399
139k
                            blk_y - 1,
1400
139k
                            i1_ref_idx,
1401
139k
                            as_top_neighbours,
1402
139k
                            as_left_neighbours,
1403
139k
                            0,
1404
139k
                            1,
1405
139k
                            0,
1406
139k
                            0);
1407
                        /* set up the various candts */
1408
139k
                        *ps_candt_4x8_l = as_left_neighbours[0];
1409
139k
                        *ps_candt_4x8_t = as_top_neighbours[1];
1410
139k
                        *ps_candt_4x8_tl = as_top_neighbours[0];
1411
139k
                        *ps_candt_8x4_l = *ps_candt_4x8_l;
1412
139k
                        *ps_candt_8x4_tl = *ps_candt_4x8_tl;
1413
139k
                        *ps_candt_8x4_t = *ps_candt_4x8_t;
1414
1415
139k
                        {
1416
139k
                            S32 pred_lx;
1417
139k
                            S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top;
1418
139k
                            pred_ctxt_t *ps_pred_ctxt;
1419
139k
                            PF_MV_COST_FXN pf_mv_cost_compute;
1420
1421
                            /* Compute 4x4 SADs for current block */
1422
                            /* Pointer to store SADs */
1423
139k
                            pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
1424
1425
139k
                            hme_derive_worst_case_search_range(
1426
139k
                                &s_range_prms,
1427
139k
                                &s_pic_limit,
1428
139k
                                &as_mv_limit[i1_ref_idx],
1429
139k
                                blk_x << blk_size_shift,
1430
139k
                                blk_y << blk_size_shift,
1431
139k
                                blk_wd,
1432
139k
                                blk_ht);
1433
139k
                            if(i4_pic_ht == blk_y)
1434
0
                            {
1435
0
                                memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
1436
0
                            }
1437
139k
                            else
1438
139k
                            {
1439
139k
                                if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1440
46.4k
                                {
1441
46.4k
                                    ((ihevce_me_optimised_function_list_t *)
1442
46.4k
                                         ps_ctxt->pv_me_optimised_function_list)
1443
46.4k
                                        ->pf_store_4x4_sads_high_quality(
1444
46.4k
                                            &s_search_prms_4x4,
1445
46.4k
                                            ps_curr_layer,
1446
46.4k
                                            &as_mv_limit[i1_ref_idx],
1447
46.4k
                                            &ps_ctxt->s_wt_pred,
1448
46.4k
                                            pi2_sads_4x4_current);
1449
46.4k
                                }
1450
92.7k
                                else
1451
92.7k
                                {
1452
92.7k
                                    ((ihevce_me_optimised_function_list_t *)
1453
92.7k
                                         ps_ctxt->pv_me_optimised_function_list)
1454
92.7k
                                        ->pf_store_4x4_sads_high_speed(
1455
92.7k
                                            &s_search_prms_4x4,
1456
92.7k
                                            ps_curr_layer,
1457
92.7k
                                            &as_mv_limit[i1_ref_idx],
1458
92.7k
                                            &ps_ctxt->s_wt_pred,
1459
92.7k
                                            pi2_sads_4x4_current);
1460
92.7k
                                }
1461
139k
                            }
1462
                            /* Set pred direction to L0 or L1 */
1463
139k
                            pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx];
1464
1465
                            /* Suitable context (L0 or L1) */
1466
139k
                            ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
1467
1468
                            /* Coarse layer is always explicit */
1469
139k
                            if(ME_PRISTINE_QUALITY > e_me_quality_preset)
1470
0
                            {
1471
0
                                pf_mv_cost_compute = compute_mv_cost_coarse;
1472
0
                            }
1473
139k
                            else
1474
139k
                            {
1475
                                /* Cost function is not called in high speed case. Below one is just a dummy function */
1476
139k
                                pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
1477
139k
                            }
1478
1479
                            /*********************************************************************/
1480
                            /* Now, compute the mv for the top block                             */
1481
                            /*********************************************************************/
1482
139k
                            pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset;
1483
1484
                            /*********************************************************************/
1485
                            /* For every blk in the picture, the search range needs to be derived*/
1486
                            /* Any blk can have any mv, but practical search constraints are     */
1487
                            /* imposed by the picture boundary and amt of padding.               */
1488
                            /*********************************************************************/
1489
139k
                            hme_derive_search_range(
1490
139k
                                &s_range_prms,
1491
139k
                                &s_pic_limit,
1492
139k
                                &as_mv_limit[i1_ref_idx],
1493
139k
                                blk_x << blk_size_shift,
1494
139k
                                (blk_y - 1) << blk_size_shift,
1495
139k
                                blk_wd,
1496
139k
                                blk_ht);
1497
1498
                            /* Compute the mv for the top block */
1499
139k
                            if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
1500
46.4k
                            {
1501
46.4k
                                ((ihevce_me_optimised_function_list_t *)
1502
46.4k
                                     ps_ctxt->pv_me_optimised_function_list)
1503
46.4k
                                    ->pf_combine_4x4_sads_and_compute_cost_high_quality(
1504
46.4k
                                        i1_ref_idx,
1505
46.4k
                                        &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1506
46.4k
                                        &as_mv_limit[i1_ref_idx],
1507
46.4k
                                        &best_mv_4x8,
1508
46.4k
                                        &best_mv_8x4,
1509
46.4k
                                        ps_pred_ctxt,
1510
46.4k
                                        pf_mv_cost_compute,
1511
46.4k
                                        pi2_sads_4x4_top, /* Current SAD block */
1512
46.4k
                                        (pi2_sads_4x4_top +
1513
46.4k
                                         ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1514
46.4k
                                        pi2_sads_4x4_current); /* South SAD block */
1515
46.4k
                            }
1516
92.7k
                            else
1517
92.7k
                            {
1518
92.7k
                                ((ihevce_me_optimised_function_list_t *)
1519
92.7k
                                     ps_ctxt->pv_me_optimised_function_list)
1520
92.7k
                                    ->pf_combine_4x4_sads_and_compute_cost_high_speed(
1521
92.7k
                                        i1_ref_idx,
1522
92.7k
                                        &s_range_prms, /* Both 4x8 and 8x4 has same search range */
1523
92.7k
                                        &as_mv_limit[i1_ref_idx],
1524
92.7k
                                        &best_mv_4x8,
1525
92.7k
                                        &best_mv_8x4,
1526
92.7k
                                        ps_pred_ctxt,
1527
92.7k
                                        pf_mv_cost_compute,
1528
92.7k
                                        pi2_sads_4x4_top, /* Current SAD block */
1529
92.7k
                                        (pi2_sads_4x4_top +
1530
92.7k
                                         ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
1531
92.7k
                                        pi2_sads_4x4_current); /* South SAD block */
1532
92.7k
                            }
1533
1534
139k
                            ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x;
1535
139k
                            ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y;
1536
139k
                            ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx;
1537
1538
139k
                            ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x;
1539
139k
                            ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y;
1540
139k
                            ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx;
1541
139k
                        }
1542
1543
                        /* call the appropriate Search Algo for 4x8S. The 4x8N would  */
1544
                        /* have already been called by top block */
1545
139k
                        hme_pred_search_square_stepn(
1546
139k
                            &s_search_prms_8x4,
1547
139k
                            ps_curr_layer,
1548
139k
                            &ps_ctxt->s_wt_pred,
1549
139k
                            e_me_quality_preset,
1550
139k
                            (ihevce_me_optimised_function_list_t *)
1551
139k
                                ps_ctxt->pv_me_optimised_function_list
1552
1553
139k
                        );
1554
1555
                        /* Call the appropriate search algo for 8x4E */
1556
139k
                        hme_pred_search_square_stepn(
1557
139k
                            &s_search_prms_4x8,
1558
139k
                            ps_curr_layer,
1559
139k
                            &ps_ctxt->s_wt_pred,
1560
139k
                            e_me_quality_preset,
1561
139k
                            (ihevce_me_optimised_function_list_t *)
1562
139k
                                ps_ctxt->pv_me_optimised_function_list);
1563
1564
139k
                        if(ME_XTREME_SPEED_25 != e_me_quality_preset)
1565
128k
                        {
1566
                            /* Histogram updates across different Ref ID for global MV */
1567
128k
                            hme_update_histogram(
1568
128k
                                ps_ctxt->aps_mv_hist[i1_ref_idx],
1569
128k
                                aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx,
1570
128k
                                aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy);
1571
128k
                            hme_update_histogram(
1572
128k
                                ps_ctxt->aps_mv_hist[i1_ref_idx],
1573
128k
                                aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx,
1574
128k
                                aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy);
1575
128k
                        }
1576
1577
                        /* update the best results to the mv bank */
1578
139k
                        hme_update_mv_bank_coarse(
1579
139k
                            ps_search_results,
1580
139k
                            ps_curr_layer->ps_layer_mvbank,
1581
139k
                            blk_x,
1582
139k
                            (blk_y - 1),
1583
139k
                            ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1584
139k
                                search_node_top_offset, /* Top Candidate */
1585
139k
                            ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1586
139k
                                search_node_left_offset, /* Left candidate */
1587
139k
                            i1_ref_idx,
1588
139k
                            &s_mv_update_prms);
1589
1590
                        /* Copy the best search result to 5 row array for future use */
1591
139k
                        *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x +
1592
139k
                          ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1593
139k
                            *(aps_best_search_node_4x8[i1_ref_idx]);
1594
1595
139k
                        *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x +
1596
139k
                          ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
1597
139k
                            *(aps_best_search_node_8x4[i1_ref_idx]);
1598
1599
                        /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
1600
                        /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
1601
139k
                        if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
1602
130k
                        {
1603
130k
                            WORD32 num_mvs, i, j;
1604
130k
                            search_node_t *aps_search_nodes[4];
1605
                            /* Best results for 8x4R and 4x8B blocks */
1606
130k
                            search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
1607
1608
130k
                            num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
1609
1610
                            /*************************************************************************/
1611
                            /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
1612
                            /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp.      */
1613
                            /* If number of results to be stored is 4, then we store all these 4     */
1614
                            /* results, else we pick best ones                                       */
1615
                            /*************************************************************************/
1616
130k
                            ps_search_node_8x4_r =
1617
130k
                                ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
1618
130k
                            ps_search_node_4x8_b =
1619
130k
                                ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
1620
1621
130k
                            ASSERT(num_mvs <= 4);
1622
1623
                            /* Doing this to sort best results */
1624
130k
                            aps_search_nodes[0] = ps_search_node_8x4_r;
1625
130k
                            aps_search_nodes[1] = ps_search_node_4x8_b;
1626
130k
                            aps_search_nodes[2] =
1627
130k
                                ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
1628
130k
                                search_node_left_offset; /* Left candidate */
1629
130k
                            aps_search_nodes[3] =
1630
130k
                                ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
1631
130k
                                search_node_top_offset; /* Top Candidate */
1632
1633
                            /* Note : Need to be resolved!!! */
1634
                            /* Added this to match with "hme_update_mv_bank_coarse" */
1635
130k
                            if(num_mvs != 4)
1636
0
                            {
1637
                                /* Run through the results, store them in best to worst order */
1638
0
                                for(i = 0; i < num_mvs; i++)
1639
0
                                {
1640
0
                                    for(j = i + 1; j < 4; j++)
1641
0
                                    {
1642
0
                                        if(aps_search_nodes[j]->i4_tot_cost <
1643
0
                                           aps_search_nodes[i]->i4_tot_cost)
1644
0
                                        {
1645
0
                                            SWAP_HME(
1646
0
                                                aps_search_nodes[j],
1647
0
                                                aps_search_nodes[i],
1648
0
                                                search_node_t *);
1649
0
                                        }
1650
0
                                    }
1651
0
                                }
1652
0
                            }
1653
1654
                            /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
1655
652k
                            for(i = 0; i < num_mvs; i++)
1656
522k
                            {
1657
522k
                                hme_update_dynamic_search_params(
1658
522k
                                    &ps_ctxt->s_coarse_dyn_range_prms
1659
522k
                                         .as_dyn_range_prms[i4_layer_id][i1_ref_idx],
1660
522k
                                    aps_search_nodes[i]->s_mv.i2_mvy);
1661
522k
                            }
1662
130k
                        }
1663
139k
                    }
1664
195k
                }
1665
1666
                /* Update the number of blocks processed in the current row */
1667
96.3k
                ihevce_dmgr_set_row_row_sync(
1668
96.3k
                    pv_hme_dep_mngr,
1669
96.3k
                    (blk_x + 1),
1670
96.3k
                    blk_y,
1671
96.3k
                    0 /* Col Tile No. : Not supported in PreEnc*/);
1672
96.3k
            }
1673
1674
            /* set the output dependency after completion of row */
1675
13.4k
            ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
1676
13.4k
        }
1677
21.3k
    }
1678
1679
2.59k
    return;
1680
2.59k
}