Coverage Report

Created: 2025-07-09 06:41

/src/libavc/encoder/svc/isvce_me.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2022 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
 */
20
21
/**
22
 *******************************************************************************
23
 * @file
24
 *  isvce_me.c
25
 *
26
 * @brief
27
 *  Contains definition of functions for motion estimation
28
 *
29
 * @author
30
 *  ittiam
31
 *
32
 * @par List of Functions:
33
 *  - isvce_init_mv_bits()
34
 *  - isvce_skip_analysis_chroma()
35
 *  - isvce_skip_analysis_luma()
36
 *  - isvce_analyse_skip()
37
 *  - isvce_get_search_candidates()
38
 *  - isvce_find_skip_motion_vector()
39
 *  - isvce_get_mv_predictor()
40
 *  - isvce_mv_pred()
41
 *  - isvce_mv_pred_me()
42
 *  - isvce_init_me()
43
 *  - isvce_compute_me()
44
 *  - isvce_compute_me_nmb()
45
 *
46
 * @remarks
47
 *  None
48
 *
49
 *******************************************************************************
50
 */
51
52
/*****************************************************************************/
53
/* File Includes                                                             */
54
/*****************************************************************************/
55
56
/* System include files */
57
#include <stdio.h>
58
#include <assert.h>
59
#include <limits.h>
60
#include <stdbool.h>
61
62
/* User include files */
63
#include "ih264_typedefs.h"
64
#include "ih264_macros.h"
65
#include "isvc_macros.h"
66
#include "ih264_platform_macros.h"
67
#include "iv2.h"
68
#include "ive2.h"
69
#include "ithread.h"
70
#include "ih264_platform_macros.h"
71
#include "isvc_defs.h"
72
#include "ime_defs.h"
73
#include "ime_distortion_metrics.h"
74
#include "ime_structs.h"
75
#include "isvc_structs.h"
76
#include "isvc_trans_quant_itrans_iquant.h"
77
#include "isvc_inter_pred_filters.h"
78
#include "isvc_mem_fns.h"
79
#include "ih264_padding.h"
80
#include "ih264_intra_pred_filters.h"
81
#include "ih264_deblk_edge_filters.h"
82
#include "isvc_cabac_tables.h"
83
#include "isvce_defs.h"
84
#include "ih264e_error.h"
85
#include "ih264e_bitstream.h"
86
#include "irc_cntrl_param.h"
87
#include "irc_frame_info_collector.h"
88
#include "isvce_rate_control.h"
89
#include "isvce_cabac_structs.h"
90
#include "isvce_structs.h"
91
#include "isvce_globals.h"
92
#include "isvce_me.h"
93
#include "ime.h"
94
#include "ih264_debug.h"
95
#include "ih264e_intra_modes_eval.h"
96
#include "isvce_core_coding.h"
97
#include "isvce_mc.h"
98
#include "ih264e_debug.h"
99
#include "ih264e_half_pel.h"
100
#include "ime_statistics.h"
101
#include "ih264e_platform_macros.h"
102
#include "isvce_defs.h"
103
#include "isvce_structs.h"
104
#include "isvce_ilp_mv_utils.h"
105
#include "isvce_utils.h"
106
107
/*****************************************************************************/
108
/* Function Definitions                                                      */
109
/*****************************************************************************/
110
111
/**
112
*******************************************************************************
113
*
114
* @brief Diamond Search
115
*
116
* @par Description:
117
*  This function computes the sad at vertices of several layers of diamond grid
118
*  at a time. The number of layers of diamond grid that would be evaluated is
119
*  configurable.The function computes the sad at vertices of a diamond grid. If
120
*  the sad at the center of the diamond grid is lesser than the sad at any other
121
*  point of the diamond grid, the function marks the candidate Mb partition as
122
*  mv.
123
*
124
* @param[in] ps_mb_part
125
*  pointer to current mb partition ctxt with respect to ME
126
*
127
* @param[in] ps_me_ctxt
128
*  pointer to me context
129
*
130
* @param[in] u4_lambda_motion
131
*  lambda motion
132
*
133
* @param[in] u4_enable_fast_sad
134
*  enable/disable fast sad computation
135
*
136
* @returns  mv pair & corresponding distortion and cost
137
*
138
* @remarks Diamond Srch, radius is 1
139
*
140
*******************************************************************************
141
*/
142
static void isvce_diamond_search_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
143
617k
{
144
    /* MB partition info */
145
617k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
146
147
    /* lagrange parameter */
148
617k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
149
150
    /* srch range*/
151
617k
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
152
617k
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
153
617k
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
154
617k
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
155
156
    /* pointer to src macro block */
157
617k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
158
617k
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
159
160
    /* strides */
161
617k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
162
617k
    WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
163
164
    /* least cost */
165
617k
    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
166
167
    /* least sad */
168
617k
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
169
170
    /* mv pair */
171
617k
    WORD16 i2_mvx, i2_mvy;
172
173
    /* mv bits */
174
617k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
175
176
    /* temp var */
177
617k
    WORD32 i4_cost[4];
178
617k
    WORD32 i4_sad[4];
179
617k
    UWORD8 *pu1_ref;
180
617k
    WORD16 i2_mv_u_x, i2_mv_u_y;
181
182
    /* Diamond search Iteration Max Cnt */
183
617k
    WORD64 i8_num_layers = ps_me_ctxt->u4_num_layers;
184
185
    /* mv with best sad during initial evaluation */
186
617k
    i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
187
617k
    i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
188
189
617k
    i2_mv_u_x = i2_mvx;
190
617k
    i2_mv_u_y = i2_mvy;
191
192
1.93M
    while(i8_num_layers--)
193
1.89M
    {
194
        /* FIXME : is this the write way to check for out of bounds ? */
195
1.89M
        if((i2_mvx - 1 < i4_srch_range_w) || (i2_mvx + 1 > i4_srch_range_e) ||
196
1.89M
           (i2_mvy - 1 < i4_srch_range_n) || (i2_mvy + 1 > i4_srch_range_s))
197
116k
        {
198
116k
            break;
199
116k
        }
200
201
1.78M
        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
202
203
1.78M
        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, pu1_curr_mb, i4_ref_strd, i4_src_strd,
204
1.78M
                                                i4_sad);
205
206
1.78M
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
207
1.78M
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
208
1.78M
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
209
1.78M
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
210
211
        /* compute cost */
212
1.78M
        i4_cost[0] =
213
1.78M
            i4_sad[0] +
214
1.78M
            u4_lambda_motion * (pu1_mv_bits[((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
215
1.78M
                                pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
216
1.78M
        i4_cost[1] =
217
1.78M
            i4_sad[1] +
218
1.78M
            u4_lambda_motion * (pu1_mv_bits[((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
219
1.78M
                                pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
220
1.78M
        i4_cost[2] =
221
1.78M
            i4_sad[2] +
222
1.78M
            u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
223
1.78M
                                pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
224
1.78M
        i4_cost[3] =
225
1.78M
            i4_sad[3] +
226
1.78M
            u4_lambda_motion * (pu1_mv_bits[(i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
227
1.78M
                                pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
228
229
1.78M
        if(i4_cost_least > i4_cost[0])
230
460k
        {
231
460k
            i4_cost_least = i4_cost[0];
232
460k
            i4_distortion_least = i4_sad[0];
233
234
460k
            i2_mv_u_x = (i2_mvx - 1);
235
460k
            i2_mv_u_y = i2_mvy;
236
460k
        }
237
238
1.78M
        if(i4_cost_least > i4_cost[1])
239
407k
        {
240
407k
            i4_cost_least = i4_cost[1];
241
407k
            i4_distortion_least = i4_sad[1];
242
243
407k
            i2_mv_u_x = (i2_mvx + 1);
244
407k
            i2_mv_u_y = i2_mvy;
245
407k
        }
246
247
1.78M
        if(i4_cost_least > i4_cost[2])
248
372k
        {
249
372k
            i4_cost_least = i4_cost[2];
250
372k
            i4_distortion_least = i4_sad[2];
251
252
372k
            i2_mv_u_x = i2_mvx;
253
372k
            i2_mv_u_y = i2_mvy - 1;
254
372k
        }
255
256
1.78M
        if(i4_cost_least > i4_cost[3])
257
342k
        {
258
342k
            i4_cost_least = i4_cost[3];
259
342k
            i4_distortion_least = i4_sad[3];
260
261
342k
            i2_mv_u_x = i2_mvx;
262
342k
            i2_mv_u_y = i2_mvy + 1;
263
342k
        }
264
265
1.78M
        if((i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
266
467k
        {
267
467k
            ps_mb_part->u4_exit = 1;
268
467k
            break;
269
467k
        }
270
1.31M
        else
271
1.31M
        {
272
1.31M
            i2_mvx = i2_mv_u_x;
273
1.31M
            i2_mvy = i2_mv_u_y;
274
1.31M
        }
275
1.78M
    }
276
277
617k
    if(i4_cost_least < ps_mb_part->i4_mb_cost)
278
263k
    {
279
263k
        ps_mb_part->i4_mb_cost = i4_cost_least;
280
263k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
281
263k
        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
282
263k
        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
283
263k
    }
284
617k
}
285
286
/**
287
*******************************************************************************
288
*
289
* @brief This function computes the best motion vector among the tentative mv
290
* candidates chosen.
291
*
292
* @par Description:
293
*  This function determines the position in the search window at which the
294
*motion estimation should begin in order to minimise the number of search
295
*iterations.
296
*
297
* @param[in] ps_mb_part
298
*  pointer to current mb partition ctxt with respect to ME
299
*
300
* @param[in] u4_lambda_motion
301
*  lambda motion
302
*
303
* @param[in] u4_fast_flag
304
*  enable/disable fast sad computation
305
*
306
* @returns  mv pair & corresponding distortion and cost
307
*
308
* @remarks none
309
*
310
*******************************************************************************
311
*/
312
313
static void isvce_evaluate_init_srchposn_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
314
619k
{
315
619k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
316
317
    /* candidate mv cnt */
318
619k
    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
319
320
    /* list of candidate mvs */
321
619k
    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
322
323
    /* pointer to src macro block */
324
619k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
325
619k
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
326
327
    /* strides */
328
619k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
329
619k
    WORD32 i4_ref_strd = ps_me_ctxt->ai4_rec_strd[i4_reflist];
330
331
    /* enabled fast sad computation */
332
619k
    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
333
334
    /* SAD(distortion metric) of an 8x8 block */
335
619k
    WORD32 i4_mb_distortion;
336
337
    /* cost = distortion + u4_lambda_motion * rate */
338
619k
    WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
339
340
    /* mb partitions info */
341
619k
    mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
342
343
    /* mv bits */
344
619k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
345
346
    /* temp var */
347
619k
    UWORD32 i, j;
348
619k
    WORD32 i4_srch_pos_idx = 0;
349
619k
    UWORD8 *pu1_ref = NULL;
350
351
    /* Carry out a search using each of the motion vector pairs identified above
352
     * as predictors. */
353
    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
354
2.12M
    for(i = 0; i < u4_num_candidates; i++)
355
1.50M
    {
356
        /* compute sad */
357
1.50M
        WORD32 c_sad = 1;
358
359
3.19M
        for(j = 0; j < i; j++)
360
1.69M
        {
361
1.69M
            if((ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
362
1.69M
               (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy))
363
0
            {
364
0
                c_sad = 0;
365
0
                break;
366
0
            }
367
1.69M
        }
368
1.50M
        if(c_sad)
369
1.50M
        {
370
            /* adjust ref pointer */
371
1.50M
            pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
372
373
            /* compute distortion */
374
1.50M
            ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](
375
1.50M
                pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least,
376
1.50M
                &i4_mb_distortion);
377
378
1.50M
            DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
379
            /* compute cost */
380
1.50M
            i4_mb_cost =
381
1.50M
                i4_mb_distortion +
382
1.50M
                u4_lambda_motion *
383
1.50M
                    (pu1_mv_bits[(ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] +
384
1.50M
                     pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy]);
385
386
1.50M
            if(i4_mb_cost < i4_mb_cost_least)
387
853k
            {
388
853k
                i4_mb_cost_least = i4_mb_cost;
389
390
853k
                i4_distortion_least = i4_mb_distortion;
391
392
853k
                i4_srch_pos_idx = i;
393
853k
            }
394
1.50M
        }
395
1.50M
    }
396
397
619k
    if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
398
617k
    {
399
617k
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
400
617k
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
401
617k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
402
617k
        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
403
617k
        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
404
617k
    }
405
619k
}
406
407
/**
408
*******************************************************************************
409
*
410
* @brief Searches for the best matching full pixel predictor within the search
411
* range
412
*
413
* @par Description:
414
*  This function begins by computing the mv predict vector for the current mb.
415
*  This is used for cost computations. Further basing on the algo. chosen, it
416
*  looks through a set of candidate vectors that best represent the mb a least
417
*  cost and returns this information.
418
*
419
* @param[in] ps_proc
420
*  pointer to current proc ctxt
421
*
422
* @param[in] ps_me_ctxt
423
*  pointer to me context
424
*
425
* @returns  mv pair & corresponding distortion and cost
426
*
427
* @remarks none
428
*
429
*******************************************************************************
430
*/
431
static void isvce_full_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_ref_list)
432
617k
{
433
    /* mb part info */
434
617k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
435
436
    /******************************************************************/
437
    /* Modify Search range about initial candidate instead of zero mv */
438
    /******************************************************************/
439
    /*
440
     * FIXME: The motion vectors in a way can become unbounded. It may so happen
441
     * that MV might exceed the limit of the profile configured.
442
     */
443
617k
    ps_me_ctxt->i4_srch_range_w =
444
617k
        MAX(ps_me_ctxt->i4_srch_range_w,
445
617k
            -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
446
617k
    ps_me_ctxt->i4_srch_range_e =
447
617k
        MIN(ps_me_ctxt->i4_srch_range_e,
448
617k
            ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
449
617k
    ps_me_ctxt->i4_srch_range_n =
450
617k
        MAX(ps_me_ctxt->i4_srch_range_n,
451
617k
            -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
452
617k
    ps_me_ctxt->i4_srch_range_s =
453
617k
        MIN(ps_me_ctxt->i4_srch_range_s,
454
617k
            ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
455
456
    /************************************************************/
457
    /* Traverse about best initial candidate for mv             */
458
    /************************************************************/
459
460
617k
    switch(ps_me_ctxt->u4_me_speed_preset)
461
617k
    {
462
618k
        case DMND_SRCH:
463
618k
            isvce_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
464
618k
            break;
465
0
        default:
466
0
            assert(0);
467
0
            break;
468
617k
    }
469
617k
}
470
471
/**
472
*******************************************************************************
473
*
474
* @brief Searches for the best matching sub pixel predictor within the search
475
* range
476
*
477
* @par Description:
478
*  This function begins by searching across all sub pixel sample points
479
*  around the full pel motion vector. The vector with least cost is chosen as
480
*  the mv for the current mb. If the skip mode is not evaluated while analysing
481
*  the initial search candidates then analyse it here and update the mv.
482
*
483
* @param[in] ps_proc
484
*  pointer to current proc ctxt
485
*
486
* @param[in] ps_me_ctxt
487
*  pointer to me context
488
*
489
* @returns none
490
*
491
* @remarks none
492
*
493
*******************************************************************************
494
*/
495
static void isvce_sub_pel_motion_estimation_16x16(isvce_me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
496
538k
{
497
    /* pointers to src & ref macro block */
498
538k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
499
500
    /* pointers to ref. half pel planes */
501
538k
    UWORD8 *pu1_ref_mb_half_x;
502
538k
    UWORD8 *pu1_ref_mb_half_y;
503
538k
    UWORD8 *pu1_ref_mb_half_xy;
504
505
    /* pointers to ref. half pel planes */
506
538k
    UWORD8 *pu1_ref_mb_half_x_temp;
507
538k
    UWORD8 *pu1_ref_mb_half_y_temp;
508
538k
    UWORD8 *pu1_ref_mb_half_xy_temp;
509
510
    /* strides */
511
538k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
512
513
538k
    WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
514
515
    /* mb partitions info */
516
538k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
517
518
    /* SAD(distortion metric) of an mb */
519
538k
    WORD32 i4_mb_distortion;
520
538k
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
521
522
    /* cost = distortion + u4_lambda_motion * rate */
523
538k
    WORD32 i4_mb_cost;
524
538k
    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
525
526
    /*Best half pel buffer*/
527
538k
    UWORD8 *pu1_best_hpel_buf = NULL;
528
529
    /* mv bits */
530
538k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
531
532
    /* Motion vectors in full-pel units */
533
538k
    WORD16 mv_x, mv_y;
534
535
    /* lambda - lagrange constant */
536
538k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
537
538
    /* Flags to check if half pel points needs to be evaluated */
539
    /**************************************/
540
    /* 1 bit for each half pel candidate  */
541
    /* bit 0 - half x = 1, half y = 0     */
542
    /* bit 1 - half x = -1, half y = 0    */
543
    /* bit 2 - half x = 0, half y = 1     */
544
    /* bit 3 - half x = 0, half y = -1    */
545
    /* bit 4 - half x = 1, half y = 1     */
546
    /* bit 5 - half x = -1, half y = 1    */
547
    /* bit 6 - half x = 1, half y = -1    */
548
    /* bit 7 - half x = -1, half y = -1   */
549
    /**************************************/
550
    /* temp var */
551
538k
    WORD16 i2_mv_u_x, i2_mv_u_y;
552
538k
    WORD32 i, j;
553
538k
    WORD32 ai4_sad[8];
554
555
538k
    WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
556
557
538k
    i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
558
538k
    i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
559
560
    /************************************************************/
561
    /* Evaluate half pel                                        */
562
    /************************************************************/
563
538k
    mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
564
538k
    mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
565
566
    /**************************************************************/
567
    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
568
    /* left side of full pel                                      */
569
    /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
570
    /* top  side of full pel                                      */
571
    /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
572
    /* on the top left side of full pel                           */
573
    /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
574
    /* default postions are                                       */
575
    /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
576
    /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
577
    /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
578
    /* Hence corresponding adjustments made here                  */
579
    /**************************************************************/
580
581
538k
    pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
582
538k
    pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
583
538k
    pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy =
584
538k
        ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
585
586
538k
    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, pu1_ref_mb_half_y,
587
538k
                                                 pu1_ref_mb_half_xy, i4_src_strd, i4_ref_strd,
588
538k
                                                 ai4_sad);
589
590
    /* Half x plane */
591
1.61M
    for(i = 0; i < 2; i++)
592
1.07M
    {
593
1.07M
        WORD32 mv_x_tmp = (mv_x << 2) + 2;
594
1.07M
        WORD32 mv_y_tmp = (mv_y << 2);
595
596
1.07M
        mv_x_tmp -= (i * 4);
597
598
1.07M
        i4_mb_distortion = ai4_sad[i];
599
600
        /* compute cost */
601
1.07M
        i4_mb_cost = i4_mb_distortion +
602
1.07M
                     u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
603
1.07M
                                         pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
604
605
1.07M
        if(i4_mb_cost < i4_mb_cost_least)
606
186k
        {
607
186k
            i4_mb_cost_least = i4_mb_cost;
608
609
186k
            i4_distortion_least = i4_mb_distortion;
610
611
186k
            i2_mv_u_x = mv_x_tmp;
612
613
186k
            i2_mv_u_y = mv_y_tmp;
614
615
186k
            ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
616
186k
            pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
617
618
186k
            i4_srch_pos_idx = 0;
619
186k
        }
620
1.07M
    }
621
622
    /* Half y plane */
623
1.61M
    for(i = 0; i < 2; i++)
624
1.07M
    {
625
1.07M
        WORD32 mv_x_tmp = (mv_x << 2);
626
1.07M
        WORD32 mv_y_tmp = (mv_y << 2) + 2;
627
628
1.07M
        mv_y_tmp -= (i * 4);
629
630
1.07M
        i4_mb_distortion = ai4_sad[2 + i];
631
632
        /* compute cost */
633
1.07M
        i4_mb_cost = i4_mb_distortion +
634
1.07M
                     u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
635
1.07M
                                         pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
636
637
1.07M
        if(i4_mb_cost < i4_mb_cost_least)
638
98.4k
        {
639
98.4k
            i4_mb_cost_least = i4_mb_cost;
640
641
98.4k
            i4_distortion_least = i4_mb_distortion;
642
643
98.4k
            i2_mv_u_x = mv_x_tmp;
644
645
98.4k
            i2_mv_u_y = mv_y_tmp;
646
647
98.4k
            ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
648
98.4k
            pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i * (i4_ref_strd);
649
650
98.4k
            i4_srch_pos_idx = 1;
651
98.4k
        }
652
1.07M
    }
653
654
    /* Half xy plane */
655
1.61M
    for(j = 0; j < 2; j++)
656
1.07M
    {
657
3.22M
        for(i = 0; i < 2; i++)
658
2.15M
        {
659
2.15M
            WORD32 mv_x_tmp = (mv_x << 2) + 2;
660
2.15M
            WORD32 mv_y_tmp = (mv_y << 2) + 2;
661
662
2.15M
            mv_x_tmp -= (i * 4);
663
2.15M
            mv_y_tmp -= (j * 4);
664
665
2.15M
            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
666
667
            /* compute cost */
668
2.15M
            i4_mb_cost = i4_mb_distortion +
669
2.15M
                         u4_lambda_motion * (pu1_mv_bits[mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] +
670
2.15M
                                             pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy]);
671
672
2.15M
            if(i4_mb_cost < i4_mb_cost_least)
673
88.1k
            {
674
88.1k
                i4_mb_cost_least = i4_mb_cost;
675
676
88.1k
                i4_distortion_least = i4_mb_distortion;
677
678
88.1k
                i2_mv_u_x = mv_x_tmp;
679
680
88.1k
                i2_mv_u_y = mv_y_tmp;
681
682
88.1k
                ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
683
88.1k
                pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j * (i4_ref_strd) -i;
684
685
88.1k
                i4_srch_pos_idx = 2;
686
88.1k
            }
687
2.15M
        }
688
1.07M
    }
689
690
538k
    if(i4_mb_cost_least < ps_mb_part->i4_mb_cost)
691
228k
    {
692
228k
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
693
228k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
694
228k
        ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
695
228k
        ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
696
228k
        ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
697
228k
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
698
228k
    }
699
538k
}
700
701
/**
702
*******************************************************************************
703
*
704
* @brief This function computes cost of skip macroblocks
705
*
706
* @par Description:
707
*
708
* @param[in] ps_me_ctxt
709
*  pointer to me ctxt
710
*
711
*
712
* @returns  none
713
*
714
* @remarks
715
* NOTE: while computing the skip cost, do not enable early exit from compute
716
* sad function because, a negative bias gets added later
717
* Note tha the last ME candidate in me ctxt is taken as skip motion vector
718
*
719
*******************************************************************************
720
*/
721
static void isvce_compute_skip_cost(isvce_me_ctxt_t *ps_me_ctxt, ime_mv_t *ps_skip_mv,
722
                                    mb_part_ctxt *ps_smb_part_info, UWORD32 u4_use_stat_sad,
723
                                    WORD32 i4_reflist, WORD32 i4_is_slice_type_b)
724
882k
{
725
    /* SAD(distortion metric) of an mb */
726
882k
    WORD32 i4_mb_distortion;
727
728
    /* cost = distortion + u4_lambda_motion * rate */
729
882k
    WORD32 i4_mb_cost;
730
731
    /* temp var */
732
882k
    UWORD8 *pu1_ref = NULL;
733
734
882k
    ime_mv_t s_skip_mv;
735
736
882k
    s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx + 2) >> 2;
737
882k
    s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy + 2) >> 2;
738
739
    /* Check if the skip mv is out of bounds or subpel */
740
882k
    {
741
        /* skip mv */
742
882k
        ime_mv_t s_clip_skip_mv;
743
744
882k
        s_clip_skip_mv.i2_mvx =
745
882k
            CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
746
882k
        s_clip_skip_mv.i2_mvy =
747
882k
            CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
748
749
882k
        if((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
750
882k
           (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || (ps_skip_mv->i2_mvx & 0x3) ||
751
882k
           (ps_skip_mv->i2_mvy & 0x3))
752
63.0k
        {
753
63.0k
            return;
754
63.0k
        }
755
882k
    }
756
757
    /* adjust ref pointer */
758
819k
    pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx +
759
819k
              (s_skip_mv.i2_mvy * ps_me_ctxt->ai4_rec_strd[i4_reflist]);
760
761
819k
    if(u4_use_stat_sad == 1)
762
820k
    {
763
820k
        UWORD32 u4_is_nonzero;
764
765
820k
        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
766
820k
            ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
767
820k
            ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->pu2_sad_thrsh, &i4_mb_distortion,
768
820k
            &u4_is_nonzero);
769
770
820k
        if(u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
771
265k
        {
772
265k
            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
773
265k
            ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
774
265k
        }
775
820k
    }
776
18.4E
    else
777
18.4E
    {
778
18.4E
        ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
779
18.4E
            ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
780
18.4E
            ps_me_ctxt->ai4_rec_strd[i4_reflist], INT_MAX, &i4_mb_distortion);
781
782
18.4E
        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
783
0
        {
784
0
            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
785
0
            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
786
0
        }
787
18.4E
    }
788
789
    /* for skip mode cost & distortion are identical
790
     * But we shall add a bias to favor skip mode.
791
     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
792
     * TODO : Empirical analysis of SKIP_BIAS is necessary */
793
794
819k
    i4_mb_cost = i4_mb_distortion -
795
819k
                 (ps_me_ctxt->u4_lambda_motion *
796
819k
                  (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b));
797
798
819k
    if(i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
799
822k
    {
800
822k
        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
801
822k
        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
802
822k
        ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
803
822k
        ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
804
822k
    }
805
819k
}
806
807
/**
808
*******************************************************************************
809
*
810
* @brief
811
*  This function populates the length of the codewords for motion vectors in the
812
*  range (-search range, search range) in pixels
813
*
814
* @param[in] ps_me
815
*  Pointer to me ctxt
816
*
817
* @param[out] pu1_mv_bits
818
*  length of the codeword for all mv's
819
*
820
* @remarks The length of the code words are derived from signed exponential
821
* goloumb codes.
822
*
823
*******************************************************************************
824
*/
825
void isvce_init_mv_bits(isvce_me_ctxt_t *ps_me_ctxt)
826
4.95k
{
827
    /* temp var */
828
4.95k
    WORD32 i, codesize = 3, diff, limit;
829
4.95k
    UWORD32 u4_code_num, u4_range;
830
4.95k
    UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
831
832
    /* max srch range */
833
4.95k
    diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
834
    /* sub pel */
835
4.95k
    diff <<= 2;
836
    /* delta mv */
837
4.95k
    diff <<= 1;
838
839
    /* codeNum for positive integer     =  2x-1     : Table9-3  */
840
4.95k
    u4_code_num = (diff << 1);
841
842
    /* get range of the bit string and put using put_bits()                 */
843
4.95k
    GETRANGE(u4_range, u4_code_num);
844
845
4.95k
    limit = 2 * u4_range - 1;
846
847
    /* init mv bits */
848
4.95k
    ps_me_ctxt->pu1_mv_bits[0] = 1;
849
850
59.5k
    while(codesize < limit)
851
54.5k
    {
852
54.5k
        u4_uev_min = (1 << (codesize >> 1));
853
54.5k
        u4_uev_max = 2 * u4_uev_min - 1;
854
855
54.5k
        u4_sev_min = u4_uev_min >> 1;
856
54.5k
        u4_sev_max = u4_uev_max >> 1;
857
858
54.5k
        DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
859
860
10.2M
        for(i = u4_sev_min; i <= (WORD32) u4_sev_max; i++)
861
10.1M
        {
862
10.1M
            ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
863
10.1M
        }
864
865
54.5k
        codesize += 2;
866
54.5k
    }
867
4.95k
}
868
869
/**
870
*******************************************************************************
871
*
872
* @brief Adds valid MVs as initial search candidates for motion estimation by
873
* cheking if it is distinct or not.
874
*
875
* @param[in] ps_search_cand
876
*  MV to add as search candidate
877
*
878
* @param[in] ps_me_ctxt
879
*  pointer to ME context
880
*
881
* @param[in] u4_num_candidates
882
*  Number of inital search candidates value
883
*
884
*******************************************************************************
885
*/
886
static FORCEINLINE void isvce_add_me_init_search_cands(mv_t *ps_search_cand,
887
                                                       isvce_me_ctxt_t *ps_me_ctxt,
888
                                                       WORD32 i4_reflist,
889
                                                       UWORD32 *u4_num_candidates,
890
                                                       bool b_is_max_mv_diff_lt_4)
891
4.38M
{
892
4.38M
    WORD32 k;
893
4.38M
    WORD32 i4_mv_x, i4_mv_y;
894
895
4.38M
    bool b_is_mv_identical = false;
896
897
4.38M
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
898
4.38M
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
899
4.38M
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
900
4.38M
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
901
4.38M
    UWORD32 u4_num_init_search_cands = u4_num_candidates[0];
902
903
4.38M
    i4_mv_x = (ps_search_cand->i2_mvx + 2) >> 2;
904
4.38M
    i4_mv_y = (ps_search_cand->i2_mvy + 2) >> 2;
905
906
4.38M
    i4_mv_x = CLIP3(i4_srch_range_w, i4_srch_range_e, i4_mv_x);
907
4.38M
    i4_mv_y = CLIP3(i4_srch_range_n, i4_srch_range_s, i4_mv_y);
908
909
4.38M
    if(u4_num_init_search_cands == 0)
910
883k
    {
911
883k
        b_is_mv_identical = false;
912
883k
    }
913
3.49M
    else
914
3.49M
    {
915
9.89M
        for(k = u4_num_init_search_cands - 1; k >= 0; k--)
916
6.39M
        {
917
6.39M
            if((ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvx == i4_mv_x &&
918
6.39M
                ps_me_ctxt->as_mv_init_search[i4_reflist][k].i2_mvy == i4_mv_y))
919
2.49M
            {
920
2.49M
                b_is_mv_identical = true;
921
2.49M
            }
922
6.39M
        }
923
3.49M
    }
924
925
4.38M
    if(!b_is_mv_identical)
926
1.89M
    {
927
1.89M
        if(USE_ILP_MV_IN_ME && ps_me_ctxt->ps_ilp_me_cands)
928
1.53M
        {
929
1.53M
            if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
930
1.22M
            {
931
1.22M
                if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_LT_2)
932
1.22M
                {
933
1.22M
                    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
934
1.22M
                        i4_mv_x;
935
1.22M
                    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
936
1.22M
                        i4_mv_y;
937
938
1.22M
                    u4_num_candidates[0] += 1;
939
1.22M
                }
940
1.22M
            }
941
318k
            else if(ps_me_ctxt->ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4)
942
318k
            {
943
318k
                if(u4_num_init_search_cands < MAX_CAND_IF_NUM_ILP_MV_GTEQ_2)
944
311k
                {
945
311k
                    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx =
946
311k
                        i4_mv_x;
947
311k
                    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy =
948
311k
                        i4_mv_y;
949
950
311k
                    u4_num_candidates[0] += 1;
951
311k
                }
952
318k
            }
953
1.53M
        }
954
353k
        else
955
353k
        {
956
353k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvx = i4_mv_x;
957
353k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_init_search_cands].i2_mvy = i4_mv_y;
958
959
353k
            u4_num_candidates[0] += 1;
960
353k
        }
961
1.89M
    }
962
4.38M
}
963
964
/**
965
*******************************************************************************
966
*
967
* @brief Determines the valid candidates for which the initial search shall
968
*happen. The best of these candidates is used to center the diamond pixel
969
*search.
970
*
971
* @par Description: The function sends the skip, (0,0), left, top and top-right
972
* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
973
* these are the same MVs that are used to form the MV predictor. This initial MV
974
* search candidates need not take care of slice boundaries and hence neighbor
975
* availability checks are not made here.
976
*
977
* @param[in] ps_left_mb_pu
978
*  pointer to left mb motion vector info
979
*
980
* @param[in] ps_top_mb_pu
981
*  pointer to top & top right mb motion vector info
982
*
983
* @param[in] ps_top_left_mb_pu
984
*  pointer to top left mb motion vector info
985
*
986
* @param[out] ps_skip_mv
987
*  pointer to skip motion vectors for the curr mb
988
*
989
* @param[in] i4_mb_x
990
*  mb index x
991
*
992
* @param[in] i4_mb_y
993
*  mb index y
994
*
995
* @param[in] i4_wd_mbs
996
*  pic width in mbs
997
*
998
* @param[in] ps_motionEst
999
*  pointer to me context
1000
*
1001
* @returns  The list of MVs to be used of priming the full pel search and the
1002
* number of such MVs
1003
*
1004
* @remarks
1005
*   Assumptions : 1. Assumes Only partition of size 16x16
1006
*
1007
*******************************************************************************
1008
*/
1009
static void isvce_get_search_candidates(isvce_process_ctxt_t *ps_proc, isvce_me_ctxt_t *ps_me_ctxt,
1010
                                        WORD32 i4_reflist)
1011
883k
{
1012
883k
    mv_t s_zero_mv;
1013
883k
    mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
1014
1015
883k
    UWORD32 i;
1016
883k
    WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
1017
1018
883k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1019
883k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1020
883k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
1021
883k
    ilp_me_cands_t *ps_ilp_me_cands = ps_me_ctxt->ps_ilp_me_cands;
1022
1023
883k
    bool b_is_max_mv_diff_lt_4 = false;
1024
883k
    WORD32 i4_mb_x = ps_proc->i4_mb_x;
1025
18.4E
    WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? L1 : L0;
1026
883k
    UWORD32 u4_num_candidates = 0;
1027
1028
883k
    s_zero_mv.i2_mvx = 0;
1029
883k
    s_zero_mv.i2_mvy = 0;
1030
883k
    ps_left_mv = &ps_proc->s_nbr_info.ps_left_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1031
883k
    ps_top_mv =
1032
883k
        &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->as_me_info[i4_reflist].s_mv;
1033
883k
    ps_top_left_mv = &ps_proc->s_nbr_info.ps_top_row_mb_info->as_pu->as_me_info[i4_reflist].s_mv;
1034
883k
    ps_top_right_mv =
1035
883k
        &(ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->as_me_info[i4_reflist].s_mv;
1036
1037
883k
    i4_left_mode =
1038
883k
        ps_ngbr_avbl->u1_mb_a
1039
883k
            ? (ps_proc->s_nbr_info.ps_left_mb_info->as_pu->u1_pred_mode != i4_cmpl_predmode)
1040
883k
            : 0;
1041
883k
    i4_top_mode = ps_ngbr_avbl->u1_mb_b
1042
883k
                      ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x)->as_pu->u1_pred_mode !=
1043
586k
                         i4_cmpl_predmode)
1044
883k
                      : 0;
1045
883k
    i4_top_right_mode =
1046
883k
        ps_ngbr_avbl->u1_mb_c
1047
883k
            ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x + 1)->as_pu->u1_pred_mode !=
1048
513k
               i4_cmpl_predmode)
1049
883k
            : 0;
1050
883k
    i4_top_left_mode =
1051
883k
        ps_ngbr_avbl->u1_mb_d
1052
883k
            ? ((ps_proc->s_nbr_info.ps_top_row_mb_info + i4_mb_x - 1)->as_pu->u1_pred_mode !=
1053
513k
               i4_cmpl_predmode)
1054
883k
            : 0;
1055
1056
883k
    if(USE_ILP_MV_IN_ME && ps_ilp_me_cands)
1057
678k
    {
1058
678k
        if(ps_ilp_me_cands->u4_num_ilp_mvs >= 2)
1059
89.7k
        {
1060
89.7k
            b_is_max_mv_diff_lt_4 = isvce_check_max_mv_diff_lt_4(ps_ilp_me_cands, i4_reflist);
1061
89.7k
        }
1062
1063
        /* Taking ILP MV Predictor as one of the candidates */
1064
678k
        if(ps_ilp_me_cands->u4_num_ilp_mvs < 2 || b_is_max_mv_diff_lt_4)
1065
606k
        {
1066
1.09M
            for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1067
492k
            {
1068
492k
                if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1069
492k
                    ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1070
492k
                {
1071
492k
                    isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1072
492k
                                                   ps_me_ctxt, i4_reflist, &u4_num_candidates,
1073
492k
                                                   b_is_max_mv_diff_lt_4);
1074
492k
                }
1075
492k
            }
1076
606k
        }
1077
678k
    }
1078
1079
    /* Taking the Top MV Predictor as one of the candidates     */
1080
883k
    if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1081
586k
    {
1082
586k
        isvce_add_me_init_search_cands(ps_top_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1083
586k
                                       b_is_max_mv_diff_lt_4);
1084
586k
    }
1085
1086
    /* Taking the Left MV Predictor as one of the candidates    */
1087
883k
    if(ps_ngbr_avbl->u1_mb_a && i4_left_mode)
1088
772k
    {
1089
772k
        isvce_add_me_init_search_cands(ps_left_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1090
772k
                                       b_is_max_mv_diff_lt_4);
1091
772k
    }
1092
1093
    /********************************************************************/
1094
    /*                            MV Prediction                         */
1095
    /********************************************************************/
1096
883k
    isvce_mv_pred_me(ps_proc, i4_reflist);
1097
1098
883k
    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
1099
883k
    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
1100
1101
    /* Get the skip motion vector                               */
1102
883k
    {
1103
883k
        ps_me_ctxt->i4_skip_type =
1104
883k
            ps_codec->apf_find_skip_params_me[ps_proc->i4_slice_type](ps_proc, i4_reflist);
1105
1106
        /* Taking the Skip motion vector as one of the candidates   */
1107
883k
        isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist].s_mv, ps_me_ctxt,
1108
883k
                                       i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1109
1110
883k
        if(ps_proc->i4_slice_type == BSLICE)
1111
0
        {
1112
            /* Taking the temporal Skip motion vector as one of the candidates   */
1113
0
            isvce_add_me_init_search_cands(&ps_proc->ps_skip_mv[i4_reflist + 2].s_mv, ps_me_ctxt,
1114
0
                                           i4_reflist, &u4_num_candidates, b_is_max_mv_diff_lt_4);
1115
0
        }
1116
883k
    }
1117
1118
    /* Taking ILP MV Predictor as one of the candidates */
1119
883k
    if(USE_ILP_MV_IN_ME && ps_ilp_me_cands &&
1120
883k
       (ps_ilp_me_cands->u4_num_ilp_mvs >= 2 && !b_is_max_mv_diff_lt_4))
1121
72.4k
    {
1122
280k
        for(i = 0; i < ps_ilp_me_cands->u4_num_ilp_mvs_incl_nbrs; i++)
1123
207k
        {
1124
207k
            if(((ps_ilp_me_cands->ae_pred_mode[i] == ((PRED_MODE_T) i4_reflist)) ||
1125
207k
                ((ps_ilp_me_cands->ae_pred_mode[i] == BI))))
1126
207k
            {
1127
207k
                isvce_add_me_init_search_cands(&ps_ilp_me_cands->as_mv[i][i4_reflist].s_mv,
1128
207k
                                               ps_me_ctxt, i4_reflist, &u4_num_candidates,
1129
207k
                                               b_is_max_mv_diff_lt_4);
1130
207k
            }
1131
207k
        }
1132
72.4k
    }
1133
1134
883k
    if(ps_ngbr_avbl->u1_mb_b && i4_top_mode)
1135
586k
    {
1136
        /* Taking the TopRt MV Predictor as one of the candidates   */
1137
586k
        if(ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
1138
513k
        {
1139
513k
            isvce_add_me_init_search_cands(ps_top_right_mv, ps_me_ctxt, i4_reflist,
1140
513k
                                           &u4_num_candidates, b_is_max_mv_diff_lt_4);
1141
513k
        }
1142
1143
        /* Taking the TopLt MV Predictor as one of the candidates   */
1144
72.8k
        else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
1145
70.1k
        {
1146
70.1k
            isvce_add_me_init_search_cands(ps_top_left_mv, ps_me_ctxt, i4_reflist,
1147
70.1k
                                           &u4_num_candidates, b_is_max_mv_diff_lt_4);
1148
70.1k
        }
1149
586k
    }
1150
1151
    /* Taking the Zero motion vector as one of the candidates   */
1152
883k
    isvce_add_me_init_search_cands(&s_zero_mv, ps_me_ctxt, i4_reflist, &u4_num_candidates,
1153
883k
                                   b_is_max_mv_diff_lt_4);
1154
1155
883k
    ASSERT(u4_num_candidates <= MAX_FPEL_SEARCH_CANDIDATES);
1156
1157
882k
    ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
1158
882k
}
1159
1160
/**
1161
*******************************************************************************
1162
*
1163
* @brief The function computes parameters for a PSKIP MB
1164
*
1165
* @par Description:
1166
*  The function updates the skip motion vector and checks if the current
1167
*  MB can be a skip PSKIP mB or not
1168
*
1169
* @param[in] ps_proc
1170
*  Pointer to process context
1171
*
1172
* @param[in] u4_for_me
1173
*  Flag to indicate function is called for ME or not
1174
*
1175
* @param[out] i4_ref_list
1176
*  Current active refernce list
1177
*
1178
* @returns Flag indicating if the current MB can be marked as skip
1179
*
1180
* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1181
*   specification.
1182
*
1183
*******************************************************************************
1184
*/
1185
WORD32 isvce_find_pskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1186
228k
{
1187
    /* left mb motion vector */
1188
228k
    isvce_enc_pu_t *ps_left_mb_pu;
1189
1190
    /* top mb motion vector */
1191
228k
    isvce_enc_pu_t *ps_top_mb_pu;
1192
1193
    /* Skip mv */
1194
228k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1195
1196
228k
    UNUSED(i4_reflist);
1197
1198
228k
    ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1199
228k
    ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1200
1201
228k
    if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1202
228k
       ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1203
135k
        (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1204
135k
        (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1205
228k
       ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1206
28.2k
        (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1207
28.2k
        (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1208
1209
209k
    {
1210
209k
        ps_skip_mv->i2_mvx = 0;
1211
209k
        ps_skip_mv->i2_mvy = 0;
1212
209k
    }
1213
19.0k
    else
1214
19.0k
    {
1215
19.0k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1216
19.0k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1217
19.0k
    }
1218
1219
228k
    if((ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx) &&
1220
228k
       (ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
1221
200k
    {
1222
200k
        return 1;
1223
200k
    }
1224
1225
27.4k
    return 0;
1226
228k
}
1227
1228
/**
1229
*******************************************************************************
1230
*
1231
* @brief The function computes parameters for a PSKIP MB
1232
*
1233
* @par Description:
1234
*  The function updates the skip motion vector and checks if the current
1235
*  MB can be a skip PSKIP mB or not
1236
*
1237
* @param[in] ps_proc
1238
*  Pointer to process context
1239
*
1240
* @param[in] u4_for_me
1241
*  Flag to dincate fucntion is called for ME or not
1242
*
1243
* @param[out] i4_ref_list
1244
*  Current active refernce list
1245
*
1246
* @returns Flag indicating if the current MB can be marked as skip
1247
*
1248
* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1249
*   specification.
1250
*
1251
*******************************************************************************
1252
*/
1253
WORD32 isvce_find_pskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
1254
884k
{
1255
    /* left mb motion vector */
1256
884k
    isvce_enc_pu_t *ps_left_mb_pu;
1257
1258
    /* top mb motion vector */
1259
884k
    isvce_enc_pu_t *ps_top_mb_pu;
1260
1261
    /* Skip mv */
1262
884k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[L0].s_mv;
1263
1264
884k
    UNUSED(i4_reflist);
1265
1266
884k
    ps_left_mb_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
1267
884k
    ps_top_mb_pu = (ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x)->as_pu;
1268
1269
884k
    if((!ps_proc->ps_ngbr_avbl->u1_mb_a) || (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
1270
884k
       ((ps_left_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1271
514k
        (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1272
514k
        (ps_left_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)) ||
1273
884k
       ((ps_top_mb_pu->as_me_info[L0].i1_ref_idx == 0) &&
1274
365k
        (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvx == 0) &&
1275
365k
        (ps_top_mb_pu->as_me_info[L0].s_mv.i2_mvy == 0)))
1276
1277
565k
    {
1278
565k
        ps_skip_mv->i2_mvx = 0;
1279
565k
        ps_skip_mv->i2_mvy = 0;
1280
565k
    }
1281
319k
    else
1282
319k
    {
1283
319k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[L0].s_mv.i2_mvx;
1284
319k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[L0].s_mv.i2_mvy;
1285
319k
    }
1286
1287
884k
    return L0;
1288
884k
}
1289
1290
/**
1291
*******************************************************************************
1292
*
1293
* @brief motion vector predictor
1294
*
1295
* @par Description:
1296
*  The routine calculates the motion vector predictor for a given block,
1297
*  given the candidate MV predictors.
1298
*
1299
* @param[in] ps_left_mb_pu
1300
*  pointer to left mb motion vector info
1301
*
1302
* @param[in] ps_top_row_pu
1303
*  pointer to top & top right mb motion vector info
1304
*
1305
* @param[out] ps_pred_mv
1306
*  pointer to candidate predictors for the current block
1307
*
1308
* @returns  The x & y components of the MV predictor.
1309
*
1310
* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
1311
*   specification.
1312
*   Assumptions : 1. Assumes Single reference frame
1313
*                 2. Assumes Only partition of size 16x16
1314
*
1315
*******************************************************************************
1316
*/
1317
void isvce_get_mv_predictor(isvce_enc_pu_mv_t *ps_pred_mv, isvce_enc_pu_mv_t *ps_neig_mv,
1318
                            WORD32 pred_algo)
1319
1.45M
{
1320
1.45M
    switch(pred_algo)
1321
1.45M
    {
1322
34.2k
        case 0:
1323
            /* left */
1324
34.2k
            ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[0].s_mv.i2_mvx;
1325
34.2k
            ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[0].s_mv.i2_mvy;
1326
34.2k
            break;
1327
48.2k
        case 1:
1328
            /* top */
1329
48.2k
            ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[1].s_mv.i2_mvx;
1330
48.2k
            ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[1].s_mv.i2_mvy;
1331
48.2k
            break;
1332
31.4k
        case 2:
1333
            /* top right */
1334
31.4k
            ps_pred_mv->s_mv.i2_mvx = ps_neig_mv[2].s_mv.i2_mvx;
1335
31.4k
            ps_pred_mv->s_mv.i2_mvy = ps_neig_mv[2].s_mv.i2_mvy;
1336
31.4k
            break;
1337
1.33M
        case 3:
1338
            /* median */
1339
1.33M
            MEDIAN(ps_neig_mv[0].s_mv.i2_mvx, ps_neig_mv[1].s_mv.i2_mvx, ps_neig_mv[2].s_mv.i2_mvx,
1340
1.33M
                   ps_pred_mv->s_mv.i2_mvx);
1341
1.33M
            MEDIAN(ps_neig_mv[0].s_mv.i2_mvy, ps_neig_mv[1].s_mv.i2_mvy, ps_neig_mv[2].s_mv.i2_mvy,
1342
1.33M
                   ps_pred_mv->s_mv.i2_mvy);
1343
1344
1.33M
            break;
1345
0
        default:
1346
0
            break;
1347
1.45M
    }
1348
1.45M
}
1349
1350
/**
1351
*******************************************************************************
1352
*
1353
* @brief This function performs MV prediction
1354
*
1355
* @par Description:
1356
*
1357
* @param[in] ps_proc
1358
*  Process context corresponding to the job
1359
*
1360
* @returns  none
1361
*
1362
* @remarks none
1363
*  This function will update the MB availability since intra inter decision
1364
*  should be done before the call
1365
*
1366
*******************************************************************************
1367
*/
1368
void isvce_mv_pred(isvce_process_ctxt_t *ps_proc, WORD32 i4_slice_type)
1369
566k
{
1370
566k
    isvce_enc_pu_mv_t as_pu_mv[3];
1371
1372
566k
    UWORD8 u1_reflist, u1_cmpl_predmode;
1373
566k
    WORD32 i;
1374
1375
566k
    isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1376
566k
    isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1377
566k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1378
566k
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1379
566k
    isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1380
566k
    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1381
566k
    isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1382
1383
566k
    UWORD8 u1_left_is_intra = ps_left_mb->u1_is_intra;
1384
566k
    UWORD8 u1_num_ref_lists = (i4_slice_type == PSLICE) ? 1 : 2;
1385
1386
1.13M
    for(u1_reflist = 0; u1_reflist < u1_num_ref_lists; u1_reflist++)
1387
565k
    {
1388
565k
        WORD8 i1_cur_ref_idx = 0;
1389
1390
565k
        WORD32 pred_algo = 3, a, b, c;
1391
1392
2.26M
        for(i = 0; i < 3; i++)
1393
1.69M
        {
1394
1.69M
            as_pu_mv[i] = s_default_mv_info;
1395
1.69M
        }
1396
1397
18.4E
        u1_cmpl_predmode = (u1_reflist == 0) ? L1 : L0;
1398
1399
        /* Before performing mv prediction prepare the ngbr information and
1400
         * reset motion vectors basing on their availability */
1401
565k
        if(ps_ngbr_avbl->u1_mb_a && (u1_left_is_intra != 1) &&
1402
565k
           (ps_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1403
433k
        {
1404
            /* left mv */
1405
433k
            as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1406
433k
            as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1407
1408
            /* Only left available */
1409
433k
            if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1410
181k
            {
1411
181k
                as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1412
181k
                as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1413
1414
181k
                as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[u1_reflist].s_mv;
1415
181k
                as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1416
181k
            }
1417
433k
        }
1418
565k
        if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1419
565k
           (ps_top_mb->as_pu[0].u1_pred_mode != u1_cmpl_predmode))
1420
302k
        {
1421
            /* top mv */
1422
302k
            as_pu_mv[1].s_mv = ps_top_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1423
302k
            as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1424
302k
        }
1425
1426
565k
        if(!ps_ngbr_avbl->u1_mb_c)
1427
266k
        {
1428
            /* top right mv - When top right partition is not available for
1429
             * prediction if top left is available use it for prediction else
1430
             * set the mv information to -1 and (0, 0)
1431
             * */
1432
266k
            if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1433
266k
               (ps_top_left_mb->as_pu->u1_pred_mode != u1_cmpl_predmode))
1434
29.4k
            {
1435
29.4k
                as_pu_mv[2].s_mv = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].s_mv;
1436
29.4k
                as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu[0].as_me_info[u1_reflist].i1_ref_idx;
1437
29.4k
            }
1438
266k
        }
1439
299k
        else if(ps_top_right_mb->as_pu->u1_pred_mode != u1_cmpl_predmode &&
1440
300k
                !ps_top_right_mb->u1_is_intra)
1441
251k
        {
1442
251k
            as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[u1_reflist].s_mv;
1443
251k
            as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[u1_reflist].i1_ref_idx;
1444
251k
        }
1445
1446
        /* If only one of the candidate blocks has a reference frame equal to
1447
         * the current block then use the same block as the final predictor */
1448
565k
        a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1449
565k
        b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1450
565k
        c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1451
565k
        if(a == 0 && b == -1 && c == -1)
1452
11.5k
            pred_algo = 0; /* LEFT */
1453
554k
        else if(a == -1 && b == 0 && c == -1)
1454
15.4k
            pred_algo = 1; /* TOP */
1455
538k
        else if(a == -1 && b == -1 && c == 0)
1456
6.12k
            pred_algo = 2;
1457
1458
565k
        isvce_get_mv_predictor(&ps_pred_mv[u1_reflist], &as_pu_mv[0], pred_algo);
1459
1460
565k
        ps_pred_mv[u1_reflist].i1_ref_idx = i1_cur_ref_idx;
1461
565k
    }
1462
566k
}
1463
1464
/**
1465
*******************************************************************************
1466
*
1467
* @brief This function approximates Pred. MV
1468
*
1469
* @par Description:
1470
*
1471
* @param[in] ps_proc
1472
*  Process context corresponding to the job
1473
*
1474
* @returns  none
1475
*
1476
* @remarks none
1477
*  Motion estimation happens at nmb level. For cost calculations, mv is appro
1478
*  ximated using this function
1479
*
1480
*******************************************************************************
1481
*/
1482
void isvce_mv_pred_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_ref_list)
1483
884k
{
1484
884k
    isvce_enc_pu_mv_t as_pu_mv[3];
1485
1486
884k
    WORD32 i, a, b, c;
1487
1488
884k
    isvce_enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
1489
884k
    isvce_enc_pu_mv_t s_default_mv_info = {{0, 0}, -1};
1490
884k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1491
884k
    isvce_mb_info_t *ps_top_mb = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
1492
884k
    isvce_mb_info_t *ps_top_left_mb = ps_top_mb - 1;
1493
884k
    isvce_mb_info_t *ps_top_right_mb = ps_top_mb + 1;
1494
884k
    isvce_mb_info_t *ps_left_mb = ps_proc->s_nbr_info.ps_left_mb_info;
1495
1496
884k
    WORD8 i1_cur_ref_idx = 0;
1497
884k
    WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? L1 : L0;
1498
884k
    WORD32 pred_algo = 3;
1499
1500
3.53M
    for(i = 0; i < 3; i++)
1501
2.65M
    {
1502
2.65M
        as_pu_mv[i] = s_default_mv_info;
1503
2.65M
    }
1504
1505
884k
    if(ps_ngbr_avbl->u1_mb_a && !ps_left_mb->u1_is_intra &&
1506
884k
       (ps_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1507
501k
    {
1508
        /* left mv */
1509
501k
        as_pu_mv[0].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1510
501k
        as_pu_mv[0].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1511
1512
        /* Only left available */
1513
501k
        if(!ps_ngbr_avbl->u1_mb_b && !ps_ngbr_avbl->u1_mb_c && !ps_ngbr_avbl->u1_mb_d)
1514
201k
        {
1515
201k
            as_pu_mv[1].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1516
201k
            as_pu_mv[1].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1517
1518
201k
            as_pu_mv[2].s_mv = ps_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1519
201k
            as_pu_mv[2].i1_ref_idx = ps_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1520
201k
        }
1521
501k
    }
1522
884k
    if(ps_ngbr_avbl->u1_mb_b && !ps_top_mb->u1_is_intra &&
1523
884k
       (ps_top_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1524
376k
    {
1525
        /* top mv */
1526
376k
        as_pu_mv[1].s_mv = ps_top_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1527
376k
        as_pu_mv[1].i1_ref_idx = ps_top_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1528
376k
    }
1529
884k
    if(!ps_ngbr_avbl->u1_mb_c)
1530
371k
    {
1531
        /* top right mv - When top right partition is not available for
1532
         * prediction if top left is available use it for prediction else
1533
         * set the mv information to -1 and (0, 0)
1534
         * */
1535
371k
        if(ps_ngbr_avbl->u1_mb_d && !ps_top_left_mb->u1_is_intra &&
1536
371k
           (ps_top_left_mb->as_pu->u1_pred_mode != i4_cmpl_predmode))
1537
44.4k
        {
1538
44.4k
            as_pu_mv[2].s_mv = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1539
44.4k
            as_pu_mv[2].i1_ref_idx = ps_top_left_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1540
44.4k
        }
1541
371k
    }
1542
512k
    else if(ps_top_right_mb->as_pu->u1_pred_mode != i4_cmpl_predmode &&
1543
513k
            !ps_top_right_mb->u1_is_intra)
1544
324k
    {
1545
324k
        as_pu_mv[2].s_mv = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].s_mv;
1546
324k
        as_pu_mv[2].i1_ref_idx = ps_top_right_mb->as_pu->as_me_info[i4_ref_list].i1_ref_idx;
1547
324k
    }
1548
1549
    /* If only one of the candidate blocks has a reference frame equal to
1550
     * the current block then use the same block as the final predictor */
1551
884k
    a = (as_pu_mv[0].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1552
884k
    b = (as_pu_mv[1].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1553
884k
    c = (as_pu_mv[2].i1_ref_idx == i1_cur_ref_idx) ? 0 : -1;
1554
1555
884k
    if(a == 0 && b == -1 && c == -1)
1556
22.6k
        pred_algo = 0; /* LEFT */
1557
861k
    else if(a == -1 && b == 0 && c == -1)
1558
32.8k
        pred_algo = 1; /* TOP */
1559
828k
    else if(a == -1 && b == -1 && c == 0)
1560
25.2k
        pred_algo = 2;
1561
1562
884k
    isvce_get_mv_predictor(&ps_pred_mv[i4_ref_list], &as_pu_mv[0], pred_algo);
1563
884k
}
1564
1565
/**
1566
*******************************************************************************
1567
*
1568
* @brief This function initializes me ctxt
1569
*
1570
* @par Description:
1571
*  Before dispatching the current job to me thread, the me context associated
1572
*  with the job is initialized.
1573
*
1574
* @param[in] ps_proc
1575
*  Process context corresponding to the job
1576
*
1577
* @returns  none
1578
*
1579
* @remarks none
1580
*
1581
*******************************************************************************
1582
*/
1583
void isvce_init_me(isvce_process_ctxt_t *ps_proc)
1584
884k
{
1585
884k
    isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1586
884k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1587
1588
884k
    ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
1589
1590
884k
    if(ps_codec->s_cfg.u4_num_bframes == 0)
1591
884k
    {
1592
884k
        ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
1593
884k
    }
1594
12
    else
1595
12
    {
1596
12
        ps_me_ctxt->i4_skip_bias[PSLICE] = SKIP_BIAS_P;
1597
12
    }
1598
1599
884k
    ps_me_ctxt->pu1_src_buf_luma = ps_proc->s_src_buf_props.as_component_bufs[0].pv_data;
1600
884k
    ps_me_ctxt->i4_src_strd = ps_proc->s_src_buf_props.as_component_bufs[0].i4_data_stride;
1601
1602
884k
    ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data;
1603
884k
    ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data;
1604
1605
884k
    ps_me_ctxt->ai4_rec_strd[0] = ps_proc->as_ref_buf_props[0].as_component_bufs[0].i4_data_stride;
1606
884k
    ps_me_ctxt->ai4_rec_strd[1] = ps_proc->as_ref_buf_props[1].as_component_bufs[0].i4_data_stride;
1607
1608
884k
    ps_me_ctxt->u4_lambda_motion = gu1_qp0[ps_me_ctxt->u1_mb_qp];
1609
884k
}
1610
1611
/**
1612
*******************************************************************************
1613
*
1614
* @brief This function performs motion estimation for the current mb using
1615
*   single reference list
1616
*
1617
* @par Description:
1618
*  The current mb is compared with a list of mb's in the reference frame for
1619
*  least cost. The mb that offers least cost is chosen as predicted mb and the
1620
*  displacement of the predicted mb from index location of the current mb is
1621
*  signaled as mv. The list of the mb's that are chosen in the reference frame
1622
*  are dependent on the speed of the ME configured.
1623
*
1624
* @param[in] ps_proc
1625
*  Process context corresponding to the job
1626
*
1627
* @returns  motion vector of the pred mb, sad, cost.
1628
*
1629
* @remarks none
1630
*
1631
*******************************************************************************
1632
*/
1633
void isvce_compute_me_single_reflist(isvce_process_ctxt_t *ps_proc)
1634
883k
{
1635
883k
    mb_part_ctxt s_skip_mbpart;
1636
1637
    /* source buffer for halp pel generation functions */
1638
883k
    UWORD8 *pu1_hpel_src;
1639
1640
883k
    isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1641
883k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1642
883k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1643
883k
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
1644
883k
    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
1645
1646
883k
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1647
1648
883k
    ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
1649
1650
883k
    {
1651
883k
        WORD32 rows_above, rows_below, columns_left, columns_right;
1652
1653
        /* During evaluation for motion vectors do not search through padded regions
1654
         */
1655
        /* Obtain number of rows and columns that are effective for computing for me
1656
         * evaluation */
1657
883k
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1658
883k
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1659
883k
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1660
883k
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1661
1662
        /* init srch range */
1663
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
1664
         * 2 on all sides.
1665
         */
1666
883k
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1667
883k
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1668
883k
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1669
883k
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1670
1671
        /* this is to facilitate fast sub pel computation with minimal loads */
1672
883k
        ps_me_ctxt->i4_srch_range_w += 1;
1673
883k
        ps_me_ctxt->i4_srch_range_e -= 1;
1674
883k
        ps_me_ctxt->i4_srch_range_n += 1;
1675
883k
        ps_me_ctxt->i4_srch_range_s -= 1;
1676
883k
    }
1677
1678
    /***********************************************************************
1679
     * Compute ME for list L0
1680
     ***********************************************************************/
1681
1682
    /* Init SATQD for the current list */
1683
883k
    ps_me_ctxt->u4_min_sad_reached = 0;
1684
883k
    ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1685
1686
    /* Get the seed motion vector candidates                    */
1687
883k
    isvce_get_search_candidates(ps_proc, ps_me_ctxt, L0);
1688
1689
    /* ****************************************************************
1690
     *Evaluate the SKIP for current list
1691
     * ****************************************************************/
1692
883k
    s_skip_mbpart.s_mv_curr.i2_mvx = 0;
1693
883k
    s_skip_mbpart.s_mv_curr.i2_mvy = 0;
1694
883k
    s_skip_mbpart.i4_mb_cost = INT_MAX;
1695
883k
    s_skip_mbpart.i4_mb_distortion = INT_MAX;
1696
1697
883k
    isvce_compute_skip_cost(ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[L0].s_mv),
1698
883k
                            &s_skip_mbpart, ps_codec->s_cfg.u4_enable_satqd, PRED_L0,
1699
883k
                            0 /* Not a Bslice */);
1700
1701
883k
    s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
1702
883k
    s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
1703
1704
    /******************************************************************
1705
     * Evaluate ME For current list
1706
     *****************************************************************/
1707
883k
    ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx = 0;
1708
883k
    ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy = 0;
1709
883k
    ps_me_ctxt->as_mb_part[L0].i4_mb_cost = INT_MAX;
1710
883k
    ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = INT_MAX;
1711
1712
    /* Init Hpel */
1713
883k
    ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf = NULL;
1714
1715
    /* In case we found out the minimum SAD, exit the ME eval */
1716
883k
    if(!ps_me_ctxt->u4_min_sad_reached)
1717
619k
    {
1718
        /* Evaluate search candidates for initial mv pt */
1719
619k
        isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, L0);
1720
1721
        /********************************************************************/
1722
        /*                  full pel motion estimation                      */
1723
        /********************************************************************/
1724
619k
        isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1725
1726
        /* Scale the MV to qpel resolution */
1727
619k
        ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx <<= 2;
1728
619k
        ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy <<= 2;
1729
1730
619k
        if(ps_me_ctxt->u4_enable_hpel)
1731
538k
        {
1732
            /* moving src pointer to the converged motion vector location*/
1733
538k
            pu1_hpel_src =
1734
538k
                ps_me_ctxt->apu1_ref_buf_luma[L0] +
1735
538k
                (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx >> 2) +
1736
538k
                (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy >> 2) * ps_me_ctxt->ai4_rec_strd[L0];
1737
1738
538k
            ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1739
538k
            ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1740
538k
            ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1741
1742
538k
            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1743
1744
            /* half  pel search is done for both sides of full pel,
1745
             * hence half_x of width x height = 17x16 is created
1746
             * starting from left half_x of converged full pel */
1747
538k
            pu1_hpel_src -= 1;
1748
1749
            /* computing half_x */
1750
538k
            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
1751
538k
                                                  ps_me_ctxt->ai4_rec_strd[L0],
1752
538k
                                                  ps_me_ctxt->u4_subpel_buf_strd);
1753
1754
            /*
1755
             * Halfpel search is done for both sides of full pel,
1756
             * hence half_y of width x height = 16x17 is created
1757
             * starting from top half_y of converged full pel
1758
             * for half_xy top_left is required
1759
             * hence it starts from pu1_hpel_src = full_pel_converged_point -
1760
             * i4_rec_strd - 1
1761
             */
1762
538k
            pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[L0];
1763
1764
            /* computing half_y , and half_xy*/
1765
538k
            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1766
538k
                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1], ps_me_ctxt->apu1_subpel_buffs[2],
1767
538k
                ps_me_ctxt->ai4_rec_strd[L0], ps_me_ctxt->u4_subpel_buf_strd,
1768
538k
                ps_proc->ai16_pred1 + 3, ps_me_ctxt->u4_subpel_buf_strd);
1769
1770
538k
            isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, L0);
1771
538k
        }
1772
619k
    }
1773
1774
    /***********************************************************************
1775
     * If a particular skiip Mv is giving better sad, copy to the corresponding
1776
     * MBPART
1777
     * In B slices this loop should go only to PREDL1: If we found min sad
1778
     * we will go to the skip ref list only
1779
     * Have to find a way to make it without too much change or new vars
1780
     **********************************************************************/
1781
883k
    if(s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[L0].i4_mb_cost)
1782
378k
    {
1783
378k
        ps_me_ctxt->as_mb_part[L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1784
378k
        ps_me_ctxt->as_mb_part[L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1785
378k
        ps_me_ctxt->as_mb_part[L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1786
378k
    }
1787
504k
    else if(ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf)
1788
220k
    {
1789
        /* Now we have to copy the buffers */
1790
220k
        ps_inter_pred_fxns->pf_inter_pred_luma_copy(
1791
220k
            ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf, ps_proc->pu1_best_subpel_buf,
1792
220k
            ps_me_ctxt->u4_subpel_buf_strd, ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL,
1793
220k
            0);
1794
220k
    }
1795
1796
    /**********************************************************************
1797
     * Now get the minimum of MB part sads by searching over all ref lists
1798
     **********************************************************************/
1799
883k
    ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
1800
883k
        ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx;
1801
883k
    ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
1802
883k
        ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy;
1803
883k
    ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[L0].i4_mb_cost;
1804
883k
    ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[L0].i4_mb_distortion;
1805
883k
    ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1806
883k
    ps_proc->ps_mb_info->as_pu->u1_pred_mode = L0;
1807
1808
    /* Mark the reflists */
1809
883k
    ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx = 0;
1810
883k
    ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx = -1;
1811
1812
    /* number of partitions */
1813
883k
    ps_proc->u4_num_sub_partitions = 1;
1814
883k
    *(ps_proc->pu4_mb_pu_cnt) = 1;
1815
1816
    /* position in-terms of PU */
1817
883k
    ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
1818
883k
    ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
1819
1820
    /* PU size */
1821
883k
    ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
1822
883k
    ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
1823
1824
    /* Update min sad conditions */
1825
883k
    if(ps_me_ctxt->u4_min_sad_reached == 1)
1826
266k
    {
1827
266k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1828
266k
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1829
266k
    }
1830
883k
}
1831
1832
/**
1833
*******************************************************************************
1834
*
1835
* @brief This function performs motion estimation for the current NMB
1836
*
1837
* @par Description:
1838
* Intializes input and output pointers required by the function
1839
*isvce_compute_me and calls the function isvce_compute_me in a loop to process
1840
*NMBs.
1841
*
1842
* @param[in] ps_proc
1843
*  Process context corresponding to the job
1844
*
1845
* @returns
1846
*
1847
* @remarks none
1848
*
1849
*******************************************************************************
1850
*/
1851
void isvce_compute_me_nmb(isvce_process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1852
884k
{
1853
884k
    UWORD32 u4_i;
1854
1855
884k
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
1856
884k
    isvce_mb_info_t *ps_mb_begin = ps_proc->ps_mb_info;
1857
1858
884k
    UWORD32 *pu4_mb_pu_cnt_begin = ps_proc->pu4_mb_pu_cnt;
1859
884k
    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1860
1861
    /* Spatial dependencies for skip are not met if nmb > 1 */
1862
884k
    ASSERT(1 == u4_nmb_count);
1863
1864
884k
    if(ps_proc->i4_mb_x)
1865
772k
    {
1866
772k
        ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_nbr_info.ps_left_mb_info->u1_is_intra;
1867
772k
        ps_proc->s_me_ctxt.u4_left_is_skip =
1868
772k
            (ps_proc->s_nbr_info.ps_left_mb_info->u2_mb_type == PSKIP);
1869
772k
    }
1870
1871
1.76M
    for(u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1872
884k
    {
1873
        /* Wait for ME map */
1874
884k
        if(ps_proc->i4_mb_y > 0)
1875
586k
        {
1876
            /* Wait for top right ME to be done */
1877
586k
            UWORD8 *pu1_me_map_tp_rw =
1878
586k
                ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1879
1880
586k
            while(1)
1881
587k
            {
1882
587k
                volatile UWORD8 *pu1_buf;
1883
587k
                WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1884
1885
587k
                idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1886
587k
                pu1_buf = pu1_me_map_tp_rw + idx;
1887
587k
                if(*pu1_buf) break;
1888
18.4E
                ithread_yield();
1889
18.4E
            }
1890
586k
        }
1891
1892
884k
        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1893
884k
        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1894
884k
        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1895
884k
        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1896
1897
884k
        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1898
884k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1899
1900
884k
        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1901
884k
        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1902
1903
        /* Set the best subpel buf to the correct mb so that the buffer can be
1904
         * copied */
1905
884k
        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1906
884k
        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1907
1908
        /* Set the min sad conditions */
1909
884k
        ps_proc->ps_cur_mb->u4_min_sad = ps_codec->u4_min_sad;
1910
884k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1911
1912
884k
        isvce_derive_nghbr_avbl_of_mbs(ps_proc);
1913
1914
884k
        isvce_init_me(ps_proc);
1915
1916
        /* Compute ME according to slice type */
1917
884k
        ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1918
1919
        /* update top and left structs */
1920
884k
        if(u4_nmb_count > 1)
1921
0
        {
1922
0
            isvce_mb_info_t *ps_left_syn = ps_proc->s_nbr_info.ps_left_mb_info;
1923
1924
0
            ps_left_syn[0] = ps_proc->ps_mb_info[0];
1925
0
            ps_left_syn[0].u1_is_intra = 0;
1926
0
            ps_left_syn[0].u2_mb_type = ps_proc->ps_cur_mb->u4_mb_type;
1927
0
        }
1928
1929
        /* Copy the min sad reached info */
1930
884k
        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1931
884k
        ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1932
1933
        /*
1934
         * To make sure that the MV map is properly sync to the
1935
         * cache we need to do a DDB
1936
         */
1937
884k
        {
1938
884k
            DATA_SYNC();
1939
1940
884k
            pu1_me_map[ps_proc->i4_mb_x] = 1;
1941
884k
        }
1942
884k
        ps_proc->i4_mb_x++;
1943
1944
884k
        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1945
884k
        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);
1946
1947
        /* update buffers pointers */
1948
884k
        ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1949
884k
            ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1950
884k
        ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1951
884k
            ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) + MB_SIZE;
1952
884k
        ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1953
884k
            ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) + MB_SIZE;
1954
884k
        ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1955
884k
            ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) + MB_SIZE;
1956
1957
        /*
1958
         * Note: Although chroma mb size is 8, as the chroma buffers are
1959
         * interleaved, the stride per MB is MB_SIZE
1960
         */
1961
884k
        ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1962
884k
            ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1963
884k
        ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1964
884k
            ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) + MB_SIZE;
1965
884k
        ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1966
884k
            ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) + MB_SIZE;
1967
884k
        ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
1968
884k
            ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) + MB_SIZE;
1969
1970
884k
        ps_proc->pu4_mb_pu_cnt++;
1971
884k
        ps_proc->ps_mb_info++;
1972
884k
    }
1973
1974
884k
    ps_proc->ps_mb_info = ps_mb_begin;
1975
884k
    ps_proc->pu4_mb_pu_cnt = pu4_mb_pu_cnt_begin;
1976
884k
    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1977
1978
    /* update buffers pointers */
1979
884k
    ps_proc->s_src_buf_props.as_component_bufs[0].pv_data =
1980
884k
        ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1981
884k
    ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data =
1982
884k
        ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[0].pv_data) - MB_SIZE * u4_nmb_count;
1983
884k
    ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data =
1984
884k
        ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[0].pv_data) -
1985
884k
        MB_SIZE * u4_nmb_count;
1986
884k
    ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data =
1987
884k
        ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[0].pv_data) -
1988
884k
        MB_SIZE * u4_nmb_count;
1989
1990
    /*
1991
     * Note: Although chroma mb size is 8, as the chroma buffers are
1992
     * interleaved, the stride per MB is MB_SIZE
1993
     */
1994
884k
    ps_proc->s_src_buf_props.as_component_bufs[1].pv_data =
1995
884k
        ((UWORD8 *) ps_proc->s_src_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1996
884k
    ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data =
1997
884k
        ((UWORD8 *) ps_proc->s_rec_buf_props.as_component_bufs[1].pv_data) - MB_SIZE * u4_nmb_count;
1998
884k
    ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data =
1999
884k
        ((UWORD8 *) ps_proc->as_ref_buf_props[0].as_component_bufs[1].pv_data) -
2000
884k
        MB_SIZE * u4_nmb_count;
2001
884k
    ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data =
2002
884k
        ((UWORD8 *) ps_proc->as_ref_buf_props[1].as_component_bufs[1].pv_data) -
2003
884k
        MB_SIZE * u4_nmb_count;
2004
884k
}
2005
2006
/**
2007
*******************************************************************************
2008
*
2009
* @brief The function computes parameters for a BSKIP MB
2010
*
2011
* @par Description:
2012
*  The function updates the skip motion vector for B Mb, check if the Mb can be
2013
*  marked as skip and returns it
2014
*
2015
* @param[in] ps_proc
2016
*  Pointer to process context
2017
*
2018
* @param[in] u4_for_me
2019
*  Dummy
2020
*
2021
* @param[in] i4_reflist
2022
*  Dummy
2023
*
2024
* @returns Flag indicating if the current Mb can be skip or not
2025
*
2026
* @remarks
2027
*   The code implements the logic as described in sec 8.4.1.2.2
2028
*   It also computes co-located MB parmas according to sec 8.4.1.2.1
2029
*
2030
*   Need to add condition for this fucntion to be used in ME
2031
*
2032
*******************************************************************************/
2033
WORD32 isvce_find_bskip_params_me(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
2034
0
{
2035
    /* Colzero for co-located MB */
2036
0
    WORD32 i4_colzeroflag;
2037
2038
    /* motion vectors for neighbouring MBs */
2039
0
    isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
2040
2041
    /* Variables to check if a particular mB is available */
2042
0
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
2043
2044
    /* Mode availability, init to no modes available     */
2045
0
    WORD32 i4_mode_avail;
2046
2047
    /*  mb neighbor availability */
2048
0
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
2049
2050
    /* Temp var */
2051
0
    WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
2052
2053
    /*
2054
     * Colocated motion vector
2055
     */
2056
0
    mv_t s_mvcol;
2057
2058
    /*
2059
     * Colocated picture idx
2060
     */
2061
0
    WORD32 i4_refidxcol;
2062
2063
0
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
2064
2065
0
    UNUSED(i4_reflist);
2066
2067
    /**************************************************************************
2068
     *Find co-located MB parameters
2069
     *      See sec 8.4.1.2.1  for reference
2070
     **************************************************************************/
2071
0
    {
2072
        /*
2073
         * Find the co-located Mb and update the skip and pred appropriately
2074
         * 1) Default colpic is forward ref : Table 8-6
2075
         * 2) Default mb col is current MB : Table 8-8
2076
         */
2077
2078
0
        if(ps_proc->ps_col_mb->u1_is_intra)
2079
0
        {
2080
0
            s_mvcol.i2_mvx = 0;
2081
0
            s_mvcol.i2_mvy = 0;
2082
0
            i4_refidxcol = -1;
2083
0
        }
2084
0
        else
2085
0
        {
2086
0
            if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
2087
0
            {
2088
0
                s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
2089
0
                i4_refidxcol = 0;
2090
0
            }
2091
0
            else
2092
0
            {
2093
0
                s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
2094
0
                i4_refidxcol = 0;
2095
0
            }
2096
0
        }
2097
2098
        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as
2099
         * default */
2100
0
        i4_colzeroflag =
2101
0
            (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
2102
0
    }
2103
2104
    /***************************************************************************
2105
     * Evaluating skip params : Spatial Skip
2106
     **************************************************************************/
2107
0
    {
2108
        /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
2109
0
        ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;
2110
0
        ps_b_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x].as_pu;
2111
2112
0
        i4_c_avail = 0;
2113
0
        if(ps_ngbr_avbl->u1_mb_c)
2114
0
        {
2115
0
            ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x + 1].as_pu;
2116
0
            i4_c_avail = 1;
2117
0
        }
2118
0
        else
2119
0
        {
2120
0
            ps_c_pu = ps_proc->s_nbr_info.ps_top_row_mb_info[ps_proc->i4_mb_x - 1].as_pu;
2121
0
            i4_c_avail = ps_ngbr_avbl->u1_mb_d;
2122
0
        }
2123
2124
0
        i4_a = ps_ngbr_avbl->u1_mb_a;
2125
0
        i4_b = ps_ngbr_avbl->u1_mb_b;
2126
0
        i4_c = i4_c_avail;
2127
2128
        /* Init to no mode avail */
2129
0
        i4_mode_avail = 0;
2130
0
        for(i = 0; i < 2; i++)
2131
0
        {
2132
0
            i4_cmpl_mode = (i == 0) ? L1 : L0;
2133
2134
0
            i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
2135
0
                              (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
2136
0
                             << i;
2137
0
            i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
2138
0
                              (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
2139
0
                             << i;
2140
0
            i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
2141
0
                              (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
2142
0
                             << i;
2143
0
        }
2144
2145
0
        if(i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
2146
0
        {
2147
0
            i4_skip_type = BI;
2148
0
        }
2149
0
        else if(i4_mode_avail == 0x1)
2150
0
        {
2151
0
            i4_skip_type = L0;
2152
0
        }
2153
0
        else if(i4_mode_avail == 0x2)
2154
0
        {
2155
0
            i4_skip_type = L1;
2156
0
        }
2157
2158
        /* Update skip MV for L0 */
2159
0
        if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
2160
0
        {
2161
0
            ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
2162
0
            ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
2163
0
        }
2164
0
        else
2165
0
        {
2166
0
            ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
2167
0
            ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
2168
0
        }
2169
2170
        /* Update skip MV for L1 */
2171
0
        if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
2172
0
        {
2173
0
            ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
2174
0
            ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
2175
0
        }
2176
0
        else
2177
0
        {
2178
0
            ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
2179
0
            ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
2180
0
        }
2181
0
    }
2182
2183
    /***************************************************************************
2184
     * Evaluating skip params : Temporal skip
2185
     **************************************************************************/
2186
0
    {
2187
0
        svc_au_buf_t *ps_ref_pic[MAX_REF_PIC_CNT];
2188
0
        WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
2189
0
        isvce_enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
2190
2191
0
        ps_ref_pic[L0] = ps_proc->aps_ref_pic[L0];
2192
0
        ps_ref_pic[L1] = ps_proc->aps_ref_pic[L1];
2193
2194
0
        i4_tb = ps_codec->i4_poc - ps_ref_pic[L0]->i4_abs_poc;
2195
0
        i4_td = ps_ref_pic[L1]->i4_abs_poc - ps_ref_pic[L0]->i4_abs_poc;
2196
2197
0
        i4_tb = CLIP3(-128, 127, i4_tb);
2198
0
        i4_td = CLIP3(-128, 127, i4_td);
2199
2200
0
        i4_tx = (16384 + ABS(i4_td / 2)) / i4_td;
2201
0
        i4_dist_scale_factor = CLIP3(-1024, 1023, (i4_tb * i4_tx + 32) >> 6);
2202
2203
        /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc)
2204
         * operation */
2205
0
        ps_skip_mv[L0].s_mv.i2_mvx = ((i4_dist_scale_factor * s_mvcol.i2_mvx + 128) >> 8) & 0xfffc;
2206
0
        ps_skip_mv[L0].s_mv.i2_mvy = ((i4_dist_scale_factor * s_mvcol.i2_mvy + 128) >> 8) & 0xfffc;
2207
2208
0
        ps_skip_mv[L1].s_mv.i2_mvx = (ps_skip_mv[L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
2209
0
        ps_skip_mv[L1].s_mv.i2_mvy = (ps_skip_mv[L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
2210
0
    }
2211
2212
0
    return i4_skip_type;
2213
0
}
2214
2215
/**
2216
*******************************************************************************
2217
*
2218
* @brief The function computes the skip motion vector for B mb
2219
*
2220
* @par Description:
2221
*  The function gives the skip motion vector for B Mb, check if the Mb can be
2222
*  marked as skip
2223
*
2224
* @param[in] ps_proc
2225
*  Pointer to process context
2226
*
2227
* @param[in] u4_for_me
2228
*  Dummy
2229
*
2230
* @param[in] i4_reflist
2231
*  Dummy
2232
*
2233
* @returns Flag indicating if the current Mb can be skip or not
2234
*
2235
* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
2236
*   specification. It also computes co-located MB parmas according to
2237
*sec 8.4.1.2.1
2238
*
2239
*******************************************************************************/
2240
WORD32 isvce_find_bskip_params(isvce_process_ctxt_t *ps_proc, WORD32 i4_reflist)
{
    /*
     * Contract (as implemented below):
     *  - writes the candidate B-skip motion vectors for list 0 and list 1 to
     *    ps_proc->ps_skip_mv[0] and ps_proc->ps_skip_mv[1]
     *  - returns 1 when the prediction mode and motion vectors already stored
     *    in ps_proc->ps_mb_info->as_pu match those skip parameters, 0 otherwise
     *  - i4_reflist is not used
     */
    WORD32 i4_colzeroflag;

    /* PU (motion) info of the neighbours: A = left, B = top, C = top-right,
     * falling back to top-left when top-right is not available */
    isvce_enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;

    /* MB info of the same neighbours */
    isvce_mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;

    /* Set when the respective neighbour is available and inter coded */
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;

    /* Bit mask of usable lists: bit 0 -> L0, bit 1 -> L1 */
    WORD32 i4_mode_avail;

    /* mb neighbor availability */
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;

    /* Temp var */
    WORD32 i, i4_cmpl_mode;

    UNUSED(i4_reflist);

    /**************************************************************************
     * Find co-located parameters
     *      See sec 8.4.1.2.1 for reference
     **************************************************************************/
    {
        /*
         * Find the co-located Mb and update the skip and pred appropriately
         * 1) Default colpic is forward ref : Table 8-6
         * 2) Default mb col is current MB : Table 8-8
         */
        mv_t s_mvcol;
        WORD32 i4_refidxcol;

        if(ps_proc->ps_col_mb->u1_is_intra)
        {
            s_mvcol.i2_mvx = 0;
            s_mvcol.i2_mvy = 0;
            i4_refidxcol = -1;
        }
        else
        {
            /* Take the L0 motion of the co-located MB unless it is L1-only */
            if(ps_proc->ps_col_mb->as_pu->u1_pred_mode != L1)
            {
                s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L0].s_mv;
                i4_refidxcol = 0;
            }
            else
            {
                s_mvcol = ps_proc->ps_col_mb->as_pu->as_me_info[L1].s_mv;
                i4_refidxcol = 0;
            }
        }

        /* RefPicList1[ 0 ] is marked as "used for short-term reference", as
         * default */
        i4_colzeroflag =
            (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1) && (ABS(s_mvcol.i2_mvy) <= 1));
    }

    /***************************************************************************
     * Evaluating skip params
     **************************************************************************/
    /* Section 8.4.1.2.2 */
    ps_a_syn = ps_proc->s_nbr_info.ps_left_mb_info;
    ps_a_pu = ps_proc->s_nbr_info.ps_left_mb_info->as_pu;

    ps_b_syn = ps_proc->s_nbr_info.ps_top_row_mb_info + ps_proc->i4_mb_x;
    ps_b_pu = ps_b_syn->as_pu;

    i4_c_avail = 0;
    if(ps_ngbr_avbl->u1_mb_c)
    {
        ps_c_syn = ps_b_syn + 1;
        ps_c_pu = ps_c_syn->as_pu;
        i4_c_avail = 1;
    }
    else
    {
        ps_c_syn = ps_b_syn - 1;
        ps_c_pu = ps_c_syn->as_pu;
        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
    }

    /*
     * A neighbour's MB info is read only when the neighbour is flagged as
     * available. For an unavailable neighbour the result is 0 regardless of
     * what the structure holds, and (notably for C at the left picture edge)
     * the pointer may not refer to a valid entry.
     */
    i4_a = ps_ngbr_avbl->u1_mb_a;
    if(i4_a)
    {
        i4_a &= !ps_a_syn->u1_is_intra;
    }

    i4_b = ps_ngbr_avbl->u1_mb_b;
    if(i4_b)
    {
        i4_b &= !ps_b_syn->u1_is_intra;
    }

    i4_c = i4_c_avail;
    if(i4_c)
    {
        i4_c &= !ps_c_syn->u1_is_intra;
    }

    /* Init to no mode avail */
    i4_mode_avail = 0;
    for(i = 0; i < 2; i++)
    {
        /* The list that must NOT be the neighbour's only prediction direction */
        i4_cmpl_mode = (i == 0) ? L1 : L0;

        /* List i is usable if any inter neighbour predicts from it with ref idx 0 */
        i4_mode_avail |= (i4_a && (ps_a_pu->u1_pred_mode != i4_cmpl_mode) &&
                          (ps_a_pu->as_me_info[i].i1_ref_idx == 0))
                         << i;
        i4_mode_avail |= (i4_b && (ps_b_pu->u1_pred_mode != i4_cmpl_mode) &&
                          (ps_b_pu->as_me_info[i].i1_ref_idx == 0))
                         << i;
        i4_mode_avail |= (i4_c && (ps_c_pu->u1_pred_mode != i4_cmpl_mode) &&
                          (ps_c_pu->as_me_info[i].i1_ref_idx == 0))
                         << i;
    }

    /* Update skip MV for L0 */
    if((i4_mode_avail & 0x1) && (!i4_colzeroflag))
    {
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    }
    else
    {
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
    }

    /* Update skip MV for L1 */
    if((i4_mode_avail & 0x2) && (!i4_colzeroflag))
    {
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
    }
    else
    {
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
    }

    /* Now see if the ME information matches the SKIP information */
    switch(ps_proc->ps_mb_info->as_pu->u1_pred_mode)
    {
        case PRED_BI:
            /* BI matches when both lists are usable, or neither is */
            if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
                ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
               (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
                ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
               (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
                ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
               (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
                ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
               (i4_mode_avail == 0x3 || i4_mode_avail == 0x0))
            {
                return 1;
            }
            break;

        case PRED_L0:
            if((ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvx ==
                ps_proc->ps_skip_mv[0].s_mv.i2_mvx) &&
               (ps_proc->ps_mb_info->as_pu->as_me_info[0].s_mv.i2_mvy ==
                ps_proc->ps_skip_mv[0].s_mv.i2_mvy) &&
               (i4_mode_avail == 0x1))
            {
                return 1;
            }
            break;

        case PRED_L1:
            if((ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvx ==
                ps_proc->ps_skip_mv[1].s_mv.i2_mvx) &&
               (ps_proc->ps_mb_info->as_pu->as_me_info[1].s_mv.i2_mvy ==
                ps_proc->ps_skip_mv[1].s_mv.i2_mvy) &&
               (i4_mode_avail == 0x2))
            {
                return 1;
            }
            break;

        default:
            /* Any other prediction mode cannot be coded as B-skip */
            break;
    }

    return 0;
}
2421
2422
/**
2423
*******************************************************************************
2424
*
2425
* @brief This function computes the best motion vector among the tentative mv
2426
* candidates chosen.
2427
*
2428
* @par Description:
2429
*  This function determines the position in the search window at which the
2430
*motion estimation should begin in order to minimise the number of search
2431
*iterations.
2432
*
2433
* @param[in] ps_mb_part
2434
*  pointer to current mb partition ctxt with respect to ME
2435
*
2436
* @param[in] u4_lambda_motion
2437
*  lambda motion
2438
*
2439
* @param[in] u4_fast_flag
2440
*  enable/disable fast sad computation
2441
*
2442
* @returns  mv pair & corresponding distortion and cost
2443
*
2444
* @remarks Currently onyl 4 search candiates are supported
2445
*
2446
*******************************************************************************
2447
*/
2448
void isvce_evaluate_bipred(isvce_me_ctxt_t *ps_me_ctxt, isvce_process_ctxt_t *ps_proc,
2449
                           mb_part_ctxt *ps_mb_ctxt_bi)
2450
0
{
2451
0
    UWORD32 i, u4_fast_sad;
2452
2453
0
    WORD32 i4_dest_buff;
2454
2455
0
    mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
2456
2457
0
    UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
2458
2459
0
    UWORD8 *pu1_dst_buf;
2460
2461
0
    WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
2462
2463
0
    WORD32 i4_mb_distortion, i4_mb_cost;
2464
2465
0
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
2466
0
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2467
0
    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2468
2469
0
    u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
2470
2471
0
    i4_dest_buff = 0;
2472
0
    for(i = 0; i < ps_me_ctxt->u4_num_candidates[BI]; i += 2)
2473
0
    {
2474
0
        pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
2475
2476
0
        s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx >> 2;
2477
0
        s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy >> 2;
2478
0
        s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx >> 2;
2479
0
        s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy >> 2;
2480
2481
0
        ps_l0_pred_mv = &ps_proc->ps_pred_mv[L0].s_mv;
2482
0
        ps_l1_pred_mv = &ps_proc->ps_pred_mv[L1].s_mv;
2483
2484
0
        if((ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx & 0x3) ||
2485
0
           (ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy & 0x3))
2486
0
        {
2487
0
            pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[L0].pu1_best_hpel_buf;
2488
0
            i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
2489
0
        }
2490
0
        else
2491
0
        {
2492
0
            pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[L0] + (s_l0_mv.i2_mvx) +
2493
0
                            ((s_l0_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L0]);
2494
0
            i4_ref_l0_stride = ps_me_ctxt->ai4_rec_strd[L0];
2495
0
        }
2496
2497
0
        if((ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx & 0x3) ||
2498
0
           (ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy & 0x3))
2499
0
        {
2500
0
            pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[L1].pu1_best_hpel_buf;
2501
0
            i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
2502
0
        }
2503
0
        else
2504
0
        {
2505
0
            pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[L1] + (s_l1_mv.i2_mvx) +
2506
0
                            ((s_l1_mv.i2_mvy) * ps_me_ctxt->ai4_rec_strd[L1]);
2507
0
            i4_ref_l1_stride = ps_me_ctxt->ai4_rec_strd[L1];
2508
0
        }
2509
2510
0
        ps_inter_pred_fxns->pf_inter_pred_luma_bilinear(
2511
0
            pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf, i4_ref_l0_stride, i4_ref_l1_stride,
2512
0
            ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
2513
2514
0
        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
2515
0
            ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf, ps_me_ctxt->i4_src_strd,
2516
0
            ps_me_ctxt->u4_subpel_buf_strd, INT_MAX, &i4_mb_distortion);
2517
2518
        /* compute cost */
2519
0
        i4_mb_cost =
2520
0
            ps_me_ctxt
2521
0
                ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
2522
0
        i4_mb_cost +=
2523
0
            ps_me_ctxt
2524
0
                ->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
2525
0
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvx -
2526
0
                                              ps_l1_pred_mv->i2_mvx];
2527
0
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[BI][i + 1].i2_mvy -
2528
0
                                              ps_l1_pred_mv->i2_mvy];
2529
2530
0
        i4_mb_cost -=
2531
0
            (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == BI) * (i == 0);
2532
2533
0
        i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
2534
0
        i4_mb_cost += i4_mb_distortion;
2535
2536
0
        if(i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
2537
0
        {
2538
0
            ps_mb_ctxt_bi->i4_srch_pos_idx = (i >> 1);
2539
0
            ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
2540
0
            ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
2541
0
            ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
2542
0
            i4_dest_buff = (i4_dest_buff + 1) % 2;
2543
0
        }
2544
0
    }
2545
0
}
2546
2547
/**
2548
*******************************************************************************
2549
*
2550
* @brief This function performs motion estimation for the current mb
2551
*
2552
* @par Description:
2553
*  The current mb is compared with a list of mb's in the reference frame for
2554
*  least cost. The mb that offers least cost is chosen as predicted mb and the
2555
*  displacement of the predicted mb from index location of the current mb is
2556
*  signaled as mv. The list of the mb's that are chosen in the reference frame
2557
*  are dependent on the speed of the ME configured.
2558
*
2559
* @param[in] ps_proc
2560
*  Process context corresponding to the job
2561
*
2562
* @returns  motion vector of the pred mb, sad, cost.
2563
*
2564
* @remarks none
2565
*
2566
*******************************************************************************
2567
*/
2568
void isvce_compute_me_multi_reflist(isvce_process_ctxt_t *ps_proc)
2569
0
{
2570
    /* me ctxt */
2571
0
    isvce_me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
2572
2573
    /* codec context */
2574
0
    isvce_codec_t *ps_codec = ps_proc->ps_codec;
2575
0
    isa_dependent_fxns_t *ps_isa_dependent_fxns = &ps_codec->s_isa_dependent_fxns;
2576
0
    inter_pred_fxns_t *ps_inter_pred_fxns = &ps_isa_dependent_fxns->s_inter_pred_fxns;
2577
2578
    /* Temp variables for looping over ref lists */
2579
0
    WORD32 i4_reflist, i4_max_reflist;
2580
2581
    /* source buffer for halp pel generation functions */
2582
0
    UWORD8 *pu1_hpel_src;
2583
2584
    /* quantization parameters */
2585
0
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
2586
2587
    /* Mb part ctxts for SKIP */
2588
0
    mb_part_ctxt as_skip_mbpart[2];
2589
2590
0
    ASSERT(1 == MAX_REF_FRAMES_PER_PRED_DIR);
2591
2592
    /* Sad therholds */
2593
0
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
2594
2595
0
    {
2596
0
        WORD32 rows_above, rows_below, columns_left, columns_right;
2597
2598
        /* During evaluation for motion vectors do not search through padded regions
2599
         */
2600
        /* Obtain number of rows and columns that are effective for computing for me
2601
         * evaluation */
2602
0
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
2603
0
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
2604
0
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
2605
0
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
2606
2607
        /* init srch range */
2608
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X /
2609
         * 2 on all sides.
2610
         */
2611
0
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2612
0
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
2613
0
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2614
0
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
2615
2616
        /* this is to facilitate fast sub pel computation with minimal loads */
2617
0
        if(ps_me_ctxt->u4_enable_hpel)
2618
0
        {
2619
0
            ps_me_ctxt->i4_srch_range_w += 1;
2620
0
            ps_me_ctxt->i4_srch_range_e -= 1;
2621
0
            ps_me_ctxt->i4_srch_range_n += 1;
2622
0
            ps_me_ctxt->i4_srch_range_s -= 1;
2623
0
        }
2624
0
    }
2625
2626
    /* Compute ME and store the MVs */
2627
0
    {
2628
        /***********************************************************************
2629
         * Compute ME for lists L0 and L1
2630
         *  For L0 -> L0 skip + L0
2631
         *  for L1 -> L0 skip + L0 + L1 skip + L1
2632
         ***********************************************************************/
2633
0
        i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? L0 : L1;
2634
2635
        /* Init SATQD for the current list */
2636
0
        ps_me_ctxt->u4_min_sad_reached = 0;
2637
0
        ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
2638
2639
0
        for(i4_reflist = L0; i4_reflist <= i4_max_reflist; i4_reflist++)
2640
0
        {
2641
            /* Get the seed motion vector candidates                    */
2642
0
            isvce_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
2643
2644
            /* ****************************************************************
2645
             *Evaluate the SKIP for current list
2646
             * ****************************************************************/
2647
0
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
2648
0
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
2649
0
            as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
2650
0
            as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
2651
2652
0
            if(ps_me_ctxt->i4_skip_type == i4_reflist)
2653
0
            {
2654
0
                isvce_compute_skip_cost(
2655
0
                    ps_me_ctxt, (ime_mv_t *) (&ps_proc->ps_skip_mv[i4_reflist].s_mv),
2656
0
                    &as_skip_mbpart[i4_reflist], ps_codec->s_cfg.u4_enable_satqd, i4_reflist,
2657
0
                    (ps_proc->i4_slice_type == BSLICE));
2658
0
            }
2659
2660
0
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2661
0
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2662
2663
            /******************************************************************
2664
             * Evaluate ME For current list
2665
             *****************************************************************/
2666
0
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
2667
0
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
2668
0
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
2669
0
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
2670
2671
            /* Init Hpel */
2672
0
            ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
2673
2674
            /* In case we found out the minimum SAD, exit the ME eval */
2675
0
            if(ps_me_ctxt->u4_min_sad_reached)
2676
0
            {
2677
0
                i4_max_reflist = i4_reflist;
2678
0
                break;
2679
0
            }
2680
2681
            /* Evaluate search candidates for initial mv pt */
2682
0
            isvce_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
2683
2684
            /********************************************************************/
2685
            /*                  full pel motion estimation                      */
2686
            /********************************************************************/
2687
0
            isvce_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2688
2689
0
            DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvx >> 2),
2690
0
                                   (ps_me_ctxt->s_mb_part.s_mv_curr.i2_mvy >> 2));
2691
2692
0
            DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->s_mb_part.i4_mb_distortion, 1);
2693
2694
            /* Scale the MV to qpel resolution */
2695
0
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
2696
0
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
2697
2698
0
            if(ps_me_ctxt->u4_enable_hpel)
2699
0
            {
2700
                /* moving src pointer to the converged motion vector location */
2701
0
                pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] +
2702
0
                               (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2) +
2703
0
                               ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2) *
2704
0
                                ps_me_ctxt->ai4_rec_strd[i4_reflist]);
2705
2706
0
                ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
2707
0
                ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
2708
0
                ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
2709
2710
                /* Init the search position to an invalid number */
2711
0
                ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
2712
2713
                /* Incase a buffer is still in use by L0, replace it with spare buff */
2714
0
                ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx] =
2715
0
                    ps_proc->apu1_subpel_buffs[3];
2716
2717
0
                ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2718
2719
                /* half  pel search is done for both sides of full pel,
2720
                 * hence half_x of width x height = 17x16 is created
2721
                 * starting from left half_x of converged full pel */
2722
0
                pu1_hpel_src -= 1;
2723
2724
                /* computing half_x */
2725
0
                ps_codec->pf_ih264e_sixtapfilter_horz(
2726
0
                    pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[0],
2727
0
                    ps_me_ctxt->ai4_rec_strd[i4_reflist], ps_me_ctxt->u4_subpel_buf_strd);
2728
2729
                /*
2730
                 * Halfpel search is done for both sides of full pel,
2731
                 * hence half_y of width x height = 16x17 is created
2732
                 * starting from top half_y of converged full pel
2733
                 * for half_xy top_left is required
2734
                 * hence it starts from pu1_hpel_src = full_pel_converged_point -
2735
                 * i4_rec_strd - 1
2736
                 */
2737
0
                pu1_hpel_src -= ps_me_ctxt->ai4_rec_strd[i4_reflist];
2738
2739
                /* computing half_y and half_xy */
2740
0
                ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
2741
0
                    pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
2742
0
                    ps_me_ctxt->apu1_subpel_buffs[2], ps_me_ctxt->ai4_rec_strd[i4_reflist],
2743
0
                    ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
2744
0
                    ps_me_ctxt->u4_subpel_buf_strd);
2745
2746
0
                isvce_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
2747
0
            }
2748
0
        }
2749
2750
        /***********************************************************************
2751
         * If a particular skiip Mv is giving better sad, copy to the corresponding
2752
         * MBPART
2753
         * In B slices this loop should go only to PREDL1: If we found min sad
2754
         * we will go to the skip ref list only
2755
         * Have to find a way to make it without too much change or new vars
2756
         **********************************************************************/
2757
0
        for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2758
0
        {
2759
0
            if(as_skip_mbpart[i4_reflist].i4_mb_cost <
2760
0
               ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
2761
0
            {
2762
0
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost =
2763
0
                    as_skip_mbpart[i4_reflist].i4_mb_cost;
2764
0
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion =
2765
0
                    as_skip_mbpart[i4_reflist].i4_mb_distortion;
2766
0
                ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
2767
0
            }
2768
0
        }
2769
2770
        /***********************************************************************
2771
         * Compute ME for BI
2772
         *  In case of BI we do ME for two candidates
2773
         *   1) The best L0 and L1 Mvs
2774
         *   2) Skip L0 and L1 MVs
2775
         *
2776
         *   TODO
2777
         *   one of the search candidates is skip. Hence it may be duplicated
2778
         ***********************************************************************/
2779
0
        if(i4_max_reflist == L1 && ps_me_ctxt->u4_min_sad_reached == 0)
2780
0
        {
2781
0
            WORD32 i, j = 0;
2782
0
            WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
2783
0
            WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
2784
2785
            /* Get the free buffers */
2786
0
            l0_srch_pos_idx = ps_me_ctxt->as_mb_part[L0].i4_srch_pos_idx;
2787
0
            l1_srch_pos_idx = ps_me_ctxt->as_mb_part[L1].i4_srch_pos_idx;
2788
2789
            /* Search for the two free buffers in subpel list */
2790
0
            for(i = 0; i < SUBPEL_BUFF_CNT; i++)
2791
0
            {
2792
0
                if(i != l0_srch_pos_idx && i != l1_srch_pos_idx)
2793
0
                {
2794
0
                    ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
2795
0
                    j++;
2796
0
                }
2797
0
            }
2798
0
            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
2799
2800
            /* Copy the statial SKIP MV of each list */
2801
0
            i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L0] - 2;
2802
0
            i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[L1] - 2;
2803
0
            ps_me_ctxt->as_mv_init_search[BI][0].i2_mvx =
2804
0
                ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2805
0
            ps_me_ctxt->as_mv_init_search[BI][0].i2_mvy =
2806
0
                ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2807
0
            ps_me_ctxt->as_mv_init_search[BI][1].i2_mvx =
2808
0
                ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2809
0
            ps_me_ctxt->as_mv_init_search[BI][1].i2_mvy =
2810
0
                ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2811
2812
            /* Copy the SKIP MV temporal of each list */
2813
0
            i4_l0_skip_mv_idx++;
2814
0
            i4_l1_skip_mv_idx++;
2815
0
            ps_me_ctxt->as_mv_init_search[BI][2].i2_mvx =
2816
0
                ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2817
0
            ps_me_ctxt->as_mv_init_search[BI][2].i2_mvy =
2818
0
                ps_me_ctxt->as_mv_init_search[L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2819
0
            ps_me_ctxt->as_mv_init_search[BI][3].i2_mvx =
2820
0
                ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2821
0
            ps_me_ctxt->as_mv_init_search[BI][3].i2_mvy =
2822
0
                ps_me_ctxt->as_mv_init_search[L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2823
2824
            /* Copy the best MV after ME */
2825
0
            ps_me_ctxt->as_mv_init_search[BI][4] = ps_me_ctxt->as_mb_part[L0].s_mv_curr;
2826
0
            ps_me_ctxt->as_mv_init_search[BI][5] = ps_me_ctxt->as_mb_part[L1].s_mv_curr;
2827
2828
0
            ps_me_ctxt->u4_num_candidates[BI] = 6;
2829
2830
0
            ps_me_ctxt->as_mb_part[BI].i4_mb_cost = INT_MAX;
2831
0
            ps_me_ctxt->as_mb_part[BI].i4_mb_distortion = INT_MAX;
2832
2833
0
            isvce_evaluate_bipred(ps_me_ctxt, ps_proc, &ps_me_ctxt->as_mb_part[BI]);
2834
2835
0
            i4_max_reflist = BI;
2836
0
        }
2837
2838
        /**********************************************************************
2839
         * Now get the minimum of MB part sads by searching over all ref lists
2840
         **********************************************************************/
2841
0
        ps_proc->ps_mb_info->as_pu->u1_pred_mode = 0x3;
2842
2843
0
        for(i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2844
0
        {
2845
0
            if(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2846
0
            {
2847
0
                ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2848
0
                ps_proc->ps_cur_mb->i4_mb_distortion =
2849
0
                    ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2850
0
                ps_proc->ps_cur_mb->u4_mb_type =
2851
0
                    (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2852
0
                ps_proc->ps_mb_info->as_pu->u1_pred_mode = i4_reflist;
2853
0
            }
2854
0
        }
2855
2856
        /**********************************************************************
2857
         * In case we have a BI MB, we have to copy the buffers and set proer MV's
2858
         *  1)In case its BI, we need to get the best MVs given by BI and update
2859
         *    to their corresponding MB part
2860
         *  2)We also need to copy the buffer in which bipred buff is populated
2861
         *
2862
         *  Not that if we have
2863
         **********************************************************************/
2864
0
        if(ps_proc->ps_mb_info->as_pu->u1_pred_mode == BI)
2865
0
        {
2866
0
            WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[BI].i4_srch_pos_idx;
2867
0
            UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[BI].pu1_best_hpel_buf;
2868
2869
0
            ps_me_ctxt->as_mb_part[L0].s_mv_curr =
2870
0
                ps_me_ctxt->as_mv_init_search[BI][i4_srch_pos << 1];
2871
0
            ps_me_ctxt->as_mb_part[L1].s_mv_curr =
2872
0
                ps_me_ctxt->as_mv_init_search[BI][(i4_srch_pos << 1) + 1];
2873
2874
            /* Now we have to copy the buffers */
2875
0
            ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2876
0
                pu1_bi_buf, ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2877
0
                ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2878
0
        }
2879
0
        else if(ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf)
2880
0
        {
2881
            /* Now we have to copy the buffers */
2882
0
            ps_inter_pred_fxns->pf_inter_pred_luma_copy(
2883
0
                ps_me_ctxt->as_mb_part[ps_proc->ps_mb_info->as_pu->u1_pred_mode].pu1_best_hpel_buf,
2884
0
                ps_proc->pu1_best_subpel_buf, ps_me_ctxt->u4_subpel_buf_strd,
2885
0
                ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE, NULL, 0);
2886
0
        }
2887
0
    }
2888
2889
    /**************************************************************************
2890
     *Now copy the MVs to the current PU with qpel scaling
2891
     ***************************************************************************/
2892
0
    ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvx =
2893
0
        (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvx);
2894
0
    ps_proc->ps_mb_info->as_pu->as_me_info[L0].s_mv.i2_mvy =
2895
0
        (ps_me_ctxt->as_mb_part[L0].s_mv_curr.i2_mvy);
2896
0
    ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvx =
2897
0
        (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvx);
2898
0
    ps_proc->ps_mb_info->as_pu->as_me_info[L1].s_mv.i2_mvy =
2899
0
        (ps_me_ctxt->as_mb_part[L1].s_mv_curr.i2_mvy);
2900
2901
0
    ps_proc->ps_mb_info->as_pu->as_me_info[0].i1_ref_idx =
2902
0
        (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L1) ? 0 : -1;
2903
0
    ps_proc->ps_mb_info->as_pu->as_me_info[1].i1_ref_idx =
2904
0
        (ps_proc->ps_mb_info->as_pu->u1_pred_mode != L0) ? 0 : -1;
2905
2906
    /* number of partitions */
2907
0
    ps_proc->u4_num_sub_partitions = 1;
2908
0
    *(ps_proc->pu4_mb_pu_cnt) = 1;
2909
2910
    /* position in-terms of PU */
2911
0
    ps_proc->ps_mb_info->as_pu->u1_pos_x_in_4x4 = 0;
2912
0
    ps_proc->ps_mb_info->as_pu->u1_pos_y_in_4x4 = 0;
2913
2914
    /* PU size */
2915
0
    ps_proc->ps_mb_info->as_pu->u1_wd_in_4x4_m1 = 3;
2916
0
    ps_proc->ps_mb_info->as_pu->u1_ht_in_4x4_m1 = 3;
2917
2918
    /* Update min sad conditions */
2919
0
    if(ps_me_ctxt->u4_min_sad_reached == 1)
2920
0
    {
2921
0
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2922
0
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2923
0
    }
2924
0
}