Coverage Report

Created: 2025-11-09 06:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libavc/encoder/ime.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/**
21
 *******************************************************************************
22
 * @file
23
 *  ime.c
24
 *
25
 * @brief
26
 *  This file contains functions needed for computing motion vectors of a
27
 *  16x16 block
28
 *
29
 * @author
30
 *  Ittiam
31
 *
32
 * @par List of Functions:
33
 *  - ime_diamond_search_16x16
34
 *  - ime_evaluate_init_srchposn_16x16
35
 *  - ime_full_pel_motion_estimation_16x16
36
 *  - ime_sub_pel_motion_estimation_16x16
37
 *  - ime_compute_skip_cost
38
 *
39
 * @remarks
40
 *  None
41
 *
42
 *******************************************************************************
43
 */
44
45
/*****************************************************************************/
46
/* File Includes                                                             */
47
/*****************************************************************************/
48
49
/* System include files */
50
#include <stdio.h>
51
#include <assert.h>
52
#include <limits.h>
53
#include <string.h>
54
55
/* User include files */
56
#include "ime_typedefs.h"
57
#include "ime_distortion_metrics.h"
58
#include "ime_defs.h"
59
#include "ime_structs.h"
60
#include "ime.h"
61
#include "ime_macros.h"
62
#include "ime_statistics.h"
63
64
/**
65
*******************************************************************************
66
*
67
* @brief Diamond Search
68
*
69
* @par Description:
70
*  This function computes the sad at vertices of several layers of diamond grid
71
*  at a time. The number of layers of diamond grid that would be evaluated is
72
*  configurable.The function computes the sad at vertices of a diamond grid. If
73
*  the sad at the center of the diamond grid is lesser than the sad at any other
74
*  point of the diamond grid, the function marks the candidate Mb partition as
75
*  mv.
76
*
77
* @param[in] ps_me_ctxt
78
*  pointer to me context
79
*
80
* @param[in] i4_reflist
81
*  ref list
82
*
83
* @returns  mv pair & corresponding distortion and cost
84
*
85
* @remarks Diamond Srch, radius is 1
86
*
87
*******************************************************************************
88
*/
89
void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist)
90
303k
{
91
    /* MB partition info */
92
303k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
93
94
    /* lagrange parameter */
95
303k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
96
97
    /* srch range*/
98
303k
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
99
303k
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
100
303k
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
101
303k
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
102
103
    /* enabled fast sad computation */
104
//    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
105
106
    /* pointer to src macro block */
107
303k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
108
303k
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
109
110
    /* strides */
111
303k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
112
303k
    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
113
114
    /* least cost */
115
303k
    WORD32 i4_cost_least = ps_mb_part->i4_mb_cost;
116
117
    /* least sad */
118
303k
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
119
120
    /* mv pair */
121
303k
    WORD16 i2_mvx, i2_mvy;
122
123
    /* mv bits */
124
303k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
125
126
    /* temp var */
127
303k
    WORD32 i4_cost[4];
128
303k
    WORD32 i4_sad[4];
129
303k
    UWORD8 *pu1_ref;
130
303k
    WORD16 i2_mv_u_x, i2_mv_u_y;
131
132
    /* Diamond search Iteration Max Cnt */
133
303k
    UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers;
134
135
    /* temp var */
136
//    UWORD8 u1_prev_jump = NONE;
137
//    UWORD8 u1_curr_jump = NONE;
138
//    UWORD8 u1_next_jump;
139
//    WORD32 mask_arr[5] = {15, 13, 14, 7, 11};
140
//    WORD32 mask;
141
//    UWORD8 *apu1_ref[4];
142
//    WORD32 i, cnt;
143
//    WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}};
144
145
    /* mv with best sad during initial evaluation */
146
303k
    i2_mvx = ps_mb_part->s_mv_curr.i2_mvx;
147
303k
    i2_mvy = ps_mb_part->s_mv_curr.i2_mvy;
148
149
303k
    i2_mv_u_x = i2_mvx;
150
303k
    i2_mv_u_y = i2_mvy;
151
152
1.03M
    while (u4_num_layers)
153
1.02M
    {
154
        /* FIXME : is this the write way to check for out of bounds ? */
155
1.02M
        if ( (i2_mvx - 1 < i4_srch_range_w) ||
156
1.00M
                        (i2_mvx + 1 > i4_srch_range_e) ||
157
996k
                        (i2_mvy - 1 < i4_srch_range_n) ||
158
985k
                        (i2_mvy + 1 > i4_srch_range_s) )
159
44.6k
        {
160
44.6k
            break;
161
44.6k
        }
162
163
981k
        pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd);
164
165
981k
        ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref,
166
981k
                                                pu1_curr_mb,
167
981k
                                                i4_ref_strd,
168
981k
                                                i4_src_strd,
169
981k
                                                i4_sad);
170
171
981k
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2);
172
981k
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2);
173
981k
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2);
174
981k
        DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2);
175
176
        /* compute cost */
177
981k
        i4_cost[0] = i4_sad[0] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
178
981k
                                                                   + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
179
981k
        i4_cost[1] = i4_sad[1] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx]
180
981k
                                                                   + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
181
981k
        i4_cost[2] = i4_sad[2] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
182
981k
                                                                   + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
183
981k
        i4_cost[3] = i4_sad[3] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
184
981k
                                                                   + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
185
186
187
981k
        if (i4_cost_least > i4_cost[0])
188
325k
        {
189
325k
            i4_cost_least = i4_cost[0];
190
325k
            i4_distortion_least = i4_sad[0];
191
192
325k
            i2_mv_u_x = (i2_mvx - 1);
193
325k
            i2_mv_u_y = i2_mvy;
194
325k
        }
195
196
981k
        if (i4_cost_least > i4_cost[1])
197
238k
        {
198
238k
            i4_cost_least = i4_cost[1];
199
238k
            i4_distortion_least = i4_sad[1];
200
201
238k
            i2_mv_u_x = (i2_mvx + 1);
202
238k
            i2_mv_u_y = i2_mvy;
203
238k
        }
204
205
981k
        if (i4_cost_least > i4_cost[2])
206
215k
        {
207
215k
            i4_cost_least = i4_cost[2];
208
215k
            i4_distortion_least = i4_sad[2];
209
210
215k
            i2_mv_u_x = i2_mvx;
211
215k
            i2_mv_u_y = i2_mvy - 1;
212
215k
        }
213
214
981k
        if (i4_cost_least > i4_cost[3])
215
128k
        {
216
128k
            i4_cost_least = i4_cost[3];
217
128k
            i4_distortion_least = i4_sad[3];
218
219
128k
            i2_mv_u_x = i2_mvx;
220
128k
            i2_mv_u_y = i2_mvy + 1;
221
128k
        }
222
223
981k
        if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy))
224
244k
        {
225
244k
            ps_mb_part->u4_exit = 1;
226
244k
            break;
227
244k
        }
228
736k
        else
229
736k
        {
230
736k
            i2_mvx = i2_mv_u_x;
231
736k
            i2_mvy = i2_mv_u_y;
232
736k
        }
233
736k
        u4_num_layers--;
234
736k
    }
235
236
303k
    if (i4_cost_least < ps_mb_part->i4_mb_cost)
237
152k
    {
238
152k
        ps_mb_part->i4_mb_cost = i4_cost_least;
239
152k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
240
152k
        ps_mb_part->s_mv_curr.i2_mvx = i2_mvx;
241
152k
        ps_mb_part->s_mv_curr.i2_mvy = i2_mvy;
242
152k
    }
243
244
303k
}
245
246
247
/**
248
*******************************************************************************
249
*
250
* @brief This function computes the best motion vector among the tentative mv
251
* candidates chosen.
252
*
253
* @par Description:
254
*  This function determines the position in the search window at which the motion
255
*  estimation should begin in order to minimise the number of search iterations.
256
*
257
* @param[in] ps_me_ctxt
258
*  pointer to me context
259
*
260
* @param[in] i4_reflist
261
*  ref list
262
*
263
* @returns  mv pair & corresponding distortion and cost
264
*
265
* @remarks none
266
*
267
*******************************************************************************
268
*/
269
270
void ime_evaluate_init_srchposn_16x16
271
        (
272
            me_ctxt_t *ps_me_ctxt,
273
            WORD32 i4_reflist
274
        )
275
302k
{
276
302k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
277
278
    /* candidate mv cnt */
279
302k
    UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist];
280
281
    /* list of candidate mvs */
282
302k
    ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist];
283
284
    /* pointer to src macro block */
285
302k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
286
302k
    UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist];
287
288
    /* strides */
289
302k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
290
302k
    WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd;
291
292
    /* enabled fast sad computation */
293
302k
    UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
294
295
    /* SAD(distortion metric) of an 8x8 block */
296
302k
    WORD32 i4_mb_distortion;
297
298
    /* cost = distortion + u4_lambda_motion * rate */
299
302k
    WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX;
300
301
    /* mb partitions info */
302
302k
    mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]);
303
304
    /* mv bits */
305
302k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
306
307
    /* temp var */
308
302k
    UWORD32  i, j;
309
302k
    WORD32 i4_srch_pos_idx = 0;
310
302k
    UWORD8 *pu1_ref = NULL;
311
312
    /* Carry out a search using each of the motion vector pairs identified above as predictors. */
313
    /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */
314
1.48M
    for(i = 0; i < u4_num_candidates; i++)
315
1.18M
    {
316
        /* compute sad */
317
1.18M
        WORD32 c_sad = 1;
318
319
1.90M
        for(j = 0; j < i; j++ )
320
1.29M
        {
321
1.29M
            if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) &&
322
746k
                            (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) )
323
576k
            {
324
576k
                c_sad = 0;
325
576k
                break;
326
576k
            }
327
1.29M
        }
328
1.18M
        if(c_sad)
329
609k
        {
330
            /* adjust ref pointer */
331
609k
            pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd);
332
333
            /* compute distortion */
334
609k
            ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion);
335
336
609k
            DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3);
337
338
            /* compute cost */
339
609k
            i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx]
340
609k
                            + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] ));
341
342
609k
            if (i4_mb_cost < i4_mb_cost_least)
343
451k
            {
344
451k
                i4_mb_cost_least = i4_mb_cost;
345
346
451k
                i4_distortion_least = i4_mb_distortion;
347
348
451k
                i4_srch_pos_idx = i;
349
451k
            }
350
609k
        }
351
1.18M
    }
352
353
302k
    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
354
303k
    {
355
303k
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
356
303k
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
357
303k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
358
303k
        ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx;
359
303k
        ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy;
360
303k
    }
361
302k
}
362
363
/**
364
*******************************************************************************
365
*
366
* @brief Searches for the best matching full pixel predictor within the search
367
* range
368
*
369
* @par Description:
370
*  For a given algorithm (diamond, Hex, nStep, ...) chosen, it searches for the
371
*  best matching full pixel predictor within the search range
372
*
373
* @param[in] ps_me_ctxt
374
*  pointer to me context
375
*
376
* @param[in] i4_reflist
377
*  ref list
378
*
379
* @returns  mv pair & corresponding distortion and cost
380
*
381
* @remarks none
382
*
383
*******************************************************************************
384
*/
385
void ime_full_pel_motion_estimation_16x16
386
    (
387
        me_ctxt_t *ps_me_ctxt,
388
        WORD32 i4_ref_list
389
    )
390
303k
{
391
    /* mb part info */
392
303k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list];
393
394
    /******************************************************************/
395
    /* Modify Search range about initial candidate instead of zero mv */
396
    /******************************************************************/
397
    /*
398
     * FIXME: The motion vectors in a way can become unbounded. It may so happen that
399
     * MV might exceed the limit of the profile configured.
400
     */
401
303k
    ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w,
402
303k
                                      -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
403
303k
    ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e,
404
303k
                                       ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx);
405
303k
    ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n,
406
303k
                                      -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
407
303k
    ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s,
408
303k
                                       ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy);
409
410
    /************************************************************/
411
    /* Traverse about best initial candidate for mv             */
412
    /************************************************************/
413
414
303k
    switch (ps_me_ctxt->u4_me_speed_preset)
415
303k
    {
416
303k
        case DMND_SRCH:
417
303k
            ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list);
418
303k
            break;
419
0
        default:
420
0
            assert(0);
421
0
            break;
422
303k
    }
423
303k
}
424
425
/**
426
*******************************************************************************
427
*
428
* @brief Searches for the best matching sub pixel predictor within the search
429
* range
430
*
431
* @par Description:
432
*  This function begins by searching across all sub pixel sample points
433
*  around the full pel motion vector. The vector with least cost is chosen as
434
*  the mv for the current mb.
435
*
436
* @param[in] ps_me_ctxt
437
*  pointer to me context
438
*
439
* @param[in] i4_reflist
440
*  ref list
441
*
442
* @returns mv pair & corresponding distortion and cost
443
*
444
* @remarks none
445
*
446
*******************************************************************************
447
*/
448
void ime_sub_pel_motion_estimation_16x16
449
    (
450
        me_ctxt_t *ps_me_ctxt,
451
        WORD32 i4_reflist
452
    )
453
207k
{
454
    /* pointers to src & ref macro block */
455
207k
    UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma;
456
457
    /* pointers to ref. half pel planes */
458
207k
    UWORD8 *pu1_ref_mb_half_x;
459
207k
    UWORD8 *pu1_ref_mb_half_y;
460
207k
    UWORD8 *pu1_ref_mb_half_xy;
461
462
    /* pointers to ref. half pel planes */
463
207k
    UWORD8 *pu1_ref_mb_half_x_temp;
464
207k
    UWORD8 *pu1_ref_mb_half_y_temp;
465
207k
    UWORD8 *pu1_ref_mb_half_xy_temp;
466
467
    /* strides */
468
207k
    WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd;
469
470
207k
    WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd;
471
472
    /* mb partitions info */
473
207k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
474
475
    /* SAD(distortion metric) of an mb */
476
207k
    WORD32 i4_mb_distortion;
477
207k
    WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion;
478
479
    /* cost = distortion + u4_lambda_motion * rate */
480
207k
    WORD32 i4_mb_cost;
481
207k
    WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost;
482
483
    /*Best half pel buffer*/
484
207k
    UWORD8 *pu1_best_hpel_buf = NULL;
485
486
    /* mv bits */
487
207k
    UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits;
488
489
    /* Motion vectors in full-pel units */
490
207k
    WORD16 mv_x, mv_y;
491
492
    /* lambda - lagrange constant */
493
207k
    UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion;
494
495
    /* Flags to check if half pel points needs to be evaluated */
496
    /**************************************/
497
    /* 1 bit for each half pel candidate  */
498
    /* bit 0 - half x = 1, half y = 0     */
499
    /* bit 1 - half x = -1, half y = 0    */
500
    /* bit 2 - half x = 0, half y = 1     */
501
    /* bit 3 - half x = 0, half y = -1    */
502
    /* bit 4 - half x = 1, half y = 1     */
503
    /* bit 5 - half x = -1, half y = 1    */
504
    /* bit 6 - half x = 1, half y = -1    */
505
    /* bit 7 - half x = -1, half y = -1   */
506
    /**************************************/
507
    /* temp var */
508
207k
    WORD16 i2_mv_u_x, i2_mv_u_y;
509
207k
    WORD32 i, j;
510
207k
    WORD32 ai4_sad[8];
511
512
207k
    WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx;
513
514
207k
    i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx;
515
207k
    i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy;
516
517
    /************************************************************/
518
    /* Evaluate half pel                                        */
519
    /************************************************************/
520
207k
    mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2;
521
207k
    mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2;
522
523
524
    /**************************************************************/
525
    /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */
526
    /* left side of full pel                                      */
527
    /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */
528
    /* top  side of full pel                                      */
529
    /* ps_me_ctxt->pu1_half_xy points to the half pel pixel       */
530
    /* on the top left side of full pel                           */
531
    /* for the function pf_ime_sub_pel_compute_sad_16x16 the      */
532
    /* default postions are                                       */
533
    /* ps_me_ctxt->pu1_half_x = right halp_pel                    */
534
    /*  ps_me_ctxt->pu1_half_y = bottom halp_pel                  */
535
    /*  ps_me_ctxt->pu1_half_xy = bottom right halp_pel           */
536
    /* Hence corresponding adjustments made here                  */
537
    /**************************************************************/
538
539
207k
    pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1;
540
207k
    pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd;
541
207k
    pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd;
542
543
207k
    ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x,
544
207k
                                                 pu1_ref_mb_half_y,
545
207k
                                                 pu1_ref_mb_half_xy,
546
207k
                                                 i4_src_strd, i4_ref_strd,
547
207k
                                                 ai4_sad);
548
549
    /* Half x plane */
550
621k
    for(i = 0; i < 2; i++)
551
414k
    {
552
414k
        WORD32 mv_x_tmp = (mv_x << 2) + 2;
553
414k
        WORD32 mv_y_tmp = (mv_y << 2);
554
555
414k
        mv_x_tmp -= (i * 4);
556
557
414k
        i4_mb_distortion = ai4_sad[i];
558
559
        /* compute cost */
560
414k
        i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
561
414k
                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
562
563
414k
        if (i4_mb_cost < i4_mb_cost_least)
564
76.7k
        {
565
76.7k
            i4_mb_cost_least = i4_mb_cost;
566
567
76.7k
            i4_distortion_least = i4_mb_distortion;
568
569
76.7k
            i2_mv_u_x = mv_x_tmp;
570
571
76.7k
            i2_mv_u_y = mv_y_tmp;
572
573
76.7k
#ifndef HP_PL /*choosing whether left or right half_x*/
574
76.7k
            ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i;
575
76.7k
            pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i;
576
577
76.7k
            i4_srch_pos_idx = 0;
578
76.7k
#endif
579
76.7k
        }
580
581
414k
    }
582
583
    /* Half y plane */
584
621k
    for(i = 0; i < 2; i++)
585
414k
    {
586
414k
        WORD32 mv_x_tmp = (mv_x << 2);
587
414k
        WORD32 mv_y_tmp = (mv_y << 2) + 2;
588
589
414k
        mv_y_tmp -= (i * 4);
590
591
414k
        i4_mb_distortion = ai4_sad[2 + i];
592
593
        /* compute cost */
594
414k
        i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
595
414k
                        + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
596
597
414k
        if (i4_mb_cost < i4_mb_cost_least)
598
54.5k
        {
599
54.5k
            i4_mb_cost_least = i4_mb_cost;
600
601
54.5k
            i4_distortion_least = i4_mb_distortion;
602
603
54.5k
            i2_mv_u_x = mv_x_tmp;
604
605
54.5k
            i2_mv_u_y = mv_y_tmp;
606
607
54.5k
#ifndef HP_PL/*choosing whether top or bottom half_y*/
608
54.5k
            ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
609
54.5k
            pu1_best_hpel_buf = pu1_ref_mb_half_y_temp  - i*(i4_ref_strd);
610
611
54.5k
            i4_srch_pos_idx = 1;
612
54.5k
#endif
613
54.5k
        }
614
615
414k
    }
616
617
    /* Half xy plane */
618
621k
    for(j = 0; j < 2; j++)
619
414k
    {
620
1.24M
        for(i = 0; i < 2; i++)
621
828k
        {
622
828k
            WORD32 mv_x_tmp = (mv_x << 2) + 2;
623
828k
            WORD32 mv_y_tmp = (mv_y << 2) + 2;
624
625
828k
            mv_x_tmp -= (i * 4);
626
828k
            mv_y_tmp -= (j * 4);
627
628
828k
            i4_mb_distortion = ai4_sad[4 + i + 2 * j];
629
630
            /* compute cost */
631
828k
            i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx]
632
828k
                            + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] ));
633
634
828k
            if (i4_mb_cost < i4_mb_cost_least)
635
38.4k
            {
636
38.4k
                i4_mb_cost_least = i4_mb_cost;
637
638
38.4k
                i4_distortion_least = i4_mb_distortion;
639
640
38.4k
                i2_mv_u_x = mv_x_tmp;
641
642
38.4k
                i2_mv_u_y = mv_y_tmp;
643
644
38.4k
#ifndef HP_PL /*choosing between four half_xy */
645
38.4k
                ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
646
38.4k
                pu1_best_hpel_buf =  pu1_ref_mb_half_xy_temp  - j*(i4_ref_strd) - i;
647
648
38.4k
                i4_srch_pos_idx = 2;
649
38.4k
#endif
650
38.4k
            }
651
652
828k
        }
653
414k
    }
654
655
207k
    if (i4_mb_cost_least < ps_mb_part->i4_mb_cost)
656
99.5k
    {
657
99.5k
        ps_mb_part->i4_mb_cost = i4_mb_cost_least;
658
99.5k
        ps_mb_part->i4_mb_distortion = i4_distortion_least;
659
99.5k
        ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x;
660
99.5k
        ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y;
661
99.5k
        ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf;
662
99.5k
        ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx;
663
99.5k
    }
664
207k
}
665
666
/**
667
*******************************************************************************
668
*
669
* @brief This function computes cost of skip macroblocks
670
*
671
* @par Description:
672
*
673
* @param[in] ps_me_ctxt
674
*  pointer to me ctxt
675
*
676
*
677
* @returns  none
678
*
679
* @remarks
680
* NOTE: while computing the skip cost, do not enable early exit from compute
681
* sad function because, a negative bias gets added later
682
* Note that the last ME candidate in me ctxt is taken as skip motion vector
683
*
684
*******************************************************************************
685
*/
686
void ime_compute_skip_cost
687
    (
688
         me_ctxt_t *ps_me_ctxt,
689
         ime_mv_t *ps_skip_mv,
690
         mb_part_ctxt *ps_smb_part_info,
691
         UWORD32 u4_use_stat_sad,
692
         WORD32 i4_reflist,
693
         WORD32 i4_is_slice_type_b
694
    )
695
164k
{
696
697
    /* SAD(distortion metric) of an mb */
698
164k
    WORD32 i4_mb_distortion;
699
700
    /* cost = distortion + u4_lambda_motion * rate */
701
164k
    WORD32 i4_mb_cost;
702
703
    /* temp var */
704
164k
    UWORD8 *pu1_ref = NULL;
705
706
164k
    ime_mv_t s_skip_mv;
707
708
164k
    s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2;
709
164k
    s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2;
710
711
    /* Check if the skip mv is out of bounds or subpel */
712
164k
    {
713
        /* skip mv */
714
164k
        ime_mv_t s_clip_skip_mv;
715
716
164k
        s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx);
717
164k
        s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy);
718
719
164k
        if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) ||
720
162k
           (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) ||
721
160k
           (ps_skip_mv->i2_mvx & 0x3) ||
722
150k
           (ps_skip_mv->i2_mvy & 0x3))
723
17.6k
        {
724
17.6k
            return ;
725
17.6k
        }
726
164k
    }
727
728
729
    /* adjust ref pointer */
730
146k
    pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx
731
146k
                    + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd);
732
733
146k
    if(u4_use_stat_sad == 1)
734
146k
    {
735
146k
        UWORD32 u4_is_nonzero;
736
737
146k
        ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16(
738
146k
                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
739
146k
                        ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh,
740
146k
                        &i4_mb_distortion, &u4_is_nonzero);
741
742
146k
        if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
743
21.8k
        {
744
21.8k
            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
745
21.8k
            ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion;
746
21.8k
        }
747
146k
    }
748
18.4E
    else
749
18.4E
    {
750
18.4E
        ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad](
751
18.4E
                        ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd,
752
18.4E
                        ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion);
753
754
18.4E
        if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad)
755
0
        {
756
0
            ps_me_ctxt->i4_min_sad = i4_mb_distortion;
757
0
            ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */
758
0
        }
759
18.4E
    }
760
761
762
    /* for skip mode cost & distortion are identical
763
     * But we shall add a bias to favor skip mode.
764
     * Doc. JVT B118 Suggests SKIP_BIAS as 16.
765
     * TODO : Empirical analysis of SKIP_BIAS is necessary */
766
767
146k
    i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1]  * i4_is_slice_type_b));
768
769
146k
    if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost)
770
146k
    {
771
146k
        ps_smb_part_info->i4_mb_cost = i4_mb_cost;
772
146k
        ps_smb_part_info->i4_mb_distortion = i4_mb_distortion;
773
146k
        ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx;
774
146k
        ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy;
775
146k
    }
776
146k
}
777