Coverage Report

Created: 2025-07-18 07:04

/src/libavc/encoder/ih264e_me.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ih264e_me.c
25
*
26
* @brief
27
*  Contains definition of functions for motion estimation
28
*
29
* @author
30
*  ittiam
31
*
32
* @par List of Functions:
33
*  - ih264e_init_mv_bits
34
*  - ih264e_get_search_candidates
35
*  - ih264e_find_pskip_params
36
*  - ih264e_find_pskip_params_me
37
*  - ih264e_get_mv_predictor
38
*  - ih264e_mv_pred
39
*  - ih264e_mv_pred_me
40
*  - ih264e_compute_me_single_reflist
41
*  - ih264e_compute_me_nmb
42
*  - ih264e_find_bskip_params_me
43
*  - ih264e_find_bskip_params
44
*  - ih264e_evaluate_bipred
45
*  - ih264e_compute_me_multi_reflist
46
*
47
* @remarks
48
*  none
49
*
50
*******************************************************************************
51
*/
52
53
/*****************************************************************************/
54
/* File Includes                                                             */
55
/*****************************************************************************/
56
57
/* System Include Files */
58
#include <stdio.h>
59
#include <assert.h>
60
#include <limits.h>
61
62
/* User Include Files */
63
#include "ih264_typedefs.h"
64
#include "iv2.h"
65
#include "ive2.h"
66
#include "ithread.h"
67
68
#include "ih264_debug.h"
69
#include "ih264_macros.h"
70
#include "ih264_defs.h"
71
#include "ih264_mem_fns.h"
72
#include "ih264_padding.h"
73
#include "ih264_structs.h"
74
#include "ih264_trans_quant_itrans_iquant.h"
75
#include "ih264_inter_pred_filters.h"
76
#include "ih264_intra_pred_filters.h"
77
#include "ih264_deblk_edge_filters.h"
78
#include "ih264_cabac_tables.h"
79
#include "ih264_platform_macros.h"
80
81
#include "ime_defs.h"
82
#include "ime_distortion_metrics.h"
83
#include "ime_structs.h"
84
#include "ime.h"
85
#include "ime_statistics.h"
86
87
#include "irc_cntrl_param.h"
88
#include "irc_frame_info_collector.h"
89
90
#include "ih264e_error.h"
91
#include "ih264e_defs.h"
92
#include "ih264e_globals.h"
93
#include "ih264e_rate_control.h"
94
#include "ih264e_bitstream.h"
95
#include "ih264e_cabac_structs.h"
96
#include "ih264e_structs.h"
97
#include "ih264e_mc.h"
98
#include "ih264e_me.h"
99
#include "ih264e_half_pel.h"
100
#include "ih264e_intra_modes_eval.h"
101
#include "ih264e_core_coding.h"
102
#include "ih264e_platform_macros.h"
103
104
105
/*****************************************************************************/
106
/* Function Definitions                                                      */
107
/*****************************************************************************/
108
109
/**
110
*******************************************************************************
111
*
112
* @brief
113
*  This function populates the length of the codewords for motion vectors in the
114
*  range (-search range, search range) in pixels
115
*
116
* @param[in] ps_me
117
*  Pointer to me ctxt
118
*
119
* @param[out] pu1_mv_bits
120
*  length of the codeword for all mv's
121
*
122
* @remarks The length of the code words are derived from signed exponential
123
*  goloumb codes.
124
*
125
*******************************************************************************
126
*/
127
void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
128
4.75k
{
129
    /* temp var */
130
4.75k
    WORD32 i, codesize = 3, diff, limit;
131
4.75k
    UWORD32 u4_code_num, u4_range;
132
4.75k
    UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
133
134
    /* max srch range */
135
4.75k
    diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
136
    /* sub pel */
137
4.75k
    diff <<= 2;
138
    /* delta mv */
139
4.75k
    diff <<= 1;
140
141
    /* codeNum for positive integer     =  2x-1     : Table9-3  */
142
4.75k
    u4_code_num = (diff << 1);
143
144
    /* get range of the bit string and put using put_bits()                 */
145
4.75k
    GETRANGE(u4_range, u4_code_num);
146
147
4.75k
    limit = 2*u4_range - 1;
148
149
    /* init mv bits */
150
4.75k
    ps_me_ctxt->pu1_mv_bits[0] = 1;
151
152
57.0k
    while (codesize < limit)
153
52.2k
    {
154
52.2k
        u4_uev_min = (1 << (codesize >> 1));
155
52.2k
        u4_uev_max = 2*u4_uev_min - 1;
156
157
52.2k
        u4_sev_min = u4_uev_min >> 1;
158
52.2k
        u4_sev_max = u4_uev_max >> 1;
159
160
52.2k
        DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
161
162
9.77M
        for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
163
9.72M
        {
164
9.72M
            ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
165
9.72M
        }
166
167
52.2k
        codesize += 2;
168
52.2k
    }
169
4.75k
}
170
171
/**
172
*******************************************************************************
173
*
174
* @brief Determines the valid candidates for which the initial search shall happen.
175
* The best of these candidates is used to center the diamond pixel search.
176
*
177
* @par Description The function sends the skip, (0,0), left, top and top-right
178
* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
179
* these are the same MVs that are used to form the MV predictor. This initial MV
180
* search candidates need not take care of slice boundaries and hence neighbor
181
* availability checks are not made here.
182
*
183
* @param[in] ps_proc
184
*  Pointer to process context
185
*
186
* @param[in] ps_me_ctxt
187
*  pointer to me context
188
*
189
* @param[in] i4_ref_list
190
*  Current active reference list
191
*
192
* @returns  The list of MVs to be used of priming the full pel search and the
193
* number of such MVs
194
*
195
* @remarks
196
*   Assumptions : 1. Assumes Only partition of size 16x16
197
*
198
*******************************************************************************
199
*/
200
static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
201
                                         me_ctxt_t *ps_me_ctxt,
202
                                         WORD32 i4_reflist)
203
293k
{
204
    /* curr mb indices */
205
293k
    WORD32 i4_mb_x = ps_proc->i4_mb_x;
206
207
    /* Motion vector */
208
293k
    mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
209
210
    /* Pred modes */
211
293k
    WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
212
213
    /* mb part info */
214
293k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
215
216
    /* mvs */
217
293k
    WORD32 mvx, mvy;
218
219
    /* ngbr availability */
220
293k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
221
222
    /* Current mode */
223
293k
    WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
224
225
    /* srch range*/
226
293k
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
227
293k
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
228
293k
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
229
293k
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
230
231
    /* num of candidate search candidates */
232
293k
    UWORD32 u4_num_candidates = 0;
233
234
293k
    ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
235
293k
    ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
236
293k
    ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
237
293k
    ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
238
239
293k
    i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
240
293k
    i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
241
293k
    i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
242
293k
    i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
243
244
    /* Taking the Zero motion vector as one of the candidates   */
245
293k
    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
246
293k
    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
247
248
293k
    u4_num_candidates++;
249
250
    /* Taking the Left MV Predictor as one of the candidates    */
251
293k
    if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
252
135k
    {
253
135k
        mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
254
135k
        mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
255
256
135k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
257
135k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
258
259
135k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
260
135k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
261
262
135k
        u4_num_candidates ++;
263
135k
    }
264
265
    /* Taking the Top MV Predictor as one of the candidates     */
266
293k
    if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
267
132k
    {
268
132k
        mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
269
132k
        mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
270
271
132k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
272
132k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
273
274
132k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
275
132k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
276
277
132k
        u4_num_candidates ++;
278
279
        /* Taking the TopRt MV Predictor as one of the candidates   */
280
132k
        if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
281
79.8k
        {
282
79.8k
            mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
283
79.8k
            mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
284
285
79.8k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
286
79.8k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
287
288
79.8k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
289
79.8k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
290
291
79.8k
            u4_num_candidates ++;
292
79.8k
        }
293
        /* Taking the TopLt MV Predictor as one of the candidates   */
294
52.9k
        else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
295
35.5k
        {
296
35.5k
            mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
297
35.5k
            mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
298
299
35.5k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
300
35.5k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
301
302
35.5k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
303
35.5k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
304
305
35.5k
            u4_num_candidates ++;
306
35.5k
        }
307
132k
    }
308
309
    /********************************************************************/
310
    /*                            MV Prediction                         */
311
    /********************************************************************/
312
293k
    ih264e_mv_pred_me(ps_proc, i4_reflist);
313
314
293k
    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
315
293k
    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
316
317
    /* Get the skip motion vector                               */
318
293k
    {
319
293k
        ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
320
293k
                                    [ps_proc->i4_slice_type](ps_proc, i4_reflist);
321
322
        /* Taking the Skip motion vector as one of the candidates   */
323
293k
        mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
324
293k
        mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
325
326
293k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
327
293k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
328
329
293k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
330
293k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
331
293k
        u4_num_candidates++;
332
333
293k
        if (ps_proc->i4_slice_type == BSLICE)
334
185k
        {
335
            /* Taking the temporal Skip motion vector as one of the candidates   */
336
185k
            mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
337
185k
            mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
338
339
185k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
340
185k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
341
342
185k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
343
185k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
344
185k
            u4_num_candidates++;
345
185k
        }
346
293k
    }
347
348
293k
    ASSERT(u4_num_candidates <= 6);
349
350
292k
    ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
351
292k
}
352
353
/**
354
*******************************************************************************
355
*
356
* @brief The function computes parameters for a PSKIP MB
357
*
358
* @par Description:
359
*  The function updates the skip motion vector and checks if the current
360
*  MB can be a PSKIP MB or not
361
*
362
* @param[in] ps_proc
363
*  Pointer to process context
364
*
365
* @param[in] i4_ref_list
366
*  Current active reference list
367
*
368
* @returns Flag indicating if the current MB can be marked as skip
369
*
370
*******************************************************************************
371
*/
372
WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
373
15.3k
{
374
    /* left mb motion vector */
375
15.3k
    enc_pu_t *ps_left_mb_pu ;
376
377
    /* top mb motion vector */
378
15.3k
    enc_pu_t *ps_top_mb_pu ;
379
380
    /* Skip mv */
381
15.3k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
382
383
15.3k
    UNUSED(i4_reflist);
384
385
15.3k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
386
15.3k
    ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
387
388
15.3k
    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
389
15.3k
        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
390
15.3k
        (
391
6.49k
          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
392
6.49k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
393
6.49k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
394
6.49k
       ) ||
395
15.3k
       (
396
2.77k
          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
397
2.77k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
398
2.77k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
399
2.77k
       )
400
15.3k
     )
401
13.0k
    {
402
13.0k
        ps_skip_mv->i2_mvx = 0;
403
13.0k
        ps_skip_mv->i2_mvy = 0;
404
13.0k
    }
405
2.31k
    else
406
2.31k
    {
407
2.31k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
408
2.31k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
409
2.31k
    }
410
411
15.3k
    if ((ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
412
15.3k
     && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
413
11.3k
    {
414
11.3k
        return 1;
415
11.3k
    }
416
417
3.94k
    return 0;
418
15.3k
}
419
420
/**
421
*******************************************************************************
422
*
423
* @brief The function computes parameters for a PSKIP MB
424
*
425
* @par Description:
426
*  The function updates the skip motion vector and checks if the current
427
*  MB can be a PSKIP MB or not
428
*
429
* @param[in] ps_proc
430
*  Pointer to process context
431
*
432
* @param[in] i4_ref_list
433
*  Current active reference list
434
*
435
* @returns Flag indicating if the current MB can be marked as skip
436
*
437
*******************************************************************************
438
*/
439
WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
440
107k
{
441
    /* left mb motion vector */
442
107k
    enc_pu_t *ps_left_mb_pu ;
443
444
    /* top mb motion vector */
445
107k
    enc_pu_t *ps_top_mb_pu ;
446
447
    /* Skip mv */
448
107k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
449
450
107k
    UNUSED(i4_reflist);
451
452
107k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
453
107k
    ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
454
455
107k
    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
456
107k
        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
457
107k
        (
458
43.9k
          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
459
43.9k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
460
43.9k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
461
43.9k
        ) ||
462
107k
        (
463
34.1k
          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
464
34.1k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
465
34.1k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
466
34.1k
        )
467
107k
     )
468
75.4k
    {
469
75.4k
        ps_skip_mv->i2_mvx = 0;
470
75.4k
        ps_skip_mv->i2_mvy = 0;
471
75.4k
    }
472
31.5k
    else
473
31.5k
    {
474
31.5k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
475
31.5k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
476
31.5k
    }
477
478
107k
    return PRED_L0;
479
107k
}
480
481
/**
482
*******************************************************************************
483
*
484
* @brief motion vector predictor
485
*
486
* @par Description:
487
*  The routine calculates the motion vector predictor for a given block,
488
*  given the candidate MV predictors.
489
*
490
* @param[in] ps_left_mb_pu
491
*  pointer to left mb motion vector info
492
*
493
* @param[in] ps_top_row_pu
494
*  pointer to top & top right mb motion vector info
495
*
496
* @param[out] ps_pred_mv
497
*  pointer to candidate predictors for the current block
498
*
499
* @param[in] i4_ref_list
500
*  Current active reference list
501
*
502
* @returns  The x & y components of the MV predictor.
503
*
504
* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
505
*   specification.
506
*   Assumptions : 1. Assumes Single reference frame
507
*                 2. Assumes Only partition of size 16x16
508
*
509
*******************************************************************************
510
*/
511
void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
512
                             enc_pu_t *ps_top_row_pu,
513
                             enc_pu_mv_t *ps_pred_mv,
514
                             WORD32 i4_ref_list)
515
591k
{
516
    /* Indicated the current ref */
517
591k
    WORD8 i1_ref_idx;
518
519
    /* For pred L0 */
520
591k
    i1_ref_idx = -1;
521
591k
    {
522
        /* temp var */
523
591k
        WORD32 pred_algo = 3, a, b, c;
524
525
        /* If only one of the candidate blocks has a reference frame equal to
526
         * the current block then use the same block as the final predictor */
527
591k
        a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
528
591k
        b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
529
591k
        c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
530
531
591k
        if (a == 0 && b == -1 && c == -1)
532
101k
            pred_algo = 0; /* LEFT */
533
490k
        else if(a == -1 && b == 0 && c == -1)
534
33.3k
            pred_algo = 1; /* TOP */
535
456k
        else if(a == -1 && b == -1 && c == 0)
536
11.7k
            pred_algo = 2; /* TOP RIGHT */
537
538
591k
        switch (pred_algo)
539
591k
        {
540
101k
            case 0:
541
                /* left */
542
101k
                ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
543
101k
                ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
544
101k
                break;
545
33.3k
            case 1:
546
                /* top */
547
33.3k
                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
548
33.3k
                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
549
33.3k
                break;
550
11.7k
            case 2:
551
                /* top right */
552
11.7k
                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
553
11.7k
                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
554
11.7k
                break;
555
445k
            case 3:
556
                /* median */
557
445k
                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
558
445k
                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
559
445k
                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
560
445k
                       ps_pred_mv->s_mv.i2_mvx);
561
445k
                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
562
445k
                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
563
445k
                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
564
445k
                       ps_pred_mv->s_mv.i2_mvy);
565
566
445k
                break;
567
0
            default:
568
0
                break;
569
591k
        }
570
591k
    }
571
591k
}
572
573
/**
574
*******************************************************************************
575
*
576
* @brief This function performs MV prediction
577
*
578
* @par Description:
579
*
580
* @param[in] ps_proc
581
*  Process context corresponding to the job
582
*
583
* @param[in] i4_slice_type
584
*  slice type
585
*
586
* @returns  none
587
*
588
* @remarks none
589
*  This function will update the MB availability since intra inter decision
590
*  should be done before the call
591
*
592
*******************************************************************************
593
*/
594
void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
595
202k
{
596
    /* left mb motion vector */
597
202k
    enc_pu_t *ps_left_mb_pu;
598
599
    /* top left mb motion vector */
600
202k
    enc_pu_t *ps_top_left_mb_pu;
601
602
    /* top row motion vector info */
603
202k
    enc_pu_t *ps_top_row_pu;
604
605
    /* predicted motion vector */
606
202k
    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
607
608
    /* zero mv */
609
202k
    mv_t zero_mv = { 0, 0 };
610
611
    /*  mb neighbor availability */
612
202k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
613
614
    /* mb syntax elements of neighbors */
615
202k
    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
616
202k
    mb_info_t *ps_top_left_syn;
617
202k
    UWORD32 u4_left_is_intra;
618
619
    /* Temp var */
620
202k
    WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
621
622
202k
    ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
623
202k
    u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
624
202k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
625
202k
    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
626
202k
    ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
627
628
    /* Number of ref lists to process */
629
202k
    max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
630
631
501k
    for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
632
298k
    {
633
298k
        i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
634
635
        /* Before performing mv prediction prepare the ngbr information and
636
         * reset motion vectors basing on their availability */
637
298k
        if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
638
298k
                        || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
639
217k
        {
640
            /* left mv */
641
217k
            ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
642
217k
            ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
643
217k
        }
644
298k
        if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
645
298k
                        || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
646
221k
        {
647
            /* top mv */
648
221k
            ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
649
221k
            ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
650
221k
        }
651
652
298k
        if (!ps_ngbr_avbl->u1_mb_c)
653
185k
        {
654
            /* top right mv - When top right partition is not available for
655
             * prediction if top left is available use it for prediction else
656
             * set the mv information to -1 and (0, 0)
657
             * */
658
185k
            if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
659
185k
                            || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
660
159k
            {
661
159k
                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
662
159k
                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
663
159k
            }
664
25.7k
            else
665
25.7k
            {
666
25.7k
                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
667
25.7k
                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
668
25.7k
            }
669
185k
        }
670
113k
        else if(ps_top_syn[1].u2_is_intra
671
113k
                        || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
672
68.9k
        {
673
68.9k
            ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
674
68.9k
            ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
675
68.9k
        }
676
677
298k
        ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
678
298k
    }
679
202k
}
680
681
/**
682
*******************************************************************************
683
*
684
* @brief This function approximates Pred. MV
685
*
686
* @par Description:
687
*
688
* @param[in] ps_proc
689
*  Process context corresponding to the job
690
*
691
* @param[in] i4_ref_list
692
*  Current active reference list
693
*
694
* @returns  none
695
*
696
* @remarks none
697
*  Motion estimation happens at nmb level. For cost calculations, mv is appro
698
*  ximated using this function
699
*
700
*******************************************************************************
701
*/
702
void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
703
293k
{
704
    /* left mb motion vector */
705
293k
    enc_pu_t *ps_left_mb_pu ;
706
707
    /* top left mb motion vector */
708
293k
    enc_pu_t *ps_top_left_mb_pu ;
709
710
    /* top row motion vector info */
711
293k
    enc_pu_t *ps_top_row_pu;
712
713
293k
    enc_pu_t s_top_row_pu[2];
714
715
    /* predicted motion vector */
716
293k
    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
717
718
    /* zero mv */
719
293k
    mv_t zero_mv = {0, 0};
720
721
    /* Complementary pred mode */
722
293k
    WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
723
724
    /*  mb neighbor availability */
725
293k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
726
727
293k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
728
293k
    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
729
293k
    ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
730
731
293k
    s_top_row_pu[0] = ps_top_row_pu[0];
732
293k
    s_top_row_pu[1] = ps_top_row_pu[1];
733
734
    /*
735
     * Before performing mv prediction prepare the ngbr information and
736
     * reset motion vectors basing on their availability
737
     */
738
293k
    if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
739
158k
    {
740
        /* left mv */
741
158k
        ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
742
158k
        ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
743
158k
    }
744
293k
    if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
745
160k
    {
746
        /* top mv */
747
160k
        s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
748
160k
        s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
749
750
160k
    }
751
293k
    if (!ps_ngbr_avbl->u1_mb_c)
752
181k
    {
753
        /* top right mv - When top right partition is not available for
754
         * prediction if top left is available use it for prediction else
755
         * set the mv information to -1 and (0, 0)
756
         * */
757
181k
        if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
758
144k
        {
759
144k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
760
144k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
761
762
144k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
763
144k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
764
144k
        }
765
37.5k
        else
766
37.5k
        {
767
37.5k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
768
37.5k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
769
37.5k
        }
770
181k
    }
771
111k
    else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
772
27.2k
    {
773
27.2k
        ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
774
27.2k
        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
775
27.2k
    }
776
777
293k
    ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
778
293k
                            &ps_pred_mv[i4_ref_list], i4_ref_list);
779
293k
}
780
781
/**
782
*******************************************************************************
783
*
784
* @brief This function initializes me ctxt
785
*
786
* @par Description:
787
*  Before dispatching the current job to me thread, the me context associated
788
*  with the job is initialized.
789
*
790
* @param[in] ps_proc
791
*  Process context corresponding to the job
792
*
793
* @returns  none
794
*
795
* @remarks none
796
*
797
*******************************************************************************
798
*/
799
void ih264e_init_me(process_ctxt_t *ps_proc)
{
    /* handles to the me context being set up and to the owning codec */
    me_ctxt_t *ps_me = &ps_proc->s_me_ctxt;
    codec_t *ps_cdc = ps_proc->ps_codec;

    /* source plane and reference planes are taken over from the process ctxt */
    ps_me->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
    ps_me->i4_src_strd = ps_proc->i4_src_strd;
    ps_me->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
    ps_me->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];

    /* skip bias : the P slice bias is scaled by 4 when no B frames are configured */
    ps_me->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
    ps_me->i4_skip_bias[PSLICE] = (0 == ps_cdc->s_cfg.u4_num_bframes) ?
                                  (4 * SKIP_BIAS_P) : (SKIP_BIAS_P);

    /* lagrange multiplier for motion cost, looked up by mb qp; B pictures
     * use their own table */
    ps_me->u4_lambda_motion = (PIC_B == ps_cdc->pic_type) ?
                              gu1_qp_lambdaB[ps_me->u1_mb_qp] :
                              gu1_qp_lambdaIP[ps_me->u1_mb_qp];
}
837
838
839
/**
840
*******************************************************************************
841
*
842
* @brief This function performs motion estimation for the current mb using
843
*   single reference list
844
*
845
* @par Description:
846
*  The current mb is compared with a list of mb's in the reference frame for
847
*  least cost. The mb that offers least cost is chosen as predicted mb and the
848
*  displacement of the predicted mb from index location of the current mb is
849
*  signaled as mv. The list of the mb's that are chosen in the reference frame
850
*  are dependent on the speed of the ME configured.
851
*
852
* @param[in] ps_proc
853
*  Process context corresponding to the job
854
*
855
* @returns  motion vector of the pred mb, sad, cost.
856
*
857
* @remarks none
858
*
859
*******************************************************************************
860
*/
861
void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
{
    /* me ctxt */
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* recon stride */
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* source buffer for half pel generation functions */
    UWORD8 *pu1_hpel_src;

    /* quantization parameters */
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];

    /* Mb part ctxt for SKIP */
    mb_part_ctxt s_skip_mbpart;

    /* Sad thresholds */
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;

    {
        WORD32 rows_above, rows_below, columns_left, columns_right;

        /* During evaluation for motion vectors do not search through padded regions */
        /* Obtain number of rows and columns that are effective for computing for me evaluation */
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;

        /* init srch range */
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
         * on all sides.
         */
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);

        /* this is to facilitate fast sub pel computation with minimal loads */
        ps_me_ctxt->i4_srch_range_w += 1;
        ps_me_ctxt->i4_srch_range_e -= 1;
        ps_me_ctxt->i4_srch_range_n += 1;
        ps_me_ctxt->i4_srch_range_s -= 1;
    }

    /* Compute ME and store the MVs */

    /***********************************************************************
     * Compute ME for list L0
     ***********************************************************************/

    /* Init min sad state for the current list */
    ps_me_ctxt->u4_min_sad_reached = 0;
    ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;

    /* Get the seed motion vector candidates */
    ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);

    /*****************************************************************
     * Evaluate the SKIP for current list
     *****************************************************************/
    s_skip_mbpart.s_mv_curr.i2_mvx = 0;
    s_skip_mbpart.s_mv_curr.i2_mvy = 0;
    s_skip_mbpart.i4_mb_cost = INT_MAX;
    s_skip_mbpart.i4_mb_distortion = INT_MAX;

    ime_compute_skip_cost(ps_me_ctxt,
                          (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
                          &s_skip_mbpart,
                          ps_proc->ps_codec->s_cfg.u4_enable_satqd,
                          PRED_L0,
                          0 /* Not a Bslice */);

    /* Scale the skip MV by 4. A multiplication is used instead of '<<= 2'
     * because left shifting a negative signed value is undefined in C */
    s_skip_mbpart.s_mv_curr.i2_mvx *= 4;
    s_skip_mbpart.s_mv_curr.i2_mvy *= 4;

    /******************************************************************
     * Evaluate ME For current list
     *****************************************************************/
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;

    /* Init Hpel */
    ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;

    /* The search below is skipped once the minimum SAD has been reached */
    if (!ps_me_ctxt->u4_min_sad_reached)
    {
        /* Evaluate search candidates for initial mv pt */
        ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);

        /********************************************************************/
        /*                  full pel motion estimation                      */
        /********************************************************************/
        ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);

        /* Scale the MV to qpel resolution (x4); multiplication keeps this
         * well defined for negative components */
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx *= 4;
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy *= 4;

        if (ps_me_ctxt->u4_enable_hpel)
        {
            /* moving src pointer to the converged motion vector location */
            pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
                           + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
                           + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2) * i4_rec_strd;

            ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
            ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
            ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];

            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;

            /* half pel search is done for both sides of full pel,
             * hence half_x of width x height = 17x16 is created
             * starting from left half_x of converged full pel */
            pu1_hpel_src -= 1;

            /* computing half_x */
            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
                                                  ps_me_ctxt->apu1_subpel_buffs[0],
                                                  i4_rec_strd,
                                                  ps_me_ctxt->u4_subpel_buf_strd);

            /*
             * Halfpel search is done for both sides of full pel,
             * hence half_y of width x height = 16x17 is created
             * starting from top half_y of converged full pel
             * for half_xy top_left is required
             * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
             */
            pu1_hpel_src -= i4_rec_strd;

            /* computing half_y , and half_xy */
            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
                            pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
                            ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
                            ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
                            ps_me_ctxt->u4_subpel_buf_strd);

            ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
        }
    }

    /***********************************************************************
     * If the skip MV gives a lower cost than the searched MV, copy the skip
     * result to the corresponding MBPART. Otherwise, if the sub pel search
     * selected a half pel buffer, copy it to the best sub pel buffer.
     * In B slices this loop should go only to PREDL1: If we found min sad
     * we will go to the skip ref list only
     * Have to find a way to make it without too much change or new vars
     **********************************************************************/
    if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
    {
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
    }
    else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
    {
        /* Now we have to copy the buffers */
        ps_codec->pf_inter_pred_luma_copy(
                        ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
                        ps_proc->pu1_best_subpel_buf,
                        ps_me_ctxt->u4_subpel_buf_strd,
                        ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
                        NULL, 0);
    }

    /**********************************************************************
     * Publish the result of the L0 search to the current PU and MB
     **********************************************************************/
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
    ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
    ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
    ps_proc->ps_cur_mb->u4_mb_type = P16x16;
    ps_proc->ps_pu->b2_pred_mode = PRED_L0;

    /* Mark the reflists */
    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
    ps_proc->ps_pu->s_me_info[1].i1_ref_idx = 0;

    /* number of partitions */
    ps_proc->u4_num_sub_partitions = 1;
    *(ps_proc->pu4_mb_pu_cnt) = 1;

    /* position in-terms of PU */
    ps_proc->ps_pu->b4_pos_x = 0;
    ps_proc->ps_pu->b4_pos_y = 0;

    /* PU size */
    ps_proc->ps_pu->b4_wd = 3;
    ps_proc->ps_pu->b4_ht = 3;

    /* Update min sad conditions */
    if (ps_me_ctxt->u4_min_sad_reached == 1)
    {
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
    }
}
1069
1070
/**
1071
*******************************************************************************
1072
*
1073
* @brief This function performs motion estimation for the current NMB
1074
*
1075
* @par Description:
1076
*  Initializes input and output pointers required by the function ih264e_compute_me
1077
*  and calls the function ih264e_compute_me in a loop to process NMBs.
1078
*
1079
* @param[in] ps_proc
1080
*  Process context corresponding to the job
1081
*
1082
* @param[in] u4_nmb_count
1083
*  Number of mb's to process
1084
*
1085
* @returns
1086
*
1087
* @remarks none
1088
*
1089
*******************************************************************************
1090
*/
1091
void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
{
    /* pic pu : remembered so that ps_proc->ps_pu can be restored on exit */
    enc_pu_t *ps_pu_begin = ps_proc->ps_pu;

    /* ME map row corresponding to the current mb row */
    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);

    /* temp var */
    UWORD32 u4_i;

    ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
    ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);

    for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
    {
        /* Wait for ME map */
        if (ps_proc->i4_mb_y > 0)
        {
            /* Wait for top right ME to be done */
            UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;

            /* The flag is read through a volatile pointer so that every
             * pass of the wait loop re-reads it from memory */
            volatile UWORD8 *pu1_buf;

            /* NOTE(review): i4_mb_x is itself advanced at the end of every
             * iteration, so this index runs ahead of the immediate top-right
             * MB for u4_i > 0 (clamped to the last MB of the row) - confirm
             * that the extra look-ahead is intended */
            WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;

            idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
            pu1_buf = pu1_me_map_tp_rw + idx;

            /* yield until the entry in the top row of the ME map is set */
            while (!(*pu1_buf))
            {
                ithread_yield();
            }
        }

        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);

        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);

        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;

        /* Set the best subpel buf to the correct mb so that the buffer can be copied */
        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;

        /* Set the min sad conditions. The threshold is taken from the codec
         * context; this is the only initialization of these two fields */
        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;

        /* Derive neighbor availability for the current macroblock */
        ih264e_derive_nghbr_avbl_of_mbs(ps_proc);

        /* init me */
        ih264e_init_me(ps_proc);

        /* Compute ME according to slice type */
        ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);

        /* update top and left structs */
        {
            mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
            mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
            enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
            enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
            enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;

            *ps_top_left_syn = *ps_top_syn;

            *ps_top_left_mb_pu = *ps_top_mv;
            *ps_left_mb_pu = *ps_proc->ps_pu;
        }

        ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;

        /* Copy the min sad reached info */
        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
        ps_proc->ps_nmb_info[u4_i].u4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;

        /*
         * To make sure that the MV map is properly synced to the
         * cache we need to do a data sync before marking this mb as done
         */
        {
            DATA_SYNC();

            pu1_me_map[ps_proc->i4_mb_x] = 1;
        }
        ps_proc->i4_mb_x++;

        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type == PSKIP);

        /* update buffers pointers */
        ps_proc->pu1_src_buf_luma += MB_SIZE;
        ps_proc->pu1_rec_buf_luma += MB_SIZE;
        ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
        ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;

        /*
         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
         * the stride per MB is MB_SIZE
         */
        ps_proc->pu1_src_buf_chroma += MB_SIZE;
        ps_proc->pu1_rec_buf_chroma += MB_SIZE;
        ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
        ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;

        ps_proc->pu4_mb_pu_cnt += 1;
    }

    /* Rewind everything that was advanced inside the loop so that the
     * process context again points at the first mb of this NMB */
    ps_proc->ps_pu = ps_pu_begin;
    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;

    /* update buffers pointers */
    ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
    ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
    ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
    ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;

    /*
     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
     * the stride per MB is MB_SIZE
     */
    ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
    ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
    ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
    ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;

    ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
}
1228
1229
1230
/**
1231
*******************************************************************************
1232
*
1233
* @brief The function computes parameters for a BSKIP MB
1234
*
1235
* @par Description:
1236
*  The function updates the skip motion vector for B Mb, check if the Mb can be
1237
*  marked as skip and returns it
1238
*
1239
* @param[in] ps_proc
1240
*  Pointer to process context
1241
*
1242
* @param[in] i4_reflist
1243
*  Current active reference list
1244
*
1245
* @returns Flag indicating if the current Mb can be skip or not
1246
*
1247
* @remarks
1248
*   The code implements the logic as described in sec 8.4.1.2.2
1249
*   It also computes co-located MB params according to sec 8.4.1.2.1
1250
*
1251
*   Need to add condition for this function to be used in ME
1252
*
1253
*******************************************************************************
1254
*/
1255
WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
{
    /* Colzero for co-located MB */
    WORD32 i4_colzeroflag;

    /* motion vectors for neighbouring MBs */
    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;

    /* Variables to check if a particular mB is available */
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;

    /* Mode availability : bit 0 is set for L0, bit 1 for L1 */
    WORD32 i4_mode_avail;

    /*  mb neighbor availability */
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;

    /* Temp var */
    WORD32 i, i4_cmpl_mode, i4_skip_type = -1;

    /*
     * Colocated motion vector
     */
    mv_t s_mvcol;

    /*
     * Colocated picture idx
     */
    WORD32 i4_refidxcol;

    UNUSED(i4_reflist);

    /**************************************************************************
     *Find co-located MB parameters
     *      See sec 8.4.1.2.1  for reference
     **************************************************************************/
    {
        /*
         * Find the co-located Mb and update the skip and pred appropriately
         * 1) Default colpic is forward ref : Table 8-6
         * 2) Default mb col is current MB : Table 8-8
         */

        if (ps_proc->ps_colpu->b1_intra_flag)
        {
            s_mvcol.i2_mvx = 0;
            s_mvcol.i2_mvy = 0;
            i4_refidxcol = -1;
        }
        else
        {
            /* the co-located mv is taken from L0 unless the co-located PU
             * is predicted from L1 only */
            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
            {
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
                i4_refidxcol = 0;
            }
            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
            {
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
                i4_refidxcol = 0;
            }
        }

        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
                        && (ABS(s_mvcol.i2_mvy) <= 1));

    }

    /***************************************************************************
     * Evaluating skip params : Spatial Skip
     **************************************************************************/
    {
    /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
    ps_a_pu = &ps_proc->s_left_mb_pu_ME;
    ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);

    /* neighbour C is the top right mb when available, else the top left mb */
    i4_c_avail = 0;
    if (ps_ngbr_avbl->u1_mb_c)
    {
        ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
        i4_c_avail = 1;
    }
    else
    {
        ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
    }

    i4_a = ps_ngbr_avbl->u1_mb_a;
    i4_b = ps_ngbr_avbl->u1_mb_b;
    i4_c = i4_c_avail;

    /* Init to no mode avail */
    i4_mode_avail = 0;
    for (i = 0; i < 2; i++)
    {
        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;

        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
    }

    /* both lists or no list available -> bi pred, else the available list */
    if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
    {
        i4_skip_type= PRED_BI;
    }
    else if(i4_mode_avail == 0x1)
    {
        i4_skip_type = PRED_L0;
    }
    else if(i4_mode_avail == 0x2)
    {
        i4_skip_type = PRED_L1;
    }

    /* Update skip MV for L0 */
    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
    {
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    }
    else
    {
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
    }

    /* Update skip MV for L1 */
    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
    {
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
    }
    else
    {
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
    }

    }

    /***************************************************************************
     * Evaluating skip params : Temporal skip
     * The results are written to entries 2 (L0) and 3 (L1) of ps_skip_mv
     **************************************************************************/
    {
        pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
        WORD32 i4_td, i4_tb;
        enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];

        ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
        ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];

        i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
        i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;

        i4_tb = CLIP3(-128, 127, i4_tb);
        i4_td = CLIP3(-128, 127, i4_td);

        if (0 == i4_td)
        {
            /* Both reference pictures have the same POC. Scaling would divide
             * by zero, so use the unscaled co-located mv for L0 and a zero mv
             * for L1, as sec 8.4.1.2.3 specifies for a POC difference of 0 */
            ps_skip_mv[PRED_L0].s_mv.i2_mvx = s_mvcol.i2_mvx & 0xfffc;
            ps_skip_mv[PRED_L0].s_mv.i2_mvy = s_mvcol.i2_mvy & 0xfffc;

            ps_skip_mv[PRED_L1].s_mv.i2_mvx = 0;
            ps_skip_mv[PRED_L1].s_mv.i2_mvy = 0;
        }
        else
        {
            WORD32 i4_tx, i4_dist_scale_factor;

            i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
            i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );

            /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
            ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
            ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;

            ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
            ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
        }

    }

    return i4_skip_type;
}
1429
1430
/**
1431
*******************************************************************************
1432
*
1433
* @brief The function computes the skip motion vector for B mb
1434
*
1435
* @par Description:
1436
*  The function gives the skip motion vector for B Mb, check if the Mb can be
1437
*  marked as skip
1438
*
1439
* @param[in] ps_proc
1440
*  Pointer to process context
1441
*
1442
* @param[in] i4_reflist
1443
*  Dummy
1444
*
1445
* @returns Flag indicating if the current Mb can be skip or not
1446
*
1447
* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1448
*   specification. It also computes co-located MB params according to sec 8.4.1.2.1
1449
*
1450
*******************************************************************************/
1451
WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1452
66.0k
{
1453
    /* Colzero for co-located MB */
1454
66.0k
    WORD32 i4_colzeroflag;
1455
1456
    /* motion vectors */
1457
66.0k
    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1458
1459
    /* Syntax elem */
1460
66.0k
    mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1461
1462
    /* Variables to check if a particular mB is available */
1463
66.0k
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1464
1465
    /* Mode availability, init to no modes available     */
1466
66.0k
    WORD32 i4_mode_avail;
1467
1468
    /*  mb neighbor availability */
1469
66.0k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1470
1471
    /* Temp var */
1472
66.0k
    WORD32 i, i4_cmpl_mode;
1473
1474
66.0k
    UNUSED(i4_reflist);
1475
1476
    /**************************************************************************
1477
     * Find co-locates parameters
1478
     *      See sec 8.4.1.2.1  for reference
1479
     **************************************************************************/
1480
66.0k
    {
1481
        /*
1482
         * Find the co-located Mb and update the skip and pred appropriately
1483
         * 1) Default colpic is forward ref : Table 8-6
1484
         * 2) Default mb col is current MB : Table 8-8
1485
         */
1486
1487
66.0k
        mv_t s_mvcol;
1488
66.0k
        WORD32 i4_refidxcol;
1489
1490
66.0k
        if (ps_proc->ps_colpu->b1_intra_flag)
1491
19.2k
        {
1492
19.2k
            s_mvcol.i2_mvx = 0;
1493
19.2k
            s_mvcol.i2_mvy = 0;
1494
19.2k
            i4_refidxcol = -1;
1495
19.2k
        }
1496
46.8k
        else
1497
46.8k
        {
1498
46.8k
            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1499
46.8k
            {
1500
46.8k
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1501
46.8k
                i4_refidxcol = 0;
1502
46.8k
            }
1503
18.4E
            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1504
18.4E
            {
1505
18.4E
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1506
18.4E
                i4_refidxcol = 0;
1507
18.4E
            }
1508
46.8k
        }
1509
1510
        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1511
66.0k
        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1512
66.0k
                        && (ABS(s_mvcol.i2_mvy) <= 1));
1513
1514
66.0k
    }
1515
1516
    /***************************************************************************
1517
     * Evaluating skip params
1518
     **************************************************************************/
1519
    /* Section 8.4.1.2.2 */
1520
66.0k
    ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1521
66.0k
    ps_a_pu = &ps_proc->s_left_mb_pu;
1522
1523
66.0k
    ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1524
66.0k
    ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1525
1526
66.0k
    i4_c_avail = 0;
1527
66.0k
    if (ps_ngbr_avbl->u1_mb_c)
1528
22.6k
    {
1529
22.6k
        ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1530
22.6k
        ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1531
22.6k
        i4_c_avail = 1;
1532
22.6k
    }
1533
43.4k
    else
1534
43.4k
    {
1535
43.4k
        ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1536
43.4k
        ps_c_pu = &ps_proc->s_top_left_mb_pu;
1537
43.4k
        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1538
43.4k
    }
1539
1540
1541
66.0k
    i4_a = ps_ngbr_avbl->u1_mb_a;
1542
66.0k
    i4_a &= !ps_a_syn->u2_is_intra;
1543
1544
66.0k
    i4_b = ps_ngbr_avbl->u1_mb_b;
1545
66.0k
    i4_b &= !ps_b_syn->u2_is_intra;
1546
1547
66.0k
    i4_c = i4_c_avail;
1548
66.0k
    i4_c &= !ps_c_syn->u2_is_intra;
1549
1550
    /* Init to no mode avail */
1551
66.0k
    i4_mode_avail = 0;
1552
198k
    for (i = 0; i < 2; i++)
1553
132k
    {
1554
132k
        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1555
1556
132k
        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1557
132k
        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1558
132k
        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1559
132k
    }
1560
1561
    /* Update skip MV for L0 */
1562
66.0k
    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1563
17.9k
    {
1564
17.9k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1565
17.9k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1566
17.9k
    }
1567
48.1k
    else
1568
48.1k
    {
1569
48.1k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1570
48.1k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1571
48.1k
    }
1572
1573
    /* Update skip MV for L1 */
1574
66.0k
    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1575
20.5k
    {
1576
20.5k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1577
20.5k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1578
20.5k
    }
1579
45.5k
    else
1580
45.5k
    {
1581
45.5k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1582
45.5k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1583
45.5k
    }
1584
1585
    /* Now see if the ME information matches the SKIP information */
1586
66.0k
    switch (ps_proc->ps_pu->b2_pred_mode)
1587
66.0k
    {
1588
8.50k
        case PRED_BI:
1589
8.50k
            if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1590
8.50k
               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1591
8.50k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1592
8.50k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1593
8.50k
               && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
1594
872
            {
1595
872
                return 1;
1596
872
            }
1597
7.63k
            break;
1598
1599
30.8k
        case PRED_L0:
1600
30.8k
            if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1601
30.8k
              && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1602
30.8k
              && (i4_mode_avail == 0x1))
1603
9.76k
            {
1604
9.76k
                return 1;
1605
9.76k
            }
1606
21.1k
            break;
1607
1608
26.7k
        case PRED_L1:
1609
26.7k
            if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1610
26.7k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1611
26.7k
               && (i4_mode_avail == 0x2))
1612
8.74k
            {
1613
8.74k
                return 1;
1614
8.74k
            }
1615
17.9k
            break;
1616
66.0k
    }
1617
1618
46.7k
    return 0;
1619
66.0k
}
1620
1621
1622
/**
1623
*******************************************************************************
1624
*
1625
* @brief This function computes the best motion vector among the tentative mv
1626
* candidates chosen.
1627
*
1628
* @par Description:
1629
*  This function determines the position in the search window at which the motion
1630
*  estimation should begin in order to minimise the number of search iterations.
1631
*
1632
* @param[in] ps_me_ctxt
1633
*  pointer to me context
1634
*
1635
* @param[in] ps_proc
1636
*  process context
1637
*
1638
* @param[in] ps_mb_ctxt_bi
1639
*  pointer to current mb partition ctxt with respect to ME
1640
*
1641
* @returns  mv pair & corresponding distortion and cost
1642
*
1643
* @remarks Currently only 4 search candiates are supported
1644
*
1645
*******************************************************************************
1646
*/
1647
void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1648
                            process_ctxt_t *ps_proc,
1649
                            mb_part_ctxt *ps_mb_ctxt_bi)
1650
86.5k
{
1651
1652
86.5k
    UWORD32 i, u4_fast_sad;
1653
1654
86.5k
    WORD32 i4_dest_buff;
1655
1656
86.5k
    mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1657
1658
86.5k
    UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1659
1660
86.5k
    UWORD8 *pu1_dst_buf;
1661
1662
86.5k
    WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1663
1664
86.5k
    WORD32 i4_mb_distortion, i4_mb_cost;
1665
1666
86.5k
    u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1667
1668
86.5k
    i4_dest_buff = 0;
1669
1670
346k
    for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1671
259k
    {
1672
259k
        pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1673
1674
259k
        s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1675
259k
        s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1676
259k
        s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1677
259k
        s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1678
1679
259k
        ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1680
259k
        ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1681
1682
259k
        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1683
259k
                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1684
31.7k
        {
1685
31.7k
            pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1686
31.7k
            i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1687
31.7k
        }
1688
227k
        else
1689
227k
        {
1690
227k
            pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1691
227k
            i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1692
227k
        }
1693
1694
1695
259k
        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1696
259k
                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1697
29.3k
        {
1698
29.3k
            pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1699
29.3k
            i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1700
29.3k
        }
1701
230k
        else
1702
230k
        {
1703
230k
            pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1704
230k
            i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1705
230k
        }
1706
1707
259k
        ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1708
259k
                        pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1709
259k
                        i4_ref_l0_stride, i4_ref_l1_stride,
1710
259k
                        ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1711
1712
259k
        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1713
259k
                        ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1714
259k
                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1715
259k
                        INT_MAX, &i4_mb_distortion);
1716
1717
        /* compute cost */
1718
259k
        i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1719
259k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1720
259k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1721
259k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1722
1723
259k
        i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1724
1725
1726
259k
        i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1727
259k
        i4_mb_cost += i4_mb_distortion;
1728
1729
259k
        if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1730
172k
        {
1731
172k
            ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1732
172k
            ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1733
172k
            ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1734
172k
            ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1735
172k
            i4_dest_buff = (i4_dest_buff + 1) % 2;
1736
172k
        }
1737
259k
    }
1738
1739
86.5k
}
1740
1741
/**
1742
*******************************************************************************
1743
*
1744
* @brief This function performs motion estimation for the current mb
1745
*
1746
* @par Description:
1747
*  The current mb is compared with a list of mb's in the reference frame for
1748
*  least cost. The mb that offers least cost is chosen as predicted mb and the
1749
*  displacement of the predicted mb from index location of the current mb is
1750
*  signaled as mv. The list of the mb's that are chosen in the reference frame
1751
*  are dependent on the speed of the ME configured.
1752
*
1753
* @param[in] ps_proc
1754
*  Process context corresponding to the job
1755
*
1756
* @returns  motion vector of the pred mb, sad, cost.
1757
*
1758
* @remarks none
1759
*
1760
*******************************************************************************
1761
*/
1762
void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1763
96.0k
{
1764
    /* me ctxt */
1765
96.0k
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1766
1767
    /* codec context */
1768
96.0k
    codec_t *ps_codec = ps_proc->ps_codec;
1769
1770
    /* Temp variables for looping over ref lists */
1771
96.0k
    WORD32 i4_reflist, i4_max_reflist;
1772
1773
    /* recon stride */
1774
96.0k
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1775
1776
    /* source buffer for halp pel generation functions */
1777
96.0k
    UWORD8 *pu1_hpel_src;
1778
1779
    /* quantization parameters */
1780
96.0k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1781
1782
    /* Mb part ctxts for SKIP */
1783
96.0k
    mb_part_ctxt as_skip_mbpart[2];
1784
1785
    /* Sad therholds */
1786
96.0k
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1787
1788
96.0k
    {
1789
96.0k
        WORD32 rows_above, rows_below, columns_left, columns_right;
1790
1791
        /* During evaluation for motion vectors do not search through padded regions */
1792
        /* Obtain number of rows and columns that are effective for computing for me evaluation */
1793
96.0k
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1794
96.0k
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1795
96.0k
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1796
96.0k
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1797
1798
        /* init srch range */
1799
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1800
         * on all sides.
1801
         */
1802
96.0k
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1803
96.0k
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1804
96.0k
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1805
96.0k
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1806
1807
        /* this is to facilitate fast sub pel computation with minimal loads */
1808
96.0k
        if (ps_me_ctxt->u4_enable_hpel)
1809
64.8k
        {
1810
64.8k
            ps_me_ctxt->i4_srch_range_w += 1;
1811
64.8k
            ps_me_ctxt->i4_srch_range_e -= 1;
1812
64.8k
            ps_me_ctxt->i4_srch_range_n += 1;
1813
64.8k
            ps_me_ctxt->i4_srch_range_s -= 1;
1814
64.8k
        }
1815
96.0k
    }
1816
1817
    /* Compute ME and store the MVs */
1818
96.0k
    {
1819
        /***********************************************************************
1820
         * Compute ME for lists L0 and L1
1821
         *  For L0 -> L0 skip + L0
1822
         *  for L1 -> L0 skip + L0 + L1 skip + L1
1823
         ***********************************************************************/
1824
96.0k
        i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1825
1826
        /* Init SATQD for the current list */
1827
96.0k
        ps_me_ctxt->u4_min_sad_reached  = 0;
1828
96.0k
        ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1829
1830
273k
        for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1831
186k
        {
1832
1833
            /* Get the seed motion vector candidates                    */
1834
186k
            ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1835
1836
            /* ****************************************************************
1837
             *Evaluate the SKIP for current list
1838
             * ****************************************************************/
1839
186k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1840
186k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1841
186k
            as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1842
186k
            as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1843
1844
186k
            if (ps_me_ctxt->i4_skip_type == i4_reflist)
1845
52.3k
            {
1846
52.3k
                ime_compute_skip_cost( ps_me_ctxt,
1847
52.3k
                                       (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1848
52.3k
                                       &as_skip_mbpart[i4_reflist],
1849
52.3k
                                       ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1850
52.3k
                                       i4_reflist,
1851
52.3k
                                       (ps_proc->i4_slice_type == BSLICE) );
1852
52.3k
            }
1853
1854
186k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1855
186k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1856
1857
            /******************************************************************
1858
             * Evaluate ME For current list
1859
             *****************************************************************/
1860
186k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1861
186k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1862
186k
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1863
186k
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1864
1865
            /* Init Hpel */
1866
186k
            ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1867
1868
            /* In case we found out the minimum SAD, exit the ME eval */
1869
186k
            if (ps_me_ctxt->u4_min_sad_reached)
1870
9.50k
            {
1871
9.50k
                i4_max_reflist = i4_reflist;
1872
9.50k
                break;
1873
9.50k
            }
1874
1875
1876
            /* Evaluate search candidates for initial mv pt */
1877
177k
            ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1878
1879
            /********************************************************************/
1880
            /*                  full pel motion estimation                      */
1881
            /********************************************************************/
1882
177k
            ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1883
1884
177k
            DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2),
1885
177k
                                   (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2));
1886
1887
177k
            DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion, 1);
1888
1889
            /* Scale the MV to qpel resolution */
1890
177k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1891
177k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1892
1893
177k
            if (ps_me_ctxt->u4_enable_hpel)
1894
118k
            {
1895
                /* moving src pointer to the converged motion vector location */
1896
118k
                pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1897
118k
                               + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1898
118k
                               + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1899
1900
118k
                ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1901
118k
                ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1902
118k
                ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1903
1904
                /* Init the search position to an invalid number */
1905
118k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1906
1907
                /* Incase a buffer is still in use by L0, replace it with spare buff */
1908
118k
                ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1909
118k
                                ps_proc->apu1_subpel_buffs[3];
1910
1911
1912
118k
                ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1913
1914
                /* half  pel search is done for both sides of full pel,
1915
                 * hence half_x of width x height = 17x16 is created
1916
                 * starting from left half_x of converged full pel */
1917
118k
                pu1_hpel_src -= 1;
1918
1919
                /* computing half_x */
1920
118k
                ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1921
118k
                                                      ps_me_ctxt->apu1_subpel_buffs[0],
1922
118k
                                                      i4_rec_strd,
1923
118k
                                                      ps_me_ctxt->u4_subpel_buf_strd);
1924
1925
                /*
1926
                 * Halfpel search is done for both sides of full pel,
1927
                 * hence half_y of width x height = 16x17 is created
1928
                 * starting from top half_y of converged full pel
1929
                 * for half_xy top_left is required
1930
                 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1931
                 */
1932
118k
                pu1_hpel_src -= i4_rec_strd;
1933
1934
                /* computing half_y and half_xy */
1935
118k
                ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1936
118k
                                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1937
118k
                                ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1938
118k
                                ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1939
118k
                                ps_me_ctxt->u4_subpel_buf_strd);
1940
1941
118k
                ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1942
1943
118k
            }
1944
177k
        }
1945
1946
        /***********************************************************************
1947
         * If a particular skiip Mv is giving better sad, copy to the corresponding
1948
         * MBPART
1949
         * In B slices this loop should go only to PREDL1: If we found min sad
1950
         * we will go to the skip ref list only
1951
         * Have to find a way to make it without too much change or new vars
1952
         **********************************************************************/
1953
282k
        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1954
186k
        {
1955
186k
            if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1956
23.6k
            {
1957
23.6k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1958
23.6k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1959
23.6k
                ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1960
23.6k
            }
1961
186k
        }
1962
1963
        /***********************************************************************
1964
         * Compute ME for BI
1965
         *  In case of BI we do ME for two candidates
1966
         *   1) The best L0 and L1 Mvs
1967
         *   2) Skip L0 and L1 MVs
1968
         *
1969
         *   TODO
1970
         *   one of the search candidates is skip. Hence it may be duplicated
1971
         ***********************************************************************/
1972
96.0k
        if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1973
86.5k
        {
1974
86.5k
            WORD32 i, j = 0;
1975
86.5k
            WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1976
86.5k
            WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1977
1978
            /* Get the free buffers */
1979
86.5k
            l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
1980
86.5k
            l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
1981
1982
            /* Search for the two free buffers in subpel list */
1983
432k
            for (i = 0; i < SUBPEL_BUFF_CNT; i++)
1984
346k
            {
1985
346k
                if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
1986
211k
                {
1987
211k
                    ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
1988
211k
                    j++;
1989
211k
                }
1990
346k
            }
1991
86.5k
            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1992
1993
            /* Copy the statial SKIP MV of each list */
1994
86.5k
            i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
1995
86.5k
            i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
1996
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
1997
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
1998
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
1999
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2000
2001
            /* Copy the SKIP MV temporal of each list */
2002
86.5k
            i4_l0_skip_mv_idx++;
2003
86.5k
            i4_l1_skip_mv_idx++;
2004
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2005
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2006
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2007
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2008
2009
            /* Copy the best MV after ME */
2010
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2011
86.5k
            ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2012
2013
86.5k
            ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2014
2015
86.5k
            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2016
86.5k
            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2017
2018
86.5k
            ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2019
86.5k
                                   &ps_me_ctxt->as_mb_part[PRED_BI]);
2020
2021
86.5k
            i4_max_reflist = PRED_BI;
2022
86.5k
        }
2023
2024
        /**********************************************************************
2025
         * Now get the minimum of MB part sads by searching over all ref lists
2026
         **********************************************************************/
2027
96.0k
        ps_proc->ps_pu->b2_pred_mode = 0x3;
2028
2029
369k
        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2030
273k
        {
2031
273k
            if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2032
154k
            {
2033
154k
                ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2034
154k
                ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2035
154k
                ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2036
154k
                ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2037
154k
            }
2038
273k
        }
2039
2040
        /**********************************************************************
2041
         * In case we have a BI MB, we have to copy the buffers and set proer MV's
2042
         *  1)In case its BI, we need to get the best MVs given by BI and update
2043
         *    to their corresponding MB part
2044
         *  2)We also need to copy the buffer in which bipred buff is populated
2045
         *
2046
         *  Not that if we have
2047
         **********************************************************************/
2048
96.0k
        if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2049
13.8k
        {
2050
13.8k
            WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2051
13.8k
            UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2052
2053
13.8k
            ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2054
13.8k
            ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2055
2056
            /* Now we have to copy the buffers */
2057
13.8k
            ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2058
13.8k
                                              ps_proc->pu1_best_subpel_buf,
2059
13.8k
                                              ps_me_ctxt->u4_subpel_buf_strd,
2060
13.8k
                                              ps_proc->u4_bst_spel_buf_strd,
2061
13.8k
                                              MB_SIZE, MB_SIZE, NULL, 0);
2062
2063
13.8k
        }
2064
82.1k
        else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2065
24.3k
        {
2066
            /* Now we have to copy the buffers */
2067
24.3k
            ps_codec->pf_inter_pred_luma_copy(
2068
24.3k
                            ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2069
24.3k
                            ps_proc->pu1_best_subpel_buf,
2070
24.3k
                            ps_me_ctxt->u4_subpel_buf_strd,
2071
24.3k
                            ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2072
24.3k
                            NULL, 0);
2073
24.3k
        }
2074
96.0k
    }
2075
2076
    /**************************************************************************
2077
     *Now copy the MVs to the current PU with qpel scaling
2078
     ***************************************************************************/
2079
96.0k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2080
96.0k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2081
96.0k
    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2082
96.0k
    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2083
2084
2085
96.0k
    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2086
96.0k
    ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2087
2088
    /* number of partitions */
2089
96.0k
    ps_proc->u4_num_sub_partitions = 1;
2090
96.0k
    *(ps_proc->pu4_mb_pu_cnt) = 1;
2091
2092
    /* position in-terms of PU */
2093
96.0k
    ps_proc->ps_pu->b4_pos_x = 0;
2094
96.0k
    ps_proc->ps_pu->b4_pos_y = 0;
2095
2096
    /* PU size */
2097
96.0k
    ps_proc->ps_pu->b4_wd = 3;
2098
96.0k
    ps_proc->ps_pu->b4_ht = 3;
2099
2100
    /* Update min sad conditions */
2101
96.0k
    if (ps_me_ctxt->u4_min_sad_reached == 1)
2102
9.50k
    {
2103
9.50k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2104
9.50k
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2105
9.50k
    }
2106
96.0k
}
2107