Coverage Report

Created: 2025-07-23 06:18

/src/libavc/encoder/ih264e_me.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ih264e_me.c
25
*
26
* @brief
27
*  Contains definition of functions for motion estimation
28
*
29
* @author
30
*  ittiam
31
*
32
* @par List of Functions:
33
*  - ih264e_init_mv_bits
34
*  - ih264e_get_search_candidates
35
*  - ih264e_find_pskip_params
36
*  - ih264e_find_pskip_params_me
37
*  - ih264e_get_mv_predictor
38
*  - ih264e_mv_pred
39
*  - ih264e_mv_pred_me
40
*  - ih264e_compute_me_single_reflist
41
*  - ih264e_compute_me_nmb
42
*  - ih264e_find_bskip_params_me
43
*  - ih264e_find_bskip_params
44
*  - ih264e_evaluate_bipred
45
*  - ih264e_compute_me_multi_reflist
46
*
47
* @remarks
48
*  none
49
*
50
*******************************************************************************
51
*/
52
53
/*****************************************************************************/
54
/* File Includes                                                             */
55
/*****************************************************************************/
56
57
/* System Include Files */
58
#include <stdio.h>
59
#include <assert.h>
60
#include <limits.h>
61
62
/* User Include Files */
63
#include "ih264_typedefs.h"
64
#include "iv2.h"
65
#include "ive2.h"
66
#include "ithread.h"
67
68
#include "ih264_debug.h"
69
#include "ih264_macros.h"
70
#include "ih264_defs.h"
71
#include "ih264_mem_fns.h"
72
#include "ih264_padding.h"
73
#include "ih264_structs.h"
74
#include "ih264_trans_quant_itrans_iquant.h"
75
#include "ih264_inter_pred_filters.h"
76
#include "ih264_intra_pred_filters.h"
77
#include "ih264_deblk_edge_filters.h"
78
#include "ih264_cabac_tables.h"
79
#include "ih264_platform_macros.h"
80
81
#include "ime_defs.h"
82
#include "ime_distortion_metrics.h"
83
#include "ime_structs.h"
84
#include "ime.h"
85
#include "ime_statistics.h"
86
87
#include "irc_cntrl_param.h"
88
#include "irc_frame_info_collector.h"
89
90
#include "ih264e_error.h"
91
#include "ih264e_defs.h"
92
#include "ih264e_globals.h"
93
#include "ih264e_rate_control.h"
94
#include "ih264e_bitstream.h"
95
#include "ih264e_cabac_structs.h"
96
#include "ih264e_structs.h"
97
#include "ih264e_mc.h"
98
#include "ih264e_me.h"
99
#include "ih264e_half_pel.h"
100
#include "ih264e_intra_modes_eval.h"
101
#include "ih264e_core_coding.h"
102
#include "ih264e_platform_macros.h"
103
104
105
/*****************************************************************************/
106
/* Function Definitions                                                      */
107
/*****************************************************************************/
108
109
/**
110
*******************************************************************************
111
*
112
* @brief
113
*  This function populates the length of the codewords for motion vectors in the
114
*  range (-search range, search range) in pixels
115
*
116
* @param[in] ps_me
117
*  Pointer to me ctxt
118
*
119
* @param[out] pu1_mv_bits
120
*  length of the codeword for all mv's
121
*
122
* @remarks The length of the code words are derived from signed exponential
123
*  goloumb codes.
124
*
125
*******************************************************************************
126
*/
127
void ih264e_init_mv_bits(me_ctxt_t *ps_me_ctxt)
128
4.78k
{
129
    /* temp var */
130
4.78k
    WORD32 i, codesize = 3, diff, limit;
131
4.78k
    UWORD32 u4_code_num, u4_range;
132
4.78k
    UWORD32 u4_uev_min, u4_uev_max, u4_sev_min, u4_sev_max;
133
134
    /* max srch range */
135
4.78k
    diff = MAX(DEFAULT_MAX_SRCH_RANGE_X, DEFAULT_MAX_SRCH_RANGE_Y);
136
    /* sub pel */
137
4.78k
    diff <<= 2;
138
    /* delta mv */
139
4.78k
    diff <<= 1;
140
141
    /* codeNum for positive integer     =  2x-1     : Table9-3  */
142
4.78k
    u4_code_num = (diff << 1);
143
144
    /* get range of the bit string and put using put_bits()                 */
145
4.78k
    GETRANGE(u4_range, u4_code_num);
146
147
4.78k
    limit = 2*u4_range - 1;
148
149
    /* init mv bits */
150
4.78k
    ps_me_ctxt->pu1_mv_bits[0] = 1;
151
152
57.4k
    while (codesize < limit)
153
52.6k
    {
154
52.6k
        u4_uev_min = (1 << (codesize >> 1));
155
52.6k
        u4_uev_max = 2*u4_uev_min - 1;
156
157
52.6k
        u4_sev_min = u4_uev_min >> 1;
158
52.6k
        u4_sev_max = u4_uev_max >> 1;
159
160
52.6k
        DEBUG("\n%d min, %d max %d codesize", u4_sev_min, u4_sev_max, codesize);
161
162
9.84M
        for (i = u4_sev_min; i <= (WORD32)u4_sev_max; i++)
163
9.79M
        {
164
9.79M
            ps_me_ctxt->pu1_mv_bits[-i] = ps_me_ctxt->pu1_mv_bits[i] = codesize;
165
9.79M
        }
166
167
52.6k
        codesize += 2;
168
52.6k
    }
169
4.78k
}
170
171
/**
172
*******************************************************************************
173
*
174
* @brief Determines the valid candidates for which the initial search shall happen.
175
* The best of these candidates is used to center the diamond pixel search.
176
*
177
* @par Description The function sends the skip, (0,0), left, top and top-right
178
* neighbouring MBs MVs. The left, top and top-right MBs MVs are used because
179
* these are the same MVs that are used to form the MV predictor. This initial MV
180
* search candidates need not take care of slice boundaries and hence neighbor
181
* availability checks are not made here.
182
*
183
* @param[in] ps_proc
184
*  Pointer to process context
185
*
186
* @param[in] ps_me_ctxt
187
*  pointer to me context
188
*
189
* @param[in] i4_ref_list
190
*  Current active reference list
191
*
192
* @returns  The list of MVs to be used of priming the full pel search and the
193
* number of such MVs
194
*
195
* @remarks
196
*   Assumptions : 1. Assumes Only partition of size 16x16
197
*
198
*******************************************************************************
199
*/
200
static void ih264e_get_search_candidates(process_ctxt_t *ps_proc,
201
                                         me_ctxt_t *ps_me_ctxt,
202
                                         WORD32 i4_reflist)
203
289k
{
204
    /* curr mb indices */
205
289k
    WORD32 i4_mb_x = ps_proc->i4_mb_x;
206
207
    /* Motion vector */
208
289k
    mv_t *ps_left_mv, *ps_top_mv, *ps_top_left_mv, *ps_top_right_mv;
209
210
    /* Pred modes */
211
289k
    WORD32 i4_left_mode, i4_top_mode, i4_top_left_mode, i4_top_right_mode;
212
213
    /* mb part info */
214
289k
    mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist];
215
216
    /* mvs */
217
289k
    WORD32 mvx, mvy;
218
219
    /* ngbr availability */
220
289k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
221
222
    /* Current mode */
223
289k
    WORD32 i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
224
225
    /* srch range*/
226
289k
    WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n;
227
289k
    WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s;
228
289k
    WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e;
229
289k
    WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w;
230
231
    /* num of candidate search candidates */
232
289k
    UWORD32 u4_num_candidates = 0;
233
234
289k
    ps_left_mv = &ps_proc->s_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
235
289k
    ps_top_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x)->s_me_info[i4_reflist].s_mv;
236
289k
    ps_top_left_mv = &ps_proc->s_top_left_mb_pu_ME.s_me_info[i4_reflist].s_mv;
237
289k
    ps_top_right_mv = &(ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->s_me_info[i4_reflist].s_mv;
238
239
289k
    i4_left_mode = ps_proc->s_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
240
289k
    i4_top_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x)->b2_pred_mode != i4_cmpl_predmode;
241
289k
    i4_top_left_mode = ps_proc->s_top_left_mb_pu_ME.b2_pred_mode != i4_cmpl_predmode;
242
289k
    i4_top_right_mode = (ps_proc->ps_top_row_pu_ME + i4_mb_x + 1)->b2_pred_mode != i4_cmpl_predmode;
243
244
    /* Taking the Zero motion vector as one of the candidates   */
245
289k
    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = 0;
246
289k
    ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = 0;
247
248
289k
    u4_num_candidates++;
249
250
    /* Taking the Left MV Predictor as one of the candidates    */
251
289k
    if (ps_ngbr_avbl->u1_mb_a && i4_left_mode)
252
133k
    {
253
133k
        mvx      = (ps_left_mv->i2_mvx + 2) >> 2;
254
133k
        mvy      = (ps_left_mv->i2_mvy + 2) >> 2;
255
256
133k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
257
133k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
258
259
133k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
260
133k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
261
262
133k
        u4_num_candidates ++;
263
133k
    }
264
265
    /* Taking the Top MV Predictor as one of the candidates     */
266
289k
    if (ps_ngbr_avbl->u1_mb_b && i4_top_mode)
267
130k
    {
268
130k
        mvx      = (ps_top_mv->i2_mvx + 2) >> 2;
269
130k
        mvy      = (ps_top_mv->i2_mvy + 2) >> 2;
270
271
130k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
272
130k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
273
274
130k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
275
130k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
276
277
130k
        u4_num_candidates ++;
278
279
        /* Taking the TopRt MV Predictor as one of the candidates   */
280
130k
        if (ps_ngbr_avbl->u1_mb_c && i4_top_right_mode)
281
78.9k
        {
282
78.9k
            mvx      = (ps_top_right_mv->i2_mvx + 2) >> 2;
283
78.9k
            mvy      = (ps_top_right_mv->i2_mvy + 2)>> 2;
284
285
78.9k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
286
78.9k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
287
288
78.9k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
289
78.9k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
290
291
78.9k
            u4_num_candidates ++;
292
78.9k
        }
293
        /* Taking the TopLt MV Predictor as one of the candidates   */
294
51.7k
        else if(ps_ngbr_avbl->u1_mb_d && i4_top_left_mode)
295
35.3k
        {
296
35.3k
            mvx      = (ps_top_left_mv->i2_mvx + 2) >> 2;
297
35.3k
            mvy      = (ps_top_left_mv->i2_mvy + 2) >> 2;
298
299
35.3k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
300
35.3k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
301
302
35.3k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
303
35.3k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
304
305
35.3k
            u4_num_candidates ++;
306
35.3k
        }
307
130k
    }
308
309
    /********************************************************************/
310
    /*                            MV Prediction                         */
311
    /********************************************************************/
312
289k
    ih264e_mv_pred_me(ps_proc, i4_reflist);
313
314
289k
    ps_mb_part->s_mv_pred.i2_mvx = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvx;
315
289k
    ps_mb_part->s_mv_pred.i2_mvy = ps_proc->ps_pred_mv[i4_reflist].s_mv.i2_mvy;
316
317
    /* Get the skip motion vector                               */
318
289k
    {
319
289k
        ps_me_ctxt->i4_skip_type = ps_proc->ps_codec->apf_find_skip_params_me
320
289k
                                    [ps_proc->i4_slice_type](ps_proc, i4_reflist);
321
322
        /* Taking the Skip motion vector as one of the candidates   */
323
289k
        mvx = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvx + 2) >> 2;
324
289k
        mvy = (ps_proc->ps_skip_mv[i4_reflist].s_mv.i2_mvy + 2) >> 2;
325
326
289k
        mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
327
289k
        mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
328
329
289k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
330
289k
        ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
331
289k
        u4_num_candidates++;
332
333
289k
        if (ps_proc->i4_slice_type == BSLICE)
334
184k
        {
335
            /* Taking the temporal Skip motion vector as one of the candidates   */
336
184k
            mvx = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvx + 2) >> 2;
337
184k
            mvy = (ps_proc->ps_skip_mv[i4_reflist + 2].s_mv.i2_mvy + 2) >> 2;
338
339
184k
            mvx = CLIP3(i4_srch_range_w, i4_srch_range_e, mvx);
340
184k
            mvy = CLIP3(i4_srch_range_n, i4_srch_range_s, mvy);
341
342
184k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvx = mvx;
343
184k
            ps_me_ctxt->as_mv_init_search[i4_reflist][u4_num_candidates].i2_mvy = mvy;
344
184k
            u4_num_candidates++;
345
184k
        }
346
289k
    }
347
348
289k
    ASSERT(u4_num_candidates <= 6);
349
350
289k
    ps_me_ctxt->u4_num_candidates[i4_reflist] = u4_num_candidates;
351
289k
}
352
353
/**
354
*******************************************************************************
355
*
356
* @brief The function computes parameters for a PSKIP MB
357
*
358
* @par Description:
359
*  The function updates the skip motion vector and checks if the current
360
*  MB can be a PSKIP MB or not
361
*
362
* @param[in] ps_proc
363
*  Pointer to process context
364
*
365
* @param[in] i4_ref_list
366
*  Current active reference list
367
*
368
* @returns Flag indicating if the current MB can be marked as skip
369
*
370
*******************************************************************************
371
*/
372
WORD32 ih264e_find_pskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
373
14.5k
{
374
    /* left mb motion vector */
375
14.5k
    enc_pu_t *ps_left_mb_pu ;
376
377
    /* top mb motion vector */
378
14.5k
    enc_pu_t *ps_top_mb_pu ;
379
380
    /* Skip mv */
381
14.5k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
382
383
14.5k
    UNUSED(i4_reflist);
384
385
14.5k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
386
14.5k
    ps_top_mb_pu = ps_proc->ps_top_row_pu + ps_proc->i4_mb_x;
387
388
14.5k
    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
389
14.5k
        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
390
14.5k
        (
391
6.14k
          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
392
6.14k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
393
6.14k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
394
6.14k
       ) ||
395
14.5k
       (
396
2.65k
          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
397
2.65k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
398
2.65k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
399
2.65k
       )
400
14.5k
     )
401
12.3k
    {
402
12.3k
        ps_skip_mv->i2_mvx = 0;
403
12.3k
        ps_skip_mv->i2_mvy = 0;
404
12.3k
    }
405
2.19k
    else
406
2.19k
    {
407
2.19k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
408
2.19k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
409
2.19k
    }
410
411
14.5k
    if ((ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx == ps_skip_mv->i2_mvx)
412
14.5k
     && (ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy == ps_skip_mv->i2_mvy))
413
10.7k
    {
414
10.7k
        return 1;
415
10.7k
    }
416
417
3.81k
    return 0;
418
14.5k
}
419
420
/**
421
*******************************************************************************
422
*
423
* @brief The function computes parameters for a PSKIP MB
424
*
425
* @par Description:
426
*  The function updates the skip motion vector and checks if the current
427
*  MB can be a PSKIP MB or not
428
*
429
* @param[in] ps_proc
430
*  Pointer to process context
431
*
432
* @param[in] i4_ref_list
433
*  Current active reference list
434
*
435
* @returns Flag indicating if the current MB can be marked as skip
436
*
437
*******************************************************************************
438
*/
439
WORD32 ih264e_find_pskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
440
104k
{
441
    /* left mb motion vector */
442
104k
    enc_pu_t *ps_left_mb_pu ;
443
444
    /* top mb motion vector */
445
104k
    enc_pu_t *ps_top_mb_pu ;
446
447
    /* Skip mv */
448
104k
    mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[PRED_L0].s_mv;
449
450
104k
    UNUSED(i4_reflist);
451
452
104k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
453
104k
    ps_top_mb_pu = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
454
455
104k
    if ((!ps_proc->ps_ngbr_avbl->u1_mb_a) ||
456
104k
        (!ps_proc->ps_ngbr_avbl->u1_mb_b) ||
457
104k
        (
458
43.4k
          (ps_left_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
459
43.4k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
460
43.4k
          (ps_left_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
461
43.4k
        ) ||
462
104k
        (
463
34.0k
          (ps_top_mb_pu->s_me_info[PRED_L0].i1_ref_idx == -1) &&
464
34.0k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvx == 0) &&
465
34.0k
          (ps_top_mb_pu->s_me_info[PRED_L0].s_mv.i2_mvy == 0)
466
34.0k
        )
467
104k
     )
468
73.8k
    {
469
73.8k
        ps_skip_mv->i2_mvx = 0;
470
73.8k
        ps_skip_mv->i2_mvy = 0;
471
73.8k
    }
472
31.1k
    else
473
31.1k
    {
474
31.1k
        ps_skip_mv->i2_mvx = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvx;
475
31.1k
        ps_skip_mv->i2_mvy = ps_proc->ps_pred_mv[PRED_L0].s_mv.i2_mvy;
476
31.1k
    }
477
478
104k
    return PRED_L0;
479
104k
}
480
481
/**
482
*******************************************************************************
483
*
484
* @brief motion vector predictor
485
*
486
* @par Description:
487
*  The routine calculates the motion vector predictor for a given block,
488
*  given the candidate MV predictors.
489
*
490
* @param[in] ps_left_mb_pu
491
*  pointer to left mb motion vector info
492
*
493
* @param[in] ps_top_row_pu
494
*  pointer to top & top right mb motion vector info
495
*
496
* @param[out] ps_pred_mv
497
*  pointer to candidate predictors for the current block
498
*
499
* @param[in] i4_ref_list
500
*  Current active reference list
501
*
502
* @returns  The x & y components of the MV predictor.
503
*
504
* @remarks The code implements the logic as described in sec 8.4.1.3 in H264
505
*   specification.
506
*   Assumptions : 1. Assumes Single reference frame
507
*                 2. Assumes Only partition of size 16x16
508
*
509
*******************************************************************************
510
*/
511
void ih264e_get_mv_predictor(enc_pu_t *ps_left_mb_pu,
512
                             enc_pu_t *ps_top_row_pu,
513
                             enc_pu_mv_t *ps_pred_mv,
514
                             WORD32 i4_ref_list)
515
580k
{
516
    /* Indicated the current ref */
517
580k
    WORD8 i1_ref_idx;
518
519
    /* For pred L0 */
520
580k
    i1_ref_idx = -1;
521
580k
    {
522
        /* temp var */
523
580k
        WORD32 pred_algo = 3, a, b, c;
524
525
        /* If only one of the candidate blocks has a reference frame equal to
526
         * the current block then use the same block as the final predictor */
527
580k
        a = (ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
528
580k
        b = (ps_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
529
580k
        c = (ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx == i1_ref_idx) ? 0 : -1;
530
531
580k
        if (a == 0 && b == -1 && c == -1)
532
100k
            pred_algo = 0; /* LEFT */
533
480k
        else if(a == -1 && b == 0 && c == -1)
534
32.1k
            pred_algo = 1; /* TOP */
535
448k
        else if(a == -1 && b == -1 && c == 0)
536
12.1k
            pred_algo = 2; /* TOP RIGHT */
537
538
580k
        switch (pred_algo)
539
580k
        {
540
100k
            case 0:
541
                /* left */
542
100k
                ps_pred_mv->s_mv.i2_mvx = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx;
543
100k
                ps_pred_mv->s_mv.i2_mvy = ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy;
544
100k
                break;
545
32.1k
            case 1:
546
                /* top */
547
32.1k
                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx;
548
32.1k
                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy;
549
32.1k
                break;
550
12.1k
            case 2:
551
                /* top right */
552
12.1k
                ps_pred_mv->s_mv.i2_mvx = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx;
553
12.1k
                ps_pred_mv->s_mv.i2_mvy = ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy;
554
12.1k
                break;
555
438k
            case 3:
556
                /* median */
557
438k
                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvx,
558
438k
                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvx,
559
438k
                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvx,
560
438k
                       ps_pred_mv->s_mv.i2_mvx);
561
438k
                MEDIAN(ps_left_mb_pu->s_me_info[i4_ref_list].s_mv.i2_mvy,
562
438k
                       ps_top_row_pu[0].s_me_info[i4_ref_list].s_mv.i2_mvy,
563
438k
                       ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv.i2_mvy,
564
438k
                       ps_pred_mv->s_mv.i2_mvy);
565
566
438k
                break;
567
0
            default:
568
0
                break;
569
580k
        }
570
580k
    }
571
580k
}
572
573
/**
574
*******************************************************************************
575
*
576
* @brief This function performs MV prediction
577
*
578
* @par Description:
579
*
580
* @param[in] ps_proc
581
*  Process context corresponding to the job
582
*
583
* @param[in] i4_slice_type
584
*  slice type
585
*
586
* @returns  none
587
*
588
* @remarks none
589
*  This function will update the MB availability since intra inter decision
590
*  should be done before the call
591
*
592
*******************************************************************************
593
*/
594
void ih264e_mv_pred(process_ctxt_t *ps_proc, WORD32 i4_slice_type)
595
200k
{
596
    /* left mb motion vector */
597
200k
    enc_pu_t *ps_left_mb_pu;
598
599
    /* top left mb motion vector */
600
200k
    enc_pu_t *ps_top_left_mb_pu;
601
602
    /* top row motion vector info */
603
200k
    enc_pu_t *ps_top_row_pu;
604
605
    /* predicted motion vector */
606
200k
    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
607
608
    /* zero mv */
609
200k
    mv_t zero_mv = { 0, 0 };
610
611
    /*  mb neighbor availability */
612
200k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
613
614
    /* mb syntax elements of neighbors */
615
200k
    mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
616
200k
    mb_info_t *ps_top_left_syn;
617
200k
    UWORD32 u4_left_is_intra;
618
619
    /* Temp var */
620
200k
    WORD32 i4_reflist, max_reflist, i4_cmpl_predmode;
621
622
200k
    ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ele);
623
200k
    u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
624
200k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu;
625
200k
    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
626
200k
    ps_top_row_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
627
628
    /* Number of ref lists to process */
629
200k
    max_reflist = (i4_slice_type == PSLICE) ? 1 : 2;
630
631
495k
    for (i4_reflist = 0; i4_reflist < max_reflist; i4_reflist++)
632
295k
    {
633
295k
        i4_cmpl_predmode = (i4_reflist == 0) ? PRED_L1 : PRED_L0;
634
635
        /* Before performing mv prediction prepare the ngbr information and
636
         * reset motion vectors basing on their availability */
637
295k
        if (!ps_ngbr_avbl->u1_mb_a || (u4_left_is_intra == 1)
638
295k
                        || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
639
215k
        {
640
            /* left mv */
641
215k
            ps_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx = 0;
642
215k
            ps_left_mb_pu->s_me_info[i4_reflist].s_mv = zero_mv;
643
215k
        }
644
295k
        if (!ps_ngbr_avbl->u1_mb_b || ps_top_syn->u2_is_intra
645
295k
                        || (ps_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
646
218k
        {
647
            /* top mv */
648
218k
            ps_top_row_pu[0].s_me_info[i4_reflist].i1_ref_idx = 0;
649
218k
            ps_top_row_pu[0].s_me_info[i4_reflist].s_mv = zero_mv;
650
218k
        }
651
652
295k
        if (!ps_ngbr_avbl->u1_mb_c)
653
182k
        {
654
            /* top right mv - When top right partition is not available for
655
             * prediction if top left is available use it for prediction else
656
             * set the mv information to -1 and (0, 0)
657
             * */
658
182k
            if (!ps_ngbr_avbl->u1_mb_d || ps_top_left_syn->u2_is_intra
659
182k
                            || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
660
157k
            {
661
157k
                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
662
157k
                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
663
157k
            }
664
25.5k
            else
665
25.5k
            {
666
25.5k
                ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = ps_top_left_mb_pu->s_me_info[i4_reflist].i1_ref_idx;
667
25.5k
                ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = ps_top_left_mb_pu->s_me_info[i4_reflist].s_mv;
668
25.5k
            }
669
182k
        }
670
112k
        else if(ps_top_syn[1].u2_is_intra
671
112k
                        || (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode))
672
68.9k
        {
673
68.9k
            ps_top_row_pu[1].s_me_info[i4_reflist].i1_ref_idx = 0;
674
68.9k
            ps_top_row_pu[1].s_me_info[i4_reflist].s_mv = zero_mv;
675
68.9k
        }
676
677
295k
        ih264e_get_mv_predictor(ps_left_mb_pu, ps_top_row_pu, &ps_pred_mv[i4_reflist], i4_reflist);
678
295k
    }
679
200k
}
680
681
/**
682
*******************************************************************************
683
*
684
* @brief This function approximates Pred. MV
685
*
686
* @par Description:
687
*
688
* @param[in] ps_proc
689
*  Process context corresponding to the job
690
*
691
* @param[in] i4_ref_list
692
*  Current active reference list
693
*
694
* @returns  none
695
*
696
* @remarks none
697
*  Motion estimation happens at nmb level. For cost calculations, mv is appro
698
*  ximated using this function
699
*
700
*******************************************************************************
701
*/
702
void ih264e_mv_pred_me(process_ctxt_t *ps_proc, WORD32 i4_ref_list)
703
289k
{
704
    /* left mb motion vector */
705
289k
    enc_pu_t *ps_left_mb_pu ;
706
707
    /* top left mb motion vector */
708
289k
    enc_pu_t *ps_top_left_mb_pu ;
709
710
    /* top row motion vector info */
711
289k
    enc_pu_t *ps_top_row_pu;
712
713
289k
    enc_pu_t s_top_row_pu[2];
714
715
    /* predicted motion vector */
716
289k
    enc_pu_mv_t *ps_pred_mv = ps_proc->ps_pred_mv;
717
718
    /* zero mv */
719
289k
    mv_t zero_mv = {0, 0};
720
721
    /* Complementary pred mode */
722
289k
    WORD32 i4_cmpl_predmode = (i4_ref_list == 0) ? PRED_L1 : PRED_L0;
723
724
    /*  mb neighbor availability */
725
289k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
726
727
289k
    ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
728
289k
    ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
729
289k
    ps_top_row_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
730
731
289k
    s_top_row_pu[0] = ps_top_row_pu[0];
732
289k
    s_top_row_pu[1] = ps_top_row_pu[1];
733
734
    /*
735
     * Before performing mv prediction prepare the ngbr information and
736
     * reset motion vectors basing on their availability
737
     */
738
289k
    if (!ps_ngbr_avbl->u1_mb_a || (ps_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
739
157k
    {
740
        /* left mv */
741
157k
        ps_left_mb_pu->s_me_info[i4_ref_list].i1_ref_idx = 0;
742
157k
        ps_left_mb_pu->s_me_info[i4_ref_list].s_mv = zero_mv;
743
157k
    }
744
289k
    if (!ps_ngbr_avbl->u1_mb_b || (s_top_row_pu[0].b2_pred_mode == i4_cmpl_predmode))
745
159k
    {
746
        /* top mv */
747
159k
        s_top_row_pu[0].s_me_info[i4_ref_list].i1_ref_idx = 0;
748
159k
        s_top_row_pu[0].s_me_info[i4_ref_list].s_mv = zero_mv;
749
750
159k
    }
751
289k
    if (!ps_ngbr_avbl->u1_mb_c)
752
179k
    {
753
        /* top right mv - When top right partition is not available for
754
         * prediction if top left is available use it for prediction else
755
         * set the mv information to -1 and (0, 0)
756
         * */
757
179k
        if (!ps_ngbr_avbl->u1_mb_d || (ps_top_left_mb_pu->b2_pred_mode == i4_cmpl_predmode))
758
141k
        {
759
141k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
760
141k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
761
762
141k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
763
141k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
764
141k
        }
765
37.3k
        else
766
37.3k
        {
767
37.3k
            s_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = ps_top_left_mb_pu->s_me_info[0].i1_ref_idx;
768
37.3k
            s_top_row_pu[1].s_me_info[i4_ref_list].s_mv = ps_top_left_mb_pu->s_me_info[0].s_mv;
769
37.3k
        }
770
179k
    }
771
110k
    else if (ps_top_row_pu[1].b2_pred_mode == i4_cmpl_predmode)
772
27.4k
    {
773
27.4k
        ps_top_row_pu[1].s_me_info[i4_ref_list].i1_ref_idx = 0;
774
27.4k
        ps_top_row_pu[1].s_me_info[i4_ref_list].s_mv = zero_mv;
775
27.4k
    }
776
777
289k
    ih264e_get_mv_predictor(ps_left_mb_pu, &(s_top_row_pu[0]),
778
289k
                            &ps_pred_mv[i4_ref_list], i4_ref_list);
779
289k
}
780
781
/**
782
*******************************************************************************
783
*
784
* @brief This function initializes me ctxt
785
*
786
* @par Description:
787
*  Before dispatching the current job to me thread, the me context associated
788
*  with the job is initialized.
789
*
790
* @param[in] ps_proc
791
*  Process context corresponding to the job
792
*
793
* @returns  none
794
*
795
* @remarks none
796
*
797
*******************************************************************************
798
*/
799
void ih264e_init_me(process_ctxt_t *ps_proc)
800
200k
{
801
    /* me ctxt */
802
200k
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
803
804
    /* codec context */
805
200k
    codec_t *ps_codec = ps_proc->ps_codec;
806
807
200k
    ps_me_ctxt->i4_skip_bias[BSLICE] = SKIP_BIAS_B;
808
809
200k
    if (ps_codec->s_cfg.u4_num_bframes == 0)
810
65.3k
    {
811
65.3k
       ps_me_ctxt->i4_skip_bias[PSLICE] = 4 * SKIP_BIAS_P;
812
65.3k
    }
813
135k
    else
814
135k
    {
815
135k
       ps_me_ctxt->i4_skip_bias[PSLICE] =  SKIP_BIAS_P;
816
135k
    }
817
818
    /* src ptr */
819
200k
    ps_me_ctxt->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma;
820
821
    /* src stride */
822
200k
    ps_me_ctxt->i4_src_strd = ps_proc->i4_src_strd;
823
824
    /* ref ptrs and corresponding lagrange params */
825
200k
    ps_me_ctxt->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma[0];
826
200k
    ps_me_ctxt->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma[1];
827
828
200k
    if (ps_codec->pic_type == PIC_B)
829
95.1k
    {
830
95.1k
        ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaB[ps_me_ctxt->u1_mb_qp];
831
95.1k
    }
832
105k
    else
833
105k
    {
834
105k
        ps_me_ctxt->u4_lambda_motion = gu1_qp_lambdaIP[ps_me_ctxt->u1_mb_qp];
835
105k
    }
836
200k
}
837
838
839
/**
840
*******************************************************************************
841
*
842
* @brief This function performs motion estimation for the current mb using
843
*   single reference list
844
*
845
* @par Description:
846
*  The current mb is compared with a list of mb's in the reference frame for
847
*  least cost. The mb that offers least cost is chosen as predicted mb and the
848
*  displacement of the predicted mb from index location of the current mb is
849
*  signaled as mv. The list of the mb's that are chosen in the reference frame
850
*  are dependent on the speed of the ME configured.
851
*
852
* @param[in] ps_proc
853
*  Process context corresponding to the job
854
*
855
* @returns  motion vector of the pred mb, sad, cost.
856
*
857
* @remarks none
858
*
859
*******************************************************************************
860
*/
861
void ih264e_compute_me_single_reflist(process_ctxt_t *ps_proc)
862
105k
{
863
    /* me ctxt */
864
105k
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
865
866
    /* codec context */
867
105k
    codec_t *ps_codec = ps_proc->ps_codec;
868
869
    /* recon stride */
870
105k
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
871
872
    /* source buffer for halp pel generation functions */
873
105k
    UWORD8 *pu1_hpel_src;
874
875
    /* quantization parameters */
876
105k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
877
878
    /* Mb part ctxts for SKIP */
879
105k
    mb_part_ctxt s_skip_mbpart;
880
881
    /* Sad therholds */
882
105k
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
883
884
105k
    {
885
105k
        WORD32 rows_above, rows_below, columns_left, columns_right;
886
887
        /* During evaluation for motion vectors do not search through padded regions */
888
        /* Obtain number of rows and columns that are effective for computing for me evaluation */
889
105k
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
890
105k
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
891
105k
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
892
105k
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
893
894
        /* init srch range */
895
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
896
         * on all sides.
897
         */
898
105k
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
899
105k
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
900
105k
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
901
105k
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
902
903
        /* this is to facilitate fast sub pel computation with minimal loads */
904
105k
        ps_me_ctxt->i4_srch_range_w += 1;
905
105k
        ps_me_ctxt->i4_srch_range_e -= 1;
906
105k
        ps_me_ctxt->i4_srch_range_n += 1;
907
105k
        ps_me_ctxt->i4_srch_range_s -= 1;
908
105k
    }
909
910
    /* Compute ME and store the MVs */
911
912
    /***********************************************************************
913
     * Compute ME for list L0
914
     ***********************************************************************/
915
916
    /* Init SATQD for the current list */
917
105k
    ps_me_ctxt->u4_min_sad_reached  = 0;
918
105k
    ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
919
920
    /* Get the seed motion vector candidates                    */
921
105k
    ih264e_get_search_candidates(ps_proc, ps_me_ctxt, PRED_L0);
922
923
    /*****************************************************************
924
     * Evaluate the SKIP for current list
925
     *****************************************************************/
926
105k
    s_skip_mbpart.s_mv_curr.i2_mvx = 0;
927
105k
    s_skip_mbpart.s_mv_curr.i2_mvy = 0;
928
105k
    s_skip_mbpart.i4_mb_cost = INT_MAX;
929
105k
    s_skip_mbpart.i4_mb_distortion = INT_MAX;
930
931
105k
    ime_compute_skip_cost( ps_me_ctxt,
932
105k
                           (ime_mv_t *)(&ps_proc->ps_skip_mv[PRED_L0].s_mv),
933
105k
                           &s_skip_mbpart,
934
105k
                           ps_proc->ps_codec->s_cfg.u4_enable_satqd,
935
105k
                           PRED_L0,
936
105k
                           0 /* Not a Bslice */ );
937
938
105k
    s_skip_mbpart.s_mv_curr.i2_mvx <<= 2;
939
105k
    s_skip_mbpart.s_mv_curr.i2_mvy <<= 2;
940
941
    /******************************************************************
942
     * Evaluate ME For current list
943
     *****************************************************************/
944
105k
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx = 0;
945
105k
    ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy = 0;
946
105k
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = INT_MAX;
947
105k
    ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = INT_MAX;
948
949
    /* Init Hpel */
950
105k
    ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf = NULL;
951
952
    /* In case we found out the minimum SAD, exit the ME eval */
953
105k
    if (!ps_me_ctxt->u4_min_sad_reached)
954
91.6k
    {
955
        /* Evaluate search candidates for initial mv pt */
956
91.6k
        ime_evaluate_init_srchposn_16x16(ps_me_ctxt, PRED_L0);
957
958
        /********************************************************************/
959
        /*                  full pel motion estimation                      */
960
        /********************************************************************/
961
91.6k
        ime_full_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
962
963
        /* Scale the MV to qpel resolution */
964
91.6k
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx <<= 2;
965
91.6k
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy <<= 2;
966
967
91.6k
        if (ps_me_ctxt->u4_enable_hpel)
968
54.7k
        {
969
            /* moving src pointer to the converged motion vector location*/
970
54.7k
            pu1_hpel_src = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0]
971
54.7k
                           + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx >> 2)
972
54.7k
                           + (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy >> 2) * i4_rec_strd;
973
974
54.7k
            ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
975
54.7k
            ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
976
54.7k
            ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
977
978
54.7k
            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
979
980
            /* half  pel search is done for both sides of full pel,
981
             * hence half_x of width x height = 17x16 is created
982
             * starting from left half_x of converged full pel */
983
54.7k
            pu1_hpel_src -= 1;
984
985
            /* computing half_x */
986
54.7k
            ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
987
54.7k
                                                  ps_me_ctxt->apu1_subpel_buffs[0],
988
54.7k
                                                  i4_rec_strd,
989
54.7k
                                                  ps_me_ctxt->u4_subpel_buf_strd);
990
991
            /*
992
             * Halfpel search is done for both sides of full pel,
993
             * hence half_y of width x height = 16x17 is created
994
             * starting from top half_y of converged full pel
995
             * for half_xy top_left is required
996
             * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
997
             */
998
54.7k
            pu1_hpel_src -= i4_rec_strd;
999
1000
            /* computing half_y , and half_xy*/
1001
54.7k
            ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1002
54.7k
                            pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1003
54.7k
                            ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1004
54.7k
                            ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1005
54.7k
                            ps_me_ctxt->u4_subpel_buf_strd);
1006
1007
54.7k
            ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, PRED_L0);
1008
54.7k
        }
1009
91.6k
    }
1010
1011
1012
    /***********************************************************************
1013
     * If a particular skiip Mv is giving better sad, copy to the corresponding
1014
     * MBPART
1015
     * In B slices this loop should go only to PREDL1: If we found min sad
1016
     * we will go to the skip ref list only
1017
     * Have to find a way to make it without too much change or new vars
1018
     **********************************************************************/
1019
105k
    if (s_skip_mbpart.i4_mb_cost < ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost)
1020
27.4k
    {
1021
27.4k
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost = s_skip_mbpart.i4_mb_cost;
1022
27.4k
        ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion = s_skip_mbpart.i4_mb_distortion;
1023
27.4k
        ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = s_skip_mbpart.s_mv_curr;
1024
27.4k
    }
1025
77.8k
    else if (ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf)
1026
21.8k
    {
1027
        /* Now we have to copy the buffers */
1028
21.8k
        ps_codec->pf_inter_pred_luma_copy(
1029
21.8k
                        ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf,
1030
21.8k
                        ps_proc->pu1_best_subpel_buf,
1031
21.8k
                        ps_me_ctxt->u4_subpel_buf_strd,
1032
21.8k
                        ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
1033
21.8k
                        NULL, 0);
1034
21.8k
    }
1035
1036
    /**********************************************************************
1037
     * Now get the minimum of MB part sads by searching over all ref lists
1038
     **********************************************************************/
1039
105k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx;
1040
105k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy;
1041
105k
    ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_cost;
1042
105k
    ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[PRED_L0].i4_mb_distortion;
1043
105k
    ps_proc->ps_cur_mb->u4_mb_type = P16x16;
1044
105k
    ps_proc->ps_pu->b2_pred_mode = PRED_L0 ;
1045
1046
    /* Mark the reflists */
1047
105k
    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = -1;
1048
105k
    ps_proc->ps_pu->s_me_info[1].i1_ref_idx =  0;
1049
1050
    /* number of partitions */
1051
105k
    ps_proc->u4_num_sub_partitions = 1;
1052
105k
    *(ps_proc->pu4_mb_pu_cnt) = 1;
1053
1054
    /* position in-terms of PU */
1055
105k
    ps_proc->ps_pu->b4_pos_x = 0;
1056
105k
    ps_proc->ps_pu->b4_pos_y = 0;
1057
1058
    /* PU size */
1059
105k
    ps_proc->ps_pu->b4_wd = 3;
1060
105k
    ps_proc->ps_pu->b4_ht = 3;
1061
1062
    /* Update min sad conditions */
1063
105k
    if (ps_me_ctxt->u4_min_sad_reached == 1)
1064
13.3k
    {
1065
13.3k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
1066
13.3k
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
1067
13.3k
    }
1068
105k
}
1069
1070
/**
1071
*******************************************************************************
1072
*
1073
* @brief This function performs motion estimation for the current NMB
1074
*
1075
* @par Description:
1076
*  Intializes input and output pointers required by the function ih264e_compute_me
1077
*  and calls the function ih264e_compute_me in a loop to process NMBs.
1078
*
1079
* @param[in] ps_proc
1080
*  Process context corresponding to the job
1081
*
1082
* @param[in] u4_nmb_count
1083
*  Number of mb's to process
1084
*
1085
* @returns
1086
*
1087
* @remarks none
1088
*
1089
*******************************************************************************
1090
*/
1091
void ih264e_compute_me_nmb(process_ctxt_t *ps_proc, UWORD32 u4_nmb_count)
1092
75.2k
{
1093
    /* pic pu */
1094
75.2k
    enc_pu_t *ps_pu_begin = ps_proc->ps_pu;
1095
1096
    /* ME map */
1097
75.2k
    UWORD8 *pu1_me_map = ps_proc->pu1_me_map + (ps_proc->i4_mb_y * ps_proc->i4_wd_mbs);
1098
1099
    /* temp var */
1100
75.2k
    UWORD32 u4_i;
1101
1102
75.2k
    ps_proc->s_me_ctxt.u4_left_is_intra = ps_proc->s_left_mb_syntax_ele.u2_is_intra;
1103
75.2k
    ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->s_left_mb_syntax_ele.u2_mb_type == PSKIP);
1104
1105
275k
    for (u4_i = 0; u4_i < u4_nmb_count; u4_i++)
1106
200k
    {
1107
        /* Wait for ME map */
1108
200k
        if (ps_proc->i4_mb_y > 0)
1109
121k
        {
1110
            /* Wait for top right ME to be done */
1111
121k
            UWORD8 *pu1_me_map_tp_rw = ps_proc->pu1_me_map + (ps_proc->i4_mb_y - 1) * ps_proc->i4_wd_mbs;
1112
1113
219k
            while (1)
1114
219k
            {
1115
219k
                volatile UWORD8 *pu1_buf;
1116
219k
                WORD32 idx = ps_proc->i4_mb_x + u4_i + 1;
1117
1118
219k
                idx = MIN(idx, (ps_proc->i4_wd_mbs - 1));
1119
219k
                pu1_buf =  pu1_me_map_tp_rw + idx;
1120
219k
                if(*pu1_buf)
1121
121k
                    break;
1122
97.9k
                ithread_yield();
1123
97.9k
            }
1124
121k
        }
1125
1126
200k
        ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_i].as_skip_mv[0]);
1127
200k
        ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_i].s_ngbr_avbl);
1128
200k
        ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_i].as_pred_mv[0]);
1129
1130
200k
        ps_proc->ps_cur_mb = &(ps_proc->ps_nmb_info[u4_i]);
1131
1132
200k
        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->u4_min_sad;
1133
200k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1134
1135
200k
        ps_proc->ps_cur_mb->i4_mb_cost = INT_MAX;
1136
200k
        ps_proc->ps_cur_mb->i4_mb_distortion = SHRT_MAX;
1137
1138
        /* Set the best subpel buf to the correct mb so that the buffer can be copied */
1139
200k
        ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_i].pu1_best_sub_pel_buf;
1140
200k
        ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_i].u4_bst_spel_buf_strd;
1141
1142
        /* Set the min sad conditions */
1143
200k
        ps_proc->ps_cur_mb->u4_min_sad = ps_proc->ps_codec->u4_min_sad;
1144
200k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 0;
1145
1146
        /* Derive neighbor availability for the current macroblock */
1147
200k
        ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
1148
1149
        /* init me */
1150
200k
        ih264e_init_me(ps_proc);
1151
1152
        /* Compute ME according to slice type */
1153
200k
        ps_proc->ps_codec->apf_compute_me[ps_proc->i4_slice_type](ps_proc);
1154
1155
        /* update top and left structs */
1156
200k
        {
1157
200k
            mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1158
200k
            mb_info_t *ps_top_left_syn = &(ps_proc->s_top_left_mb_syntax_ME);
1159
200k
            enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu_ME;
1160
200k
            enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu_ME;
1161
200k
            enc_pu_t *ps_top_mv = ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x;
1162
1163
200k
            *ps_top_left_syn = *ps_top_syn;
1164
1165
200k
            *ps_top_left_mb_pu = *ps_top_mv;
1166
200k
            *ps_left_mb_pu = *ps_proc->ps_pu;
1167
200k
        }
1168
1169
200k
        ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1170
1171
        /* Copy the min sad reached info */
1172
200k
        ps_proc->ps_nmb_info[u4_i].u4_min_sad_reached = ps_proc->ps_cur_mb->u4_min_sad_reached;
1173
200k
        ps_proc->ps_nmb_info[u4_i].u4_min_sad   = ps_proc->ps_cur_mb->u4_min_sad;
1174
1175
        /*
1176
         * To make sure that the MV map is properly sync to the
1177
         * cache we need to do a DDB
1178
         */
1179
200k
        {
1180
200k
            DATA_SYNC();
1181
1182
200k
            pu1_me_map[ps_proc->i4_mb_x] = 1;
1183
200k
        }
1184
200k
        ps_proc->i4_mb_x++;
1185
1186
200k
        ps_proc->s_me_ctxt.u4_left_is_intra = 0;
1187
200k
        ps_proc->s_me_ctxt.u4_left_is_skip = (ps_proc->ps_cur_mb->u4_mb_type  == PSKIP);
1188
1189
        /* update buffers pointers */
1190
200k
        ps_proc->pu1_src_buf_luma += MB_SIZE;
1191
200k
        ps_proc->pu1_rec_buf_luma += MB_SIZE;
1192
200k
        ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1193
200k
        ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1194
1195
        /*
1196
         * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1197
         * the stride per MB is MB_SIZE
1198
         */
1199
200k
        ps_proc->pu1_src_buf_chroma += MB_SIZE;
1200
200k
        ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1201
200k
        ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1202
200k
        ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1203
1204
1205
200k
        ps_proc->pu4_mb_pu_cnt += 1;
1206
200k
    }
1207
1208
75.2k
    ps_proc->ps_pu = ps_pu_begin;
1209
75.2k
    ps_proc->i4_mb_x = ps_proc->i4_mb_x - u4_nmb_count;
1210
1211
    /* update buffers pointers */
1212
75.2k
    ps_proc->pu1_src_buf_luma -= MB_SIZE * u4_nmb_count;
1213
75.2k
    ps_proc->pu1_rec_buf_luma -= MB_SIZE * u4_nmb_count;
1214
75.2k
    ps_proc->apu1_ref_buf_luma[0] -= MB_SIZE * u4_nmb_count;
1215
75.2k
    ps_proc->apu1_ref_buf_luma[1] -= MB_SIZE * u4_nmb_count;
1216
1217
    /*
1218
     * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1219
     * the stride per MB is MB_SIZE
1220
     */
1221
75.2k
    ps_proc->pu1_src_buf_chroma -= MB_SIZE * u4_nmb_count;
1222
75.2k
    ps_proc->pu1_rec_buf_chroma -= MB_SIZE * u4_nmb_count;
1223
75.2k
    ps_proc->apu1_ref_buf_chroma[0] -= MB_SIZE * u4_nmb_count;
1224
75.2k
    ps_proc->apu1_ref_buf_chroma[1] -= MB_SIZE * u4_nmb_count;
1225
1226
75.2k
    ps_proc->pu4_mb_pu_cnt -= u4_nmb_count;
1227
75.2k
}
1228
1229
1230
/**
1231
*******************************************************************************
1232
*
1233
* @brief The function computes parameters for a BSKIP MB
1234
*
1235
* @par Description:
1236
*  The function updates the skip motion vector for B Mb, check if the Mb can be
1237
*  marked as skip and returns it
1238
*
1239
* @param[in] ps_proc
1240
*  Pointer to process context
1241
*
1242
* @param[in] i4_reflist
1243
*  Current active reference list
1244
*
1245
* @returns Flag indicating if the current Mb can be skip or not
1246
*
1247
* @remarks
1248
*   The code implements the logic as described in sec 8.4.1.2.2
1249
*   It also computes co-located MB parmas according to sec 8.4.1.2.1
1250
*
1251
*   Need to add condition for this fucntion to be used in ME
1252
*
1253
*******************************************************************************
1254
*/
1255
WORD32 ih264e_find_bskip_params_me(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1256
184k
{
1257
    /* Colzero for co-located MB */
1258
184k
    WORD32 i4_colzeroflag;
1259
1260
    /* motion vectors for neighbouring MBs */
1261
184k
    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1262
1263
    /* Variables to check if a particular mB is available */
1264
184k
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1265
1266
    /* Mode availability, init to no modes available     */
1267
184k
    WORD32 i4_mode_avail;
1268
1269
    /*  mb neighbor availability */
1270
184k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1271
1272
    /* Temp var */
1273
184k
    WORD32 i, i4_cmpl_mode, i4_skip_type = -1;
1274
1275
    /*
1276
     * Colocated motion vector
1277
     */
1278
184k
    mv_t s_mvcol;
1279
1280
    /*
1281
     * Colocated picture idx
1282
     */
1283
184k
    WORD32 i4_refidxcol;
1284
1285
184k
    UNUSED(i4_reflist);
1286
1287
    /**************************************************************************
1288
     *Find co-located MB parameters
1289
     *      See sec 8.4.1.2.1  for reference
1290
     **************************************************************************/
1291
184k
    {
1292
        /*
1293
         * Find the co-located Mb and update the skip and pred appropriately
1294
         * 1) Default colpic is forward ref : Table 8-6
1295
         * 2) Default mb col is current MB : Table 8-8
1296
         */
1297
1298
184k
        if (ps_proc->ps_colpu->b1_intra_flag)
1299
68.6k
        {
1300
68.6k
            s_mvcol.i2_mvx = 0;
1301
68.6k
            s_mvcol.i2_mvy = 0;
1302
68.6k
            i4_refidxcol = -1;
1303
68.6k
        }
1304
115k
        else
1305
115k
        {
1306
115k
            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1307
115k
            {
1308
115k
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1309
115k
                i4_refidxcol = 0;
1310
115k
            }
1311
18.4E
            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1312
18.4E
            {
1313
18.4E
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1314
18.4E
                i4_refidxcol = 0;
1315
18.4E
            }
1316
115k
        }
1317
1318
        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1319
184k
        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1320
184k
                        && (ABS(s_mvcol.i2_mvy) <= 1));
1321
1322
184k
    }
1323
1324
    /***************************************************************************
1325
     * Evaluating skip params : Spatial Skip
1326
     **************************************************************************/
1327
184k
    {
1328
    /* Get the neighbouring MBS according to Section 8.4.1.2.2 */
1329
184k
    ps_a_pu = &ps_proc->s_left_mb_pu_ME;
1330
184k
    ps_b_pu = (ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x);
1331
1332
184k
    i4_c_avail = 0;
1333
184k
    if (ps_ngbr_avbl->u1_mb_c)
1334
68.0k
    {
1335
68.0k
        ps_c_pu = &((ps_proc->ps_top_row_pu_ME + ps_proc->i4_mb_x)[1]);
1336
68.0k
        i4_c_avail = 1;
1337
68.0k
    }
1338
116k
    else
1339
116k
    {
1340
116k
        ps_c_pu = &ps_proc->s_top_left_mb_pu_ME;
1341
116k
        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1342
116k
    }
1343
1344
184k
    i4_a = ps_ngbr_avbl->u1_mb_a;
1345
184k
    i4_b = ps_ngbr_avbl->u1_mb_b;
1346
184k
    i4_c = i4_c_avail;
1347
1348
    /* Init to no mode avail */
1349
184k
    i4_mode_avail = 0;
1350
552k
    for (i = 0; i < 2; i++)
1351
367k
    {
1352
367k
        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1353
1354
367k
        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1355
367k
        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1356
367k
        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1357
367k
    }
1358
1359
184k
    if (i4_mode_avail == 0x3 || i4_mode_avail == 0x0)
1360
87.7k
    {
1361
87.7k
        i4_skip_type= PRED_BI;
1362
87.7k
    }
1363
96.5k
    else if(i4_mode_avail == 0x1)
1364
50.2k
    {
1365
50.2k
        i4_skip_type = PRED_L0;
1366
50.2k
    }
1367
46.2k
    else if(i4_mode_avail == 0x2)
1368
46.6k
    {
1369
46.6k
        i4_skip_type = PRED_L1;
1370
46.6k
    }
1371
1372
    /* Update skip MV for L0 */
1373
184k
    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1374
67.1k
    {
1375
67.1k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1376
67.1k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1377
67.1k
    }
1378
117k
    else
1379
117k
    {
1380
117k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1381
117k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1382
117k
    }
1383
1384
    /* Update skip MV for L1 */
1385
184k
    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1386
72.8k
    {
1387
72.8k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1388
72.8k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1389
72.8k
    }
1390
111k
    else
1391
111k
    {
1392
111k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1393
111k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1394
111k
    }
1395
1396
184k
    }
1397
1398
    /***************************************************************************
1399
     * Evaluating skip params : Temporal skip
1400
     **************************************************************************/
1401
184k
    {
1402
184k
        pic_buf_t *  ps_ref_pic[MAX_REF_PIC_CNT];
1403
184k
        WORD32 i4_td, i4_tx, i4_tb, i4_dist_scale_factor;
1404
184k
        enc_pu_mv_t *ps_skip_mv = &ps_proc->ps_skip_mv[2];
1405
1406
184k
        ps_ref_pic[PRED_L0] = ps_proc->aps_ref_pic[PRED_L0];
1407
184k
        ps_ref_pic[PRED_L1] = ps_proc->aps_ref_pic[PRED_L1];
1408
1409
184k
        i4_tb = ps_proc->ps_codec->i4_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1410
184k
        i4_td = ps_ref_pic[PRED_L1]->i4_abs_poc - ps_ref_pic[PRED_L0]->i4_abs_poc;
1411
1412
184k
        i4_tb = CLIP3(-128, 127, i4_tb);
1413
184k
        i4_td = CLIP3(-128, 127, i4_td);
1414
1415
184k
        i4_tx = ( 16384 + ABS( i4_td / 2 ) ) / i4_td ;
1416
184k
        i4_dist_scale_factor =  CLIP3( -1024, 1023, ( i4_tb * i4_tx + 32 ) >> 6 );
1417
1418
        /* Motion vectors taken in full pel resolution , hence  -> (& 0xfffc) operation */
1419
184k
        ps_skip_mv[PRED_L0].s_mv.i2_mvx = (( i4_dist_scale_factor * s_mvcol.i2_mvx + 128 ) >> 8) & 0xfffc;
1420
184k
        ps_skip_mv[PRED_L0].s_mv.i2_mvy = (( i4_dist_scale_factor * s_mvcol.i2_mvy + 128 ) >> 8) & 0xfffc;
1421
1422
184k
        ps_skip_mv[PRED_L1].s_mv.i2_mvx = (ps_skip_mv[PRED_L0].s_mv.i2_mvx - s_mvcol.i2_mvx) & 0xfffc;
1423
184k
        ps_skip_mv[PRED_L1].s_mv.i2_mvy = (ps_skip_mv[PRED_L0].s_mv.i2_mvy - s_mvcol.i2_mvy) & 0xfffc;
1424
1425
184k
    }
1426
1427
184k
    return i4_skip_type;
1428
184k
}
1429
1430
/**
1431
*******************************************************************************
1432
*
1433
* @brief The function computes the skip motion vectoe for B mb
1434
*
1435
* @par Description:
1436
*  The function gives the skip motion vector for B Mb, check if the Mb can be
1437
*  marked as skip
1438
*
1439
* @param[in] ps_proc
1440
*  Pointer to process context
1441
*
1442
* @param[in] i4_reflist
1443
*  Dummy
1444
*
1445
* @returns Flag indicating if the current Mb can be skip or not
1446
*
1447
* @remarks The code implements the logic as described in sec 8.4.1.2.2 in H264
1448
*   specification. It also computes co-located MB parmas according to sec 8.4.1.2.1
1449
*
1450
*******************************************************************************/
1451
WORD32 ih264e_find_bskip_params(process_ctxt_t *ps_proc, WORD32 i4_reflist)
1452
65.2k
{
1453
    /* Colzero for co-located MB */
1454
65.2k
    WORD32 i4_colzeroflag;
1455
1456
    /* motion vectors */
1457
65.2k
    enc_pu_t *ps_a_pu, *ps_c_pu, *ps_b_pu;
1458
1459
    /* Syntax elem */
1460
65.2k
    mb_info_t *ps_a_syn, *ps_b_syn, *ps_c_syn;
1461
1462
    /* Variables to check if a particular mB is available */
1463
65.2k
    WORD32 i4_a, i4_b, i4_c, i4_c_avail;
1464
1465
    /* Mode availability, init to no modes available     */
1466
65.2k
    WORD32 i4_mode_avail;
1467
1468
    /*  mb neighbor availability */
1469
65.2k
    block_neighbors_t *ps_ngbr_avbl = ps_proc->ps_ngbr_avbl;
1470
1471
    /* Temp var */
1472
65.2k
    WORD32 i, i4_cmpl_mode;
1473
1474
65.2k
    UNUSED(i4_reflist);
1475
1476
    /**************************************************************************
1477
     * Find co-locates parameters
1478
     *      See sec 8.4.1.2.1  for reference
1479
     **************************************************************************/
1480
65.2k
    {
1481
        /*
1482
         * Find the co-located Mb and update the skip and pred appropriately
1483
         * 1) Default colpic is forward ref : Table 8-6
1484
         * 2) Default mb col is current MB : Table 8-8
1485
         */
1486
1487
65.2k
        mv_t s_mvcol;
1488
65.2k
        WORD32 i4_refidxcol;
1489
1490
65.2k
        if (ps_proc->ps_colpu->b1_intra_flag)
1491
18.1k
        {
1492
18.1k
            s_mvcol.i2_mvx = 0;
1493
18.1k
            s_mvcol.i2_mvy = 0;
1494
18.1k
            i4_refidxcol = -1;
1495
18.1k
        }
1496
47.0k
        else
1497
47.0k
        {
1498
47.0k
            if (ps_proc->ps_colpu->b2_pred_mode != PRED_L1)
1499
47.1k
            {
1500
47.1k
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L0].s_mv;
1501
47.1k
                i4_refidxcol = 0;
1502
47.1k
            }
1503
18.4E
            else // if(ps_proc->ps_colpu->b2_pred_mode != PRED_L0)
1504
18.4E
            {
1505
18.4E
                s_mvcol = ps_proc->ps_colpu->s_me_info[PRED_L1].s_mv;
1506
18.4E
                i4_refidxcol = 0;
1507
18.4E
            }
1508
47.0k
        }
1509
1510
        /* RefPicList1[ 0 ]  is marked as  "used for short-term reference", as default */
1511
65.2k
        i4_colzeroflag = (!i4_refidxcol && (ABS(s_mvcol.i2_mvx) <= 1)
1512
65.2k
                        && (ABS(s_mvcol.i2_mvy) <= 1));
1513
1514
65.2k
    }
1515
1516
    /***************************************************************************
1517
     * Evaluating skip params
1518
     **************************************************************************/
1519
    /* Section 8.4.1.2.2 */
1520
65.2k
    ps_a_syn = &ps_proc->s_left_mb_syntax_ele;
1521
65.2k
    ps_a_pu = &ps_proc->s_left_mb_pu;
1522
1523
65.2k
    ps_b_syn = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x;
1524
65.2k
    ps_b_pu = (ps_proc->ps_top_row_pu + ps_proc->i4_mb_x);
1525
1526
65.2k
    i4_c_avail = 0;
1527
65.2k
    if (ps_ngbr_avbl->u1_mb_c)
1528
22.5k
    {
1529
22.5k
        ps_c_syn = &((ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x)[1]);
1530
22.5k
        ps_c_pu = &((ps_proc->ps_top_row_pu + ps_proc->i4_mb_x)[1]);
1531
22.5k
        i4_c_avail = 1;
1532
22.5k
    }
1533
42.7k
    else
1534
42.7k
    {
1535
42.7k
        ps_c_syn = &(ps_proc->s_top_left_mb_syntax_ele);
1536
42.7k
        ps_c_pu = &ps_proc->s_top_left_mb_pu;
1537
42.7k
        i4_c_avail = ps_ngbr_avbl->u1_mb_d;
1538
42.7k
    }
1539
1540
1541
65.2k
    i4_a = ps_ngbr_avbl->u1_mb_a;
1542
65.2k
    i4_a &= !ps_a_syn->u2_is_intra;
1543
1544
65.2k
    i4_b = ps_ngbr_avbl->u1_mb_b;
1545
65.2k
    i4_b &= !ps_b_syn->u2_is_intra;
1546
1547
65.2k
    i4_c = i4_c_avail;
1548
65.2k
    i4_c &= !ps_c_syn->u2_is_intra;
1549
1550
    /* Init to no mode avail */
1551
65.2k
    i4_mode_avail = 0;
1552
195k
    for (i = 0; i < 2; i++)
1553
130k
    {
1554
130k
        i4_cmpl_mode = (i == 0) ? PRED_L1 : PRED_L0;
1555
1556
130k
        i4_mode_avail |= (i4_a && (ps_a_pu->b2_pred_mode != i4_cmpl_mode) && (ps_a_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1557
130k
        i4_mode_avail |= (i4_b && (ps_b_pu->b2_pred_mode != i4_cmpl_mode) && (ps_b_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1558
130k
        i4_mode_avail |= (i4_c && (ps_c_pu->b2_pred_mode != i4_cmpl_mode) && (ps_c_pu->s_me_info[i].i1_ref_idx != 0))<<i;
1559
130k
    }
1560
1561
    /* Update skip MV for L0 */
1562
65.2k
    if ((i4_mode_avail & 0x1) && (!i4_colzeroflag))
1563
17.0k
    {
1564
17.0k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
1565
17.0k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
1566
17.0k
    }
1567
48.2k
    else
1568
48.2k
    {
1569
48.2k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvx = 0;
1570
48.2k
        ps_proc->ps_skip_mv[0].s_mv.i2_mvy = 0;
1571
48.2k
    }
1572
1573
    /* Update skip MV for L1 */
1574
65.2k
    if ((i4_mode_avail & 0x2) && (!i4_colzeroflag))
1575
19.8k
    {
1576
19.8k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
1577
19.8k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
1578
19.8k
    }
1579
45.4k
    else
1580
45.4k
    {
1581
45.4k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvx = 0;
1582
45.4k
        ps_proc->ps_skip_mv[1].s_mv.i2_mvy = 0;
1583
45.4k
    }
1584
1585
    /* Now see if the ME information matches the SKIP information */
1586
65.2k
    switch (ps_proc->ps_pu->b2_pred_mode)
1587
65.2k
    {
1588
8.26k
        case PRED_BI:
1589
8.26k
            if (  (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1590
8.26k
               && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1591
8.26k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1592
8.26k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1593
8.26k
               && (i4_mode_avail ==  0x3 || i4_mode_avail == 0x0))
1594
725
            {
1595
725
                return 1;
1596
725
            }
1597
7.54k
            break;
1598
1599
30.3k
        case PRED_L0:
1600
30.3k
            if ( (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx == ps_proc->ps_skip_mv[0].s_mv.i2_mvx)
1601
30.3k
              && (ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy == ps_proc->ps_skip_mv[0].s_mv.i2_mvy)
1602
30.3k
              && (i4_mode_avail == 0x1))
1603
9.01k
            {
1604
9.01k
                return 1;
1605
9.01k
            }
1606
21.2k
            break;
1607
1608
26.7k
        case PRED_L1:
1609
26.7k
            if (  (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx == ps_proc->ps_skip_mv[1].s_mv.i2_mvx)
1610
26.7k
               && (ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy == ps_proc->ps_skip_mv[1].s_mv.i2_mvy)
1611
26.7k
               && (i4_mode_avail == 0x2))
1612
8.28k
            {
1613
8.28k
                return 1;
1614
8.28k
            }
1615
18.4k
            break;
1616
65.2k
    }
1617
1618
47.2k
    return 0;
1619
65.2k
}
1620
1621
1622
/**
1623
*******************************************************************************
1624
*
1625
* @brief This function computes the best motion vector among the tentative mv
1626
* candidates chosen.
1627
*
1628
* @par Description:
1629
*  This function determines the position in the search window at which the motion
1630
*  estimation should begin in order to minimise the number of search iterations.
1631
*
1632
* @param[in] ps_me_ctxt
1633
*  pointer to me context
1634
*
1635
* @param[in] ps_proc
1636
*  process context
1637
*
1638
* @param[in] ps_mb_ctxt_bi
1639
*  pointer to current mb partition ctxt with respect to ME
1640
*
1641
* @returns  mv pair & corresponding distortion and cost
1642
*
1643
* @remarks Currently only 4 search candiates are supported
1644
*
1645
*******************************************************************************
1646
*/
1647
void ih264e_evaluate_bipred(me_ctxt_t *ps_me_ctxt,
1648
                            process_ctxt_t *ps_proc,
1649
                            mb_part_ctxt *ps_mb_ctxt_bi)
1650
86.2k
{
1651
1652
86.2k
    UWORD32 i, u4_fast_sad;
1653
1654
86.2k
    WORD32 i4_dest_buff;
1655
1656
86.2k
    mv_t *ps_l0_pred_mv, *ps_l1_pred_mv, s_l0_mv, s_l1_mv;
1657
1658
86.2k
    UWORD8 *pu1_ref_mb_l0, *pu1_ref_mb_l1;
1659
1660
86.2k
    UWORD8 *pu1_dst_buf;
1661
1662
86.2k
    WORD32 i4_ref_l0_stride, i4_ref_l1_stride;
1663
1664
86.2k
    WORD32 i4_mb_distortion, i4_mb_cost;
1665
1666
86.2k
    u4_fast_sad = ps_me_ctxt->u4_enable_fast_sad;
1667
1668
86.2k
    i4_dest_buff = 0;
1669
1670
343k
    for (i = 0; i < ps_me_ctxt->u4_num_candidates[PRED_BI]; i += 2)
1671
257k
    {
1672
257k
        pu1_dst_buf = ps_me_ctxt->apu1_subpel_buffs[i4_dest_buff];
1673
1674
257k
        s_l0_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx >> 2;
1675
257k
        s_l0_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy >> 2;
1676
257k
        s_l1_mv.i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx >> 2;
1677
257k
        s_l1_mv.i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy >> 2;
1678
1679
257k
        ps_l0_pred_mv = &ps_proc->ps_pred_mv[PRED_L0].s_mv;
1680
257k
        ps_l1_pred_mv = &ps_proc->ps_pred_mv[PRED_L1].s_mv;
1681
1682
257k
        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx & 0x3)||
1683
257k
                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy & 0x3))
1684
32.6k
        {
1685
32.6k
            pu1_ref_mb_l0 = ps_me_ctxt->as_mb_part[PRED_L0].pu1_best_hpel_buf;
1686
32.6k
            i4_ref_l0_stride = ps_me_ctxt->u4_subpel_buf_strd;
1687
32.6k
        }
1688
225k
        else
1689
225k
        {
1690
225k
            pu1_ref_mb_l0 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L0] + (s_l0_mv.i2_mvx) + ((s_l0_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1691
225k
            i4_ref_l0_stride = ps_me_ctxt->i4_rec_strd;
1692
225k
        }
1693
1694
1695
257k
        if ((ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx & 0x3) ||
1696
257k
                        (ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy & 0x3))
1697
30.1k
        {
1698
30.1k
            pu1_ref_mb_l1 = ps_me_ctxt->as_mb_part[PRED_L1].pu1_best_hpel_buf;
1699
30.1k
            i4_ref_l1_stride = ps_me_ctxt->u4_subpel_buf_strd;
1700
30.1k
        }
1701
227k
        else
1702
227k
        {
1703
227k
            pu1_ref_mb_l1 = ps_me_ctxt->apu1_ref_buf_luma[PRED_L1] + (s_l1_mv.i2_mvx) + ((s_l1_mv.i2_mvy) * ps_me_ctxt->i4_rec_strd);
1704
227k
            i4_ref_l1_stride = ps_me_ctxt->i4_rec_strd;
1705
227k
        }
1706
1707
257k
        ps_proc->ps_codec->pf_inter_pred_luma_bilinear(
1708
257k
                        pu1_ref_mb_l0, pu1_ref_mb_l1, pu1_dst_buf,
1709
257k
                        i4_ref_l0_stride, i4_ref_l1_stride,
1710
257k
                        ps_me_ctxt->u4_subpel_buf_strd, MB_SIZE, MB_SIZE);
1711
1712
257k
        ps_me_ctxt->pf_ime_compute_sad_16x16[u4_fast_sad](
1713
257k
                        ps_me_ctxt->pu1_src_buf_luma, pu1_dst_buf,
1714
257k
                        ps_me_ctxt->i4_src_strd, ps_me_ctxt->u4_subpel_buf_strd,
1715
257k
                        INT_MAX, &i4_mb_distortion);
1716
1717
        /* compute cost */
1718
257k
        i4_mb_cost =  ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvx - ps_l0_pred_mv->i2_mvx];
1719
257k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i].i2_mvy - ps_l0_pred_mv->i2_mvy];
1720
257k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvx - ps_l1_pred_mv->i2_mvx];
1721
257k
        i4_mb_cost += ps_me_ctxt->pu1_mv_bits[ps_me_ctxt->as_mv_init_search[PRED_BI][i + 1].i2_mvy - ps_l1_pred_mv->i2_mvy];
1722
1723
257k
        i4_mb_cost -= (ps_me_ctxt->i4_skip_bias[BSLICE]) * (ps_me_ctxt->i4_skip_type == PRED_BI) * (i == 0);
1724
1725
1726
257k
        i4_mb_cost *= ps_me_ctxt->u4_lambda_motion;
1727
257k
        i4_mb_cost += i4_mb_distortion;
1728
1729
257k
        if (i4_mb_cost < ps_mb_ctxt_bi->i4_mb_cost)
1730
172k
        {
1731
172k
            ps_mb_ctxt_bi->i4_srch_pos_idx = (i>>1);
1732
172k
            ps_mb_ctxt_bi->i4_mb_cost = i4_mb_cost;
1733
172k
            ps_mb_ctxt_bi->i4_mb_distortion = i4_mb_distortion;
1734
172k
            ps_mb_ctxt_bi->pu1_best_hpel_buf = pu1_dst_buf;
1735
172k
            i4_dest_buff = (i4_dest_buff + 1) % 2;
1736
172k
        }
1737
257k
    }
1738
1739
86.2k
}
1740
1741
/**
1742
*******************************************************************************
1743
*
1744
* @brief This function performs motion estimation for the current mb
1745
*
1746
* @par Description:
1747
*  The current mb is compared with a list of mb's in the reference frame for
1748
*  least cost. The mb that offers least cost is chosen as predicted mb and the
1749
*  displacement of the predicted mb from index location of the current mb is
1750
*  signaled as mv. The list of the mb's that are chosen in the reference frame
1751
*  are dependent on the speed of the ME configured.
1752
*
1753
* @param[in] ps_proc
1754
*  Process context corresponding to the job
1755
*
1756
* @returns  motion vector of the pred mb, sad, cost.
1757
*
1758
* @remarks none
1759
*
1760
*******************************************************************************
1761
*/
1762
void ih264e_compute_me_multi_reflist(process_ctxt_t *ps_proc)
1763
95.0k
{
1764
    /* me ctxt */
1765
95.0k
    me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
1766
1767
    /* codec context */
1768
95.0k
    codec_t *ps_codec = ps_proc->ps_codec;
1769
1770
    /* Temp variables for looping over ref lists */
1771
95.0k
    WORD32 i4_reflist, i4_max_reflist;
1772
1773
    /* recon stride */
1774
95.0k
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1775
1776
    /* source buffer for halp pel generation functions */
1777
95.0k
    UWORD8 *pu1_hpel_src;
1778
1779
    /* quantization parameters */
1780
95.0k
    quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1781
1782
    /* Mb part ctxts for SKIP */
1783
95.0k
    mb_part_ctxt as_skip_mbpart[2];
1784
1785
    /* Sad therholds */
1786
95.0k
    ps_me_ctxt->pu2_sad_thrsh = ps_qp_params->pu2_sad_thrsh;
1787
1788
95.0k
    {
1789
95.0k
        WORD32 rows_above, rows_below, columns_left, columns_right;
1790
1791
        /* During evaluation for motion vectors do not search through padded regions */
1792
        /* Obtain number of rows and columns that are effective for computing for me evaluation */
1793
95.0k
        rows_above = MB_SIZE + ps_proc->i4_mb_y * MB_SIZE;
1794
95.0k
        rows_below = (ps_proc->i4_ht_mbs - ps_proc->i4_mb_y) * MB_SIZE;
1795
95.0k
        columns_left = MB_SIZE + ps_proc->i4_mb_x * MB_SIZE;
1796
95.0k
        columns_right = (ps_proc->i4_wd_mbs - ps_proc->i4_mb_x) * MB_SIZE;
1797
1798
        /* init srch range */
1799
        /* NOTE : For now, lets limit the search range by DEFAULT_MAX_SRCH_RANGE_X / 2
1800
         * on all sides.
1801
         */
1802
95.0k
        ps_me_ctxt->i4_srch_range_w = -MIN(columns_left, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1803
95.0k
        ps_me_ctxt->i4_srch_range_e = MIN(columns_right, DEFAULT_MAX_SRCH_RANGE_X >> 1);
1804
95.0k
        ps_me_ctxt->i4_srch_range_n = -MIN(rows_above, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1805
95.0k
        ps_me_ctxt->i4_srch_range_s = MIN(rows_below, DEFAULT_MAX_SRCH_RANGE_Y >> 1);
1806
1807
        /* this is to facilitate fast sub pel computation with minimal loads */
1808
95.0k
        if (ps_me_ctxt->u4_enable_hpel)
1809
66.0k
        {
1810
66.0k
            ps_me_ctxt->i4_srch_range_w += 1;
1811
66.0k
            ps_me_ctxt->i4_srch_range_e -= 1;
1812
66.0k
            ps_me_ctxt->i4_srch_range_n += 1;
1813
66.0k
            ps_me_ctxt->i4_srch_range_s -= 1;
1814
66.0k
        }
1815
95.0k
    }
1816
1817
    /* Compute ME and store the MVs */
1818
95.0k
    {
1819
        /***********************************************************************
1820
         * Compute ME for lists L0 and L1
1821
         *  For L0 -> L0 skip + L0
1822
         *  for L1 -> L0 skip + L0 + L1 skip + L1
1823
         ***********************************************************************/
1824
95.0k
        i4_max_reflist = (ps_proc->i4_slice_type == PSLICE) ? PRED_L0 : PRED_L1;
1825
1826
        /* Init SATQD for the current list */
1827
95.0k
        ps_me_ctxt->u4_min_sad_reached  = 0;
1828
95.0k
        ps_me_ctxt->i4_min_sad = ps_proc->ps_cur_mb->u4_min_sad;
1829
1830
270k
        for (i4_reflist = PRED_L0; i4_reflist <= i4_max_reflist; i4_reflist++)
1831
184k
        {
1832
1833
            /* Get the seed motion vector candidates                    */
1834
184k
            ih264e_get_search_candidates(ps_proc, ps_me_ctxt, i4_reflist);
1835
1836
            /* ****************************************************************
1837
             *Evaluate the SKIP for current list
1838
             * ****************************************************************/
1839
184k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx = 0;
1840
184k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy = 0;
1841
184k
            as_skip_mbpart[i4_reflist].i4_mb_cost = INT_MAX;
1842
184k
            as_skip_mbpart[i4_reflist].i4_mb_distortion = INT_MAX;
1843
1844
184k
            if (ps_me_ctxt->i4_skip_type == i4_reflist)
1845
51.2k
            {
1846
51.2k
                ime_compute_skip_cost( ps_me_ctxt,
1847
51.2k
                                       (ime_mv_t *)(&ps_proc->ps_skip_mv[i4_reflist].s_mv),
1848
51.2k
                                       &as_skip_mbpart[i4_reflist],
1849
51.2k
                                       ps_proc->ps_codec->s_cfg.u4_enable_satqd,
1850
51.2k
                                       i4_reflist,
1851
51.2k
                                       (ps_proc->i4_slice_type == BSLICE) );
1852
51.2k
            }
1853
1854
184k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1855
184k
            as_skip_mbpart[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1856
1857
            /******************************************************************
1858
             * Evaluate ME For current list
1859
             *****************************************************************/
1860
184k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx = 0;
1861
184k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy = 0;
1862
184k
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = INT_MAX;
1863
184k
            ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = INT_MAX;
1864
1865
            /* Init Hpel */
1866
184k
            ps_me_ctxt->as_mb_part[i4_reflist].pu1_best_hpel_buf = NULL;
1867
1868
            /* In case we found out the minimum SAD, exit the ME eval */
1869
184k
            if (ps_me_ctxt->u4_min_sad_reached)
1870
8.99k
            {
1871
8.99k
                i4_max_reflist = i4_reflist;
1872
8.99k
                break;
1873
8.99k
            }
1874
1875
1876
            /* Evaluate search candidates for initial mv pt */
1877
175k
            ime_evaluate_init_srchposn_16x16(ps_me_ctxt, i4_reflist);
1878
1879
            /********************************************************************/
1880
            /*                  full pel motion estimation                      */
1881
            /********************************************************************/
1882
175k
            ime_full_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1883
1884
175k
            DEBUG_MV_HISTOGRAM_ADD((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2),
1885
175k
                                   (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2));
1886
1887
175k
            DEBUG_SAD_HISTOGRAM_ADD(ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion, 1);
1888
1889
            /* Scale the MV to qpel resolution */
1890
175k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx <<= 2;
1891
175k
            ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy <<= 2;
1892
1893
175k
            if (ps_me_ctxt->u4_enable_hpel)
1894
121k
            {
1895
                /* moving src pointer to the converged motion vector location */
1896
121k
                pu1_hpel_src =   ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]
1897
121k
                               + (ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvx >> 2)
1898
121k
                               + ((ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr.i2_mvy >> 2)* i4_rec_strd);
1899
1900
121k
                ps_me_ctxt->apu1_subpel_buffs[0] = ps_proc->apu1_subpel_buffs[0];
1901
121k
                ps_me_ctxt->apu1_subpel_buffs[1] = ps_proc->apu1_subpel_buffs[1];
1902
121k
                ps_me_ctxt->apu1_subpel_buffs[2] = ps_proc->apu1_subpel_buffs[2];
1903
1904
                /* Init the search position to an invalid number */
1905
121k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_srch_pos_idx = 3;
1906
1907
                /* Incase a buffer is still in use by L0, replace it with spare buff */
1908
121k
                ps_me_ctxt->apu1_subpel_buffs[ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx] =
1909
121k
                                ps_proc->apu1_subpel_buffs[3];
1910
1911
1912
121k
                ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1913
1914
                /* half  pel search is done for both sides of full pel,
1915
                 * hence half_x of width x height = 17x16 is created
1916
                 * starting from left half_x of converged full pel */
1917
121k
                pu1_hpel_src -= 1;
1918
1919
                /* computing half_x */
1920
121k
                ps_codec->pf_ih264e_sixtapfilter_horz(pu1_hpel_src,
1921
121k
                                                      ps_me_ctxt->apu1_subpel_buffs[0],
1922
121k
                                                      i4_rec_strd,
1923
121k
                                                      ps_me_ctxt->u4_subpel_buf_strd);
1924
1925
                /*
1926
                 * Halfpel search is done for both sides of full pel,
1927
                 * hence half_y of width x height = 16x17 is created
1928
                 * starting from top half_y of converged full pel
1929
                 * for half_xy top_left is required
1930
                 * hence it starts from pu1_hpel_src = full_pel_converged_point - i4_rec_strd - 1
1931
                 */
1932
121k
                pu1_hpel_src -= i4_rec_strd;
1933
1934
                /* computing half_y and half_xy */
1935
121k
                ps_codec->pf_ih264e_sixtap_filter_2dvh_vert(
1936
121k
                                pu1_hpel_src, ps_me_ctxt->apu1_subpel_buffs[1],
1937
121k
                                ps_me_ctxt->apu1_subpel_buffs[2], i4_rec_strd,
1938
121k
                                ps_me_ctxt->u4_subpel_buf_strd, ps_proc->ai16_pred1 + 3,
1939
121k
                                ps_me_ctxt->u4_subpel_buf_strd);
1940
1941
121k
                ime_sub_pel_motion_estimation_16x16(ps_me_ctxt, i4_reflist);
1942
1943
121k
            }
1944
175k
        }
1945
1946
        /***********************************************************************
1947
         * If a particular skiip Mv is giving better sad, copy to the corresponding
1948
         * MBPART
1949
         * In B slices this loop should go only to PREDL1: If we found min sad
1950
         * we will go to the skip ref list only
1951
         * Have to find a way to make it without too much change or new vars
1952
         **********************************************************************/
1953
280k
        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
1954
185k
        {
1955
185k
            if (as_skip_mbpart[i4_reflist].i4_mb_cost < ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost)
1956
22.4k
            {
1957
22.4k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost = as_skip_mbpart[i4_reflist].i4_mb_cost;
1958
22.4k
                ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion = as_skip_mbpart[i4_reflist].i4_mb_distortion;
1959
22.4k
                ps_me_ctxt->as_mb_part[i4_reflist].s_mv_curr = as_skip_mbpart[i4_reflist].s_mv_curr;
1960
22.4k
            }
1961
185k
        }
1962
1963
        /***********************************************************************
1964
         * Compute ME for BI
1965
         *  In case of BI we do ME for two candidates
1966
         *   1) The best L0 and L1 Mvs
1967
         *   2) Skip L0 and L1 MVs
1968
         *
1969
         *   TODO
1970
         *   one of the search candidates is skip. Hence it may be duplicated
1971
         ***********************************************************************/
1972
95.0k
        if (i4_max_reflist == PRED_L1 && ps_me_ctxt->u4_min_sad_reached == 0)
1973
86.2k
        {
1974
86.2k
            WORD32 i, j = 0;
1975
86.2k
            WORD32 l0_srch_pos_idx, l1_srch_pos_idx;
1976
86.2k
            WORD32 i4_l0_skip_mv_idx, i4_l1_skip_mv_idx;
1977
1978
            /* Get the free buffers */
1979
86.2k
            l0_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L0].i4_srch_pos_idx;
1980
86.2k
            l1_srch_pos_idx = ps_me_ctxt->as_mb_part[PRED_L1].i4_srch_pos_idx;
1981
1982
            /* Search for the two free buffers in subpel list */
1983
431k
            for (i = 0; i < SUBPEL_BUFF_CNT; i++)
1984
344k
            {
1985
344k
                if (i != l0_srch_pos_idx && i != l1_srch_pos_idx)
1986
211k
                {
1987
211k
                    ps_me_ctxt->apu1_subpel_buffs[j] = ps_proc->apu1_subpel_buffs[i];
1988
211k
                    j++;
1989
211k
                }
1990
344k
            }
1991
86.2k
            ps_me_ctxt->u4_subpel_buf_strd = HP_BUFF_WD;
1992
1993
            /* Copy the statial SKIP MV of each list */
1994
86.2k
            i4_l0_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L0] - 2;
1995
86.2k
            i4_l1_skip_mv_idx = ps_me_ctxt->u4_num_candidates[PRED_L1] - 2;
1996
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
1997
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][0].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
1998
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
1999
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][1].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2000
2001
            /* Copy the SKIP MV temporal of each list */
2002
86.2k
            i4_l0_skip_mv_idx++;
2003
86.2k
            i4_l1_skip_mv_idx++;
2004
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvx << 2;
2005
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][2].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L0][i4_l0_skip_mv_idx].i2_mvy << 2;
2006
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvx = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvx << 2;
2007
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][3].i2_mvy = ps_me_ctxt->as_mv_init_search[PRED_L1][i4_l1_skip_mv_idx].i2_mvy << 2;
2008
2009
            /* Copy the best MV after ME */
2010
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][4] = ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr;
2011
86.2k
            ps_me_ctxt->as_mv_init_search[PRED_BI][5] = ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr;
2012
2013
86.2k
            ps_me_ctxt->u4_num_candidates[PRED_BI] = 6;
2014
2015
86.2k
            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_cost = INT_MAX;
2016
86.2k
            ps_me_ctxt->as_mb_part[PRED_BI].i4_mb_distortion = INT_MAX;
2017
2018
86.2k
            ih264e_evaluate_bipred(ps_me_ctxt, ps_proc,
2019
86.2k
                                   &ps_me_ctxt->as_mb_part[PRED_BI]);
2020
2021
86.2k
            i4_max_reflist = PRED_BI;
2022
86.2k
        }
2023
2024
        /**********************************************************************
2025
         * Now get the minimum of MB part sads by searching over all ref lists
2026
         **********************************************************************/
2027
95.0k
        ps_proc->ps_pu->b2_pred_mode = 0x3;
2028
2029
366k
        for (i4_reflist = 0; i4_reflist <= i4_max_reflist; i4_reflist++)
2030
271k
        {
2031
271k
            if (ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost < ps_proc->ps_cur_mb->i4_mb_cost)
2032
153k
            {
2033
153k
                ps_proc->ps_cur_mb->i4_mb_cost = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_cost;
2034
153k
                ps_proc->ps_cur_mb->i4_mb_distortion = ps_me_ctxt->as_mb_part[i4_reflist].i4_mb_distortion;
2035
153k
                ps_proc->ps_cur_mb->u4_mb_type = (ps_proc->i4_slice_type == PSLICE) ? P16x16 : B16x16;
2036
153k
                ps_proc->ps_pu->b2_pred_mode = i4_reflist ;
2037
153k
            }
2038
271k
        }
2039
2040
        /**********************************************************************
2041
         * In case we have a BI MB, we have to copy the buffers and set proer MV's
2042
         *  1)In case its BI, we need to get the best MVs given by BI and update
2043
         *    to their corresponding MB part
2044
         *  2)We also need to copy the buffer in which bipred buff is populated
2045
         *
2046
         *  Not that if we have
2047
         **********************************************************************/
2048
95.0k
        if (ps_proc->ps_pu->b2_pred_mode == PRED_BI)
2049
13.5k
        {
2050
13.5k
            WORD32 i4_srch_pos = ps_me_ctxt->as_mb_part[PRED_BI].i4_srch_pos_idx;
2051
13.5k
            UWORD8 *pu1_bi_buf = ps_me_ctxt->as_mb_part[PRED_BI].pu1_best_hpel_buf;
2052
2053
13.5k
            ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][i4_srch_pos << 1];
2054
13.5k
            ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr = ps_me_ctxt->as_mv_init_search[PRED_BI][(i4_srch_pos << 1) + 1];
2055
2056
            /* Now we have to copy the buffers */
2057
13.5k
            ps_codec->pf_inter_pred_luma_copy(pu1_bi_buf,
2058
13.5k
                                              ps_proc->pu1_best_subpel_buf,
2059
13.5k
                                              ps_me_ctxt->u4_subpel_buf_strd,
2060
13.5k
                                              ps_proc->u4_bst_spel_buf_strd,
2061
13.5k
                                              MB_SIZE, MB_SIZE, NULL, 0);
2062
2063
13.5k
        }
2064
81.4k
        else if (ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf)
2065
25.3k
        {
2066
            /* Now we have to copy the buffers */
2067
25.3k
            ps_codec->pf_inter_pred_luma_copy(
2068
25.3k
                            ps_me_ctxt->as_mb_part[ps_proc->ps_pu->b2_pred_mode].pu1_best_hpel_buf,
2069
25.3k
                            ps_proc->pu1_best_subpel_buf,
2070
25.3k
                            ps_me_ctxt->u4_subpel_buf_strd,
2071
25.3k
                            ps_proc->u4_bst_spel_buf_strd, MB_SIZE, MB_SIZE,
2072
25.3k
                            NULL, 0);
2073
25.3k
        }
2074
95.0k
    }
2075
2076
    /**************************************************************************
2077
     *Now copy the MVs to the current PU with qpel scaling
2078
     ***************************************************************************/
2079
95.0k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvx);
2080
95.0k
    ps_proc->ps_pu->s_me_info[PRED_L0].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L0].s_mv_curr.i2_mvy);
2081
95.0k
    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvx = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvx);
2082
95.0k
    ps_proc->ps_pu->s_me_info[PRED_L1].s_mv.i2_mvy = (ps_me_ctxt->as_mb_part[PRED_L1].s_mv_curr.i2_mvy);
2083
2084
2085
95.0k
    ps_proc->ps_pu->s_me_info[0].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L1)? -1:0;
2086
95.0k
    ps_proc->ps_pu->s_me_info[1].i1_ref_idx = (ps_proc->ps_pu->b2_pred_mode != PRED_L0)? -1:0;
2087
2088
    /* number of partitions */
2089
95.0k
    ps_proc->u4_num_sub_partitions = 1;
2090
95.0k
    *(ps_proc->pu4_mb_pu_cnt) = 1;
2091
2092
    /* position in-terms of PU */
2093
95.0k
    ps_proc->ps_pu->b4_pos_x = 0;
2094
95.0k
    ps_proc->ps_pu->b4_pos_y = 0;
2095
2096
    /* PU size */
2097
95.0k
    ps_proc->ps_pu->b4_wd = 3;
2098
95.0k
    ps_proc->ps_pu->b4_ht = 3;
2099
2100
    /* Update min sad conditions */
2101
95.0k
    if (ps_me_ctxt->u4_min_sad_reached == 1)
2102
9.01k
    {
2103
9.01k
        ps_proc->ps_cur_mb->u4_min_sad_reached = 1;
2104
9.01k
        ps_proc->ps_cur_mb->u4_min_sad = ps_me_ctxt->i4_min_sad;
2105
9.01k
    }
2106
95.0k
}
2107