Coverage Report

Created: 2025-10-10 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_inter_pred.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
*******************************************************************************
23
* @file
24
*  ihevce_inter_pred.c
25
*
26
* @brief
27
*  Contains functions for giving out prediction samples for a given pu
28
*
29
* @author
30
*  Ittiam
31
*
32
* @par List of Functions:
33
*   - ihevc_inter_pred()
34
*
35
*
36
*******************************************************************************
37
*/
38
/* System include files */
39
#include <stdio.h>
40
#include <string.h>
41
#include <stdlib.h>
42
#include <assert.h>
43
#include <stdarg.h>
44
#include <math.h>
45
46
/* User include files */
47
#include "ihevc_typedefs.h"
48
#include "itt_video_api.h"
49
#include "ihevce_api.h"
50
51
#include "rc_cntrl_param.h"
52
#include "rc_frame_info_collector.h"
53
#include "rc_look_ahead_params.h"
54
55
#include "ihevc_debug.h"
56
#include "ihevc_defs.h"
57
#include "ihevc_structs.h"
58
#include "ihevc_platform_macros.h"
59
#include "ihevc_deblk.h"
60
#include "ihevc_itrans_recon.h"
61
#include "ihevc_chroma_itrans_recon.h"
62
#include "ihevc_chroma_intra_pred.h"
63
#include "ihevc_intra_pred.h"
64
#include "ihevc_inter_pred.h"
65
#include "ihevc_mem_fns.h"
66
#include "ihevc_padding.h"
67
#include "ihevc_weighted_pred.h"
68
#include "ihevc_sao.h"
69
#include "ihevc_resi_trans.h"
70
#include "ihevc_quant_iquant_ssd.h"
71
#include "ihevc_cabac_tables.h"
72
73
#include "ihevce_defs.h"
74
#include "ihevce_lap_enc_structs.h"
75
#include "ihevce_multi_thrd_structs.h"
76
#include "ihevce_me_common_defs.h"
77
#include "ihevce_had_satd.h"
78
#include "ihevce_error_codes.h"
79
#include "ihevce_bitstream.h"
80
#include "ihevce_cabac.h"
81
#include "ihevce_rdoq_macros.h"
82
#include "ihevce_function_selector.h"
83
#include "ihevce_enc_structs.h"
84
#include "ihevce_entropy_structs.h"
85
#include "ihevce_cmn_utils_instr_set_router.h"
86
#include "ihevce_enc_loop_structs.h"
87
#include "ihevce_inter_pred.h"
88
#include "ihevc_weighted_pred.h"
89
90
/*****************************************************************************/
91
/* Global tables                                                             */
92
/*****************************************************************************/
93
94
/**
95
******************************************************************************
96
* @brief  Table of filter tap coefficients for HEVC luma inter prediction
97
* input   : sub pel mv position (dx/dy = 0:3)
98
* output  : filter coeffs to be used for that position
99
*
100
* @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
101
******************************************************************************
102
*/
103
WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
104
                                                    { -1, 4, -10, 58, 17, -5, 1, 0 },
105
                                                    { -1, 4, -11, 40, 40, -11, 4, -1 },
106
                                                    { 0, 1, -5, 17, 58, -10, 4, -1 } };
107
108
/**
109
******************************************************************************
110
* @brief  Table of filter tap coefficients for HEVC chroma inter prediction
111
* input   : chroma sub pel mv position (dx/dy = 0:7)
112
* output  : filter coeffs to be used for that position
113
*
114
* @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
115
The filter uses only the first four elements in each array
116
******************************************************************************
117
*/
118
WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 },    { -2, 58, 10, -2 },
119
                                                        { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
120
                                                        { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
121
                                                        { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
122
123
/*****************************************************************************/
124
/* Function Definitions                                                      */
125
/*****************************************************************************/
126
127
/**
128
*******************************************************************************
129
*
130
* @brief
131
*  Performs Luma inter pred based on sub pel position dxdy and store the result
132
*  in a 16 bit destination buffer
133
*
134
* @param[in] pu1_src
135
*  pointer to the source correspoding to integer pel position of a mv (left and
136
*  top justified integer position)
137
*
138
* @param[out] pi2_dst
139
*  WORD16 pointer to the destination
140
*
141
* @param[in] src_strd
142
*  source buffer stride
143
*
144
* @param[in] dst_strd
145
*  destination buffer stride
146
*
147
* @param[in] pi2_hdst_scratch
148
*  scratch buffer for intermediate storage of horizontal filter output; used as
149
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
150
*
151
*  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
152
*
153
* @param[in] ht
154
*  width of the prediction unit
155
*
156
* @param[in] wd
157
*  width of the prediction unit
158
*
159
* @param[in] dx
160
*  qpel position[0:3] of mv in x direction
161
*
162
* @param[in] dy
163
*  qpel position[0:3] of mv in y direction
164
*
165
* @returns
166
*   none
167
*
168
* @remarks
169
*
170
*******************************************************************************
171
*/
172
void ihevce_luma_interpolate_16bit_dxdy(
173
    UWORD8 *pu1_src,
174
    WORD16 *pi2_dst,
175
    WORD32 src_strd,
176
    WORD32 dst_strd,
177
    WORD16 *pi2_hdst_scratch,
178
    WORD32 ht,
179
    WORD32 wd,
180
    WORD32 dy,
181
    WORD32 dx,
182
    func_selector_t *ps_func_selector)
183
3.67M
{
184
3.67M
    if((0 == dx) && (0 == dy))
185
2.87M
    {
186
        /*--------- full pel position : copy input by upscaling-------*/
187
188
2.87M
        ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
189
2.87M
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
190
2.87M
    }
191
804k
    else if((0 != dx) && (0 != dy))
192
257k
    {
193
        /*----------sub pel in both x and y direction---------*/
194
195
257k
        UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
196
257k
        WORD32 hdst_buf_stride = wd;
197
257k
        WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
198
199
        /* horizontal filtering of source done in a scratch buffer first  */
200
257k
        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
201
257k
            pu1_horz_src,
202
257k
            pi2_hdst_scratch,
203
257k
            src_strd,
204
257k
            hdst_buf_stride,
205
257k
            &gai1_hevc_luma_filter_taps[dx][0],
206
257k
            (ht + NTAPS_LUMA - 1),
207
257k
            wd);
208
209
        /* vertical filtering on scratch buffer and stored in desitnation  */
210
257k
        ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
211
257k
            pi2_vert_src,
212
257k
            pi2_dst,
213
257k
            hdst_buf_stride,
214
257k
            dst_strd,
215
257k
            &gai1_hevc_luma_filter_taps[dy][0],
216
257k
            ht,
217
257k
            wd);
218
257k
    }
219
547k
    else if(0 == dy)
220
236k
    {
221
        /*----------sub pel in x direction only ---------*/
222
223
236k
        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
224
236k
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
225
236k
    }
226
311k
    else /* if (0 == dx) */
227
311k
    {
228
        /*----------sub pel in y direction only ---------*/
229
230
311k
        ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
231
311k
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
232
311k
    }
233
3.67M
}
234
235
/**
236
*******************************************************************************
237
*
238
* @brief
239
*  Performs Luma inter pred based on sub pel position dxdy and store the result
240
*  in a 8 bit destination buffer
241
*
242
* @param[in] pu1_src
243
*  pointer to the source correspoding to integer pel position of a mv (left and
244
*  top justified integer position)
245
*
246
* @param[out] pu1_dst
247
*  UWORD8 pointer to the destination
248
*
249
* @param[in] src_strd
250
*  source buffer stride
251
*
252
* @param[in] dst_strd
253
*  destination buffer stride
254
*
255
* @param[in] pi2_hdst_scratch
256
*  scratch buffer for intermediate storage of horizontal filter output; used as
257
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
258
*
259
*  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
260
*
261
* @param[in] ht
262
*  width of the prediction unit
263
*
264
* @param[in] wd
265
*  width of the prediction unit
266
*
267
* @param[in] dx
268
*  qpel position[0:3] of mv in x direction
269
*
270
* @param[in] dy
271
*  qpel position[0:3] of mv in y direction
272
*
273
* @returns
274
*   none
275
*
276
* @remarks
277
*
278
*******************************************************************************
279
*/
280
void ihevce_luma_interpolate_8bit_dxdy(
281
    UWORD8 *pu1_src,
282
    UWORD8 *pu1_dst,
283
    WORD32 src_strd,
284
    WORD32 dst_strd,
285
    WORD16 *pi2_hdst_scratch,
286
    WORD32 ht,
287
    WORD32 wd,
288
    WORD32 dy,
289
    WORD32 dx,
290
    func_selector_t *ps_func_selector)
291
6.43M
{
292
6.43M
    if((0 == dx) && (0 == dy))
293
5.43M
    {
294
        /*--------- full pel position : copy input as is -------*/
295
296
5.43M
        ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
297
5.43M
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
298
5.43M
    }
299
993k
    else if((0 != dx) && (0 != dy))
300
340k
    {
301
        /*----------sub pel in both x and y direction---------*/
302
303
340k
        UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
304
340k
        WORD32 hdst_buf_stride = wd;
305
340k
        WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
306
307
        /* horizontal filtering of source done in a scratch buffer first  */
308
340k
        ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
309
340k
            pu1_horz_src,
310
340k
            pi2_hdst_scratch,
311
340k
            src_strd,
312
340k
            hdst_buf_stride,
313
340k
            &gai1_hevc_luma_filter_taps[dx][0],
314
340k
            (ht + NTAPS_LUMA - 1),
315
340k
            wd);
316
317
        /* vertical filtering on scratch buffer and stored in desitnation  */
318
340k
        ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
319
340k
            pi2_vert_src,
320
340k
            pu1_dst,
321
340k
            hdst_buf_stride,
322
340k
            dst_strd,
323
340k
            &gai1_hevc_luma_filter_taps[dy][0],
324
340k
            ht,
325
340k
            wd);
326
340k
    }
327
653k
    else if(0 == dy)
328
323k
    {
329
        /*----------sub pel in x direction only ---------*/
330
331
323k
        ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
332
323k
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
333
323k
    }
334
329k
    else /* if (0 == dx) */
335
329k
    {
336
        /*----------sub pel in y direction only ---------*/
337
338
329k
        ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
339
329k
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
340
329k
    }
341
6.43M
}
342
343
/**
344
*******************************************************************************
345
*
346
* @brief
347
*  Performs Luma prediction for a inter prediction unit(PU)
348
*
349
* @par Description:
350
*  For a given PU, Inter prediction followed by weighted prediction (if
351
*  required)
352
*
353
* @param[in] ps_inter_pred_ctxt
354
*  context for inter prediction; contains ref list, weight offsets, ctb offsets
355
*
356
* @param[in] ps_pu
357
*  pointer to PU structure whose inter prediction needs to be done
358
*
359
* @param[in] pu1_dst_buf
360
*  pointer to destination buffer where the inter prediction is done
361
*
362
* @param[in] dst_stride
363
*  pitch of the destination buffer
364
*
365
* @returns
366
*   IV_FAIL for mvs going outside ref frame padded limits
367
*   IV_SUCCESS after completing mc for given inter pu
368
*
369
* @remarks
370
*
371
*******************************************************************************
372
*/
373
IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
374
    void *pv_inter_pred_ctxt,
375
    pu_t *ps_pu,
376
    void *pv_dst_buf,
377
    WORD32 dst_stride,
378
    WORD32 i4_flag_inter_pred_source)
379
8.27M
{
380
8.27M
    inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
381
8.27M
    func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
382
383
8.27M
    WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
384
8.27M
    UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
385
8.27M
    WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
386
8.27M
    WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
387
388
8.27M
    WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
389
8.27M
                     ps_inter_pred_ctxt->i1_weighted_bipred_flag;
390
391
    /* 16bit dest required for interpolate if weighted pred is on or bipred */
392
8.27M
    WORD32 store_16bit_output;
393
394
8.27M
    recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
395
8.27M
    UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
396
8.27M
    WORD32 ref_pic_stride;
397
398
    /* offset of reference block in integer pel units */
399
8.27M
    WORD32 frm_x_ofst, frm_y_ofst;
400
8.27M
    WORD32 frm_x_pu, frm_y_pu;
401
402
    /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
403
8.27M
    WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
404
8.27M
    WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
405
406
    /* scratch buffer for horizontal interpolation destination */
407
8.27M
    WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
408
409
8.27M
    WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
410
411
    /* get PU's frm x and frm y offset */
412
8.27M
    frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
413
8.27M
    frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
414
415
    /* sanity checks */
416
8.27M
    ASSERT((wp_flag == 0) || (wp_flag == 1));
417
8.27M
    ASSERT(dst_stride >= pu_wd);
418
8.27M
    ASSERT(ps_pu->b1_intra_flag == 0);
419
420
8.27M
    lvl_shift0 = 0;
421
8.27M
    lvl_shift1 = 0;
422
423
8.27M
    if(wp_flag)
424
0
    {
425
0
        UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
426
427
0
        if(inter_pred_idc != PRED_L1)
428
0
        {
429
0
            ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
430
0
            u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
431
0
        }
432
0
        if(inter_pred_idc != PRED_L0)
433
0
        {
434
0
            ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
435
0
            u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
436
0
        }
437
0
        if(inter_pred_idc == PRED_BI)
438
0
        {
439
0
            wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
440
0
        }
441
0
        else if(inter_pred_idc == PRED_L0)
442
0
        {
443
0
            wp_flag = u1_is_wgt_pred_L0;
444
0
        }
445
0
        else if(inter_pred_idc == PRED_L1)
446
0
        {
447
0
            wp_flag = u1_is_wgt_pred_L1;
448
0
        }
449
0
        else
450
0
        {
451
            /*other values are not allowed*/
452
0
            assert(0);
453
0
        }
454
0
    }
455
8.27M
    store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
456
457
8.27M
    if(inter_pred_idc != PRED_L1)
458
8.07M
    {
459
        /*****************************************************/
460
        /*              L0 inter prediction                  */
461
        /*****************************************************/
462
463
        /* motion vecs in qpel precision                    */
464
8.07M
        WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
465
8.07M
        WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
466
467
        /* sub pel offsets in x and y direction w.r.t integer pel   */
468
8.07M
        WORD32 dx = mv_x & 0x3;
469
8.07M
        WORD32 dy = mv_y & 0x3;
470
471
        /* ref idx is currently stored in the lower 4bits           */
472
8.07M
        WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
473
474
        /*  x and y integer offsets w.r.t frame start               */
475
8.07M
        frm_x_ofst = (frm_x_pu + (mv_x >> 2));
476
8.07M
        frm_y_ofst = (frm_y_pu + (mv_y >> 2));
477
478
8.07M
        ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
479
480
        /* picture buffer start and stride */
481
8.07M
        if(i4_flag_inter_pred_source == 1)
482
0
        {
483
0
            pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
484
0
        }
485
8.07M
        else
486
8.07M
        {
487
8.07M
            pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
488
8.07M
        }
489
8.07M
        ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
490
491
        /* Error check for mvs going out of ref frame padded limits */
492
8.07M
        {
493
8.07M
            WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
494
8.07M
            WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
495
496
8.07M
            min_x =
497
8.07M
                -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
498
8.07M
                      ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
499
8.07M
                      : (PAD_HORZ - 4));
500
501
8.07M
            max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
502
8.07M
                         ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
503
8.07M
                         : (PAD_HORZ - 4);
504
505
8.07M
            min_y =
506
8.07M
                -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
507
8.07M
                      ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
508
8.07M
                      : (PAD_VERT - 4));
509
510
8.07M
            max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
511
8.07M
                         ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
512
8.07M
                         : (PAD_VERT - 4);
513
514
8.07M
            if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
515
                //ASSERT(0);
516
1.65k
                return (IV_FAIL);
517
518
8.07M
            if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
519
                //ASSERT(0);
520
556
                return (IV_FAIL);
521
8.07M
        }
522
523
        /* point to reference start location in ref frame           */
524
        /* Assuming clipping of mv is not required here as ME would */
525
        /* take care of mv access not going beyond padded data      */
526
8.07M
        pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
527
528
        /* level shifted for subpel with both x and y componenet being non 0 */
529
        /* this is because the interpolate function subtract this to contain */
530
        /* the resulting data in 16 bits                                     */
531
8.07M
        lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
532
533
8.07M
        if(store_16bit_output)
534
1.83M
        {
535
            /* do interpolation in 16bit L0 scratch buffer */
536
1.83M
            ihevce_luma_interpolate_16bit_dxdy(
537
1.83M
                pu1_ref_int_pel,
538
1.83M
                pi2_scr_buf_l0,
539
1.83M
                ref_pic_stride,
540
1.83M
                pu_wd,
541
1.83M
                pi2_horz_scratch,
542
1.83M
                pu_ht,
543
1.83M
                pu_wd,
544
1.83M
                dy,
545
1.83M
                dx,
546
1.83M
                ps_func_selector);
547
1.83M
        }
548
6.23M
        else
549
6.23M
        {
550
            /* do interpolation in 8bit destination buffer and return */
551
6.23M
            ihevce_luma_interpolate_8bit_dxdy(
552
6.23M
                pu1_ref_int_pel,
553
6.23M
                pu1_dst_buf,
554
6.23M
                ref_pic_stride,
555
6.23M
                dst_stride,
556
6.23M
                pi2_horz_scratch,
557
6.23M
                pu_ht,
558
6.23M
                pu_wd,
559
6.23M
                dy,
560
6.23M
                dx,
561
6.23M
                ps_func_selector);
562
563
6.23M
            return (IV_SUCCESS);
564
6.23M
        }
565
8.07M
    }
566
567
2.04M
    if(inter_pred_idc != PRED_L0)
568
2.04M
    {
569
        /*****************************************************/
570
        /*      L1 inter prediction                          */
571
        /*****************************************************/
572
573
        /* motion vecs in qpel precision                            */
574
2.04M
        WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
575
2.04M
        WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
576
577
        /* sub pel offsets in x and y direction w.r.t integer pel   */
578
2.04M
        WORD32 dx = mv_x & 0x3;
579
2.04M
        WORD32 dy = mv_y & 0x3;
580
581
        /* ref idx is currently stored in the lower 4bits           */
582
2.04M
        WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
583
584
        /*  x and y integer offsets w.r.t frame start               */
585
2.04M
        frm_x_ofst = (frm_x_pu + (mv_x >> 2));
586
2.04M
        frm_y_ofst = (frm_y_pu + (mv_y >> 2));
587
588
2.04M
        ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
589
590
        /* picture buffer start and stride */
591
592
2.04M
        if(i4_flag_inter_pred_source == 1)
593
0
        {
594
0
            pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
595
0
        }
596
2.04M
        else
597
2.04M
        {
598
2.04M
            pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
599
2.04M
        }
600
2.04M
        ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
601
602
        /* Error check for mvs going out of ref frame padded limits */
603
2.04M
        {
604
2.04M
            WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
605
2.04M
            WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
606
607
2.04M
            min_x =
608
2.04M
                -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
609
2.04M
                      ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
610
2.04M
                      : (PAD_HORZ - 4));
611
612
2.04M
            max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
613
2.04M
                         ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
614
2.04M
                         : (PAD_HORZ - 4);
615
616
2.04M
            min_y =
617
2.04M
                -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
618
2.04M
                      ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
619
2.04M
                      : (PAD_VERT - 4));
620
621
2.04M
            max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
622
2.04M
                         ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
623
2.04M
                         : (PAD_VERT - 4);
624
625
2.04M
            if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
626
                //ASSERT(0);
627
363
                return (IV_FAIL);
628
629
2.03M
            if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
630
                //ASSERT(0);
631
176
                return (IV_FAIL);
632
2.03M
        }
633
634
        /* point to reference start location in ref frame           */
635
        /* Assuming clipping of mv is not required here as ME would */
636
        /* take care of mv access not going beyond padded data      */
637
2.03M
        pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
638
639
        /* level shifted for subpel with both x and y componenet being non 0 */
640
        /* this is because the interpolate function subtract this to contain */
641
        /* the resulting data in 16 bits                                     */
642
2.03M
        lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
643
644
2.03M
        if(store_16bit_output)
645
1.83M
        {
646
            /* do interpolation in 16bit L1 scratch buffer */
647
1.83M
            ihevce_luma_interpolate_16bit_dxdy(
648
1.83M
                pu1_ref_int_pel,
649
1.83M
                pi2_scr_buf_l1,
650
1.83M
                ref_pic_stride,
651
1.83M
                pu_wd,
652
1.83M
                pi2_horz_scratch,
653
1.83M
                pu_ht,
654
1.83M
                pu_wd,
655
1.83M
                dy,
656
1.83M
                dx,
657
1.83M
                ps_func_selector);
658
1.83M
        }
659
200k
        else
660
200k
        {
661
            /* do interpolation in 8bit destination buffer and return */
662
200k
            ihevce_luma_interpolate_8bit_dxdy(
663
200k
                pu1_ref_int_pel,
664
200k
                pu1_dst_buf,
665
200k
                ref_pic_stride,
666
200k
                dst_stride,
667
200k
                pi2_horz_scratch,
668
200k
                pu_ht,
669
200k
                pu_wd,
670
200k
                dy,
671
200k
                dx,
672
200k
                ps_func_selector);
673
674
200k
            return (IV_SUCCESS);
675
200k
        }
676
2.03M
    }
677
678
1.83M
    if((inter_pred_idc != PRED_BI) && wp_flag)
679
0
    {
680
        /*****************************************************/
681
        /*      unidirection weighted prediction             */
682
        /*****************************************************/
683
0
        ihevce_wght_offst_t *ps_weight_offset;
684
0
        WORD16 *pi2_src;
685
0
        WORD32 lvl_shift;
686
687
        /* intialize the weight, offsets and ref based on l0/l1 mode */
688
0
        if(inter_pred_idc == PRED_L0)
689
0
        {
690
0
            pi2_src = pi2_scr_buf_l0;
691
0
            ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
692
0
            lvl_shift = lvl_shift0;
693
0
        }
694
0
        else
695
0
        {
696
0
            pi2_src = pi2_scr_buf_l1;
697
0
            ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
698
0
            lvl_shift = lvl_shift1;
699
0
        }
700
701
0
        wgt0 = ps_weight_offset->i2_luma_weight;
702
0
        off0 = ps_weight_offset->i2_luma_offset;
703
0
        shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
704
705
        /* do the uni directional weighted prediction */
706
0
        ps_func_selector->ihevc_weighted_pred_uni_fptr(
707
0
            pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
708
0
    }
709
1.83M
    else
710
1.83M
    {
711
        /*****************************************************/
712
        /*              Bipred  prediction                   */
713
        /*****************************************************/
714
715
1.83M
        if(wp_flag)
716
0
        {
717
            /*****************************************************/
718
            /*      Bi pred  weighted prediction                 */
719
            /*****************************************************/
720
0
            wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
721
0
            off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
722
723
0
            wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
724
0
            off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
725
726
0
            shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
727
728
0
            ps_func_selector->ihevc_weighted_pred_bi_fptr(
729
0
                pi2_scr_buf_l0,
730
0
                pi2_scr_buf_l1,
731
0
                pu1_dst_buf,
732
0
                pu_wd,
733
0
                pu_wd,
734
0
                dst_stride,
735
0
                wgt0,
736
0
                off0,
737
0
                wgt1,
738
0
                off1,
739
0
                shift,
740
0
                lvl_shift0,
741
0
                lvl_shift1,
742
0
                pu_ht,
743
0
                pu_wd);
744
0
        }
745
1.83M
        else
746
1.83M
        {
747
            /*****************************************************/
748
            /*          Default Bi pred  prediction              */
749
            /*****************************************************/
750
1.83M
            ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
751
1.83M
                pi2_scr_buf_l0,
752
1.83M
                pi2_scr_buf_l1,
753
1.83M
                pu1_dst_buf,
754
1.83M
                pu_wd,
755
1.83M
                pu_wd,
756
1.83M
                dst_stride,
757
1.83M
                lvl_shift0,
758
1.83M
                lvl_shift1,
759
1.83M
                pu_ht,
760
1.83M
                pu_wd);
761
1.83M
        }
762
1.83M
    }
763
764
1.83M
    return (IV_SUCCESS);
765
2.04M
}
766
767
/**
768
*******************************************************************************
769
*
770
* @brief
771
*  Performs Chroma inter pred based on sub pel position dxdy and store the
772
*  result in a 16 bit destination buffer
773
*
774
* @param[in] pu1_src
775
*  pointer to the source corresponding to integer pel position of a mv (left and
776
*  top justified integer position)
777
*
778
* @param[out] pi2_dst
779
*  WORD16 pointer to the destination
780
*
781
* @param[in] src_strd
782
*  source buffer stride
783
*
784
* @param[in] dst_strd
785
*  destination buffer stride
786
*
787
* @param[in] pi2_hdst_scratch
788
*  scratch buffer for intermediate storage of horizontal filter output; used as
789
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
790
*
791
*  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
792
*
793
* @param[in] ht
794
*  height of the prediction unit
795
*
796
* @param[in] wd
797
*  width of the prediction unit
798
*
799
* @param[in] dx
800
*  1/8th pel position[0:7] of mv in x direction
801
*
802
* @param[in] dy
803
*  1/8th pel position[0:7] of mv in y direction
804
*
805
* @returns
806
*   none
807
*
808
* @remarks
809
*
810
*******************************************************************************
811
*/
812
void ihevce_chroma_interpolate_16bit_dxdy(
813
    UWORD8 *pu1_src,
814
    WORD16 *pi2_dst,
815
    WORD32 src_strd,
816
    WORD32 dst_strd,
817
    WORD16 *pi2_hdst_scratch,
818
    WORD32 ht,
819
    WORD32 wd,
820
    WORD32 dy,
821
    WORD32 dx,
822
    func_selector_t *ps_func_selector)
823
869k
{
824
869k
    if((0 == dx) && (0 == dy))
825
571k
    {
826
        /*--------- full pel position : copy input by upscaling-------*/
827
828
571k
        ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
829
571k
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
830
571k
    }
831
298k
    else if((0 != dx) && (0 != dy))
832
125k
    {
833
        /*----------sub pel in both x and y direction---------*/
834
835
125k
        UWORD8 *pu1_horz_src = pu1_src - src_strd;
836
125k
        WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
837
125k
        WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
838
839
        /* horizontal filtering of source done in a scratch buffer first  */
840
125k
        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
841
125k
            pu1_horz_src,
842
125k
            pi2_hdst_scratch,
843
125k
            src_strd,
844
125k
            hdst_buf_stride,
845
125k
            &gai1_hevc_chroma_filter_taps[dx][0],
846
125k
            (ht + NTAPS_CHROMA - 1),
847
125k
            wd);
848
849
        /* vertical filtering on scratch buffer and stored in desitnation  */
850
125k
        ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
851
125k
            pi2_vert_src,
852
125k
            pi2_dst,
853
125k
            hdst_buf_stride,
854
125k
            dst_strd,
855
125k
            &gai1_hevc_chroma_filter_taps[dy][0],
856
125k
            ht,
857
125k
            wd);
858
125k
    }
859
172k
    else if(0 == dy)
860
74.9k
    {
861
        /*----------sub pel in x direction only ---------*/
862
863
74.9k
        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
864
74.9k
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
865
74.9k
    }
866
97.9k
    else /* if (0 == dx) */
867
97.9k
    {
868
        /*----------sub pel in y direction only ---------*/
869
870
97.9k
        ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
871
97.9k
            pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
872
97.9k
    }
873
869k
}
874
875
/**
876
*******************************************************************************
877
*
878
* @brief
879
*  Performs Chroma inter pred based on sub pel position dxdy and store the
880
*  result in a 8 bit destination buffer
881
*
882
* @param[in] pu1_src
883
*  pointer to the source correspoding to integer pel position of a mv (left and
884
*  top justified integer position)
885
*
886
* @param[out] pu1_dst
887
*  UWORD8 pointer to the destination
888
*
889
* @param[in] src_strd
890
*  source buffer stride
891
*
892
* @param[in] dst_strd
893
*  destination buffer stride
894
*
895
* @param[in] pi2_hdst_scratch
896
*  scratch buffer for intermediate storage of horizontal filter output; used as
897
*  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
898
*
899
*  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
900
*
901
* @param[in] ht
902
*  width of the prediction unit
903
*
904
* @param[in] wd
905
*  width of the prediction unit
906
*
907
* @param[in] dx
908
*  1/8th pel position[0:7] of mv in x direction
909
*
910
* @param[in] dy
911
*  1/8th pel position[0:7] of mv in y direction
912
*
913
* @returns
914
*   none
915
*
916
* @remarks
917
*
918
*******************************************************************************
919
*/
920
void ihevce_chroma_interpolate_8bit_dxdy(
921
    UWORD8 *pu1_src,
922
    UWORD8 *pu1_dst,
923
    WORD32 src_strd,
924
    WORD32 dst_strd,
925
    WORD16 *pi2_hdst_scratch,
926
    WORD32 ht,
927
    WORD32 wd,
928
    WORD32 dy,
929
    WORD32 dx,
930
    func_selector_t *ps_func_selector)
931
3.41M
{
932
3.41M
    if((0 == dx) && (0 == dy))
933
2.35M
    {
934
        /*--------- full pel position : copy input as is -------*/
935
2.35M
        ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
936
2.35M
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
937
2.35M
    }
938
1.05M
    else if((0 != dx) && (0 != dy))
939
468k
    {
940
        /*----------sub pel in both x and y direction---------*/
941
468k
        UWORD8 *pu1_horz_src = pu1_src - src_strd;
942
468k
        WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
943
468k
        WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
944
945
        /* horizontal filtering of source done in a scratch buffer first  */
946
468k
        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
947
468k
            pu1_horz_src,
948
468k
            pi2_hdst_scratch,
949
468k
            src_strd,
950
468k
            hdst_buf_stride,
951
468k
            &gai1_hevc_chroma_filter_taps[dx][0],
952
468k
            (ht + NTAPS_CHROMA - 1),
953
468k
            wd);
954
955
        /* vertical filtering on scratch buffer and stored in desitnation  */
956
468k
        ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
957
468k
            pi2_vert_src,
958
468k
            pu1_dst,
959
468k
            hdst_buf_stride,
960
468k
            dst_strd,
961
468k
            &gai1_hevc_chroma_filter_taps[dy][0],
962
468k
            ht,
963
468k
            wd);
964
468k
    }
965
586k
    else if(0 == dy)
966
289k
    {
967
        /*----------sub pel in x direction only ---------*/
968
289k
        ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
969
289k
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
970
289k
    }
971
296k
    else /* if (0 == dx) */
972
296k
    {
973
        /*----------sub pel in y direction only ---------*/
974
296k
        ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
975
296k
            pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
976
296k
    }
977
3.41M
}
978
979
/**
980
*******************************************************************************
981
*
982
* @brief
983
*  Performs Chroma prediction for a inter prediction unit(PU)
984
*
985
* @par Description:
986
*  For a given PU, Inter prediction followed by weighted prediction (if
987
*  required). The reference and destination buffers are uv interleaved
988
*
989
* @param[in] ps_inter_pred_ctxt
990
*  context for inter prediction; contains ref list, weight offsets, ctb offsets
991
*
992
* @param[in] ps_pu
993
*  pointer to PU structure whose inter prediction needs to be done
994
*
995
* @param[in] pu1_dst_buf
996
*  pointer to destination buffer where the inter prediction is done
997
*
998
* @param[in] dst_stride
999
*  pitch of the destination buffer
1000
*
1001
* @returns
1002
*   none
1003
*
1004
* @remarks
1005
*
1006
*******************************************************************************
1007
*/
1008
void ihevce_chroma_inter_pred_pu(
1009
    void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
1010
3.84M
{
1011
3.84M
    inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
1012
3.84M
    func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
1013
1014
3.84M
    WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
1015
3.84M
    UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);
1016
    /* chroma width and height are half of luma width and height */
1017
3.84M
    WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
1018
3.84M
    WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);
1019
1020
3.84M
    WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
1021
3.84M
                     ps_inter_pred_ctxt->i1_weighted_bipred_flag;
1022
1023
    /* 16bit dest required for interpolate if weighted pred is on or bipred */
1024
3.84M
    WORD32 store_16bit_output;
1025
1026
3.84M
    recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
1027
3.84M
    UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
1028
3.84M
    WORD32 ref_pic_stride;
1029
1030
    /* offset of reference block in integer pel units */
1031
3.84M
    WORD32 frm_x_ofst, frm_y_ofst;
1032
3.84M
    WORD32 frm_x_pu, frm_y_pu;
1033
1034
    /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
1035
3.84M
    WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
1036
3.84M
    WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
1037
1038
    /* scratch buffer for horizontal interpolation destination */
1039
3.84M
    WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
1040
1041
    /* get PU's frm x and frm y offset : Note uv is interleaved */
1042
3.84M
    frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
1043
3.84M
    frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
1044
3.84M
               (ps_pu->b4_pos_y << (u1_is_422 + 1));
1045
1046
    /* sanity checks */
1047
3.84M
    ASSERT((wp_flag == 0) || (wp_flag == 1));
1048
3.84M
    ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
1049
3.84M
    ASSERT(ps_pu->b1_intra_flag == 0);
1050
1051
3.84M
    if(wp_flag)
1052
0
    {
1053
0
        UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
1054
1055
0
        if(inter_pred_idc != PRED_L1)
1056
0
        {
1057
0
            ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
1058
0
            u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag;
1059
0
        }
1060
0
        if(inter_pred_idc != PRED_L0)
1061
0
        {
1062
0
            ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
1063
0
            u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag;
1064
0
        }
1065
0
        if(inter_pred_idc == PRED_BI)
1066
0
        {
1067
0
            wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
1068
0
        }
1069
0
        else if(inter_pred_idc == PRED_L0)
1070
0
        {
1071
0
            wp_flag = u1_is_wgt_pred_L0;
1072
0
        }
1073
0
        else if(inter_pred_idc == PRED_L1)
1074
0
        {
1075
0
            wp_flag = u1_is_wgt_pred_L1;
1076
0
        }
1077
0
        else
1078
0
        {
1079
            /*other values are not allowed*/
1080
0
            assert(0);
1081
0
        }
1082
0
    }
1083
3.84M
    store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
1084
1085
3.84M
    if(inter_pred_idc != PRED_L1)
1086
3.62M
    {
1087
        /*****************************************************/
1088
        /*              L0 inter prediction(Chroma )         */
1089
        /*****************************************************/
1090
1091
        /* motion vecs in qpel precision                    */
1092
3.62M
        WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
1093
3.62M
        WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
1094
1095
        /* sub pel offsets in x and y direction w.r.t integer pel   */
1096
3.62M
        WORD32 dx = mv_x & 0x7;
1097
3.62M
        WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1098
1099
        /* ref idx is currently stored in the lower 4bits           */
1100
3.62M
        WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
1101
1102
        /*  x and y integer offsets w.r.t frame start               */
1103
1104
3.62M
        frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1105
3.62M
        frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1106
1107
3.62M
        ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
1108
1109
        /* picture buffer start and stride */
1110
3.62M
        pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf;
1111
3.62M
        ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd;
1112
1113
        /* point to reference start location in ref frame           */
1114
        /* Assuming clipping of mv is not required here as ME would */
1115
        /* take care of mv access not going beyond padded data      */
1116
3.62M
        pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1117
1118
3.62M
        if(store_16bit_output)
1119
434k
        {
1120
            /* do interpolation in 16bit L0 scratch buffer */
1121
434k
            ihevce_chroma_interpolate_16bit_dxdy(
1122
434k
                pu1_ref_int_pel,
1123
434k
                pi2_scr_buf_l0,
1124
434k
                ref_pic_stride,
1125
434k
                (pu_wd_chroma << 1),
1126
434k
                pi2_horz_scratch,
1127
434k
                pu_ht_chroma,
1128
434k
                pu_wd_chroma,
1129
434k
                dy,
1130
434k
                dx,
1131
434k
                ps_func_selector);
1132
434k
        }
1133
3.18M
        else
1134
3.18M
        {
1135
            /* do interpolation in 8bit destination buffer and return */
1136
3.18M
            ihevce_chroma_interpolate_8bit_dxdy(
1137
3.18M
                pu1_ref_int_pel,
1138
3.18M
                pu1_dst_buf,
1139
3.18M
                ref_pic_stride,
1140
3.18M
                dst_stride,
1141
3.18M
                pi2_horz_scratch,
1142
3.18M
                pu_ht_chroma,
1143
3.18M
                pu_wd_chroma,
1144
3.18M
                dy,
1145
3.18M
                dx,
1146
3.18M
                ps_func_selector);
1147
1148
3.18M
            return;
1149
3.18M
        }
1150
3.62M
    }
1151
1152
661k
    if(inter_pred_idc != PRED_L0)
1153
661k
    {
1154
        /*****************************************************/
1155
        /*      L1 inter prediction(Chroma)                  */
1156
        /*****************************************************/
1157
1158
        /* motion vecs in qpel precision                            */
1159
661k
        WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
1160
661k
        WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
1161
1162
        /* sub pel offsets in x and y direction w.r.t integer pel   */
1163
661k
        WORD32 dx = mv_x & 0x7;
1164
661k
        WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
1165
1166
        /* ref idx is currently stored in the lower 4bits           */
1167
661k
        WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
1168
1169
        /*  x and y integer offsets w.r.t frame start               */
1170
661k
        frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
1171
661k
        frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
1172
1173
661k
        ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
1174
1175
        /* picture buffer start and stride */
1176
661k
        pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf;
1177
661k
        ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd;
1178
1179
        /* point to reference start location in ref frame           */
1180
        /* Assuming clipping of mv is not required here as ME would */
1181
        /* take care of mv access not going beyond padded data      */
1182
661k
        pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
1183
1184
661k
        if(store_16bit_output)
1185
434k
        {
1186
            /* do interpolation in 16bit L1 scratch buffer */
1187
434k
            ihevce_chroma_interpolate_16bit_dxdy(
1188
434k
                pu1_ref_int_pel,
1189
434k
                pi2_scr_buf_l1,
1190
434k
                ref_pic_stride,
1191
434k
                (pu_wd_chroma << 1),
1192
434k
                pi2_horz_scratch,
1193
434k
                pu_ht_chroma,
1194
434k
                pu_wd_chroma,
1195
434k
                dy,
1196
434k
                dx,
1197
434k
                ps_func_selector);
1198
434k
        }
1199
226k
        else
1200
226k
        {
1201
            /* do interpolation in 8bit destination buffer and return */
1202
226k
            ihevce_chroma_interpolate_8bit_dxdy(
1203
226k
                pu1_ref_int_pel,
1204
226k
                pu1_dst_buf,
1205
226k
                ref_pic_stride,
1206
226k
                dst_stride,
1207
226k
                pi2_horz_scratch,
1208
226k
                pu_ht_chroma,
1209
226k
                pu_wd_chroma,
1210
226k
                dy,
1211
226k
                dx,
1212
226k
                ps_func_selector);
1213
1214
226k
            return;
1215
226k
        }
1216
661k
    }
1217
1218
434k
    if((inter_pred_idc != PRED_BI) && wp_flag)
1219
0
    {
1220
        /*****************************************************/
1221
        /*      unidirection weighted prediction(Chroma)     */
1222
        /*****************************************************/
1223
0
        ihevce_wght_offst_t *ps_weight_offset;
1224
0
        WORD16 *pi2_src;
1225
0
        WORD32 lvl_shift = 0;
1226
0
        WORD32 wgt_cb, wgt_cr, off_cb, off_cr;
1227
0
        WORD32 shift;
1228
1229
        /* intialize the weight, offsets and ref based on l0/l1 mode */
1230
0
        if(inter_pred_idc == PRED_L0)
1231
0
        {
1232
0
            pi2_src = pi2_scr_buf_l0;
1233
0
            ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
1234
0
        }
1235
0
        else
1236
0
        {
1237
0
            pi2_src = pi2_scr_buf_l1;
1238
0
            ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
1239
0
        }
1240
1241
0
        wgt_cb = ps_weight_offset->i2_cb_weight;
1242
0
        off_cb = ps_weight_offset->i2_cb_offset;
1243
0
        wgt_cr = ps_weight_offset->i2_cr_weight;
1244
0
        off_cr = ps_weight_offset->i2_cr_offset;
1245
1246
0
        shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
1247
1248
        /* do the uni directional weighted prediction */
1249
0
        ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
1250
0
            pi2_src,
1251
0
            pu1_dst_buf,
1252
0
            (pu_wd_chroma << 1),
1253
0
            dst_stride,
1254
0
            wgt_cb,
1255
0
            wgt_cr,
1256
0
            off_cb,
1257
0
            off_cr,
1258
0
            shift,
1259
0
            lvl_shift,
1260
0
            pu_ht_chroma,
1261
0
            pu_wd_chroma);
1262
0
    }
1263
434k
    else
1264
434k
    {
1265
        /*****************************************************/
1266
        /*              Bipred  prediction(Chroma)           */
1267
        /*****************************************************/
1268
434k
        if(wp_flag)
1269
0
        {
1270
0
            WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr;
1271
0
            WORD32 off0_cb, off1_cb, off0_cr, off1_cr;
1272
0
            WORD32 shift;
1273
1274
            /*****************************************************/
1275
            /*      Bi pred  weighted prediction (Chroma)        */
1276
            /*****************************************************/
1277
0
            wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight;
1278
0
            off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset;
1279
1280
0
            wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight;
1281
0
            off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset;
1282
1283
0
            wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight;
1284
0
            off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset;
1285
1286
0
            wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight;
1287
0
            off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset;
1288
1289
0
            shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
1290
1291
0
            ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
1292
0
                pi2_scr_buf_l0,
1293
0
                pi2_scr_buf_l1,
1294
0
                pu1_dst_buf,
1295
0
                (pu_wd_chroma << 1),
1296
0
                (pu_wd_chroma << 1),
1297
0
                dst_stride,
1298
0
                wgt0_cb,
1299
0
                wgt0_cr,
1300
0
                off0_cb,
1301
0
                off0_cr,
1302
0
                wgt1_cb,
1303
0
                wgt1_cr,
1304
0
                off1_cb,
1305
0
                off1_cr,
1306
0
                shift,
1307
0
                0,
1308
0
                0,
1309
0
                pu_ht_chroma,
1310
0
                pu_wd_chroma);
1311
0
        }
1312
434k
        else
1313
434k
        {
1314
            /*****************************************************/
1315
            /*          Default Bi pred  prediction (Chroma)     */
1316
            /*****************************************************/
1317
434k
            ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
1318
434k
                pi2_scr_buf_l0,
1319
434k
                pi2_scr_buf_l1,
1320
434k
                pu1_dst_buf,
1321
434k
                (pu_wd_chroma << 1),
1322
434k
                (pu_wd_chroma << 1),
1323
434k
                dst_stride,
1324
434k
                0,
1325
434k
                0,
1326
434k
                pu_ht_chroma,
1327
434k
                pu_wd_chroma);
1328
434k
        }
1329
434k
    }
1330
434k
}