/src/libavc/common/ih264_weighted_pred.c

Source
/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/

/**
*******************************************************************************
* @file
*  ih264_weighted_pred.c
*
* @brief
*  Contains function definitions for weighted prediction functions
*
* @author
*  ittiam
*
* @par List of Functions:
*  - ih264_default_weighted_pred_luma
*  - ih264_default_weighted_pred_chroma
*  - ih264_weighted_pred_luma
*  - ih264_weighted_pred_chroma
*  - ih264_weighted_bi_pred_luma
*  - ih264_weighted_bi_pred_chroma
*
* @remarks
*
*******************************************************************************
*/

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/

/* User Include Files */
#include "ih264_typedefs.h"
#include "ih264_macros.h"
#include "ih264_weighted_pred.h"
#include "ih264_platform_macros.h"


/*****************************************************************************/
/*  Function definitions                                                     */
/*****************************************************************************/

/**
*******************************************************************************
*
* @brief
*  Default (unweighted) bi-directional prediction for a luma block
*
* @par Description
*  Implements the averaging of sec 8.4.2.3.1 "Default weighted sample
*  prediction process" for luma: every destination sample is the rounded
*  mean (a + b + 1) >> 1 of the co-located samples of the two source blocks.
*  (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16)
*
* @param[in] pu1_src1
*  Pointer to the first source block
*
* @param[in] pu1_src2
*  Pointer to the second source block
*
* @param[out] pu1_dst
*  Pointer to the destination block
*
* @param[in] src_strd1
*  Distance, in bytes, between consecutive rows of source 1
*
* @param[in] src_strd2
*  Distance, in bytes, between consecutive rows of source 2
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] ht
*  Number of rows in the block
*
* @param[in] wd
*  Number of samples per row
*
* @returns none
*
* @remarks
*  The block dimensions are not validated by this function
*
*******************************************************************************
*/
void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
                                      UWORD8 *pu1_src2,
                                      UWORD8 *pu1_dst,
                                      WORD32 src_strd1,
                                      WORD32 src_strd2,
                                      WORD32 dst_strd,
                                      WORD32 ht,
                                      WORD32 wd)
{
    WORD32 row, col;

    for(row = 0; row < ht; row++)
    {
        /* rounded average of the two predictions, sample by sample */
        for(col = 0; col < wd; col++)
        {
            pu1_dst[col] = (pu1_src1[col] + pu1_src2[col] + 1) >> 1;
        }

        /* move every pointer to the start of its next row */
        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}

/**
*******************************************************************************
*
* @brief
*  Default (unweighted) bi-directional prediction for a chroma block
*
* @par Description
*  Implements the averaging of sec 8.4.2.3.1 "Default weighted sample
*  prediction process" for chroma: every destination byte is the rounded
*  mean (a + b + 1) >> 1 of the co-located bytes of the two source blocks.
*  Each row that is processed is 2 * wd bytes long.
*  (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).
*
* @param[in] pu1_src1
*  Pointer to the first source block
*
* @param[in] pu1_src2
*  Pointer to the second source block
*
* @param[out] pu1_dst
*  Pointer to the destination block
*
* @param[in] src_strd1
*  Distance, in bytes, between consecutive rows of source 1
*
* @param[in] src_strd2
*  Distance, in bytes, between consecutive rows of source 2
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] ht
*  Number of rows in the block
*
* @param[in] wd
*  Width of the block; 2 * wd bytes are processed per row
*
* @returns none
*
* @remarks
*  The doubled row width presumably reflects interleaved U and V samples, as
*  in the weighted chroma functions of this file - confirm against the caller
*
*******************************************************************************
*/
void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
                                        UWORD8 *pu1_src2,
                                        UWORD8 *pu1_dst,
                                        WORD32 src_strd1,
                                        WORD32 src_strd2,
                                        WORD32 dst_strd,
                                        WORD32 ht,
                                        WORD32 wd)
{
    WORD32 row, col;
    /* number of bytes handled in every row */
    WORD32 row_bytes = wd << 1;

    for(row = 0; row < ht; row++)
    {
        /* rounded average of the two predictions, byte by byte */
        for(col = 0; col < row_bytes; col++)
        {
            pu1_dst[col] = (pu1_src1[col] + pu1_src2[col] + 1) >> 1;
        }

        /* move every pointer to the start of its next row */
        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}

/**
*******************************************************************************
*
* @brief
*  Explicit weighted prediction of a luma block from a single source
*
* @par Description
*  This function performs the weighted prediction as described in
*  sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
*  For every sample s of the source block it stores
*      CLIP_U8((wt * s + 2^(log_wd - 1) + ofst * 2^log_wd) >> log_wd)
*  when log_wd >= 1, and
*      CLIP_U8(wt * s + ofst)
*  when log_wd < 1.
*  (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16)
*
* @param[in] pu1_src
*  Pointer to source
*
* @param[out] pu1_dst
*  Pointer to destination
*
* @param[in] src_strd
*  Distance, in bytes, between consecutive rows of the source
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] log_wd
*  number of bits to be rounded off
*
* @param[in] wt
*  weight value; only the low 16 bits are used, read as a signed 16-bit value
*
* @param[in] ofst
*  offset value; only the low 8 bits are used, read as a signed 8-bit value
*
* @param[in] ht
*  height of the block
*
* @param[in] wd
*  width of the block
*
* @returns none
*
* @remarks
*  The offset is scaled with a multiplication instead of a left shift because
*  it can be negative, and left-shifting a negative value is undefined
*  behaviour in C. log_wd is not range checked here.
*
*******************************************************************************
*/
void ih264_weighted_pred_luma(UWORD8 *pu1_src,
                              UWORD8 *pu1_dst,
                              WORD32 src_strd,
                              WORD32 dst_strd,
                              WORD32 log_wd,
                              WORD32 wt,
                              WORD32 ofst,
                              WORD32 ht,
                              WORD32 wd)
{
    WORD32 i, j;

    /* keep only the significant bits and sign-extend them */
    wt = (WORD16)(wt & 0xffff);
    ofst = (WORD8)(ofst & 0xff);

    if(log_wd >= 1)
    {
        /* rounding term plus the offset pre-scaled by 2^log_wd, so that a */
        /* single shift per sample applies both                            */
        WORD32 i_ofst = (1 << (log_wd - 1)) + ofst * (1 << log_wd);

        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++)
            {
                WORD32 val = (wt * pu1_src[j] + i_ofst) >> log_wd;

                pu1_dst[j] = CLIP_U8(val);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
    else
    {
        /* no rounding shift: weight and offset are applied directly */
        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++)
            {
                WORD32 val = wt * pu1_src[j] + ofst;

                pu1_dst[j] = CLIP_U8(val);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
}

/**
*******************************************************************************
*
* @brief
*  Explicit weighted prediction of an interleaved chroma block from a single
*  source
*
* @par Description
*  This function performs the weighted prediction as described in
*  sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
*  Source and destination rows hold wd pairs of bytes; the first byte of each
*  pair is processed with the u weight/offset and the second with the v
*  weight/offset. For a sample s with weight w and offset o the stored value is
*      CLIP_U8((w * s + 2^(log_wd - 1) + o * 2^log_wd) >> log_wd)
*  when log_wd >= 1, and
*      CLIP_U8(w * s + o)
*  when log_wd < 1.
*  (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).
*
* @param[in] pu1_src
*  Pointer to source
*
* @param[out] pu1_dst
*  Pointer to destination
*
* @param[in] src_strd
*  Distance, in bytes, between consecutive rows of the source
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] log_wd
*  number of bits to be rounded off
*
* @param[in] wt
*  packed weights: u in bits 0-15, v in bits 16-31, each read as a signed
*  16-bit value
*
* @param[in] ofst
*  packed offsets: u in bits 0-7, v in bits 8-15, each read as a signed
*  8-bit value
*
* @param[in] ht
*  height of the block
*
* @param[in] wd
*  width of the block in u/v pairs
*
* @returns none
*
* @remarks
*  The offsets are scaled with a multiplication instead of a left shift
*  because they can be negative, and left-shifting a negative value is
*  undefined behaviour in C. log_wd is not range checked here.
*
*******************************************************************************
*/
void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
                                UWORD8 *pu1_dst,
                                WORD32 src_strd,
                                WORD32 dst_strd,
                                WORD32 log_wd,
                                WORD32 wt,
                                WORD32 ofst,
                                WORD32 ht,
                                WORD32 wd)
{
    WORD32 i, j;
    WORD32 wt_u, wt_v;
    WORD32 ofst_u, ofst_v;

    /* unpack and sign-extend the per-plane weights and offsets */
    wt_u = (WORD16)(wt & 0xffff);
    wt_v = (WORD16)(wt >> 16);

    ofst_u = (WORD8)(ofst & 0xff);
    ofst_v = (WORD8)(ofst >> 8);

    if(log_wd >= 1)
    {
        /* rounding term plus the offset pre-scaled by 2^log_wd, so that a */
        /* single shift per sample applies both                            */
        WORD32 rnd_ofst_u = (1 << (log_wd - 1)) + ofst_u * (1 << log_wd);
        WORD32 rnd_ofst_v = (1 << (log_wd - 1)) + ofst_v * (1 << log_wd);

        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++)
            {
                WORD32 val;

                /* u sample of the pair */
                val = (wt_u * pu1_src[2 * j] + rnd_ofst_u) >> log_wd;
                pu1_dst[2 * j] = CLIP_U8(val);

                /* v sample of the pair */
                val = (wt_v * pu1_src[2 * j + 1] + rnd_ofst_v) >> log_wd;
                pu1_dst[2 * j + 1] = CLIP_U8(val);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
    else
    {
        /* no rounding shift: weight and offset are applied directly */
        for(i = 0; i < ht; i++)
        {
            for(j = 0; j < wd; j++)
            {
                WORD32 val;

                /* u sample of the pair */
                val = wt_u * pu1_src[2 * j] + ofst_u;
                pu1_dst[2 * j] = CLIP_U8(val);

                /* v sample of the pair */
                val = wt_v * pu1_src[2 * j + 1] + ofst_v;
                pu1_dst[2 * j + 1] = CLIP_U8(val);
            }

            pu1_src += src_strd;
            pu1_dst += dst_strd;
        }
    }
}

/**
*******************************************************************************
*
* @brief
*  Explicit weighted bi-prediction of a luma block from two sources
*
* @par Description
*  This function performs the weighted biprediction as described in
*  sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
*  With o = (ofst1 + ofst2 + 1) >> 1, every pair of co-located source samples
*  (a, b) produces
*      CLIP_U8((wt1 * a + wt2 * b + 2^log_wd + o * 2^(log_wd + 1))
*              >> (log_wd + 1))
*  (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16)
*
* @param[in] pu1_src1
*  Pointer to source 1
*
* @param[in] pu1_src2
*  Pointer to source 2
*
* @param[out] pu1_dst
*  Pointer to destination
*
* @param[in] src_strd1
*  Distance, in bytes, between consecutive rows of source 1
*
* @param[in] src_strd2
*  Distance, in bytes, between consecutive rows of source 2
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] log_wd
*  number of bits to be rounded off
*
* @param[in] wt1
*  weight for source 1; only the low 16 bits are used, read as signed 16-bit
*
* @param[in] wt2
*  weight for source 2; only the low 16 bits are used, read as signed 16-bit
*
* @param[in] ofst1
*  offset for source 1; only the low 8 bits are used, read as signed 8-bit
*
* @param[in] ofst2
*  offset for source 2; only the low 8 bits are used, read as signed 8-bit
*
* @param[in] ht
*  height of the block
*
* @param[in] wd
*  width of the block
*
* @returns none
*
* @remarks
*  The combined offset is scaled with a multiplication instead of a left
*  shift because it can be negative, and left-shifting a negative value is
*  undefined behaviour in C. log_wd is not range checked here.
*
*******************************************************************************
*/
void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
                                 UWORD8 *pu1_src2,
                                 UWORD8 *pu1_dst,
                                 WORD32 src_strd1,
                                 WORD32 src_strd2,
                                 WORD32 dst_strd,
                                 WORD32 log_wd,
                                 WORD32 wt1,
                                 WORD32 wt2,
                                 WORD32 ofst1,
                                 WORD32 ofst2,
                                 WORD32 ht,
                                 WORD32 wd)
{
    WORD32 i, j;
    WORD32 shft, ofst;

    /* keep only the significant bits and sign-extend them */
    ofst1 = (WORD8)(ofst1 & 0xff);
    ofst2 = (WORD8)(ofst2 & 0xff);
    wt1 = (WORD16)(wt1 & 0xffff);
    wt2 = (WORD16)(wt2 & 0xffff);

    /* rounded mean of the two offsets */
    ofst = (ofst1 + ofst2 + 1) >> 1;

    /* rounding term plus the mean offset pre-scaled by 2^shft, so that a */
    /* single shift per sample applies both                               */
    shft = log_wd + 1;
    ofst = (1 << log_wd) + ofst * (1 << shft);

    for(i = 0; i < ht; i++)
    {
        for(j = 0; j < wd; j++)
        {
            WORD32 val = (wt1 * pu1_src1[j] + wt2 * pu1_src2[j] + ofst) >> shft;

            pu1_dst[j] = CLIP_U8(val);
        }

        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}

/**
*******************************************************************************
*
* @brief
*  Explicit weighted bi-prediction of an interleaved chroma block from two
*  sources
*
* @par Description
*  This function performs the weighted biprediction as described in
*  sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
*  Source and destination rows hold wd pairs of bytes; the first byte of each
*  pair is processed with the u weights/offsets and the second with the v
*  weights/offsets. With o = (o1 + o2 + 1) >> 1 for the plane in question,
*  every pair of co-located source samples (a, b) produces
*      CLIP_U8((w1 * a + w2 * b + 2^log_wd + o * 2^(log_wd + 1))
*              >> (log_wd + 1))
*  (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8)
*
* @param[in] pu1_src1
*  Pointer to source 1
*
* @param[in] pu1_src2
*  Pointer to source 2
*
* @param[out] pu1_dst
*  Pointer to destination
*
* @param[in] src_strd1
*  Distance, in bytes, between consecutive rows of source 1
*
* @param[in] src_strd2
*  Distance, in bytes, between consecutive rows of source 2
*
* @param[in] dst_strd
*  Distance, in bytes, between consecutive rows of the destination
*
* @param[in] log_wd
*  number of bits to be rounded off
*
* @param[in] wt1
*  packed weights for source 1: u in bits 0-15, v in bits 16-31, each read
*  as a signed 16-bit value
*
* @param[in] wt2
*  packed weights for source 2, same layout as wt1
*
* @param[in] ofst1
*  packed offsets for source 1: u in bits 0-7, v in bits 8-15, each read as
*  a signed 8-bit value
*
* @param[in] ofst2
*  packed offsets for source 2, same layout as ofst1
*
* @param[in] ht
*  height of the block
*
* @param[in] wd
*  width of the block in u/v pairs
*
* @returns none
*
* @remarks
*  The combined offsets are scaled with a multiplication instead of a left
*  shift because they can be negative, and left-shifting a negative value is
*  undefined behaviour in C. log_wd is not range checked here.
*
*******************************************************************************
*/
void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
                                   UWORD8 *pu1_src2,
                                   UWORD8 *pu1_dst,
                                   WORD32 src_strd1,
                                   WORD32 src_strd2,
                                   WORD32 dst_strd,
                                   WORD32 log_wd,
                                   WORD32 wt1,
                                   WORD32 wt2,
                                   WORD32 ofst1,
                                   WORD32 ofst2,
                                   WORD32 ht,
                                   WORD32 wd)
{
    WORD32 i, j;
    WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
    WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
    WORD32 ofst_u, ofst_v;
    WORD32 shft;

    /* unpack and sign-extend the per-plane offsets and weights */
    ofst1_u = (WORD8)(ofst1 & 0xff);
    ofst1_v = (WORD8)(ofst1 >> 8);
    ofst2_u = (WORD8)(ofst2 & 0xff);
    ofst2_v = (WORD8)(ofst2 >> 8);
    wt1_u = (WORD16)(wt1 & 0xffff);
    wt1_v = (WORD16)(wt1 >> 16);
    wt2_u = (WORD16)(wt2 & 0xffff);
    wt2_v = (WORD16)(wt2 >> 16);

    /* rounded mean of the two offsets of each plane */
    ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
    ofst_v = (ofst1_v + ofst2_v + 1) >> 1;

    /* rounding term plus the mean offset pre-scaled by 2^shft, so that a */
    /* single shift per sample applies both                               */
    shft = log_wd + 1;
    ofst_u = (1 << log_wd) + ofst_u * (1 << shft);
    ofst_v = (1 << log_wd) + ofst_v * (1 << shft);

    for(i = 0; i < ht; i++)
    {
        for(j = 0; j < wd; j++)
        {
            WORD32 val;

            /* u sample of the pair */
            val = (wt1_u * pu1_src1[2 * j] + wt2_u * pu1_src2[2 * j] + ofst_u) >> shft;
            pu1_dst[2 * j] = CLIP_U8(val);

            /* v sample of the pair */
            val = (wt1_v * pu1_src1[2 * j + 1] + wt2_v * pu1_src2[2 * j + 1] + ofst_v) >> shft;
            pu1_dst[2 * j + 1] = CLIP_U8(val);
        }

        pu1_src1 += src_strd1;
        pu1_src2 += src_strd2;
        pu1_dst += dst_strd;
    }
}

Coverage Report

Created: 2025-08-26 06:38

Line	Count	Source
1		/******************************************************************************
2		*
3		* Copyright (C) 2015 The Android Open Source Project
4		*
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at:
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*
17		*****************************************************************************
18		* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19		*/
20
21		/**
22		*******************************************************************************
23		* @file
24		* ih264_weighted_pred.c
25		*
26		* @brief
27		* Contains function definitions for weighted prediction functions
28		*
29		* @author
30		* ittiam
31		*
32		* @par List of Functions:
33		* - ih264_default_weighted_pred_luma
34		* - ih264_default_weighted_pred_chroma
35		* - ih264_weighted_pred_luma
36		* - ih264_weighted_pred_chroma
37		* - ih264_weighted_bipred_luma
38		* - ih264_weighted_bipred_chroma
39		*
40		* @remarks
41		*
42		*******************************************************************************
43		*/
44
45		/*****************************************************************************/
46		/* File Includes */
47		/*****************************************************************************/
48
49		/* User Include Files */
50		#include "ih264_typedefs.h"
51		#include "ih264_macros.h"
52		#include "ih264_weighted_pred.h"
53		#include "ih264_platform_macros.h"
54
55
56		/*****************************************************************************/
57		/* Function definitions */
58		/*****************************************************************************/
59
60		/**
61		*******************************************************************************
62		*
63		* @brief default weighted prediction luma.
64		*
65		* @par Description
66		* This function performs the default weighted prediction as described in
67		* sec 8.4.2.3.1 titled "Default weighted sample prediction process" for luma.
68		* The function gets two ht x wd blocks, calculates their rounded-average and
69		* stores it in the destination block. (ht,wd) can be (4,4), (8,4), (4,8),
70		* (8,8), (16,8), (8,16) or (16,16)
71		*
72		* @param[in] pu1_src1
73		* Pointer to source 1
74		*
75		* @param[in] pu1_src2
76		* Pointer to source 2
77		*
78		* @param[in] pu1_dst
79		* Pointer to destination
80		*
81		* @param[in] src_strd1
82		* stride for source 1
83		*
84		* @param[in] src_strd2
85		* stride for source 2
86		*
87		* @param[in] dst_strd
88		* stride for destination
89		*
90		* @param[in] ht
91		* height of the block
92		*
93		* @param[in] wd
94		* width of the block
95		*
96		* @returns none
97		*
98		* @remarks none
99		*
100		*******************************************************************************
101		*/
102		void ih264_default_weighted_pred_luma(UWORD8 *pu1_src1,
103		UWORD8 *pu1_src2,
104		UWORD8 *pu1_dst,
105		WORD32 src_strd1,
106		WORD32 src_strd2,
107		WORD32 dst_strd,
108		WORD32 ht,
109		WORD32 wd)
110	87.4k	{
111	87.4k	WORD32 i, j;
112
113	87.4k	src_strd1 -= wd;
114	87.4k	src_strd2 -= wd;
115	87.4k	dst_strd -= wd;
116
117	1.25M	for(i = 0; i < ht; i++)
118	1.16M	{
119	19.1M	for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
120	18.0M	*pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
121
122	1.16M	pu1_src1 += src_strd1;
123	1.16M	pu1_src2 += src_strd2;
124	1.16M	pu1_dst += dst_strd;
125	1.16M	}
126	87.4k	}
127
128		/**
129		*******************************************************************************
130		*
131		* @brief default weighted prediction chroma.
132		*
133		* @par Description
134		* This function performs the default weighted prediction as described in
135		* sec 8.4.2.3.1 titled "Default weighted sample prediction process" for chroma.
136		* The function gets two ht x wd blocks, calculates their rounded-average and
137		* stores it in the destination block. (ht,wd) can be (2,2), (4,2), (2,4),
138		* (4,4), (8,4), (4,8) or (8,8).
139		*
140		* @param[in] pu1_src1
141		* Pointer to source 1
142		*
143		* @param[in] pu1_src2
144		* Pointer to source 2
145		*
146		* @param[in] pu1_dst
147		* Pointer to destination
148		*
149		* @param[in] src_strd1
150		* stride for source 1
151		*
152		* @param[in] src_strd2
153		* stride for source 2
154		*
155		* @param[in] dst_strd
156		* stride for destination
157		*
158		* @param[in] ht
159		* height of the block
160		*
161		* @param[in] wd
162		* width of the block
163		*
164		* @returns none
165		*
166		* @remarks none
167		*
168		*******************************************************************************
169		*/
170		void ih264_default_weighted_pred_chroma(UWORD8 *pu1_src1,
171		UWORD8 *pu1_src2,
172		UWORD8 *pu1_dst,
173		WORD32 src_strd1,
174		WORD32 src_strd2,
175		WORD32 dst_strd,
176		WORD32 ht,
177		WORD32 wd)
178	87.4k	{
179	87.4k	WORD32 i, j;
180
181	87.4k	wd = wd << 1;
182
183	87.4k	src_strd1 -= wd;
184	87.4k	src_strd2 -= wd;
185	87.4k	dst_strd -= wd;
186
187	668k	for(i = 0; i < ht; i++)
188	581k	{
189	9.59M	for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
190	9.00M	*pu1_dst = (*pu1_src1 + *pu1_src2 + 1) >> 1;
191
192	581k	pu1_src1 += src_strd1;
193	581k	pu1_src2 += src_strd2;
194	581k	pu1_dst += dst_strd;
195	581k	}
196	87.4k	}
197
198		/**
199		*******************************************************************************
200		*
201		* @brief weighted prediction luma.
202		*
203		* @par Description
204		* This function performs the weighted prediction as described in
205		* sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
206		* The function gets one ht x wd block, weights it, rounds it off, offsets it,
207		* saturates it to unsigned 8-bit and stores it in the destination block.
208		* (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16) or (16,16)
209		*
210		* @param[in] pu1_src
211		* Pointer to source
212		*
213		* @param[in] pu1_dst
214		* Pointer to destination
215		*
216		* @param[in] src_strd
217		* stride for source
218		*
219		* @param[in] dst_strd
220		* stride for destination
221		*
222		* @param[in] log_wd
223		* number of bits to be rounded off
224		*
225		* @param[in] wt
226		* weight value
227		*
228		* @param[in] ofst
229		* offset value
230		*
231		* @param[in] ht
232		* height of the block
233		*
234		* @param[in] wd
235		* width of the block
236		*
237		* @returns none
238		*
239		* @remarks none
240		*
241		*******************************************************************************
242		*/
243		void ih264_weighted_pred_luma(UWORD8 *pu1_src,
244		UWORD8 *pu1_dst,
245		WORD32 src_strd,
246		WORD32 dst_strd,
247		WORD32 log_wd,
248		WORD32 wt,
249		WORD32 ofst,
250		WORD32 ht,
251		WORD32 wd)
252	4.80M	{
253	4.80M	WORD32 i, j;
254
255	4.80M	wt = (WORD16)(wt & 0xffff);
256	4.80M	ofst = (WORD8)(ofst & 0xff);
257
258	4.80M	src_strd -= wd;
259	4.80M	dst_strd -= wd;
260
261	4.80M	if(log_wd >= 1)
262	3.66M	{
263	3.66M	WORD32 i_ofst = (1 << (log_wd - 1)) + (ofst << log_wd);
264	62.0M	for(i = 0; i < ht; i++)
265	58.3M	{
266	990M	for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
267	932M	*pu1_dst = CLIP_U8((wt * (*pu1_src) + i_ofst) >> log_wd);
268
269	58.3M	pu1_src += src_strd;
270	58.3M	pu1_dst += dst_strd;
271	58.3M	}
272	3.66M	}
273	1.13M	else
274	1.13M	{
275	18.8M	for(i = 0; i < ht; i++)
276	17.7M	{
277	299M	for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
278	281M	*pu1_dst = CLIP_U8(wt * (*pu1_src) + ofst);
279
280	17.7M	pu1_src += src_strd;
281	17.7M	pu1_dst += dst_strd;
282	17.7M	}
283	1.13M	}
284	4.80M	}
285
286		/**
287		*******************************************************************************
288		*
289		* @brief weighted prediction chroma.
290		*
291		* @par Description
292		* This function performs the weighted prediction as described in
293		* sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
294		* The function gets one ht x wd block, weights it, rounds it off, offsets it,
295		* saturates it to unsigned 8-bit and stores it in the destination block.
296		* (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8) or (8,8).
297		*
298		* @param[in] pu1_src
299		* Pointer to source
300		*
301		* @param[in] pu1_dst
302		* Pointer to destination
303		*
304		* @param[in] src_strd
305		* stride for source
306		*
307		* @param[in] dst_strd
308		* stride for destination
309		*
310		* @param[in] log_wd
311		* number of bits to be rounded off
312		*
313		* @param[in] wt
314		* weight values for u and v
315		*
316		* @param[in] ofst
317		* offset values for u and v
318		*
319		* @param[in] ht
320		* height of the block
321		*
322		* @param[in] wd
323		* width of the block
324		*
325		* @returns none
326		*
327		* @remarks none
328		*
329		*******************************************************************************
330		*/
331		void ih264_weighted_pred_chroma(UWORD8 *pu1_src,
332		UWORD8 *pu1_dst,
333		WORD32 src_strd,
334		WORD32 dst_strd,
335		WORD32 log_wd,
336		WORD32 wt,
337		WORD32 ofst,
338		WORD32 ht,
339		WORD32 wd)
340	4.80M	{
341	4.80M	WORD32 i, j;
342	4.80M	WORD32 wt_u, wt_v;
343	4.80M	WORD32 ofst_u, ofst_v;
344
345	4.80M	wt_u = (WORD16)(wt & 0xffff);
346	4.80M	wt_v = (WORD16)(wt >> 16);
347
348	4.80M	ofst_u = (WORD8)(ofst & 0xff);
349	4.80M	ofst_v = (WORD8)(ofst >> 8);
350
351	4.80M	src_strd -= wd << 1;
352	4.80M	dst_strd -= wd << 1;
353
354	4.80M	if(log_wd >= 1)
355	3.64M	{
356	3.64M	ofst_u = (1 << (log_wd - 1)) + (ofst_u << log_wd);
357	3.64M	ofst_v = (1 << (log_wd - 1)) + (ofst_v << log_wd);
358
359	32.6M	for(i = 0; i < ht; i++)
360	29.0M	{
361	261M	for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
362	232M	{
363	232M	*pu1_dst = CLIP_U8((wt_u * (*pu1_src) + ofst_u) >> log_wd);
364	232M	pu1_src++;
365	232M	pu1_dst++;
366	232M	*pu1_dst = CLIP_U8((wt_v * (*pu1_src) + ofst_v) >> log_wd);
367	232M	}
368	29.0M	pu1_src += src_strd;
369	29.0M	pu1_dst += dst_strd;
370	29.0M	}
371	3.64M	}
372	1.16M	else
373	1.16M	{
374	10.1M	for(i = 0; i < ht; i++)
375	9.01M	{
376	80.2M	for(j = 0; j < wd; j++, pu1_src++, pu1_dst++)
377	71.2M	{
378	71.2M	*pu1_dst = CLIP_U8(wt_u * (*pu1_src) + ofst_u);
379	71.2M	pu1_src++;
380	71.2M	pu1_dst++;
381	71.2M	*pu1_dst = CLIP_U8(wt_v * (*pu1_src) + ofst_v);
382	71.2M	}
383	9.01M	pu1_src += src_strd;
384	9.01M	pu1_dst += dst_strd;
385	9.01M	}
386	1.16M	}
387	4.80M	}
388
389		/**
390		*******************************************************************************
391		*
392		* @brief weighted bi-prediction luma.
393		*
394		* @par Description
395		* This function performs the weighted biprediction as described in
396		* sec 8.4.2.3.2 titled "weighted sample prediction process" for luma.
397		* The function gets two ht x wd blocks, weights them, adds them, rounds off
398		* the sum, offsets it, saturates it to unsigned 8-bit and stores it in the
399		* destination block. (ht,wd) can be (4,4), (8,4), (4,8), (8,8), (16,8), (8,16)
400		* or (16,16)
401		*
402		* @param[in] pu1_src1
403		* Pointer to source 1
404		*
405		* @param[in] pu1_src2
406		* Pointer to source 2
407		*
408		* @param[in] pu1_dst
409		* Pointer to destination
410		*
411		* @param[in] src_strd1
412		* stride for source 1
413		*
414		* @param[in] src_strd2
415		* stride for source 2
416		*
417		* @param[in] dst_strd
418		* stride for destination
419		*
420		* @param[in] log_wd
421		* number of bits to be rounded off
422		*
423		* @param[in] wt1
424		* weight value for source 1
425		*
426		* @param[in] wt2
427		* weight value for source 2
428		*
429		* @param[in] ofst1
430		* offset value for source 1
431		*
432		* @param[in] ofst2
433		* offset value for source 2
434		*
435		* @param[in] ht
436		* height of the block
437		*
438		* @param[in] wd
439		* width of the block
440		*
441		* @returns none
442		*
443		* @remarks none
444		*
445		*******************************************************************************
446		*/
447		void ih264_weighted_bi_pred_luma(UWORD8 *pu1_src1,
448		UWORD8 *pu1_src2,
449		UWORD8 *pu1_dst,
450		WORD32 src_strd1,
451		WORD32 src_strd2,
452		WORD32 dst_strd,
453		WORD32 log_wd,
454		WORD32 wt1,
455		WORD32 wt2,
456		WORD32 ofst1,
457		WORD32 ofst2,
458		WORD32 ht,
459		WORD32 wd)
460	343k	{
461	343k	WORD32 i, j;
462	343k	WORD32 shft, ofst;
463
464	343k	ofst1 = (WORD8)(ofst1 & 0xff);
465	343k	ofst2 = (WORD8)(ofst2 & 0xff);
466	343k	wt1 = (WORD16)(wt1 & 0xffff);
467	343k	wt2 = (WORD16)(wt2 & 0xffff);
468	343k	ofst = (ofst1 + ofst2 + 1) >> 1;
469
470	343k	shft = log_wd + 1;
471	343k	ofst = (1 << log_wd) + (ofst << shft);
472
473	343k	src_strd1 -= wd;
474	343k	src_strd2 -= wd;
475	343k	dst_strd -= wd;
476
477	5.69M	for(i = 0; i < ht; i++)
478	5.35M	{
479	89.6M	for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
480	84.2M	*pu1_dst = CLIP_U8((wt1 * (*pu1_src1) + wt2 * (*pu1_src2) + ofst) >> shft);
481
482	5.35M	pu1_src1 += src_strd1;
483	5.35M	pu1_src2 += src_strd2;
484	5.35M	pu1_dst += dst_strd;
485	5.35M	}
486	343k	}
487
488		/**
489		*******************************************************************************
490		*
491		* @brief weighted bi-prediction chroma.
492		*
493		* @par Description
494		* This function performs the weighted biprediction as described in
495		* sec 8.4.2.3.2 titled "weighted sample prediction process" for chroma.
496		* The function gets two ht x wd blocks, weights them, adds them, rounds off
497		* the sum, offsets it, saturates it to unsigned 8-bit and stores it in the
498		* destination block. (ht,wd) can be (2,2), (4,2), (2,4), (4,4), (8,4), (4,8)
499		* or (8,8)
500		*
501		* @param[in] pu1_src1
502		* Pointer to source 1
503		*
504		* @param[in] pu1_src2
505		* Pointer to source 2
506		*
507		* @param[in] pu1_dst
508		* Pointer to destination
509		*
510		* @param[in] src_strd1
511		* stride for source 1
512		*
513		* @param[in] src_strd2
514		* stride for source 2
515		*
516		* @param[in] dst_strd
517		* stride for destination
518		*
519		* @param[in] log_wd
520		* number of bits to be rounded off
521		*
522		* @param[in] wt1
523		* weight value for source 1
524		*
525		* @param[in] wt2
526		* weight value for source 2
527		*
528		* @param[in] ofst1
529		* offset value for source 1
530		*
531		* @param[in] ofst2
532		* offset value for source 2
533		*
534		* @param[in] ht
535		* height of the block
536		*
537		* @param[in] wd
538		* width of the block
539		*
540		* @returns none
541		*
542		* @remarks none
543		*
544		*******************************************************************************
545		*/
546		void ih264_weighted_bi_pred_chroma(UWORD8 *pu1_src1,
547		UWORD8 *pu1_src2,
548		UWORD8 *pu1_dst,
549		WORD32 src_strd1,
550		WORD32 src_strd2,
551		WORD32 dst_strd,
552		WORD32 log_wd,
553		WORD32 wt1,
554		WORD32 wt2,
555		WORD32 ofst1,
556		WORD32 ofst2,
557		WORD32 ht,
558		WORD32 wd)
559	343k	{
560	343k	WORD32 i, j;
561	343k	WORD32 wt1_u, wt1_v, wt2_u, wt2_v;
562	343k	WORD32 ofst1_u, ofst1_v, ofst2_u, ofst2_v;
563	343k	WORD32 ofst_u, ofst_v;
564	343k	WORD32 shft;
565
566	343k	ofst1_u = (WORD8)(ofst1 & 0xff);
567	343k	ofst1_v = (WORD8)(ofst1 >> 8);
568	343k	ofst2_u = (WORD8)(ofst2 & 0xff);
569	343k	ofst2_v = (WORD8)(ofst2 >> 8);
570	343k	wt1_u = (WORD16)(wt1 & 0xffff);
571	343k	wt1_v = (WORD16)(wt1 >> 16);
572	343k	wt2_u = (WORD16)(wt2 & 0xffff);
573	343k	wt2_v = (WORD16)(wt2 >> 16);
574	343k	ofst_u = (ofst1_u + ofst2_u + 1) >> 1;
575	343k	ofst_v = (ofst1_v + ofst2_v + 1) >> 1;
576
577	343k	src_strd1 -= wd << 1;
578	343k	src_strd2 -= wd << 1;
579	343k	dst_strd -= wd << 1;
580
581	343k	shft = log_wd + 1;
582	343k	ofst_u = (1 << log_wd) + (ofst_u << shft);
583	343k	ofst_v = (1 << log_wd) + (ofst_v << shft);
584
585	3.02M	for(i = 0; i < ht; i++)
586	2.67M	{
587	23.7M	for(j = 0; j < wd; j++, pu1_src1++, pu1_src2++, pu1_dst++)
588	21.0M	{
589	21.0M	*pu1_dst = CLIP_U8((wt1_u * (*pu1_src1) + wt2_u * (*pu1_src2) + ofst_u) >> shft);
590	21.0M	pu1_src1++;
591	21.0M	pu1_src2++;
592	21.0M	pu1_dst++;
593	21.0M	*pu1_dst = CLIP_U8((wt1_v * (*pu1_src1) + wt2_v * (*pu1_src2) + ofst_v) >> shft);
594	21.0M	}
595	2.67M	pu1_src1 += src_strd1;
596	2.67M	pu1_src2 += src_strd2;
597	2.67M	pu1_dst += dst_strd;
598	2.67M	}
599	343k	}