/src/libhevc/common/ihevc_itrans_recon.c

Source
/******************************************************************************
*
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevc_itrans_recon.c
 *
 * @brief
 *  Contains function definitions for inverse transform  and reconstruction
 *
 *
 * @author
 *  100470
 *
 * @par List of Functions:
 *  - ihevc_itrans_recon_4x4_ttype1()
 *  - ihevc_itrans_recon_4x4()
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
#include <stdio.h>
#include <string.h>
#include "ihevc_typedefs.h"
#include "ihevc_macros.h"
#include "ihevc_platform_macros.h"
#include "ihevc_defs.h"
#include "ihevc_trans_tables.h"
#include "ihevc_itrans_recon.h"
#include "ihevc_func_selector.h"
#include "ihevc_trans_macros.h"

/* All the functions here are replicated from ihevc_itrans.c and modified to */
/* include reconstruction */

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs Inverse transform type 1 (DST)  and reconstruction
 * for 4x4 input block
 *
 * @par Description:
 *  Performs inverse transform and adds the prediction  data and clips output
 * to 8 bit
 *
 * @param[in] pi2_src
 *  Input 4x4 coefficients
 *
 * @param[in] pi2_tmp
 *  Temporary 4x4 buffer for storing the inverse transform 1st stage output
 *
 * @param[in] pu1_pred
 *  Prediction 4x4 block
 *
 * @param[out] pu1_dst
 *  Output 4x4 block
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output Stride
 *
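 * @param[in] zero_cols
 *  Bit mask of zero columns in pi2_src; when bit i is set, column i is not
 *  read and its 1st stage output is set to zero
 *
 * @param[in] zero_rows
 *  Not used by this function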
 *
 * @returns  Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */

void ihevc_itrans_recon_4x4_ttype1(WORD16 *pi2_src,
                                   WORD16 *pi2_tmp,
                                   UWORD8 *pu1_pred,
                                   UWORD8 *pu1_dst,
                                   WORD32 src_strd,
                                   WORD32 pred_strd,
                                   WORD32 dst_strd,
                                   WORD32 zero_cols,
                                   WORD32 zero_rows)
{
    const WORD32 blk_size = TRANS_SIZE_4;
    WORD32 idx, k;
    WORD32 shift;
    WORD32 rnd;

    /* zero_rows is accepted but never read by this function */
    UNUSED(zero_rows);

    /*
     * Stage 1: input column 'idx' (elements src_strd apart) is transformed
     * and written as 4 contiguous values at pi2_tmp + idx * 4.
     */
    shift = IT_SHIFT_STAGE_1;
    rnd = 1 << (shift - 1);

    for(idx = 0; idx < blk_size; idx++, zero_cols >>= 1)
    {
        WORD16 *pi2_row = pi2_tmp + idx * blk_size;

        if(zero_cols & 1)
        {
            /* Column flagged as zero: skip the arithmetic, emit zeros */
            memset(pi2_row, 0, blk_size * sizeof(WORD16));
        }
        else
        {
            const WORD32 s0 = pi2_src[idx];
            const WORD32 s1 = pi2_src[idx + src_strd];
            const WORD32 s2 = pi2_src[idx + 2 * src_strd];
            const WORD32 s3 = pi2_src[idx + 3 * src_strd];

            /* Shared partial terms reused by several outputs */
            const WORD32 sum02 = s0 + s2;
            const WORD32 sum23 = s2 + s3;
            const WORD32 diff03 = s0 - s3;
            const WORD32 mid = 74 * s1;
            WORD32 val;

            val = (29 * sum02 + 55 * sum23 + mid + rnd) >> shift;
            pi2_row[0] = CLIP_S16(val);

            val = (55 * diff03 - 29 * sum23 + mid + rnd) >> shift;
            pi2_row[1] = CLIP_S16(val);

            val = (74 * (s0 - s2 + s3) + rnd) >> shift;
            pi2_row[2] = CLIP_S16(val);

            val = (55 * sum02 + 29 * diff03 - mid + rnd) >> shift;
            pi2_row[3] = CLIP_S16(val);
        }
    }

    /*
     * Stage 2: the same butterfly is applied along the other direction of
     * pi2_tmp (elements 4 apart); each result is clipped to 16 bit, added to
     * the prediction sample and clipped to 8 bit.
     */
    shift = IT_SHIFT_STAGE_2;
    rnd = 1 << (shift - 1);

    for(idx = 0; idx < blk_size; idx++)
    {
        const WORD32 t0 = pi2_tmp[idx];
        const WORD32 t1 = pi2_tmp[idx + blk_size];
        const WORD32 t2 = pi2_tmp[idx + 2 * blk_size];
        const WORD32 t3 = pi2_tmp[idx + 3 * blk_size];

        const WORD32 sum02 = t0 + t2;
        const WORD32 sum23 = t2 + t3;
        const WORD32 diff03 = t0 - t3;
        const WORD32 mid = 74 * t1;

        UWORD8 *pu1_pred_row = pu1_pred + idx * pred_strd;
        UWORD8 *pu1_dst_row = pu1_dst + idx * dst_strd;
        WORD32 resid[4];

        resid[0] = (29 * sum02 + 55 * sum23 + mid + rnd) >> shift;
        resid[1] = (55 * diff03 - 29 * sum23 + mid + rnd) >> shift;
        resid[2] = (74 * (t0 - t2 + t3) + rnd) >> shift;
        resid[3] = (55 * sum02 + 29 * diff03 - mid + rnd) >> shift;

        /* Reconstruct left to right: dst[k] = clip8(clip16(resid) + pred[k]) */
        for(k = 0; k < blk_size; k++)
        {
            WORD32 itrans_out = CLIP_S16(resid[k]);
            pu1_dst_row[k] = CLIP_U8((itrans_out + pu1_pred_row[k]));
        }
    }
}

/**
 *******************************************************************************
 *
 * @brief
 *  This function performs Inverse transform  and reconstruction for 4x4
 * input block
 *
 * @par Description:
 *  Performs inverse transform and adds the prediction  data and clips output
 * to 8 bit
 *
 * @param[in] pi2_src
 *  Input 4x4 coefficients
 *
 * @param[in] pi2_tmp
 *  Temporary 4x4 buffer for storing the inverse transform 1st stage output
 *
 * @param[in] pu1_pred
 *  Prediction 4x4 block
 *
 * @param[out] pu1_dst
 *  Output 4x4 block
 *
 * @param[in] src_strd
 *  Input stride
 *
 * @param[in] pred_strd
 *  Prediction stride
 *
 * @param[in] dst_strd
 *  Output Stride
 *
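 * @param[in] zero_cols
 *  Bit mask of zero columns in pi2_src; when bit i is set, column i is not
 *  read and its 1st stage output is set to zero
 *
 * @param[in] zero_rows
 *  Not used by this function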
 *
 * @returns  Void
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */

void ihevc_itrans_recon_4x4(WORD16 *pi2_src,
                            WORD16 *pi2_tmp,
                            UWORD8 *pu1_pred,
                            UWORD8 *pu1_dst,
                            WORD32 src_strd,
                            WORD32 pred_strd,
                            WORD32 dst_strd,
                            WORD32 zero_cols,
                            WORD32 zero_rows)

{
    const WORD32 tx_size = TRANS_SIZE_4;
    WORD32 pos, k;
    WORD32 shift;
    WORD32 round_off;

    /* zero_rows is accepted but never read by this function */
    UNUSED(zero_rows);

    /*
     * Stage 1: input column 'pos' (elements src_strd apart) is transformed
     * and written as 4 contiguous values at pi2_tmp + pos * 4.
     */
    shift = IT_SHIFT_STAGE_1;
    round_off = 1 << (shift - 1);

    for(pos = 0; pos < tx_size; pos++, zero_cols >>= 1)
    {
        WORD16 *pi2_line = pi2_tmp + pos * tx_size;

        if(zero_cols & 1)
        {
            /* Column flagged as zero: skip the arithmetic, emit zeros */
            memset(pi2_line, 0, tx_size * sizeof(WORD16));
        }
        else
        {
            const WORD32 in0 = pi2_src[pos];
            const WORD32 in1 = pi2_src[pos + src_strd];
            const WORD32 in2 = pi2_src[pos + 2 * src_strd];
            const WORD32 in3 = pi2_src[pos + 3 * src_strd];

            /* Odd part uses inputs 1 and 3, even part uses inputs 0 and 2;
             * outputs 2 and 3 reuse them with the odd part negated */
            const WORD32 odd0 = g_ai2_ihevc_trans_4[1][0] * in1
                            + g_ai2_ihevc_trans_4[3][0] * in3;
            const WORD32 odd1 = g_ai2_ihevc_trans_4[1][1] * in1
                            + g_ai2_ihevc_trans_4[3][1] * in3;
            const WORD32 even0 = g_ai2_ihevc_trans_4[0][0] * in0
                            + g_ai2_ihevc_trans_4[2][0] * in2;
            const WORD32 even1 = g_ai2_ihevc_trans_4[0][1] * in0
                            + g_ai2_ihevc_trans_4[2][1] * in2;
            WORD32 val;

            val = (even0 + odd0 + round_off) >> shift;
            pi2_line[0] = CLIP_S16(val);

            val = (even1 + odd1 + round_off) >> shift;
            pi2_line[1] = CLIP_S16(val);

            val = (even1 - odd1 + round_off) >> shift;
            pi2_line[2] = CLIP_S16(val);

            val = (even0 - odd0 + round_off) >> shift;
            pi2_line[3] = CLIP_S16(val);
        }
    }

    /*
     * Stage 2: the same even/odd butterfly is applied along the other
     * direction of pi2_tmp (elements 4 apart); each result is clipped to
     * 16 bit, added to the prediction sample and clipped to 8 bit.
     */
    shift = IT_SHIFT_STAGE_2;
    round_off = 1 << (shift - 1);

    for(pos = 0; pos < tx_size; pos++)
    {
        const WORD32 t0 = pi2_tmp[pos];
        const WORD32 t1 = pi2_tmp[pos + tx_size];
        const WORD32 t2 = pi2_tmp[pos + 2 * tx_size];
        const WORD32 t3 = pi2_tmp[pos + 3 * tx_size];

        const WORD32 odd0 = g_ai2_ihevc_trans_4[1][0] * t1
                        + g_ai2_ihevc_trans_4[3][0] * t3;
        const WORD32 odd1 = g_ai2_ihevc_trans_4[1][1] * t1
                        + g_ai2_ihevc_trans_4[3][1] * t3;
        const WORD32 even0 = g_ai2_ihevc_trans_4[0][0] * t0
                        + g_ai2_ihevc_trans_4[2][0] * t2;
        const WORD32 even1 = g_ai2_ihevc_trans_4[0][1] * t0
                        + g_ai2_ihevc_trans_4[2][1] * t2;

        UWORD8 *pu1_pred_row = pu1_pred + pos * pred_strd;
        UWORD8 *pu1_dst_row = pu1_dst + pos * dst_strd;
        WORD32 resid[4];

        resid[0] = (even0 + odd0 + round_off) >> shift;
        resid[1] = (even1 + odd1 + round_off) >> shift;
        resid[2] = (even1 - odd1 + round_off) >> shift;
        resid[3] = (even0 - odd0 + round_off) >> shift;

        /* Reconstruct left to right: dst[k] = clip8(clip16(resid) + pred[k]) */
        for(k = 0; k < tx_size; k++)
        {
            WORD32 itrans_out = CLIP_S16(resid[k]);
            pu1_dst_row[k] = CLIP_U8((itrans_out + pu1_pred_row[k]));
        }
    }
}


Coverage Report

Created: 2025-12-14 06:11

Line	Count	Source
1		/******************************************************************************
2		*
3		* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4		*
5		* Licensed under the Apache License, Version 2.0 (the "License");
6		* you may not use this file except in compliance with the License.
7		* You may obtain a copy of the License at:
8		*
9		* http://www.apache.org/licenses/LICENSE-2.0
10		*
11		* Unless required by applicable law or agreed to in writing, software
12		* distributed under the License is distributed on an "AS IS" BASIS,
13		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		* See the License for the specific language governing permissions and
15		* limitations under the License.
16		*
17		******************************************************************************/
18		/**
19		*******************************************************************************
20		* @file
21		* ihevc_itrans_recon.c
22		*
23		* @brief
24		* Contains function definitions for inverse transform and reconstruction
25		*
26		*
27		* @author
28		* 100470
29		*
30		* @par List of Functions:
31		* - ihevc_itrans_recon_4x4_ttype1()
32		* - ihevc_itrans_recon_4x4()
33		*
34		* @remarks
35		* None
36		*
37		*******************************************************************************
38		*/
39		#include <stdio.h>
40		#include <string.h>
41		#include "ihevc_typedefs.h"
42		#include "ihevc_macros.h"
43		#include "ihevc_platform_macros.h"
44		#include "ihevc_defs.h"
45		#include "ihevc_trans_tables.h"
46		#include "ihevc_itrans_recon.h"
47		#include "ihevc_func_selector.h"
48		#include "ihevc_trans_macros.h"
49
50		/* All the functions here are replicated from ihevc_itrans.c and modified to */
51		/* include reconstruction */
52
53		/**
54		*******************************************************************************
55		*
56		* @brief
57		* This function performs Inverse transform type 1 (DST) and reconstruction
58		* for 4x4 input block
59		*
60		* @par Description:
61		* Performs inverse transform and adds the prediction data and clips output
62		* to 8 bit
63		*
64		* @param[in] pi2_src
65		* Input 4x4 coefficients
66		*
67		* @param[in] pi2_tmp
68		* Temporary 4x4 buffer for storing inverse
69		*
70		* transform
71		* 1st stage output
72		*
73		* @param[in] pu1_pred
74		* Prediction 4x4 block
75		*
76		* @param[out] pu1_dst
77		* Output 4x4 block
78		*
79		* @param[in] src_strd
80		* Input stride
81		*
82		* @param[in] pred_strd
83		* Prediction stride
84		*
85		* @param[in] dst_strd
86		* Output Stride
87		*
88		* @param[in] zero_cols
89		* Zero columns in pi2_src
90		*
91		* @returns Void
92		*
93		* @remarks
94		* None
95		*
96		*******************************************************************************
97		*/
98
99		void ihevc_itrans_recon_4x4_ttype1(WORD16 *pi2_src,
100		WORD16 *pi2_tmp,
101		UWORD8 *pu1_pred,
102		UWORD8 *pu1_dst,
103		WORD32 src_strd,
104		WORD32 pred_strd,
105		WORD32 dst_strd,
106		WORD32 zero_cols,
107		WORD32 zero_rows)
108	3.09M	{
109	3.09M	WORD32 i, c[4];
110	3.09M	WORD32 add;
111	3.09M	WORD32 shift;
112	3.09M	WORD16 *pi2_tmp_orig;
113	3.09M	WORD32 trans_size;
114	3.09M	UNUSED(zero_rows);
115	3.09M	trans_size = TRANS_SIZE_4;
116
117	3.09M	pi2_tmp_orig = pi2_tmp;
118
119		/* Inverse Transform 1st stage */
120	3.09M	shift = IT_SHIFT_STAGE_1;
121	3.09M	add = 1 << (shift - 1);
122
123	15.4M	for(i = 0; i < trans_size; i++)
124	12.3M	{
125		/* Checking for Zero Cols */
126	12.3M	if((zero_cols & 1) == 1)
127	199k	{
128	199k	memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
129	199k	}
130	12.1M	else
131	12.1M	{
132		// Intermediate Variables
133	12.1M	c[0] = pi2_src[0] + pi2_src[2 * src_strd];
134	12.1M	c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd];
135	12.1M	c[2] = pi2_src[0] - pi2_src[3 * src_strd];
136	12.1M	c[3] = 74 * pi2_src[src_strd];
137
138	12.1M	pi2_tmp[0] =
139	12.1M	CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
140	12.1M	pi2_tmp[1] =
141	12.1M	CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
142	12.1M	pi2_tmp[2] =
143	12.1M	CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> shift);
144	12.1M	pi2_tmp[3] =
145	12.1M	CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
146	12.1M	}
147	12.3M	pi2_src++;
148	12.3M	pi2_tmp += trans_size;
149	12.3M	zero_cols = zero_cols >> 1;
150	12.3M	}
151
152	3.09M	pi2_tmp = pi2_tmp_orig;
153
154		/* Inverse Transform 2nd stage */
155	3.09M	shift = IT_SHIFT_STAGE_2;
156	3.09M	add = 1 << (shift - 1);
157
158	15.4M	for(i = 0; i < trans_size; i++)
159	12.3M	{
160	12.3M	WORD32 itrans_out;
161		// Intermediate Variables
162	12.3M	c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size];
163	12.3M	c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size];
164	12.3M	c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size];
165	12.3M	c[3] = 74 * pi2_tmp[trans_size];
166
167	12.3M	itrans_out =
168	12.3M	CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
169	12.3M	pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
170	12.3M	itrans_out =
171	12.3M	CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
172	12.3M	pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
173	12.3M	itrans_out =
174	12.3M	CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift);
175	12.3M	pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
176	12.3M	itrans_out =
177	12.3M	CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
178	12.3M	pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
179	12.3M	pi2_tmp++;
180	12.3M	pu1_pred += pred_strd;
181	12.3M	pu1_dst += dst_strd;
182	12.3M	}
183	3.09M	}
184
185		/**
186		*******************************************************************************
187		*
188		* @brief
189		* This function performs Inverse transform and reconstruction for 4x4
190		* input block
191		*
192		* @par Description:
193		* Performs inverse transform and adds the prediction data and clips output
194		* to 8 bit
195		*
196		* @param[in] pi2_src
197		* Input 4x4 coefficients
198		*
199		* @param[in] pi2_tmp
200		* Temporary 4x4 buffer for storing inverse
201		*
202		* transform
203		* 1st stage output
204		*
205		* @param[in] pu1_pred
206		* Prediction 4x4 block
207		*
208		* @param[out] pu1_dst
209		* Output 4x4 block
210		*
211		* @param[in] src_strd
212		* Input stride
213		*
214		* @param[in] pred_strd
215		* Prediction stride
216		*
217		* @param[in] dst_strd
218		* Output Stride
219		*
220		* @param[in] shift
221		* Output shift
222		*
223		* @param[in] zero_cols
224		* Zero columns in pi2_src
225		*
226		* @returns Void
227		*
228		* @remarks
229		* None
230		*
231		*******************************************************************************
232		*/
233
234		void ihevc_itrans_recon_4x4(WORD16 *pi2_src,
235		WORD16 *pi2_tmp,
236		UWORD8 *pu1_pred,
237		UWORD8 *pu1_dst,
238		WORD32 src_strd,
239		WORD32 pred_strd,
240		WORD32 dst_strd,
241		WORD32 zero_cols,
242		WORD32 zero_rows)
243
244	496k	{
245	496k	WORD32 j;
246	496k	WORD32 e[2], o[2];
247	496k	WORD32 add;
248	496k	WORD32 shift;
249	496k	WORD16 *pi2_tmp_orig;
250	496k	WORD32 trans_size;
251	496k	UNUSED(zero_rows);
252	496k	trans_size = TRANS_SIZE_4;
253
254	496k	pi2_tmp_orig = pi2_tmp;
255
256		/* Inverse Transform 1st stage */
257	496k	shift = IT_SHIFT_STAGE_1;
258	496k	add = 1 << (shift - 1);
259
260	2.48M	for(j = 0; j < trans_size; j++)
261	1.98M	{
262		/* Checking for Zero Cols */
263	1.98M	if((zero_cols & 1) == 1)
264	88.0k	{
265	88.0k	memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
266	88.0k	}
267	1.89M	else
268	1.89M	{
269
270		/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
271	1.89M	o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd]
272	1.89M	+ g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd];
273	1.89M	o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd]
274	1.89M	+ g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd];
275	1.89M	e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0]
276	1.89M	+ g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd];
277	1.89M	e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0]
278	1.89M	+ g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd];
279
280	1.89M	pi2_tmp[0] =
281	1.89M	CLIP_S16(((e[0] + o[0] + add) >> shift));
282	1.89M	pi2_tmp[1] =
283	1.89M	CLIP_S16(((e[1] + o[1] + add) >> shift));
284	1.89M	pi2_tmp[2] =
285	1.89M	CLIP_S16(((e[1] - o[1] + add) >> shift));
286	1.89M	pi2_tmp[3] =
287	1.89M	CLIP_S16(((e[0] - o[0] + add) >> shift));
288
289	1.89M	}
290	1.98M	pi2_src++;
291	1.98M	pi2_tmp += trans_size;
292	1.98M	zero_cols = zero_cols >> 1;
293	1.98M	}
294
295	496k	pi2_tmp = pi2_tmp_orig;
296
297		/* Inverse Transform 2nd stage */
298	496k	shift = IT_SHIFT_STAGE_2;
299	496k	add = 1 << (shift - 1);
300
301	2.48M	for(j = 0; j < trans_size; j++)
302	1.98M	{
303	1.98M	WORD32 itrans_out;
304		/* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
305	1.98M	o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size]
306	1.98M	+ g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size];
307	1.98M	o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size]
308	1.98M	+ g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size];
309	1.98M	e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0]
310	1.98M	+ g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size];
311	1.98M	e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0]
312	1.98M	+ g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size];
313
314	1.98M	itrans_out =
315	1.98M	CLIP_S16(((e[0] + o[0] + add) >> shift));
316	1.98M	pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
317	1.98M	itrans_out =
318	1.98M	CLIP_S16(((e[1] + o[1] + add) >> shift));
319	1.98M	pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
320	1.98M	itrans_out =
321	1.98M	CLIP_S16(((e[1] - o[1] + add) >> shift));
322	1.98M	pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
323	1.98M	itrans_out =
324	1.98M	CLIP_S16(((e[0] - o[0] + add) >> shift));
325	1.98M	pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
326
327	1.98M	pi2_tmp++;
328	1.98M	pu1_pred += pred_strd;
329	1.98M	pu1_dst += dst_strd;
330
331	1.98M	}
332	496k	}
333