/src/libhevc/common/ihevc_itrans_recon.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_itrans_recon.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for inverse transform and reconstruction |
25 | | * |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_itrans_recon_4x4_ttype1() |
32 | | * - ihevc_itrans_recon_4x4() |
33 | | * |
34 | | * @remarks |
35 | | * None |
36 | | * |
37 | | ******************************************************************************* |
38 | | */ |
39 | | #include <stdio.h> |
40 | | #include <string.h> |
41 | | #include "ihevc_typedefs.h" |
42 | | #include "ihevc_macros.h" |
43 | | #include "ihevc_platform_macros.h" |
44 | | #include "ihevc_defs.h" |
45 | | #include "ihevc_trans_tables.h" |
46 | | #include "ihevc_itrans_recon.h" |
47 | | #include "ihevc_func_selector.h" |
48 | | #include "ihevc_trans_macros.h" |
49 | | |
50 | | /* All the functions here are replicated from ihevc_itrans.c and modified to */ |
51 | | /* include reconstruction */ |
52 | | |
53 | | /** |
54 | | ******************************************************************************* |
55 | | * |
56 | | * @brief |
57 | | * This function performs Inverse transform type 1 (DST) and reconstruction |
58 | | * for 4x4 input block |
59 | | * |
60 | | * @par Description: |
61 | | * Performs inverse transform and adds the prediction data and clips output |
62 | | * to 8 bit |
63 | | * |
64 | | * @param[in] pi2_src |
65 | | * Input 4x4 coefficients |
66 | | * |
67 | | * @param[in] pi2_tmp |
68 | | * Temporary 4x4 buffer for storing inverse |
69 | | * |
70 | | * transform |
71 | | * 1st stage output |
72 | | * |
73 | | * @param[in] pu1_pred |
74 | | * Prediction 4x4 block |
75 | | * |
76 | | * @param[out] pu1_dst |
77 | | * Output 4x4 block |
78 | | * |
79 | | * @param[in] src_strd |
80 | | * Input stride |
81 | | * |
82 | | * @param[in] pred_strd |
83 | | * Prediction stride |
84 | | * |
85 | | * @param[in] dst_strd |
86 | | * Output Stride |
87 | | * |
88 | | * @param[in] zero_cols |
89 | | * Zero columns in pi2_src |
90 | | * |
91 | | * @returns Void |
92 | | * |
93 | | * @remarks |
94 | | * None |
95 | | * |
96 | | ******************************************************************************* |
97 | | */ |
98 | | |
99 | | void ihevc_itrans_recon_4x4_ttype1(WORD16 *pi2_src, |
100 | | WORD16 *pi2_tmp, |
101 | | UWORD8 *pu1_pred, |
102 | | UWORD8 *pu1_dst, |
103 | | WORD32 src_strd, |
104 | | WORD32 pred_strd, |
105 | | WORD32 dst_strd, |
106 | | WORD32 zero_cols, |
107 | | WORD32 zero_rows) |
108 | 3.09M | { |
109 | 3.09M | WORD32 i, c[4]; |
110 | 3.09M | WORD32 add; |
111 | 3.09M | WORD32 shift; |
112 | 3.09M | WORD16 *pi2_tmp_orig; |
113 | 3.09M | WORD32 trans_size; |
114 | 3.09M | UNUSED(zero_rows); |
115 | 3.09M | trans_size = TRANS_SIZE_4; |
116 | | |
117 | 3.09M | pi2_tmp_orig = pi2_tmp; |
118 | | |
119 | | /* Inverse Transform 1st stage */ |
120 | 3.09M | shift = IT_SHIFT_STAGE_1; |
121 | 3.09M | add = 1 << (shift - 1); |
122 | | |
123 | 15.4M | for(i = 0; i < trans_size; i++) |
124 | 12.3M | { |
125 | | /* Checking for Zero Cols */ |
126 | 12.3M | if((zero_cols & 1) == 1) |
127 | 199k | { |
128 | 199k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
129 | 199k | } |
130 | 12.1M | else |
131 | 12.1M | { |
132 | | // Intermediate Variables |
133 | 12.1M | c[0] = pi2_src[0] + pi2_src[2 * src_strd]; |
134 | 12.1M | c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd]; |
135 | 12.1M | c[2] = pi2_src[0] - pi2_src[3 * src_strd]; |
136 | 12.1M | c[3] = 74 * pi2_src[src_strd]; |
137 | | |
138 | 12.1M | pi2_tmp[0] = |
139 | 12.1M | CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); |
140 | 12.1M | pi2_tmp[1] = |
141 | 12.1M | CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); |
142 | 12.1M | pi2_tmp[2] = |
143 | 12.1M | CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> shift); |
144 | 12.1M | pi2_tmp[3] = |
145 | 12.1M | CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); |
146 | 12.1M | } |
147 | 12.3M | pi2_src++; |
148 | 12.3M | pi2_tmp += trans_size; |
149 | 12.3M | zero_cols = zero_cols >> 1; |
150 | 12.3M | } |
151 | | |
152 | 3.09M | pi2_tmp = pi2_tmp_orig; |
153 | | |
154 | | /* Inverse Transform 2nd stage */ |
155 | 3.09M | shift = IT_SHIFT_STAGE_2; |
156 | 3.09M | add = 1 << (shift - 1); |
157 | | |
158 | 15.4M | for(i = 0; i < trans_size; i++) |
159 | 12.3M | { |
160 | 12.3M | WORD32 itrans_out; |
161 | | // Intermediate Variables |
162 | 12.3M | c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size]; |
163 | 12.3M | c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]; |
164 | 12.3M | c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size]; |
165 | 12.3M | c[3] = 74 * pi2_tmp[trans_size]; |
166 | | |
167 | 12.3M | itrans_out = |
168 | 12.3M | CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift); |
169 | 12.3M | pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); |
170 | 12.3M | itrans_out = |
171 | 12.3M | CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift); |
172 | 12.3M | pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); |
173 | 12.3M | itrans_out = |
174 | 12.3M | CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift); |
175 | 12.3M | pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); |
176 | 12.3M | itrans_out = |
177 | 12.3M | CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift); |
178 | 12.3M | pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); |
179 | 12.3M | pi2_tmp++; |
180 | 12.3M | pu1_pred += pred_strd; |
181 | 12.3M | pu1_dst += dst_strd; |
182 | 12.3M | } |
183 | 3.09M | } |
184 | | |
185 | | /** |
186 | | ******************************************************************************* |
187 | | * |
188 | | * @brief |
189 | | * This function performs Inverse transform and reconstruction for 4x4 |
190 | | * input block |
191 | | * |
192 | | * @par Description: |
193 | | * Performs inverse transform and adds the prediction data and clips output |
194 | | * to 8 bit |
195 | | * |
196 | | * @param[in] pi2_src |
197 | | * Input 4x4 coefficients |
198 | | * |
199 | | * @param[in] pi2_tmp |
200 | | * Temporary 4x4 buffer for storing inverse |
201 | | * |
202 | | * transform |
203 | | * 1st stage output |
204 | | * |
205 | | * @param[in] pu1_pred |
206 | | * Prediction 4x4 block |
207 | | * |
208 | | * @param[out] pu1_dst |
209 | | * Output 4x4 block |
210 | | * |
211 | | * @param[in] src_strd |
212 | | * Input stride |
213 | | * |
214 | | * @param[in] pred_strd |
215 | | * Prediction stride |
216 | | * |
217 | | * @param[in] dst_strd |
218 | | * Output Stride |
219 | | * |
220 | | * @param[in] shift |
221 | | * Output shift |
222 | | * |
223 | | * @param[in] zero_cols |
224 | | * Zero columns in pi2_src |
225 | | * |
226 | | * @returns Void |
227 | | * |
228 | | * @remarks |
229 | | * None |
230 | | * |
231 | | ******************************************************************************* |
232 | | */ |
233 | | |
234 | | void ihevc_itrans_recon_4x4(WORD16 *pi2_src, |
235 | | WORD16 *pi2_tmp, |
236 | | UWORD8 *pu1_pred, |
237 | | UWORD8 *pu1_dst, |
238 | | WORD32 src_strd, |
239 | | WORD32 pred_strd, |
240 | | WORD32 dst_strd, |
241 | | WORD32 zero_cols, |
242 | | WORD32 zero_rows) |
243 | | |
244 | 496k | { |
245 | 496k | WORD32 j; |
246 | 496k | WORD32 e[2], o[2]; |
247 | 496k | WORD32 add; |
248 | 496k | WORD32 shift; |
249 | 496k | WORD16 *pi2_tmp_orig; |
250 | 496k | WORD32 trans_size; |
251 | 496k | UNUSED(zero_rows); |
252 | 496k | trans_size = TRANS_SIZE_4; |
253 | | |
254 | 496k | pi2_tmp_orig = pi2_tmp; |
255 | | |
256 | | /* Inverse Transform 1st stage */ |
257 | 496k | shift = IT_SHIFT_STAGE_1; |
258 | 496k | add = 1 << (shift - 1); |
259 | | |
260 | 2.48M | for(j = 0; j < trans_size; j++) |
261 | 1.98M | { |
262 | | /* Checking for Zero Cols */ |
263 | 1.98M | if((zero_cols & 1) == 1) |
264 | 88.0k | { |
265 | 88.0k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
266 | 88.0k | } |
267 | 1.89M | else |
268 | 1.89M | { |
269 | | |
270 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
271 | 1.89M | o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd] |
272 | 1.89M | + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd]; |
273 | 1.89M | o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd] |
274 | 1.89M | + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd]; |
275 | 1.89M | e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0] |
276 | 1.89M | + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd]; |
277 | 1.89M | e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0] |
278 | 1.89M | + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd]; |
279 | | |
280 | 1.89M | pi2_tmp[0] = |
281 | 1.89M | CLIP_S16(((e[0] + o[0] + add) >> shift)); |
282 | 1.89M | pi2_tmp[1] = |
283 | 1.89M | CLIP_S16(((e[1] + o[1] + add) >> shift)); |
284 | 1.89M | pi2_tmp[2] = |
285 | 1.89M | CLIP_S16(((e[1] - o[1] + add) >> shift)); |
286 | 1.89M | pi2_tmp[3] = |
287 | 1.89M | CLIP_S16(((e[0] - o[0] + add) >> shift)); |
288 | | |
289 | 1.89M | } |
290 | 1.98M | pi2_src++; |
291 | 1.98M | pi2_tmp += trans_size; |
292 | 1.98M | zero_cols = zero_cols >> 1; |
293 | 1.98M | } |
294 | | |
295 | 496k | pi2_tmp = pi2_tmp_orig; |
296 | | |
297 | | /* Inverse Transform 2nd stage */ |
298 | 496k | shift = IT_SHIFT_STAGE_2; |
299 | 496k | add = 1 << (shift - 1); |
300 | | |
301 | 2.48M | for(j = 0; j < trans_size; j++) |
302 | 1.98M | { |
303 | 1.98M | WORD32 itrans_out; |
304 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
305 | 1.98M | o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size] |
306 | 1.98M | + g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size]; |
307 | 1.98M | o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size] |
308 | 1.98M | + g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size]; |
309 | 1.98M | e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0] |
310 | 1.98M | + g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size]; |
311 | 1.98M | e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0] |
312 | 1.98M | + g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size]; |
313 | | |
314 | 1.98M | itrans_out = |
315 | 1.98M | CLIP_S16(((e[0] + o[0] + add) >> shift)); |
316 | 1.98M | pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0])); |
317 | 1.98M | itrans_out = |
318 | 1.98M | CLIP_S16(((e[1] + o[1] + add) >> shift)); |
319 | 1.98M | pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1])); |
320 | 1.98M | itrans_out = |
321 | 1.98M | CLIP_S16(((e[1] - o[1] + add) >> shift)); |
322 | 1.98M | pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2])); |
323 | 1.98M | itrans_out = |
324 | 1.98M | CLIP_S16(((e[0] - o[0] + add) >> shift)); |
325 | 1.98M | pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3])); |
326 | | |
327 | 1.98M | pi2_tmp++; |
328 | 1.98M | pu1_pred += pred_strd; |
329 | 1.98M | pu1_dst += dst_strd; |
330 | | |
331 | 1.98M | } |
332 | 496k | } |
333 | | |