/src/libhevc/common/ihevc_itrans_recon_8x8.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_itrans_recon_8x8.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for inverse transform and reconstruction 8x8 |
25 | | * |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_itrans_recon_8x8() |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | #include <stdio.h> |
39 | | #include <string.h> |
40 | | #include "ihevc_typedefs.h" |
41 | | #include "ihevc_macros.h" |
42 | | #include "ihevc_platform_macros.h" |
43 | | #include "ihevc_defs.h" |
44 | | #include "ihevc_trans_tables.h" |
45 | | #include "ihevc_itrans_recon.h" |
46 | | #include "ihevc_func_selector.h" |
47 | | #include "ihevc_trans_macros.h" |
48 | | |
49 | | /** |
50 | | ******************************************************************************* |
51 | | * |
52 | | * @brief |
53 | | * This function performs Inverse transform and reconstruction for 8x8 |
54 | | * input block |
55 | | * |
56 | | * @par Description: |
57 | | * Performs inverse transform and adds the prediction data and clips output |
58 | | * to 8 bit |
59 | | * |
60 | | * @param[in] pi2_src |
61 | | * Input 8x8 coefficients |
62 | | * |
63 | | * @param[in] pi2_tmp |
64 | | * Temporary 8x8 buffer used as scratch storage: |
65 | | * it is written with the output of the 1st stage |
66 | | * of the inverse transform and read back by the |
67 | | * 2nd stage |
68 | | * |
69 | | * @param[in] pu1_pred |
70 | | * Prediction 8x8 block |
71 | | * |
72 | | * @param[out] pu1_dst |
73 | | * Output 8x8 block |
74 | | * |
75 | | * @param[in] src_strd |
76 | | * Input stride |
77 | | * |
78 | | * @param[in] pred_strd |
79 | | * Prediction stride |
80 | | * |
81 | | * @param[in] dst_strd |
82 | | * Output Stride |
83 | | * |
84 | | * @param[in] zero_cols |
85 | | * Bit mask of zero columns in pi2_src (bit j set: column j is zero) |
86 | | * |
87 | | * @param[in] zero_rows |
88 | | * Bit mask of zero rows in pi2_src; only bits 4-7 are examined |
89 | | * |
90 | | * @returns Void |
91 | | * |
92 | | * @remarks |
93 | | * None |
94 | | * |
95 | | ******************************************************************************* |
96 | | */ |
97 | | |
98 | | void ihevc_itrans_recon_8x8(WORD16 *pi2_src, |
99 | | WORD16 *pi2_tmp, |
100 | | UWORD8 *pu1_pred, |
101 | | UWORD8 *pu1_dst, |
102 | | WORD32 src_strd, |
103 | | WORD32 pred_strd, |
104 | | WORD32 dst_strd, |
105 | | WORD32 zero_cols, |
106 | | WORD32 zero_rows) |
107 | 1.11M | { |
108 | 1.11M | WORD32 j, k; |
109 | 1.11M | WORD32 e[4], o[4]; |
110 | 1.11M | WORD32 ee[2], eo[2]; |
111 | 1.11M | WORD32 add; |
112 | 1.11M | WORD32 shift; |
113 | 1.11M | WORD16 *pi2_tmp_orig; |
114 | 1.11M | WORD32 trans_size; |
115 | 1.11M | WORD32 zero_rows_2nd_stage = zero_cols; /* Saved before zero_cols is shifted away below; the 1st stage stores source column j as row j of pi2_tmp, so zero columns become zero rows for the 2nd stage */ |
116 | 1.11M | WORD32 row_limit_2nd_stage; |
117 | | |
118 | 1.11M | trans_size = TRANS_SIZE_8; |
119 | | |
120 | 1.11M | pi2_tmp_orig = pi2_tmp; |
121 | | |
122 | 1.11M | if((zero_cols & 0xF0) == 0xF0) /* Columns 4-7 of the input are flagged zero: the 1st stage loop runs for columns 0-3 only */ |
123 | 181k | row_limit_2nd_stage = 4; |
124 | 934k | else |
125 | 934k | row_limit_2nd_stage = TRANS_SIZE_8; |
126 | | |
127 | | |
128 | 1.11M | if((zero_rows & 0xF0) == 0xF0) /* Rows 4-7 of input are flagged zero: only rows 0-3 are read by the 1st stage */ |
129 | 161k | { |
130 | | /************************************************************************************************/ |
131 | | /**********************************START - IT_RECON_8x8******************************************/ |
132 | | /************************************************************************************************/ |
133 | | |
134 | | /* Inverse Transform 1st stage: each source column is transformed and stored as one row of pi2_tmp */ |
135 | 161k | shift = IT_SHIFT_STAGE_1; |
136 | 161k | add = 1 << (shift - 1); |
137 | | |
138 | 1.04M | for(j = 0; j < row_limit_2nd_stage; j++) |
139 | 886k | { |
140 | | /* Zero column (lowest bit of zero_cols set): its output row in pi2_tmp is simply cleared */ |
141 | 886k | if((zero_cols & 1) == 1) |
142 | 146k | { |
143 | 146k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
144 | 146k | } |
145 | 740k | else |
146 | 740k | { |
147 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
148 | 3.70M | for(k = 0; k < 4; k++) |
149 | 2.96M | { |
150 | 2.96M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] |
151 | 2.96M | + g_ai2_ihevc_trans_8[3][k] |
152 | 2.96M | * pi2_src[3 * src_strd]; |
153 | 2.96M | } |
154 | 740k | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]; |
155 | 740k | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]; |
156 | 740k | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]; |
157 | 740k | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]; |
158 | | |
159 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
160 | 740k | e[0] = ee[0] + eo[0]; |
161 | 740k | e[3] = ee[0] - eo[0]; |
162 | 740k | e[1] = ee[1] + eo[1]; |
163 | 740k | e[2] = ee[1] - eo[1]; |
164 | 3.70M | for(k = 0; k < 4; k++) |
165 | 2.96M | { |
166 | 2.96M | pi2_tmp[k] = |
167 | 2.96M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
168 | 2.96M | pi2_tmp[k + 4] = |
169 | 2.96M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
170 | 2.96M | } |
171 | 740k | } |
172 | 886k | pi2_src++; |
173 | 886k | pi2_tmp += trans_size; |
174 | 886k | zero_cols = zero_cols >> 1; |
175 | 886k | } |
176 | | |
177 | 161k | pi2_tmp = pi2_tmp_orig; |
178 | | |
179 | | /* Inverse Transform 2nd stage: transforms the columns of pi2_tmp, adds the prediction, clips to 8 bit and writes pu1_dst */ |
180 | 161k | shift = IT_SHIFT_STAGE_2; |
181 | 161k | add = 1 << (shift - 1); |
182 | 161k | if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* Rows 4-7 of 1st stage output were not produced (zero): only rows 0-3 of pi2_tmp are read */ |
183 | 101k | { |
184 | 914k | for(j = 0; j < trans_size; j++) |
185 | 812k | { |
186 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
187 | 4.06M | for(k = 0; k < 4; k++) |
188 | 3.24M | { |
189 | 3.24M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] |
190 | 3.24M | + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; |
191 | 3.24M | } |
192 | 812k | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; |
193 | 812k | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; |
194 | 812k | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; |
195 | 812k | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; |
196 | | |
197 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
198 | 812k | e[0] = ee[0] + eo[0]; |
199 | 812k | e[3] = ee[0] - eo[0]; |
200 | 812k | e[1] = ee[1] + eo[1]; |
201 | 812k | e[2] = ee[1] - eo[1]; |
202 | 4.06M | for(k = 0; k < 4; k++) |
203 | 3.24M | { |
204 | 3.24M | WORD32 itrans_out; |
205 | 3.24M | itrans_out = |
206 | 3.24M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
207 | 3.24M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
208 | 3.24M | itrans_out = |
209 | 3.24M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
210 | 3.24M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
211 | 3.24M | } |
212 | 812k | pi2_tmp++; |
213 | 812k | pu1_pred += pred_strd; |
214 | 812k | pu1_dst += dst_strd; |
215 | 812k | } |
216 | 101k | } |
217 | 60.0k | else /* Rows 4-7 of 1st stage output may be non-zero: all 8 rows of pi2_tmp are read */ |
218 | 60.0k | { |
219 | 540k | for(j = 0; j < trans_size; j++) |
220 | 480k | { |
221 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
222 | 2.40M | for(k = 0; k < 4; k++) |
223 | 1.92M | { |
224 | 1.92M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] |
225 | 1.92M | + g_ai2_ihevc_trans_8[3][k] |
226 | 1.92M | * pi2_tmp[3 * trans_size] |
227 | 1.92M | + g_ai2_ihevc_trans_8[5][k] |
228 | 1.92M | * pi2_tmp[5 * trans_size] |
229 | 1.92M | + g_ai2_ihevc_trans_8[7][k] |
230 | 1.92M | * pi2_tmp[7 * trans_size]; |
231 | 1.92M | } |
232 | | |
233 | 480k | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] |
234 | 480k | + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; |
235 | 480k | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] |
236 | 480k | + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; |
237 | 480k | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] |
238 | 480k | + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; |
239 | 480k | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] |
240 | 480k | + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; |
241 | | |
242 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
243 | 480k | e[0] = ee[0] + eo[0]; |
244 | 480k | e[3] = ee[0] - eo[0]; |
245 | 480k | e[1] = ee[1] + eo[1]; |
246 | 480k | e[2] = ee[1] - eo[1]; |
247 | 2.40M | for(k = 0; k < 4; k++) |
248 | 1.92M | { |
249 | 1.92M | WORD32 itrans_out; |
250 | 1.92M | itrans_out = |
251 | 1.92M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
252 | 1.92M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
253 | 1.92M | itrans_out = |
254 | 1.92M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
255 | 1.92M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
256 | 1.92M | } |
257 | 480k | pi2_tmp++; |
258 | 480k | pu1_pred += pred_strd; |
259 | 480k | pu1_dst += dst_strd; |
260 | 480k | } |
261 | 60.0k | } |
262 | | /************************************************************************************************/ |
263 | | /************************************END - IT_RECON_8x8******************************************/ |
264 | | /************************************************************************************************/ |
265 | 161k | } |
266 | 953k | else /* Rows 4-7 of input may be non-zero: all 8 input rows are read by the 1st stage */ |
267 | 953k | { |
268 | | /************************************************************************************************/ |
269 | | /**********************************START - IT_RECON_8x8******************************************/ |
270 | | /************************************************************************************************/ |
271 | | |
272 | | /* Inverse Transform 1st stage: each source column is transformed and stored as one row of pi2_tmp */ |
273 | 953k | shift = IT_SHIFT_STAGE_1; |
274 | 953k | add = 1 << (shift - 1); |
275 | | |
276 | 8.26M | for(j = 0; j < row_limit_2nd_stage; j++) |
277 | 7.31M | { |
278 | | /* Zero column (lowest bit of zero_cols set): its output row in pi2_tmp is simply cleared */ |
279 | 7.31M | if((zero_cols & 1) == 1) |
280 | 41.8k | { |
281 | 41.8k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
282 | 41.8k | } |
283 | 7.26M | else |
284 | 7.26M | { |
285 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
286 | 36.3M | for(k = 0; k < 4; k++) |
287 | 29.0M | { |
288 | 29.0M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] |
289 | 29.0M | + g_ai2_ihevc_trans_8[3][k] |
290 | 29.0M | * pi2_src[3 * src_strd] |
291 | 29.0M | + g_ai2_ihevc_trans_8[5][k] |
292 | 29.0M | * pi2_src[5 * src_strd] |
293 | 29.0M | + g_ai2_ihevc_trans_8[7][k] |
294 | 29.0M | * pi2_src[7 * src_strd]; |
295 | 29.0M | } |
296 | | |
297 | 7.26M | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd] |
298 | 7.26M | + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd]; |
299 | 7.26M | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd] |
300 | 7.26M | + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd]; |
301 | 7.26M | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0] |
302 | 7.26M | + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd]; |
303 | 7.26M | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0] |
304 | 7.26M | + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd]; |
305 | | |
306 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
307 | 7.26M | e[0] = ee[0] + eo[0]; |
308 | 7.26M | e[3] = ee[0] - eo[0]; |
309 | 7.26M | e[1] = ee[1] + eo[1]; |
310 | 7.26M | e[2] = ee[1] - eo[1]; |
311 | 36.3M | for(k = 0; k < 4; k++) |
312 | 29.0M | { |
313 | 29.0M | pi2_tmp[k] = |
314 | 29.0M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
315 | 29.0M | pi2_tmp[k + 4] = |
316 | 29.0M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
317 | 29.0M | } |
318 | 7.26M | } |
319 | 7.31M | pi2_src++; |
320 | 7.31M | pi2_tmp += trans_size; |
321 | 7.31M | zero_cols = zero_cols >> 1; |
322 | 7.31M | } |
323 | | |
324 | 953k | pi2_tmp = pi2_tmp_orig; |
325 | | |
326 | | /* Inverse Transform 2nd stage: transforms the columns of pi2_tmp, adds the prediction, clips to 8 bit and writes pu1_dst */ |
327 | 953k | shift = IT_SHIFT_STAGE_2; |
328 | 953k | add = 1 << (shift - 1); |
329 | 953k | if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* Rows 4-7 of 1st stage output were not produced (zero): only rows 0-3 of pi2_tmp are read */ |
330 | 79.6k | { |
331 | 717k | for(j = 0; j < trans_size; j++) |
332 | 637k | { |
333 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
334 | 3.18M | for(k = 0; k < 4; k++) |
335 | 2.54M | { |
336 | 2.54M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] |
337 | 2.54M | + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size]; |
338 | 2.54M | } |
339 | 637k | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]; |
340 | 637k | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]; |
341 | 637k | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]; |
342 | 637k | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]; |
343 | | |
344 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
345 | 637k | e[0] = ee[0] + eo[0]; |
346 | 637k | e[3] = ee[0] - eo[0]; |
347 | 637k | e[1] = ee[1] + eo[1]; |
348 | 637k | e[2] = ee[1] - eo[1]; |
349 | 3.18M | for(k = 0; k < 4; k++) |
350 | 2.54M | { |
351 | 2.54M | WORD32 itrans_out; |
352 | 2.54M | itrans_out = |
353 | 2.54M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
354 | 2.54M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
355 | 2.54M | itrans_out = |
356 | 2.54M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
357 | 2.54M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
358 | 2.54M | } |
359 | 637k | pi2_tmp++; |
360 | 637k | pu1_pred += pred_strd; |
361 | 637k | pu1_dst += dst_strd; |
362 | 637k | } |
363 | 79.6k | } |
364 | 874k | else /* Rows 4-7 of 1st stage output may be non-zero: all 8 rows of pi2_tmp are read */ |
365 | 874k | { |
366 | 7.86M | for(j = 0; j < trans_size; j++) |
367 | 6.99M | { |
368 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
369 | 34.9M | for(k = 0; k < 4; k++) |
370 | 27.9M | { |
371 | 27.9M | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size] |
372 | 27.9M | + g_ai2_ihevc_trans_8[3][k] |
373 | 27.9M | * pi2_tmp[3 * trans_size] |
374 | 27.9M | + g_ai2_ihevc_trans_8[5][k] |
375 | 27.9M | * pi2_tmp[5 * trans_size] |
376 | 27.9M | + g_ai2_ihevc_trans_8[7][k] |
377 | 27.9M | * pi2_tmp[7 * trans_size]; |
378 | 27.9M | } |
379 | | |
380 | 6.99M | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size] |
381 | 6.99M | + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size]; |
382 | 6.99M | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size] |
383 | 6.99M | + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size]; |
384 | 6.99M | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0] |
385 | 6.99M | + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size]; |
386 | 6.99M | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0] |
387 | 6.99M | + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size]; |
388 | | |
389 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
390 | 6.99M | e[0] = ee[0] + eo[0]; |
391 | 6.99M | e[3] = ee[0] - eo[0]; |
392 | 6.99M | e[1] = ee[1] + eo[1]; |
393 | 6.99M | e[2] = ee[1] - eo[1]; |
394 | 34.9M | for(k = 0; k < 4; k++) |
395 | 27.9M | { |
396 | 27.9M | WORD32 itrans_out; |
397 | 27.9M | itrans_out = |
398 | 27.9M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
399 | 27.9M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
400 | 27.9M | itrans_out = |
401 | 27.9M | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift)); |
402 | 27.9M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
403 | 27.9M | } |
404 | 6.99M | pi2_tmp++; |
405 | 6.99M | pu1_pred += pred_strd; |
406 | 6.99M | pu1_dst += dst_strd; |
407 | 6.99M | } |
408 | 874k | } |
409 | | /************************************************************************************************/ |
410 | | /************************************END - IT_RECON_8x8******************************************/ |
411 | | /************************************************************************************************/ |
412 | 953k | } |
413 | 1.11M | } |
414 | | |