/src/libmpeg2/common/impeg2_idct.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*****************************************************************************/ |
21 | | /* */ |
22 | | /* File Name : impeg2_idct.c */ |
23 | | /* */ |
24 | | /* Description : Contains 2d idct and inverse quantization functions */ |
25 | | /* */ |
26 | | /* List of Functions : impeg2_idct_recon_dc() */ |
27 | | /* impeg2_idct_recon_dc_mismatch() */ |
28 | | /* impeg2_idct_recon() */ |
29 | | /* */ |
30 | | /* Issues / Problems : None */ |
31 | | /* */ |
32 | | /* Revision History : */ |
33 | | /* */ |
34 | | /* DD MM YYYY Author(s) Changes */ |
35 | | /* 10 09 2005 Hairsh M First Version */ |
36 | | /* */ |
37 | | /*****************************************************************************/ |
38 | | /* |
39 | | IEEE - 1180 results for this IDCT |
40 | | L 256 256 5 5 300 300 384 384 Thresholds |
41 | | H 255 255 5 5 300 300 383 383 |
42 | | sign 1 -1 1 -1 1 -1 1 -1 |
43 | | Peak Error 1 1 1 1 1 1 1 1 1 |
44 | | Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06 |
45 | | Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02 |
46 | | Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015 |
47 | | Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015 |
48 | | */ |
49 | | #include <stdio.h> |
50 | | #include <string.h> |
51 | | |
52 | | #include "iv_datatypedef.h" |
53 | | #include "iv.h" |
54 | | #include "impeg2_defs.h" |
55 | | #include "impeg2_platform_macros.h" |
56 | | |
57 | | #include "impeg2_macros.h" |
58 | | #include "impeg2_globals.h" |
59 | | #include "impeg2_idct.h" |
60 | | |
61 | | |
62 | | void impeg2_idct_recon_dc(WORD16 *pi2_src, |
63 | | WORD16 *pi2_tmp, |
64 | | UWORD8 *pu1_pred, |
65 | | UWORD8 *pu1_dst, |
66 | | WORD32 i4_src_strd, |
67 | | WORD32 i4_pred_strd, |
68 | | WORD32 i4_dst_strd, |
69 | | WORD32 i4_zero_cols, |
70 | | WORD32 i4_zero_rows) |
71 | 1.16M | { |
72 | 1.16M | WORD32 i4_val, i, j; |
73 | | |
74 | 1.16M | UNUSED(pi2_tmp); |
75 | 1.16M | UNUSED(i4_src_strd); |
76 | 1.16M | UNUSED(i4_zero_cols); |
77 | 1.16M | UNUSED(i4_zero_rows); |
78 | | |
79 | 1.16M | i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; |
80 | 1.16M | i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); |
81 | 1.16M | i4_val = i4_val * gai2_impeg2_idct_q11[0]; |
82 | 1.16M | i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); |
83 | | |
84 | 10.3M | for(i = 0; i < TRANS_SIZE_8; i++) |
85 | 9.17M | { |
86 | 82.2M | for(j = 0; j < TRANS_SIZE_8; j++) |
87 | 73.1M | { |
88 | 73.1M | pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]); |
89 | 73.1M | } |
90 | 9.17M | pu1_dst += i4_dst_strd; |
91 | 9.17M | pu1_pred += i4_pred_strd; |
92 | 9.17M | } |
93 | 1.16M | } |
94 | | void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src, |
95 | | WORD16 *pi2_tmp, |
96 | | UWORD8 *pu1_pred, |
97 | | UWORD8 *pu1_dst, |
98 | | WORD32 i4_src_strd, |
99 | | WORD32 i4_pred_strd, |
100 | | WORD32 i4_dst_strd, |
101 | | WORD32 i4_zero_cols, |
102 | | WORD32 i4_zero_rows) |
103 | | |
104 | 48.2k | { |
105 | 48.2k | WORD32 i4_val, i, j; |
106 | 48.2k | WORD32 i4_count = 0; |
107 | 48.2k | WORD32 i4_sum; |
108 | | |
109 | 48.2k | UNUSED(pi2_tmp); |
110 | 48.2k | UNUSED(i4_src_strd); |
111 | 48.2k | UNUSED(i4_zero_cols); |
112 | 48.2k | UNUSED(i4_zero_rows); |
113 | | |
114 | 48.2k | i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; |
115 | 48.2k | i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); |
116 | | |
117 | 48.2k | i4_val *= gai2_impeg2_idct_q11[0]; |
118 | 430k | for(i = 0; i < TRANS_SIZE_8; i++) |
119 | 382k | { |
120 | 3.42M | for (j = 0; j < TRANS_SIZE_8; j++) |
121 | 3.04M | { |
122 | 3.04M | i4_sum = i4_val; |
123 | 3.04M | i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count]; |
124 | 3.04M | i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); |
125 | 3.04M | i4_sum += pu1_pred[j]; |
126 | 3.04M | pu1_dst[j] = CLIP_U8(i4_sum); |
127 | 3.04M | i4_count++; |
128 | 3.04M | } |
129 | | |
130 | 382k | pu1_dst += i4_dst_strd; |
131 | 382k | pu1_pred += i4_pred_strd; |
132 | 382k | } |
133 | | |
134 | 48.2k | } |
135 | | /** |
136 | | ******************************************************************************* |
137 | | * |
138 | | * @brief |
139 | | * This function performs Inverse transform and reconstruction for 8x8 |
140 | | * input block |
141 | | * |
142 | | * @par Description: |
143 | | * Performs inverse transform and adds the prediction data and clips output |
144 | | * to 8 bit |
145 | | * |
146 | | * @param[in] pi2_src |
147 | | * Input 8x8 coefficients |
148 | | * |
149 | | * @param[in] pi2_tmp |
150 | | * Temporary 8x8 buffer for storing inverse |
151 | | * |
152 | | * transform |
153 | | * 1st stage output |
154 | | * |
155 | | * @param[in] pu1_pred |
156 | | * Prediction 8x8 block |
157 | | * |
158 | | * @param[out] pu1_dst |
159 | | * Output 8x8 block |
160 | | * |
161 | | * @param[in] src_strd |
162 | | * Input stride |
163 | | * |
164 | | * @param[in] pred_strd |
165 | | * Prediction stride |
166 | | * |
167 | | * @param[in] dst_strd |
168 | | * Output Stride |
169 | | * |
170 | | * @param[in] shift |
171 | | * Output shift |
172 | | * |
173 | | * @param[in] zero_cols |
174 | | * Zero columns in pi2_src |
175 | | * |
176 | | * @returns Void |
177 | | * |
178 | | * @remarks |
179 | | * None |
180 | | * |
181 | | ******************************************************************************* |
182 | | */ |
183 | | |
184 | | void impeg2_idct_recon(WORD16 *pi2_src, |
185 | | WORD16 *pi2_tmp, |
186 | | UWORD8 *pu1_pred, |
187 | | UWORD8 *pu1_dst, |
188 | | WORD32 i4_src_strd, |
189 | | WORD32 i4_pred_strd, |
190 | | WORD32 i4_dst_strd, |
191 | | WORD32 i4_zero_cols, |
192 | | WORD32 i4_zero_rows) |
193 | 9.56M | { |
194 | 9.56M | WORD32 j, k; |
195 | 9.56M | WORD32 ai4_e[4], ai4_o[4]; |
196 | 9.56M | WORD32 ai4_ee[2], ai4_eo[2]; |
197 | 9.56M | WORD32 i4_add; |
198 | 9.56M | WORD32 i4_shift; |
199 | 9.56M | WORD16 *pi2_tmp_orig; |
200 | 9.56M | WORD32 i4_trans_size; |
201 | 9.56M | WORD32 i4_zero_rows_2nd_stage = i4_zero_cols; |
202 | 9.56M | WORD32 i4_row_limit_2nd_stage; |
203 | | |
204 | 9.56M | i4_trans_size = TRANS_SIZE_8; |
205 | | |
206 | 9.56M | pi2_tmp_orig = pi2_tmp; |
207 | | |
208 | 9.56M | if((i4_zero_cols & 0xF0) == 0xF0) |
209 | 9.35M | i4_row_limit_2nd_stage = 4; |
210 | 218k | else |
211 | 218k | i4_row_limit_2nd_stage = TRANS_SIZE_8; |
212 | | |
213 | | |
214 | 9.56M | if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ |
215 | 9.09M | { |
216 | | /************************************************************************************************/ |
217 | | /**********************************START - IT_RECON_8x8******************************************/ |
218 | | /************************************************************************************************/ |
219 | | |
220 | | /* Inverse Transform 1st stage */ |
221 | 9.09M | i4_shift = IDCT_STG1_SHIFT; |
222 | 9.09M | i4_add = 1 << (i4_shift - 1); |
223 | | |
224 | 45.5M | for(j = 0; j < i4_row_limit_2nd_stage; j++) |
225 | 36.4M | { |
226 | | /* Checking for Zero Cols */ |
227 | 36.4M | if((i4_zero_cols & 1) == 1) |
228 | 24.0M | { |
229 | 24.0M | memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); |
230 | 24.0M | } |
231 | 12.4M | else |
232 | 12.4M | { |
233 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
234 | 62.4M | for(k = 0; k < 4; k++) |
235 | 50.0M | { |
236 | 50.0M | ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] |
237 | 50.0M | + gai2_impeg2_idct_q15[3 * 8 + k] |
238 | 50.0M | * pi2_src[3 * i4_src_strd]; |
239 | 50.0M | } |
240 | 12.4M | ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]; |
241 | 12.4M | ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]; |
242 | 12.4M | ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]; |
243 | 12.4M | ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]; |
244 | | |
245 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
246 | 12.4M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
247 | 12.4M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
248 | 12.4M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
249 | 12.4M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
250 | 62.5M | for(k = 0; k < 4; k++) |
251 | 50.0M | { |
252 | 50.0M | pi2_tmp[k] = |
253 | 50.0M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
254 | 50.0M | pi2_tmp[k + 4] = |
255 | 50.0M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
256 | 50.0M | } |
257 | 12.4M | } |
258 | 36.4M | pi2_src++; |
259 | 36.4M | pi2_tmp += i4_trans_size; |
260 | 36.4M | i4_zero_cols = i4_zero_cols >> 1; |
261 | 36.4M | } |
262 | | |
263 | 9.09M | pi2_tmp = pi2_tmp_orig; |
264 | | |
265 | | /* Inverse Transform 2nd stage */ |
266 | 9.09M | i4_shift = IDCT_STG2_SHIFT; |
267 | 9.09M | i4_add = 1 << (i4_shift - 1); |
268 | 9.09M | if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ |
269 | 9.06M | { |
270 | 79.6M | for(j = 0; j < i4_trans_size; j++) |
271 | 70.6M | { |
272 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
273 | 350M | for(k = 0; k < 4; k++) |
274 | 279M | { |
275 | 279M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
276 | 279M | + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; |
277 | 279M | } |
278 | 70.6M | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; |
279 | 70.6M | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; |
280 | 70.6M | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; |
281 | 70.6M | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; |
282 | | |
283 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
284 | 70.6M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
285 | 70.6M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
286 | 70.6M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
287 | 70.6M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
288 | 345M | for(k = 0; k < 4; k++) |
289 | 274M | { |
290 | 274M | WORD32 itrans_out; |
291 | 274M | itrans_out = |
292 | 274M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
293 | 274M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
294 | 274M | itrans_out = |
295 | 274M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
296 | 274M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
297 | 274M | } |
298 | 70.6M | pi2_tmp++; |
299 | 70.6M | pu1_pred += i4_pred_strd; |
300 | 70.6M | pu1_dst += i4_dst_strd; |
301 | 70.6M | } |
302 | 9.06M | } |
303 | 22.7k | else /* All rows of output of 1st stage are non-zero */ |
304 | 22.7k | { |
305 | 301k | for(j = 0; j < i4_trans_size; j++) |
306 | 278k | { |
307 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
308 | 1.39M | for(k = 0; k < 4; k++) |
309 | 1.11M | { |
310 | 1.11M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
311 | 1.11M | + gai2_impeg2_idct_q11[3 * 8 + k] |
312 | 1.11M | * pi2_tmp[3 * i4_trans_size] |
313 | 1.11M | + gai2_impeg2_idct_q11[5 * 8 + k] |
314 | 1.11M | * pi2_tmp[5 * i4_trans_size] |
315 | 1.11M | + gai2_impeg2_idct_q11[7 * 8 + k] |
316 | 1.11M | * pi2_tmp[7 * i4_trans_size]; |
317 | 1.11M | } |
318 | | |
319 | 278k | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] |
320 | 278k | + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; |
321 | 278k | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] |
322 | 278k | + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; |
323 | 278k | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] |
324 | 278k | + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; |
325 | 278k | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] |
326 | 278k | + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; |
327 | | |
328 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
329 | 278k | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
330 | 278k | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
331 | 278k | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
332 | 278k | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
333 | 1.39M | for(k = 0; k < 4; k++) |
334 | 1.11M | { |
335 | 1.11M | WORD32 itrans_out; |
336 | 1.11M | itrans_out = |
337 | 1.11M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
338 | 1.11M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
339 | 1.11M | itrans_out = |
340 | 1.11M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
341 | 1.11M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
342 | 1.11M | } |
343 | 278k | pi2_tmp++; |
344 | 278k | pu1_pred += i4_pred_strd; |
345 | 278k | pu1_dst += i4_dst_strd; |
346 | 278k | } |
347 | 22.7k | } |
348 | | /************************************************************************************************/ |
349 | | /************************************END - IT_RECON_8x8******************************************/ |
350 | | /************************************************************************************************/ |
351 | 9.09M | } |
352 | 479k | else /* All rows of input are non-zero */ |
353 | 479k | { |
354 | | /************************************************************************************************/ |
355 | | /**********************************START - IT_RECON_8x8******************************************/ |
356 | | /************************************************************************************************/ |
357 | | |
358 | | /* Inverse Transform 1st stage */ |
359 | 479k | i4_shift = IDCT_STG1_SHIFT; |
360 | 479k | i4_add = 1 << (i4_shift - 1); |
361 | | |
362 | 3.13M | for(j = 0; j < i4_row_limit_2nd_stage; j++) |
363 | 2.65M | { |
364 | | /* Checking for Zero Cols */ |
365 | 2.65M | if((i4_zero_cols & 1) == 1) |
366 | 969k | { |
367 | 969k | memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); |
368 | 969k | } |
369 | 1.68M | else |
370 | 1.68M | { |
371 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
372 | 8.42M | for(k = 0; k < 4; k++) |
373 | 6.73M | { |
374 | 6.73M | ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] |
375 | 6.73M | + gai2_impeg2_idct_q15[3 * 8 + k] |
376 | 6.73M | * pi2_src[3 * i4_src_strd] |
377 | 6.73M | + gai2_impeg2_idct_q15[5 * 8 + k] |
378 | 6.73M | * pi2_src[5 * i4_src_strd] |
379 | 6.73M | + gai2_impeg2_idct_q15[7 * 8 + k] |
380 | 6.73M | * pi2_src[7 * i4_src_strd]; |
381 | 6.73M | } |
382 | | |
383 | 1.68M | ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd] |
384 | 1.68M | + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd]; |
385 | 1.68M | ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd] |
386 | 1.68M | + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd]; |
387 | 1.68M | ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0] |
388 | 1.68M | + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd]; |
389 | 1.68M | ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0] |
390 | 1.68M | + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd]; |
391 | | |
392 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
393 | 1.68M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
394 | 1.68M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
395 | 1.68M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
396 | 1.68M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
397 | 8.42M | for(k = 0; k < 4; k++) |
398 | 6.73M | { |
399 | 6.73M | pi2_tmp[k] = |
400 | 6.73M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
401 | 6.73M | pi2_tmp[k + 4] = |
402 | 6.73M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
403 | 6.73M | } |
404 | 1.68M | } |
405 | 2.65M | pi2_src++; |
406 | 2.65M | pi2_tmp += i4_trans_size; |
407 | 2.65M | i4_zero_cols = i4_zero_cols >> 1; |
408 | 2.65M | } |
409 | | |
410 | 479k | pi2_tmp = pi2_tmp_orig; |
411 | | |
412 | | /* Inverse Transform 2nd stage */ |
413 | 479k | i4_shift = IDCT_STG2_SHIFT; |
414 | 479k | i4_add = 1 << (i4_shift - 1); |
415 | 479k | if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ |
416 | 297k | { |
417 | 2.67M | for(j = 0; j < i4_trans_size; j++) |
418 | 2.37M | { |
419 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
420 | 11.8M | for(k = 0; k < 4; k++) |
421 | 9.50M | { |
422 | 9.50M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
423 | 9.50M | + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; |
424 | 9.50M | } |
425 | 2.37M | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; |
426 | 2.37M | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; |
427 | 2.37M | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; |
428 | 2.37M | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; |
429 | | |
430 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
431 | 2.37M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
432 | 2.37M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
433 | 2.37M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
434 | 2.37M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
435 | 11.8M | for(k = 0; k < 4; k++) |
436 | 9.50M | { |
437 | 9.50M | WORD32 itrans_out; |
438 | 9.50M | itrans_out = |
439 | 9.50M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
440 | 9.50M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
441 | 9.50M | itrans_out = |
442 | 9.50M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
443 | 9.50M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
444 | 9.50M | } |
445 | 2.37M | pi2_tmp++; |
446 | 2.37M | pu1_pred += i4_pred_strd; |
447 | 2.37M | pu1_dst += i4_dst_strd; |
448 | 2.37M | } |
449 | 297k | } |
450 | 181k | else /* All rows of output of 1st stage are non-zero */ |
451 | 181k | { |
452 | 1.64M | for(j = 0; j < i4_trans_size; j++) |
453 | 1.45M | { |
454 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
455 | 7.29M | for(k = 0; k < 4; k++) |
456 | 5.83M | { |
457 | 5.83M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
458 | 5.83M | + gai2_impeg2_idct_q11[3 * 8 + k] |
459 | 5.83M | * pi2_tmp[3 * i4_trans_size] |
460 | 5.83M | + gai2_impeg2_idct_q11[5 * 8 + k] |
461 | 5.83M | * pi2_tmp[5 * i4_trans_size] |
462 | 5.83M | + gai2_impeg2_idct_q11[7 * 8 + k] |
463 | 5.83M | * pi2_tmp[7 * i4_trans_size]; |
464 | 5.83M | } |
465 | | |
466 | 1.45M | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] |
467 | 1.45M | + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; |
468 | 1.45M | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] |
469 | 1.45M | + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; |
470 | 1.45M | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] |
471 | 1.45M | + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; |
472 | 1.45M | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] |
473 | 1.45M | + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; |
474 | | |
475 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
476 | 1.45M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
477 | 1.45M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
478 | 1.45M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
479 | 1.45M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
480 | 7.28M | for(k = 0; k < 4; k++) |
481 | 5.82M | { |
482 | 5.82M | WORD32 itrans_out; |
483 | 5.82M | itrans_out = |
484 | 5.82M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
485 | 5.82M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
486 | 5.82M | itrans_out = |
487 | 5.82M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
488 | 5.82M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
489 | 5.82M | } |
490 | 1.45M | pi2_tmp++; |
491 | 1.45M | pu1_pred += i4_pred_strd; |
492 | 1.45M | pu1_dst += i4_dst_strd; |
493 | 1.45M | } |
494 | 181k | } |
495 | | /************************************************************************************************/ |
496 | | /************************************END - IT_RECON_8x8******************************************/ |
497 | | /************************************************************************************************/ |
498 | 479k | } |
499 | 9.56M | } |
500 | | |