/src/libmpeg2/common/impeg2_idct.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*****************************************************************************/ |
21 | | /* */ |
22 | | /* File Name : impeg2_idct.c */ |
23 | | /* */ |
24 | | /* Description : Contains 2d idct and inverse quantization functions */ |
25 | | /* */ |
26 | | /* List of Functions : impeg2_idct_recon_dc() */ |
27 | | /* impeg2_idct_recon_dc_mismatch() */ |
28 | | /* impeg2_idct_recon() */ |
29 | | /* */ |
30 | | /* Issues / Problems : None */ |
31 | | /* */ |
32 | | /* Revision History : */ |
33 | | /* */ |
34 | | /* DD MM YYYY Author(s) Changes */ |
35 | | /* 10 09 2005 Hairsh M First Version */ |
36 | | /* */ |
37 | | /*****************************************************************************/ |
38 | | /* |
39 | | IEEE - 1180 results for this IDCT |
40 | | L 256 256 5 5 300 300 384 384 Thresholds |
41 | | H 255 255 5 5 300 300 383 383 |
42 | | sign 1 -1 1 -1 1 -1 1 -1 |
43 | | Peak Error 1 1 1 1 1 1 1 1 1 |
44 | | Peak Mean Square Error 0.0191 0.0188 0.0108 0.0111 0.0176 0.0188 0.0165 0.0177 0.06 |
45 | | Overall Mean Square Error 0.01566406 0.01597656 0.0091875 0.00908906 0.01499063 0.01533281 0.01432344 0.01412344 0.02 |
46 | | Peak Mean Error 0.0027 0.0026 0.0028 0.002 0.0017 0.0033 0.0031 0.0025 0.015 |
47 | | Overall Mean Error 0.00002656 -0.00031406 0.00016875 0.00005469 -0.00003125 0.00011406 0.00009219 0.00004219 0.0015 |
48 | | */ |
49 | | #include <stdio.h> |
50 | | #include <string.h> |
51 | | |
52 | | #include "iv_datatypedef.h" |
53 | | #include "iv.h" |
54 | | #include "impeg2_defs.h" |
55 | | #include "impeg2_platform_macros.h" |
56 | | |
57 | | #include "impeg2_macros.h" |
58 | | #include "impeg2_globals.h" |
59 | | #include "impeg2_idct.h" |
60 | | |
61 | | |
62 | | void impeg2_idct_recon_dc(WORD16 *pi2_src, |
63 | | WORD16 *pi2_tmp, |
64 | | UWORD8 *pu1_pred, |
65 | | UWORD8 *pu1_dst, |
66 | | WORD32 i4_src_strd, |
67 | | WORD32 i4_pred_strd, |
68 | | WORD32 i4_dst_strd, |
69 | | WORD32 i4_zero_cols, |
70 | | WORD32 i4_zero_rows) |
71 | 128k | { |
72 | 128k | WORD32 i4_val, i, j; |
73 | | |
74 | 128k | UNUSED(pi2_tmp); |
75 | 128k | UNUSED(i4_src_strd); |
76 | 128k | UNUSED(i4_zero_cols); |
77 | 128k | UNUSED(i4_zero_rows); |
78 | | |
79 | 128k | i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; |
80 | 128k | i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); |
81 | 128k | i4_val = i4_val * gai2_impeg2_idct_q11[0]; |
82 | 128k | i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); |
83 | | |
84 | 1.14M | for(i = 0; i < TRANS_SIZE_8; i++) |
85 | 1.02M | { |
86 | 9.17M | for(j = 0; j < TRANS_SIZE_8; j++) |
87 | 8.15M | { |
88 | 8.15M | pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]); |
89 | 8.15M | } |
90 | 1.02M | pu1_dst += i4_dst_strd; |
91 | 1.02M | pu1_pred += i4_pred_strd; |
92 | 1.02M | } |
93 | 128k | } |
94 | | void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src, |
95 | | WORD16 *pi2_tmp, |
96 | | UWORD8 *pu1_pred, |
97 | | UWORD8 *pu1_dst, |
98 | | WORD32 i4_src_strd, |
99 | | WORD32 i4_pred_strd, |
100 | | WORD32 i4_dst_strd, |
101 | | WORD32 i4_zero_cols, |
102 | | WORD32 i4_zero_rows) |
103 | | |
104 | 40.6k | { |
105 | 40.6k | WORD32 i4_val, i, j; |
106 | 40.6k | WORD32 i4_count = 0; |
107 | 40.6k | WORD32 i4_sum; |
108 | | |
109 | 40.6k | UNUSED(pi2_tmp); |
110 | 40.6k | UNUSED(i4_src_strd); |
111 | 40.6k | UNUSED(i4_zero_cols); |
112 | 40.6k | UNUSED(i4_zero_rows); |
113 | | |
114 | 40.6k | i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; |
115 | 40.6k | i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); |
116 | | |
117 | 40.6k | i4_val *= gai2_impeg2_idct_q11[0]; |
118 | 364k | for(i = 0; i < TRANS_SIZE_8; i++) |
119 | 323k | { |
120 | 2.90M | for (j = 0; j < TRANS_SIZE_8; j++) |
121 | 2.58M | { |
122 | 2.58M | i4_sum = i4_val; |
123 | 2.58M | i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count]; |
124 | 2.58M | i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); |
125 | 2.58M | i4_sum += pu1_pred[j]; |
126 | 2.58M | pu1_dst[j] = CLIP_U8(i4_sum); |
127 | 2.58M | i4_count++; |
128 | 2.58M | } |
129 | | |
130 | 323k | pu1_dst += i4_dst_strd; |
131 | 323k | pu1_pred += i4_pred_strd; |
132 | 323k | } |
133 | | |
134 | 40.6k | } |
135 | | /** |
136 | | ******************************************************************************* |
137 | | * |
138 | | * @brief |
139 | | * This function performs Inverse transform and reconstruction for 8x8 |
140 | | * input block |
141 | | * |
142 | | * @par Description: |
143 | | * Performs inverse transform and adds the prediction data and clips output |
144 | | * to 8 bit |
145 | | * |
146 | | * @param[in] pi2_src |
147 | | * Input 8x8 coefficients |
148 | | * |
149 | | * @param[in] pi2_tmp |
150 | | * Temporary 8x8 buffer for storing inverse |
151 | | * |
152 | | * transform |
153 | | * 1st stage output |
154 | | * |
155 | | * @param[in] pu1_pred |
156 | | * Prediction 8x8 block |
157 | | * |
158 | | * @param[out] pu1_dst |
159 | | * Output 8x8 block |
160 | | * |
161 | | * @param[in] src_strd |
162 | | * Input stride |
163 | | * |
164 | | * @param[in] pred_strd |
165 | | * Prediction stride |
166 | | * |
167 | | * @param[in] dst_strd |
168 | | * Output Stride |
169 | | * |
170 | | * @param[in] shift |
171 | | * Output shift |
172 | | * |
173 | | * @param[in] zero_cols |
174 | | * Zero columns in pi2_src |
175 | | * |
176 | | * @returns Void |
177 | | * |
178 | | * @remarks |
179 | | * None |
180 | | * |
181 | | ******************************************************************************* |
182 | | */ |
183 | | |
184 | | void impeg2_idct_recon(WORD16 *pi2_src, |
185 | | WORD16 *pi2_tmp, |
186 | | UWORD8 *pu1_pred, |
187 | | UWORD8 *pu1_dst, |
188 | | WORD32 i4_src_strd, |
189 | | WORD32 i4_pred_strd, |
190 | | WORD32 i4_dst_strd, |
191 | | WORD32 i4_zero_cols, |
192 | | WORD32 i4_zero_rows) |
193 | 315k | { |
194 | 315k | WORD32 j, k; |
195 | 315k | WORD32 ai4_e[4], ai4_o[4]; |
196 | 315k | WORD32 ai4_ee[2], ai4_eo[2]; |
197 | 315k | WORD32 i4_add; |
198 | 315k | WORD32 i4_shift; |
199 | 315k | WORD16 *pi2_tmp_orig; |
200 | 315k | WORD32 i4_trans_size; |
201 | 315k | WORD32 i4_zero_rows_2nd_stage = i4_zero_cols; |
202 | 315k | WORD32 i4_row_limit_2nd_stage; |
203 | | |
204 | 315k | i4_trans_size = TRANS_SIZE_8; |
205 | | |
206 | 315k | pi2_tmp_orig = pi2_tmp; |
207 | | |
208 | 315k | if((i4_zero_cols & 0xF0) == 0xF0) |
209 | 174k | i4_row_limit_2nd_stage = 4; |
210 | 140k | else |
211 | 140k | i4_row_limit_2nd_stage = TRANS_SIZE_8; |
212 | | |
213 | | |
214 | 315k | if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */ |
215 | 167k | { |
216 | | /************************************************************************************************/ |
217 | | /**********************************START - IT_RECON_8x8******************************************/ |
218 | | /************************************************************************************************/ |
219 | | |
220 | | /* Inverse Transform 1st stage */ |
221 | 167k | i4_shift = IDCT_STG1_SHIFT; |
222 | 167k | i4_add = 1 << (i4_shift - 1); |
223 | | |
224 | 868k | for(j = 0; j < i4_row_limit_2nd_stage; j++) |
225 | 701k | { |
226 | | /* Checking for Zero Cols */ |
227 | 701k | if((i4_zero_cols & 1) == 1) |
228 | 362k | { |
229 | 362k | memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); |
230 | 362k | } |
231 | 339k | else |
232 | 339k | { |
233 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
234 | 1.69M | for(k = 0; k < 4; k++) |
235 | 1.35M | { |
236 | 1.35M | ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] |
237 | 1.35M | + gai2_impeg2_idct_q15[3 * 8 + k] |
238 | 1.35M | * pi2_src[3 * i4_src_strd]; |
239 | 1.35M | } |
240 | 339k | ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]; |
241 | 339k | ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]; |
242 | 339k | ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]; |
243 | 339k | ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]; |
244 | | |
245 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
246 | 339k | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
247 | 339k | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
248 | 339k | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
249 | 339k | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
250 | 1.69M | for(k = 0; k < 4; k++) |
251 | 1.35M | { |
252 | 1.35M | pi2_tmp[k] = |
253 | 1.35M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
254 | 1.35M | pi2_tmp[k + 4] = |
255 | 1.35M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
256 | 1.35M | } |
257 | 339k | } |
258 | 701k | pi2_src++; |
259 | 701k | pi2_tmp += i4_trans_size; |
260 | 701k | i4_zero_cols = i4_zero_cols >> 1; |
261 | 701k | } |
262 | | |
263 | 167k | pi2_tmp = pi2_tmp_orig; |
264 | | |
265 | | /* Inverse Transform 2nd stage */ |
266 | 167k | i4_shift = IDCT_STG2_SHIFT; |
267 | 167k | i4_add = 1 << (i4_shift - 1); |
268 | 167k | if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ |
269 | 159k | { |
270 | 1.41M | for(j = 0; j < i4_trans_size; j++) |
271 | 1.25M | { |
272 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
273 | 6.24M | for(k = 0; k < 4; k++) |
274 | 4.99M | { |
275 | 4.99M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
276 | 4.99M | + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; |
277 | 4.99M | } |
278 | 1.25M | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; |
279 | 1.25M | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; |
280 | 1.25M | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; |
281 | 1.25M | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; |
282 | | |
283 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
284 | 1.25M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
285 | 1.25M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
286 | 1.25M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
287 | 1.25M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
288 | 6.21M | for(k = 0; k < 4; k++) |
289 | 4.96M | { |
290 | 4.96M | WORD32 itrans_out; |
291 | 4.96M | itrans_out = |
292 | 4.96M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
293 | 4.96M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
294 | 4.96M | itrans_out = |
295 | 4.96M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
296 | 4.96M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
297 | 4.96M | } |
298 | 1.25M | pi2_tmp++; |
299 | 1.25M | pu1_pred += i4_pred_strd; |
300 | 1.25M | pu1_dst += i4_dst_strd; |
301 | 1.25M | } |
302 | 159k | } |
303 | 8.30k | else /* All rows of output of 1st stage are non-zero */ |
304 | 8.30k | { |
305 | 74.5k | for(j = 0; j < i4_trans_size; j++) |
306 | 66.2k | { |
307 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
308 | 330k | for(k = 0; k < 4; k++) |
309 | 264k | { |
310 | 264k | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
311 | 264k | + gai2_impeg2_idct_q11[3 * 8 + k] |
312 | 264k | * pi2_tmp[3 * i4_trans_size] |
313 | 264k | + gai2_impeg2_idct_q11[5 * 8 + k] |
314 | 264k | * pi2_tmp[5 * i4_trans_size] |
315 | 264k | + gai2_impeg2_idct_q11[7 * 8 + k] |
316 | 264k | * pi2_tmp[7 * i4_trans_size]; |
317 | 264k | } |
318 | | |
319 | 66.2k | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] |
320 | 66.2k | + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; |
321 | 66.2k | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] |
322 | 66.2k | + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; |
323 | 66.2k | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] |
324 | 66.2k | + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; |
325 | 66.2k | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] |
326 | 66.2k | + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; |
327 | | |
328 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
329 | 66.2k | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
330 | 66.2k | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
331 | 66.2k | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
332 | 66.2k | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
333 | 330k | for(k = 0; k < 4; k++) |
334 | 264k | { |
335 | 264k | WORD32 itrans_out; |
336 | 264k | itrans_out = |
337 | 264k | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
338 | 264k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
339 | 264k | itrans_out = |
340 | 264k | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
341 | 264k | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
342 | 264k | } |
343 | 66.2k | pi2_tmp++; |
344 | 66.2k | pu1_pred += i4_pred_strd; |
345 | 66.2k | pu1_dst += i4_dst_strd; |
346 | 66.2k | } |
347 | 8.30k | } |
348 | | /************************************************************************************************/ |
349 | | /************************************END - IT_RECON_8x8******************************************/ |
350 | | /************************************************************************************************/ |
351 | 167k | } |
352 | 148k | else /* All rows of input are non-zero */ |
353 | 148k | { |
354 | | /************************************************************************************************/ |
355 | | /**********************************START - IT_RECON_8x8******************************************/ |
356 | | /************************************************************************************************/ |
357 | | |
358 | | /* Inverse Transform 1st stage */ |
359 | 148k | i4_shift = IDCT_STG1_SHIFT; |
360 | 148k | i4_add = 1 << (i4_shift - 1); |
361 | | |
362 | 1.26M | for(j = 0; j < i4_row_limit_2nd_stage; j++) |
363 | 1.11M | { |
364 | | /* Checking for Zero Cols */ |
365 | 1.11M | if((i4_zero_cols & 1) == 1) |
366 | 670k | { |
367 | 670k | memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); |
368 | 670k | } |
369 | 449k | else |
370 | 449k | { |
371 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
372 | 2.24M | for(k = 0; k < 4; k++) |
373 | 1.79M | { |
374 | 1.79M | ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd] |
375 | 1.79M | + gai2_impeg2_idct_q15[3 * 8 + k] |
376 | 1.79M | * pi2_src[3 * i4_src_strd] |
377 | 1.79M | + gai2_impeg2_idct_q15[5 * 8 + k] |
378 | 1.79M | * pi2_src[5 * i4_src_strd] |
379 | 1.79M | + gai2_impeg2_idct_q15[7 * 8 + k] |
380 | 1.79M | * pi2_src[7 * i4_src_strd]; |
381 | 1.79M | } |
382 | | |
383 | 449k | ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd] |
384 | 449k | + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd]; |
385 | 449k | ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd] |
386 | 449k | + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd]; |
387 | 449k | ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0] |
388 | 449k | + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd]; |
389 | 449k | ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0] |
390 | 449k | + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd]; |
391 | | |
392 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
393 | 449k | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
394 | 449k | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
395 | 449k | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
396 | 449k | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
397 | 2.24M | for(k = 0; k < 4; k++) |
398 | 1.79M | { |
399 | 1.79M | pi2_tmp[k] = |
400 | 1.79M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
401 | 1.79M | pi2_tmp[k + 4] = |
402 | 1.79M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
403 | 1.79M | } |
404 | 449k | } |
405 | 1.11M | pi2_src++; |
406 | 1.11M | pi2_tmp += i4_trans_size; |
407 | 1.11M | i4_zero_cols = i4_zero_cols >> 1; |
408 | 1.11M | } |
409 | | |
410 | 148k | pi2_tmp = pi2_tmp_orig; |
411 | | |
412 | | /* Inverse Transform 2nd stage */ |
413 | 148k | i4_shift = IDCT_STG2_SHIFT; |
414 | 148k | i4_add = 1 << (i4_shift - 1); |
415 | 148k | if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */ |
416 | 15.6k | { |
417 | 140k | for(j = 0; j < i4_trans_size; j++) |
418 | 124k | { |
419 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
420 | 622k | for(k = 0; k < 4; k++) |
421 | 497k | { |
422 | 497k | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
423 | 497k | + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size]; |
424 | 497k | } |
425 | 124k | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]; |
426 | 124k | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]; |
427 | 124k | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]; |
428 | 124k | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]; |
429 | | |
430 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
431 | 124k | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
432 | 124k | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
433 | 124k | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
434 | 124k | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
435 | 621k | for(k = 0; k < 4; k++) |
436 | 497k | { |
437 | 497k | WORD32 itrans_out; |
438 | 497k | itrans_out = |
439 | 497k | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
440 | 497k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
441 | 497k | itrans_out = |
442 | 497k | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
443 | 497k | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
444 | 497k | } |
445 | 124k | pi2_tmp++; |
446 | 124k | pu1_pred += i4_pred_strd; |
447 | 124k | pu1_dst += i4_dst_strd; |
448 | 124k | } |
449 | 15.6k | } |
450 | 132k | else /* All rows of output of 1st stage are non-zero */ |
451 | 132k | { |
452 | 1.18M | for(j = 0; j < i4_trans_size; j++) |
453 | 1.05M | { |
454 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
455 | 5.25M | for(k = 0; k < 4; k++) |
456 | 4.20M | { |
457 | 4.20M | ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size] |
458 | 4.20M | + gai2_impeg2_idct_q11[3 * 8 + k] |
459 | 4.20M | * pi2_tmp[3 * i4_trans_size] |
460 | 4.20M | + gai2_impeg2_idct_q11[5 * 8 + k] |
461 | 4.20M | * pi2_tmp[5 * i4_trans_size] |
462 | 4.20M | + gai2_impeg2_idct_q11[7 * 8 + k] |
463 | 4.20M | * pi2_tmp[7 * i4_trans_size]; |
464 | 4.20M | } |
465 | | |
466 | 1.05M | ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size] |
467 | 1.05M | + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size]; |
468 | 1.05M | ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size] |
469 | 1.05M | + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size]; |
470 | 1.05M | ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0] |
471 | 1.05M | + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size]; |
472 | 1.05M | ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0] |
473 | 1.05M | + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size]; |
474 | | |
475 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
476 | 1.05M | ai4_e[0] = ai4_ee[0] + ai4_eo[0]; |
477 | 1.05M | ai4_e[3] = ai4_ee[0] - ai4_eo[0]; |
478 | 1.05M | ai4_e[1] = ai4_ee[1] + ai4_eo[1]; |
479 | 1.05M | ai4_e[2] = ai4_ee[1] - ai4_eo[1]; |
480 | 5.24M | for(k = 0; k < 4; k++) |
481 | 4.18M | { |
482 | 4.18M | WORD32 itrans_out; |
483 | 4.18M | itrans_out = |
484 | 4.18M | CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift)); |
485 | 4.18M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
486 | 4.18M | itrans_out = |
487 | 4.18M | CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift)); |
488 | 4.18M | pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4])); |
489 | 4.18M | } |
490 | 1.05M | pi2_tmp++; |
491 | 1.05M | pu1_pred += i4_pred_strd; |
492 | 1.05M | pu1_dst += i4_dst_strd; |
493 | 1.05M | } |
494 | 132k | } |
495 | | /************************************************************************************************/ |
496 | | /************************************END - IT_RECON_8x8******************************************/ |
497 | | /************************************************************************************************/ |
498 | 148k | } |
499 | 315k | } |
500 | | |