/src/libhevc/common/ihevc_itrans_recon_16x16.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_itrans_recon_16x16.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for inverse transform and reconstruction 16x16 |
25 | | * |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_itrans_recon_16x16() |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | #include <stdio.h> |
39 | | #include <string.h> |
40 | | #include "ihevc_typedefs.h" |
41 | | #include "ihevc_macros.h" |
42 | | #include "ihevc_platform_macros.h" |
43 | | #include "ihevc_defs.h" |
44 | | #include "ihevc_trans_tables.h" |
45 | | #include "ihevc_itrans_recon.h" |
46 | | #include "ihevc_func_selector.h" |
47 | | #include "ihevc_trans_macros.h" |
48 | | |
49 | | /** |
50 | | ******************************************************************************* |
51 | | * |
52 | | * @brief |
53 | | * This function performs Inverse transform and reconstruction for 16x16 |
54 | | * input block |
55 | | * |
56 | | * @par Description: |
57 | | * Performs inverse transform and adds the prediction data and clips output |
58 | | * to 8 bit |
59 | | * |
60 | | * @param[in] pi2_src |
61 | | * Input 16x16 coefficients |
62 | | * |
63 | | * @param[in] pi2_tmp |
64 | | * Temporary 16x16 buffer that holds the |
65 | | * output of the 1st stage of the |
66 | | * inverse transform (read back by the |
67 | | * 2nd stage) |
68 | | * |
69 | | * @param[in] pu1_pred |
70 | | * Prediction 16x16 block |
71 | | * |
72 | | * @param[out] pu1_dst |
73 | | * Output 16x16 block |
74 | | * |
75 | | * @param[in] src_strd |
76 | | * Input stride |
77 | | * |
78 | | * @param[in] pred_strd |
79 | | * Prediction stride |
80 | | * |
81 | | * @param[in] dst_strd |
82 | | * Output Stride |
83 | | * |
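84 | | * @param[in] zero_cols |
85 | | * Bitmask of all-zero columns in pi2_src (bit n set => column n is zero) |
86 | | * |
87 | | * @param[in] zero_rows |
88 | | * Bitmask of all-zero rows in pi2_src (bit n set => row n is zero) |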
89 | | * |
90 | | * @returns Void |
91 | | * |
92 | | * @remarks |
93 | | * None |
94 | | * |
95 | | ******************************************************************************* |
96 | | */ |
97 | | |
98 | | void ihevc_itrans_recon_16x16(WORD16 *pi2_src, |
99 | | WORD16 *pi2_tmp, |
100 | | UWORD8 *pu1_pred, |
101 | | UWORD8 *pu1_dst, |
102 | | WORD32 src_strd, |
103 | | WORD32 pred_strd, |
104 | | WORD32 dst_strd, |
105 | | WORD32 zero_cols, |
106 | | WORD32 zero_rows) |
107 | 1.28M | { |
108 | 1.28M | WORD32 j, k; |
109 | 1.28M | WORD32 e[8], o[8]; |
110 | 1.28M | WORD32 ee[4], eo[4]; |
111 | 1.28M | WORD32 eee[2], eeo[2]; |
112 | 1.28M | WORD32 add; |
113 | 1.28M | WORD32 shift; |
114 | 1.28M | WORD16 *pi2_tmp_orig; |
115 | 1.28M | WORD32 trans_size; |
116 | 1.28M | WORD32 zero_rows_2nd_stage = zero_cols; |
117 | 1.28M | WORD32 row_limit_2nd_stage; |
118 | | |
119 | 1.28M | if((zero_cols & 0xFFF0) == 0xFFF0) |
120 | 555k | row_limit_2nd_stage = 4; |
121 | 734k | else if((zero_cols & 0xFF00) == 0xFF00) |
122 | 295k | row_limit_2nd_stage = 8; |
123 | 439k | else |
124 | 439k | row_limit_2nd_stage = TRANS_SIZE_16; |
125 | | |
126 | 1.28M | trans_size = TRANS_SIZE_16; |
127 | 1.28M | pi2_tmp_orig = pi2_tmp; |
128 | 1.28M | if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ |
129 | 376k | { |
130 | | /* Inverse Transform 1st stage */ |
131 | | /************************************************************************************************/ |
132 | | /**********************************START - IT_RECON_16x16****************************************/ |
133 | | /************************************************************************************************/ |
134 | | |
135 | 376k | shift = IT_SHIFT_STAGE_1; |
136 | 376k | add = 1 << (shift - 1); |
137 | | |
138 | 2.99M | for(j = 0; j < row_limit_2nd_stage; j++) |
139 | 2.61M | { |
140 | | /* Checking for Zero Cols */ |
141 | 2.61M | if((zero_cols & 1) == 1) |
142 | 1.56M | { |
143 | 1.56M | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
144 | 1.56M | } |
145 | 1.04M | else |
146 | 1.04M | { |
147 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
148 | 9.45M | for(k = 0; k < 8; k++) |
149 | 8.40M | { |
150 | 8.40M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
151 | 8.40M | + g_ai2_ihevc_trans_16[3][k] |
152 | 8.40M | * pi2_src[3 * src_strd]; |
153 | 8.40M | } |
154 | 5.25M | for(k = 0; k < 4; k++) |
155 | 4.20M | { |
156 | 4.20M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; |
157 | 4.20M | } |
158 | 1.04M | eeo[0] = 0; |
159 | 1.04M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
160 | 1.04M | eeo[1] = 0; |
161 | 1.04M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
162 | | |
163 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
164 | 3.15M | for(k = 0; k < 2; k++) |
165 | 2.10M | { |
166 | 2.10M | ee[k] = eee[k] + eeo[k]; |
167 | 2.10M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
168 | 2.10M | } |
169 | 5.25M | for(k = 0; k < 4; k++) |
170 | 4.20M | { |
171 | 4.20M | e[k] = ee[k] + eo[k]; |
172 | 4.20M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
173 | 4.20M | } |
174 | 9.45M | for(k = 0; k < 8; k++) |
175 | 8.40M | { |
176 | 8.40M | pi2_tmp[k] = |
177 | 8.40M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
178 | 8.40M | pi2_tmp[k + 8] = |
179 | 8.40M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
180 | 8.40M | } |
181 | 1.04M | } |
182 | 2.61M | pi2_src++; |
183 | 2.61M | pi2_tmp += trans_size; |
184 | 2.61M | zero_cols = zero_cols >> 1; |
185 | 2.61M | } |
186 | | |
187 | 376k | pi2_tmp = pi2_tmp_orig; |
188 | | |
189 | | /* Inverse Transform 2nd stage */ |
190 | 376k | shift = IT_SHIFT_STAGE_2; |
191 | 376k | add = 1 << (shift - 1); |
192 | | |
193 | 376k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
194 | 246k | { |
195 | 4.17M | for(j = 0; j < trans_size; j++) |
196 | 3.93M | { |
197 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
198 | 35.3M | for(k = 0; k < 8; k++) |
199 | 31.3M | { |
200 | 31.3M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
201 | 31.3M | + g_ai2_ihevc_trans_16[3][k] |
202 | 31.3M | * pi2_tmp[3 * trans_size]; |
203 | 31.3M | } |
204 | 19.6M | for(k = 0; k < 4; k++) |
205 | 15.6M | { |
206 | 15.6M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
207 | 15.6M | } |
208 | 3.93M | eeo[0] = 0; |
209 | 3.93M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
210 | 3.93M | eeo[1] = 0; |
211 | 3.93M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
212 | | |
213 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
214 | 11.7M | for(k = 0; k < 2; k++) |
215 | 7.83M | { |
216 | 7.83M | ee[k] = eee[k] + eeo[k]; |
217 | 7.83M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
218 | 7.83M | } |
219 | 19.5M | for(k = 0; k < 4; k++) |
220 | 15.6M | { |
221 | 15.6M | e[k] = ee[k] + eo[k]; |
222 | 15.6M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
223 | 15.6M | } |
224 | 35.0M | for(k = 0; k < 8; k++) |
225 | 31.0M | { |
226 | 31.0M | WORD32 itrans_out; |
227 | 31.0M | itrans_out = |
228 | 31.0M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
229 | 31.0M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
230 | 31.0M | itrans_out = |
231 | 31.0M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
232 | 31.0M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
233 | 31.0M | } |
234 | 3.93M | pi2_tmp++; |
235 | 3.93M | pu1_pred += pred_strd; |
236 | 3.93M | pu1_dst += dst_strd; |
237 | 3.93M | } |
238 | 246k | } |
239 | 129k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
240 | 54.0k | { |
241 | 917k | for(j = 0; j < trans_size; j++) |
242 | 863k | { |
243 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
244 | 7.76M | for(k = 0; k < 8; k++) |
245 | 6.89M | { |
246 | 6.89M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
247 | 6.89M | + g_ai2_ihevc_trans_16[3][k] |
248 | 6.89M | * pi2_tmp[3 * trans_size] |
249 | 6.89M | + g_ai2_ihevc_trans_16[5][k] |
250 | 6.89M | * pi2_tmp[5 * trans_size] |
251 | 6.89M | + g_ai2_ihevc_trans_16[7][k] |
252 | 6.89M | * pi2_tmp[7 * trans_size]; |
253 | 6.89M | } |
254 | 4.31M | for(k = 0; k < 4; k++) |
255 | 3.45M | { |
256 | 3.45M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
257 | 3.45M | + g_ai2_ihevc_trans_16[6][k] |
258 | 3.45M | * pi2_tmp[6 * trans_size]; |
259 | 3.45M | } |
260 | 863k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
261 | 863k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
262 | 863k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
263 | 863k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
264 | | |
265 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
266 | 2.58M | for(k = 0; k < 2; k++) |
267 | 1.72M | { |
268 | 1.72M | ee[k] = eee[k] + eeo[k]; |
269 | 1.72M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
270 | 1.72M | } |
271 | 4.31M | for(k = 0; k < 4; k++) |
272 | 3.45M | { |
273 | 3.45M | e[k] = ee[k] + eo[k]; |
274 | 3.45M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
275 | 3.45M | } |
276 | 7.73M | for(k = 0; k < 8; k++) |
277 | 6.87M | { |
278 | 6.87M | WORD32 itrans_out; |
279 | 6.87M | itrans_out = |
280 | 6.87M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
281 | 6.87M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
282 | 6.87M | itrans_out = |
283 | 6.87M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
284 | 6.87M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
285 | 6.87M | } |
286 | 863k | pi2_tmp++; |
287 | 863k | pu1_pred += pred_strd; |
288 | 863k | pu1_dst += dst_strd; |
289 | 863k | } |
290 | 54.0k | } |
291 | 75.0k | else /* All rows of output of 1st stage are non-zero */ |
292 | 75.0k | { |
293 | 1.27M | for(j = 0; j < trans_size; j++) |
294 | 1.19M | { |
295 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
296 | 10.7M | for(k = 0; k < 8; k++) |
297 | 9.52M | { |
298 | 9.52M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
299 | 9.52M | + g_ai2_ihevc_trans_16[3][k] |
300 | 9.52M | * pi2_tmp[3 * trans_size] |
301 | 9.52M | + g_ai2_ihevc_trans_16[5][k] |
302 | 9.52M | * pi2_tmp[5 * trans_size] |
303 | 9.52M | + g_ai2_ihevc_trans_16[7][k] |
304 | 9.52M | * pi2_tmp[7 * trans_size] |
305 | 9.52M | + g_ai2_ihevc_trans_16[9][k] |
306 | 9.52M | * pi2_tmp[9 * trans_size] |
307 | 9.52M | + g_ai2_ihevc_trans_16[11][k] |
308 | 9.52M | * pi2_tmp[11 * trans_size] |
309 | 9.52M | + g_ai2_ihevc_trans_16[13][k] |
310 | 9.52M | * pi2_tmp[13 * trans_size] |
311 | 9.52M | + g_ai2_ihevc_trans_16[15][k] |
312 | 9.52M | * pi2_tmp[15 * trans_size]; |
313 | 9.52M | } |
314 | 5.97M | for(k = 0; k < 4; k++) |
315 | 4.78M | { |
316 | 4.78M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
317 | 4.78M | + g_ai2_ihevc_trans_16[6][k] |
318 | 4.78M | * pi2_tmp[6 * trans_size] |
319 | 4.78M | + g_ai2_ihevc_trans_16[10][k] |
320 | 4.78M | * pi2_tmp[10 * trans_size] |
321 | 4.78M | + g_ai2_ihevc_trans_16[14][k] |
322 | 4.78M | * pi2_tmp[14 * trans_size]; |
323 | 4.78M | } |
324 | 1.19M | eeo[0] = |
325 | 1.19M | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
326 | 1.19M | + g_ai2_ihevc_trans_16[12][0] |
327 | 1.19M | * pi2_tmp[12 |
328 | 1.19M | * trans_size]; |
329 | 1.19M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
330 | 1.19M | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
331 | 1.19M | eeo[1] = |
332 | 1.19M | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
333 | 1.19M | + g_ai2_ihevc_trans_16[12][1] |
334 | 1.19M | * pi2_tmp[12 |
335 | 1.19M | * trans_size]; |
336 | 1.19M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
337 | 1.19M | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
338 | | |
339 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
340 | 3.58M | for(k = 0; k < 2; k++) |
341 | 2.38M | { |
342 | 2.38M | ee[k] = eee[k] + eeo[k]; |
343 | 2.38M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
344 | 2.38M | } |
345 | 5.95M | for(k = 0; k < 4; k++) |
346 | 4.75M | { |
347 | 4.75M | e[k] = ee[k] + eo[k]; |
348 | 4.75M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
349 | 4.75M | } |
350 | 10.6M | for(k = 0; k < 8; k++) |
351 | 9.48M | { |
352 | 9.48M | WORD32 itrans_out; |
353 | 9.48M | itrans_out = |
354 | 9.48M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
355 | 9.48M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
356 | 9.48M | itrans_out = |
357 | 9.48M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
358 | 9.48M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
359 | 9.48M | } |
360 | 1.19M | pi2_tmp++; |
361 | 1.19M | pu1_pred += pred_strd; |
362 | 1.19M | pu1_dst += dst_strd; |
363 | 1.19M | } |
364 | 75.0k | } |
365 | | /************************************************************************************************/ |
366 | | /************************************END - IT_RECON_16x16****************************************/ |
367 | | /************************************************************************************************/ |
368 | 376k | } |
369 | 913k | else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ |
370 | 403k | { |
371 | | /* Inverse Transform 1st stage */ |
372 | | /************************************************************************************************/ |
373 | | /**********************************START - IT_RECON_16x16****************************************/ |
374 | | /************************************************************************************************/ |
375 | | |
376 | 403k | shift = IT_SHIFT_STAGE_1; |
377 | 403k | add = 1 << (shift - 1); |
378 | | |
379 | 3.74M | for(j = 0; j < row_limit_2nd_stage; j++) |
380 | 3.34M | { |
381 | | /* Checking for Zero Cols */ |
382 | 3.34M | if((zero_cols & 1) == 1) |
383 | 2.34M | { |
384 | 2.34M | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
385 | 2.34M | } |
386 | 997k | else |
387 | 997k | { |
388 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
389 | 9.00M | for(k = 0; k < 8; k++) |
390 | 8.00M | { |
391 | 8.00M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
392 | 8.00M | + g_ai2_ihevc_trans_16[3][k] |
393 | 8.00M | * pi2_src[3 * src_strd] |
394 | 8.00M | + g_ai2_ihevc_trans_16[5][k] |
395 | 8.00M | * pi2_src[5 * src_strd] |
396 | 8.00M | + g_ai2_ihevc_trans_16[7][k] |
397 | 8.00M | * pi2_src[7 * src_strd]; |
398 | 8.00M | } |
399 | 5.00M | for(k = 0; k < 4; k++) |
400 | 4.00M | { |
401 | 4.00M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
402 | 4.00M | + g_ai2_ihevc_trans_16[6][k] |
403 | 4.00M | * pi2_src[6 * src_strd]; |
404 | 4.00M | } |
405 | 997k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; |
406 | 997k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
407 | 997k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; |
408 | 997k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
409 | | |
410 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
411 | 3.00M | for(k = 0; k < 2; k++) |
412 | 2.00M | { |
413 | 2.00M | ee[k] = eee[k] + eeo[k]; |
414 | 2.00M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
415 | 2.00M | } |
416 | 5.00M | for(k = 0; k < 4; k++) |
417 | 4.00M | { |
418 | 4.00M | e[k] = ee[k] + eo[k]; |
419 | 4.00M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
420 | 4.00M | } |
421 | 9.00M | for(k = 0; k < 8; k++) |
422 | 8.00M | { |
423 | 8.00M | pi2_tmp[k] = |
424 | 8.00M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
425 | 8.00M | pi2_tmp[k + 8] = |
426 | 8.00M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
427 | 8.00M | } |
428 | 997k | } |
429 | 3.34M | pi2_src++; |
430 | 3.34M | pi2_tmp += trans_size; |
431 | 3.34M | zero_cols = zero_cols >> 1; |
432 | 3.34M | } |
433 | | |
434 | 403k | pi2_tmp = pi2_tmp_orig; |
435 | | |
436 | | /* Inverse Transform 2nd stage */ |
437 | 403k | shift = IT_SHIFT_STAGE_2; |
438 | 403k | add = 1 << (shift - 1); |
439 | | |
440 | 403k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
441 | 168k | { |
442 | 2.83M | for(j = 0; j < trans_size; j++) |
443 | 2.67M | { |
444 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
445 | 23.9M | for(k = 0; k < 8; k++) |
446 | 21.2M | { |
447 | 21.2M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
448 | 21.2M | + g_ai2_ihevc_trans_16[3][k] |
449 | 21.2M | * pi2_tmp[3 * trans_size]; |
450 | 21.2M | } |
451 | 13.3M | for(k = 0; k < 4; k++) |
452 | 10.6M | { |
453 | 10.6M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
454 | 10.6M | } |
455 | 2.67M | eeo[0] = 0; |
456 | 2.67M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
457 | 2.67M | eeo[1] = 0; |
458 | 2.67M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
459 | | |
460 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
461 | 7.97M | for(k = 0; k < 2; k++) |
462 | 5.30M | { |
463 | 5.30M | ee[k] = eee[k] + eeo[k]; |
464 | 5.30M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
465 | 5.30M | } |
466 | 13.2M | for(k = 0; k < 4; k++) |
467 | 10.5M | { |
468 | 10.5M | e[k] = ee[k] + eo[k]; |
469 | 10.5M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
470 | 10.5M | } |
471 | 23.6M | for(k = 0; k < 8; k++) |
472 | 20.9M | { |
473 | 20.9M | WORD32 itrans_out; |
474 | 20.9M | itrans_out = |
475 | 20.9M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
476 | 20.9M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
477 | 20.9M | itrans_out = |
478 | 20.9M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
479 | 20.9M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
480 | 20.9M | } |
481 | 2.67M | pi2_tmp++; |
482 | 2.67M | pu1_pred += pred_strd; |
483 | 2.67M | pu1_dst += dst_strd; |
484 | 2.67M | } |
485 | 168k | } |
486 | 235k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
487 | 135k | { |
488 | 2.29M | for(j = 0; j < trans_size; j++) |
489 | 2.16M | { |
490 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
491 | 19.3M | for(k = 0; k < 8; k++) |
492 | 17.2M | { |
493 | 17.2M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
494 | 17.2M | + g_ai2_ihevc_trans_16[3][k] |
495 | 17.2M | * pi2_tmp[3 * trans_size] |
496 | 17.2M | + g_ai2_ihevc_trans_16[5][k] |
497 | 17.2M | * pi2_tmp[5 * trans_size] |
498 | 17.2M | + g_ai2_ihevc_trans_16[7][k] |
499 | 17.2M | * pi2_tmp[7 * trans_size]; |
500 | 17.2M | } |
501 | 10.7M | for(k = 0; k < 4; k++) |
502 | 8.62M | { |
503 | 8.62M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
504 | 8.62M | + g_ai2_ihevc_trans_16[6][k] |
505 | 8.62M | * pi2_tmp[6 * trans_size]; |
506 | 8.62M | } |
507 | 2.16M | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
508 | 2.16M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
509 | 2.16M | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
510 | 2.16M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
511 | | |
512 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
513 | 6.47M | for(k = 0; k < 2; k++) |
514 | 4.31M | { |
515 | 4.31M | ee[k] = eee[k] + eeo[k]; |
516 | 4.31M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
517 | 4.31M | } |
518 | 10.7M | for(k = 0; k < 4; k++) |
519 | 8.62M | { |
520 | 8.62M | e[k] = ee[k] + eo[k]; |
521 | 8.62M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
522 | 8.62M | } |
523 | 19.2M | for(k = 0; k < 8; k++) |
524 | 17.0M | { |
525 | 17.0M | WORD32 itrans_out; |
526 | 17.0M | itrans_out = |
527 | 17.0M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
528 | 17.0M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
529 | 17.0M | itrans_out = |
530 | 17.0M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
531 | 17.0M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
532 | 17.0M | } |
533 | 2.16M | pi2_tmp++; |
534 | 2.16M | pu1_pred += pred_strd; |
535 | 2.16M | pu1_dst += dst_strd; |
536 | 2.16M | } |
537 | 135k | } |
538 | 99.7k | else /* All rows of output of 1st stage are non-zero */ |
539 | 99.7k | { |
540 | 1.66M | for(j = 0; j < trans_size; j++) |
541 | 1.56M | { |
542 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
543 | 13.8M | for(k = 0; k < 8; k++) |
544 | 12.2M | { |
545 | 12.2M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
546 | 12.2M | + g_ai2_ihevc_trans_16[3][k] |
547 | 12.2M | * pi2_tmp[3 * trans_size] |
548 | 12.2M | + g_ai2_ihevc_trans_16[5][k] |
549 | 12.2M | * pi2_tmp[5 * trans_size] |
550 | 12.2M | + g_ai2_ihevc_trans_16[7][k] |
551 | 12.2M | * pi2_tmp[7 * trans_size] |
552 | 12.2M | + g_ai2_ihevc_trans_16[9][k] |
553 | 12.2M | * pi2_tmp[9 * trans_size] |
554 | 12.2M | + g_ai2_ihevc_trans_16[11][k] |
555 | 12.2M | * pi2_tmp[11 * trans_size] |
556 | 12.2M | + g_ai2_ihevc_trans_16[13][k] |
557 | 12.2M | * pi2_tmp[13 * trans_size] |
558 | 12.2M | + g_ai2_ihevc_trans_16[15][k] |
559 | 12.2M | * pi2_tmp[15 * trans_size]; |
560 | 12.2M | } |
561 | 7.76M | for(k = 0; k < 4; k++) |
562 | 6.20M | { |
563 | 6.20M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
564 | 6.20M | + g_ai2_ihevc_trans_16[6][k] |
565 | 6.20M | * pi2_tmp[6 * trans_size] |
566 | 6.20M | + g_ai2_ihevc_trans_16[10][k] |
567 | 6.20M | * pi2_tmp[10 * trans_size] |
568 | 6.20M | + g_ai2_ihevc_trans_16[14][k] |
569 | 6.20M | * pi2_tmp[14 * trans_size]; |
570 | 6.20M | } |
571 | 1.56M | eeo[0] = |
572 | 1.56M | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
573 | 1.56M | + g_ai2_ihevc_trans_16[12][0] |
574 | 1.56M | * pi2_tmp[12 |
575 | 1.56M | * trans_size]; |
576 | 1.56M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
577 | 1.56M | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
578 | 1.56M | eeo[1] = |
579 | 1.56M | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
580 | 1.56M | + g_ai2_ihevc_trans_16[12][1] |
581 | 1.56M | * pi2_tmp[12 |
582 | 1.56M | * trans_size]; |
583 | 1.56M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
584 | 1.56M | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
585 | | |
586 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
587 | 4.65M | for(k = 0; k < 2; k++) |
588 | 3.09M | { |
589 | 3.09M | ee[k] = eee[k] + eeo[k]; |
590 | 3.09M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
591 | 3.09M | } |
592 | 7.58M | for(k = 0; k < 4; k++) |
593 | 6.02M | { |
594 | 6.02M | e[k] = ee[k] + eo[k]; |
595 | 6.02M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
596 | 6.02M | } |
597 | 13.4M | for(k = 0; k < 8; k++) |
598 | 11.9M | { |
599 | 11.9M | WORD32 itrans_out; |
600 | 11.9M | itrans_out = |
601 | 11.9M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
602 | 11.9M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
603 | 11.9M | itrans_out = |
604 | 11.9M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
605 | 11.9M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
606 | 11.9M | } |
607 | 1.56M | pi2_tmp++; |
608 | 1.56M | pu1_pred += pred_strd; |
609 | 1.56M | pu1_dst += dst_strd; |
610 | 1.56M | } |
611 | 99.7k | } |
612 | | /************************************************************************************************/ |
613 | | /************************************END - IT_RECON_16x16****************************************/ |
614 | | /************************************************************************************************/ |
615 | 403k | } |
616 | 509k | else /* All rows of input are non-zero */ |
617 | 509k | { |
618 | | /* Inverse Transform 1st stage */ |
619 | | /************************************************************************************************/ |
620 | | /**********************************START - IT_RECON_16x16****************************************/ |
621 | | /************************************************************************************************/ |
622 | | |
623 | 509k | shift = IT_SHIFT_STAGE_1; |
624 | 509k | add = 1 << (shift - 1); |
625 | | |
626 | 6.14M | for(j = 0; j < row_limit_2nd_stage; j++) |
627 | 5.63M | { |
628 | | /* Checking for Zero Cols */ |
629 | 5.63M | if((zero_cols & 1) == 1) |
630 | 1.02M | { |
631 | 1.02M | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
632 | 1.02M | } |
633 | 4.60M | else |
634 | 4.60M | { |
635 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
636 | 41.4M | for(k = 0; k < 8; k++) |
637 | 36.8M | { |
638 | 36.8M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
639 | 36.8M | + g_ai2_ihevc_trans_16[3][k] |
640 | 36.8M | * pi2_src[3 * src_strd] |
641 | 36.8M | + g_ai2_ihevc_trans_16[5][k] |
642 | 36.8M | * pi2_src[5 * src_strd] |
643 | 36.8M | + g_ai2_ihevc_trans_16[7][k] |
644 | 36.8M | * pi2_src[7 * src_strd] |
645 | 36.8M | + g_ai2_ihevc_trans_16[9][k] |
646 | 36.8M | * pi2_src[9 * src_strd] |
647 | 36.8M | + g_ai2_ihevc_trans_16[11][k] |
648 | 36.8M | * pi2_src[11 * src_strd] |
649 | 36.8M | + g_ai2_ihevc_trans_16[13][k] |
650 | 36.8M | * pi2_src[13 * src_strd] |
651 | 36.8M | + g_ai2_ihevc_trans_16[15][k] |
652 | 36.8M | * pi2_src[15 * src_strd]; |
653 | 36.8M | } |
654 | 23.0M | for(k = 0; k < 4; k++) |
655 | 18.4M | { |
656 | 18.4M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
657 | 18.4M | + g_ai2_ihevc_trans_16[6][k] |
658 | 18.4M | * pi2_src[6 * src_strd] |
659 | 18.4M | + g_ai2_ihevc_trans_16[10][k] |
660 | 18.4M | * pi2_src[10 * src_strd] |
661 | 18.4M | + g_ai2_ihevc_trans_16[14][k] |
662 | 18.4M | * pi2_src[14 * src_strd]; |
663 | 18.4M | } |
664 | 4.60M | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] |
665 | 4.60M | + g_ai2_ihevc_trans_16[12][0] |
666 | 4.60M | * pi2_src[12 * src_strd]; |
667 | 4.60M | eee[0] = |
668 | 4.60M | g_ai2_ihevc_trans_16[0][0] * pi2_src[0] |
669 | 4.60M | + g_ai2_ihevc_trans_16[8][0] |
670 | 4.60M | * pi2_src[8 |
671 | 4.60M | * src_strd]; |
672 | 4.60M | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] |
673 | 4.60M | + g_ai2_ihevc_trans_16[12][1] |
674 | 4.60M | * pi2_src[12 * src_strd]; |
675 | 4.60M | eee[1] = |
676 | 4.60M | g_ai2_ihevc_trans_16[0][1] * pi2_src[0] |
677 | 4.60M | + g_ai2_ihevc_trans_16[8][1] |
678 | 4.60M | * pi2_src[8 |
679 | 4.60M | * src_strd]; |
680 | | |
681 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
682 | 13.8M | for(k = 0; k < 2; k++) |
683 | 9.22M | { |
684 | 9.22M | ee[k] = eee[k] + eeo[k]; |
685 | 9.22M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
686 | 9.22M | } |
687 | 23.0M | for(k = 0; k < 4; k++) |
688 | 18.4M | { |
689 | 18.4M | e[k] = ee[k] + eo[k]; |
690 | 18.4M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
691 | 18.4M | } |
692 | 41.4M | for(k = 0; k < 8; k++) |
693 | 36.8M | { |
694 | 36.8M | pi2_tmp[k] = |
695 | 36.8M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
696 | 36.8M | pi2_tmp[k + 8] = |
697 | 36.8M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
698 | 36.8M | } |
699 | 4.60M | } |
700 | 5.63M | pi2_src++; |
701 | 5.63M | pi2_tmp += trans_size; |
702 | 5.63M | zero_cols = zero_cols >> 1; |
703 | 5.63M | } |
704 | | |
705 | 509k | pi2_tmp = pi2_tmp_orig; |
706 | | |
707 | | /* Inverse Transform 2nd stage */ |
708 | 509k | shift = IT_SHIFT_STAGE_2; |
709 | 509k | add = 1 << (shift - 1); |
710 | | |
711 | 509k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
712 | 140k | { |
713 | 2.35M | for(j = 0; j < trans_size; j++) |
714 | 2.21M | { |
715 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
716 | 19.7M | for(k = 0; k < 8; k++) |
717 | 17.5M | { |
718 | 17.5M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
719 | 17.5M | + g_ai2_ihevc_trans_16[3][k] |
720 | 17.5M | * pi2_tmp[3 * trans_size]; |
721 | 17.5M | } |
722 | 11.0M | for(k = 0; k < 4; k++) |
723 | 8.82M | { |
724 | 8.82M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
725 | 8.82M | } |
726 | 2.21M | eeo[0] = 0; |
727 | 2.21M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
728 | 2.21M | eeo[1] = 0; |
729 | 2.21M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
730 | | |
731 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
732 | 6.62M | for(k = 0; k < 2; k++) |
733 | 4.40M | { |
734 | 4.40M | ee[k] = eee[k] + eeo[k]; |
735 | 4.40M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
736 | 4.40M | } |
737 | 11.0M | for(k = 0; k < 4; k++) |
738 | 8.79M | { |
739 | 8.79M | e[k] = ee[k] + eo[k]; |
740 | 8.79M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
741 | 8.79M | } |
742 | 19.3M | for(k = 0; k < 8; k++) |
743 | 17.1M | { |
744 | 17.1M | WORD32 itrans_out; |
745 | 17.1M | itrans_out = |
746 | 17.1M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
747 | 17.1M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
748 | 17.1M | itrans_out = |
749 | 17.1M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
750 | 17.1M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
751 | 17.1M | } |
752 | 2.21M | pi2_tmp++; |
753 | 2.21M | pu1_pred += pred_strd; |
754 | 2.21M | pu1_dst += dst_strd; |
755 | 2.21M | } |
756 | 140k | } |
757 | 369k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
758 | 105k | { |
759 | 1.77M | for(j = 0; j < trans_size; j++) |
760 | 1.67M | { |
761 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
762 | 15.0M | for(k = 0; k < 8; k++) |
763 | 13.3M | { |
764 | 13.3M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
765 | 13.3M | + g_ai2_ihevc_trans_16[3][k] |
766 | 13.3M | * pi2_tmp[3 * trans_size] |
767 | 13.3M | + g_ai2_ihevc_trans_16[5][k] |
768 | 13.3M | * pi2_tmp[5 * trans_size] |
769 | 13.3M | + g_ai2_ihevc_trans_16[7][k] |
770 | 13.3M | * pi2_tmp[7 * trans_size]; |
771 | 13.3M | } |
772 | 8.36M | for(k = 0; k < 4; k++) |
773 | 6.68M | { |
774 | 6.68M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
775 | 6.68M | + g_ai2_ihevc_trans_16[6][k] |
776 | 6.68M | * pi2_tmp[6 * trans_size]; |
777 | 6.68M | } |
778 | 1.67M | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
779 | 1.67M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
780 | 1.67M | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
781 | 1.67M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
782 | | |
783 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
784 | 4.99M | for(k = 0; k < 2; k++) |
785 | 3.32M | { |
786 | 3.32M | ee[k] = eee[k] + eeo[k]; |
787 | 3.32M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
788 | 3.32M | } |
789 | 8.32M | for(k = 0; k < 4; k++) |
790 | 6.64M | { |
791 | 6.64M | e[k] = ee[k] + eo[k]; |
792 | 6.64M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
793 | 6.64M | } |
794 | 14.8M | for(k = 0; k < 8; k++) |
795 | 13.2M | { |
796 | 13.2M | WORD32 itrans_out; |
797 | 13.2M | itrans_out = |
798 | 13.2M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
799 | 13.2M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
800 | 13.2M | itrans_out = |
801 | 13.2M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
802 | 13.2M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
803 | 13.2M | } |
804 | 1.67M | pi2_tmp++; |
805 | 1.67M | pu1_pred += pred_strd; |
806 | 1.67M | pu1_dst += dst_strd; |
807 | 1.67M | } |
808 | 105k | } |
809 | 264k | else /* All rows of output of 1st stage are non-zero */ |
810 | 264k | { |
811 | 4.49M | for(j = 0; j < trans_size; j++) |
812 | 4.23M | { |
813 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
814 | 38.0M | for(k = 0; k < 8; k++) |
815 | 33.8M | { |
816 | 33.8M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
817 | 33.8M | + g_ai2_ihevc_trans_16[3][k] |
818 | 33.8M | * pi2_tmp[3 * trans_size] |
819 | 33.8M | + g_ai2_ihevc_trans_16[5][k] |
820 | 33.8M | * pi2_tmp[5 * trans_size] |
821 | 33.8M | + g_ai2_ihevc_trans_16[7][k] |
822 | 33.8M | * pi2_tmp[7 * trans_size] |
823 | 33.8M | + g_ai2_ihevc_trans_16[9][k] |
824 | 33.8M | * pi2_tmp[9 * trans_size] |
825 | 33.8M | + g_ai2_ihevc_trans_16[11][k] |
826 | 33.8M | * pi2_tmp[11 * trans_size] |
827 | 33.8M | + g_ai2_ihevc_trans_16[13][k] |
828 | 33.8M | * pi2_tmp[13 * trans_size] |
829 | 33.8M | + g_ai2_ihevc_trans_16[15][k] |
830 | 33.8M | * pi2_tmp[15 * trans_size]; |
831 | 33.8M | } |
832 | 21.1M | for(k = 0; k < 4; k++) |
833 | 16.9M | { |
834 | 16.9M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
835 | 16.9M | + g_ai2_ihevc_trans_16[6][k] |
836 | 16.9M | * pi2_tmp[6 * trans_size] |
837 | 16.9M | + g_ai2_ihevc_trans_16[10][k] |
838 | 16.9M | * pi2_tmp[10 * trans_size] |
839 | 16.9M | + g_ai2_ihevc_trans_16[14][k] |
840 | 16.9M | * pi2_tmp[14 * trans_size]; |
841 | 16.9M | } |
842 | 4.23M | eeo[0] = |
843 | 4.23M | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
844 | 4.23M | + g_ai2_ihevc_trans_16[12][0] |
845 | 4.23M | * pi2_tmp[12 |
846 | 4.23M | * trans_size]; |
847 | 4.23M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
848 | 4.23M | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
849 | 4.23M | eeo[1] = |
850 | 4.23M | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
851 | 4.23M | + g_ai2_ihevc_trans_16[12][1] |
852 | 4.23M | * pi2_tmp[12 |
853 | 4.23M | * trans_size]; |
854 | 4.23M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
855 | 4.23M | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
856 | | |
857 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
858 | 12.6M | for(k = 0; k < 2; k++) |
859 | 8.46M | { |
860 | 8.46M | ee[k] = eee[k] + eeo[k]; |
861 | 8.46M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
862 | 8.46M | } |
863 | 21.1M | for(k = 0; k < 4; k++) |
864 | 16.9M | { |
865 | 16.9M | e[k] = ee[k] + eo[k]; |
866 | 16.9M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
867 | 16.9M | } |
868 | 38.0M | for(k = 0; k < 8; k++) |
869 | 33.8M | { |
870 | 33.8M | WORD32 itrans_out; |
871 | 33.8M | itrans_out = |
872 | 33.8M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
873 | 33.8M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
874 | 33.8M | itrans_out = |
875 | 33.8M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
876 | 33.8M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
877 | 33.8M | } |
878 | 4.23M | pi2_tmp++; |
879 | 4.23M | pu1_pred += pred_strd; |
880 | 4.23M | pu1_dst += dst_strd; |
881 | 4.23M | } |
882 | 264k | } |
883 | | /************************************************************************************************/ |
884 | | /************************************END - IT_RECON_16x16****************************************/ |
885 | | /************************************************************************************************/ |
886 | 509k | } |
887 | | |
888 | 1.28M | } |
889 | | |