/src/libhevc/common/ihevc_itrans_recon_16x16.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_itrans_recon_16x16.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for inverse transform and reconstruction 16x16 |
25 | | * |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_itrans_recon_16x16() |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | #include <stdio.h> |
39 | | #include <string.h> |
40 | | #include "ihevc_typedefs.h" |
41 | | #include "ihevc_macros.h" |
42 | | #include "ihevc_platform_macros.h" |
43 | | #include "ihevc_defs.h" |
44 | | #include "ihevc_trans_tables.h" |
45 | | #include "ihevc_itrans_recon.h" |
46 | | #include "ihevc_func_selector.h" |
47 | | #include "ihevc_trans_macros.h" |
48 | | |
49 | | /** |
50 | | ******************************************************************************* |
51 | | * |
52 | | * @brief |
53 | | * This function performs Inverse transform and reconstruction for 16x16 |
54 | | * input block |
55 | | * |
56 | | * @par Description: |
57 | | * Performs inverse transform and adds the prediction data and clips output |
58 | | * to 8 bit |
59 | | * |
60 | | * @param[in] pi2_src |
61 | | * Input 16x16 coefficients |
62 | | * |
63 | | * @param[in] pi2_tmp |
64 | | * Temporary 16x16 buffer used to store the |
65 | | * output of the 1st stage of the inverse |
66 | | * transform, which is then read by the |
67 | | * 2nd stage |
68 | | * |
69 | | * @param[in] pu1_pred |
70 | | * Prediction 16x16 block |
71 | | * |
72 | | * @param[out] pu1_dst |
73 | | * Output 16x16 block |
74 | | * |
75 | | * @param[in] src_strd |
76 | | * Input stride |
77 | | * |
78 | | * @param[in] pred_strd |
79 | | * Prediction stride |
80 | | * |
81 | | * @param[in] dst_strd |
82 | | * Output Stride |
83 | | * |
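84 | | * @param[in] zero_cols |
85 | | * Bitmask of zero columns in pi2_src; a set bit j means column j is not transformed and its 1st stage output is zero-filled |
86 | | * |
87 | | * @param[in] zero_rows |
88 | | * Bitmask of zero rows in pi2_src; when bits 4-15 (or bits 8-15) are all set, only the first 4 (or 8) input rows are used in the 1st stage |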
89 | | * |
90 | | * @returns Void |
91 | | * |
92 | | * @remarks |
93 | | * None |
94 | | * |
95 | | ******************************************************************************* |
96 | | */ |
97 | | |
98 | | void ihevc_itrans_recon_16x16(WORD16 *pi2_src, |
99 | | WORD16 *pi2_tmp, |
100 | | UWORD8 *pu1_pred, |
101 | | UWORD8 *pu1_dst, |
102 | | WORD32 src_strd, |
103 | | WORD32 pred_strd, |
104 | | WORD32 dst_strd, |
105 | | WORD32 zero_cols, |
106 | | WORD32 zero_rows) |
107 | 57.2k | { |
108 | 57.2k | WORD32 j, k; |
109 | 57.2k | WORD32 e[8], o[8]; |
110 | 57.2k | WORD32 ee[4], eo[4]; |
111 | 57.2k | WORD32 eee[2], eeo[2]; |
112 | 57.2k | WORD32 add; |
113 | 57.2k | WORD32 shift; |
114 | 57.2k | WORD16 *pi2_tmp_orig; |
115 | 57.2k | WORD32 trans_size; |
116 | 57.2k | WORD32 zero_rows_2nd_stage = zero_cols; |
117 | 57.2k | WORD32 row_limit_2nd_stage; |
118 | | |
119 | 57.2k | if((zero_cols & 0xFFF0) == 0xFFF0) |
120 | 3.76k | row_limit_2nd_stage = 4; |
121 | 53.4k | else if((zero_cols & 0xFF00) == 0xFF00) |
122 | 1.95k | row_limit_2nd_stage = 8; |
123 | 51.5k | else |
124 | 51.5k | row_limit_2nd_stage = TRANS_SIZE_16; |
125 | | |
126 | 57.2k | trans_size = TRANS_SIZE_16; |
127 | 57.2k | pi2_tmp_orig = pi2_tmp; |
128 | 57.2k | if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ |
129 | 6.40k | { |
130 | | /* Inverse Transform 1st stage */ |
131 | | /************************************************************************************************/ |
132 | | /**********************************START - IT_RECON_16x16****************************************/ |
133 | | /************************************************************************************************/ |
134 | | |
135 | 6.40k | shift = IT_SHIFT_STAGE_1; |
136 | 6.40k | add = 1 << (shift - 1); |
137 | | |
138 | 97.6k | for(j = 0; j < row_limit_2nd_stage; j++) |
139 | 91.2k | { |
140 | | /* Checking for Zero Cols */ |
141 | 91.2k | if((zero_cols & 1) == 1) |
142 | 1.07k | { |
143 | 1.07k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
144 | 1.07k | } |
145 | 90.1k | else |
146 | 90.1k | { |
147 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
148 | 811k | for(k = 0; k < 8; k++) |
149 | 721k | { |
150 | 721k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
151 | 721k | + g_ai2_ihevc_trans_16[3][k] |
152 | 721k | * pi2_src[3 * src_strd]; |
153 | 721k | } |
154 | 450k | for(k = 0; k < 4; k++) |
155 | 360k | { |
156 | 360k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; |
157 | 360k | } |
158 | 90.1k | eeo[0] = 0; |
159 | 90.1k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
160 | 90.1k | eeo[1] = 0; |
161 | 90.1k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
162 | | |
163 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
164 | 270k | for(k = 0; k < 2; k++) |
165 | 180k | { |
166 | 180k | ee[k] = eee[k] + eeo[k]; |
167 | 180k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
168 | 180k | } |
169 | 450k | for(k = 0; k < 4; k++) |
170 | 360k | { |
171 | 360k | e[k] = ee[k] + eo[k]; |
172 | 360k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
173 | 360k | } |
174 | 811k | for(k = 0; k < 8; k++) |
175 | 721k | { |
176 | 721k | pi2_tmp[k] = |
177 | 721k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
178 | 721k | pi2_tmp[k + 8] = |
179 | 721k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
180 | 721k | } |
181 | 90.1k | } |
182 | 91.2k | pi2_src++; |
183 | 91.2k | pi2_tmp += trans_size; |
184 | 91.2k | zero_cols = zero_cols >> 1; |
185 | 91.2k | } |
186 | | |
187 | 6.40k | pi2_tmp = pi2_tmp_orig; |
188 | | |
189 | | /* Inverse Transform 2nd stage */ |
190 | 6.40k | shift = IT_SHIFT_STAGE_2; |
191 | 6.40k | add = 1 << (shift - 1); |
192 | | |
193 | 6.40k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
194 | 833 | { |
195 | 14.1k | for(j = 0; j < trans_size; j++) |
196 | 13.3k | { |
197 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
198 | 119k | for(k = 0; k < 8; k++) |
199 | 106k | { |
200 | 106k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
201 | 106k | + g_ai2_ihevc_trans_16[3][k] |
202 | 106k | * pi2_tmp[3 * trans_size]; |
203 | 106k | } |
204 | 66.6k | for(k = 0; k < 4; k++) |
205 | 53.3k | { |
206 | 53.3k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
207 | 53.3k | } |
208 | 13.3k | eeo[0] = 0; |
209 | 13.3k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
210 | 13.3k | eeo[1] = 0; |
211 | 13.3k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
212 | | |
213 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
214 | 39.9k | for(k = 0; k < 2; k++) |
215 | 26.6k | { |
216 | 26.6k | ee[k] = eee[k] + eeo[k]; |
217 | 26.6k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
218 | 26.6k | } |
219 | 66.6k | for(k = 0; k < 4; k++) |
220 | 53.3k | { |
221 | 53.3k | e[k] = ee[k] + eo[k]; |
222 | 53.3k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
223 | 53.3k | } |
224 | 119k | for(k = 0; k < 8; k++) |
225 | 106k | { |
226 | 106k | WORD32 itrans_out; |
227 | 106k | itrans_out = |
228 | 106k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
229 | 106k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
230 | 106k | itrans_out = |
231 | 106k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
232 | 106k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
233 | 106k | } |
234 | 13.3k | pi2_tmp++; |
235 | 13.3k | pu1_pred += pred_strd; |
236 | 13.3k | pu1_dst += dst_strd; |
237 | 13.3k | } |
238 | 833 | } |
239 | 5.56k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
240 | 149 | { |
241 | 2.53k | for(j = 0; j < trans_size; j++) |
242 | 2.38k | { |
243 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
244 | 21.4k | for(k = 0; k < 8; k++) |
245 | 19.0k | { |
246 | 19.0k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
247 | 19.0k | + g_ai2_ihevc_trans_16[3][k] |
248 | 19.0k | * pi2_tmp[3 * trans_size] |
249 | 19.0k | + g_ai2_ihevc_trans_16[5][k] |
250 | 19.0k | * pi2_tmp[5 * trans_size] |
251 | 19.0k | + g_ai2_ihevc_trans_16[7][k] |
252 | 19.0k | * pi2_tmp[7 * trans_size]; |
253 | 19.0k | } |
254 | 11.9k | for(k = 0; k < 4; k++) |
255 | 9.53k | { |
256 | 9.53k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
257 | 9.53k | + g_ai2_ihevc_trans_16[6][k] |
258 | 9.53k | * pi2_tmp[6 * trans_size]; |
259 | 9.53k | } |
260 | 2.38k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
261 | 2.38k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
262 | 2.38k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
263 | 2.38k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
264 | | |
265 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
266 | 7.15k | for(k = 0; k < 2; k++) |
267 | 4.76k | { |
268 | 4.76k | ee[k] = eee[k] + eeo[k]; |
269 | 4.76k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
270 | 4.76k | } |
271 | 11.9k | for(k = 0; k < 4; k++) |
272 | 9.53k | { |
273 | 9.53k | e[k] = ee[k] + eo[k]; |
274 | 9.53k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
275 | 9.53k | } |
276 | 21.4k | for(k = 0; k < 8; k++) |
277 | 19.0k | { |
278 | 19.0k | WORD32 itrans_out; |
279 | 19.0k | itrans_out = |
280 | 19.0k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
281 | 19.0k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
282 | 19.0k | itrans_out = |
283 | 19.0k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
284 | 19.0k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
285 | 19.0k | } |
286 | 2.38k | pi2_tmp++; |
287 | 2.38k | pu1_pred += pred_strd; |
288 | 2.38k | pu1_dst += dst_strd; |
289 | 2.38k | } |
290 | 149 | } |
291 | 5.42k | else /* All rows of output of 1st stage are non-zero */ |
292 | 5.42k | { |
293 | 92.1k | for(j = 0; j < trans_size; j++) |
294 | 86.7k | { |
295 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
296 | 780k | for(k = 0; k < 8; k++) |
297 | 693k | { |
298 | 693k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
299 | 693k | + g_ai2_ihevc_trans_16[3][k] |
300 | 693k | * pi2_tmp[3 * trans_size] |
301 | 693k | + g_ai2_ihevc_trans_16[5][k] |
302 | 693k | * pi2_tmp[5 * trans_size] |
303 | 693k | + g_ai2_ihevc_trans_16[7][k] |
304 | 693k | * pi2_tmp[7 * trans_size] |
305 | 693k | + g_ai2_ihevc_trans_16[9][k] |
306 | 693k | * pi2_tmp[9 * trans_size] |
307 | 693k | + g_ai2_ihevc_trans_16[11][k] |
308 | 693k | * pi2_tmp[11 * trans_size] |
309 | 693k | + g_ai2_ihevc_trans_16[13][k] |
310 | 693k | * pi2_tmp[13 * trans_size] |
311 | 693k | + g_ai2_ihevc_trans_16[15][k] |
312 | 693k | * pi2_tmp[15 * trans_size]; |
313 | 693k | } |
314 | 433k | for(k = 0; k < 4; k++) |
315 | 346k | { |
316 | 346k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
317 | 346k | + g_ai2_ihevc_trans_16[6][k] |
318 | 346k | * pi2_tmp[6 * trans_size] |
319 | 346k | + g_ai2_ihevc_trans_16[10][k] |
320 | 346k | * pi2_tmp[10 * trans_size] |
321 | 346k | + g_ai2_ihevc_trans_16[14][k] |
322 | 346k | * pi2_tmp[14 * trans_size]; |
323 | 346k | } |
324 | 86.7k | eeo[0] = |
325 | 86.7k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
326 | 86.7k | + g_ai2_ihevc_trans_16[12][0] |
327 | 86.7k | * pi2_tmp[12 |
328 | 86.7k | * trans_size]; |
329 | 86.7k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
330 | 86.7k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
331 | 86.7k | eeo[1] = |
332 | 86.7k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
333 | 86.7k | + g_ai2_ihevc_trans_16[12][1] |
334 | 86.7k | * pi2_tmp[12 |
335 | 86.7k | * trans_size]; |
336 | 86.7k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
337 | 86.7k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
338 | | |
339 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
340 | 260k | for(k = 0; k < 2; k++) |
341 | 173k | { |
342 | 173k | ee[k] = eee[k] + eeo[k]; |
343 | 173k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
344 | 173k | } |
345 | 433k | for(k = 0; k < 4; k++) |
346 | 346k | { |
347 | 346k | e[k] = ee[k] + eo[k]; |
348 | 346k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
349 | 346k | } |
350 | 780k | for(k = 0; k < 8; k++) |
351 | 693k | { |
352 | 693k | WORD32 itrans_out; |
353 | 693k | itrans_out = |
354 | 693k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
355 | 693k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
356 | 693k | itrans_out = |
357 | 693k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
358 | 693k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
359 | 693k | } |
360 | 86.7k | pi2_tmp++; |
361 | 86.7k | pu1_pred += pred_strd; |
362 | 86.7k | pu1_dst += dst_strd; |
363 | 86.7k | } |
364 | 5.42k | } |
365 | | /************************************************************************************************/ |
366 | | /************************************END - IT_RECON_16x16****************************************/ |
367 | | /************************************************************************************************/ |
368 | 6.40k | } |
369 | 50.8k | else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ |
370 | 3.69k | { |
371 | | /* Inverse Transform 1st stage */ |
372 | | /************************************************************************************************/ |
373 | | /**********************************START - IT_RECON_16x16****************************************/ |
374 | | /************************************************************************************************/ |
375 | | |
376 | 3.69k | shift = IT_SHIFT_STAGE_1; |
377 | 3.69k | add = 1 << (shift - 1); |
378 | | |
379 | 55.0k | for(j = 0; j < row_limit_2nd_stage; j++) |
380 | 51.3k | { |
381 | | /* Checking for Zero Cols */ |
382 | 51.3k | if((zero_cols & 1) == 1) |
383 | 3.56k | { |
384 | 3.56k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
385 | 3.56k | } |
386 | 47.7k | else |
387 | 47.7k | { |
388 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
389 | 429k | for(k = 0; k < 8; k++) |
390 | 382k | { |
391 | 382k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
392 | 382k | + g_ai2_ihevc_trans_16[3][k] |
393 | 382k | * pi2_src[3 * src_strd] |
394 | 382k | + g_ai2_ihevc_trans_16[5][k] |
395 | 382k | * pi2_src[5 * src_strd] |
396 | 382k | + g_ai2_ihevc_trans_16[7][k] |
397 | 382k | * pi2_src[7 * src_strd]; |
398 | 382k | } |
399 | 238k | for(k = 0; k < 4; k++) |
400 | 191k | { |
401 | 191k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
402 | 191k | + g_ai2_ihevc_trans_16[6][k] |
403 | 191k | * pi2_src[6 * src_strd]; |
404 | 191k | } |
405 | 47.7k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; |
406 | 47.7k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
407 | 47.7k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; |
408 | 47.7k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
409 | | |
410 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
411 | 143k | for(k = 0; k < 2; k++) |
412 | 95.5k | { |
413 | 95.5k | ee[k] = eee[k] + eeo[k]; |
414 | 95.5k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
415 | 95.5k | } |
416 | 238k | for(k = 0; k < 4; k++) |
417 | 191k | { |
418 | 191k | e[k] = ee[k] + eo[k]; |
419 | 191k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
420 | 191k | } |
421 | 429k | for(k = 0; k < 8; k++) |
422 | 382k | { |
423 | 382k | pi2_tmp[k] = |
424 | 382k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
425 | 382k | pi2_tmp[k + 8] = |
426 | 382k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
427 | 382k | } |
428 | 47.7k | } |
429 | 51.3k | pi2_src++; |
430 | 51.3k | pi2_tmp += trans_size; |
431 | 51.3k | zero_cols = zero_cols >> 1; |
432 | 51.3k | } |
433 | | |
434 | 3.69k | pi2_tmp = pi2_tmp_orig; |
435 | | |
436 | | /* Inverse Transform 2nd stage */ |
437 | 3.69k | shift = IT_SHIFT_STAGE_2; |
438 | 3.69k | add = 1 << (shift - 1); |
439 | | |
440 | 3.69k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
441 | 484 | { |
442 | 8.22k | for(j = 0; j < trans_size; j++) |
443 | 7.74k | { |
444 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
445 | 69.6k | for(k = 0; k < 8; k++) |
446 | 61.9k | { |
447 | 61.9k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
448 | 61.9k | + g_ai2_ihevc_trans_16[3][k] |
449 | 61.9k | * pi2_tmp[3 * trans_size]; |
450 | 61.9k | } |
451 | 38.7k | for(k = 0; k < 4; k++) |
452 | 30.9k | { |
453 | 30.9k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
454 | 30.9k | } |
455 | 7.74k | eeo[0] = 0; |
456 | 7.74k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
457 | 7.74k | eeo[1] = 0; |
458 | 7.74k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
459 | | |
460 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
461 | 23.2k | for(k = 0; k < 2; k++) |
462 | 15.4k | { |
463 | 15.4k | ee[k] = eee[k] + eeo[k]; |
464 | 15.4k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
465 | 15.4k | } |
466 | 38.7k | for(k = 0; k < 4; k++) |
467 | 30.9k | { |
468 | 30.9k | e[k] = ee[k] + eo[k]; |
469 | 30.9k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
470 | 30.9k | } |
471 | 69.6k | for(k = 0; k < 8; k++) |
472 | 61.9k | { |
473 | 61.9k | WORD32 itrans_out; |
474 | 61.9k | itrans_out = |
475 | 61.9k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
476 | 61.9k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
477 | 61.9k | itrans_out = |
478 | 61.9k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
479 | 61.9k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
480 | 61.9k | } |
481 | 7.74k | pi2_tmp++; |
482 | 7.74k | pu1_pred += pred_strd; |
483 | 7.74k | pu1_dst += dst_strd; |
484 | 7.74k | } |
485 | 484 | } |
486 | 3.20k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
487 | 237 | { |
488 | 4.02k | for(j = 0; j < trans_size; j++) |
489 | 3.79k | { |
490 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
491 | 34.1k | for(k = 0; k < 8; k++) |
492 | 30.3k | { |
493 | 30.3k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
494 | 30.3k | + g_ai2_ihevc_trans_16[3][k] |
495 | 30.3k | * pi2_tmp[3 * trans_size] |
496 | 30.3k | + g_ai2_ihevc_trans_16[5][k] |
497 | 30.3k | * pi2_tmp[5 * trans_size] |
498 | 30.3k | + g_ai2_ihevc_trans_16[7][k] |
499 | 30.3k | * pi2_tmp[7 * trans_size]; |
500 | 30.3k | } |
501 | 18.9k | for(k = 0; k < 4; k++) |
502 | 15.1k | { |
503 | 15.1k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
504 | 15.1k | + g_ai2_ihevc_trans_16[6][k] |
505 | 15.1k | * pi2_tmp[6 * trans_size]; |
506 | 15.1k | } |
507 | 3.79k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
508 | 3.79k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
509 | 3.79k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
510 | 3.79k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
511 | | |
512 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
513 | 11.3k | for(k = 0; k < 2; k++) |
514 | 7.58k | { |
515 | 7.58k | ee[k] = eee[k] + eeo[k]; |
516 | 7.58k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
517 | 7.58k | } |
518 | 18.9k | for(k = 0; k < 4; k++) |
519 | 15.1k | { |
520 | 15.1k | e[k] = ee[k] + eo[k]; |
521 | 15.1k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
522 | 15.1k | } |
523 | 34.1k | for(k = 0; k < 8; k++) |
524 | 30.3k | { |
525 | 30.3k | WORD32 itrans_out; |
526 | 30.3k | itrans_out = |
527 | 30.3k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
528 | 30.3k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
529 | 30.3k | itrans_out = |
530 | 30.3k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
531 | 30.3k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
532 | 30.3k | } |
533 | 3.79k | pi2_tmp++; |
534 | 3.79k | pu1_pred += pred_strd; |
535 | 3.79k | pu1_dst += dst_strd; |
536 | 3.79k | } |
537 | 237 | } |
538 | 2.96k | else /* All rows of output of 1st stage are non-zero */ |
539 | 2.96k | { |
540 | 50.4k | for(j = 0; j < trans_size; j++) |
541 | 47.5k | { |
542 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
543 | 427k | for(k = 0; k < 8; k++) |
544 | 380k | { |
545 | 380k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
546 | 380k | + g_ai2_ihevc_trans_16[3][k] |
547 | 380k | * pi2_tmp[3 * trans_size] |
548 | 380k | + g_ai2_ihevc_trans_16[5][k] |
549 | 380k | * pi2_tmp[5 * trans_size] |
550 | 380k | + g_ai2_ihevc_trans_16[7][k] |
551 | 380k | * pi2_tmp[7 * trans_size] |
552 | 380k | + g_ai2_ihevc_trans_16[9][k] |
553 | 380k | * pi2_tmp[9 * trans_size] |
554 | 380k | + g_ai2_ihevc_trans_16[11][k] |
555 | 380k | * pi2_tmp[11 * trans_size] |
556 | 380k | + g_ai2_ihevc_trans_16[13][k] |
557 | 380k | * pi2_tmp[13 * trans_size] |
558 | 380k | + g_ai2_ihevc_trans_16[15][k] |
559 | 380k | * pi2_tmp[15 * trans_size]; |
560 | 380k | } |
561 | 237k | for(k = 0; k < 4; k++) |
562 | 190k | { |
563 | 190k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
564 | 190k | + g_ai2_ihevc_trans_16[6][k] |
565 | 190k | * pi2_tmp[6 * trans_size] |
566 | 190k | + g_ai2_ihevc_trans_16[10][k] |
567 | 190k | * pi2_tmp[10 * trans_size] |
568 | 190k | + g_ai2_ihevc_trans_16[14][k] |
569 | 190k | * pi2_tmp[14 * trans_size]; |
570 | 190k | } |
571 | 47.5k | eeo[0] = |
572 | 47.5k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
573 | 47.5k | + g_ai2_ihevc_trans_16[12][0] |
574 | 47.5k | * pi2_tmp[12 |
575 | 47.5k | * trans_size]; |
576 | 47.5k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
577 | 47.5k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
578 | 47.5k | eeo[1] = |
579 | 47.5k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
580 | 47.5k | + g_ai2_ihevc_trans_16[12][1] |
581 | 47.5k | * pi2_tmp[12 |
582 | 47.5k | * trans_size]; |
583 | 47.5k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
584 | 47.5k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
585 | | |
586 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
587 | 142k | for(k = 0; k < 2; k++) |
588 | 95.0k | { |
589 | 95.0k | ee[k] = eee[k] + eeo[k]; |
590 | 95.0k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
591 | 95.0k | } |
592 | 237k | for(k = 0; k < 4; k++) |
593 | 190k | { |
594 | 190k | e[k] = ee[k] + eo[k]; |
595 | 190k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
596 | 190k | } |
597 | 427k | for(k = 0; k < 8; k++) |
598 | 380k | { |
599 | 380k | WORD32 itrans_out; |
600 | 380k | itrans_out = |
601 | 380k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
602 | 380k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
603 | 380k | itrans_out = |
604 | 380k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
605 | 380k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
606 | 380k | } |
607 | 47.5k | pi2_tmp++; |
608 | 47.5k | pu1_pred += pred_strd; |
609 | 47.5k | pu1_dst += dst_strd; |
610 | 47.5k | } |
611 | 2.96k | } |
612 | | /************************************************************************************************/ |
613 | | /************************************END - IT_RECON_16x16****************************************/ |
614 | | /************************************************************************************************/ |
615 | 3.69k | } |
616 | 47.1k | else /* All rows of input are non-zero */ |
617 | 47.1k | { |
618 | | /* Inverse Transform 1st stage */ |
619 | | /************************************************************************************************/ |
620 | | /**********************************START - IT_RECON_16x16****************************************/ |
621 | | /************************************************************************************************/ |
622 | | |
623 | 47.1k | shift = IT_SHIFT_STAGE_1; |
624 | 47.1k | add = 1 << (shift - 1); |
625 | | |
626 | 759k | for(j = 0; j < row_limit_2nd_stage; j++) |
627 | 712k | { |
628 | | /* Checking for Zero Cols */ |
629 | 712k | if((zero_cols & 1) == 1) |
630 | 10.0k | { |
631 | 10.0k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
632 | 10.0k | } |
633 | 702k | else |
634 | 702k | { |
635 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
636 | 6.31M | for(k = 0; k < 8; k++) |
637 | 5.61M | { |
638 | 5.61M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
639 | 5.61M | + g_ai2_ihevc_trans_16[3][k] |
640 | 5.61M | * pi2_src[3 * src_strd] |
641 | 5.61M | + g_ai2_ihevc_trans_16[5][k] |
642 | 5.61M | * pi2_src[5 * src_strd] |
643 | 5.61M | + g_ai2_ihevc_trans_16[7][k] |
644 | 5.61M | * pi2_src[7 * src_strd] |
645 | 5.61M | + g_ai2_ihevc_trans_16[9][k] |
646 | 5.61M | * pi2_src[9 * src_strd] |
647 | 5.61M | + g_ai2_ihevc_trans_16[11][k] |
648 | 5.61M | * pi2_src[11 * src_strd] |
649 | 5.61M | + g_ai2_ihevc_trans_16[13][k] |
650 | 5.61M | * pi2_src[13 * src_strd] |
651 | 5.61M | + g_ai2_ihevc_trans_16[15][k] |
652 | 5.61M | * pi2_src[15 * src_strd]; |
653 | 5.61M | } |
654 | 3.51M | for(k = 0; k < 4; k++) |
655 | 2.80M | { |
656 | 2.80M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
657 | 2.80M | + g_ai2_ihevc_trans_16[6][k] |
658 | 2.80M | * pi2_src[6 * src_strd] |
659 | 2.80M | + g_ai2_ihevc_trans_16[10][k] |
660 | 2.80M | * pi2_src[10 * src_strd] |
661 | 2.80M | + g_ai2_ihevc_trans_16[14][k] |
662 | 2.80M | * pi2_src[14 * src_strd]; |
663 | 2.80M | } |
664 | 702k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] |
665 | 702k | + g_ai2_ihevc_trans_16[12][0] |
666 | 702k | * pi2_src[12 * src_strd]; |
667 | 702k | eee[0] = |
668 | 702k | g_ai2_ihevc_trans_16[0][0] * pi2_src[0] |
669 | 702k | + g_ai2_ihevc_trans_16[8][0] |
670 | 702k | * pi2_src[8 |
671 | 702k | * src_strd]; |
672 | 702k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] |
673 | 702k | + g_ai2_ihevc_trans_16[12][1] |
674 | 702k | * pi2_src[12 * src_strd]; |
675 | 702k | eee[1] = |
676 | 702k | g_ai2_ihevc_trans_16[0][1] * pi2_src[0] |
677 | 702k | + g_ai2_ihevc_trans_16[8][1] |
678 | 702k | * pi2_src[8 |
679 | 702k | * src_strd]; |
680 | | |
681 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
682 | 2.10M | for(k = 0; k < 2; k++) |
683 | 1.40M | { |
684 | 1.40M | ee[k] = eee[k] + eeo[k]; |
685 | 1.40M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
686 | 1.40M | } |
687 | 3.51M | for(k = 0; k < 4; k++) |
688 | 2.80M | { |
689 | 2.80M | e[k] = ee[k] + eo[k]; |
690 | 2.80M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
691 | 2.80M | } |
692 | 6.31M | for(k = 0; k < 8; k++) |
693 | 5.61M | { |
694 | 5.61M | pi2_tmp[k] = |
695 | 5.61M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
696 | 5.61M | pi2_tmp[k + 8] = |
697 | 5.61M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
698 | 5.61M | } |
699 | 702k | } |
700 | 712k | pi2_src++; |
701 | 712k | pi2_tmp += trans_size; |
702 | 712k | zero_cols = zero_cols >> 1; |
703 | 712k | } |
704 | | |
705 | 47.1k | pi2_tmp = pi2_tmp_orig; |
706 | | |
707 | | /* Inverse Transform 2nd stage */ |
708 | 47.1k | shift = IT_SHIFT_STAGE_2; |
709 | 47.1k | add = 1 << (shift - 1); |
710 | | |
711 | 47.1k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
712 | 2.44k | { |
713 | 41.5k | for(j = 0; j < trans_size; j++) |
714 | 39.1k | { |
715 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
716 | 352k | for(k = 0; k < 8; k++) |
717 | 313k | { |
718 | 313k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
719 | 313k | + g_ai2_ihevc_trans_16[3][k] |
720 | 313k | * pi2_tmp[3 * trans_size]; |
721 | 313k | } |
722 | 195k | for(k = 0; k < 4; k++) |
723 | 156k | { |
724 | 156k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
725 | 156k | } |
726 | 39.1k | eeo[0] = 0; |
727 | 39.1k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
728 | 39.1k | eeo[1] = 0; |
729 | 39.1k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
730 | | |
731 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
732 | 117k | for(k = 0; k < 2; k++) |
733 | 78.3k | { |
734 | 78.3k | ee[k] = eee[k] + eeo[k]; |
735 | 78.3k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
736 | 78.3k | } |
737 | 195k | for(k = 0; k < 4; k++) |
738 | 156k | { |
739 | 156k | e[k] = ee[k] + eo[k]; |
740 | 156k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
741 | 156k | } |
742 | 352k | for(k = 0; k < 8; k++) |
743 | 313k | { |
744 | 313k | WORD32 itrans_out; |
745 | 313k | itrans_out = |
746 | 313k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
747 | 313k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
748 | 313k | itrans_out = |
749 | 313k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
750 | 313k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
751 | 313k | } |
752 | 39.1k | pi2_tmp++; |
753 | 39.1k | pu1_pred += pred_strd; |
754 | 39.1k | pu1_dst += dst_strd; |
755 | 39.1k | } |
756 | 2.44k | } |
757 | 44.6k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 4 rows of output of 1st stage are non-zero */ |
758 | 1.56k | { |
759 | 26.6k | for(j = 0; j < trans_size; j++) |
760 | 25.0k | { |
761 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
762 | 225k | for(k = 0; k < 8; k++) |
763 | 200k | { |
764 | 200k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
765 | 200k | + g_ai2_ihevc_trans_16[3][k] |
766 | 200k | * pi2_tmp[3 * trans_size] |
767 | 200k | + g_ai2_ihevc_trans_16[5][k] |
768 | 200k | * pi2_tmp[5 * trans_size] |
769 | 200k | + g_ai2_ihevc_trans_16[7][k] |
770 | 200k | * pi2_tmp[7 * trans_size]; |
771 | 200k | } |
772 | 125k | for(k = 0; k < 4; k++) |
773 | 100k | { |
774 | 100k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
775 | 100k | + g_ai2_ihevc_trans_16[6][k] |
776 | 100k | * pi2_tmp[6 * trans_size]; |
777 | 100k | } |
778 | 25.0k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
779 | 25.0k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
780 | 25.0k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
781 | 25.0k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
782 | | |
783 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
784 | 75.2k | for(k = 0; k < 2; k++) |
785 | 50.1k | { |
786 | 50.1k | ee[k] = eee[k] + eeo[k]; |
787 | 50.1k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
788 | 50.1k | } |
789 | 125k | for(k = 0; k < 4; k++) |
790 | 100k | { |
791 | 100k | e[k] = ee[k] + eo[k]; |
792 | 100k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
793 | 100k | } |
794 | 225k | for(k = 0; k < 8; k++) |
795 | 200k | { |
796 | 200k | WORD32 itrans_out; |
797 | 200k | itrans_out = |
798 | 200k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
799 | 200k | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
800 | 200k | itrans_out = |
801 | 200k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
802 | 200k | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
803 | 200k | } |
804 | 25.0k | pi2_tmp++; |
805 | 25.0k | pu1_pred += pred_strd; |
806 | 25.0k | pu1_dst += dst_strd; |
807 | 25.0k | } |
808 | 1.56k | } |
809 | 43.1k | else /* All rows of output of 1st stage are non-zero */ |
810 | 43.1k | { |
811 | 732k | for(j = 0; j < trans_size; j++) |
812 | 689k | { |
813 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
814 | 6.20M | for(k = 0; k < 8; k++) |
815 | 5.51M | { |
816 | 5.51M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
817 | 5.51M | + g_ai2_ihevc_trans_16[3][k] |
818 | 5.51M | * pi2_tmp[3 * trans_size] |
819 | 5.51M | + g_ai2_ihevc_trans_16[5][k] |
820 | 5.51M | * pi2_tmp[5 * trans_size] |
821 | 5.51M | + g_ai2_ihevc_trans_16[7][k] |
822 | 5.51M | * pi2_tmp[7 * trans_size] |
823 | 5.51M | + g_ai2_ihevc_trans_16[9][k] |
824 | 5.51M | * pi2_tmp[9 * trans_size] |
825 | 5.51M | + g_ai2_ihevc_trans_16[11][k] |
826 | 5.51M | * pi2_tmp[11 * trans_size] |
827 | 5.51M | + g_ai2_ihevc_trans_16[13][k] |
828 | 5.51M | * pi2_tmp[13 * trans_size] |
829 | 5.51M | + g_ai2_ihevc_trans_16[15][k] |
830 | 5.51M | * pi2_tmp[15 * trans_size]; |
831 | 5.51M | } |
832 | 3.44M | for(k = 0; k < 4; k++) |
833 | 2.75M | { |
834 | 2.75M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
835 | 2.75M | + g_ai2_ihevc_trans_16[6][k] |
836 | 2.75M | * pi2_tmp[6 * trans_size] |
837 | 2.75M | + g_ai2_ihevc_trans_16[10][k] |
838 | 2.75M | * pi2_tmp[10 * trans_size] |
839 | 2.75M | + g_ai2_ihevc_trans_16[14][k] |
840 | 2.75M | * pi2_tmp[14 * trans_size]; |
841 | 2.75M | } |
842 | 689k | eeo[0] = |
843 | 689k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
844 | 689k | + g_ai2_ihevc_trans_16[12][0] |
845 | 689k | * pi2_tmp[12 |
846 | 689k | * trans_size]; |
847 | 689k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
848 | 689k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
849 | 689k | eeo[1] = |
850 | 689k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
851 | 689k | + g_ai2_ihevc_trans_16[12][1] |
852 | 689k | * pi2_tmp[12 |
853 | 689k | * trans_size]; |
854 | 689k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
855 | 689k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
856 | | |
857 | | /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */ |
858 | 2.06M | for(k = 0; k < 2; k++) |
859 | 1.37M | { |
860 | 1.37M | ee[k] = eee[k] + eeo[k]; |
861 | 1.37M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
862 | 1.37M | } |
863 | 3.44M | for(k = 0; k < 4; k++) |
864 | 2.75M | { |
865 | 2.75M | e[k] = ee[k] + eo[k]; |
866 | 2.75M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
867 | 2.75M | } |
868 | 6.20M | for(k = 0; k < 8; k++) |
869 | 5.51M | { |
870 | 5.51M | WORD32 itrans_out; |
871 | 5.51M | itrans_out = |
872 | 5.51M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
873 | 5.51M | pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k])); |
874 | 5.51M | itrans_out = |
875 | 5.51M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
876 | 5.51M | pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8])); |
877 | 5.51M | } |
878 | 689k | pi2_tmp++; |
879 | 689k | pu1_pred += pred_strd; |
880 | 689k | pu1_dst += dst_strd; |
881 | 689k | } |
882 | 43.1k | } |
883 | | /************************************************************************************************/ |
884 | | /************************************END - IT_RECON_16x16****************************************/ |
885 | | /************************************************************************************************/ |
886 | 47.1k | } |
887 | | |
888 | 57.2k | } |
889 | | |