/src/libhevc/common/ihevc_chroma_itrans_recon_16x16.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_chroma_itrans_recon_16x16.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for 16x16 inverse transform and reconstruction |
25 | | * of chroma interleaved data. |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_chroma_itrans_recon_16x16() |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | |
39 | | #include <stdio.h> |
40 | | #include <string.h> |
41 | | #include "ihevc_typedefs.h" |
42 | | #include "ihevc_macros.h" |
43 | | #include "ihevc_platform_macros.h" |
44 | | #include "ihevc_defs.h" |
45 | | #include "ihevc_trans_tables.h" |
46 | | #include "ihevc_chroma_itrans_recon.h" |
47 | | #include "ihevc_func_selector.h" |
48 | | #include "ihevc_trans_macros.h" |
49 | | |
50 | | /* All the functions work on one component (U or V) of interleaved data, depending upon the pointers passed to them */ |
51 | | /* Data visualization */ |
52 | | /* U V U V U V U V */ |
53 | | /* U V U V U V U V */ |
54 | | /* U V U V U V U V */ |
55 | | /* U V U V U V U V */ |
56 | | /* If the pointer points to the first byte of the above stream (U), the functions will operate on the U component */ |
57 | | /* If the pointer points to the second byte of the above stream (V), the functions will operate on the V component */ |
58 | | |
59 | | |
60 | | /** |
61 | | ******************************************************************************* |
62 | | * |
63 | | * @brief |
64 | | * This function performs Inverse transform and reconstruction for 16x16 |
65 | | * input block |
66 | | * |
67 | | * @par Description: |
68 | | * Performs inverse transform and adds the prediction data and clips output |
69 | | * to 8 bit |
70 | | * |
71 | | * @param[in] pi2_src |
72 | | * Input 16x16 coefficients |
73 | | * |
74 | | * @param[in] pi2_tmp |
75 | | * Temporary 16x16 buffer for storing inverse transform |
76 | | * 1st stage output |
77 | | * |
78 | | * @param[in] pu1_pred |
79 | | * Prediction 16x16 block |
80 | | * |
81 | | * @param[out] pu1_dst |
82 | | * Output 16x16 block |
83 | | * |
84 | | * @param[in] src_strd |
85 | | * Input stride |
86 | | * |
87 | | * @param[in] pred_strd |
88 | | * Prediction stride |
89 | | * |
90 | | * @param[in] dst_strd |
91 | | * Output Stride |
92 | | * |
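93 | | * @param[in] zero_cols |
94 | | * Bitmask of zero columns in pi2_src (bit j set => column j is skipped as all zero) |
95 | | * |
96 | | * @param[in] zero_rows |
97 | | * Bitmask of zero rows in pi2_src (a set bit marks that row as all zero) |
98 | | * |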
99 | | * @returns Void |
100 | | * |
101 | | * @remarks |
102 | | * None |
103 | | * |
104 | | ******************************************************************************* |
105 | | */ |
106 | | |
107 | | |
108 | | void ihevc_chroma_itrans_recon_16x16(WORD16 *pi2_src, |
109 | | WORD16 *pi2_tmp, |
110 | | UWORD8 *pu1_pred, |
111 | | UWORD8 *pu1_dst, |
112 | | WORD32 src_strd, |
113 | | WORD32 pred_strd, |
114 | | WORD32 dst_strd, |
115 | | WORD32 zero_cols, |
116 | | WORD32 zero_rows) |
117 | 85.6k | { |
118 | 85.6k | WORD32 j, k; |
119 | 85.6k | WORD32 e[8], o[8]; |
120 | 85.6k | WORD32 ee[4], eo[4]; |
121 | 85.6k | WORD32 eee[2], eeo[2]; |
122 | 85.6k | WORD32 add; |
123 | 85.6k | WORD32 shift; |
124 | 85.6k | WORD16 *pi2_tmp_orig; |
125 | 85.6k | WORD32 trans_size; |
126 | 85.6k | WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols; |
127 | | |
128 | 85.6k | trans_size = TRANS_SIZE_16; |
129 | 85.6k | pi2_tmp_orig = pi2_tmp; |
130 | | |
131 | 85.6k | if((zero_cols & 0xFFF0) == 0xFFF0) |
132 | 41.3k | row_limit_2nd_stage = 4; |
133 | 44.2k | else if((zero_cols & 0xFF00) == 0xFF00) |
134 | 18.6k | row_limit_2nd_stage = 8; |
135 | 25.6k | else |
136 | 25.6k | row_limit_2nd_stage = TRANS_SIZE_16; |
137 | | |
138 | 85.6k | if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ |
139 | 41.3k | { |
140 | | /************************************************************************************************/ |
141 | | /**********************************START - IT_RECON_16x16****************************************/ |
142 | | /************************************************************************************************/ |
143 | | |
144 | | /* Inverse Transform 1st stage */ |
145 | 41.3k | shift = IT_SHIFT_STAGE_1; |
146 | 41.3k | add = 1 << (shift - 1); |
147 | | |
148 | 343k | for(j = 0; j < row_limit_2nd_stage; j++) |
149 | 302k | { |
150 | | /* Checking for Zero Cols */ |
151 | 302k | if((zero_cols & 1) == 1) |
152 | 133k | { |
153 | 133k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
154 | 133k | } |
155 | 168k | else |
156 | 168k | { |
157 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
158 | 1.51M | for(k = 0; k < 8; k++) |
159 | 1.34M | { |
160 | 1.34M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
161 | 1.34M | + g_ai2_ihevc_trans_16[3][k] |
162 | 1.34M | * pi2_src[3 * src_strd]; |
163 | 1.34M | } |
164 | 843k | for(k = 0; k < 4; k++) |
165 | 674k | { |
166 | 674k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; |
167 | 674k | } |
168 | 168k | eeo[0] = 0; |
169 | 168k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
170 | 168k | eeo[1] = 0; |
171 | 168k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
172 | | |
173 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
174 | 506k | for(k = 0; k < 2; k++) |
175 | 337k | { |
176 | 337k | ee[k] = eee[k] + eeo[k]; |
177 | 337k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
178 | 337k | } |
179 | 843k | for(k = 0; k < 4; k++) |
180 | 674k | { |
181 | 674k | e[k] = ee[k] + eo[k]; |
182 | 674k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
183 | 674k | } |
184 | 1.51M | for(k = 0; k < 8; k++) |
185 | 1.34M | { |
186 | 1.34M | pi2_tmp[k] = |
187 | 1.34M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
188 | 1.34M | pi2_tmp[k + 8] = |
189 | 1.34M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
190 | 1.34M | } |
191 | 168k | } |
192 | 302k | pi2_src++; |
193 | 302k | pi2_tmp += trans_size; |
194 | 302k | zero_cols = zero_cols >> 1; |
195 | 302k | } |
196 | | |
197 | 41.3k | pi2_tmp = pi2_tmp_orig; |
198 | | |
199 | | /* Inverse Transform 2nd stage */ |
200 | 41.3k | shift = IT_SHIFT_STAGE_2; |
201 | 41.3k | add = 1 << (shift - 1); |
202 | 41.3k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
203 | 25.1k | { |
204 | 427k | for(j = 0; j < trans_size; j++) |
205 | 402k | { |
206 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
207 | 3.62M | for(k = 0; k < 8; k++) |
208 | 3.22M | { |
209 | 3.22M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
210 | 3.22M | + g_ai2_ihevc_trans_16[3][k] |
211 | 3.22M | * pi2_tmp[3 * trans_size]; |
212 | 3.22M | } |
213 | 2.01M | for(k = 0; k < 4; k++) |
214 | 1.61M | { |
215 | 1.61M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
216 | 1.61M | } |
217 | 402k | eeo[0] = 0; |
218 | 402k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
219 | 402k | eeo[1] = 0; |
220 | 402k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
221 | | |
222 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
223 | 1.20M | for(k = 0; k < 2; k++) |
224 | 805k | { |
225 | 805k | ee[k] = eee[k] + eeo[k]; |
226 | 805k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
227 | 805k | } |
228 | 2.01M | for(k = 0; k < 4; k++) |
229 | 1.61M | { |
230 | 1.61M | e[k] = ee[k] + eo[k]; |
231 | 1.61M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
232 | 1.61M | } |
233 | 3.62M | for(k = 0; k < 8; k++) |
234 | 3.22M | { |
235 | 3.22M | WORD32 itrans_out; |
236 | 3.22M | itrans_out = |
237 | 3.22M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
238 | 3.22M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
239 | 3.22M | itrans_out = |
240 | 3.22M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
241 | 3.22M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
242 | 3.22M | } |
243 | 402k | pi2_tmp++; |
244 | 402k | pu1_pred += pred_strd; |
245 | 402k | pu1_dst += dst_strd; |
246 | 402k | } |
247 | 25.1k | } |
248 | 16.1k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
249 | 7.19k | { |
250 | 122k | for(j = 0; j < trans_size; j++) |
251 | 115k | { |
252 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
253 | 1.03M | for(k = 0; k < 8; k++) |
254 | 921k | { |
255 | 921k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
256 | 921k | + g_ai2_ihevc_trans_16[3][k] |
257 | 921k | * pi2_tmp[3 * trans_size] |
258 | 921k | + g_ai2_ihevc_trans_16[5][k] |
259 | 921k | * pi2_tmp[5 * trans_size] |
260 | 921k | + g_ai2_ihevc_trans_16[7][k] |
261 | 921k | * pi2_tmp[7 * trans_size]; |
262 | 921k | } |
263 | 575k | for(k = 0; k < 4; k++) |
264 | 460k | { |
265 | 460k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
266 | 460k | + g_ai2_ihevc_trans_16[6][k] |
267 | 460k | * pi2_tmp[6 * trans_size]; |
268 | 460k | } |
269 | 115k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
270 | 115k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
271 | 115k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
272 | 115k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
273 | | |
274 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
275 | 345k | for(k = 0; k < 2; k++) |
276 | 230k | { |
277 | 230k | ee[k] = eee[k] + eeo[k]; |
278 | 230k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
279 | 230k | } |
280 | 575k | for(k = 0; k < 4; k++) |
281 | 460k | { |
282 | 460k | e[k] = ee[k] + eo[k]; |
283 | 460k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
284 | 460k | } |
285 | 1.03M | for(k = 0; k < 8; k++) |
286 | 921k | { |
287 | 921k | WORD32 itrans_out; |
288 | 921k | itrans_out = |
289 | 921k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
290 | 921k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
291 | 921k | itrans_out = |
292 | 921k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
293 | 921k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
294 | 921k | } |
295 | 115k | pi2_tmp++; |
296 | 115k | pu1_pred += pred_strd; |
297 | 115k | pu1_dst += dst_strd; |
298 | 115k | } |
299 | 7.19k | } |
300 | 9.00k | else /* All rows of output of 1st stage are non-zero */ |
301 | 9.00k | { |
302 | 153k | for(j = 0; j < trans_size; j++) |
303 | 144k | { |
304 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
305 | 1.29M | for(k = 0; k < 8; k++) |
306 | 1.15M | { |
307 | 1.15M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
308 | 1.15M | + g_ai2_ihevc_trans_16[3][k] |
309 | 1.15M | * pi2_tmp[3 * trans_size] |
310 | 1.15M | + g_ai2_ihevc_trans_16[5][k] |
311 | 1.15M | * pi2_tmp[5 * trans_size] |
312 | 1.15M | + g_ai2_ihevc_trans_16[7][k] |
313 | 1.15M | * pi2_tmp[7 * trans_size] |
314 | 1.15M | + g_ai2_ihevc_trans_16[9][k] |
315 | 1.15M | * pi2_tmp[9 * trans_size] |
316 | 1.15M | + g_ai2_ihevc_trans_16[11][k] |
317 | 1.15M | * pi2_tmp[11 * trans_size] |
318 | 1.15M | + g_ai2_ihevc_trans_16[13][k] |
319 | 1.15M | * pi2_tmp[13 * trans_size] |
320 | 1.15M | + g_ai2_ihevc_trans_16[15][k] |
321 | 1.15M | * pi2_tmp[15 * trans_size]; |
322 | 1.15M | } |
323 | 720k | for(k = 0; k < 4; k++) |
324 | 576k | { |
325 | 576k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
326 | 576k | + g_ai2_ihevc_trans_16[6][k] |
327 | 576k | * pi2_tmp[6 * trans_size] |
328 | 576k | + g_ai2_ihevc_trans_16[10][k] |
329 | 576k | * pi2_tmp[10 * trans_size] |
330 | 576k | + g_ai2_ihevc_trans_16[14][k] |
331 | 576k | * pi2_tmp[14 * trans_size]; |
332 | 576k | } |
333 | 144k | eeo[0] = |
334 | 144k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
335 | 144k | + g_ai2_ihevc_trans_16[12][0] |
336 | 144k | * pi2_tmp[12 |
337 | 144k | * trans_size]; |
338 | 144k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
339 | 144k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
340 | 144k | eeo[1] = |
341 | 144k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
342 | 144k | + g_ai2_ihevc_trans_16[12][1] |
343 | 144k | * pi2_tmp[12 |
344 | 144k | * trans_size]; |
345 | 144k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
346 | 144k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
347 | | |
348 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
349 | 432k | for(k = 0; k < 2; k++) |
350 | 288k | { |
351 | 288k | ee[k] = eee[k] + eeo[k]; |
352 | 288k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
353 | 288k | } |
354 | 720k | for(k = 0; k < 4; k++) |
355 | 576k | { |
356 | 576k | e[k] = ee[k] + eo[k]; |
357 | 576k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
358 | 576k | } |
359 | 1.29M | for(k = 0; k < 8; k++) |
360 | 1.15M | { |
361 | 1.15M | WORD32 itrans_out; |
362 | 1.15M | itrans_out = |
363 | 1.15M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
364 | 1.15M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
365 | 1.15M | itrans_out = |
366 | 1.15M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
367 | 1.15M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
368 | 1.15M | } |
369 | 144k | pi2_tmp++; |
370 | 144k | pu1_pred += pred_strd; |
371 | 144k | pu1_dst += dst_strd; |
372 | 144k | } |
373 | 9.00k | } |
374 | | /************************************************************************************************/ |
375 | | /************************************END - IT_RECON_16x16****************************************/ |
376 | | /************************************************************************************************/ |
377 | 41.3k | } |
378 | 44.2k | else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ |
379 | 21.5k | { |
380 | | /************************************************************************************************/ |
381 | | /**********************************START - IT_RECON_16x16****************************************/ |
382 | | /************************************************************************************************/ |
383 | | |
384 | | /* Inverse Transform 1st stage */ |
385 | 21.5k | shift = IT_SHIFT_STAGE_1; |
386 | 21.5k | add = 1 << (shift - 1); |
387 | | |
388 | 197k | for(j = 0; j < row_limit_2nd_stage; j++) |
389 | 176k | { |
390 | | /* Checking for Zero Cols */ |
391 | 176k | if((zero_cols & 1) == 1) |
392 | 98.6k | { |
393 | 98.6k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
394 | 98.6k | } |
395 | 77.4k | else |
396 | 77.4k | { |
397 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
398 | 696k | for(k = 0; k < 8; k++) |
399 | 619k | { |
400 | 619k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
401 | 619k | + g_ai2_ihevc_trans_16[3][k] |
402 | 619k | * pi2_src[3 * src_strd] |
403 | 619k | + g_ai2_ihevc_trans_16[5][k] |
404 | 619k | * pi2_src[5 * src_strd] |
405 | 619k | + g_ai2_ihevc_trans_16[7][k] |
406 | 619k | * pi2_src[7 * src_strd]; |
407 | 619k | } |
408 | 387k | for(k = 0; k < 4; k++) |
409 | 309k | { |
410 | 309k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
411 | 309k | + g_ai2_ihevc_trans_16[6][k] |
412 | 309k | * pi2_src[6 * src_strd]; |
413 | 309k | } |
414 | 77.4k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; |
415 | 77.4k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
416 | 77.4k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; |
417 | 77.4k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
418 | | |
419 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
420 | 232k | for(k = 0; k < 2; k++) |
421 | 154k | { |
422 | 154k | ee[k] = eee[k] + eeo[k]; |
423 | 154k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
424 | 154k | } |
425 | 387k | for(k = 0; k < 4; k++) |
426 | 309k | { |
427 | 309k | e[k] = ee[k] + eo[k]; |
428 | 309k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
429 | 309k | } |
430 | 696k | for(k = 0; k < 8; k++) |
431 | 619k | { |
432 | 619k | pi2_tmp[k] = |
433 | 619k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
434 | 619k | pi2_tmp[k + 8] = |
435 | 619k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
436 | 619k | } |
437 | 77.4k | } |
438 | 176k | pi2_src++; |
439 | 176k | pi2_tmp += trans_size; |
440 | 176k | zero_cols = zero_cols >> 1; |
441 | 176k | } |
442 | | |
443 | 21.5k | pi2_tmp = pi2_tmp_orig; |
444 | | |
445 | | /* Inverse Transform 2nd stage */ |
446 | 21.5k | shift = IT_SHIFT_STAGE_2; |
447 | 21.5k | add = 1 << (shift - 1); |
448 | 21.5k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
449 | 11.0k | { |
450 | 187k | for(j = 0; j < trans_size; j++) |
451 | 176k | { |
452 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
453 | 1.58M | for(k = 0; k < 8; k++) |
454 | 1.40M | { |
455 | 1.40M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
456 | 1.40M | + g_ai2_ihevc_trans_16[3][k] |
457 | 1.40M | * pi2_tmp[3 * trans_size]; |
458 | 1.40M | } |
459 | 881k | for(k = 0; k < 4; k++) |
460 | 705k | { |
461 | 705k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
462 | 705k | } |
463 | 176k | eeo[0] = 0; |
464 | 176k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
465 | 176k | eeo[1] = 0; |
466 | 176k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
467 | | |
468 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
469 | 529k | for(k = 0; k < 2; k++) |
470 | 352k | { |
471 | 352k | ee[k] = eee[k] + eeo[k]; |
472 | 352k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
473 | 352k | } |
474 | 881k | for(k = 0; k < 4; k++) |
475 | 704k | { |
476 | 704k | e[k] = ee[k] + eo[k]; |
477 | 704k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
478 | 704k | } |
479 | 1.58M | for(k = 0; k < 8; k++) |
480 | 1.40M | { |
481 | 1.40M | WORD32 itrans_out; |
482 | 1.40M | itrans_out = |
483 | 1.40M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
484 | 1.40M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
485 | 1.40M | itrans_out = |
486 | 1.40M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
487 | 1.40M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
488 | 1.40M | } |
489 | 176k | pi2_tmp++; |
490 | 176k | pu1_pred += pred_strd; |
491 | 176k | pu1_dst += dst_strd; |
492 | 176k | } |
493 | 11.0k | } |
494 | 10.4k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
495 | 4.49k | { |
496 | 76.3k | for(j = 0; j < trans_size; j++) |
497 | 71.8k | { |
498 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
499 | 646k | for(k = 0; k < 8; k++) |
500 | 574k | { |
501 | 574k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
502 | 574k | + g_ai2_ihevc_trans_16[3][k] |
503 | 574k | * pi2_tmp[3 * trans_size] |
504 | 574k | + g_ai2_ihevc_trans_16[5][k] |
505 | 574k | * pi2_tmp[5 * trans_size] |
506 | 574k | + g_ai2_ihevc_trans_16[7][k] |
507 | 574k | * pi2_tmp[7 * trans_size]; |
508 | 574k | } |
509 | 359k | for(k = 0; k < 4; k++) |
510 | 287k | { |
511 | 287k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
512 | 287k | + g_ai2_ihevc_trans_16[6][k] |
513 | 287k | * pi2_tmp[6 * trans_size]; |
514 | 287k | } |
515 | 71.8k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
516 | 71.8k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
517 | 71.8k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
518 | 71.8k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
519 | | |
520 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
521 | 215k | for(k = 0; k < 2; k++) |
522 | 143k | { |
523 | 143k | ee[k] = eee[k] + eeo[k]; |
524 | 143k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
525 | 143k | } |
526 | 359k | for(k = 0; k < 4; k++) |
527 | 287k | { |
528 | 287k | e[k] = ee[k] + eo[k]; |
529 | 287k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
530 | 287k | } |
531 | 646k | for(k = 0; k < 8; k++) |
532 | 574k | { |
533 | 574k | WORD32 itrans_out; |
534 | 574k | itrans_out = |
535 | 574k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
536 | 574k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
537 | 574k | itrans_out = |
538 | 574k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
539 | 574k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
540 | 574k | } |
541 | 71.8k | pi2_tmp++; |
542 | 71.8k | pu1_pred += pred_strd; |
543 | 71.8k | pu1_dst += dst_strd; |
544 | 71.8k | } |
545 | 4.49k | } |
546 | 5.99k | else /* All rows of output of 1st stage are non-zero */ |
547 | 5.99k | { |
548 | 101k | for(j = 0; j < trans_size; j++) |
549 | 95.6k | { |
550 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
551 | 858k | for(k = 0; k < 8; k++) |
552 | 762k | { |
553 | 762k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
554 | 762k | + g_ai2_ihevc_trans_16[3][k] |
555 | 762k | * pi2_tmp[3 * trans_size] |
556 | 762k | + g_ai2_ihevc_trans_16[5][k] |
557 | 762k | * pi2_tmp[5 * trans_size] |
558 | 762k | + g_ai2_ihevc_trans_16[7][k] |
559 | 762k | * pi2_tmp[7 * trans_size] |
560 | 762k | + g_ai2_ihevc_trans_16[9][k] |
561 | 762k | * pi2_tmp[9 * trans_size] |
562 | 762k | + g_ai2_ihevc_trans_16[11][k] |
563 | 762k | * pi2_tmp[11 * trans_size] |
564 | 762k | + g_ai2_ihevc_trans_16[13][k] |
565 | 762k | * pi2_tmp[13 * trans_size] |
566 | 762k | + g_ai2_ihevc_trans_16[15][k] |
567 | 762k | * pi2_tmp[15 * trans_size]; |
568 | 762k | } |
569 | 478k | for(k = 0; k < 4; k++) |
570 | 382k | { |
571 | 382k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
572 | 382k | + g_ai2_ihevc_trans_16[6][k] |
573 | 382k | * pi2_tmp[6 * trans_size] |
574 | 382k | + g_ai2_ihevc_trans_16[10][k] |
575 | 382k | * pi2_tmp[10 * trans_size] |
576 | 382k | + g_ai2_ihevc_trans_16[14][k] |
577 | 382k | * pi2_tmp[14 * trans_size]; |
578 | 382k | } |
579 | 95.6k | eeo[0] = |
580 | 95.6k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
581 | 95.6k | + g_ai2_ihevc_trans_16[12][0] |
582 | 95.6k | * pi2_tmp[12 |
583 | 95.6k | * trans_size]; |
584 | 95.6k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
585 | 95.6k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
586 | 95.6k | eeo[1] = |
587 | 95.6k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
588 | 95.6k | + g_ai2_ihevc_trans_16[12][1] |
589 | 95.6k | * pi2_tmp[12 |
590 | 95.6k | * trans_size]; |
591 | 95.6k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
592 | 95.6k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
593 | | |
594 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
595 | 286k | for(k = 0; k < 2; k++) |
596 | 191k | { |
597 | 191k | ee[k] = eee[k] + eeo[k]; |
598 | 191k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
599 | 191k | } |
600 | 478k | for(k = 0; k < 4; k++) |
601 | 382k | { |
602 | 382k | e[k] = ee[k] + eo[k]; |
603 | 382k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
604 | 382k | } |
605 | 858k | for(k = 0; k < 8; k++) |
606 | 762k | { |
607 | 762k | WORD32 itrans_out; |
608 | 762k | itrans_out = |
609 | 762k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
610 | 762k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
611 | 762k | itrans_out = |
612 | 762k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
613 | 762k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
614 | 762k | } |
615 | 95.6k | pi2_tmp++; |
616 | 95.6k | pu1_pred += pred_strd; |
617 | 95.6k | pu1_dst += dst_strd; |
618 | 95.6k | } |
619 | 5.99k | } |
620 | | /************************************************************************************************/ |
621 | | /************************************END - IT_RECON_16x16****************************************/ |
622 | | /************************************************************************************************/ |
623 | 21.5k | } |
624 | 22.6k | else /* All rows of input are non-zero */ |
625 | 22.6k | { |
626 | | /************************************************************************************************/ |
627 | | /**********************************START - IT_RECON_16x16****************************************/ |
628 | | /************************************************************************************************/ |
629 | | |
630 | | /* Inverse Transform 1st stage */ |
631 | 22.6k | shift = IT_SHIFT_STAGE_1; |
632 | 22.6k | add = 1 << (shift - 1); |
633 | | |
634 | 269k | for(j = 0; j < row_limit_2nd_stage; j++) |
635 | 246k | { |
636 | | /* Checking for Zero Cols */ |
637 | 246k | if((zero_cols & 1) == 1) |
638 | 74.2k | { |
639 | 74.2k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
640 | 74.2k | } |
641 | 172k | else |
642 | 172k | { |
643 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
644 | 1.54M | for(k = 0; k < 8; k++) |
645 | 1.37M | { |
646 | 1.37M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
647 | 1.37M | + g_ai2_ihevc_trans_16[3][k] |
648 | 1.37M | * pi2_src[3 * src_strd] |
649 | 1.37M | + g_ai2_ihevc_trans_16[5][k] |
650 | 1.37M | * pi2_src[5 * src_strd] |
651 | 1.37M | + g_ai2_ihevc_trans_16[7][k] |
652 | 1.37M | * pi2_src[7 * src_strd] |
653 | 1.37M | + g_ai2_ihevc_trans_16[9][k] |
654 | 1.37M | * pi2_src[9 * src_strd] |
655 | 1.37M | + g_ai2_ihevc_trans_16[11][k] |
656 | 1.37M | * pi2_src[11 * src_strd] |
657 | 1.37M | + g_ai2_ihevc_trans_16[13][k] |
658 | 1.37M | * pi2_src[13 * src_strd] |
659 | 1.37M | + g_ai2_ihevc_trans_16[15][k] |
660 | 1.37M | * pi2_src[15 * src_strd]; |
661 | 1.37M | } |
662 | 860k | for(k = 0; k < 4; k++) |
663 | 688k | { |
664 | 688k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
665 | 688k | + g_ai2_ihevc_trans_16[6][k] |
666 | 688k | * pi2_src[6 * src_strd] |
667 | 688k | + g_ai2_ihevc_trans_16[10][k] |
668 | 688k | * pi2_src[10 * src_strd] |
669 | 688k | + g_ai2_ihevc_trans_16[14][k] |
670 | 688k | * pi2_src[14 * src_strd]; |
671 | 688k | } |
672 | 172k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] |
673 | 172k | + g_ai2_ihevc_trans_16[12][0] |
674 | 172k | * pi2_src[12 * src_strd]; |
675 | 172k | eee[0] = |
676 | 172k | g_ai2_ihevc_trans_16[0][0] * pi2_src[0] |
677 | 172k | + g_ai2_ihevc_trans_16[8][0] |
678 | 172k | * pi2_src[8 |
679 | 172k | * src_strd]; |
680 | 172k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] |
681 | 172k | + g_ai2_ihevc_trans_16[12][1] |
682 | 172k | * pi2_src[12 * src_strd]; |
683 | 172k | eee[1] = |
684 | 172k | g_ai2_ihevc_trans_16[0][1] * pi2_src[0] |
685 | 172k | + g_ai2_ihevc_trans_16[8][1] |
686 | 172k | * pi2_src[8 |
687 | 172k | * src_strd]; |
688 | | |
689 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
690 | 516k | for(k = 0; k < 2; k++) |
691 | 344k | { |
692 | 344k | ee[k] = eee[k] + eeo[k]; |
693 | 344k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
694 | 344k | } |
695 | 860k | for(k = 0; k < 4; k++) |
696 | 688k | { |
697 | 688k | e[k] = ee[k] + eo[k]; |
698 | 688k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
699 | 688k | } |
700 | 1.54M | for(k = 0; k < 8; k++) |
701 | 1.37M | { |
702 | 1.37M | pi2_tmp[k] = |
703 | 1.37M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
704 | 1.37M | pi2_tmp[k + 8] = |
705 | 1.37M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
706 | 1.37M | } |
707 | 172k | } |
708 | 246k | pi2_src++; |
709 | 246k | pi2_tmp += trans_size; |
710 | 246k | zero_cols = zero_cols >> 1; |
711 | 246k | } |
712 | | |
713 | 22.6k | pi2_tmp = pi2_tmp_orig; |
714 | | |
715 | | /* Inverse Transform 2nd stage */ |
716 | 22.6k | shift = IT_SHIFT_STAGE_2; |
717 | 22.6k | add = 1 << (shift - 1); |
718 | 22.6k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
719 | 5.10k | { |
720 | 86.7k | for(j = 0; j < trans_size; j++) |
721 | 81.6k | { |
722 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
723 | 734k | for(k = 0; k < 8; k++) |
724 | 652k | { |
725 | 652k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
726 | 652k | + g_ai2_ihevc_trans_16[3][k] |
727 | 652k | * pi2_tmp[3 * trans_size]; |
728 | 652k | } |
729 | 408k | for(k = 0; k < 4; k++) |
730 | 326k | { |
731 | 326k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
732 | 326k | } |
733 | 81.6k | eeo[0] = 0; |
734 | 81.6k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
735 | 81.6k | eeo[1] = 0; |
736 | 81.6k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
737 | | |
738 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
739 | 244k | for(k = 0; k < 2; k++) |
740 | 163k | { |
741 | 163k | ee[k] = eee[k] + eeo[k]; |
742 | 163k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
743 | 163k | } |
744 | 407k | for(k = 0; k < 4; k++) |
745 | 326k | { |
746 | 326k | e[k] = ee[k] + eo[k]; |
747 | 326k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
748 | 326k | } |
749 | 732k | for(k = 0; k < 8; k++) |
750 | 650k | { |
751 | 650k | WORD32 itrans_out; |
752 | 650k | itrans_out = |
753 | 650k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
754 | 650k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
755 | 650k | itrans_out = |
756 | 650k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
757 | 650k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
758 | 650k | } |
759 | 81.6k | pi2_tmp++; |
760 | 81.6k | pu1_pred += pred_strd; |
761 | 81.6k | pu1_dst += dst_strd; |
762 | 81.6k | } |
763 | 5.10k | } |
764 | 17.5k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
765 | 6.92k | { |
766 | 116k | for(j = 0; j < trans_size; j++) |
767 | 109k | { |
768 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
769 | 977k | for(k = 0; k < 8; k++) |
770 | 867k | { |
771 | 867k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
772 | 867k | + g_ai2_ihevc_trans_16[3][k] |
773 | 867k | * pi2_tmp[3 * trans_size] |
774 | 867k | + g_ai2_ihevc_trans_16[5][k] |
775 | 867k | * pi2_tmp[5 * trans_size] |
776 | 867k | + g_ai2_ihevc_trans_16[7][k] |
777 | 867k | * pi2_tmp[7 * trans_size]; |
778 | 867k | } |
779 | 546k | for(k = 0; k < 4; k++) |
780 | 436k | { |
781 | 436k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
782 | 436k | + g_ai2_ihevc_trans_16[6][k] |
783 | 436k | * pi2_tmp[6 * trans_size]; |
784 | 436k | } |
785 | 109k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
786 | 109k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
787 | 109k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
788 | 109k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
789 | | |
790 | | /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */ |
791 | 328k | for(k = 0; k < 2; k++) |
792 | 218k | { |
793 | 218k | ee[k] = eee[k] + eeo[k]; |
794 | 218k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
795 | 218k | } |
796 | 547k | for(k = 0; k < 4; k++) |
797 | 437k | { |
798 | 437k | e[k] = ee[k] + eo[k]; |
799 | 437k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
800 | 437k | } |
801 | 970k | for(k = 0; k < 8; k++) |
802 | 860k | { |
803 | 860k | WORD32 itrans_out; |
804 | 860k | itrans_out = |
805 | 860k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
806 | 860k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
807 | 860k | itrans_out = |
808 | 860k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
809 | 860k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
810 | 860k | } |
811 | 109k | pi2_tmp++; |
812 | 109k | pu1_pred += pred_strd; |
813 | 109k | pu1_dst += dst_strd; |
814 | 109k | } |
815 | 6.92k | } |
816 | 10.6k | else /* All rows of output of 1st stage are non-zero */ |
817 | 10.6k | { |
818 | 181k | for(j = 0; j < trans_size; j++) |
819 | 170k | { |
820 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
821 | 1.53M | for(k = 0; k < 8; k++) |
822 | 1.36M | { |
823 | 1.36M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
824 | 1.36M | + g_ai2_ihevc_trans_16[3][k] |
825 | 1.36M | * pi2_tmp[3 * trans_size] |
826 | 1.36M | + g_ai2_ihevc_trans_16[5][k] |
827 | 1.36M | * pi2_tmp[5 * trans_size] |
828 | 1.36M | + g_ai2_ihevc_trans_16[7][k] |
829 | 1.36M | * pi2_tmp[7 * trans_size] |
830 | 1.36M | + g_ai2_ihevc_trans_16[9][k] |
831 | 1.36M | * pi2_tmp[9 * trans_size] |
832 | 1.36M | + g_ai2_ihevc_trans_16[11][k] |
833 | 1.36M | * pi2_tmp[11 * trans_size] |
834 | 1.36M | + g_ai2_ihevc_trans_16[13][k] |
835 | 1.36M | * pi2_tmp[13 * trans_size] |
836 | 1.36M | + g_ai2_ihevc_trans_16[15][k] |
837 | 1.36M | * pi2_tmp[15 * trans_size]; |
838 | 1.36M | } |
839 | 853k | for(k = 0; k < 4; k++) |
840 | 682k | { |
841 | 682k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
842 | 682k | + g_ai2_ihevc_trans_16[6][k] |
843 | 682k | * pi2_tmp[6 * trans_size] |
844 | 682k | + g_ai2_ihevc_trans_16[10][k] |
845 | 682k | * pi2_tmp[10 * trans_size] |
846 | 682k | + g_ai2_ihevc_trans_16[14][k] |
847 | 682k | * pi2_tmp[14 * trans_size]; |
848 | 682k | } |
849 | 170k | eeo[0] = |
850 | 170k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
851 | 170k | + g_ai2_ihevc_trans_16[12][0] |
852 | 170k | * pi2_tmp[12 |
853 | 170k | * trans_size]; |
854 | 170k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
855 | 170k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
856 | 170k | eeo[1] = |
857 | 170k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
858 | 170k | + g_ai2_ihevc_trans_16[12][1] |
859 | 170k | * pi2_tmp[12 |
860 | 170k | * trans_size]; |
861 | 170k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
862 | 170k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
863 | | |
864 | | /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */ |
865 | 511k | for(k = 0; k < 2; k++) |
866 | 341k | { |
867 | 341k | ee[k] = eee[k] + eeo[k]; |
868 | 341k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
869 | 341k | } |
870 | 853k | for(k = 0; k < 4; k++) |
871 | 682k | { |
872 | 682k | e[k] = ee[k] + eo[k]; |
873 | 682k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
874 | 682k | } |
875 | 1.53M | for(k = 0; k < 8; k++) |
876 | 1.36M | { |
877 | 1.36M | WORD32 itrans_out; |
878 | 1.36M | itrans_out = |
879 | 1.36M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
880 | 1.36M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
881 | 1.36M | itrans_out = |
882 | 1.36M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
883 | 1.36M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
884 | 1.36M | } |
885 | 170k | pi2_tmp++; |
886 | 170k | pu1_pred += pred_strd; |
887 | 170k | pu1_dst += dst_strd; |
888 | 170k | } |
889 | 10.6k | } |
890 | | /************************************************************************************************/ |
891 | | /************************************END - IT_RECON_16x16****************************************/ |
892 | | /************************************************************************************************/ |
893 | 22.6k | } |
894 | 85.6k | } |
895 | | |