/src/libhevc/common/ihevc_chroma_itrans_recon_16x16.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_chroma_itrans_recon_16x16.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for 16x16 inverse transform and reconstruction |
25 | | * of chroma interleaved data. |
26 | | * |
27 | | * @author |
28 | | * 100470 |
29 | | * |
30 | | * @par List of Functions: |
31 | | * - ihevc_chroma_itrans_recon_16x16() |
32 | | * |
33 | | * @remarks |
34 | | * None |
35 | | * |
36 | | ******************************************************************************* |
37 | | */ |
38 | | |
39 | | #include <stdio.h> |
40 | | #include <string.h> |
41 | | #include "ihevc_typedefs.h" |
42 | | #include "ihevc_macros.h" |
43 | | #include "ihevc_platform_macros.h" |
44 | | #include "ihevc_defs.h" |
45 | | #include "ihevc_trans_tables.h" |
46 | | #include "ihevc_chroma_itrans_recon.h" |
47 | | #include "ihevc_func_selector.h" |
48 | | #include "ihevc_trans_macros.h" |
49 | | |
50 | | /* All the functions work on one component (U or V) of interleaved data, depending upon the pointers passed to them */ |
51 | | /* Data visualization */ |
52 | | /* U V U V U V U V */ |
53 | | /* U V U V U V U V */ |
54 | | /* U V U V U V U V */ |
55 | | /* U V U V U V U V */ |
56 | | /* If the pointer points to the first byte of the above stream (U), the functions will operate on the U component */ |
57 | | /* If the pointer points to the second byte of the above stream (V), the functions will operate on the V component */ |
58 | | |
59 | | |
60 | | /** |
61 | | ******************************************************************************* |
62 | | * |
63 | | * @brief |
64 | | * This function performs Inverse transform and reconstruction for 16x16 |
65 | | * input block |
66 | | * |
67 | | * @par Description: |
68 | | * Performs inverse transform and adds the prediction data and clips output |
69 | | * to 8 bit |
70 | | * |
71 | | * @param[in] pi2_src |
72 | | * Input 16x16 coefficients |
73 | | * |
74 | | * @param[in] pi2_tmp |
75 | | * Temporary 16x16 buffer for storing inverse transform |
76 | | * 1st stage output |
77 | | * |
78 | | * @param[in] pu1_pred |
79 | | * Prediction 16x16 block |
80 | | * |
81 | | * @param[out] pu1_dst |
82 | | * Output 16x16 block |
83 | | * |
84 | | * @param[in] src_strd |
85 | | * Input stride |
86 | | * |
87 | | * @param[in] pred_strd |
88 | | * Prediction stride |
89 | | * |
90 | | * @param[in] dst_strd |
91 | | * Output Stride |
92 | | * |
93 | | * @param[in] zero_cols |
94 | | * Zero columns in pi2_src |
95 | | * |
96 | | * @param[in] zero_rows |
97 | | * Zero rows in pi2_src |
98 | | * |
99 | | * @returns Void |
100 | | * |
101 | | * @remarks |
102 | | * None |
103 | | * |
104 | | ******************************************************************************* |
105 | | */ |
106 | | |
107 | | |
108 | | void ihevc_chroma_itrans_recon_16x16(WORD16 *pi2_src, |
109 | | WORD16 *pi2_tmp, |
110 | | UWORD8 *pu1_pred, |
111 | | UWORD8 *pu1_dst, |
112 | | WORD32 src_strd, |
113 | | WORD32 pred_strd, |
114 | | WORD32 dst_strd, |
115 | | WORD32 zero_cols, |
116 | | WORD32 zero_rows) |
117 | 378k | { |
118 | 378k | WORD32 j, k; |
119 | 378k | WORD32 e[8], o[8]; |
120 | 378k | WORD32 ee[4], eo[4]; |
121 | 378k | WORD32 eee[2], eeo[2]; |
122 | 378k | WORD32 add; |
123 | 378k | WORD32 shift; |
124 | 378k | WORD16 *pi2_tmp_orig; |
125 | 378k | WORD32 trans_size; |
126 | 378k | WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols; |
127 | | |
128 | 378k | trans_size = TRANS_SIZE_16; |
129 | 378k | pi2_tmp_orig = pi2_tmp; |
130 | | |
131 | 378k | if((zero_cols & 0xFFF0) == 0xFFF0) |
132 | 260k | row_limit_2nd_stage = 4; |
133 | 117k | else if((zero_cols & 0xFF00) == 0xFF00) |
134 | 80.3k | row_limit_2nd_stage = 8; |
135 | 37.5k | else |
136 | 37.5k | row_limit_2nd_stage = TRANS_SIZE_16; |
137 | | |
138 | 378k | if((zero_rows & 0xFFF0) == 0xFFF0) /* First 4 rows of input are non-zero */ |
139 | 188k | { |
140 | | /************************************************************************************************/ |
141 | | /**********************************START - IT_RECON_16x16****************************************/ |
142 | | /************************************************************************************************/ |
143 | | |
144 | | /* Inverse Transform 1st stage */ |
145 | 188k | shift = IT_SHIFT_STAGE_1; |
146 | 188k | add = 1 << (shift - 1); |
147 | | |
148 | 1.39M | for(j = 0; j < row_limit_2nd_stage; j++) |
149 | 1.20M | { |
150 | | /* Checking for Zero Cols */ |
151 | 1.20M | if((zero_cols & 1) == 1) |
152 | 644k | { |
153 | 644k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
154 | 644k | } |
155 | 562k | else |
156 | 562k | { |
157 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
158 | 5.07M | for(k = 0; k < 8; k++) |
159 | 4.50M | { |
160 | 4.50M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
161 | 4.50M | + g_ai2_ihevc_trans_16[3][k] |
162 | 4.50M | * pi2_src[3 * src_strd]; |
163 | 4.50M | } |
164 | 2.81M | for(k = 0; k < 4; k++) |
165 | 2.25M | { |
166 | 2.25M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]; |
167 | 2.25M | } |
168 | 562k | eeo[0] = 0; |
169 | 562k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
170 | 562k | eeo[1] = 0; |
171 | 562k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
172 | | |
173 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
174 | 1.68M | for(k = 0; k < 2; k++) |
175 | 1.12M | { |
176 | 1.12M | ee[k] = eee[k] + eeo[k]; |
177 | 1.12M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
178 | 1.12M | } |
179 | 2.81M | for(k = 0; k < 4; k++) |
180 | 2.25M | { |
181 | 2.25M | e[k] = ee[k] + eo[k]; |
182 | 2.25M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
183 | 2.25M | } |
184 | 5.06M | for(k = 0; k < 8; k++) |
185 | 4.50M | { |
186 | 4.50M | pi2_tmp[k] = |
187 | 4.50M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
188 | 4.50M | pi2_tmp[k + 8] = |
189 | 4.50M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
190 | 4.50M | } |
191 | 562k | } |
192 | 1.20M | pi2_src++; |
193 | 1.20M | pi2_tmp += trans_size; |
194 | 1.20M | zero_cols = zero_cols >> 1; |
195 | 1.20M | } |
196 | | |
197 | 188k | pi2_tmp = pi2_tmp_orig; |
198 | | |
199 | | /* Inverse Transform 2nd stage */ |
200 | 188k | shift = IT_SHIFT_STAGE_2; |
201 | 188k | add = 1 << (shift - 1); |
202 | 188k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
203 | 114k | { |
204 | 1.94M | for(j = 0; j < trans_size; j++) |
205 | 1.83M | { |
206 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
207 | 16.4M | for(k = 0; k < 8; k++) |
208 | 14.6M | { |
209 | 14.6M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
210 | 14.6M | + g_ai2_ihevc_trans_16[3][k] |
211 | 14.6M | * pi2_tmp[3 * trans_size]; |
212 | 14.6M | } |
213 | 9.15M | for(k = 0; k < 4; k++) |
214 | 7.32M | { |
215 | 7.32M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
216 | 7.32M | } |
217 | 1.83M | eeo[0] = 0; |
218 | 1.83M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
219 | 1.83M | eeo[1] = 0; |
220 | 1.83M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
221 | | |
222 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
223 | 5.49M | for(k = 0; k < 2; k++) |
224 | 3.66M | { |
225 | 3.66M | ee[k] = eee[k] + eeo[k]; |
226 | 3.66M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
227 | 3.66M | } |
228 | 9.15M | for(k = 0; k < 4; k++) |
229 | 7.32M | { |
230 | 7.32M | e[k] = ee[k] + eo[k]; |
231 | 7.32M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
232 | 7.32M | } |
233 | 16.4M | for(k = 0; k < 8; k++) |
234 | 14.6M | { |
235 | 14.6M | WORD32 itrans_out; |
236 | 14.6M | itrans_out = |
237 | 14.6M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
238 | 14.6M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
239 | 14.6M | itrans_out = |
240 | 14.6M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
241 | 14.6M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
242 | 14.6M | } |
243 | 1.83M | pi2_tmp++; |
244 | 1.83M | pu1_pred += pred_strd; |
245 | 1.83M | pu1_dst += dst_strd; |
246 | 1.83M | } |
247 | 114k | } |
248 | 73.4k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
249 | 52.9k | { |
250 | 888k | for(j = 0; j < trans_size; j++) |
251 | 835k | { |
252 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
253 | 7.40M | for(k = 0; k < 8; k++) |
254 | 6.57M | { |
255 | 6.57M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
256 | 6.57M | + g_ai2_ihevc_trans_16[3][k] |
257 | 6.57M | * pi2_tmp[3 * trans_size] |
258 | 6.57M | + g_ai2_ihevc_trans_16[5][k] |
259 | 6.57M | * pi2_tmp[5 * trans_size] |
260 | 6.57M | + g_ai2_ihevc_trans_16[7][k] |
261 | 6.57M | * pi2_tmp[7 * trans_size]; |
262 | 6.57M | } |
263 | 4.15M | for(k = 0; k < 4; k++) |
264 | 3.32M | { |
265 | 3.32M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
266 | 3.32M | + g_ai2_ihevc_trans_16[6][k] |
267 | 3.32M | * pi2_tmp[6 * trans_size]; |
268 | 3.32M | } |
269 | 835k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
270 | 835k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
271 | 835k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
272 | 835k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
273 | | |
274 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
275 | 2.49M | for(k = 0; k < 2; k++) |
276 | 1.66M | { |
277 | 1.66M | ee[k] = eee[k] + eeo[k]; |
278 | 1.66M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
279 | 1.66M | } |
280 | 4.15M | for(k = 0; k < 4; k++) |
281 | 3.32M | { |
282 | 3.32M | e[k] = ee[k] + eo[k]; |
283 | 3.32M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
284 | 3.32M | } |
285 | 7.34M | for(k = 0; k < 8; k++) |
286 | 6.51M | { |
287 | 6.51M | WORD32 itrans_out; |
288 | 6.51M | itrans_out = |
289 | 6.51M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
290 | 6.51M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
291 | 6.51M | itrans_out = |
292 | 6.51M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
293 | 6.51M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
294 | 6.51M | } |
295 | 835k | pi2_tmp++; |
296 | 835k | pu1_pred += pred_strd; |
297 | 835k | pu1_dst += dst_strd; |
298 | 835k | } |
299 | 52.9k | } |
300 | 20.4k | else /* All rows of output of 1st stage are non-zero */ |
301 | 20.4k | { |
302 | 347k | for(j = 0; j < trans_size; j++) |
303 | 326k | { |
304 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
305 | 2.91M | for(k = 0; k < 8; k++) |
306 | 2.58M | { |
307 | 2.58M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
308 | 2.58M | + g_ai2_ihevc_trans_16[3][k] |
309 | 2.58M | * pi2_tmp[3 * trans_size] |
310 | 2.58M | + g_ai2_ihevc_trans_16[5][k] |
311 | 2.58M | * pi2_tmp[5 * trans_size] |
312 | 2.58M | + g_ai2_ihevc_trans_16[7][k] |
313 | 2.58M | * pi2_tmp[7 * trans_size] |
314 | 2.58M | + g_ai2_ihevc_trans_16[9][k] |
315 | 2.58M | * pi2_tmp[9 * trans_size] |
316 | 2.58M | + g_ai2_ihevc_trans_16[11][k] |
317 | 2.58M | * pi2_tmp[11 * trans_size] |
318 | 2.58M | + g_ai2_ihevc_trans_16[13][k] |
319 | 2.58M | * pi2_tmp[13 * trans_size] |
320 | 2.58M | + g_ai2_ihevc_trans_16[15][k] |
321 | 2.58M | * pi2_tmp[15 * trans_size]; |
322 | 2.58M | } |
323 | 1.62M | for(k = 0; k < 4; k++) |
324 | 1.30M | { |
325 | 1.30M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
326 | 1.30M | + g_ai2_ihevc_trans_16[6][k] |
327 | 1.30M | * pi2_tmp[6 * trans_size] |
328 | 1.30M | + g_ai2_ihevc_trans_16[10][k] |
329 | 1.30M | * pi2_tmp[10 * trans_size] |
330 | 1.30M | + g_ai2_ihevc_trans_16[14][k] |
331 | 1.30M | * pi2_tmp[14 * trans_size]; |
332 | 1.30M | } |
333 | 326k | eeo[0] = |
334 | 326k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
335 | 326k | + g_ai2_ihevc_trans_16[12][0] |
336 | 326k | * pi2_tmp[12 |
337 | 326k | * trans_size]; |
338 | 326k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
339 | 326k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
340 | 326k | eeo[1] = |
341 | 326k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
342 | 326k | + g_ai2_ihevc_trans_16[12][1] |
343 | 326k | * pi2_tmp[12 |
344 | 326k | * trans_size]; |
345 | 326k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
346 | 326k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
347 | | |
348 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
349 | 977k | for(k = 0; k < 2; k++) |
350 | 650k | { |
351 | 650k | ee[k] = eee[k] + eeo[k]; |
352 | 650k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
353 | 650k | } |
354 | 1.63M | for(k = 0; k < 4; k++) |
355 | 1.30M | { |
356 | 1.30M | e[k] = ee[k] + eo[k]; |
357 | 1.30M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
358 | 1.30M | } |
359 | 2.91M | for(k = 0; k < 8; k++) |
360 | 2.58M | { |
361 | 2.58M | WORD32 itrans_out; |
362 | 2.58M | itrans_out = |
363 | 2.58M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
364 | 2.58M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
365 | 2.58M | itrans_out = |
366 | 2.58M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
367 | 2.58M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
368 | 2.58M | } |
369 | 326k | pi2_tmp++; |
370 | 326k | pu1_pred += pred_strd; |
371 | 326k | pu1_dst += dst_strd; |
372 | 326k | } |
373 | 20.4k | } |
374 | | /************************************************************************************************/ |
375 | | /************************************END - IT_RECON_16x16****************************************/ |
376 | | /************************************************************************************************/ |
377 | 188k | } |
378 | 190k | else if((zero_rows & 0xFF00) == 0xFF00) /* First 8 rows of input are non-zero */ |
379 | 146k | { |
380 | | /************************************************************************************************/ |
381 | | /**********************************START - IT_RECON_16x16****************************************/ |
382 | | /************************************************************************************************/ |
383 | | |
384 | | /* Inverse Transform 1st stage */ |
385 | 146k | shift = IT_SHIFT_STAGE_1; |
386 | 146k | add = 1 << (shift - 1); |
387 | | |
388 | 847k | for(j = 0; j < row_limit_2nd_stage; j++) |
389 | 701k | { |
390 | | /* Checking for Zero Cols */ |
391 | 701k | if((zero_cols & 1) == 1) |
392 | 401k | { |
393 | 401k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
394 | 401k | } |
395 | 299k | else |
396 | 299k | { |
397 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
398 | 2.69M | for(k = 0; k < 8; k++) |
399 | 2.39M | { |
400 | 2.39M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
401 | 2.39M | + g_ai2_ihevc_trans_16[3][k] |
402 | 2.39M | * pi2_src[3 * src_strd] |
403 | 2.39M | + g_ai2_ihevc_trans_16[5][k] |
404 | 2.39M | * pi2_src[5 * src_strd] |
405 | 2.39M | + g_ai2_ihevc_trans_16[7][k] |
406 | 2.39M | * pi2_src[7 * src_strd]; |
407 | 2.39M | } |
408 | 1.49M | for(k = 0; k < 4; k++) |
409 | 1.19M | { |
410 | 1.19M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
411 | 1.19M | + g_ai2_ihevc_trans_16[6][k] |
412 | 1.19M | * pi2_src[6 * src_strd]; |
413 | 1.19M | } |
414 | 299k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]; |
415 | 299k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0]; |
416 | 299k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]; |
417 | 299k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0]; |
418 | | |
419 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
420 | 899k | for(k = 0; k < 2; k++) |
421 | 600k | { |
422 | 600k | ee[k] = eee[k] + eeo[k]; |
423 | 600k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
424 | 600k | } |
425 | 1.49M | for(k = 0; k < 4; k++) |
426 | 1.19M | { |
427 | 1.19M | e[k] = ee[k] + eo[k]; |
428 | 1.19M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
429 | 1.19M | } |
430 | 2.69M | for(k = 0; k < 8; k++) |
431 | 2.39M | { |
432 | 2.39M | pi2_tmp[k] = |
433 | 2.39M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
434 | 2.39M | pi2_tmp[k + 8] = |
435 | 2.39M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
436 | 2.39M | } |
437 | 299k | } |
438 | 701k | pi2_src++; |
439 | 701k | pi2_tmp += trans_size; |
440 | 701k | zero_cols = zero_cols >> 1; |
441 | 701k | } |
442 | | |
443 | 146k | pi2_tmp = pi2_tmp_orig; |
444 | | |
445 | | /* Inverse Transform 2nd stage */ |
446 | 146k | shift = IT_SHIFT_STAGE_2; |
447 | 146k | add = 1 << (shift - 1); |
448 | 146k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
449 | 118k | { |
450 | 1.99M | for(j = 0; j < trans_size; j++) |
451 | 1.88M | { |
452 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
453 | 16.8M | for(k = 0; k < 8; k++) |
454 | 14.9M | { |
455 | 14.9M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
456 | 14.9M | + g_ai2_ihevc_trans_16[3][k] |
457 | 14.9M | * pi2_tmp[3 * trans_size]; |
458 | 14.9M | } |
459 | 9.36M | for(k = 0; k < 4; k++) |
460 | 7.48M | { |
461 | 7.48M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
462 | 7.48M | } |
463 | 1.88M | eeo[0] = 0; |
464 | 1.88M | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
465 | 1.88M | eeo[1] = 0; |
466 | 1.88M | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
467 | | |
468 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
469 | 5.61M | for(k = 0; k < 2; k++) |
470 | 3.73M | { |
471 | 3.73M | ee[k] = eee[k] + eeo[k]; |
472 | 3.73M | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
473 | 3.73M | } |
474 | 9.34M | for(k = 0; k < 4; k++) |
475 | 7.46M | { |
476 | 7.46M | e[k] = ee[k] + eo[k]; |
477 | 7.46M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
478 | 7.46M | } |
479 | 16.6M | for(k = 0; k < 8; k++) |
480 | 14.7M | { |
481 | 14.7M | WORD32 itrans_out; |
482 | 14.7M | itrans_out = |
483 | 14.7M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
484 | 14.7M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
485 | 14.7M | itrans_out = |
486 | 14.7M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
487 | 14.7M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
488 | 14.7M | } |
489 | 1.88M | pi2_tmp++; |
490 | 1.88M | pu1_pred += pred_strd; |
491 | 1.88M | pu1_dst += dst_strd; |
492 | 1.88M | } |
493 | 118k | } |
494 | 27.0k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
495 | 25.8k | { |
496 | 437k | for(j = 0; j < trans_size; j++) |
497 | 411k | { |
498 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
499 | 3.68M | for(k = 0; k < 8; k++) |
500 | 3.27M | { |
501 | 3.27M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
502 | 3.27M | + g_ai2_ihevc_trans_16[3][k] |
503 | 3.27M | * pi2_tmp[3 * trans_size] |
504 | 3.27M | + g_ai2_ihevc_trans_16[5][k] |
505 | 3.27M | * pi2_tmp[5 * trans_size] |
506 | 3.27M | + g_ai2_ihevc_trans_16[7][k] |
507 | 3.27M | * pi2_tmp[7 * trans_size]; |
508 | 3.27M | } |
509 | 2.05M | for(k = 0; k < 4; k++) |
510 | 1.64M | { |
511 | 1.64M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
512 | 1.64M | + g_ai2_ihevc_trans_16[6][k] |
513 | 1.64M | * pi2_tmp[6 * trans_size]; |
514 | 1.64M | } |
515 | 411k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
516 | 411k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
517 | 411k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
518 | 411k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
519 | | |
520 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
521 | 1.23M | for(k = 0; k < 2; k++) |
522 | 820k | { |
523 | 820k | ee[k] = eee[k] + eeo[k]; |
524 | 820k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
525 | 820k | } |
526 | 2.05M | for(k = 0; k < 4; k++) |
527 | 1.64M | { |
528 | 1.64M | e[k] = ee[k] + eo[k]; |
529 | 1.64M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
530 | 1.64M | } |
531 | 3.66M | for(k = 0; k < 8; k++) |
532 | 3.25M | { |
533 | 3.25M | WORD32 itrans_out; |
534 | 3.25M | itrans_out = |
535 | 3.25M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
536 | 3.25M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
537 | 3.25M | itrans_out = |
538 | 3.25M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
539 | 3.25M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
540 | 3.25M | } |
541 | 411k | pi2_tmp++; |
542 | 411k | pu1_pred += pred_strd; |
543 | 411k | pu1_dst += dst_strd; |
544 | 411k | } |
545 | 25.8k | } |
546 | 1.17k | else /* All rows of output of 1st stage are non-zero */ |
547 | 1.17k | { |
548 | 20.7k | for(j = 0; j < trans_size; j++) |
549 | 19.5k | { |
550 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
551 | 175k | for(k = 0; k < 8; k++) |
552 | 155k | { |
553 | 155k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
554 | 155k | + g_ai2_ihevc_trans_16[3][k] |
555 | 155k | * pi2_tmp[3 * trans_size] |
556 | 155k | + g_ai2_ihevc_trans_16[5][k] |
557 | 155k | * pi2_tmp[5 * trans_size] |
558 | 155k | + g_ai2_ihevc_trans_16[7][k] |
559 | 155k | * pi2_tmp[7 * trans_size] |
560 | 155k | + g_ai2_ihevc_trans_16[9][k] |
561 | 155k | * pi2_tmp[9 * trans_size] |
562 | 155k | + g_ai2_ihevc_trans_16[11][k] |
563 | 155k | * pi2_tmp[11 * trans_size] |
564 | 155k | + g_ai2_ihevc_trans_16[13][k] |
565 | 155k | * pi2_tmp[13 * trans_size] |
566 | 155k | + g_ai2_ihevc_trans_16[15][k] |
567 | 155k | * pi2_tmp[15 * trans_size]; |
568 | 155k | } |
569 | 97.4k | for(k = 0; k < 4; k++) |
570 | 77.9k | { |
571 | 77.9k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
572 | 77.9k | + g_ai2_ihevc_trans_16[6][k] |
573 | 77.9k | * pi2_tmp[6 * trans_size] |
574 | 77.9k | + g_ai2_ihevc_trans_16[10][k] |
575 | 77.9k | * pi2_tmp[10 * trans_size] |
576 | 77.9k | + g_ai2_ihevc_trans_16[14][k] |
577 | 77.9k | * pi2_tmp[14 * trans_size]; |
578 | 77.9k | } |
579 | 19.5k | eeo[0] = |
580 | 19.5k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
581 | 19.5k | + g_ai2_ihevc_trans_16[12][0] |
582 | 19.5k | * pi2_tmp[12 |
583 | 19.5k | * trans_size]; |
584 | 19.5k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
585 | 19.5k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
586 | 19.5k | eeo[1] = |
587 | 19.5k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
588 | 19.5k | + g_ai2_ihevc_trans_16[12][1] |
589 | 19.5k | * pi2_tmp[12 |
590 | 19.5k | * trans_size]; |
591 | 19.5k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
592 | 19.5k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
593 | | |
594 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
595 | 58.4k | for(k = 0; k < 2; k++) |
596 | 38.9k | { |
597 | 38.9k | ee[k] = eee[k] + eeo[k]; |
598 | 38.9k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
599 | 38.9k | } |
600 | 97.5k | for(k = 0; k < 4; k++) |
601 | 78.0k | { |
602 | 78.0k | e[k] = ee[k] + eo[k]; |
603 | 78.0k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
604 | 78.0k | } |
605 | 174k | for(k = 0; k < 8; k++) |
606 | 155k | { |
607 | 155k | WORD32 itrans_out; |
608 | 155k | itrans_out = |
609 | 155k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
610 | 155k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
611 | 155k | itrans_out = |
612 | 155k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
613 | 155k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
614 | 155k | } |
615 | 19.5k | pi2_tmp++; |
616 | 19.5k | pu1_pred += pred_strd; |
617 | 19.5k | pu1_dst += dst_strd; |
618 | 19.5k | } |
619 | 1.17k | } |
620 | | /************************************************************************************************/ |
621 | | /************************************END - IT_RECON_16x16****************************************/ |
622 | | /************************************************************************************************/ |
623 | 146k | } |
624 | 44.2k | else /* All rows of input are non-zero */ |
625 | 44.2k | { |
626 | | /************************************************************************************************/ |
627 | | /**********************************START - IT_RECON_16x16****************************************/ |
628 | | /************************************************************************************************/ |
629 | | |
630 | | /* Inverse Transform 1st stage */ |
631 | 44.2k | shift = IT_SHIFT_STAGE_1; |
632 | 44.2k | add = 1 << (shift - 1); |
633 | | |
634 | 417k | for(j = 0; j < row_limit_2nd_stage; j++) |
635 | 372k | { |
636 | | /* Checking for Zero Cols */ |
637 | 372k | if((zero_cols & 1) == 1) |
638 | 75.3k | { |
639 | 75.3k | memset(pi2_tmp, 0, trans_size * sizeof(WORD16)); |
640 | 75.3k | } |
641 | 297k | else |
642 | 297k | { |
643 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
644 | 2.67M | for(k = 0; k < 8; k++) |
645 | 2.38M | { |
646 | 2.38M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
647 | 2.38M | + g_ai2_ihevc_trans_16[3][k] |
648 | 2.38M | * pi2_src[3 * src_strd] |
649 | 2.38M | + g_ai2_ihevc_trans_16[5][k] |
650 | 2.38M | * pi2_src[5 * src_strd] |
651 | 2.38M | + g_ai2_ihevc_trans_16[7][k] |
652 | 2.38M | * pi2_src[7 * src_strd] |
653 | 2.38M | + g_ai2_ihevc_trans_16[9][k] |
654 | 2.38M | * pi2_src[9 * src_strd] |
655 | 2.38M | + g_ai2_ihevc_trans_16[11][k] |
656 | 2.38M | * pi2_src[11 * src_strd] |
657 | 2.38M | + g_ai2_ihevc_trans_16[13][k] |
658 | 2.38M | * pi2_src[13 * src_strd] |
659 | 2.38M | + g_ai2_ihevc_trans_16[15][k] |
660 | 2.38M | * pi2_src[15 * src_strd]; |
661 | 2.38M | } |
662 | 1.48M | for(k = 0; k < 4; k++) |
663 | 1.19M | { |
664 | 1.19M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
665 | 1.19M | + g_ai2_ihevc_trans_16[6][k] |
666 | 1.19M | * pi2_src[6 * src_strd] |
667 | 1.19M | + g_ai2_ihevc_trans_16[10][k] |
668 | 1.19M | * pi2_src[10 * src_strd] |
669 | 1.19M | + g_ai2_ihevc_trans_16[14][k] |
670 | 1.19M | * pi2_src[14 * src_strd]; |
671 | 1.19M | } |
672 | 297k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] |
673 | 297k | + g_ai2_ihevc_trans_16[12][0] |
674 | 297k | * pi2_src[12 * src_strd]; |
675 | 297k | eee[0] = |
676 | 297k | g_ai2_ihevc_trans_16[0][0] * pi2_src[0] |
677 | 297k | + g_ai2_ihevc_trans_16[8][0] |
678 | 297k | * pi2_src[8 |
679 | 297k | * src_strd]; |
680 | 297k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] |
681 | 297k | + g_ai2_ihevc_trans_16[12][1] |
682 | 297k | * pi2_src[12 * src_strd]; |
683 | 297k | eee[1] = |
684 | 297k | g_ai2_ihevc_trans_16[0][1] * pi2_src[0] |
685 | 297k | + g_ai2_ihevc_trans_16[8][1] |
686 | 297k | * pi2_src[8 |
687 | 297k | * src_strd]; |
688 | | |
689 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
690 | 892k | for(k = 0; k < 2; k++) |
691 | 595k | { |
692 | 595k | ee[k] = eee[k] + eeo[k]; |
693 | 595k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
694 | 595k | } |
695 | 1.48M | for(k = 0; k < 4; k++) |
696 | 1.19M | { |
697 | 1.19M | e[k] = ee[k] + eo[k]; |
698 | 1.19M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
699 | 1.19M | } |
700 | 2.67M | for(k = 0; k < 8; k++) |
701 | 2.38M | { |
702 | 2.38M | pi2_tmp[k] = |
703 | 2.38M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
704 | 2.38M | pi2_tmp[k + 8] = |
705 | 2.38M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
706 | 2.38M | } |
707 | 297k | } |
708 | 372k | pi2_src++; |
709 | 372k | pi2_tmp += trans_size; |
710 | 372k | zero_cols = zero_cols >> 1; |
711 | 372k | } |
712 | | |
713 | 44.2k | pi2_tmp = pi2_tmp_orig; |
714 | | |
715 | | /* Inverse Transform 2nd stage */ |
716 | 44.2k | shift = IT_SHIFT_STAGE_2; |
717 | 44.2k | add = 1 << (shift - 1); |
718 | 44.2k | if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */ |
719 | 26.9k | { |
720 | 451k | for(j = 0; j < trans_size; j++) |
721 | 424k | { |
722 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
723 | 3.78M | for(k = 0; k < 8; k++) |
724 | 3.35M | { |
725 | 3.35M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
726 | 3.35M | + g_ai2_ihevc_trans_16[3][k] |
727 | 3.35M | * pi2_tmp[3 * trans_size]; |
728 | 3.35M | } |
729 | 2.11M | for(k = 0; k < 4; k++) |
730 | 1.68M | { |
731 | 1.68M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]; |
732 | 1.68M | } |
733 | 424k | eeo[0] = 0; |
734 | 424k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
735 | 424k | eeo[1] = 0; |
736 | 424k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
737 | | |
738 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
739 | 1.26M | for(k = 0; k < 2; k++) |
740 | 840k | { |
741 | 840k | ee[k] = eee[k] + eeo[k]; |
742 | 840k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
743 | 840k | } |
744 | 2.10M | for(k = 0; k < 4; k++) |
745 | 1.67M | { |
746 | 1.67M | e[k] = ee[k] + eo[k]; |
747 | 1.67M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
748 | 1.67M | } |
749 | 3.71M | for(k = 0; k < 8; k++) |
750 | 3.29M | { |
751 | 3.29M | WORD32 itrans_out; |
752 | 3.29M | itrans_out = |
753 | 3.29M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
754 | 3.29M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
755 | 3.29M | itrans_out = |
756 | 3.29M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
757 | 3.29M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
758 | 3.29M | } |
759 | 424k | pi2_tmp++; |
760 | 424k | pu1_pred += pred_strd; |
761 | 424k | pu1_dst += dst_strd; |
762 | 424k | } |
763 | 26.9k | } |
764 | 17.3k | else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */ |
765 | 1.58k | { |
766 | 26.8k | for(j = 0; j < trans_size; j++) |
767 | 25.2k | { |
768 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
769 | 227k | for(k = 0; k < 8; k++) |
770 | 202k | { |
771 | 202k | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
772 | 202k | + g_ai2_ihevc_trans_16[3][k] |
773 | 202k | * pi2_tmp[3 * trans_size] |
774 | 202k | + g_ai2_ihevc_trans_16[5][k] |
775 | 202k | * pi2_tmp[5 * trans_size] |
776 | 202k | + g_ai2_ihevc_trans_16[7][k] |
777 | 202k | * pi2_tmp[7 * trans_size]; |
778 | 202k | } |
779 | 126k | for(k = 0; k < 4; k++) |
780 | 101k | { |
781 | 101k | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
782 | 101k | + g_ai2_ihevc_trans_16[6][k] |
783 | 101k | * pi2_tmp[6 * trans_size]; |
784 | 101k | } |
785 | 25.2k | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]; |
786 | 25.2k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]; |
787 | 25.2k | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]; |
788 | 25.2k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]; |
789 | | |
790 | | /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */ |
791 | 75.8k | for(k = 0; k < 2; k++) |
792 | 50.5k | { |
793 | 50.5k | ee[k] = eee[k] + eeo[k]; |
794 | 50.5k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
795 | 50.5k | } |
796 | 126k | for(k = 0; k < 4; k++) |
797 | 101k | { |
798 | 101k | e[k] = ee[k] + eo[k]; |
799 | 101k | e[k + 4] = ee[3 - k] - eo[3 - k]; |
800 | 101k | } |
801 | 227k | for(k = 0; k < 8; k++) |
802 | 202k | { |
803 | 202k | WORD32 itrans_out; |
804 | 202k | itrans_out = |
805 | 202k | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
806 | 202k | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
807 | 202k | itrans_out = |
808 | 202k | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
809 | 202k | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
810 | 202k | } |
811 | 25.2k | pi2_tmp++; |
812 | 25.2k | pu1_pred += pred_strd; |
813 | 25.2k | pu1_dst += dst_strd; |
814 | 25.2k | } |
815 | 1.58k | } |
816 | 15.7k | else /* All rows of output of 1st stage are non-zero */ |
817 | 15.7k | { |
818 | 268k | for(j = 0; j < trans_size; j++) |
819 | 252k | { |
820 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
821 | 2.27M | for(k = 0; k < 8; k++) |
822 | 2.02M | { |
823 | 2.02M | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size] |
824 | 2.02M | + g_ai2_ihevc_trans_16[3][k] |
825 | 2.02M | * pi2_tmp[3 * trans_size] |
826 | 2.02M | + g_ai2_ihevc_trans_16[5][k] |
827 | 2.02M | * pi2_tmp[5 * trans_size] |
828 | 2.02M | + g_ai2_ihevc_trans_16[7][k] |
829 | 2.02M | * pi2_tmp[7 * trans_size] |
830 | 2.02M | + g_ai2_ihevc_trans_16[9][k] |
831 | 2.02M | * pi2_tmp[9 * trans_size] |
832 | 2.02M | + g_ai2_ihevc_trans_16[11][k] |
833 | 2.02M | * pi2_tmp[11 * trans_size] |
834 | 2.02M | + g_ai2_ihevc_trans_16[13][k] |
835 | 2.02M | * pi2_tmp[13 * trans_size] |
836 | 2.02M | + g_ai2_ihevc_trans_16[15][k] |
837 | 2.02M | * pi2_tmp[15 * trans_size]; |
838 | 2.02M | } |
839 | 1.26M | for(k = 0; k < 4; k++) |
840 | 1.01M | { |
841 | 1.01M | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size] |
842 | 1.01M | + g_ai2_ihevc_trans_16[6][k] |
843 | 1.01M | * pi2_tmp[6 * trans_size] |
844 | 1.01M | + g_ai2_ihevc_trans_16[10][k] |
845 | 1.01M | * pi2_tmp[10 * trans_size] |
846 | 1.01M | + g_ai2_ihevc_trans_16[14][k] |
847 | 1.01M | * pi2_tmp[14 * trans_size]; |
848 | 1.01M | } |
849 | 252k | eeo[0] = |
850 | 252k | g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size] |
851 | 252k | + g_ai2_ihevc_trans_16[12][0] |
852 | 252k | * pi2_tmp[12 |
853 | 252k | * trans_size]; |
854 | 252k | eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0] |
855 | 252k | + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size]; |
856 | 252k | eeo[1] = |
857 | 252k | g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size] |
858 | 252k | + g_ai2_ihevc_trans_16[12][1] |
859 | 252k | * pi2_tmp[12 |
860 | 252k | * trans_size]; |
861 | 252k | eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0] |
862 | 252k | + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size]; |
863 | | |
864 | | /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */ |
865 | 758k | for(k = 0; k < 2; k++) |
866 | 505k | { |
867 | 505k | ee[k] = eee[k] + eeo[k]; |
868 | 505k | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
869 | 505k | } |
870 | 1.26M | for(k = 0; k < 4; k++) |
871 | 1.01M | { |
872 | 1.01M | e[k] = ee[k] + eo[k]; |
873 | 1.01M | e[k + 4] = ee[3 - k] - eo[3 - k]; |
874 | 1.01M | } |
875 | 2.27M | for(k = 0; k < 8; k++) |
876 | 2.02M | { |
877 | 2.02M | WORD32 itrans_out; |
878 | 2.02M | itrans_out = |
879 | 2.02M | CLIP_S16(((e[k] + o[k] + add) >> shift)); |
880 | 2.02M | pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2])); |
881 | 2.02M | itrans_out = |
882 | 2.02M | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift)); |
883 | 2.02M | pu1_dst[(k + 8) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 8) * 2])); |
884 | 2.02M | } |
885 | 252k | pi2_tmp++; |
886 | 252k | pu1_pred += pred_strd; |
887 | 252k | pu1_dst += dst_strd; |
888 | 252k | } |
889 | 15.7k | } |
890 | | /************************************************************************************************/ |
891 | | /************************************END - IT_RECON_16x16****************************************/ |
892 | | /************************************************************************************************/ |
893 | 44.2k | } |
894 | 378k | } |
895 | | |