/src/libhevc/common/ihevc_itrans.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ******************************************************************************/ |
18 | | /** |
19 | | ******************************************************************************* |
20 | | * @file |
21 | | * ihevc_itrans.c |
22 | | * |
23 | | * @brief |
24 | | * Contains function definitions for single stage inverse transform |
25 | | * |
26 | | * @author |
27 | | * 100470 |
28 | | * |
29 | | * @par List of Functions: |
30 | | * - ihevc_itrans_4x4_ttype1() |
31 | | * - ihevc_itrans_4x4() |
32 | | * - ihevc_itrans_8x8() |
33 | | * - ihevc_itrans_16x16() |
34 | | * - ihevc_itrans_32x32() |
35 | | * |
36 | | * @remarks |
37 | | * None |
38 | | * |
39 | | ******************************************************************************* |
40 | | */ |
41 | | #include <stdio.h> |
42 | | #include <string.h> |
43 | | #include "ihevc_typedefs.h" |
44 | | #include "ihevc_macros.h" |
45 | | #include "ihevc_platform_macros.h" |
46 | | #include "ihevc_defs.h" |
47 | | #include "ihevc_trans_tables.h" |
48 | | #include "ihevc_func_selector.h" |
49 | | #include "ihevc_trans_macros.h" |
50 | | |
51 | | #define NON_OPTIMIZED 1 |
52 | | |
53 | | /** |
54 | | ******************************************************************************* |
55 | | * |
56 | | * @brief |
57 | | * This function performs Single stage Inverse transform type 1 (DST) for |
58 | | * 4x4 input block |
59 | | * |
60 | | * @par Description: |
61 | | * Performs single stage 4x4 inverse transform type 1 by utilizing the |
62 | | * symmetry of transformation matrix and reducing number of multiplications |
63 | | * wherever possible but keeping the number of operations |
64 | | * (addition, multiplication and shift) the same |
65 | | * |
66 | | * @param[in] pi2_src |
67 | | * Input 4x4 coefficients |
68 | | * |
69 | | * @param[out] pi2_dst |
70 | | * Output 4x4 block |
71 | | * |
72 | | * @param[in] src_strd |
73 | | * Input stride |
74 | | * |
75 | | * @param[in] dst_strd |
76 | | * Output Stride |
77 | | * |
78 | | * @param[in] i4_shift |
79 | | * Output shift |
80 | | * |
81 | | * @param[in] zero_cols |
82 | | * Zero columns in pi2_src |
83 | | * |
84 | | * @returns Void |
85 | | * |
86 | | * @remarks |
87 | | * None |
88 | | * |
89 | | ******************************************************************************* |
90 | | */ |
91 | | |
92 | | |
93 | | void ihevc_itrans_4x4_ttype1(WORD16 *pi2_src, |
94 | | WORD16 *pi2_dst, |
95 | | WORD32 src_strd, |
96 | | WORD32 dst_strd, |
97 | | WORD32 i4_shift, |
98 | | WORD32 zero_cols) |
99 | 0 | { |
100 | 0 | WORD32 i, c[4]; |
101 | 0 | WORD32 add; |
102 | |
|
103 | 0 | add = 1 << (i4_shift - 1); |
104 | |
|
105 | 0 | for(i = 0; i < TRANS_SIZE_4; i++) |
106 | 0 | { |
107 | | /* Checking for Zero Cols */ |
108 | 0 | if((zero_cols & 1) == 1) |
109 | 0 | { |
110 | 0 | memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16)); |
111 | 0 | } |
112 | 0 | else |
113 | 0 | { |
114 | | // Intermediate Variables |
115 | 0 | c[0] = pi2_src[0] + pi2_src[2 * src_strd]; |
116 | 0 | c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd]; |
117 | 0 | c[2] = pi2_src[0] - pi2_src[3 * src_strd]; |
118 | 0 | c[3] = 74 * pi2_src[src_strd]; |
119 | |
|
120 | 0 | pi2_dst[0] = |
121 | 0 | CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> i4_shift); |
122 | 0 | pi2_dst[1] = |
123 | 0 | CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> i4_shift); |
124 | 0 | pi2_dst[2] = |
125 | 0 | CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> i4_shift); |
126 | 0 | pi2_dst[3] = |
127 | 0 | CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> i4_shift); |
128 | 0 | } |
129 | 0 | pi2_src++; |
130 | 0 | pi2_dst += dst_strd; |
131 | 0 | zero_cols = zero_cols >> 1; |
132 | 0 | } |
133 | 0 | } |
134 | | |
135 | | |
136 | | /** |
137 | | ******************************************************************************* |
138 | | * |
139 | | * @brief |
140 | | * This function performs Single stage Inverse transform for 4x4 input |
141 | | * block |
142 | | * |
143 | | * @par Description: |
144 | | * Performs single stage 4x4 inverse transform by utilizing the symmetry of |
145 | | * transformation matrix and reducing number of multiplications wherever |
146 | | * possible but keeping the number of operations (addition, multiplication and |
147 | | * shift) the same |
148 | | * |
149 | | * @param[in] pi2_src |
150 | | * Input 4x4 coefficients |
151 | | * |
152 | | * @param[out] pi2_dst |
153 | | * Output 4x4 block |
154 | | * |
155 | | * @param[in] src_strd |
156 | | * Input stride |
157 | | * |
158 | | * @param[in] dst_strd |
159 | | * Output Stride |
160 | | * |
161 | | * @param[in] i4_shift |
162 | | * Output shift |
163 | | * |
164 | | * @param[in] zero_cols |
165 | | * Zero columns in pi2_src |
166 | | * |
167 | | * @returns Void |
168 | | * |
169 | | * @remarks |
170 | | * None |
171 | | * |
172 | | ******************************************************************************* |
173 | | */ |
174 | | |
175 | | #if NON_OPTIMIZED |
176 | | void ihevc_itrans_4x4(WORD16 *pi2_src, |
177 | | WORD16 *pi2_dst, |
178 | | WORD32 src_strd, |
179 | | WORD32 dst_strd, |
180 | | WORD32 i4_shift, |
181 | | WORD32 zero_cols) |
182 | 0 | { |
183 | 0 | WORD32 j; |
184 | 0 | WORD32 e[2], o[2]; |
185 | 0 | WORD32 add; |
186 | |
|
187 | 0 | add = 1 << (i4_shift - 1); |
188 | |
|
189 | 0 | for(j = 0; j < TRANS_SIZE_4; j++) |
190 | 0 | { |
191 | | /* Checking for Zero Cols */ |
192 | 0 | if((zero_cols & 1) == 1) |
193 | 0 | { |
194 | 0 | memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16)); |
195 | 0 | } |
196 | 0 | else |
197 | 0 | { |
198 | | |
199 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
200 | 0 | o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd] |
201 | 0 | + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd]; |
202 | 0 | o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd] |
203 | 0 | + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd]; |
204 | 0 | e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0] |
205 | 0 | + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd]; |
206 | 0 | e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0] |
207 | 0 | + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd]; |
208 | |
|
209 | 0 | pi2_dst[0] = |
210 | 0 | CLIP_S16(((e[0] + o[0] + add) >> i4_shift)); |
211 | 0 | pi2_dst[1] = |
212 | 0 | CLIP_S16(((e[1] + o[1] + add) >> i4_shift)); |
213 | 0 | pi2_dst[2] = |
214 | 0 | CLIP_S16(((e[1] - o[1] + add) >> i4_shift)); |
215 | 0 | pi2_dst[3] = |
216 | 0 | CLIP_S16(((e[0] - o[0] + add) >> i4_shift)); |
217 | |
|
218 | 0 | } |
219 | 0 | pi2_src++; |
220 | 0 | pi2_dst += dst_strd; |
221 | 0 | zero_cols = zero_cols >> 1; |
222 | 0 | } |
223 | 0 | } |
224 | | #else |
225 | | void ihevc_itrans_4x4(WORD16 *pi2_src, |
226 | | WORD16 *pi2_dst, |
227 | | WORD32 src_strd, |
228 | | WORD32 dst_strd, |
229 | | WORD32 i4_shift, |
230 | | WORD32 zero_cols) |
231 | | { |
232 | | WORD32 j; |
233 | | WORD32 e[2], o[2]; |
234 | | WORD32 add; |
235 | | |
236 | | add = 1 << (i4_shift - 1); |
237 | | |
238 | | /***************************************************************************/ |
239 | | /* Transform Matrix 4x4 */ |
240 | | /* 0 1 2 3 */ |
241 | | /* 0 { 64, 64, 64, 64}, */ |
242 | | /* 1 { 83, 36,-36,-83}, */ |
243 | | /* 2 { 64,-64,-64, 64}, */ |
244 | | /* 3 { 36,-83, 83,-36} */ |
245 | | /***************************************************************************/ |
246 | | |
247 | | for(j = 0; j < TRANS_SIZE_4; j++) |
248 | | { |
249 | | WORD32 temp; |
250 | | |
251 | | /* Checking for Zero Cols */ |
252 | | if((zero_cols & 1) == 1) |
253 | | { |
254 | | memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16)); |
255 | | } |
256 | | else |
257 | | { |
258 | | /* Common operation in o[0] and o[1] */ |
259 | | temp = (pi2_src[src_strd] + pi2_src[3 * src_strd]) * 36; |
260 | | |
261 | | o[0] = temp + 47 * pi2_src[src_strd]; |
262 | | o[1] = temp - 119 * pi2_src[3 * src_strd]; |
263 | | e[0] = (pi2_src[0] + pi2_src[2 * src_strd]) << 6; |
264 | | e[1] = (pi2_src[0] - pi2_src[2 * src_strd]) << 6; |
265 | | |
266 | | pi2_dst[0] = |
267 | | CLIP_S16(((e[0] + o[0] + add) >> i4_shift)); |
268 | | pi2_dst[1] = |
269 | | CLIP_S16(((e[1] + o[1] + add) >> i4_shift)); |
270 | | pi2_dst[2] = |
271 | | CLIP_S16(((e[1] - o[1] + add) >> i4_shift)); |
272 | | pi2_dst[3] = |
273 | | CLIP_S16(((e[0] - o[0] + add) >> i4_shift)); |
274 | | } |
275 | | pi2_src++; |
276 | | pi2_dst += dst_strd; |
277 | | zero_cols = zero_cols >> 1; |
278 | | } |
279 | | } |
280 | | #endif |
281 | | |
282 | | /** |
283 | | ******************************************************************************* |
284 | | * |
285 | | * @brief |
286 | | * This function performs Single stage Inverse transform for 8x8 input |
287 | | * block |
288 | | * |
289 | | * @par Description: |
290 | | * Performs single stage 8x8 inverse transform by utilizing the symmetry of |
291 | | * transformation matrix and reducing number of multiplications wherever |
292 | | * possible but keeping the number of operations (addition, multiplication and |
293 | | * shift) the same |
294 | | * |
295 | | * @param[in] pi2_src |
296 | | * Input 8x8 coefficients |
297 | | * |
298 | | * @param[out] pi2_dst |
299 | | * Output 8x8 block |
300 | | * |
301 | | * @param[in] src_strd |
302 | | * Input stride |
303 | | * |
304 | | * @param[in] dst_strd |
305 | | * Output Stride |
306 | | * |
307 | | * @param[in] i4_shift |
308 | | * Output shift |
309 | | * |
310 | | * @param[in] zero_cols |
311 | | * Zero columns in pi2_src |
312 | | * |
313 | | * @returns Void |
314 | | * |
315 | | * @remarks |
316 | | * None |
317 | | * |
318 | | ******************************************************************************* |
319 | | */ |
320 | | |
321 | | #if NON_OPTIMIZED |
322 | | void ihevc_itrans_8x8(WORD16 *pi2_src, |
323 | | WORD16 *pi2_dst, |
324 | | WORD32 src_strd, |
325 | | WORD32 dst_strd, |
326 | | WORD32 i4_shift, |
327 | | WORD32 zero_cols) |
328 | 0 | { |
329 | 0 | WORD32 j, k; |
330 | 0 | WORD32 e[4], o[4]; |
331 | 0 | WORD32 ee[2], eo[2]; |
332 | 0 | WORD32 add; |
333 | |
|
334 | 0 | add = 1 << (i4_shift - 1); |
335 | |
|
336 | 0 | for(j = 0; j < TRANS_SIZE_8; j++) |
337 | 0 | { |
338 | | /* Checking for Zero Cols */ |
339 | 0 | if((zero_cols & 1) == 1) |
340 | 0 | { |
341 | 0 | memset(pi2_dst, 0, TRANS_SIZE_8 * sizeof(WORD16)); |
342 | 0 | } |
343 | 0 | else |
344 | 0 | { |
345 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
346 | 0 | for(k = 0; k < 4; k++) |
347 | 0 | { |
348 | 0 | o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd] |
349 | 0 | + g_ai2_ihevc_trans_8[3][k] |
350 | 0 | * pi2_src[3 * src_strd] |
351 | 0 | + g_ai2_ihevc_trans_8[5][k] |
352 | 0 | * pi2_src[5 * src_strd] |
353 | 0 | + g_ai2_ihevc_trans_8[7][k] |
354 | 0 | * pi2_src[7 * src_strd]; |
355 | 0 | } |
356 | |
|
357 | 0 | eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd] |
358 | 0 | + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd]; |
359 | 0 | eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd] |
360 | 0 | + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd]; |
361 | 0 | ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0] |
362 | 0 | + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd]; |
363 | 0 | ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0] |
364 | 0 | + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd]; |
365 | | |
366 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
367 | 0 | e[0] = ee[0] + eo[0]; |
368 | 0 | e[3] = ee[0] - eo[0]; |
369 | 0 | e[1] = ee[1] + eo[1]; |
370 | 0 | e[2] = ee[1] - eo[1]; |
371 | 0 | for(k = 0; k < 4; k++) |
372 | 0 | { |
373 | 0 | pi2_dst[k] = |
374 | 0 | CLIP_S16(((e[k] + o[k] + add) >> i4_shift)); |
375 | 0 | pi2_dst[k + 4] = |
376 | 0 | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> i4_shift)); |
377 | 0 | } |
378 | 0 | } |
379 | 0 | pi2_src++; |
380 | 0 | pi2_dst += dst_strd; |
381 | 0 | zero_cols = zero_cols >> 1; |
382 | 0 | } |
383 | 0 | } |
384 | | |
385 | | #else |
386 | | void ihevc_itrans_8x8(WORD16 *pi2_src, |
387 | | WORD16 *pi2_dst, |
388 | | WORD32 src_strd, |
389 | | WORD32 dst_strd, |
390 | | WORD32 i4_shift, |
391 | | WORD32 zero_cols) |
392 | | { |
393 | | /* Transform Matrix 8x8 */ |
394 | | /* 0 1 2 3 4 5 6 7 */ |
395 | | /* 0 - 64 64 64 64 64 64 64 64 */ |
396 | | /* 1 - 89 75 50 18 -18 -50 -75 -89 */ |
397 | | /* 2 - 83 36 -36 -83 -83 -36 36 83 */ |
398 | | /* 3 - 75 -18 -89 -50 50 89 18 -75 */ |
399 | | /* 4 - 64 -64 -64 64 64 -64 -64 64 */ |
400 | | /* 5 - 50 -89 18 75 -75 -18 89 -50 */ |
401 | | /* 6 - 36 -83 83 -36 -36 83 -83 36 */ |
402 | | /* 7 - 18 -50 75 -89 89 -75 50 -18 */ |
403 | | |
404 | | /* 0th and 4th row will have no multiplications */ |
405 | | /* 2nd and 6th row has only two coefff multiplies */ |
406 | | /* 1st, 3rd, 5th and 7th rows have o mirror symmetry */ |
407 | | WORD32 j, k; |
408 | | WORD32 temp1, temp2; |
409 | | WORD32 e[4], o[4]; |
410 | | WORD32 ee[2], eo[2]; |
411 | | WORD32 add; |
412 | | |
413 | | add = 1 << (i4_shift - 1); |
414 | | |
415 | | for(j = 0; j < TRANS_SIZE_8; j++) |
416 | | { |
417 | | /* Checking for Zero Cols */ |
418 | | if((zero_cols & 1) == 1) |
419 | | { |
420 | | memset(pi2_dst, 0, TRANS_SIZE_8 * sizeof(WORD16)); |
421 | | } |
422 | | else |
423 | | { |
424 | | |
425 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
426 | | /* |
427 | | o[0] = 89 *pi2_src[8] + 75 *pi2_src[3*8] + 50 *pi2_src[5*8] + 18 *pi2_src[7*8]; |
428 | | o[1] = 75 *pi2_src[8] + -18 *pi2_src[3*8] + -89 *pi2_src[5*8] + -50 *pi2_src[7*8]; |
429 | | o[2] = 50 *pi2_src[8] + -89 *pi2_src[3*8] + 18 *pi2_src[5*8] + 75 *pi2_src[7*8]; |
430 | | o[3] = 18 *pi2_src[8] + -50 *pi2_src[3*8] + 75 *pi2_src[5*8] + -89 *pi2_src[7*8]; |
431 | | */ |
432 | | |
433 | | /* Optimization: 4 mul + 2 add ---> 3 mul + 3 add */ |
434 | | /* |
435 | | temp1 = (pi2_src[8 ] + pi2_src[3*8]) * 75; |
436 | | temp2 = (pi2_src[5*8] + pi2_src[7*8]) * 50; |
437 | | |
438 | | o[0] = temp1 + 14 * pi2_src[8 ] + temp2 - 32 * pi2_src[7*8]; |
439 | | o[1] = temp1 - 93 * pi2_src[3*8] - temp2 - 39 * pi2_src[5*8]; |
440 | | */ |
441 | | |
442 | | temp1 = (pi2_src[src_strd] + pi2_src[3 * src_strd]) * 75; |
443 | | temp2 = (pi2_src[5 * src_strd] + pi2_src[7 * src_strd]) * 50; |
444 | | |
445 | | o[0] = temp1 + 14 * pi2_src[src_strd] + temp2 |
446 | | - (pi2_src[7 * src_strd] << 5); |
447 | | o[1] = temp1 - 93 * pi2_src[3 * src_strd] - temp2 |
448 | | - 39 * pi2_src[5 * src_strd]; |
449 | | |
450 | | /* Optimization: 4 mul + 2 add ---> 3 mul + 3 add */ |
451 | | /* |
452 | | temp1 = (pi2_src[8 ] - pi2_src[3*8]) * 50; |
453 | | temp2 = (pi2_src[5*8] + pi2_src[7*8]) * 75; |
454 | | |
455 | | o[2] = temp1 - 39 * pi2_src[3*8] + temp2 - 57 * pi2_src[5*8]; |
456 | | o[3] = temp1 - 32 * pi2_src[8 ] + temp2 - 164 * pi2_src[7*8]; |
457 | | */ |
458 | | |
459 | | temp1 = (pi2_src[src_strd] - pi2_src[3 * src_strd]) * 50; |
460 | | temp2 = (pi2_src[5 * src_strd] + pi2_src[7 * src_strd]) * 75; |
461 | | |
462 | | o[2] = temp1 - 39 * pi2_src[3 * src_strd] + temp2 |
463 | | - 57 * pi2_src[5 * src_strd]; |
464 | | o[3] = temp1 - (pi2_src[src_strd] << 5) + temp2 |
465 | | - 164 * pi2_src[7 * src_strd]; |
466 | | |
467 | | /* |
468 | | eo[0] = 83 *pi2_src[ 2*8 ] + 36 *pi2_src[ 6*8 ]; |
469 | | eo[1] = 36 *pi2_src[ 2*8 ] + -83 *pi2_src[ 6*8 ]; |
470 | | ee[0] = 64 *pi2_src[ 0 ] + 64 *pi2_src[ 4*8 ]; |
471 | | ee[1] = 64 *pi2_src[ 0 ] + -64 *pi2_src[ 4*8 ]; |
472 | | */ |
473 | | |
474 | | /* Optimization: 4 mul + 2 add ---> 3 mul + 3 add */ |
475 | | temp1 = (pi2_src[2 * src_strd] + pi2_src[6 * src_strd]) * 36; |
476 | | eo[0] = temp1 + 47 * pi2_src[2 * src_strd]; |
477 | | eo[1] = temp1 - 119 * pi2_src[6 * src_strd]; |
478 | | |
479 | | /* Optimization: 4 mul + 2 add ---> 2 i4_shift + 2 add */ |
480 | | ee[0] = (pi2_src[0] + pi2_src[4 * src_strd]) << 6; |
481 | | ee[1] = (pi2_src[0] - pi2_src[4 * src_strd]) << 6; |
482 | | |
483 | | e[0] = ee[0] + eo[0]; |
484 | | e[3] = ee[0] - eo[0]; |
485 | | e[1] = ee[1] + eo[1]; |
486 | | e[2] = ee[1] - eo[1]; |
487 | | |
488 | | for(k = 0; k < 4; k++) |
489 | | { |
490 | | pi2_dst[k] = |
491 | | CLIP_S16(((e[k] + o[k] + add) >> i4_shift)); |
492 | | pi2_dst[k + 4] = |
493 | | CLIP_S16(((e[3 - k] - o[3 - k] + add) >> i4_shift)); |
494 | | } |
495 | | } |
496 | | pi2_src++; |
497 | | pi2_dst += dst_strd; |
498 | | zero_cols = zero_cols >> 1; |
499 | | } |
500 | | |
501 | | } |
502 | | #endif |
503 | | |
504 | | |
505 | | /** |
506 | | ******************************************************************************* |
507 | | * |
508 | | * @brief |
509 | | * This function performs Single stage Inverse transform for 16x16 input |
510 | | * block |
511 | | * |
512 | | * @par Description: |
513 | | * Performs single stage 16x16 inverse transform by utilizing the symmetry |
514 | | * of transformation matrix and reducing number of multiplications wherever |
515 | | * possible but keeping the number of operations (addition, multiplication |
516 | | * and shift) the same |
517 | | * |
518 | | * @param[in] pi2_src |
519 | | * Input 16x16 coefficients |
520 | | * |
521 | | * @param[out] pi2_dst |
522 | | * Output 16x16 block |
523 | | * |
524 | | * @param[in] src_strd |
525 | | * Input stride |
526 | | * |
527 | | * @param[in] dst_strd |
528 | | * Output Stride |
529 | | * |
530 | | * @param[in] i4_shift |
531 | | * Output shift |
532 | | * |
533 | | * @param[in] zero_cols |
534 | | * Zero columns in pi2_src |
535 | | * |
536 | | * @returns Void |
537 | | * |
538 | | * @remarks |
539 | | * None |
540 | | * |
541 | | ******************************************************************************* |
542 | | */ |
543 | | |
544 | | #if NON_OPTIMIZED |
545 | | void ihevc_itrans_16x16(WORD16 *pi2_src, |
546 | | WORD16 *pi2_dst, |
547 | | WORD32 src_strd, |
548 | | WORD32 dst_strd, |
549 | | WORD32 i4_shift, |
550 | | WORD32 zero_cols) |
551 | 0 | { |
552 | 0 | WORD32 j, k; |
553 | 0 | WORD32 e[8], o[8]; |
554 | 0 | WORD32 ee[4], eo[4]; |
555 | 0 | WORD32 eee[2], eeo[2]; |
556 | 0 | WORD32 add; |
557 | |
|
558 | 0 | add = 1 << (i4_shift - 1); |
559 | |
|
560 | 0 | for(j = 0; j < TRANS_SIZE_16; j++) |
561 | 0 | { |
562 | | /* Checking for Zero Cols */ |
563 | 0 | if((zero_cols & 1) == 1) |
564 | 0 | { |
565 | 0 | memset(pi2_dst, 0, TRANS_SIZE_16 * sizeof(WORD16)); |
566 | 0 | } |
567 | 0 | else |
568 | 0 | { |
569 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
570 | 0 | for(k = 0; k < 8; k++) |
571 | 0 | { |
572 | 0 | o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd] |
573 | 0 | + g_ai2_ihevc_trans_16[3][k] |
574 | 0 | * pi2_src[3 * src_strd] |
575 | 0 | + g_ai2_ihevc_trans_16[5][k] |
576 | 0 | * pi2_src[5 * src_strd] |
577 | 0 | + g_ai2_ihevc_trans_16[7][k] |
578 | 0 | * pi2_src[7 * src_strd] |
579 | 0 | + g_ai2_ihevc_trans_16[9][k] |
580 | 0 | * pi2_src[9 * src_strd] |
581 | 0 | + g_ai2_ihevc_trans_16[11][k] |
582 | 0 | * pi2_src[11 * src_strd] |
583 | 0 | + g_ai2_ihevc_trans_16[13][k] |
584 | 0 | * pi2_src[13 * src_strd] |
585 | 0 | + g_ai2_ihevc_trans_16[15][k] |
586 | 0 | * pi2_src[15 * src_strd]; |
587 | 0 | } |
588 | 0 | for(k = 0; k < 4; k++) |
589 | 0 | { |
590 | 0 | eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd] |
591 | 0 | + g_ai2_ihevc_trans_16[6][k] |
592 | 0 | * pi2_src[6 * src_strd] |
593 | 0 | + g_ai2_ihevc_trans_16[10][k] |
594 | 0 | * pi2_src[10 * src_strd] |
595 | 0 | + g_ai2_ihevc_trans_16[14][k] |
596 | 0 | * pi2_src[14 * src_strd]; |
597 | 0 | } |
598 | 0 | eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd] |
599 | 0 | + g_ai2_ihevc_trans_16[12][0] |
600 | 0 | * pi2_src[12 * src_strd]; |
601 | 0 | eee[0] = |
602 | 0 | g_ai2_ihevc_trans_16[0][0] * pi2_src[0] |
603 | 0 | + g_ai2_ihevc_trans_16[8][0] |
604 | 0 | * pi2_src[8 |
605 | 0 | * src_strd]; |
606 | 0 | eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd] |
607 | 0 | + g_ai2_ihevc_trans_16[12][1] |
608 | 0 | * pi2_src[12 * src_strd]; |
609 | 0 | eee[1] = |
610 | 0 | g_ai2_ihevc_trans_16[0][1] * pi2_src[0] |
611 | 0 | + g_ai2_ihevc_trans_16[8][1] |
612 | 0 | * pi2_src[8 |
613 | 0 | * src_strd]; |
614 | | |
615 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
616 | 0 | for(k = 0; k < 2; k++) |
617 | 0 | { |
618 | 0 | ee[k] = eee[k] + eeo[k]; |
619 | 0 | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
620 | 0 | } |
621 | 0 | for(k = 0; k < 4; k++) |
622 | 0 | { |
623 | 0 | e[k] = ee[k] + eo[k]; |
624 | 0 | e[k + 4] = ee[3 - k] - eo[3 - k]; |
625 | 0 | } |
626 | 0 | for(k = 0; k < 8; k++) |
627 | 0 | { |
628 | 0 | pi2_dst[k] = |
629 | 0 | CLIP_S16(((e[k] + o[k] + add) >> i4_shift)); |
630 | 0 | pi2_dst[k + 8] = |
631 | 0 | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> i4_shift)); |
632 | 0 | } |
633 | 0 | } |
634 | 0 | pi2_src++; |
635 | 0 | pi2_dst += dst_strd; |
636 | 0 | zero_cols = zero_cols >> 1; |
637 | 0 | } |
638 | 0 | } |
639 | | #else |
640 | | void ihevc_itrans_16x16(WORD16 *pi2_src, |
641 | | WORD16 *pi2_dst, |
642 | | WORD32 src_strd, |
643 | | WORD32 dst_strd, |
644 | | WORD32 i4_shift, |
645 | | WORD32 zero_cols) |
646 | | { |
647 | | WORD32 j, k; |
648 | | WORD32 e[8], o[8]; |
649 | | WORD32 ee[4], eo[4]; |
650 | | WORD32 eee[2], eeo[2]; |
651 | | WORD32 add; |
652 | | WORD32 temp1, temp2; |
653 | | |
654 | | add = 1 << (i4_shift - 1); |
655 | | /***************************************************************************/ |
656 | | /* Transform Matrix 16x16 */ |
657 | | /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ |
658 | | /* 0 { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64}, */ |
659 | | /* 1 { 90, 87, 80, 70, 57, 43, 25, 9, -9,-25,-43,-57,-70,-80,-87,-90}, */ |
660 | | /* 2 { 89, 75, 50, 18,-18,-50,-75,-89,-89,-75,-50,-18, 18, 50, 75, 89}, */ |
661 | | /* 3 { 87, 57, 9,-43,-80,-90,-70,-25, 25, 70, 90, 80, 43, -9,-57,-87}, */ |
662 | | /* 4 { 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83}, */ |
663 | | /* 5 { 80, 9,-70,-87,-25, 57, 90, 43,-43,-90,-57, 25, 87, 70, -9,-80}, */ |
664 | | /* 6 { 75,-18,-89,-50, 50, 89, 18,-75,-75, 18, 89, 50,-50,-89,-18, 75}, */ |
665 | | /* 7 { 70,-43,-87, 9, 90, 25,-80,-57, 57, 80,-25,-90, -9, 87, 43,-70}, */ |
666 | | /* 8 { 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64}, */ |
667 | | /* 9 { 57,-80,-25, 90, -9,-87, 43, 70,-70,-43, 87, 9,-90, 25, 80,-57}, */ |
668 | | /* 10 { 50,-89, 18, 75,-75,-18, 89,-50,-50, 89,-18,-75, 75, 18,-89, 50}, */ |
669 | | /* 11 { 43,-90, 57, 25,-87, 70, 9,-80, 80, -9,-70, 87,-25,-57, 90,-43}, */ |
670 | | /* 12 { 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36}, */ |
671 | | /* 13 { 25,-70, 90,-80, 43, 9,-57, 87,-87, 57, -9,-43, 80,-90, 70,-25}, */ |
672 | | /* 14 { 18,-50, 75,-89, 89,-75, 50,-18,-18, 50,-75, 89,-89, 75,-50, 18}, */ |
673 | | /* 15 { 9,-25, 43,-57, 70,-80, 87,-90, 90,-87, 80,-70, 57,-43, 25, -9} */ |
674 | | /***************************************************************************/ |
675 | | |
676 | | for(j = 0; j < TRANS_SIZE_16; j++) |
677 | | { |
678 | | /* Checking for Zero Cols */ |
679 | | if((zero_cols & 1) == 1) |
680 | | { |
681 | | memset(pi2_dst, 0, TRANS_SIZE_16 * sizeof(WORD16)); |
682 | | } |
683 | | else |
684 | | { |
685 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
686 | | { |
687 | | /* |
688 | | o[k] = g_ai2_ihevc_trans_16[ 1][k]*pi2_src[ src_strd ] + g_ai2_ihevc_trans_16[ 3][k]*pi2_src[ 3*src_strd ] + g_ai2_ihevc_trans_16[ 5][k]*pi2_src[ 5*src_strd ] + g_ai2_ihevc_trans_16[ 7][k]*pi2_src[ 7*src_strd ] + |
689 | | g_ai2_ihevc_trans_16[ 9][k]*pi2_src[ 9*src_strd ] + g_ai2_ihevc_trans_16[11][k]*pi2_src[11*src_strd ] + g_ai2_ihevc_trans_16[13][k]*pi2_src[13*src_strd ] + g_ai2_ihevc_trans_16[15][k]*pi2_src[15*src_strd ]; |
690 | | */ |
691 | | |
692 | | o[0] = 90 * pi2_src[src_strd] + 87 * pi2_src[3 * src_strd] |
693 | | + 80 * pi2_src[5 * src_strd] |
694 | | + 70 * pi2_src[7 * src_strd] |
695 | | + 57 * pi2_src[9 * src_strd] |
696 | | + 43 * pi2_src[11 * src_strd] |
697 | | + 25 * pi2_src[13 * src_strd] |
698 | | + 9 * pi2_src[15 * src_strd]; |
699 | | |
700 | | o[1] = 87 * pi2_src[src_strd] + 57 * pi2_src[3 * src_strd] |
701 | | + 9 * pi2_src[5 * src_strd] |
702 | | + -43 * pi2_src[7 * src_strd] |
703 | | + -80 * pi2_src[9 * src_strd] |
704 | | + -90 * pi2_src[11 * src_strd] |
705 | | + -70 * pi2_src[13 * src_strd] |
706 | | + -25 * pi2_src[15 * src_strd]; |
707 | | |
708 | | o[2] = 80 * pi2_src[src_strd] + 9 * pi2_src[3 * src_strd] |
709 | | + -70 * pi2_src[5 * src_strd] |
710 | | + -87 * pi2_src[7 * src_strd] |
711 | | + -25 * pi2_src[9 * src_strd] |
712 | | + 57 * pi2_src[11 * src_strd] |
713 | | + 90 * pi2_src[13 * src_strd] |
714 | | + 43 * pi2_src[15 * src_strd]; |
715 | | |
716 | | o[3] = 70 * pi2_src[src_strd] + -43 * pi2_src[3 * src_strd] |
717 | | + -87 * pi2_src[5 * src_strd] |
718 | | + 9 * pi2_src[7 * src_strd] |
719 | | + 90 * pi2_src[9 * src_strd] |
720 | | + 25 * pi2_src[11 * src_strd] |
721 | | + -80 * pi2_src[13 * src_strd] |
722 | | + -57 * pi2_src[15 * src_strd]; |
723 | | |
724 | | o[4] = 57 * pi2_src[src_strd] + -80 * pi2_src[3 * src_strd] |
725 | | + -25 * pi2_src[5 * src_strd] |
726 | | + 90 * pi2_src[7 * src_strd] |
727 | | + -9 * pi2_src[9 * src_strd] |
728 | | + -87 * pi2_src[11 * src_strd] |
729 | | + 43 * pi2_src[13 * src_strd] |
730 | | + 70 * pi2_src[15 * src_strd]; |
731 | | |
732 | | o[5] = 43 * pi2_src[src_strd] + -90 * pi2_src[3 * src_strd] |
733 | | + 57 * pi2_src[5 * src_strd] |
734 | | + 25 * pi2_src[7 * src_strd] |
735 | | + -87 * pi2_src[9 * src_strd] |
736 | | + 70 * pi2_src[11 * src_strd] |
737 | | + 9 * pi2_src[13 * src_strd] |
738 | | + -80 * pi2_src[15 * src_strd]; |
739 | | |
740 | | o[6] = 25 * pi2_src[src_strd] + -70 * pi2_src[3 * src_strd] |
741 | | + 90 * pi2_src[5 * src_strd] |
742 | | + -80 * pi2_src[7 * src_strd] |
743 | | + 43 * pi2_src[9 * src_strd] |
744 | | + 9 * pi2_src[11 * src_strd] |
745 | | + -57 * pi2_src[13 * src_strd] |
746 | | + 87 * pi2_src[15 * src_strd]; |
747 | | |
748 | | o[7] = 9 * pi2_src[src_strd] + -25 * pi2_src[3 * src_strd] |
749 | | + 43 * pi2_src[5 * src_strd] |
750 | | + -57 * pi2_src[7 * src_strd] |
751 | | + 70 * pi2_src[9 * src_strd] |
752 | | + -80 * pi2_src[11 * src_strd] |
753 | | + 87 * pi2_src[13 * src_strd] |
754 | | + -90 * pi2_src[15 * src_strd]; |
755 | | } |
756 | | { |
757 | | temp1 = (pi2_src[2 * src_strd] + pi2_src[6 * src_strd]) * 75; |
758 | | temp2 = (pi2_src[10 * src_strd] + pi2_src[14 * src_strd]) * 50; |
759 | | eo[0] = temp1 + 14 * pi2_src[2 * src_strd] + temp2 |
760 | | - (pi2_src[14 * src_strd] << 5); |
761 | | eo[1] = temp1 - 93 * pi2_src[6 * src_strd] - temp2 |
762 | | - 39 * pi2_src[10 * src_strd]; |
763 | | |
764 | | temp1 = (pi2_src[2 * src_strd] - pi2_src[6 * src_strd]) * 50; |
765 | | temp2 = (pi2_src[10 * src_strd] + pi2_src[14 * src_strd]) * 75; |
766 | | eo[2] = temp1 - 39 * pi2_src[6 * src_strd] + temp2 |
767 | | - 57 * pi2_src[10 * src_strd]; |
768 | | eo[3] = temp1 - (pi2_src[2 * src_strd] << 5) + temp2 |
769 | | - 164 * pi2_src[14 * src_strd]; |
770 | | } |
771 | | |
772 | | temp1 = (pi2_src[4 * src_strd] + pi2_src[12 * src_strd]) * 36; |
773 | | eeo[0] = temp1 + 47 * pi2_src[4 * src_strd]; |
774 | | eeo[1] = temp1 - 119 * pi2_src[12 * src_strd]; |
775 | | |
776 | | eee[0] = (pi2_src[0] + pi2_src[8 * src_strd]) << 6; |
777 | | eee[1] = (pi2_src[0] - pi2_src[8 * src_strd]) << 6; |
778 | | |
779 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
780 | | for(k = 0; k < 2; k++) |
781 | | { |
782 | | ee[k] = eee[k] + eeo[k]; |
783 | | ee[k + 2] = eee[1 - k] - eeo[1 - k]; |
784 | | } |
785 | | for(k = 0; k < 4; k++) |
786 | | { |
787 | | e[k] = ee[k] + eo[k]; |
788 | | e[k + 4] = ee[3 - k] - eo[3 - k]; |
789 | | } |
790 | | for(k = 0; k < 8; k++) |
791 | | { |
792 | | pi2_dst[k] = |
793 | | CLIP_S16(((e[k] + o[k] + add) >> i4_shift)); |
794 | | pi2_dst[k + 8] = |
795 | | CLIP_S16(((e[7 - k] - o[7 - k] + add) >> i4_shift)); |
796 | | } |
797 | | } |
798 | | pi2_src++; |
799 | | pi2_dst += dst_strd; |
800 | | zero_cols = zero_cols >> 1; |
801 | | } |
802 | | } |
803 | | #endif |
804 | | |
805 | | /** |
806 | | ******************************************************************************* |
807 | | * |
808 | | * @brief |
809 | | * This function performs Single stage Inverse transform for 32x32 input |
810 | | * block |
811 | | * |
812 | | * @par Description: |
813 | | * Performs single stage 32x32 inverse transform by utilizing the symmetry |
814 | | * of transformation matrix and reducing number of multiplications wherever |
815 | | * possible but keeping the number of operations (addition, multiplication |
816 | | * and shift) the same |
817 | | * |
818 | | * @param[in] pi2_src |
819 | | * Input 32x32 coefficients |
820 | | * |
821 | | * @param[out] pi2_dst |
822 | | * Output 32x32 block |
823 | | * |
824 | | * @param[in] src_strd |
825 | | * Input stride |
826 | | * |
827 | | * @param[in] dst_strd |
828 | | * Output Stride |
829 | | * |
830 | | * @param[in] i4_shift |
831 | | * Output shift |
832 | | * |
833 | | * @param[in] zero_cols |
834 | | * Zero columns in pi2_src |
835 | | * |
836 | | * @returns Void |
837 | | * |
838 | | * @remarks |
839 | | * None |
840 | | * |
841 | | ******************************************************************************* |
842 | | */ |
843 | | |
844 | | |
845 | | void ihevc_itrans_32x32(WORD16 *pi2_src, |
846 | | WORD16 *pi2_dst, |
847 | | WORD32 src_strd, |
848 | | WORD32 dst_strd, |
849 | | WORD32 i4_shift, |
850 | | WORD32 zero_cols) |
851 | 0 | { |
852 | 0 | WORD32 j, k; |
853 | 0 | WORD32 e[16], o[16]; |
854 | 0 | WORD32 ee[8], eo[8]; |
855 | 0 | WORD32 eee[4], eeo[4]; |
856 | 0 | WORD32 eeee[2], eeeo[2]; |
857 | 0 | WORD32 add; |
858 | |
|
859 | 0 | add = 1 << (i4_shift - 1); |
860 | |
|
861 | 0 | for(j = 0; j < TRANS_SIZE_32; j++) |
862 | 0 | { |
863 | | /* Checking for Zero Cols */ |
864 | 0 | if((zero_cols & 1) == 1) |
865 | 0 | { |
866 | 0 | memset(pi2_dst, 0, TRANS_SIZE_32 * sizeof(WORD16)); |
867 | 0 | } |
868 | 0 | else |
869 | 0 | { |
870 | | /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */ |
871 | 0 | for(k = 0; k < 16; k++) |
872 | 0 | { |
873 | 0 | o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd] |
874 | 0 | + g_ai2_ihevc_trans_32[3][k] |
875 | 0 | * pi2_src[3 * src_strd] |
876 | 0 | + g_ai2_ihevc_trans_32[5][k] |
877 | 0 | * pi2_src[5 * src_strd] |
878 | 0 | + g_ai2_ihevc_trans_32[7][k] |
879 | 0 | * pi2_src[7 * src_strd] |
880 | 0 | + g_ai2_ihevc_trans_32[9][k] |
881 | 0 | * pi2_src[9 * src_strd] |
882 | 0 | + g_ai2_ihevc_trans_32[11][k] |
883 | 0 | * pi2_src[11 * src_strd] |
884 | 0 | + g_ai2_ihevc_trans_32[13][k] |
885 | 0 | * pi2_src[13 * src_strd] |
886 | 0 | + g_ai2_ihevc_trans_32[15][k] |
887 | 0 | * pi2_src[15 * src_strd] |
888 | 0 | + g_ai2_ihevc_trans_32[17][k] |
889 | 0 | * pi2_src[17 * src_strd] |
890 | 0 | + g_ai2_ihevc_trans_32[19][k] |
891 | 0 | * pi2_src[19 * src_strd] |
892 | 0 | + g_ai2_ihevc_trans_32[21][k] |
893 | 0 | * pi2_src[21 * src_strd] |
894 | 0 | + g_ai2_ihevc_trans_32[23][k] |
895 | 0 | * pi2_src[23 * src_strd] |
896 | 0 | + g_ai2_ihevc_trans_32[25][k] |
897 | 0 | * pi2_src[25 * src_strd] |
898 | 0 | + g_ai2_ihevc_trans_32[27][k] |
899 | 0 | * pi2_src[27 * src_strd] |
900 | 0 | + g_ai2_ihevc_trans_32[29][k] |
901 | 0 | * pi2_src[29 * src_strd] |
902 | 0 | + g_ai2_ihevc_trans_32[31][k] |
903 | 0 | * pi2_src[31 * src_strd]; |
904 | 0 | } |
905 | 0 | for(k = 0; k < 8; k++) |
906 | 0 | { |
907 | 0 | eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd] |
908 | 0 | + g_ai2_ihevc_trans_32[6][k] |
909 | 0 | * pi2_src[6 * src_strd] |
910 | 0 | + g_ai2_ihevc_trans_32[10][k] |
911 | 0 | * pi2_src[10 * src_strd] |
912 | 0 | + g_ai2_ihevc_trans_32[14][k] |
913 | 0 | * pi2_src[14 * src_strd] |
914 | 0 | + g_ai2_ihevc_trans_32[18][k] |
915 | 0 | * pi2_src[18 * src_strd] |
916 | 0 | + g_ai2_ihevc_trans_32[22][k] |
917 | 0 | * pi2_src[22 * src_strd] |
918 | 0 | + g_ai2_ihevc_trans_32[26][k] |
919 | 0 | * pi2_src[26 * src_strd] |
920 | 0 | + g_ai2_ihevc_trans_32[30][k] |
921 | 0 | * pi2_src[30 * src_strd]; |
922 | 0 | } |
923 | 0 | for(k = 0; k < 4; k++) |
924 | 0 | { |
925 | 0 | eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd] |
926 | 0 | + g_ai2_ihevc_trans_32[12][k] |
927 | 0 | * pi2_src[12 * src_strd] |
928 | 0 | + g_ai2_ihevc_trans_32[20][k] |
929 | 0 | * pi2_src[20 * src_strd] |
930 | 0 | + g_ai2_ihevc_trans_32[28][k] |
931 | 0 | * pi2_src[28 * src_strd]; |
932 | 0 | } |
933 | 0 | eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd] |
934 | 0 | + g_ai2_ihevc_trans_32[24][0] |
935 | 0 | * pi2_src[24 * src_strd]; |
936 | 0 | eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd] |
937 | 0 | + g_ai2_ihevc_trans_32[24][1] |
938 | 0 | * pi2_src[24 * src_strd]; |
939 | 0 | eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0] |
940 | 0 | + g_ai2_ihevc_trans_32[16][0] |
941 | 0 | * pi2_src[16 * src_strd]; |
942 | 0 | eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0] |
943 | 0 | + g_ai2_ihevc_trans_32[16][1] |
944 | 0 | * pi2_src[16 * src_strd]; |
945 | | |
946 | | /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */ |
947 | 0 | eee[0] = eeee[0] + eeeo[0]; |
948 | 0 | eee[3] = eeee[0] - eeeo[0]; |
949 | 0 | eee[1] = eeee[1] + eeeo[1]; |
950 | 0 | eee[2] = eeee[1] - eeeo[1]; |
951 | 0 | for(k = 0; k < 4; k++) |
952 | 0 | { |
953 | 0 | ee[k] = eee[k] + eeo[k]; |
954 | 0 | ee[k + 4] = eee[3 - k] - eeo[3 - k]; |
955 | 0 | } |
956 | 0 | for(k = 0; k < 8; k++) |
957 | 0 | { |
958 | 0 | e[k] = ee[k] + eo[k]; |
959 | 0 | e[k + 8] = ee[7 - k] - eo[7 - k]; |
960 | 0 | } |
961 | 0 | for(k = 0; k < 16; k++) |
962 | 0 | { |
963 | 0 | pi2_dst[k] = |
964 | 0 | CLIP_S16(((e[k] + o[k] + add) >> i4_shift)); |
965 | 0 | pi2_dst[k + 16] = |
966 | 0 | CLIP_S16(((e[15 - k] - o[15 - k] + add) >> i4_shift)); |
967 | 0 | } |
968 | 0 | } |
969 | 0 | pi2_src++; |
970 | 0 | pi2_dst += dst_strd; |
971 | 0 | zero_cols = zero_cols >> 1; |
972 | 0 | } |
973 | 0 | } |
974 | | |