Coverage Report

Created: 2023-09-25 06:49

/src/libhevc/common/ihevc_itrans.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_itrans.c
22
 *
23
 * @brief
24
 *  Contains function definitions for single stage  inverse transform
25
 *
26
 * @author
27
 *  100470
28
 *
29
 * @par List of Functions:
30
 *  - ihevc_itrans_4x4_ttype1()
31
 *  - ihevc_itrans_4x4()
32
 *  - ihevc_itrans_8x8()
33
 *  - ihevc_itrans_16x16()
34
 *  - ihevc_itrans_32x32()
35
 *
36
 * @remarks
37
 *  None
38
 *
39
 *******************************************************************************
40
 */
41
#include <stdio.h>
42
#include <string.h>
43
#include "ihevc_typedefs.h"
44
#include "ihevc_macros.h"
45
#include "ihevc_platform_macros.h"
46
#include "ihevc_defs.h"
47
#include "ihevc_trans_tables.h"
48
#include "ihevc_func_selector.h"
49
#include "ihevc_trans_macros.h"
50
51
#define NON_OPTIMIZED 1
52
53
/**
54
 *******************************************************************************
55
 *
56
 * @brief
57
 *  This function performs Single stage  Inverse transform type 1 (DST) for
58
 * 4x4 input block
59
 *
60
 * @par Description:
61
 *  Performs single stage 4x4 inverse transform type 1  by utilizing the
62
 * symmetry of transformation matrix  and reducing number of multiplications
63
 * wherever  possible but keeping the number of operations
64
 * (addition,multiplication and shift)same
65
 *
66
 * @param[in] pi2_src
67
 *  Input 4x4 coefficients
68
 *
69
 * @param[out] pi2_dst
70
 *  Output 4x4 block
71
 *
72
 * @param[in] src_strd
73
 *  Input stride
74
 *
75
 * @param[in] dst_strd
76
 *  Output Stride
77
 *
78
 * @param[in] i4_shift
79
 *  Output shift
80
 *
81
 * @param[in] zero_cols
82
 *  Zero columns in pi2_src
83
 *
84
 * @returns  Void
85
 *
86
 * @remarks
87
 *  None
88
 *
89
 *******************************************************************************
90
 */
91
92
93
void ihevc_itrans_4x4_ttype1(WORD16 *pi2_src,
94
                             WORD16 *pi2_dst,
95
                             WORD32 src_strd,
96
                             WORD32 dst_strd,
97
                             WORD32 i4_shift,
98
                             WORD32 zero_cols)
99
0
{
100
0
    WORD32 i, c[4];
101
0
    WORD32 add;
102
103
0
    add = 1 << (i4_shift - 1);
104
105
0
    for(i = 0; i < TRANS_SIZE_4; i++)
106
0
    {
107
        /* Checking for Zero Cols */
108
0
        if((zero_cols & 1) == 1)
109
0
        {
110
0
            memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16));
111
0
        }
112
0
        else
113
0
        {
114
            // Intermediate Variables
115
0
            c[0] = pi2_src[0] + pi2_src[2 * src_strd];
116
0
            c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd];
117
0
            c[2] = pi2_src[0] - pi2_src[3 * src_strd];
118
0
            c[3] = 74 * pi2_src[src_strd];
119
120
0
            pi2_dst[0] =
121
0
                            CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> i4_shift);
122
0
            pi2_dst[1] =
123
0
                            CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> i4_shift);
124
0
            pi2_dst[2] =
125
0
                            CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> i4_shift);
126
0
            pi2_dst[3] =
127
0
                            CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> i4_shift);
128
0
        }
129
0
        pi2_src++;
130
0
        pi2_dst += dst_strd;
131
0
        zero_cols = zero_cols >> 1;
132
0
    }
133
0
}
134
135
136
/**
137
 *******************************************************************************
138
 *
139
 * @brief
140
 *  This function performs Single stage  Inverse transform for 4x4 input
141
 * block
142
 *
143
 * @par Description:
144
 *  Performs single stage 4x4 inverse transform by utilizing  the symmetry of
145
 * transformation matrix and reducing number  of multiplications wherever
146
 * possible but keeping the  number of operations(addition,multiplication and
147
 * shift)  same
148
 *
149
 * @param[in] pi2_src
150
 *  Input 4x4 coefficients
151
 *
152
 * @param[out] pi2_dst
153
 *  Output 4x4 block
154
 *
155
 * @param[in] src_strd
156
 *  Input stride
157
 *
158
 * @param[in] dst_strd
159
 *  Output Stride
160
 *
161
 * @param[in] i4_shift
162
 *  Output shift
163
 *
164
 * @param[in] zero_cols
165
 *  Zero columns in pi2_src
166
 *
167
 * @returns  Void
168
 *
169
 * @remarks
170
 *  None
171
 *
172
 *******************************************************************************
173
 */
174
175
#if NON_OPTIMIZED
176
void ihevc_itrans_4x4(WORD16 *pi2_src,
177
                      WORD16 *pi2_dst,
178
                      WORD32 src_strd,
179
                      WORD32 dst_strd,
180
                      WORD32 i4_shift,
181
                      WORD32 zero_cols)
182
0
{
183
0
    WORD32 j;
184
0
    WORD32 e[2], o[2];
185
0
    WORD32 add;
186
187
0
    add = 1 << (i4_shift - 1);
188
189
0
    for(j = 0; j < TRANS_SIZE_4; j++)
190
0
    {
191
        /* Checking for Zero Cols */
192
0
        if((zero_cols & 1) == 1)
193
0
        {
194
0
            memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16));
195
0
        }
196
0
        else
197
0
        {
198
199
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
200
0
            o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd]
201
0
                            + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd];
202
0
            o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd]
203
0
                            + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd];
204
0
            e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0]
205
0
                            + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd];
206
0
            e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0]
207
0
                            + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd];
208
209
0
            pi2_dst[0] =
210
0
                            CLIP_S16(((e[0] + o[0] + add) >> i4_shift));
211
0
            pi2_dst[1] =
212
0
                            CLIP_S16(((e[1] + o[1] + add) >> i4_shift));
213
0
            pi2_dst[2] =
214
0
                            CLIP_S16(((e[1] - o[1] + add) >> i4_shift));
215
0
            pi2_dst[3] =
216
0
                            CLIP_S16(((e[0] - o[0] + add) >> i4_shift));
217
218
0
        }
219
0
        pi2_src++;
220
0
        pi2_dst += dst_strd;
221
0
        zero_cols = zero_cols >> 1;
222
0
    }
223
0
}
224
#else
225
void ihevc_itrans_4x4(WORD16 *pi2_src,
226
                      WORD16 *pi2_dst,
227
                      WORD32 src_strd,
228
                      WORD32 dst_strd,
229
                      WORD32 i4_shift,
230
                      WORD32 zero_cols)
231
{
232
    WORD32 j;
233
    WORD32 e[2], o[2];
234
    WORD32 add;
235
236
    add = 1 << (i4_shift - 1);
237
238
    /***************************************************************************/
239
    /* Transform Matrix 4x4                                                    */
240
    /*      0   1   2   3                                                      */
241
    /* 0 { 64, 64, 64, 64},                                                    */
242
    /* 1 { 83, 36,-36,-83},                                                    */
243
    /* 2 { 64,-64,-64, 64},                                                    */
244
    /* 3 { 36,-83, 83,-36}                                                     */
245
    /***************************************************************************/
246
247
    for(j = 0; j < TRANS_SIZE_4; j++)
248
    {
249
        WORD32 temp;
250
251
        /* Checking for Zero Cols */
252
        if((zero_cols & 1) == 1)
253
        {
254
            memset(pi2_dst, 0, TRANS_SIZE_4 * sizeof(WORD16));
255
        }
256
        else
257
        {
258
            /* Common operation in o[0] and o[1] */
259
            temp = (pi2_src[src_strd] + pi2_src[3 * src_strd]) * 36;
260
261
            o[0] = temp + 47 * pi2_src[src_strd];
262
            o[1] = temp - 119 * pi2_src[3 * src_strd];
263
            e[0] = (pi2_src[0] + pi2_src[2 * src_strd]) << 6;
264
            e[1] = (pi2_src[0] - pi2_src[2 * src_strd]) << 6;
265
266
            pi2_dst[0] =
267
                            CLIP_S16(((e[0] + o[0] + add) >> i4_shift));
268
            pi2_dst[1] =
269
                            CLIP_S16(((e[1] + o[1] + add) >> i4_shift));
270
            pi2_dst[2] =
271
                            CLIP_S16(((e[1] - o[1] + add) >> i4_shift));
272
            pi2_dst[3] =
273
                            CLIP_S16(((e[0] - o[0] + add) >> i4_shift));
274
        }
275
        pi2_src++;
276
        pi2_dst += dst_strd;
277
        zero_cols = zero_cols >> 1;
278
    }
279
}
280
#endif
281
282
/**
283
 *******************************************************************************
284
 *
285
 * @brief
286
 *  This function performs Single stage  Inverse transform for 8x8 input
287
 * block
288
 *
289
 * @par Description:
290
 *  Performs single stage 8x8 inverse transform by utilizing  the symmetry of
291
 * transformation matrix and reducing number  of multiplications wherever
292
 * possible but keeping the  number of operations(addition,multiplication and
293
 * shift)  same
294
 *
295
 * @param[in] pi2_src
296
 *  Input 8x8 coefficients
297
 *
298
 * @param[out] pi2_dst
299
 *  Output 8x8 block
300
 *
301
 * @param[in] src_strd
302
 *  Input stride
303
 *
304
 * @param[in] dst_strd
305
 *  Output Stride
306
 *
307
 * @param[in] i4_shift
308
 *  Output shift
309
 *
310
 * @param[in] zero_cols
311
 *  Zero columns in pi2_src
312
 *
313
 * @returns  Void
314
 *
315
 * @remarks
316
 *  None
317
 *
318
 *******************************************************************************
319
 */
320
321
#if NON_OPTIMIZED
322
void ihevc_itrans_8x8(WORD16 *pi2_src,
323
                      WORD16 *pi2_dst,
324
                      WORD32 src_strd,
325
                      WORD32 dst_strd,
326
                      WORD32 i4_shift,
327
                      WORD32 zero_cols)
328
0
{
329
0
    WORD32 j, k;
330
0
    WORD32 e[4], o[4];
331
0
    WORD32 ee[2], eo[2];
332
0
    WORD32 add;
333
334
0
    add = 1 << (i4_shift - 1);
335
336
0
    for(j = 0; j < TRANS_SIZE_8; j++)
337
0
    {
338
        /* Checking for Zero Cols */
339
0
        if((zero_cols & 1) == 1)
340
0
        {
341
0
            memset(pi2_dst, 0, TRANS_SIZE_8 * sizeof(WORD16));
342
0
        }
343
0
        else
344
0
        {
345
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
346
0
            for(k = 0; k < 4; k++)
347
0
            {
348
0
                o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
349
0
                                + g_ai2_ihevc_trans_8[3][k]
350
0
                                                * pi2_src[3 * src_strd]
351
0
                                + g_ai2_ihevc_trans_8[5][k]
352
0
                                                * pi2_src[5 * src_strd]
353
0
                                + g_ai2_ihevc_trans_8[7][k]
354
0
                                                * pi2_src[7 * src_strd];
355
0
            }
356
357
0
            eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
358
0
                            + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
359
0
            eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
360
0
                            + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
361
0
            ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
362
0
                            + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
363
0
            ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
364
0
                            + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
365
366
            /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
367
0
            e[0] = ee[0] + eo[0];
368
0
            e[3] = ee[0] - eo[0];
369
0
            e[1] = ee[1] + eo[1];
370
0
            e[2] = ee[1] - eo[1];
371
0
            for(k = 0; k < 4; k++)
372
0
            {
373
0
                pi2_dst[k] =
374
0
                                CLIP_S16(((e[k] + o[k] + add) >> i4_shift));
375
0
                pi2_dst[k + 4] =
376
0
                                CLIP_S16(((e[3 - k] - o[3 - k] + add) >> i4_shift));
377
0
            }
378
0
        }
379
0
        pi2_src++;
380
0
        pi2_dst += dst_strd;
381
0
        zero_cols = zero_cols >> 1;
382
0
    }
383
0
}
384
385
#else
386
void ihevc_itrans_8x8(WORD16 *pi2_src,
387
                      WORD16 *pi2_dst,
388
                      WORD32 src_strd,
389
                      WORD32 dst_strd,
390
                      WORD32 i4_shift,
391
                      WORD32 zero_cols)
392
{
393
    /* Transform Matrix 8x8                          */
394
    /*              0    1    2   3   4   5   6   7  */
395
    /*     0 -      64   64   64  64  64  64  64  64 */
396
    /*     1 -      89   75   50  18 -18 -50 -75 -89 */
397
    /*     2 -      83   36  -36 -83 -83 -36  36  83 */
398
    /*     3 -      75  -18  -89 -50  50  89  18 -75 */
399
    /*     4 -      64  -64  -64  64  64 -64 -64  64 */
400
    /*     5 -      50  -89   18  75 -75 -18  89 -50 */
401
    /*     6 -      36  -83   83 -36 -36  83 -83  36 */
402
    /*     7 -      18  -50   75 -89  89 -75  50 -18 */
403
404
    /* 0th and 4th row will have no multiplications */
405
    /* 2nd and 6th row has only two coefff multiplies */
406
    /* 1st, 3rd, 5th and 7th rows have o mirror symmetry */
407
    WORD32 j, k;
408
    WORD32 temp1, temp2;
409
    WORD32 e[4], o[4];
410
    WORD32 ee[2], eo[2];
411
    WORD32 add;
412
413
    add = 1 << (i4_shift - 1);
414
415
    for(j = 0; j < TRANS_SIZE_8; j++)
416
    {
417
        /* Checking for Zero Cols */
418
        if((zero_cols & 1) == 1)
419
        {
420
            memset(pi2_dst, 0, TRANS_SIZE_8 * sizeof(WORD16));
421
        }
422
        else
423
        {
424
425
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
426
            /*
427
             o[0] = 89 *pi2_src[8] +  75 *pi2_src[3*8] +  50 *pi2_src[5*8] +  18 *pi2_src[7*8];
428
             o[1] = 75 *pi2_src[8] + -18 *pi2_src[3*8] + -89 *pi2_src[5*8] + -50 *pi2_src[7*8];
429
             o[2] = 50 *pi2_src[8] + -89 *pi2_src[3*8] +  18 *pi2_src[5*8] +  75 *pi2_src[7*8];
430
             o[3] = 18 *pi2_src[8] + -50 *pi2_src[3*8] +  75 *pi2_src[5*8] + -89 *pi2_src[7*8];
431
             */
432
433
            /* Optimization: 4 mul + 2 add  ---> 3 mul + 3 add */
434
            /*
435
             temp1 = (pi2_src[8  ] + pi2_src[3*8]) * 75;
436
             temp2 = (pi2_src[5*8] + pi2_src[7*8]) * 50;
437
438
             o[0] = temp1 + 14 * pi2_src[8  ] + temp2 - 32 * pi2_src[7*8];
439
             o[1] = temp1 - 93 * pi2_src[3*8] - temp2 - 39 * pi2_src[5*8];
440
             */
441
442
            temp1 = (pi2_src[src_strd] + pi2_src[3 * src_strd]) * 75;
443
            temp2 = (pi2_src[5 * src_strd] + pi2_src[7 * src_strd]) * 50;
444
445
            o[0] = temp1 + 14 * pi2_src[src_strd] + temp2
446
                            - (pi2_src[7 * src_strd] << 5);
447
            o[1] = temp1 - 93 * pi2_src[3 * src_strd] - temp2
448
                            - 39 * pi2_src[5 * src_strd];
449
450
            /* Optimization: 4 mul + 2 add  ---> 3 mul + 3 add */
451
            /*
452
             temp1 = (pi2_src[8  ] - pi2_src[3*8]) * 50;
453
             temp2 = (pi2_src[5*8] + pi2_src[7*8]) * 75;
454
455
             o[2] = temp1 - 39 * pi2_src[3*8] + temp2 -  57 * pi2_src[5*8];
456
             o[3] = temp1 - 32 * pi2_src[8  ] + temp2 - 164 * pi2_src[7*8];
457
             */
458
459
            temp1 = (pi2_src[src_strd] - pi2_src[3 * src_strd]) * 50;
460
            temp2 = (pi2_src[5 * src_strd] + pi2_src[7 * src_strd]) * 75;
461
462
            o[2] = temp1 - 39 * pi2_src[3 * src_strd] + temp2
463
                            - 57 * pi2_src[5 * src_strd];
464
            o[3] = temp1 - (pi2_src[src_strd] << 5) + temp2
465
                            - 164 * pi2_src[7 * src_strd];
466
467
            /*
468
             eo[0] = 83 *pi2_src[ 2*8 ] +  36 *pi2_src[ 6*8 ];
469
             eo[1] = 36 *pi2_src[ 2*8 ] + -83 *pi2_src[ 6*8 ];
470
             ee[0] = 64 *pi2_src[ 0   ] +  64 *pi2_src[ 4*8 ];
471
             ee[1] = 64 *pi2_src[ 0   ] + -64 *pi2_src[ 4*8 ];
472
             */
473
474
            /* Optimization: 4 mul + 2 add  ---> 3 mul + 3 add */
475
            temp1 = (pi2_src[2 * src_strd] + pi2_src[6 * src_strd]) * 36;
476
            eo[0] = temp1 + 47 * pi2_src[2 * src_strd];
477
            eo[1] = temp1 - 119 * pi2_src[6 * src_strd];
478
479
            /* Optimization: 4 mul + 2 add  ---> 2 i4_shift + 2 add */
480
            ee[0] = (pi2_src[0] + pi2_src[4 * src_strd]) << 6;
481
            ee[1] = (pi2_src[0] - pi2_src[4 * src_strd]) << 6;
482
483
            e[0] = ee[0] + eo[0];
484
            e[3] = ee[0] - eo[0];
485
            e[1] = ee[1] + eo[1];
486
            e[2] = ee[1] - eo[1];
487
488
            for(k = 0; k < 4; k++)
489
            {
490
                pi2_dst[k] =
491
                                CLIP_S16(((e[k] + o[k] + add) >> i4_shift));
492
                pi2_dst[k + 4] =
493
                                CLIP_S16(((e[3 - k] - o[3 - k] + add) >> i4_shift));
494
            }
495
        }
496
        pi2_src++;
497
        pi2_dst += dst_strd;
498
        zero_cols = zero_cols >> 1;
499
    }
500
501
}
502
#endif
503
504
505
/**
506
 *******************************************************************************
507
 *
508
 * @brief
509
 *  This function performs Single stage  Inverse transform for 16x16 input
510
 * block
511
 *
512
 * @par Description:
513
 *  Performs single stage 16x16 inverse transform by  utilizing the symmetry
514
 * of transformation matrix  and reducing number of multiplications wherever
515
 * possible  but keeping the number of operations  (addition,multiplication
516
 * and shift) same
517
 *
518
 * @param[in] pi2_src
519
 *  Input 16x16 coefficients
520
 *
521
 * @param[out] pi2_dst
522
 *  Output 16x16 block
523
 *
524
 * @param[in] src_strd
525
 *  Input stride
526
 *
527
 * @param[in] dst_strd
528
 *  Output Stride
529
 *
530
 * @param[in] i4_shift
531
 *  Output shift
532
 *
533
 * @param[in] zero_cols
534
 *  Zero columns in pi2_src
535
 *
536
 * @returns  Void
537
 *
538
 * @remarks
539
 *  None
540
 *
541
 *******************************************************************************
542
 */
543
544
#if NON_OPTIMIZED
545
void ihevc_itrans_16x16(WORD16 *pi2_src,
546
                        WORD16 *pi2_dst,
547
                        WORD32 src_strd,
548
                        WORD32 dst_strd,
549
                        WORD32 i4_shift,
550
                        WORD32 zero_cols)
551
0
{
552
0
    WORD32 j, k;
553
0
    WORD32 e[8], o[8];
554
0
    WORD32 ee[4], eo[4];
555
0
    WORD32 eee[2], eeo[2];
556
0
    WORD32 add;
557
558
0
    add = 1 << (i4_shift - 1);
559
560
0
    for(j = 0; j < TRANS_SIZE_16; j++)
561
0
    {
562
        /* Checking for Zero Cols */
563
0
        if((zero_cols & 1) == 1)
564
0
        {
565
0
            memset(pi2_dst, 0, TRANS_SIZE_16 * sizeof(WORD16));
566
0
        }
567
0
        else
568
0
        {
569
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
570
0
            for(k = 0; k < 8; k++)
571
0
            {
572
0
                o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
573
0
                                + g_ai2_ihevc_trans_16[3][k]
574
0
                                                * pi2_src[3 * src_strd]
575
0
                                + g_ai2_ihevc_trans_16[5][k]
576
0
                                                * pi2_src[5 * src_strd]
577
0
                                + g_ai2_ihevc_trans_16[7][k]
578
0
                                                * pi2_src[7 * src_strd]
579
0
                                + g_ai2_ihevc_trans_16[9][k]
580
0
                                                * pi2_src[9 * src_strd]
581
0
                                + g_ai2_ihevc_trans_16[11][k]
582
0
                                                * pi2_src[11 * src_strd]
583
0
                                + g_ai2_ihevc_trans_16[13][k]
584
0
                                                * pi2_src[13 * src_strd]
585
0
                                + g_ai2_ihevc_trans_16[15][k]
586
0
                                                * pi2_src[15 * src_strd];
587
0
            }
588
0
            for(k = 0; k < 4; k++)
589
0
            {
590
0
                eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
591
0
                                + g_ai2_ihevc_trans_16[6][k]
592
0
                                                * pi2_src[6 * src_strd]
593
0
                                + g_ai2_ihevc_trans_16[10][k]
594
0
                                                * pi2_src[10 * src_strd]
595
0
                                + g_ai2_ihevc_trans_16[14][k]
596
0
                                                * pi2_src[14 * src_strd];
597
0
            }
598
0
            eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]
599
0
                            + g_ai2_ihevc_trans_16[12][0]
600
0
                                            * pi2_src[12 * src_strd];
601
0
            eee[0] =
602
0
                            g_ai2_ihevc_trans_16[0][0] * pi2_src[0]
603
0
                                            + g_ai2_ihevc_trans_16[8][0]
604
0
                                                            * pi2_src[8
605
0
                                                                            * src_strd];
606
0
            eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]
607
0
                            + g_ai2_ihevc_trans_16[12][1]
608
0
                                            * pi2_src[12 * src_strd];
609
0
            eee[1] =
610
0
                            g_ai2_ihevc_trans_16[0][1] * pi2_src[0]
611
0
                                            + g_ai2_ihevc_trans_16[8][1]
612
0
                                                            * pi2_src[8
613
0
                                                                            * src_strd];
614
615
            /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
616
0
            for(k = 0; k < 2; k++)
617
0
            {
618
0
                ee[k] = eee[k] + eeo[k];
619
0
                ee[k + 2] = eee[1 - k] - eeo[1 - k];
620
0
            }
621
0
            for(k = 0; k < 4; k++)
622
0
            {
623
0
                e[k] = ee[k] + eo[k];
624
0
                e[k + 4] = ee[3 - k] - eo[3 - k];
625
0
            }
626
0
            for(k = 0; k < 8; k++)
627
0
            {
628
0
                pi2_dst[k] =
629
0
                                CLIP_S16(((e[k] + o[k] + add) >> i4_shift));
630
0
                pi2_dst[k + 8] =
631
0
                                CLIP_S16(((e[7 - k] - o[7 - k] + add) >> i4_shift));
632
0
            }
633
0
        }
634
0
        pi2_src++;
635
0
        pi2_dst += dst_strd;
636
0
        zero_cols = zero_cols >> 1;
637
0
    }
638
0
}
639
#else
640
void ihevc_itrans_16x16(WORD16 *pi2_src,
641
                        WORD16 *pi2_dst,
642
                        WORD32 src_strd,
643
                        WORD32 dst_strd,
644
                        WORD32 i4_shift,
645
                        WORD32 zero_cols)
646
{
647
    WORD32 j, k;
648
    WORD32 e[8], o[8];
649
    WORD32 ee[4], eo[4];
650
    WORD32 eee[2], eeo[2];
651
    WORD32 add;
652
    WORD32 temp1, temp2;
653
654
    add = 1 << (i4_shift - 1);
655
    /***************************************************************************/
656
    /* Transform Matrix 16x16                                                  */
657
    /*       0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15     */
658
    /* 0  { 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64},   */
659
    /* 1  { 90, 87, 80, 70, 57, 43, 25,  9, -9,-25,-43,-57,-70,-80,-87,-90},   */
660
    /* 2  { 89, 75, 50, 18,-18,-50,-75,-89,-89,-75,-50,-18, 18, 50, 75, 89},   */
661
    /* 3  { 87, 57,  9,-43,-80,-90,-70,-25, 25, 70, 90, 80, 43, -9,-57,-87},   */
662
    /* 4  { 83, 36,-36,-83,-83,-36, 36, 83, 83, 36,-36,-83,-83,-36, 36, 83},   */
663
    /* 5  { 80,  9,-70,-87,-25, 57, 90, 43,-43,-90,-57, 25, 87, 70, -9,-80},   */
664
    /* 6  { 75,-18,-89,-50, 50, 89, 18,-75,-75, 18, 89, 50,-50,-89,-18, 75},   */
665
    /* 7  { 70,-43,-87,  9, 90, 25,-80,-57, 57, 80,-25,-90, -9, 87, 43,-70},   */
666
    /* 8  { 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64, 64,-64,-64, 64},   */
667
    /* 9  { 57,-80,-25, 90, -9,-87, 43, 70,-70,-43, 87,  9,-90, 25, 80,-57},   */
668
    /* 10 { 50,-89, 18, 75,-75,-18, 89,-50,-50, 89,-18,-75, 75, 18,-89, 50},   */
669
    /* 11 { 43,-90, 57, 25,-87, 70,  9,-80, 80, -9,-70, 87,-25,-57, 90,-43},   */
670
    /* 12 { 36,-83, 83,-36,-36, 83,-83, 36, 36,-83, 83,-36,-36, 83,-83, 36},   */
671
    /* 13 { 25,-70, 90,-80, 43,  9,-57, 87,-87, 57, -9,-43, 80,-90, 70,-25},   */
672
    /* 14 { 18,-50, 75,-89, 89,-75, 50,-18,-18, 50,-75, 89,-89, 75,-50, 18},   */
673
    /* 15 {  9,-25, 43,-57, 70,-80, 87,-90, 90,-87, 80,-70, 57,-43, 25, -9}    */
674
    /***************************************************************************/
675
676
    for(j = 0; j < TRANS_SIZE_16; j++)
677
    {
678
        /* Checking for Zero Cols */
679
        if((zero_cols & 1) == 1)
680
        {
681
            memset(pi2_dst, 0, TRANS_SIZE_16 * sizeof(WORD16));
682
        }
683
        else
684
        {
685
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
686
            {
687
                /*
688
                 o[k] = g_ai2_ihevc_trans_16[ 1][k]*pi2_src[ src_strd   ] + g_ai2_ihevc_trans_16[ 3][k]*pi2_src[ 3*src_strd   ] + g_ai2_ihevc_trans_16[ 5][k]*pi2_src[ 5*src_strd   ] + g_ai2_ihevc_trans_16[ 7][k]*pi2_src[ 7*src_strd   ] +
689
                 g_ai2_ihevc_trans_16[ 9][k]*pi2_src[ 9*src_strd   ] + g_ai2_ihevc_trans_16[11][k]*pi2_src[11*src_strd   ] + g_ai2_ihevc_trans_16[13][k]*pi2_src[13*src_strd   ] + g_ai2_ihevc_trans_16[15][k]*pi2_src[15*src_strd   ];
690
                 */
691
692
                o[0] = 90 * pi2_src[src_strd] + 87 * pi2_src[3 * src_strd]
693
                                + 80 * pi2_src[5 * src_strd]
694
                                + 70 * pi2_src[7 * src_strd]
695
                                + 57 * pi2_src[9 * src_strd]
696
                                + 43 * pi2_src[11 * src_strd]
697
                                + 25 * pi2_src[13 * src_strd]
698
                                + 9 * pi2_src[15 * src_strd];
699
700
                o[1] = 87 * pi2_src[src_strd] + 57 * pi2_src[3 * src_strd]
701
                                + 9 * pi2_src[5 * src_strd]
702
                                + -43 * pi2_src[7 * src_strd]
703
                                + -80 * pi2_src[9 * src_strd]
704
                                + -90 * pi2_src[11 * src_strd]
705
                                + -70 * pi2_src[13 * src_strd]
706
                                + -25 * pi2_src[15 * src_strd];
707
708
                o[2] = 80 * pi2_src[src_strd] + 9 * pi2_src[3 * src_strd]
709
                                + -70 * pi2_src[5 * src_strd]
710
                                + -87 * pi2_src[7 * src_strd]
711
                                + -25 * pi2_src[9 * src_strd]
712
                                + 57 * pi2_src[11 * src_strd]
713
                                + 90 * pi2_src[13 * src_strd]
714
                                + 43 * pi2_src[15 * src_strd];
715
716
                o[3] = 70 * pi2_src[src_strd] + -43 * pi2_src[3 * src_strd]
717
                                + -87 * pi2_src[5 * src_strd]
718
                                + 9 * pi2_src[7 * src_strd]
719
                                + 90 * pi2_src[9 * src_strd]
720
                                + 25 * pi2_src[11 * src_strd]
721
                                + -80 * pi2_src[13 * src_strd]
722
                                + -57 * pi2_src[15 * src_strd];
723
724
                o[4] = 57 * pi2_src[src_strd] + -80 * pi2_src[3 * src_strd]
725
                                + -25 * pi2_src[5 * src_strd]
726
                                + 90 * pi2_src[7 * src_strd]
727
                                + -9 * pi2_src[9 * src_strd]
728
                                + -87 * pi2_src[11 * src_strd]
729
                                + 43 * pi2_src[13 * src_strd]
730
                                + 70 * pi2_src[15 * src_strd];
731
732
                o[5] = 43 * pi2_src[src_strd] + -90 * pi2_src[3 * src_strd]
733
                                + 57 * pi2_src[5 * src_strd]
734
                                + 25 * pi2_src[7 * src_strd]
735
                                + -87 * pi2_src[9 * src_strd]
736
                                + 70 * pi2_src[11 * src_strd]
737
                                + 9 * pi2_src[13 * src_strd]
738
                                + -80 * pi2_src[15 * src_strd];
739
740
                o[6] = 25 * pi2_src[src_strd] + -70 * pi2_src[3 * src_strd]
741
                                + 90 * pi2_src[5 * src_strd]
742
                                + -80 * pi2_src[7 * src_strd]
743
                                + 43 * pi2_src[9 * src_strd]
744
                                + 9 * pi2_src[11 * src_strd]
745
                                + -57 * pi2_src[13 * src_strd]
746
                                + 87 * pi2_src[15 * src_strd];
747
748
                o[7] = 9 * pi2_src[src_strd] + -25 * pi2_src[3 * src_strd]
749
                                + 43 * pi2_src[5 * src_strd]
750
                                + -57 * pi2_src[7 * src_strd]
751
                                + 70 * pi2_src[9 * src_strd]
752
                                + -80 * pi2_src[11 * src_strd]
753
                                + 87 * pi2_src[13 * src_strd]
754
                                + -90 * pi2_src[15 * src_strd];
755
            }
756
            {
757
                temp1 = (pi2_src[2 * src_strd] + pi2_src[6 * src_strd]) * 75;
758
                temp2 = (pi2_src[10 * src_strd] + pi2_src[14 * src_strd]) * 50;
759
                eo[0] = temp1 + 14 * pi2_src[2 * src_strd] + temp2
760
                                - (pi2_src[14 * src_strd] << 5);
761
                eo[1] = temp1 - 93 * pi2_src[6 * src_strd] - temp2
762
                                - 39 * pi2_src[10 * src_strd];
763
764
                temp1 = (pi2_src[2 * src_strd] - pi2_src[6 * src_strd]) * 50;
765
                temp2 = (pi2_src[10 * src_strd] + pi2_src[14 * src_strd]) * 75;
766
                eo[2] = temp1 - 39 * pi2_src[6 * src_strd] + temp2
767
                                - 57 * pi2_src[10 * src_strd];
768
                eo[3] = temp1 - (pi2_src[2 * src_strd] << 5) + temp2
769
                                - 164 * pi2_src[14 * src_strd];
770
            }
771
772
            temp1 = (pi2_src[4 * src_strd] + pi2_src[12 * src_strd]) * 36;
773
            eeo[0] = temp1 + 47 * pi2_src[4 * src_strd];
774
            eeo[1] = temp1 - 119 * pi2_src[12 * src_strd];
775
776
            eee[0] = (pi2_src[0] + pi2_src[8 * src_strd]) << 6;
777
            eee[1] = (pi2_src[0] - pi2_src[8 * src_strd]) << 6;
778
779
            /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
780
            for(k = 0; k < 2; k++)
781
            {
782
                ee[k] = eee[k] + eeo[k];
783
                ee[k + 2] = eee[1 - k] - eeo[1 - k];
784
            }
785
            for(k = 0; k < 4; k++)
786
            {
787
                e[k] = ee[k] + eo[k];
788
                e[k + 4] = ee[3 - k] - eo[3 - k];
789
            }
790
            for(k = 0; k < 8; k++)
791
            {
792
                pi2_dst[k] =
793
                                CLIP_S16(((e[k] + o[k] + add) >> i4_shift));
794
                pi2_dst[k + 8] =
795
                                CLIP_S16(((e[7 - k] - o[7 - k] + add) >> i4_shift));
796
            }
797
        }
798
        pi2_src++;
799
        pi2_dst += dst_strd;
800
        zero_cols = zero_cols >> 1;
801
    }
802
}
803
#endif
804
805
/**
806
 *******************************************************************************
807
 *
808
 * @brief
809
 *  This function performs Single stage  Inverse transform for 32x32 input
810
 * block
811
 *
812
 * @par Description:
813
 *  Performs single stage 32x32 inverse transform by  utilizing the symmetry
814
 * of transformation matrix and  reducing number of multiplications wherever
815
 * possible  but keeping the number of operations  (addition,multiplication
816
 * and shift) same
817
 *
818
 * @param[in] pi2_src
819
 *  Input 32x32 coefficients
820
 *
821
 * @param[out] pi2_dst
822
 *  Output 32x32 block
823
 *
824
 * @param[in] src_strd
825
 *  Input stride
826
 *
827
 * @param[in] dst_strd
828
 *  Output Stride
829
 *
830
 * @param[in] i4_shift
831
 *  Right shift applied to every output sample, after the rounding offset
 *  1 << (i4_shift - 1) has been added
832
 *
833
 * @param[in] zero_cols
834
 *  Bitmask of zero columns in pi2_src: if bit j is set, column j is not
 *  read and row j of pi2_dst is filled with zeros
835
 *
836
 * @returns  Void
837
 *
838
 * @remarks
839
 *  None
840
 *
841
 *******************************************************************************
842
 */
843
844
845
void ihevc_itrans_32x32(WORD16 *pi2_src,
846
                        WORD16 *pi2_dst,
847
                        WORD32 src_strd,
848
                        WORD32 dst_strd,
849
                        WORD32 i4_shift,
850
                        WORD32 zero_cols)
851
0
{
852
0
    WORD32 j, k;
853
0
    WORD32 e[16], o[16];
854
0
    WORD32 ee[8], eo[8];
855
0
    WORD32 eee[4], eeo[4];
856
0
    WORD32 eeee[2], eeeo[2];
857
0
    WORD32 add;
858
859
0
    add = 1 << (i4_shift - 1);
860
861
0
    for(j = 0; j < TRANS_SIZE_32; j++)
862
0
    {
863
        /* Checking for Zero Cols */
864
0
        if((zero_cols & 1) == 1)
865
0
        {
866
0
            memset(pi2_dst, 0, TRANS_SIZE_32 * sizeof(WORD16));
867
0
        }
868
0
        else
869
0
        {
870
            /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
871
0
            for(k = 0; k < 16; k++)
872
0
            {
873
0
                o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
874
0
                                + g_ai2_ihevc_trans_32[3][k]
875
0
                                                * pi2_src[3 * src_strd]
876
0
                                + g_ai2_ihevc_trans_32[5][k]
877
0
                                                * pi2_src[5 * src_strd]
878
0
                                + g_ai2_ihevc_trans_32[7][k]
879
0
                                                * pi2_src[7 * src_strd]
880
0
                                + g_ai2_ihevc_trans_32[9][k]
881
0
                                                * pi2_src[9 * src_strd]
882
0
                                + g_ai2_ihevc_trans_32[11][k]
883
0
                                                * pi2_src[11 * src_strd]
884
0
                                + g_ai2_ihevc_trans_32[13][k]
885
0
                                                * pi2_src[13 * src_strd]
886
0
                                + g_ai2_ihevc_trans_32[15][k]
887
0
                                                * pi2_src[15 * src_strd]
888
0
                                + g_ai2_ihevc_trans_32[17][k]
889
0
                                                * pi2_src[17 * src_strd]
890
0
                                + g_ai2_ihevc_trans_32[19][k]
891
0
                                                * pi2_src[19 * src_strd]
892
0
                                + g_ai2_ihevc_trans_32[21][k]
893
0
                                                * pi2_src[21 * src_strd]
894
0
                                + g_ai2_ihevc_trans_32[23][k]
895
0
                                                * pi2_src[23 * src_strd]
896
0
                                + g_ai2_ihevc_trans_32[25][k]
897
0
                                                * pi2_src[25 * src_strd]
898
0
                                + g_ai2_ihevc_trans_32[27][k]
899
0
                                                * pi2_src[27 * src_strd]
900
0
                                + g_ai2_ihevc_trans_32[29][k]
901
0
                                                * pi2_src[29 * src_strd]
902
0
                                + g_ai2_ihevc_trans_32[31][k]
903
0
                                                * pi2_src[31 * src_strd];
904
0
            }
905
0
            for(k = 0; k < 8; k++)
906
0
            {
907
0
                eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
908
0
                                + g_ai2_ihevc_trans_32[6][k]
909
0
                                                * pi2_src[6 * src_strd]
910
0
                                + g_ai2_ihevc_trans_32[10][k]
911
0
                                                * pi2_src[10 * src_strd]
912
0
                                + g_ai2_ihevc_trans_32[14][k]
913
0
                                                * pi2_src[14 * src_strd]
914
0
                                + g_ai2_ihevc_trans_32[18][k]
915
0
                                                * pi2_src[18 * src_strd]
916
0
                                + g_ai2_ihevc_trans_32[22][k]
917
0
                                                * pi2_src[22 * src_strd]
918
0
                                + g_ai2_ihevc_trans_32[26][k]
919
0
                                                * pi2_src[26 * src_strd]
920
0
                                + g_ai2_ihevc_trans_32[30][k]
921
0
                                                * pi2_src[30 * src_strd];
922
0
            }
923
0
            for(k = 0; k < 4; k++)
924
0
            {
925
0
                eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]
926
0
                                + g_ai2_ihevc_trans_32[12][k]
927
0
                                                * pi2_src[12 * src_strd]
928
0
                                + g_ai2_ihevc_trans_32[20][k]
929
0
                                                * pi2_src[20 * src_strd]
930
0
                                + g_ai2_ihevc_trans_32[28][k]
931
0
                                                * pi2_src[28 * src_strd];
932
0
            }
933
0
            eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd]
934
0
                            + g_ai2_ihevc_trans_32[24][0]
935
0
                                            * pi2_src[24 * src_strd];
936
0
            eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd]
937
0
                            + g_ai2_ihevc_trans_32[24][1]
938
0
                                            * pi2_src[24 * src_strd];
939
0
            eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]
940
0
                            + g_ai2_ihevc_trans_32[16][0]
941
0
                                            * pi2_src[16 * src_strd];
942
0
            eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]
943
0
                            + g_ai2_ihevc_trans_32[16][1]
944
0
                                            * pi2_src[16 * src_strd];
945
946
            /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
947
0
            eee[0] = eeee[0] + eeeo[0];
948
0
            eee[3] = eeee[0] - eeeo[0];
949
0
            eee[1] = eeee[1] + eeeo[1];
950
0
            eee[2] = eeee[1] - eeeo[1];
951
0
            for(k = 0; k < 4; k++)
952
0
            {
953
0
                ee[k] = eee[k] + eeo[k];
954
0
                ee[k + 4] = eee[3 - k] - eeo[3 - k];
955
0
            }
956
0
            for(k = 0; k < 8; k++)
957
0
            {
958
0
                e[k] = ee[k] + eo[k];
959
0
                e[k + 8] = ee[7 - k] - eo[7 - k];
960
0
            }
961
0
            for(k = 0; k < 16; k++)
962
0
            {
963
0
                pi2_dst[k] =
964
0
                                CLIP_S16(((e[k] + o[k] + add) >> i4_shift));
965
0
                pi2_dst[k + 16] =
966
0
                                CLIP_S16(((e[15 - k] - o[15 - k] + add) >> i4_shift));
967
0
            }
968
0
        }
969
0
        pi2_src++;
970
0
        pi2_dst += dst_strd;
971
0
        zero_cols = zero_cols >> 1;
972
0
    }
973
0
}
974