Coverage Report

Created: 2026-03-07 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/common/ihevc_chroma_itrans_recon_32x32.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2025 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_chroma_itrans_recon_32x32.c
22
 *
23
 * @brief
24
 *  Contains function definitions for 32x32 inverse transform  and reconstruction
25
 * of chroma interleaved data.
26
 *
27
 * @author
28
 *  100927
29
 *
30
 * @par List of Functions:
31
 *  - ihevc_chroma_itrans_recon_32x32()
32
 *
33
 * @remarks
34
 *  None
35
 *
36
 *******************************************************************************
37
 */
38
39
#include <stdio.h>
40
#include <string.h>
41
#include "ihevc_typedefs.h"
42
#include "ihevc_macros.h"
43
#include "ihevc_platform_macros.h"
44
#include "ihevc_defs.h"
45
#include "ihevc_trans_tables.h"
46
#include "ihevc_chroma_itrans_recon.h"
47
#include "ihevc_func_selector.h"
48
#include "ihevc_trans_macros.h"
49
50
/* All the functions work on one component (U or V) of interleaved data depending upon pointers passed to it */
51
/* Data visualization */
52
/* U V U V U V U V */
53
/* U V U V U V U V */
54
/* U V U V U V U V */
55
/* U V U V U V U V */
56
/* If the pointer points to first byte of above stream (U) , functions will operate on U component */
57
/* If the pointer points to second byte of above stream (V) , functions will operate on V component */
58
59
60
/**
61
 *******************************************************************************
62
 *
63
 * @brief
64
 *  This function performs Inverse transform  and reconstruction for 32x32
65
 * input block
66
 *
67
 * @par Description:
68
 *  Performs inverse transform and adds the prediction  data and clips output
69
 * to 8 bit
70
 *
71
 * @param[in] pi2_src
72
 *  Input 32x32 coefficients
73
 *
74
 * @param[in] pi2_tmp
75
 *  Temporary 32x32 buffer for storing inverse transform
76
 *  1st stage output
77
 *
78
 * @param[in] pu1_pred
79
 *  Prediction 32x32 block
80
 *
81
 * @param[out] pu1_dst
82
 *  Output 32x32 block
83
 *
84
 * @param[in] src_strd
85
 *  Input stride
86
 *
87
 * @param[in] pred_strd
88
 *  Prediction stride
89
 *
90
 * @param[in] dst_strd
91
 *  Output Stride
92
 *
93
 * @param[in] zero_cols
94
 *  Zero columns in pi2_src
95
 *
96
 * @param[in] zero_rows
97
 *  Zero rows in pi2_src
98
 *
99
 * @returns  Void
100
 *
101
 * @remarks
102
 *  None
103
 *
104
 *******************************************************************************
105
 */
106
107
108
void ihevc_chroma_itrans_recon_32x32(WORD16 *pi2_src,
109
                                     WORD16 *pi2_tmp,
110
                                     UWORD8 *pu1_pred,
111
                                     UWORD8 *pu1_dst,
112
                                     WORD32 src_strd,
113
                                     WORD32 pred_strd,
114
                                     WORD32 dst_strd,
115
                                     WORD32 zero_cols,
116
                                     WORD32 zero_rows)
117
0
{
118
0
    WORD32 j, k;
119
0
    WORD32 e[16], o[16];
120
0
    WORD32 ee[8], eo[8];
121
0
    WORD32 eee[4], eeo[4];
122
0
    WORD32 eeee[2], eeeo[2];
123
0
    WORD32 add;
124
0
    WORD32 shift;
125
0
    WORD16 *pi2_tmp_orig;
126
0
    WORD32 trans_size;
127
0
    WORD32 row_limit_2nd_stage, zero_rows_2nd_stage = zero_cols;
128
129
0
    trans_size = TRANS_SIZE_32;
130
0
    pi2_tmp_orig = pi2_tmp;
131
132
0
    if((zero_cols & 0xFFFFFFF0) == 0xFFFFFFF0)
133
0
        row_limit_2nd_stage = 4;
134
0
    else if((zero_cols & 0xFFFFFF00) == 0xFFFFFF00)
135
0
        row_limit_2nd_stage = 8;
136
0
    else
137
0
        row_limit_2nd_stage = TRANS_SIZE_32;
138
139
0
    if((zero_rows & 0xFFFFFFF0) == 0xFFFFFFF0)  /* First 4 rows of input are non-zero */
140
0
    {
141
        /************************************************************************************************/
142
        /**********************************START - IT_RECON_32x32****************************************/
143
        /************************************************************************************************/
144
        /* Inverse Transform 1st stage */
145
0
        shift = IT_SHIFT_STAGE_1;
146
0
        add = 1 << (shift - 1);
147
148
0
        for(j = 0; j < row_limit_2nd_stage; j++)
149
0
        {
150
            /* Checking for Zero Cols */
151
0
            if((zero_cols & 1) == 1)
152
0
            {
153
0
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
154
0
            }
155
0
            else
156
0
            {
157
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
158
0
                for(k = 0; k < 16; k++)
159
0
                {
160
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
161
0
                                    + g_ai2_ihevc_trans_32[3][k]
162
0
                                                    * pi2_src[3 * src_strd];
163
0
                }
164
0
                for(k = 0; k < 8; k++)
165
0
                {
166
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd];
167
0
                }
168
//                for(k = 0; k < 4; k++)
169
0
                {
170
0
                    eeo[0] = 0;
171
0
                    eeo[1] = 0;
172
0
                    eeo[2] = 0;
173
0
                    eeo[3] = 0;
174
0
                }
175
0
                eeeo[0] = 0;
176
0
                eeeo[1] = 0;
177
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
178
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
179
180
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
181
0
                eee[0] = eeee[0] + eeeo[0];
182
0
                eee[3] = eeee[0] - eeeo[0];
183
0
                eee[1] = eeee[1] + eeeo[1];
184
0
                eee[2] = eeee[1] - eeeo[1];
185
0
                for(k = 0; k < 4; k++)
186
0
                {
187
0
                    ee[k] = eee[k] + eeo[k];
188
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
189
0
                }
190
0
                for(k = 0; k < 8; k++)
191
0
                {
192
0
                    e[k] = ee[k] + eo[k];
193
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
194
0
                }
195
0
                for(k = 0; k < 16; k++)
196
0
                {
197
0
                    pi2_tmp[k] =
198
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
199
0
                    pi2_tmp[k + 16] =
200
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
201
0
                }
202
0
            }
203
0
            pi2_src++;
204
0
            pi2_tmp += trans_size;
205
0
            zero_cols = zero_cols >> 1;
206
0
        }
207
208
0
        pi2_tmp = pi2_tmp_orig;
209
210
        /* Inverse Transform 2nd stage */
211
0
        shift = IT_SHIFT_STAGE_2;
212
0
        add = 1 << (shift - 1);
213
0
        if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
214
0
        {
215
0
            for(j = 0; j < trans_size; j++)
216
0
            {
217
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
218
0
                for(k = 0; k < 16; k++)
219
0
                {
220
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
221
0
                                    + g_ai2_ihevc_trans_32[3][k]
222
0
                                                    * pi2_tmp[3 * trans_size];
223
0
                }
224
0
                for(k = 0; k < 8; k++)
225
0
                {
226
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
227
0
                }
228
//                for(k = 0; k < 4; k++)
229
0
                {
230
0
                    eeo[0] = 0;
231
0
                    eeo[1] = 0;
232
0
                    eeo[2] = 0;
233
0
                    eeo[3] = 0;
234
0
                }
235
0
                eeeo[0] = 0;
236
0
                eeeo[1] = 0;
237
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
238
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
239
240
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
241
0
                eee[0] = eeee[0] + eeeo[0];
242
0
                eee[3] = eeee[0] - eeeo[0];
243
0
                eee[1] = eeee[1] + eeeo[1];
244
0
                eee[2] = eeee[1] - eeeo[1];
245
0
                for(k = 0; k < 4; k++)
246
0
                {
247
0
                    ee[k] = eee[k] + eeo[k];
248
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
249
0
                }
250
0
                for(k = 0; k < 8; k++)
251
0
                {
252
0
                    e[k] = ee[k] + eo[k];
253
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
254
0
                }
255
0
                for(k = 0; k < 16; k++)
256
0
                {
257
0
                    WORD32 itrans_out;
258
0
                    itrans_out =
259
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
260
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
261
0
                    itrans_out =
262
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
263
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
264
0
                }
265
0
                pi2_tmp++;
266
0
                pu1_pred += pred_strd;
267
0
                pu1_dst += dst_strd;
268
0
            }
269
0
        }
270
0
        else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
271
0
        {
272
0
            for(j = 0; j < trans_size; j++)
273
0
            {
274
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
275
0
                for(k = 0; k < 16; k++)
276
0
                {
277
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
278
0
                                    + g_ai2_ihevc_trans_32[3][k]
279
0
                                                    * pi2_tmp[3 * trans_size]
280
0
                                    + g_ai2_ihevc_trans_32[5][k]
281
0
                                                    * pi2_tmp[5 * trans_size]
282
0
                                    + g_ai2_ihevc_trans_32[7][k]
283
0
                                                    * pi2_tmp[7 * trans_size];
284
0
                }
285
0
                for(k = 0; k < 8; k++)
286
0
                {
287
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
288
0
                                    + g_ai2_ihevc_trans_32[6][k]
289
0
                                                    * pi2_tmp[6 * trans_size];
290
0
                }
291
0
                for(k = 0; k < 4; k++)
292
0
                {
293
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
294
0
                }
295
0
                eeeo[0] = 0;
296
0
                eeeo[1] = 0;
297
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
298
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
299
300
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
301
0
                eee[0] = eeee[0] + eeeo[0];
302
0
                eee[3] = eeee[0] - eeeo[0];
303
0
                eee[1] = eeee[1] + eeeo[1];
304
0
                eee[2] = eeee[1] - eeeo[1];
305
0
                for(k = 0; k < 4; k++)
306
0
                {
307
0
                    ee[k] = eee[k] + eeo[k];
308
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
309
0
                }
310
0
                for(k = 0; k < 8; k++)
311
0
                {
312
0
                    e[k] = ee[k] + eo[k];
313
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
314
0
                }
315
0
                for(k = 0; k < 16; k++)
316
0
                {
317
0
                    WORD32 itrans_out;
318
0
                    itrans_out =
319
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
320
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
321
0
                    itrans_out =
322
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
323
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
324
0
                }
325
0
                pi2_tmp++;
326
0
                pu1_pred += pred_strd;
327
0
                pu1_dst += dst_strd;
328
0
            }
329
0
        }
330
0
        else /* All rows of output of 1st stage are non-zero */
331
0
        {
332
0
            for(j = 0; j < trans_size; j++)
333
0
            {
334
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
335
0
                for(k = 0; k < 16; k++)
336
0
                {
337
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
338
0
                                    + g_ai2_ihevc_trans_32[3][k]
339
0
                                                    * pi2_tmp[3 * trans_size]
340
0
                                    + g_ai2_ihevc_trans_32[5][k]
341
0
                                                    * pi2_tmp[5 * trans_size]
342
0
                                    + g_ai2_ihevc_trans_32[7][k]
343
0
                                                    * pi2_tmp[7 * trans_size]
344
0
                                    + g_ai2_ihevc_trans_32[9][k]
345
0
                                                    * pi2_tmp[9 * trans_size]
346
0
                                    + g_ai2_ihevc_trans_32[11][k]
347
0
                                                    * pi2_tmp[11 * trans_size]
348
0
                                    + g_ai2_ihevc_trans_32[13][k]
349
0
                                                    * pi2_tmp[13 * trans_size]
350
0
                                    + g_ai2_ihevc_trans_32[15][k]
351
0
                                                    * pi2_tmp[15 * trans_size]
352
0
                                    + g_ai2_ihevc_trans_32[17][k]
353
0
                                                    * pi2_tmp[17 * trans_size]
354
0
                                    + g_ai2_ihevc_trans_32[19][k]
355
0
                                                    * pi2_tmp[19 * trans_size]
356
0
                                    + g_ai2_ihevc_trans_32[21][k]
357
0
                                                    * pi2_tmp[21 * trans_size]
358
0
                                    + g_ai2_ihevc_trans_32[23][k]
359
0
                                                    * pi2_tmp[23 * trans_size]
360
0
                                    + g_ai2_ihevc_trans_32[25][k]
361
0
                                                    * pi2_tmp[25 * trans_size]
362
0
                                    + g_ai2_ihevc_trans_32[27][k]
363
0
                                                    * pi2_tmp[27 * trans_size]
364
0
                                    + g_ai2_ihevc_trans_32[29][k]
365
0
                                                    * pi2_tmp[29 * trans_size]
366
0
                                    + g_ai2_ihevc_trans_32[31][k]
367
0
                                                    * pi2_tmp[31 * trans_size];
368
0
                }
369
0
                for(k = 0; k < 8; k++)
370
0
                {
371
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
372
0
                                    + g_ai2_ihevc_trans_32[6][k]
373
0
                                                    * pi2_tmp[6 * trans_size]
374
0
                                    + g_ai2_ihevc_trans_32[10][k]
375
0
                                                    * pi2_tmp[10 * trans_size]
376
0
                                    + g_ai2_ihevc_trans_32[14][k]
377
0
                                                    * pi2_tmp[14 * trans_size]
378
0
                                    + g_ai2_ihevc_trans_32[18][k]
379
0
                                                    * pi2_tmp[18 * trans_size]
380
0
                                    + g_ai2_ihevc_trans_32[22][k]
381
0
                                                    * pi2_tmp[22 * trans_size]
382
0
                                    + g_ai2_ihevc_trans_32[26][k]
383
0
                                                    * pi2_tmp[26 * trans_size]
384
0
                                    + g_ai2_ihevc_trans_32[30][k]
385
0
                                                    * pi2_tmp[30 * trans_size];
386
0
                }
387
0
                for(k = 0; k < 4; k++)
388
0
                {
389
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
390
0
                                    + g_ai2_ihevc_trans_32[12][k]
391
0
                                                    * pi2_tmp[12 * trans_size]
392
0
                                    + g_ai2_ihevc_trans_32[20][k]
393
0
                                                    * pi2_tmp[20 * trans_size]
394
0
                                    + g_ai2_ihevc_trans_32[28][k]
395
0
                                                    * pi2_tmp[28 * trans_size];
396
0
                }
397
0
                eeeo[0] =
398
0
                                g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
399
0
                                                + g_ai2_ihevc_trans_32[24][0]
400
0
                                                                * pi2_tmp[24
401
0
                                                                                * trans_size];
402
0
                eeeo[1] =
403
0
                                g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
404
0
                                                + g_ai2_ihevc_trans_32[24][1]
405
0
                                                                * pi2_tmp[24
406
0
                                                                                * trans_size];
407
0
                eeee[0] =
408
0
                                g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
409
0
                                                + g_ai2_ihevc_trans_32[16][0]
410
0
                                                                * pi2_tmp[16
411
0
                                                                                * trans_size];
412
0
                eeee[1] =
413
0
                                g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
414
0
                                                + g_ai2_ihevc_trans_32[16][1]
415
0
                                                                * pi2_tmp[16
416
0
                                                                                * trans_size];
417
418
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
419
0
                eee[0] = eeee[0] + eeeo[0];
420
0
                eee[3] = eeee[0] - eeeo[0];
421
0
                eee[1] = eeee[1] + eeeo[1];
422
0
                eee[2] = eeee[1] - eeeo[1];
423
0
                for(k = 0; k < 4; k++)
424
0
                {
425
0
                    ee[k] = eee[k] + eeo[k];
426
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
427
0
                }
428
0
                for(k = 0; k < 8; k++)
429
0
                {
430
0
                    e[k] = ee[k] + eo[k];
431
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
432
0
                }
433
0
                for(k = 0; k < 16; k++)
434
0
                {
435
0
                    WORD32 itrans_out;
436
0
                    itrans_out =
437
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
438
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
439
0
                    itrans_out =
440
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
441
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
442
0
                }
443
0
                pi2_tmp++;
444
0
                pu1_pred += pred_strd;
445
0
                pu1_dst += dst_strd;
446
0
            }
447
0
        }
448
        /************************************************************************************************/
449
        /************************************END - IT_RECON_32x32****************************************/
450
        /************************************************************************************************/
451
0
    }
452
0
    else if((zero_rows & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of input are non-zero */
453
0
    {
454
        /************************************************************************************************/
455
        /**********************************START - IT_RECON_32x32****************************************/
456
        /************************************************************************************************/
457
        /* Inverse Transform 1st stage */
458
0
        shift = IT_SHIFT_STAGE_1;
459
0
        add = 1 << (shift - 1);
460
461
0
        for(j = 0; j < row_limit_2nd_stage; j++)
462
0
        {
463
            /* Checking for Zero Cols */
464
0
            if((zero_cols & 1) == 1)
465
0
            {
466
0
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
467
0
            }
468
0
            else
469
0
            {
470
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
471
0
                for(k = 0; k < 16; k++)
472
0
                {
473
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
474
0
                                    + g_ai2_ihevc_trans_32[3][k]
475
0
                                                    * pi2_src[3 * src_strd]
476
0
                                    + g_ai2_ihevc_trans_32[5][k]
477
0
                                                    * pi2_src[5 * src_strd]
478
0
                                    + g_ai2_ihevc_trans_32[7][k]
479
0
                                                    * pi2_src[7 * src_strd];
480
0
                }
481
0
                for(k = 0; k < 8; k++)
482
0
                {
483
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
484
0
                                    + g_ai2_ihevc_trans_32[6][k]
485
0
                                                    * pi2_src[6 * src_strd];
486
0
                }
487
0
                for(k = 0; k < 4; k++)
488
0
                {
489
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd];
490
0
                }
491
0
                eeeo[0] = 0;
492
0
                eeeo[1] = 0;
493
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
494
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
495
496
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
497
0
                eee[0] = eeee[0] + eeeo[0];
498
0
                eee[3] = eeee[0] - eeeo[0];
499
0
                eee[1] = eeee[1] + eeeo[1];
500
0
                eee[2] = eeee[1] - eeeo[1];
501
0
                for(k = 0; k < 4; k++)
502
0
                {
503
0
                    ee[k] = eee[k] + eeo[k];
504
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
505
0
                }
506
0
                for(k = 0; k < 8; k++)
507
0
                {
508
0
                    e[k] = ee[k] + eo[k];
509
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
510
0
                }
511
0
                for(k = 0; k < 16; k++)
512
0
                {
513
0
                    pi2_tmp[k] =
514
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
515
0
                    pi2_tmp[k + 16] =
516
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
517
0
                }
518
0
            }
519
0
            pi2_src++;
520
0
            pi2_tmp += trans_size;
521
0
            zero_cols = zero_cols >> 1;
522
0
        }
523
524
0
        pi2_tmp = pi2_tmp_orig;
525
526
        /* Inverse Transform 2nd stage */
527
0
        shift = IT_SHIFT_STAGE_2;
528
0
        add = 1 << (shift - 1);
529
0
        if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
530
0
        {
531
0
            for(j = 0; j < trans_size; j++)
532
0
            {
533
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
534
0
                for(k = 0; k < 16; k++)
535
0
                {
536
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
537
0
                                    + g_ai2_ihevc_trans_32[3][k]
538
0
                                                    * pi2_tmp[3 * trans_size];
539
0
                }
540
0
                for(k = 0; k < 8; k++)
541
0
                {
542
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
543
0
                }
544
                /* Loop "for(k = 0; k < 4; k++)" is intentionally disabled: every eeo[] term is set to zero explicitly in the block below */
545
0
                {
546
0
                    eeo[0] = 0;
547
0
                    eeo[1] = 0;
548
0
                    eeo[2] = 0;
549
0
                    eeo[3] = 0;
550
0
                }
551
0
                eeeo[0] = 0;
552
0
                eeeo[1] = 0;
553
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
554
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
555
556
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
557
0
                eee[0] = eeee[0] + eeeo[0];
558
0
                eee[3] = eeee[0] - eeeo[0];
559
0
                eee[1] = eeee[1] + eeeo[1];
560
0
                eee[2] = eeee[1] - eeeo[1];
561
0
                for(k = 0; k < 4; k++)
562
0
                {
563
0
                    ee[k] = eee[k] + eeo[k];
564
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
565
0
                }
566
0
                for(k = 0; k < 8; k++)
567
0
                {
568
0
                    e[k] = ee[k] + eo[k];
569
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
570
0
                }
571
0
                for(k = 0; k < 16; k++)
572
0
                {
573
0
                    WORD32 itrans_out;
574
0
                    itrans_out =
575
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
576
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
577
0
                    itrans_out =
578
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
579
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
580
0
                }
581
0
                pi2_tmp++;
582
0
                pu1_pred += pred_strd;
583
0
                pu1_dst += dst_strd;
584
0
            }
585
0
        }
586
0
        else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
587
0
        {
588
0
            for(j = 0; j < trans_size; j++)
589
0
            {
590
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
591
0
                for(k = 0; k < 16; k++)
592
0
                {
593
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
594
0
                                    + g_ai2_ihevc_trans_32[3][k]
595
0
                                                    * pi2_tmp[3 * trans_size]
596
0
                                    + g_ai2_ihevc_trans_32[5][k]
597
0
                                                    * pi2_tmp[5 * trans_size]
598
0
                                    + g_ai2_ihevc_trans_32[7][k]
599
0
                                                    * pi2_tmp[7 * trans_size];
600
0
                }
601
0
                for(k = 0; k < 8; k++)
602
0
                {
603
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
604
0
                                    + g_ai2_ihevc_trans_32[6][k]
605
0
                                                    * pi2_tmp[6 * trans_size];
606
0
                }
607
0
                for(k = 0; k < 4; k++)
608
0
                {
609
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
610
0
                }
611
0
                eeeo[0] = 0;
612
0
                eeeo[1] = 0;
613
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
614
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
615
616
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
617
0
                eee[0] = eeee[0] + eeeo[0];
618
0
                eee[3] = eeee[0] - eeeo[0];
619
0
                eee[1] = eeee[1] + eeeo[1];
620
0
                eee[2] = eeee[1] - eeeo[1];
621
0
                for(k = 0; k < 4; k++)
622
0
                {
623
0
                    ee[k] = eee[k] + eeo[k];
624
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
625
0
                }
626
0
                for(k = 0; k < 8; k++)
627
0
                {
628
0
                    e[k] = ee[k] + eo[k];
629
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
630
0
                }
631
0
                for(k = 0; k < 16; k++)
632
0
                {
633
0
                    WORD32 itrans_out;
634
0
                    itrans_out =
635
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
636
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
637
0
                    itrans_out =
638
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
639
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
640
0
                }
641
0
                pi2_tmp++;
642
0
                pu1_pred += pred_strd;
643
0
                pu1_dst += dst_strd;
644
0
            }
645
0
        }
646
0
        else /* All rows of output of 1st stage are non-zero */
647
0
        {
648
0
            for(j = 0; j < trans_size; j++)
649
0
            {
650
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
651
0
                for(k = 0; k < 16; k++)
652
0
                {
653
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
654
0
                                    + g_ai2_ihevc_trans_32[3][k]
655
0
                                                    * pi2_tmp[3 * trans_size]
656
0
                                    + g_ai2_ihevc_trans_32[5][k]
657
0
                                                    * pi2_tmp[5 * trans_size]
658
0
                                    + g_ai2_ihevc_trans_32[7][k]
659
0
                                                    * pi2_tmp[7 * trans_size]
660
0
                                    + g_ai2_ihevc_trans_32[9][k]
661
0
                                                    * pi2_tmp[9 * trans_size]
662
0
                                    + g_ai2_ihevc_trans_32[11][k]
663
0
                                                    * pi2_tmp[11 * trans_size]
664
0
                                    + g_ai2_ihevc_trans_32[13][k]
665
0
                                                    * pi2_tmp[13 * trans_size]
666
0
                                    + g_ai2_ihevc_trans_32[15][k]
667
0
                                                    * pi2_tmp[15 * trans_size]
668
0
                                    + g_ai2_ihevc_trans_32[17][k]
669
0
                                                    * pi2_tmp[17 * trans_size]
670
0
                                    + g_ai2_ihevc_trans_32[19][k]
671
0
                                                    * pi2_tmp[19 * trans_size]
672
0
                                    + g_ai2_ihevc_trans_32[21][k]
673
0
                                                    * pi2_tmp[21 * trans_size]
674
0
                                    + g_ai2_ihevc_trans_32[23][k]
675
0
                                                    * pi2_tmp[23 * trans_size]
676
0
                                    + g_ai2_ihevc_trans_32[25][k]
677
0
                                                    * pi2_tmp[25 * trans_size]
678
0
                                    + g_ai2_ihevc_trans_32[27][k]
679
0
                                                    * pi2_tmp[27 * trans_size]
680
0
                                    + g_ai2_ihevc_trans_32[29][k]
681
0
                                                    * pi2_tmp[29 * trans_size]
682
0
                                    + g_ai2_ihevc_trans_32[31][k]
683
0
                                                    * pi2_tmp[31 * trans_size];
684
0
                }
685
0
                for(k = 0; k < 8; k++)
686
0
                {
687
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
688
0
                                    + g_ai2_ihevc_trans_32[6][k]
689
0
                                                    * pi2_tmp[6 * trans_size]
690
0
                                    + g_ai2_ihevc_trans_32[10][k]
691
0
                                                    * pi2_tmp[10 * trans_size]
692
0
                                    + g_ai2_ihevc_trans_32[14][k]
693
0
                                                    * pi2_tmp[14 * trans_size]
694
0
                                    + g_ai2_ihevc_trans_32[18][k]
695
0
                                                    * pi2_tmp[18 * trans_size]
696
0
                                    + g_ai2_ihevc_trans_32[22][k]
697
0
                                                    * pi2_tmp[22 * trans_size]
698
0
                                    + g_ai2_ihevc_trans_32[26][k]
699
0
                                                    * pi2_tmp[26 * trans_size]
700
0
                                    + g_ai2_ihevc_trans_32[30][k]
701
0
                                                    * pi2_tmp[30 * trans_size];
702
0
                }
703
0
                for(k = 0; k < 4; k++)
704
0
                {
705
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
706
0
                                    + g_ai2_ihevc_trans_32[12][k]
707
0
                                                    * pi2_tmp[12 * trans_size]
708
0
                                    + g_ai2_ihevc_trans_32[20][k]
709
0
                                                    * pi2_tmp[20 * trans_size]
710
0
                                    + g_ai2_ihevc_trans_32[28][k]
711
0
                                                    * pi2_tmp[28 * trans_size];
712
0
                }
713
0
                eeeo[0] =
714
0
                                g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
715
0
                                                + g_ai2_ihevc_trans_32[24][0]
716
0
                                                                * pi2_tmp[24
717
0
                                                                                * trans_size];
718
0
                eeeo[1] =
719
0
                                g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
720
0
                                                + g_ai2_ihevc_trans_32[24][1]
721
0
                                                                * pi2_tmp[24
722
0
                                                                                * trans_size];
723
0
                eeee[0] =
724
0
                                g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
725
0
                                                + g_ai2_ihevc_trans_32[16][0]
726
0
                                                                * pi2_tmp[16
727
0
                                                                                * trans_size];
728
0
                eeee[1] =
729
0
                                g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
730
0
                                                + g_ai2_ihevc_trans_32[16][1]
731
0
                                                                * pi2_tmp[16
732
0
                                                                                * trans_size];
733
734
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
735
0
                eee[0] = eeee[0] + eeeo[0];
736
0
                eee[3] = eeee[0] - eeeo[0];
737
0
                eee[1] = eeee[1] + eeeo[1];
738
0
                eee[2] = eeee[1] - eeeo[1];
739
0
                for(k = 0; k < 4; k++)
740
0
                {
741
0
                    ee[k] = eee[k] + eeo[k];
742
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
743
0
                }
744
0
                for(k = 0; k < 8; k++)
745
0
                {
746
0
                    e[k] = ee[k] + eo[k];
747
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
748
0
                }
749
0
                for(k = 0; k < 16; k++)
750
0
                {
751
0
                    WORD32 itrans_out;
752
0
                    itrans_out =
753
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
754
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
755
0
                    itrans_out =
756
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
757
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
758
0
                }
759
0
                pi2_tmp++;
760
0
                pu1_pred += pred_strd;
761
0
                pu1_dst += dst_strd;
762
0
            }
763
0
        }
764
        /************************************************************************************************/
765
        /************************************END - IT_RECON_32x32****************************************/
766
        /************************************************************************************************/
767
0
    }
768
0
    else  /* All rows of input are non-zero */
769
0
    {
770
        /************************************************************************************************/
771
        /**********************************START - IT_RECON_32x32****************************************/
772
        /************************************************************************************************/
773
        /* Inverse Transform 1st stage */
774
0
        shift = IT_SHIFT_STAGE_1;
775
0
        add = 1 << (shift - 1);
776
777
0
        for(j = 0; j < row_limit_2nd_stage; j++)
778
0
        {
779
            /* Checking for Zero Cols */
780
0
            if((zero_cols & 1) == 1)
781
0
            {
782
0
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
783
0
            }
784
0
            else
785
0
            {
786
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
787
0
                for(k = 0; k < 16; k++)
788
0
                {
789
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
790
0
                                    + g_ai2_ihevc_trans_32[3][k]
791
0
                                                    * pi2_src[3 * src_strd]
792
0
                                    + g_ai2_ihevc_trans_32[5][k]
793
0
                                                    * pi2_src[5 * src_strd]
794
0
                                    + g_ai2_ihevc_trans_32[7][k]
795
0
                                                    * pi2_src[7 * src_strd]
796
0
                                    + g_ai2_ihevc_trans_32[9][k]
797
0
                                                    * pi2_src[9 * src_strd]
798
0
                                    + g_ai2_ihevc_trans_32[11][k]
799
0
                                                    * pi2_src[11 * src_strd]
800
0
                                    + g_ai2_ihevc_trans_32[13][k]
801
0
                                                    * pi2_src[13 * src_strd]
802
0
                                    + g_ai2_ihevc_trans_32[15][k]
803
0
                                                    * pi2_src[15 * src_strd]
804
0
                                    + g_ai2_ihevc_trans_32[17][k]
805
0
                                                    * pi2_src[17 * src_strd]
806
0
                                    + g_ai2_ihevc_trans_32[19][k]
807
0
                                                    * pi2_src[19 * src_strd]
808
0
                                    + g_ai2_ihevc_trans_32[21][k]
809
0
                                                    * pi2_src[21 * src_strd]
810
0
                                    + g_ai2_ihevc_trans_32[23][k]
811
0
                                                    * pi2_src[23 * src_strd]
812
0
                                    + g_ai2_ihevc_trans_32[25][k]
813
0
                                                    * pi2_src[25 * src_strd]
814
0
                                    + g_ai2_ihevc_trans_32[27][k]
815
0
                                                    * pi2_src[27 * src_strd]
816
0
                                    + g_ai2_ihevc_trans_32[29][k]
817
0
                                                    * pi2_src[29 * src_strd]
818
0
                                    + g_ai2_ihevc_trans_32[31][k]
819
0
                                                    * pi2_src[31 * src_strd];
820
0
                }
821
0
                for(k = 0; k < 8; k++)
822
0
                {
823
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
824
0
                                    + g_ai2_ihevc_trans_32[6][k]
825
0
                                                    * pi2_src[6 * src_strd]
826
0
                                    + g_ai2_ihevc_trans_32[10][k]
827
0
                                                    * pi2_src[10 * src_strd]
828
0
                                    + g_ai2_ihevc_trans_32[14][k]
829
0
                                                    * pi2_src[14 * src_strd]
830
0
                                    + g_ai2_ihevc_trans_32[18][k]
831
0
                                                    * pi2_src[18 * src_strd]
832
0
                                    + g_ai2_ihevc_trans_32[22][k]
833
0
                                                    * pi2_src[22 * src_strd]
834
0
                                    + g_ai2_ihevc_trans_32[26][k]
835
0
                                                    * pi2_src[26 * src_strd]
836
0
                                    + g_ai2_ihevc_trans_32[30][k]
837
0
                                                    * pi2_src[30 * src_strd];
838
0
                }
839
0
                for(k = 0; k < 4; k++)
840
0
                {
841
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]
842
0
                                    + g_ai2_ihevc_trans_32[12][k]
843
0
                                                    * pi2_src[12 * src_strd]
844
0
                                    + g_ai2_ihevc_trans_32[20][k]
845
0
                                                    * pi2_src[20 * src_strd]
846
0
                                    + g_ai2_ihevc_trans_32[28][k]
847
0
                                                    * pi2_src[28 * src_strd];
848
0
                }
849
0
                eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd]
850
0
                                + g_ai2_ihevc_trans_32[24][0]
851
0
                                                * pi2_src[24 * src_strd];
852
0
                eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd]
853
0
                                + g_ai2_ihevc_trans_32[24][1]
854
0
                                                * pi2_src[24 * src_strd];
855
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]
856
0
                                + g_ai2_ihevc_trans_32[16][0]
857
0
                                                * pi2_src[16 * src_strd];
858
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]
859
0
                                + g_ai2_ihevc_trans_32[16][1]
860
0
                                                * pi2_src[16 * src_strd];
861
862
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
863
0
                eee[0] = eeee[0] + eeeo[0];
864
0
                eee[3] = eeee[0] - eeeo[0];
865
0
                eee[1] = eeee[1] + eeeo[1];
866
0
                eee[2] = eeee[1] - eeeo[1];
867
0
                for(k = 0; k < 4; k++)
868
0
                {
869
0
                    ee[k] = eee[k] + eeo[k];
870
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
871
0
                }
872
0
                for(k = 0; k < 8; k++)
873
0
                {
874
0
                    e[k] = ee[k] + eo[k];
875
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
876
0
                }
877
0
                for(k = 0; k < 16; k++)
878
0
                {
879
0
                    pi2_tmp[k] =
880
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
881
0
                    pi2_tmp[k + 16] =
882
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
883
0
                }
884
0
            }
885
0
            pi2_src++;
886
0
            pi2_tmp += trans_size;
887
0
            zero_cols = zero_cols >> 1;
888
0
        }
889
890
0
        pi2_tmp = pi2_tmp_orig;
891
892
        /* Inverse Transform 2nd stage */
893
0
        shift = IT_SHIFT_STAGE_2;
894
0
        add = 1 << (shift - 1);
895
0
        if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
896
0
        {
897
0
            for(j = 0; j < trans_size; j++)
898
0
            {
899
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
900
0
                for(k = 0; k < 16; k++)
901
0
                {
902
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
903
0
                                    + g_ai2_ihevc_trans_32[3][k]
904
0
                                                    * pi2_tmp[3 * trans_size];
905
0
                }
906
0
                for(k = 0; k < 8; k++)
907
0
                {
908
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
909
0
                }
910
                /* Loop "for(k = 0; k < 4; k++)" is intentionally disabled: every eeo[] term is set to zero explicitly in the block below */
911
0
                {
912
0
                    eeo[0] = 0;
913
0
                    eeo[1] = 0;
914
0
                    eeo[2] = 0;
915
0
                    eeo[3] = 0;
916
0
                }
917
0
                eeeo[0] = 0;
918
0
                eeeo[1] = 0;
919
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
920
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
921
922
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
923
0
                eee[0] = eeee[0] + eeeo[0];
924
0
                eee[3] = eeee[0] - eeeo[0];
925
0
                eee[1] = eeee[1] + eeeo[1];
926
0
                eee[2] = eeee[1] - eeeo[1];
927
0
                for(k = 0; k < 4; k++)
928
0
                {
929
0
                    ee[k] = eee[k] + eeo[k];
930
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
931
0
                }
932
0
                for(k = 0; k < 8; k++)
933
0
                {
934
0
                    e[k] = ee[k] + eo[k];
935
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
936
0
                }
937
0
                for(k = 0; k < 16; k++)
938
0
                {
939
0
                    WORD32 itrans_out;
940
0
                    itrans_out =
941
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
942
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
943
0
                    itrans_out =
944
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
945
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
946
0
                }
947
0
                pi2_tmp++;
948
0
                pu1_pred += pred_strd;
949
0
                pu1_dst += dst_strd;
950
0
            }
951
0
        }
952
0
        else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
953
0
        {
954
0
            for(j = 0; j < trans_size; j++)
955
0
            {
956
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
957
0
                for(k = 0; k < 16; k++)
958
0
                {
959
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
960
0
                                    + g_ai2_ihevc_trans_32[3][k]
961
0
                                                    * pi2_tmp[3 * trans_size]
962
0
                                    + g_ai2_ihevc_trans_32[5][k]
963
0
                                                    * pi2_tmp[5 * trans_size]
964
0
                                    + g_ai2_ihevc_trans_32[7][k]
965
0
                                                    * pi2_tmp[7 * trans_size];
966
0
                }
967
0
                for(k = 0; k < 8; k++)
968
0
                {
969
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
970
0
                                    + g_ai2_ihevc_trans_32[6][k]
971
0
                                                    * pi2_tmp[6 * trans_size];
972
0
                }
973
0
                for(k = 0; k < 4; k++)
974
0
                {
975
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
976
0
                }
977
0
                eeeo[0] = 0;
978
0
                eeeo[1] = 0;
979
0
                eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
980
0
                eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
981
982
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
983
0
                eee[0] = eeee[0] + eeeo[0];
984
0
                eee[3] = eeee[0] - eeeo[0];
985
0
                eee[1] = eeee[1] + eeeo[1];
986
0
                eee[2] = eeee[1] - eeeo[1];
987
0
                for(k = 0; k < 4; k++)
988
0
                {
989
0
                    ee[k] = eee[k] + eeo[k];
990
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
991
0
                }
992
0
                for(k = 0; k < 8; k++)
993
0
                {
994
0
                    e[k] = ee[k] + eo[k];
995
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
996
0
                }
997
0
                for(k = 0; k < 16; k++)
998
0
                {
999
0
                    WORD32 itrans_out;
1000
0
                    itrans_out =
1001
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
1002
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
1003
0
                    itrans_out =
1004
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
1005
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
1006
0
                }
1007
0
                pi2_tmp++;
1008
0
                pu1_pred += pred_strd;
1009
0
                pu1_dst += dst_strd;
1010
0
            }
1011
0
        }
1012
0
        else /* All rows of output of 1st stage are non-zero */
1013
0
        {
1014
0
            for(j = 0; j < trans_size; j++)
1015
0
            {
1016
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
1017
0
                for(k = 0; k < 16; k++)
1018
0
                {
1019
0
                    o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
1020
0
                                    + g_ai2_ihevc_trans_32[3][k]
1021
0
                                                    * pi2_tmp[3 * trans_size]
1022
0
                                    + g_ai2_ihevc_trans_32[5][k]
1023
0
                                                    * pi2_tmp[5 * trans_size]
1024
0
                                    + g_ai2_ihevc_trans_32[7][k]
1025
0
                                                    * pi2_tmp[7 * trans_size]
1026
0
                                    + g_ai2_ihevc_trans_32[9][k]
1027
0
                                                    * pi2_tmp[9 * trans_size]
1028
0
                                    + g_ai2_ihevc_trans_32[11][k]
1029
0
                                                    * pi2_tmp[11 * trans_size]
1030
0
                                    + g_ai2_ihevc_trans_32[13][k]
1031
0
                                                    * pi2_tmp[13 * trans_size]
1032
0
                                    + g_ai2_ihevc_trans_32[15][k]
1033
0
                                                    * pi2_tmp[15 * trans_size]
1034
0
                                    + g_ai2_ihevc_trans_32[17][k]
1035
0
                                                    * pi2_tmp[17 * trans_size]
1036
0
                                    + g_ai2_ihevc_trans_32[19][k]
1037
0
                                                    * pi2_tmp[19 * trans_size]
1038
0
                                    + g_ai2_ihevc_trans_32[21][k]
1039
0
                                                    * pi2_tmp[21 * trans_size]
1040
0
                                    + g_ai2_ihevc_trans_32[23][k]
1041
0
                                                    * pi2_tmp[23 * trans_size]
1042
0
                                    + g_ai2_ihevc_trans_32[25][k]
1043
0
                                                    * pi2_tmp[25 * trans_size]
1044
0
                                    + g_ai2_ihevc_trans_32[27][k]
1045
0
                                                    * pi2_tmp[27 * trans_size]
1046
0
                                    + g_ai2_ihevc_trans_32[29][k]
1047
0
                                                    * pi2_tmp[29 * trans_size]
1048
0
                                    + g_ai2_ihevc_trans_32[31][k]
1049
0
                                                    * pi2_tmp[31 * trans_size];
1050
0
                }
1051
0
                for(k = 0; k < 8; k++)
1052
0
                {
1053
0
                    eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
1054
0
                                    + g_ai2_ihevc_trans_32[6][k]
1055
0
                                                    * pi2_tmp[6 * trans_size]
1056
0
                                    + g_ai2_ihevc_trans_32[10][k]
1057
0
                                                    * pi2_tmp[10 * trans_size]
1058
0
                                    + g_ai2_ihevc_trans_32[14][k]
1059
0
                                                    * pi2_tmp[14 * trans_size]
1060
0
                                    + g_ai2_ihevc_trans_32[18][k]
1061
0
                                                    * pi2_tmp[18 * trans_size]
1062
0
                                    + g_ai2_ihevc_trans_32[22][k]
1063
0
                                                    * pi2_tmp[22 * trans_size]
1064
0
                                    + g_ai2_ihevc_trans_32[26][k]
1065
0
                                                    * pi2_tmp[26 * trans_size]
1066
0
                                    + g_ai2_ihevc_trans_32[30][k]
1067
0
                                                    * pi2_tmp[30 * trans_size];
1068
0
                }
1069
0
                for(k = 0; k < 4; k++)
1070
0
                {
1071
0
                    eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
1072
0
                                    + g_ai2_ihevc_trans_32[12][k]
1073
0
                                                    * pi2_tmp[12 * trans_size]
1074
0
                                    + g_ai2_ihevc_trans_32[20][k]
1075
0
                                                    * pi2_tmp[20 * trans_size]
1076
0
                                    + g_ai2_ihevc_trans_32[28][k]
1077
0
                                                    * pi2_tmp[28 * trans_size];
1078
0
                }
1079
0
                eeeo[0] =
1080
0
                                g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
1081
0
                                                + g_ai2_ihevc_trans_32[24][0]
1082
0
                                                                * pi2_tmp[24
1083
0
                                                                                * trans_size];
1084
0
                eeeo[1] =
1085
0
                                g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
1086
0
                                                + g_ai2_ihevc_trans_32[24][1]
1087
0
                                                                * pi2_tmp[24
1088
0
                                                                                * trans_size];
1089
0
                eeee[0] =
1090
0
                                g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
1091
0
                                                + g_ai2_ihevc_trans_32[16][0]
1092
0
                                                                * pi2_tmp[16
1093
0
                                                                                * trans_size];
1094
0
                eeee[1] =
1095
0
                                g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
1096
0
                                                + g_ai2_ihevc_trans_32[16][1]
1097
0
                                                                * pi2_tmp[16
1098
0
                                                                                * trans_size];
1099
1100
                /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
1101
0
                eee[0] = eeee[0] + eeeo[0];
1102
0
                eee[3] = eeee[0] - eeeo[0];
1103
0
                eee[1] = eeee[1] + eeeo[1];
1104
0
                eee[2] = eeee[1] - eeeo[1];
1105
0
                for(k = 0; k < 4; k++)
1106
0
                {
1107
0
                    ee[k] = eee[k] + eeo[k];
1108
0
                    ee[k + 4] = eee[3 - k] - eeo[3 - k];
1109
0
                }
1110
0
                for(k = 0; k < 8; k++)
1111
0
                {
1112
0
                    e[k] = ee[k] + eo[k];
1113
0
                    e[k + 8] = ee[7 - k] - eo[7 - k];
1114
0
                }
1115
0
                for(k = 0; k < 16; k++)
1116
0
                {
1117
0
                    WORD32 itrans_out;
1118
0
                    itrans_out =
1119
0
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
1120
0
                    pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
1121
0
                    itrans_out =
1122
0
                                    CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
1123
0
                    pu1_dst[(k + 16) * 2] = CLIP_U8((itrans_out + pu1_pred[(k + 16) * 2]));
1124
0
                }
1125
0
                pi2_tmp++;
1126
0
                pu1_pred += pred_strd;
1127
0
                pu1_dst += dst_strd;
1128
0
            }
1129
0
        }
1130
        /************************************************************************************************/
1131
        /************************************END - IT_RECON_32x32****************************************/
1132
        /************************************************************************************************/
1133
0
    }
1134
0
}
1135