Coverage Report

Created: 2025-11-24 06:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmpeg2/common/impeg2_idct.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*****************************************************************************/
21
/*                                                                           */
22
/*  File Name         : impeg2_idct.c                                        */
23
/*                                                                           */
24
/*  Description       : Contains 2d idct and inverse quantization functions  */
25
/*                                                                           */
26
/*  List of Functions : impeg2_idct_recon_dc()                               */
27
/*                      impeg2_idct_recon_dc_mismatch()                      */
28
/*                      impeg2_idct_recon()                                  */
29
/*                                                                           */
30
/*  Issues / Problems : None                                                 */
31
/*                                                                           */
32
/*  Revision History  :                                                      */
33
/*                                                                           */
34
/*         DD MM YYYY   Author(s)       Changes                              */
35
/*         10 09 2005   Hairsh M        First Version                        */
36
/*                                                                           */
37
/*****************************************************************************/
38
/*
39
  IEEE - 1180 results for this IDCT
40
  L                           256         256         5           5           300         300         384         384         Thresholds
41
  H                           255         255         5           5           300         300         383         383
42
  sign                        1           -1          1           -1          1           -1          1           -1
43
  Peak Error                  1           1           1           1           1           1           1           1           1
44
  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45
  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46
  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47
  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48
  */
49
#include <stdio.h>
50
#include <string.h>
51
52
#include "iv_datatypedef.h"
53
#include "iv.h"
54
#include "impeg2_defs.h"
55
#include "impeg2_platform_macros.h"
56
57
#include "impeg2_macros.h"
58
#include "impeg2_globals.h"
59
#include "impeg2_idct.h"
60
61
62
void impeg2_idct_recon_dc(WORD16 *pi2_src,
63
                            WORD16 *pi2_tmp,
64
                            UWORD8 *pu1_pred,
65
                            UWORD8 *pu1_dst,
66
                            WORD32 i4_src_strd,
67
                            WORD32 i4_pred_strd,
68
                            WORD32 i4_dst_strd,
69
                            WORD32 i4_zero_cols,
70
                            WORD32 i4_zero_rows)
71
1.51M
{
72
1.51M
    WORD32 i4_val, i, j;
73
74
1.51M
    UNUSED(pi2_tmp);
75
1.51M
    UNUSED(i4_src_strd);
76
1.51M
    UNUSED(i4_zero_cols);
77
1.51M
    UNUSED(i4_zero_rows);
78
79
1.51M
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
80
1.51M
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
81
1.51M
    i4_val = i4_val * gai2_impeg2_idct_q11[0];
82
1.51M
    i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
83
84
13.3M
    for(i = 0; i < TRANS_SIZE_8; i++)
85
11.8M
    {
86
106M
        for(j = 0; j < TRANS_SIZE_8; j++)
87
94.5M
        {
88
94.5M
            pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]);
89
94.5M
        }
90
11.8M
        pu1_dst  += i4_dst_strd;
91
11.8M
        pu1_pred += i4_pred_strd;
92
11.8M
    }
93
1.51M
}
94
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
95
                            WORD16 *pi2_tmp,
96
                            UWORD8 *pu1_pred,
97
                            UWORD8 *pu1_dst,
98
                            WORD32 i4_src_strd,
99
                            WORD32 i4_pred_strd,
100
                            WORD32 i4_dst_strd,
101
                            WORD32 i4_zero_cols,
102
                            WORD32 i4_zero_rows)
103
104
109k
{
105
109k
    WORD32 i4_val, i, j;
106
109k
    WORD32 i4_count = 0;
107
109k
    WORD32 i4_sum;
108
109
109k
    UNUSED(pi2_tmp);
110
109k
    UNUSED(i4_src_strd);
111
109k
    UNUSED(i4_zero_cols);
112
109k
    UNUSED(i4_zero_rows);
113
114
109k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
115
109k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
116
117
109k
    i4_val *= gai2_impeg2_idct_q11[0];
118
977k
    for(i = 0; i < TRANS_SIZE_8; i++)
119
868k
    {
120
7.80M
        for (j = 0; j < TRANS_SIZE_8; j++)
121
6.94M
        {
122
6.94M
            i4_sum = i4_val;
123
6.94M
            i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count];
124
6.94M
            i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
125
6.94M
            i4_sum += pu1_pred[j];
126
6.94M
            pu1_dst[j] = CLIP_U8(i4_sum);
127
6.94M
            i4_count++;
128
6.94M
        }
129
130
868k
        pu1_dst  += i4_dst_strd;
131
868k
        pu1_pred += i4_pred_strd;
132
868k
    }
133
134
109k
}
135
/**
136
 *******************************************************************************
137
 *
138
 * @brief
139
 *  This function performs Inverse transform  and reconstruction for 8x8
140
 * input block
141
 *
142
 * @par Description:
143
 *  Performs inverse transform and adds the prediction  data and clips output
144
 * to 8 bit
145
 *
146
 * @param[in] pi2_src
147
 *  Input 8x8 coefficients
148
 *
149
 * @param[in] pi2_tmp
150
 *  Temporary 8x8 buffer for storing inverse
151
 *
152
 *  transform
153
 *  1st stage output
154
 *
155
 * @param[in] pu1_pred
156
 *  Prediction 8x8 block
157
 *
158
 * @param[out] pu1_dst
159
 *  Output 8x8 block
160
 *
161
 * @param[in] src_strd
162
 *  Input stride
163
 *
164
 * @param[in] pred_strd
165
 *  Prediction stride
166
 *
167
 * @param[in] dst_strd
168
 *  Output Stride
169
 *
170
 * @param[in] shift
171
 *  Output shift
172
 *
173
 * @param[in] zero_cols
174
 *  Zero columns in pi2_src
175
 *
176
 * @returns  Void
177
 *
178
 * @remarks
179
 *  None
180
 *
181
 *******************************************************************************
182
 */
183
184
void impeg2_idct_recon(WORD16 *pi2_src,
185
                        WORD16 *pi2_tmp,
186
                        UWORD8 *pu1_pred,
187
                        UWORD8 *pu1_dst,
188
                        WORD32 i4_src_strd,
189
                        WORD32 i4_pred_strd,
190
                        WORD32 i4_dst_strd,
191
                        WORD32 i4_zero_cols,
192
                        WORD32 i4_zero_rows)
193
18.7M
{
194
18.7M
    WORD32 j, k;
195
18.7M
    WORD32 ai4_e[4], ai4_o[4];
196
18.7M
    WORD32 ai4_ee[2], ai4_eo[2];
197
18.7M
    WORD32 i4_add;
198
18.7M
    WORD32 i4_shift;
199
18.7M
    WORD16 *pi2_tmp_orig;
200
18.7M
    WORD32 i4_trans_size;
201
18.7M
    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202
18.7M
    WORD32 i4_row_limit_2nd_stage;
203
204
18.7M
    i4_trans_size = TRANS_SIZE_8;
205
206
18.7M
    pi2_tmp_orig = pi2_tmp;
207
208
18.7M
    if((i4_zero_cols & 0xF0) == 0xF0)
209
18.2M
        i4_row_limit_2nd_stage = 4;
210
486k
    else
211
486k
        i4_row_limit_2nd_stage = TRANS_SIZE_8;
212
213
214
18.7M
    if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215
17.6M
    {
216
        /************************************************************************************************/
217
        /**********************************START - IT_RECON_8x8******************************************/
218
        /************************************************************************************************/
219
220
        /* Inverse Transform 1st stage */
221
17.6M
        i4_shift = IDCT_STG1_SHIFT;
222
17.6M
        i4_add = 1 << (i4_shift - 1);
223
224
87.7M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
225
70.0M
        {
226
            /* Checking for Zero Cols */
227
70.0M
            if((i4_zero_cols & 1) == 1)
228
48.1M
            {
229
48.1M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230
48.1M
            }
231
21.9M
            else
232
21.9M
            {
233
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234
120M
                for(k = 0; k < 4; k++)
235
98.1M
                {
236
98.1M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237
98.1M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
238
98.1M
                                                    * pi2_src[3 * i4_src_strd];
239
98.1M
                }
240
21.9M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241
21.9M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242
21.9M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243
21.9M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244
245
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246
21.9M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247
21.9M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248
21.9M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249
21.9M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250
120M
                for(k = 0; k < 4; k++)
251
98.3M
                {
252
98.3M
                    pi2_tmp[k] =
253
98.3M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254
98.3M
                    pi2_tmp[k + 4] =
255
98.3M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256
98.3M
                }
257
21.9M
            }
258
70.0M
            pi2_src++;
259
70.0M
            pi2_tmp += i4_trans_size;
260
70.0M
            i4_zero_cols = i4_zero_cols >> 1;
261
70.0M
        }
262
263
17.6M
        pi2_tmp = pi2_tmp_orig;
264
265
        /* Inverse Transform 2nd stage */
266
17.6M
        i4_shift = IDCT_STG2_SHIFT;
267
17.6M
        i4_add = 1 << (i4_shift - 1);
268
17.6M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269
18.3M
        {
270
149M
            for(j = 0; j < i4_trans_size; j++)
271
131M
            {
272
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273
655M
                for(k = 0; k < 4; k++)
274
524M
                {
275
524M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276
524M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277
524M
                }
278
131M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279
131M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280
131M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281
131M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282
283
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284
131M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285
131M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286
131M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287
131M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288
648M
                for(k = 0; k < 4; k++)
289
517M
                {
290
517M
                    WORD32 itrans_out;
291
517M
                    itrans_out =
292
517M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293
517M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294
517M
                    itrans_out =
295
517M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296
517M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297
517M
                }
298
131M
                pi2_tmp++;
299
131M
                pu1_pred += i4_pred_strd;
300
131M
                pu1_dst += i4_dst_strd;
301
131M
            }
302
18.3M
        }
303
18.4E
        else /* All rows of output of 1st stage are non-zero */
304
18.4E
        {
305
18.4E
            for(j = 0; j < i4_trans_size; j++)
306
603k
            {
307
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308
3.01M
                for(k = 0; k < 4; k++)
309
2.41M
                {
310
2.41M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311
2.41M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
312
2.41M
                                                    * pi2_tmp[3 * i4_trans_size]
313
2.41M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
314
2.41M
                                                    * pi2_tmp[5 * i4_trans_size]
315
2.41M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
316
2.41M
                                                    * pi2_tmp[7 * i4_trans_size];
317
2.41M
                }
318
319
603k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320
603k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321
603k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322
603k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323
603k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324
603k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325
603k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326
603k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327
328
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329
603k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330
603k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331
603k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332
603k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333
3.01M
                for(k = 0; k < 4; k++)
334
2.41M
                {
335
2.41M
                    WORD32 itrans_out;
336
2.41M
                    itrans_out =
337
2.41M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338
2.41M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339
2.41M
                    itrans_out =
340
2.41M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341
2.41M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342
2.41M
                }
343
603k
                pi2_tmp++;
344
603k
                pu1_pred += i4_pred_strd;
345
603k
                pu1_dst += i4_dst_strd;
346
603k
            }
347
18.4E
        }
348
        /************************************************************************************************/
349
        /************************************END - IT_RECON_8x8******************************************/
350
        /************************************************************************************************/
351
17.6M
    }
352
1.05M
    else /* All rows of input are non-zero */
353
1.05M
    {
354
        /************************************************************************************************/
355
        /**********************************START - IT_RECON_8x8******************************************/
356
        /************************************************************************************************/
357
358
        /* Inverse Transform 1st stage */
359
1.05M
        i4_shift = IDCT_STG1_SHIFT;
360
1.05M
        i4_add = 1 << (i4_shift - 1);
361
362
7.15M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
363
6.10M
        {
364
            /* Checking for Zero Cols */
365
6.10M
            if((i4_zero_cols & 1) == 1)
366
1.97M
            {
367
1.97M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368
1.97M
            }
369
4.12M
            else
370
4.12M
            {
371
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372
21.2M
                for(k = 0; k < 4; k++)
373
17.1M
                {
374
17.1M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375
17.1M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
376
17.1M
                                                    * pi2_src[3 * i4_src_strd]
377
17.1M
                                    + gai2_impeg2_idct_q15[5 * 8 + k]
378
17.1M
                                                    * pi2_src[5 * i4_src_strd]
379
17.1M
                                    + gai2_impeg2_idct_q15[7 * 8 + k]
380
17.1M
                                                    * pi2_src[7 * i4_src_strd];
381
17.1M
                }
382
383
4.12M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384
4.12M
                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385
4.12M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386
4.12M
                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387
4.12M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388
4.12M
                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389
4.12M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390
4.12M
                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391
392
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393
4.12M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394
4.12M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395
4.12M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396
4.12M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397
21.2M
                for(k = 0; k < 4; k++)
398
17.1M
                {
399
17.1M
                    pi2_tmp[k] =
400
17.1M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401
17.1M
                    pi2_tmp[k + 4] =
402
17.1M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403
17.1M
                }
404
4.12M
            }
405
6.10M
            pi2_src++;
406
6.10M
            pi2_tmp += i4_trans_size;
407
6.10M
            i4_zero_cols = i4_zero_cols >> 1;
408
6.10M
        }
409
410
1.05M
        pi2_tmp = pi2_tmp_orig;
411
412
        /* Inverse Transform 2nd stage */
413
1.05M
        i4_shift = IDCT_STG2_SHIFT;
414
1.05M
        i4_add = 1 << (i4_shift - 1);
415
1.05M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416
645k
        {
417
5.78M
            for(j = 0; j < i4_trans_size; j++)
418
5.14M
            {
419
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420
25.7M
                for(k = 0; k < 4; k++)
421
20.5M
                {
422
20.5M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423
20.5M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424
20.5M
                }
425
5.14M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426
5.14M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427
5.14M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428
5.14M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429
430
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431
5.14M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432
5.14M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433
5.14M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434
5.14M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435
25.6M
                for(k = 0; k < 4; k++)
436
20.5M
                {
437
20.5M
                    WORD32 itrans_out;
438
20.5M
                    itrans_out =
439
20.5M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440
20.5M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441
20.5M
                    itrans_out =
442
20.5M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443
20.5M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444
20.5M
                }
445
5.14M
                pi2_tmp++;
446
5.14M
                pu1_pred += i4_pred_strd;
447
5.14M
                pu1_dst += i4_dst_strd;
448
5.14M
            }
449
645k
        }
450
405k
        else /* All rows of output of 1st stage are non-zero */
451
405k
        {
452
4.10M
            for(j = 0; j < i4_trans_size; j++)
453
3.70M
            {
454
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455
18.4M
                for(k = 0; k < 4; k++)
456
14.7M
                {
457
14.7M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458
14.7M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
459
14.7M
                                                    * pi2_tmp[3 * i4_trans_size]
460
14.7M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
461
14.7M
                                                    * pi2_tmp[5 * i4_trans_size]
462
14.7M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
463
14.7M
                                                    * pi2_tmp[7 * i4_trans_size];
464
14.7M
                }
465
466
3.70M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467
3.70M
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468
3.70M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469
3.70M
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470
3.70M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471
3.70M
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472
3.70M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473
3.70M
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474
475
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476
3.70M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477
3.70M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478
3.70M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479
3.70M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480
18.3M
                for(k = 0; k < 4; k++)
481
14.6M
                {
482
14.6M
                    WORD32 itrans_out;
483
14.6M
                    itrans_out =
484
14.6M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485
14.6M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486
14.6M
                    itrans_out =
487
14.6M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488
14.6M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489
14.6M
                }
490
3.70M
                pi2_tmp++;
491
3.70M
                pu1_pred += i4_pred_strd;
492
3.70M
                pu1_dst += i4_dst_strd;
493
3.70M
            }
494
405k
        }
495
        /************************************************************************************************/
496
        /************************************END - IT_RECON_8x8******************************************/
497
        /************************************************************************************************/
498
1.05M
    }
499
18.7M
}
500