Coverage Report

Created: 2026-06-15 06:30

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmpeg2/common/impeg2_idct.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*****************************************************************************/
21
/*                                                                           */
22
/*  File Name         : impeg2_idct.c                                        */
23
/*                                                                           */
24
/*  Description       : Contains 2d idct and inverse quantization functions  */
25
/*                                                                           */
26
/*  List of Functions : impeg2_idct_recon_dc()                               */
27
/*                      impeg2_idct_recon_dc_mismatch()                      */
28
/*                      impeg2_idct_recon()                                  */
29
/*                                                                           */
30
/*  Issues / Problems : None                                                 */
31
/*                                                                           */
32
/*  Revision History  :                                                      */
33
/*                                                                           */
34
/*         DD MM YYYY   Author(s)       Changes                              */
35
/*         10 09 2005   Hairsh M        First Version                        */
36
/*                                                                           */
37
/*****************************************************************************/
38
/*
39
  IEEE - 1180 results for this IDCT
40
  L                           256         256         5           5           300         300         384         384         Thresholds
41
  H                           255         255         5           5           300         300         383         383
42
  sign                        1           -1          1           -1          1           -1          1           -1
43
  Peak Error                  1           1           1           1           1           1           1           1           1
44
  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45
  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46
  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47
  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48
  */
49
#include <stdio.h>
50
#include <string.h>
51
52
#include "iv_datatypedef.h"
53
#include "iv.h"
54
#include "impeg2_defs.h"
55
#include "impeg2_platform_macros.h"
56
57
#include "impeg2_macros.h"
58
#include "impeg2_globals.h"
59
#include "impeg2_idct.h"
60
61
62
void impeg2_idct_recon_dc(WORD16 *pi2_src,
63
                            WORD16 *pi2_tmp,
64
                            UWORD8 *pu1_pred,
65
                            UWORD8 *pu1_dst,
66
                            WORD32 i4_src_strd,
67
                            WORD32 i4_pred_strd,
68
                            WORD32 i4_dst_strd,
69
                            WORD32 i4_zero_cols,
70
                            WORD32 i4_zero_rows)
71
1.27M
{
72
1.27M
    WORD32 i4_val, i, j;
73
74
1.27M
    UNUSED(pi2_tmp);
75
1.27M
    UNUSED(i4_src_strd);
76
1.27M
    UNUSED(i4_zero_cols);
77
1.27M
    UNUSED(i4_zero_rows);
78
79
1.27M
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
80
1.27M
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
81
1.27M
    i4_val = i4_val * gai2_impeg2_idct_q11[0];
82
1.27M
    i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
83
84
11.3M
    for(i = 0; i < TRANS_SIZE_8; i++)
85
10.0M
    {
86
90.1M
        for(j = 0; j < TRANS_SIZE_8; j++)
87
80.0M
        {
88
80.0M
            pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]);
89
80.0M
        }
90
10.0M
        pu1_dst  += i4_dst_strd;
91
10.0M
        pu1_pred += i4_pred_strd;
92
10.0M
    }
93
1.27M
}
94
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
95
                            WORD16 *pi2_tmp,
96
                            UWORD8 *pu1_pred,
97
                            UWORD8 *pu1_dst,
98
                            WORD32 i4_src_strd,
99
                            WORD32 i4_pred_strd,
100
                            WORD32 i4_dst_strd,
101
                            WORD32 i4_zero_cols,
102
                            WORD32 i4_zero_rows)
103
104
192k
{
105
192k
    WORD32 i4_val, i, j;
106
192k
    WORD32 i4_count = 0;
107
192k
    WORD32 i4_sum;
108
109
192k
    UNUSED(pi2_tmp);
110
192k
    UNUSED(i4_src_strd);
111
192k
    UNUSED(i4_zero_cols);
112
192k
    UNUSED(i4_zero_rows);
113
114
192k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
115
192k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
116
117
192k
    i4_val *= gai2_impeg2_idct_q11[0];
118
1.71M
    for(i = 0; i < TRANS_SIZE_8; i++)
119
1.52M
    {
120
13.6M
        for (j = 0; j < TRANS_SIZE_8; j++)
121
12.1M
        {
122
12.1M
            i4_sum = i4_val;
123
12.1M
            i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count];
124
12.1M
            i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
125
12.1M
            i4_sum += pu1_pred[j];
126
12.1M
            pu1_dst[j] = CLIP_U8(i4_sum);
127
12.1M
            i4_count++;
128
12.1M
        }
129
130
1.52M
        pu1_dst  += i4_dst_strd;
131
1.52M
        pu1_pred += i4_pred_strd;
132
1.52M
    }
133
134
192k
}
135
/**
136
 *******************************************************************************
137
 *
138
 * @brief
139
 *  This function performs Inverse transform  and reconstruction for 8x8
140
 * input block
141
 *
142
 * @par Description:
143
 *  Performs inverse transform and adds the prediction  data and clips output
144
 * to 8 bit
145
 *
146
 * @param[in] pi2_src
147
 *  Input 8x8 coefficients
148
 *
149
 * @param[in] pi2_tmp
150
 *  Temporary 8x8 buffer for storing inverse
151
 *
152
 *  transform
153
 *  1st stage output
154
 *
155
 * @param[in] pu1_pred
156
 *  Prediction 8x8 block
157
 *
158
 * @param[out] pu1_dst
159
 *  Output 8x8 block
160
 *
161
 * @param[in] src_strd
162
 *  Input stride
163
 *
164
 * @param[in] pred_strd
165
 *  Prediction stride
166
 *
167
 * @param[in] dst_strd
168
 *  Output Stride
169
 *
170
 * @param[in] shift
171
 *  Output shift
172
 *
173
 * @param[in] zero_cols
174
 *  Zero columns in pi2_src
175
 *
176
 * @returns  Void
177
 *
178
 * @remarks
179
 *  None
180
 *
181
 *******************************************************************************
182
 */
183
184
void impeg2_idct_recon(WORD16 *pi2_src,
185
                        WORD16 *pi2_tmp,
186
                        UWORD8 *pu1_pred,
187
                        UWORD8 *pu1_dst,
188
                        WORD32 i4_src_strd,
189
                        WORD32 i4_pred_strd,
190
                        WORD32 i4_dst_strd,
191
                        WORD32 i4_zero_cols,
192
                        WORD32 i4_zero_rows)
193
7.03M
{
194
7.03M
    WORD32 j, k;
195
7.03M
    WORD32 ai4_e[4], ai4_o[4];
196
7.03M
    WORD32 ai4_ee[2], ai4_eo[2];
197
7.03M
    WORD32 i4_add;
198
7.03M
    WORD32 i4_shift;
199
7.03M
    WORD16 *pi2_tmp_orig;
200
7.03M
    WORD32 i4_trans_size;
201
7.03M
    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202
7.03M
    WORD32 i4_row_limit_2nd_stage;
203
204
7.03M
    i4_trans_size = TRANS_SIZE_8;
205
206
7.03M
    pi2_tmp_orig = pi2_tmp;
207
208
7.03M
    if((i4_zero_cols & 0xF0) == 0xF0)
209
5.97M
        i4_row_limit_2nd_stage = 4;
210
1.05M
    else
211
1.05M
        i4_row_limit_2nd_stage = TRANS_SIZE_8;
212
213
214
7.03M
    if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215
5.78M
    {
216
        /************************************************************************************************/
217
        /**********************************START - IT_RECON_8x8******************************************/
218
        /************************************************************************************************/
219
220
        /* Inverse Transform 1st stage */
221
5.78M
        i4_shift = IDCT_STG1_SHIFT;
222
5.78M
        i4_add = 1 << (i4_shift - 1);
223
224
29.0M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
225
23.2M
        {
226
            /* Checking for Zero Cols */
227
23.2M
            if((i4_zero_cols & 1) == 1)
228
15.2M
            {
229
15.2M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230
15.2M
            }
231
8.01M
            else
232
8.01M
            {
233
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234
40.2M
                for(k = 0; k < 4; k++)
235
32.2M
                {
236
32.2M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237
32.2M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
238
32.2M
                                                    * pi2_src[3 * i4_src_strd];
239
32.2M
                }
240
8.01M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241
8.01M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242
8.01M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243
8.01M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244
245
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246
8.01M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247
8.01M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248
8.01M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249
8.01M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250
40.2M
                for(k = 0; k < 4; k++)
251
32.2M
                {
252
32.2M
                    pi2_tmp[k] =
253
32.2M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254
32.2M
                    pi2_tmp[k + 4] =
255
32.2M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256
32.2M
                }
257
8.01M
            }
258
23.2M
            pi2_src++;
259
23.2M
            pi2_tmp += i4_trans_size;
260
23.2M
            i4_zero_cols = i4_zero_cols >> 1;
261
23.2M
        }
262
263
5.78M
        pi2_tmp = pi2_tmp_orig;
264
265
        /* Inverse Transform 2nd stage */
266
5.78M
        i4_shift = IDCT_STG2_SHIFT;
267
5.78M
        i4_add = 1 << (i4_shift - 1);
268
5.78M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269
5.74M
        {
270
50.4M
            for(j = 0; j < i4_trans_size; j++)
271
44.7M
            {
272
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273
222M
                for(k = 0; k < 4; k++)
274
177M
                {
275
177M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276
177M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277
177M
                }
278
44.7M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279
44.7M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280
44.7M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281
44.7M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282
283
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284
44.7M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285
44.7M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286
44.7M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287
44.7M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288
219M
                for(k = 0; k < 4; k++)
289
174M
                {
290
174M
                    WORD32 itrans_out;
291
174M
                    itrans_out =
292
174M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293
174M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294
174M
                    itrans_out =
295
174M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296
174M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297
174M
                }
298
44.7M
                pi2_tmp++;
299
44.7M
                pu1_pred += i4_pred_strd;
300
44.7M
                pu1_dst += i4_dst_strd;
301
44.7M
            }
302
5.74M
        }
303
37.2k
        else /* All rows of output of 1st stage are non-zero */
304
37.2k
        {
305
397k
            for(j = 0; j < i4_trans_size; j++)
306
360k
            {
307
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308
1.80M
                for(k = 0; k < 4; k++)
309
1.44M
                {
310
1.44M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311
1.44M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
312
1.44M
                                                    * pi2_tmp[3 * i4_trans_size]
313
1.44M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
314
1.44M
                                                    * pi2_tmp[5 * i4_trans_size]
315
1.44M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
316
1.44M
                                                    * pi2_tmp[7 * i4_trans_size];
317
1.44M
                }
318
319
360k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320
360k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321
360k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322
360k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323
360k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324
360k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325
360k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326
360k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327
328
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329
360k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330
360k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331
360k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332
360k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333
1.80M
                for(k = 0; k < 4; k++)
334
1.44M
                {
335
1.44M
                    WORD32 itrans_out;
336
1.44M
                    itrans_out =
337
1.44M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338
1.44M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339
1.44M
                    itrans_out =
340
1.44M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341
1.44M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342
1.44M
                }
343
360k
                pi2_tmp++;
344
360k
                pu1_pred += i4_pred_strd;
345
360k
                pu1_dst += i4_dst_strd;
346
360k
            }
347
37.2k
        }
348
        /************************************************************************************************/
349
        /************************************END - IT_RECON_8x8******************************************/
350
        /************************************************************************************************/
351
5.78M
    }
352
1.24M
    else /* All rows of input are non-zero */
353
1.24M
    {
354
        /************************************************************************************************/
355
        /**********************************START - IT_RECON_8x8******************************************/
356
        /************************************************************************************************/
357
358
        /* Inverse Transform 1st stage */
359
1.24M
        i4_shift = IDCT_STG1_SHIFT;
360
1.24M
        i4_add = 1 << (i4_shift - 1);
361
362
10.2M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
363
9.00M
        {
364
            /* Checking for Zero Cols */
365
9.00M
            if((i4_zero_cols & 1) == 1)
366
4.87M
            {
367
4.87M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368
4.87M
            }
369
4.13M
            else
370
4.13M
            {
371
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372
20.7M
                for(k = 0; k < 4; k++)
373
16.5M
                {
374
16.5M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375
16.5M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
376
16.5M
                                                    * pi2_src[3 * i4_src_strd]
377
16.5M
                                    + gai2_impeg2_idct_q15[5 * 8 + k]
378
16.5M
                                                    * pi2_src[5 * i4_src_strd]
379
16.5M
                                    + gai2_impeg2_idct_q15[7 * 8 + k]
380
16.5M
                                                    * pi2_src[7 * i4_src_strd];
381
16.5M
                }
382
383
4.13M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384
4.13M
                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385
4.13M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386
4.13M
                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387
4.13M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388
4.13M
                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389
4.13M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390
4.13M
                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391
392
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393
4.13M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394
4.13M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395
4.13M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396
4.13M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397
20.6M
                for(k = 0; k < 4; k++)
398
16.5M
                {
399
16.5M
                    pi2_tmp[k] =
400
16.5M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401
16.5M
                    pi2_tmp[k + 4] =
402
16.5M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403
16.5M
                }
404
4.13M
            }
405
9.00M
            pi2_src++;
406
9.00M
            pi2_tmp += i4_trans_size;
407
9.00M
            i4_zero_cols = i4_zero_cols >> 1;
408
9.00M
        }
409
410
1.24M
        pi2_tmp = pi2_tmp_orig;
411
412
        /* Inverse Transform 2nd stage */
413
1.24M
        i4_shift = IDCT_STG2_SHIFT;
414
1.24M
        i4_add = 1 << (i4_shift - 1);
415
1.24M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416
240k
        {
417
2.15M
            for(j = 0; j < i4_trans_size; j++)
418
1.91M
            {
419
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420
9.59M
                for(k = 0; k < 4; k++)
421
7.67M
                {
422
7.67M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423
7.67M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424
7.67M
                }
425
1.91M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426
1.91M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427
1.91M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428
1.91M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429
430
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431
1.91M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432
1.91M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433
1.91M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434
1.91M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435
9.58M
                for(k = 0; k < 4; k++)
436
7.66M
                {
437
7.66M
                    WORD32 itrans_out;
438
7.66M
                    itrans_out =
439
7.66M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440
7.66M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441
7.66M
                    itrans_out =
442
7.66M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443
7.66M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444
7.66M
                }
445
1.91M
                pi2_tmp++;
446
1.91M
                pu1_pred += i4_pred_strd;
447
1.91M
                pu1_dst += i4_dst_strd;
448
1.91M
            }
449
240k
        }
450
1.00M
        else /* All rows of output of 1st stage are non-zero */
451
1.00M
        {
452
8.96M
            for(j = 0; j < i4_trans_size; j++)
453
7.95M
            {
454
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455
39.5M
                for(k = 0; k < 4; k++)
456
31.6M
                {
457
31.6M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458
31.6M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
459
31.6M
                                                    * pi2_tmp[3 * i4_trans_size]
460
31.6M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
461
31.6M
                                                    * pi2_tmp[5 * i4_trans_size]
462
31.6M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
463
31.6M
                                                    * pi2_tmp[7 * i4_trans_size];
464
31.6M
                }
465
466
7.95M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467
7.95M
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468
7.95M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469
7.95M
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470
7.95M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471
7.95M
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472
7.95M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473
7.95M
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474
475
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476
7.95M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477
7.95M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478
7.95M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479
7.95M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480
39.3M
                for(k = 0; k < 4; k++)
481
31.4M
                {
482
31.4M
                    WORD32 itrans_out;
483
31.4M
                    itrans_out =
484
31.4M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485
31.4M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486
31.4M
                    itrans_out =
487
31.4M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488
31.4M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489
31.4M
                }
490
7.95M
                pi2_tmp++;
491
7.95M
                pu1_pred += i4_pred_strd;
492
7.95M
                pu1_dst += i4_dst_strd;
493
7.95M
            }
494
1.00M
        }
495
        /************************************************************************************************/
496
        /************************************END - IT_RECON_8x8******************************************/
497
        /************************************************************************************************/
498
1.24M
    }
499
7.03M
}
500