Coverage Report

Created: 2025-08-29 06:21

/src/libmpeg2/common/impeg2_idct.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*****************************************************************************/
21
/*                                                                           */
22
/*  File Name         : impeg2_idct.c                                        */
23
/*                                                                           */
24
/*  Description       : Contains 2d idct and inverse quantization functions  */
25
/*                                                                           */
26
/*  List of Functions : impeg2_idct_recon_dc()                               */
27
/*                      impeg2_idct_recon_dc_mismatch()                      */
28
/*                      impeg2_idct_recon()                                  */
29
/*                                                                           */
30
/*  Issues / Problems : None                                                 */
31
/*                                                                           */
32
/*  Revision History  :                                                      */
33
/*                                                                           */
34
/*         DD MM YYYY   Author(s)       Changes                              */
35
/*         10 09 2005   Hairsh M        First Version                        */
36
/*                                                                           */
37
/*****************************************************************************/
38
/*
39
  IEEE - 1180 results for this IDCT
40
  L                           256         256         5           5           300         300         384         384         Thresholds
41
  H                           255         255         5           5           300         300         383         383
42
  sign                        1           -1          1           -1          1           -1          1           -1
43
  Peak Error                  1           1           1           1           1           1           1           1           1
44
  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45
  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46
  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47
  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48
  */
49
#include <stdio.h>
50
#include <string.h>
51
52
#include "iv_datatypedef.h"
53
#include "iv.h"
54
#include "impeg2_defs.h"
55
#include "impeg2_platform_macros.h"
56
57
#include "impeg2_macros.h"
58
#include "impeg2_globals.h"
59
#include "impeg2_idct.h"
60
61
62
void impeg2_idct_recon_dc(WORD16 *pi2_src, /* [in] coefficients; only pi2_src[0] is read */
63
                            WORD16 *pi2_tmp, /* not used by this function */
64
                            UWORD8 *pu1_pred, /* [in] prediction samples */
65
                            UWORD8 *pu1_dst, /* [out] reconstructed samples */
66
                            WORD32 i4_src_strd, /* not used by this function */
67
                            WORD32 i4_pred_strd, /* [in] step between consecutive rows of pu1_pred */
68
                            WORD32 i4_dst_strd, /* [in] step between consecutive rows of pu1_dst */
69
                            WORD32 i4_zero_cols, /* not used by this function */
70
                            WORD32 i4_zero_rows) /* not used by this function */
71
1.16M
{ /* Reconstruction from the first coefficient only: one offset is added to every prediction sample */
72
1.16M
    WORD32 i4_val, i, j;
73
74
1.16M
    UNUSED(pi2_tmp);
75
1.16M
    UNUSED(i4_src_strd);
76
1.16M
    UNUSED(i4_zero_cols);
77
1.16M
    UNUSED(i4_zero_rows);
78
79
1.16M
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; /* stage 1: scale by the first entry of the q15 table */
80
1.16M
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); /* stage 1 rounding and down-shift */
81
1.16M
    i4_val = i4_val * gai2_impeg2_idct_q11[0]; /* stage 2: scale by the first entry of the q11 table */
82
1.16M
    i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); /* stage 2 rounding and down-shift */
83
84
10.3M
    for(i = 0; i < TRANS_SIZE_8; i++)
85
9.17M
    {
86
82.2M
        for(j = 0; j < TRANS_SIZE_8; j++)
87
73.1M
        {
88
73.1M
            pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]); /* same offset for every sample; result passed through CLIP_U8 */
89
73.1M
        }
90
9.17M
        pu1_dst  += i4_dst_strd; /* advance to the next output row */
91
9.17M
        pu1_pred += i4_pred_strd; /* advance to the next prediction row */
92
9.17M
    }
93
1.16M
}
94
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src, /* [in] coefficients; only pi2_src[0] is read */
95
                            WORD16 *pi2_tmp, /* not used by this function */
96
                            UWORD8 *pu1_pred, /* [in] prediction samples */
97
                            UWORD8 *pu1_dst, /* [out] reconstructed samples */
98
                            WORD32 i4_src_strd, /* not used by this function */
99
                            WORD32 i4_pred_strd, /* [in] step between consecutive rows of pu1_pred */
100
                            WORD32 i4_dst_strd, /* [in] step between consecutive rows of pu1_dst */
101
                            WORD32 i4_zero_cols, /* not used by this function */
102
                            WORD32 i4_zero_rows) /* not used by this function */
103
104
48.2k
{ /* First-coefficient-only reconstruction with a per-position term added before the stage 2 rounding */
105
48.2k
    WORD32 i4_val, i, j;
106
48.2k
    WORD32 i4_count = 0; /* running sample index into gai2_impeg2_mismatch_stg2_additive */
107
48.2k
    WORD32 i4_sum;
108
109
48.2k
    UNUSED(pi2_tmp);
110
48.2k
    UNUSED(i4_src_strd);
111
48.2k
    UNUSED(i4_zero_cols);
112
48.2k
    UNUSED(i4_zero_rows);
113
114
48.2k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0]; /* stage 1: scale by the first entry of the q15 table */
115
48.2k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT); /* stage 1 rounding and down-shift */
116
117
48.2k
    i4_val *= gai2_impeg2_idct_q11[0]; /* stage 2 scaling; rounding is deferred to the per-sample loop below */
118
430k
    for(i = 0; i < TRANS_SIZE_8; i++)
119
382k
    {
120
3.42M
        for (j = 0; j < TRANS_SIZE_8; j++)
121
3.04M
        {
122
3.04M
            i4_sum = i4_val;
123
3.04M
            i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count]; /* position-dependent term; NOTE(review): presumably the mismatch-control contribution - confirm against the table definition */
124
3.04M
            i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT); /* stage 2 rounding and down-shift */
125
3.04M
            i4_sum += pu1_pred[j]; /* add the prediction sample */
126
3.04M
            pu1_dst[j] = CLIP_U8(i4_sum); /* result passed through CLIP_U8 */
127
3.04M
            i4_count++;
128
3.04M
        }
129
130
382k
        pu1_dst  += i4_dst_strd; /* advance to the next output row */
131
382k
        pu1_pred += i4_pred_strd; /* advance to the next prediction row */
132
382k
    }
133
134
48.2k
}
135
/**
136
 *******************************************************************************
137
 *
138
 * @brief
139
 *  Performs the two-stage inverse transform of one coefficient block and
140
 *  reconstructs the output samples
141
 *
142
 * @par Description:
143
 *  Stage 1 writes CLIP_S16 intermediates to pi2_tmp (transposed); stage 2
144
 *  adds the prediction samples and stores the result through CLIP_U8
145
 *
146
 * @param[in] pi2_src
147
 *  Input coefficients, rows are i4_src_strd elements apart
148
 *
149
 * @param[in,out] pi2_tmp
150
 *  Scratch buffer for the 1st stage output. When bits 4..7 of i4_zero_cols
151
 *  are all set, only the first 4 * TRANS_SIZE_8 entries are written by
152
 *  stage 1 and only those entries are read by stage 2; otherwise all
153
 *  TRANS_SIZE_8 * TRANS_SIZE_8 entries are written
154
 *
155
 * @param[in] pu1_pred
156
 *  Prediction samples
157
 *
158
 * @param[out] pu1_dst
159
 *  Reconstructed output samples
160
 *
161
 * @param[in] i4_src_strd
162
 *  Step, in WORD16 elements, between consecutive rows of pi2_src
163
 *
164
 * @param[in] i4_pred_strd
165
 *  Step between consecutive rows of pu1_pred
166
 *
167
 * @param[in] i4_dst_strd
168
 *  Step between consecutive rows of pu1_dst
169
 *
170
 * @param[in] i4_zero_cols
171
 *  Bit mask: bit n set means column n of pi2_src is not read and is treated as zero
172
 *
173
 * @param[in] i4_zero_rows
174
 *  Bit mask: only bits 4..7 are tested; if all are set, rows 4..7 of pi2_src are not read
175
 *
176
 * @returns  Void
177
 *
178
 * @remarks
179
 *  None
180
 *
181
 *******************************************************************************
182
 */
183
184
void impeg2_idct_recon(WORD16 *pi2_src,
185
                        WORD16 *pi2_tmp,
186
                        UWORD8 *pu1_pred,
187
                        UWORD8 *pu1_dst,
188
                        WORD32 i4_src_strd,
189
                        WORD32 i4_pred_strd,
190
                        WORD32 i4_dst_strd,
191
                        WORD32 i4_zero_cols,
192
                        WORD32 i4_zero_rows)
193
9.56M
{
194
9.56M
    WORD32 j, k;
195
9.56M
    WORD32 ai4_e[4], ai4_o[4];
196
9.56M
    WORD32 ai4_ee[2], ai4_eo[2];
197
9.56M
    WORD32 i4_add;
198
9.56M
    WORD32 i4_shift;
199
9.56M
    WORD16 *pi2_tmp_orig;
200
9.56M
    WORD32 i4_trans_size;
201
9.56M
    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols; /* copy taken before stage 1 shifts i4_zero_cols; zero input columns become zero rows of pi2_tmp */
202
9.56M
    WORD32 i4_row_limit_2nd_stage;
203
204
9.56M
    i4_trans_size = TRANS_SIZE_8;
205
206
9.56M
    pi2_tmp_orig = pi2_tmp; /* remembered so stage 2 can restart from the beginning of the scratch buffer */
207
208
9.56M
    if((i4_zero_cols & 0xF0) == 0xF0) /* columns 4..7 all flagged zero: stage 1 only produces 4 rows of pi2_tmp */
209
9.35M
        i4_row_limit_2nd_stage = 4;
210
218k
    else
211
218k
        i4_row_limit_2nd_stage = TRANS_SIZE_8;
212
213
214
9.56M
    if((i4_zero_rows & 0xF0) == 0xF0) /* Bits 4..7 set: only the first 4 rows of input are read in this branch */
215
9.09M
    {
216
        /************************************************************************************************/
217
        /**********************************START - IT_RECON_8x8******************************************/
218
        /************************************************************************************************/
219
220
        /* Inverse Transform 1st stage */
221
9.09M
        i4_shift = IDCT_STG1_SHIFT;
222
9.09M
        i4_add = 1 << (i4_shift - 1); /* rounding offset: half of the shift divisor */
223
224
45.5M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
225
36.4M
        {
226
            /* Checking for Zero Cols */
227
36.4M
            if((i4_zero_cols & 1) == 1)
228
24.0M
            {
229
24.0M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); /* flagged column: its row of pi2_tmp is all zero */
230
24.0M
            }
231
12.4M
            else
232
12.4M
            {
233
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234
62.4M
                for(k = 0; k < 4; k++)
235
50.0M
                {
236
50.0M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237
50.0M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
238
50.0M
                                                    * pi2_src[3 * i4_src_strd];
239
50.0M
                }
240
12.4M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241
12.4M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242
12.4M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243
12.4M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244
245
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246
12.4M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247
12.4M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248
12.4M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249
12.4M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250
62.5M
                for(k = 0; k < 4; k++)
251
50.0M
                {
252
50.0M
                    pi2_tmp[k] =
253
50.0M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254
50.0M
                    pi2_tmp[k + 4] =
255
50.0M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256
50.0M
                }
257
12.4M
            }
258
36.4M
            pi2_src++; /* next input column */
259
36.4M
            pi2_tmp += i4_trans_size; /* next row of the transposed intermediate */
260
36.4M
            i4_zero_cols = i4_zero_cols >> 1; /* bring the next column's flag into bit 0 */
261
36.4M
        }
262
263
9.09M
        pi2_tmp = pi2_tmp_orig; /* rewind the scratch pointer for stage 2 */
264
265
        /* Inverse Transform 2nd stage */
266
9.09M
        i4_shift = IDCT_STG2_SHIFT;
267
9.09M
        i4_add = 1 << (i4_shift - 1);
268
9.09M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* Only the first 4 rows of the 1st stage output are read */
269
9.06M
        {
270
79.6M
            for(j = 0; j < i4_trans_size; j++)
271
70.6M
            {
272
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273
350M
                for(k = 0; k < 4; k++)
274
279M
                {
275
279M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276
279M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277
279M
                }
278
70.6M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279
70.6M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280
70.6M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281
70.6M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282
283
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284
70.6M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285
70.6M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286
70.6M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287
70.6M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288
345M
                for(k = 0; k < 4; k++)
289
274M
                {
290
274M
                    WORD32 itrans_out;
291
274M
                    itrans_out =
292
274M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293
274M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294
274M
                    itrans_out =
295
274M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296
274M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297
274M
                }
298
70.6M
                pi2_tmp++; /* next column of the intermediate */
299
70.6M
                pu1_pred += i4_pred_strd;
300
70.6M
                pu1_dst += i4_dst_strd;
301
70.6M
            }
302
9.06M
        }
303
22.7k
        else /* All 8 rows of the 1st stage output are read */
304
22.7k
        {
305
301k
            for(j = 0; j < i4_trans_size; j++)
306
278k
            {
307
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308
1.39M
                for(k = 0; k < 4; k++)
309
1.11M
                {
310
1.11M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311
1.11M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
312
1.11M
                                                    * pi2_tmp[3 * i4_trans_size]
313
1.11M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
314
1.11M
                                                    * pi2_tmp[5 * i4_trans_size]
315
1.11M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
316
1.11M
                                                    * pi2_tmp[7 * i4_trans_size];
317
1.11M
                }
318
319
278k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320
278k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321
278k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322
278k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323
278k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324
278k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325
278k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326
278k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327
328
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329
278k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330
278k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331
278k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332
278k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333
1.39M
                for(k = 0; k < 4; k++)
334
1.11M
                {
335
1.11M
                    WORD32 itrans_out;
336
1.11M
                    itrans_out =
337
1.11M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338
1.11M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339
1.11M
                    itrans_out =
340
1.11M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341
1.11M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342
1.11M
                }
343
278k
                pi2_tmp++; /* next column of the intermediate */
344
278k
                pu1_pred += i4_pred_strd;
345
278k
                pu1_dst += i4_dst_strd;
346
278k
            }
347
22.7k
        }
348
        /************************************************************************************************/
349
        /************************************END - IT_RECON_8x8******************************************/
350
        /************************************************************************************************/
351
9.09M
    }
352
479k
    else /* All 8 rows of input are read */
353
479k
    {
354
        /************************************************************************************************/
355
        /**********************************START - IT_RECON_8x8******************************************/
356
        /************************************************************************************************/
357
358
        /* Inverse Transform 1st stage */
359
479k
        i4_shift = IDCT_STG1_SHIFT;
360
479k
        i4_add = 1 << (i4_shift - 1); /* rounding offset: half of the shift divisor */
361
362
3.13M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
363
2.65M
        {
364
            /* Checking for Zero Cols */
365
2.65M
            if((i4_zero_cols & 1) == 1)
366
969k
            {
367
969k
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16)); /* flagged column: its row of pi2_tmp is all zero */
368
969k
            }
369
1.68M
            else
370
1.68M
            {
371
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372
8.42M
                for(k = 0; k < 4; k++)
373
6.73M
                {
374
6.73M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375
6.73M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
376
6.73M
                                                    * pi2_src[3 * i4_src_strd]
377
6.73M
                                    + gai2_impeg2_idct_q15[5 * 8 + k]
378
6.73M
                                                    * pi2_src[5 * i4_src_strd]
379
6.73M
                                    + gai2_impeg2_idct_q15[7 * 8 + k]
380
6.73M
                                                    * pi2_src[7 * i4_src_strd];
381
6.73M
                }
382
383
1.68M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384
1.68M
                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385
1.68M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386
1.68M
                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387
1.68M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388
1.68M
                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389
1.68M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390
1.68M
                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391
392
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393
1.68M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394
1.68M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395
1.68M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396
1.68M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397
8.42M
                for(k = 0; k < 4; k++)
398
6.73M
                {
399
6.73M
                    pi2_tmp[k] =
400
6.73M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401
6.73M
                    pi2_tmp[k + 4] =
402
6.73M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403
6.73M
                }
404
1.68M
            }
405
2.65M
            pi2_src++; /* next input column */
406
2.65M
            pi2_tmp += i4_trans_size; /* next row of the transposed intermediate */
407
2.65M
            i4_zero_cols = i4_zero_cols >> 1; /* bring the next column's flag into bit 0 */
408
2.65M
        }
409
410
479k
        pi2_tmp = pi2_tmp_orig; /* rewind the scratch pointer for stage 2 */
411
412
        /* Inverse Transform 2nd stage */
413
479k
        i4_shift = IDCT_STG2_SHIFT;
414
479k
        i4_add = 1 << (i4_shift - 1);
415
479k
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* Only the first 4 rows of the 1st stage output are read */
416
297k
        {
417
2.67M
            for(j = 0; j < i4_trans_size; j++)
418
2.37M
            {
419
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420
11.8M
                for(k = 0; k < 4; k++)
421
9.50M
                {
422
9.50M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423
9.50M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424
9.50M
                }
425
2.37M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426
2.37M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427
2.37M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428
2.37M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429
430
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431
2.37M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432
2.37M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433
2.37M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434
2.37M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435
11.8M
                for(k = 0; k < 4; k++)
436
9.50M
                {
437
9.50M
                    WORD32 itrans_out;
438
9.50M
                    itrans_out =
439
9.50M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440
9.50M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441
9.50M
                    itrans_out =
442
9.50M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443
9.50M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444
9.50M
                }
445
2.37M
                pi2_tmp++; /* next column of the intermediate */
446
2.37M
                pu1_pred += i4_pred_strd;
447
2.37M
                pu1_dst += i4_dst_strd;
448
2.37M
            }
449
297k
        }
450
181k
        else /* All 8 rows of the 1st stage output are read */
451
181k
        {
452
1.64M
            for(j = 0; j < i4_trans_size; j++)
453
1.45M
            {
454
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455
7.29M
                for(k = 0; k < 4; k++)
456
5.83M
                {
457
5.83M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458
5.83M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
459
5.83M
                                                    * pi2_tmp[3 * i4_trans_size]
460
5.83M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
461
5.83M
                                                    * pi2_tmp[5 * i4_trans_size]
462
5.83M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
463
5.83M
                                                    * pi2_tmp[7 * i4_trans_size];
464
5.83M
                }
465
466
1.45M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467
1.45M
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468
1.45M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469
1.45M
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470
1.45M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471
1.45M
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472
1.45M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473
1.45M
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474
475
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476
1.45M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477
1.45M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478
1.45M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479
1.45M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480
7.28M
                for(k = 0; k < 4; k++)
481
5.82M
                {
482
5.82M
                    WORD32 itrans_out;
483
5.82M
                    itrans_out =
484
5.82M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485
5.82M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486
5.82M
                    itrans_out =
487
5.82M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488
5.82M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489
5.82M
                }
490
1.45M
                pi2_tmp++; /* next column of the intermediate */
491
1.45M
                pu1_pred += i4_pred_strd;
492
1.45M
                pu1_dst += i4_dst_strd;
493
1.45M
            }
494
181k
        }
495
        /************************************************************************************************/
496
        /************************************END - IT_RECON_8x8******************************************/
497
        /************************************************************************************************/
498
479k
    }
499
9.56M
}
500