Coverage Report

Created: 2025-10-10 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libmpeg2/common/impeg2_idct.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*****************************************************************************/
21
/*                                                                           */
22
/*  File Name         : impeg2_idct.c                                        */
23
/*                                                                           */
24
/*  Description       : Contains 2d idct and inverse quantization functions  */
25
/*                                                                           */
26
/*  List of Functions : impeg2_idct_recon_dc()                               */
27
/*                      impeg2_idct_recon_dc_mismatch()                      */
28
/*                      impeg2_idct_recon()                                  */
29
/*                                                                           */
30
/*  Issues / Problems : None                                                 */
31
/*                                                                           */
32
/*  Revision History  :                                                      */
33
/*                                                                           */
34
/*         DD MM YYYY   Author(s)       Changes                              */
35
/*         10 09 2005   Hairsh M        First Version                        */
36
/*                                                                           */
37
/*****************************************************************************/
38
/*
39
  IEEE - 1180 results for this IDCT
40
  L                           256         256         5           5           300         300         384         384         Thresholds
41
  H                           255         255         5           5           300         300         383         383
42
  sign                        1           -1          1           -1          1           -1          1           -1
43
  Peak Error                  1           1           1           1           1           1           1           1           1
44
  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45
  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46
  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47
  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48
  */
49
#include <stdio.h>
50
#include <string.h>
51
52
#include "iv_datatypedef.h"
53
#include "iv.h"
54
#include "impeg2_defs.h"
55
#include "impeg2_platform_macros.h"
56
57
#include "impeg2_macros.h"
58
#include "impeg2_globals.h"
59
#include "impeg2_idct.h"
60
61
62
void impeg2_idct_recon_dc(WORD16 *pi2_src,
63
                            WORD16 *pi2_tmp,
64
                            UWORD8 *pu1_pred,
65
                            UWORD8 *pu1_dst,
66
                            WORD32 i4_src_strd,
67
                            WORD32 i4_pred_strd,
68
                            WORD32 i4_dst_strd,
69
                            WORD32 i4_zero_cols,
70
                            WORD32 i4_zero_rows)
71
1.33M
{
72
1.33M
    WORD32 i4_val, i, j;
73
74
1.33M
    UNUSED(pi2_tmp);
75
1.33M
    UNUSED(i4_src_strd);
76
1.33M
    UNUSED(i4_zero_cols);
77
1.33M
    UNUSED(i4_zero_rows);
78
79
1.33M
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
80
1.33M
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
81
1.33M
    i4_val = i4_val * gai2_impeg2_idct_q11[0];
82
1.33M
    i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
83
84
11.8M
    for(i = 0; i < TRANS_SIZE_8; i++)
85
10.5M
    {
86
94.4M
        for(j = 0; j < TRANS_SIZE_8; j++)
87
83.9M
        {
88
83.9M
            pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]);
89
83.9M
        }
90
10.5M
        pu1_dst  += i4_dst_strd;
91
10.5M
        pu1_pred += i4_pred_strd;
92
10.5M
    }
93
1.33M
}
94
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
95
                            WORD16 *pi2_tmp,
96
                            UWORD8 *pu1_pred,
97
                            UWORD8 *pu1_dst,
98
                            WORD32 i4_src_strd,
99
                            WORD32 i4_pred_strd,
100
                            WORD32 i4_dst_strd,
101
                            WORD32 i4_zero_cols,
102
                            WORD32 i4_zero_rows)
103
104
111k
{
105
111k
    WORD32 i4_val, i, j;
106
111k
    WORD32 i4_count = 0;
107
111k
    WORD32 i4_sum;
108
109
111k
    UNUSED(pi2_tmp);
110
111k
    UNUSED(i4_src_strd);
111
111k
    UNUSED(i4_zero_cols);
112
111k
    UNUSED(i4_zero_rows);
113
114
111k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
115
111k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
116
117
111k
    i4_val *= gai2_impeg2_idct_q11[0];
118
1.00M
    for(i = 0; i < TRANS_SIZE_8; i++)
119
890k
    {
120
8.00M
        for (j = 0; j < TRANS_SIZE_8; j++)
121
7.11M
        {
122
7.11M
            i4_sum = i4_val;
123
7.11M
            i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count];
124
7.11M
            i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
125
7.11M
            i4_sum += pu1_pred[j];
126
7.11M
            pu1_dst[j] = CLIP_U8(i4_sum);
127
7.11M
            i4_count++;
128
7.11M
        }
129
130
890k
        pu1_dst  += i4_dst_strd;
131
890k
        pu1_pred += i4_pred_strd;
132
890k
    }
133
134
111k
}
135
/**
136
 *******************************************************************************
137
 *
138
 * @brief
139
 *  This function performs Inverse transform  and reconstruction for 8x8
140
 * input block
141
 *
142
 * @par Description:
143
 *  Performs inverse transform and adds the prediction  data and clips output
144
 * to 8 bit
145
 *
146
 * @param[in] pi2_src
147
 *  Input 8x8 coefficients
148
 *
149
 * @param[in] pi2_tmp
150
 *  Temporary 8x8 buffer for storing inverse
151
 *
152
 *  transform
153
 *  1st stage output
154
 *
155
 * @param[in] pu1_pred
156
 *  Prediction 8x8 block
157
 *
158
 * @param[out] pu1_dst
159
 *  Output 8x8 block
160
 *
161
 * @param[in] src_strd
162
 *  Input stride
163
 *
164
 * @param[in] pred_strd
165
 *  Prediction stride
166
 *
167
 * @param[in] dst_strd
168
 *  Output Stride
169
 *
170
 * @param[in] shift
171
 *  Output shift
172
 *
173
 * @param[in] zero_cols
174
 *  Zero columns in pi2_src
175
 *
176
 * @returns  Void
177
 *
178
 * @remarks
179
 *  None
180
 *
181
 *******************************************************************************
182
 */
183
184
void impeg2_idct_recon(WORD16 *pi2_src,
185
                        WORD16 *pi2_tmp,
186
                        UWORD8 *pu1_pred,
187
                        UWORD8 *pu1_dst,
188
                        WORD32 i4_src_strd,
189
                        WORD32 i4_pred_strd,
190
                        WORD32 i4_dst_strd,
191
                        WORD32 i4_zero_cols,
192
                        WORD32 i4_zero_rows)
193
16.8M
{
194
16.8M
    WORD32 j, k;
195
16.8M
    WORD32 ai4_e[4], ai4_o[4];
196
16.8M
    WORD32 ai4_ee[2], ai4_eo[2];
197
16.8M
    WORD32 i4_add;
198
16.8M
    WORD32 i4_shift;
199
16.8M
    WORD16 *pi2_tmp_orig;
200
16.8M
    WORD32 i4_trans_size;
201
16.8M
    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202
16.8M
    WORD32 i4_row_limit_2nd_stage;
203
204
16.8M
    i4_trans_size = TRANS_SIZE_8;
205
206
16.8M
    pi2_tmp_orig = pi2_tmp;
207
208
16.8M
    if((i4_zero_cols & 0xF0) == 0xF0)
209
16.7M
        i4_row_limit_2nd_stage = 4;
210
185k
    else
211
185k
        i4_row_limit_2nd_stage = TRANS_SIZE_8;
212
213
214
16.8M
    if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215
16.2M
    {
216
        /************************************************************************************************/
217
        /**********************************START - IT_RECON_8x8******************************************/
218
        /************************************************************************************************/
219
220
        /* Inverse Transform 1st stage */
221
16.2M
        i4_shift = IDCT_STG1_SHIFT;
222
16.2M
        i4_add = 1 << (i4_shift - 1);
223
224
81.3M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
225
65.1M
        {
226
            /* Checking for Zero Cols */
227
65.1M
            if((i4_zero_cols & 1) == 1)
228
42.8M
            {
229
42.8M
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230
42.8M
            }
231
22.2M
            else
232
22.2M
            {
233
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234
112M
                for(k = 0; k < 4; k++)
235
89.7M
                {
236
89.7M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237
89.7M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
238
89.7M
                                                    * pi2_src[3 * i4_src_strd];
239
89.7M
                }
240
22.2M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241
22.2M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242
22.2M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243
22.2M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244
245
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246
22.2M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247
22.2M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248
22.2M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249
22.2M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250
112M
                for(k = 0; k < 4; k++)
251
89.9M
                {
252
89.9M
                    pi2_tmp[k] =
253
89.9M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254
89.9M
                    pi2_tmp[k + 4] =
255
89.9M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256
89.9M
                }
257
22.2M
            }
258
65.1M
            pi2_src++;
259
65.1M
            pi2_tmp += i4_trans_size;
260
65.1M
            i4_zero_cols = i4_zero_cols >> 1;
261
65.1M
        }
262
263
16.2M
        pi2_tmp = pi2_tmp_orig;
264
265
        /* Inverse Transform 2nd stage */
266
16.2M
        i4_shift = IDCT_STG2_SHIFT;
267
16.2M
        i4_add = 1 << (i4_shift - 1);
268
16.2M
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269
16.2M
        {
270
142M
            for(j = 0; j < i4_trans_size; j++)
271
125M
            {
272
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273
625M
                for(k = 0; k < 4; k++)
274
499M
                {
275
499M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276
499M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277
499M
                }
278
125M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279
125M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280
125M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281
125M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282
283
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284
125M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285
125M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286
125M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287
125M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288
615M
                for(k = 0; k < 4; k++)
289
489M
                {
290
489M
                    WORD32 itrans_out;
291
489M
                    itrans_out =
292
489M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293
489M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294
489M
                    itrans_out =
295
489M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296
489M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297
489M
                }
298
125M
                pi2_tmp++;
299
125M
                pu1_pred += i4_pred_strd;
300
125M
                pu1_dst += i4_dst_strd;
301
125M
            }
302
16.2M
        }
303
13.4k
        else /* All rows of output of 1st stage are non-zero */
304
13.4k
        {
305
540k
            for(j = 0; j < i4_trans_size; j++)
306
527k
            {
307
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308
2.63M
                for(k = 0; k < 4; k++)
309
2.10M
                {
310
2.10M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311
2.10M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
312
2.10M
                                                    * pi2_tmp[3 * i4_trans_size]
313
2.10M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
314
2.10M
                                                    * pi2_tmp[5 * i4_trans_size]
315
2.10M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
316
2.10M
                                                    * pi2_tmp[7 * i4_trans_size];
317
2.10M
                }
318
319
527k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320
527k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321
527k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322
527k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323
527k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324
527k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325
527k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326
527k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327
328
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329
527k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330
527k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331
527k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332
527k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333
2.63M
                for(k = 0; k < 4; k++)
334
2.10M
                {
335
2.10M
                    WORD32 itrans_out;
336
2.10M
                    itrans_out =
337
2.10M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338
2.10M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339
2.10M
                    itrans_out =
340
2.10M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341
2.10M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342
2.10M
                }
343
527k
                pi2_tmp++;
344
527k
                pu1_pred += i4_pred_strd;
345
527k
                pu1_dst += i4_dst_strd;
346
527k
            }
347
13.4k
        }
348
        /************************************************************************************************/
349
        /************************************END - IT_RECON_8x8******************************************/
350
        /************************************************************************************************/
351
16.2M
    }
352
657k
    else /* All rows of input are non-zero */
353
657k
    {
354
        /************************************************************************************************/
355
        /**********************************START - IT_RECON_8x8******************************************/
356
        /************************************************************************************************/
357
358
        /* Inverse Transform 1st stage */
359
657k
        i4_shift = IDCT_STG1_SHIFT;
360
657k
        i4_add = 1 << (i4_shift - 1);
361
362
3.77M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
363
3.12M
        {
364
            /* Checking for Zero Cols */
365
3.12M
            if((i4_zero_cols & 1) == 1)
366
813k
            {
367
813k
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368
813k
            }
369
2.30M
            else
370
2.30M
            {
371
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372
11.5M
                for(k = 0; k < 4; k++)
373
9.23M
                {
374
9.23M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375
9.23M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
376
9.23M
                                                    * pi2_src[3 * i4_src_strd]
377
9.23M
                                    + gai2_impeg2_idct_q15[5 * 8 + k]
378
9.23M
                                                    * pi2_src[5 * i4_src_strd]
379
9.23M
                                    + gai2_impeg2_idct_q15[7 * 8 + k]
380
9.23M
                                                    * pi2_src[7 * i4_src_strd];
381
9.23M
                }
382
383
2.30M
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384
2.30M
                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385
2.30M
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386
2.30M
                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387
2.30M
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388
2.30M
                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389
2.30M
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390
2.30M
                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391
392
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393
2.30M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394
2.30M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395
2.30M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396
2.30M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397
11.5M
                for(k = 0; k < 4; k++)
398
9.22M
                {
399
9.22M
                    pi2_tmp[k] =
400
9.22M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401
9.22M
                    pi2_tmp[k + 4] =
402
9.22M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403
9.22M
                }
404
2.30M
            }
405
3.12M
            pi2_src++;
406
3.12M
            pi2_tmp += i4_trans_size;
407
3.12M
            i4_zero_cols = i4_zero_cols >> 1;
408
3.12M
        }
409
410
657k
        pi2_tmp = pi2_tmp_orig;
411
412
        /* Inverse Transform 2nd stage */
413
657k
        i4_shift = IDCT_STG2_SHIFT;
414
657k
        i4_add = 1 << (i4_shift - 1);
415
657k
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416
540k
        {
417
4.85M
            for(j = 0; j < i4_trans_size; j++)
418
4.31M
            {
419
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420
21.5M
                for(k = 0; k < 4; k++)
421
17.2M
                {
422
17.2M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423
17.2M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424
17.2M
                }
425
4.31M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426
4.31M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427
4.31M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428
4.31M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429
430
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431
4.31M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432
4.31M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433
4.31M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434
4.31M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435
21.5M
                for(k = 0; k < 4; k++)
436
17.2M
                {
437
17.2M
                    WORD32 itrans_out;
438
17.2M
                    itrans_out =
439
17.2M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440
17.2M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441
17.2M
                    itrans_out =
442
17.2M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443
17.2M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444
17.2M
                }
445
4.31M
                pi2_tmp++;
446
4.31M
                pu1_pred += i4_pred_strd;
447
4.31M
                pu1_dst += i4_dst_strd;
448
4.31M
            }
449
540k
        }
450
116k
        else /* All rows of output of 1st stage are non-zero */
451
116k
        {
452
1.06M
            for(j = 0; j < i4_trans_size; j++)
453
952k
            {
454
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455
4.74M
                for(k = 0; k < 4; k++)
456
3.79M
                {
457
3.79M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458
3.79M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
459
3.79M
                                                    * pi2_tmp[3 * i4_trans_size]
460
3.79M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
461
3.79M
                                                    * pi2_tmp[5 * i4_trans_size]
462
3.79M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
463
3.79M
                                                    * pi2_tmp[7 * i4_trans_size];
464
3.79M
                }
465
466
952k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467
952k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468
952k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469
952k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470
952k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471
952k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472
952k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473
952k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474
475
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476
952k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477
952k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478
952k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479
952k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480
4.73M
                for(k = 0; k < 4; k++)
481
3.78M
                {
482
3.78M
                    WORD32 itrans_out;
483
3.78M
                    itrans_out =
484
3.78M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485
3.78M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486
3.78M
                    itrans_out =
487
3.78M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488
3.78M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489
3.78M
                }
490
952k
                pi2_tmp++;
491
952k
                pu1_pred += i4_pred_strd;
492
952k
                pu1_dst += i4_dst_strd;
493
952k
            }
494
116k
        }
495
        /************************************************************************************************/
496
        /************************************END - IT_RECON_8x8******************************************/
497
        /************************************************************************************************/
498
657k
    }
499
16.8M
}
500