Coverage Report

Created: 2023-09-25 06:47

/src/libmpeg2/common/impeg2_idct.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2015 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
/*****************************************************************************/
21
/*                                                                           */
22
/*  File Name         : impeg2_idct.c                                        */
23
/*                                                                           */
24
/*  Description       : Contains 2d idct and inverse quantization functions  */
25
/*                                                                           */
26
/*  List of Functions : impeg2_idct_recon_dc()                               */
27
/*                      impeg2_idct_recon_dc_mismatch()                      */
28
/*                      impeg2_idct_recon()                                  */
29
/*                                                                           */
30
/*  Issues / Problems : None                                                 */
31
/*                                                                           */
32
/*  Revision History  :                                                      */
33
/*                                                                           */
34
/*         DD MM YYYY   Author(s)       Changes                              */
35
/*         10 09 2005   Hairsh M        First Version                        */
36
/*                                                                           */
37
/*****************************************************************************/
38
/*
39
  IEEE - 1180 results for this IDCT
40
  L                           256         256         5           5           300         300         384         384         Thresholds
41
  H                           255         255         5           5           300         300         383         383
42
  sign                        1           -1          1           -1          1           -1          1           -1
43
  Peak Error                  1           1           1           1           1           1           1           1           1
44
  Peak Mean Square Error      0.0191      0.0188      0.0108      0.0111      0.0176      0.0188      0.0165      0.0177      0.06
45
  Overall Mean Square Error   0.01566406  0.01597656  0.0091875   0.00908906  0.01499063  0.01533281  0.01432344  0.01412344  0.02
46
  Peak Mean Error             0.0027      0.0026      0.0028      0.002       0.0017      0.0033      0.0031      0.0025      0.015
47
  Overall Mean Error          0.00002656  -0.00031406 0.00016875  0.00005469  -0.00003125 0.00011406  0.00009219  0.00004219  0.0015
48
  */
49
#include <stdio.h>
50
#include <string.h>
51
52
#include "iv_datatypedef.h"
53
#include "iv.h"
54
#include "impeg2_defs.h"
55
#include "impeg2_platform_macros.h"
56
57
#include "impeg2_macros.h"
58
#include "impeg2_globals.h"
59
#include "impeg2_idct.h"
60
61
62
void impeg2_idct_recon_dc(WORD16 *pi2_src,
63
                            WORD16 *pi2_tmp,
64
                            UWORD8 *pu1_pred,
65
                            UWORD8 *pu1_dst,
66
                            WORD32 i4_src_strd,
67
                            WORD32 i4_pred_strd,
68
                            WORD32 i4_dst_strd,
69
                            WORD32 i4_zero_cols,
70
                            WORD32 i4_zero_rows)
71
128k
{
72
128k
    WORD32 i4_val, i, j;
73
74
128k
    UNUSED(pi2_tmp);
75
128k
    UNUSED(i4_src_strd);
76
128k
    UNUSED(i4_zero_cols);
77
128k
    UNUSED(i4_zero_rows);
78
79
128k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
80
128k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
81
128k
    i4_val = i4_val * gai2_impeg2_idct_q11[0];
82
128k
    i4_val = ((i4_val + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
83
84
1.14M
    for(i = 0; i < TRANS_SIZE_8; i++)
85
1.02M
    {
86
9.17M
        for(j = 0; j < TRANS_SIZE_8; j++)
87
8.15M
        {
88
8.15M
            pu1_dst[j] = CLIP_U8(i4_val + pu1_pred[j]);
89
8.15M
        }
90
1.02M
        pu1_dst  += i4_dst_strd;
91
1.02M
        pu1_pred += i4_pred_strd;
92
1.02M
    }
93
128k
}
94
void impeg2_idct_recon_dc_mismatch(WORD16 *pi2_src,
95
                            WORD16 *pi2_tmp,
96
                            UWORD8 *pu1_pred,
97
                            UWORD8 *pu1_dst,
98
                            WORD32 i4_src_strd,
99
                            WORD32 i4_pred_strd,
100
                            WORD32 i4_dst_strd,
101
                            WORD32 i4_zero_cols,
102
                            WORD32 i4_zero_rows)
103
104
40.6k
{
105
40.6k
    WORD32 i4_val, i, j;
106
40.6k
    WORD32 i4_count = 0;
107
40.6k
    WORD32 i4_sum;
108
109
40.6k
    UNUSED(pi2_tmp);
110
40.6k
    UNUSED(i4_src_strd);
111
40.6k
    UNUSED(i4_zero_cols);
112
40.6k
    UNUSED(i4_zero_rows);
113
114
40.6k
    i4_val = pi2_src[0] * gai2_impeg2_idct_q15[0];
115
40.6k
    i4_val = ((i4_val + IDCT_STG1_ROUND) >> IDCT_STG1_SHIFT);
116
117
40.6k
    i4_val *= gai2_impeg2_idct_q11[0];
118
364k
    for(i = 0; i < TRANS_SIZE_8; i++)
119
323k
    {
120
2.90M
        for (j = 0; j < TRANS_SIZE_8; j++)
121
2.58M
        {
122
2.58M
            i4_sum = i4_val;
123
2.58M
            i4_sum += gai2_impeg2_mismatch_stg2_additive[i4_count];
124
2.58M
            i4_sum = ((i4_sum + IDCT_STG2_ROUND) >> IDCT_STG2_SHIFT);
125
2.58M
            i4_sum += pu1_pred[j];
126
2.58M
            pu1_dst[j] = CLIP_U8(i4_sum);
127
2.58M
            i4_count++;
128
2.58M
        }
129
130
323k
        pu1_dst  += i4_dst_strd;
131
323k
        pu1_pred += i4_pred_strd;
132
323k
    }
133
134
40.6k
}
135
/**
136
 *******************************************************************************
137
 *
138
 * @brief
139
 *  This function performs Inverse transform  and reconstruction for 8x8
140
 * input block
141
 *
142
 * @par Description:
143
 *  Performs inverse transform and adds the prediction  data and clips output
144
 * to 8 bit
145
 *
146
 * @param[in] pi2_src
147
 *  Input 8x8 coefficients
148
 *
149
 * @param[in] pi2_tmp
150
 *  Temporary 8x8 buffer for storing inverse
151
 *
152
 *  transform
153
 *  1st stage output
154
 *
155
 * @param[in] pu1_pred
156
 *  Prediction 8x8 block
157
 *
158
 * @param[out] pu1_dst
159
 *  Output 8x8 block
160
 *
161
 * @param[in] src_strd
162
 *  Input stride
163
 *
164
 * @param[in] pred_strd
165
 *  Prediction stride
166
 *
167
 * @param[in] dst_strd
168
 *  Output Stride
169
 *
170
 * @param[in] shift
171
 *  Output shift
172
 *
173
 * @param[in] zero_cols
174
 *  Zero columns in pi2_src
175
 *
176
 * @returns  Void
177
 *
178
 * @remarks
179
 *  None
180
 *
181
 *******************************************************************************
182
 */
183
184
void impeg2_idct_recon(WORD16 *pi2_src,
185
                        WORD16 *pi2_tmp,
186
                        UWORD8 *pu1_pred,
187
                        UWORD8 *pu1_dst,
188
                        WORD32 i4_src_strd,
189
                        WORD32 i4_pred_strd,
190
                        WORD32 i4_dst_strd,
191
                        WORD32 i4_zero_cols,
192
                        WORD32 i4_zero_rows)
193
315k
{
194
315k
    WORD32 j, k;
195
315k
    WORD32 ai4_e[4], ai4_o[4];
196
315k
    WORD32 ai4_ee[2], ai4_eo[2];
197
315k
    WORD32 i4_add;
198
315k
    WORD32 i4_shift;
199
315k
    WORD16 *pi2_tmp_orig;
200
315k
    WORD32 i4_trans_size;
201
315k
    WORD32 i4_zero_rows_2nd_stage = i4_zero_cols;
202
315k
    WORD32 i4_row_limit_2nd_stage;
203
204
315k
    i4_trans_size = TRANS_SIZE_8;
205
206
315k
    pi2_tmp_orig = pi2_tmp;
207
208
315k
    if((i4_zero_cols & 0xF0) == 0xF0)
209
174k
        i4_row_limit_2nd_stage = 4;
210
140k
    else
211
140k
        i4_row_limit_2nd_stage = TRANS_SIZE_8;
212
213
214
315k
    if((i4_zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
215
167k
    {
216
        /************************************************************************************************/
217
        /**********************************START - IT_RECON_8x8******************************************/
218
        /************************************************************************************************/
219
220
        /* Inverse Transform 1st stage */
221
167k
        i4_shift = IDCT_STG1_SHIFT;
222
167k
        i4_add = 1 << (i4_shift - 1);
223
224
868k
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
225
701k
        {
226
            /* Checking for Zero Cols */
227
701k
            if((i4_zero_cols & 1) == 1)
228
362k
            {
229
362k
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
230
362k
            }
231
339k
            else
232
339k
            {
233
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
234
1.69M
                for(k = 0; k < 4; k++)
235
1.35M
                {
236
1.35M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
237
1.35M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
238
1.35M
                                                    * pi2_src[3 * i4_src_strd];
239
1.35M
                }
240
339k
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd];
241
339k
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd];
242
339k
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0];
243
339k
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0];
244
245
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
246
339k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
247
339k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
248
339k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
249
339k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
250
1.69M
                for(k = 0; k < 4; k++)
251
1.35M
                {
252
1.35M
                    pi2_tmp[k] =
253
1.35M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
254
1.35M
                    pi2_tmp[k + 4] =
255
1.35M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
256
1.35M
                }
257
339k
            }
258
701k
            pi2_src++;
259
701k
            pi2_tmp += i4_trans_size;
260
701k
            i4_zero_cols = i4_zero_cols >> 1;
261
701k
        }
262
263
167k
        pi2_tmp = pi2_tmp_orig;
264
265
        /* Inverse Transform 2nd stage */
266
167k
        i4_shift = IDCT_STG2_SHIFT;
267
167k
        i4_add = 1 << (i4_shift - 1);
268
167k
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
269
159k
        {
270
1.41M
            for(j = 0; j < i4_trans_size; j++)
271
1.25M
            {
272
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
273
6.24M
                for(k = 0; k < 4; k++)
274
4.99M
                {
275
4.99M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
276
4.99M
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
277
4.99M
                }
278
1.25M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
279
1.25M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
280
1.25M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
281
1.25M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
282
283
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
284
1.25M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
285
1.25M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
286
1.25M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
287
1.25M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
288
6.21M
                for(k = 0; k < 4; k++)
289
4.96M
                {
290
4.96M
                    WORD32 itrans_out;
291
4.96M
                    itrans_out =
292
4.96M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
293
4.96M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
294
4.96M
                    itrans_out =
295
4.96M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
296
4.96M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
297
4.96M
                }
298
1.25M
                pi2_tmp++;
299
1.25M
                pu1_pred += i4_pred_strd;
300
1.25M
                pu1_dst += i4_dst_strd;
301
1.25M
            }
302
159k
        }
303
8.30k
        else /* All rows of output of 1st stage are non-zero */
304
8.30k
        {
305
74.5k
            for(j = 0; j < i4_trans_size; j++)
306
66.2k
            {
307
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
308
330k
                for(k = 0; k < 4; k++)
309
264k
                {
310
264k
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
311
264k
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
312
264k
                                                    * pi2_tmp[3 * i4_trans_size]
313
264k
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
314
264k
                                                    * pi2_tmp[5 * i4_trans_size]
315
264k
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
316
264k
                                                    * pi2_tmp[7 * i4_trans_size];
317
264k
                }
318
319
66.2k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
320
66.2k
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
321
66.2k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
322
66.2k
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
323
66.2k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
324
66.2k
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
325
66.2k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
326
66.2k
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
327
328
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
329
66.2k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
330
66.2k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
331
66.2k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
332
66.2k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
333
330k
                for(k = 0; k < 4; k++)
334
264k
                {
335
264k
                    WORD32 itrans_out;
336
264k
                    itrans_out =
337
264k
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
338
264k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
339
264k
                    itrans_out =
340
264k
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
341
264k
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
342
264k
                }
343
66.2k
                pi2_tmp++;
344
66.2k
                pu1_pred += i4_pred_strd;
345
66.2k
                pu1_dst += i4_dst_strd;
346
66.2k
            }
347
8.30k
        }
348
        /************************************************************************************************/
349
        /************************************END - IT_RECON_8x8******************************************/
350
        /************************************************************************************************/
351
167k
    }
352
148k
    else /* All rows of input are non-zero */
353
148k
    {
354
        /************************************************************************************************/
355
        /**********************************START - IT_RECON_8x8******************************************/
356
        /************************************************************************************************/
357
358
        /* Inverse Transform 1st stage */
359
148k
        i4_shift = IDCT_STG1_SHIFT;
360
148k
        i4_add = 1 << (i4_shift - 1);
361
362
1.26M
        for(j = 0; j < i4_row_limit_2nd_stage; j++)
363
1.11M
        {
364
            /* Checking for Zero Cols */
365
1.11M
            if((i4_zero_cols & 1) == 1)
366
670k
            {
367
670k
                memset(pi2_tmp, 0, i4_trans_size * sizeof(WORD16));
368
670k
            }
369
449k
            else
370
449k
            {
371
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
372
2.24M
                for(k = 0; k < 4; k++)
373
1.79M
                {
374
1.79M
                    ai4_o[k] = gai2_impeg2_idct_q15[1 * 8 + k] * pi2_src[i4_src_strd]
375
1.79M
                                    + gai2_impeg2_idct_q15[3 * 8 + k]
376
1.79M
                                                    * pi2_src[3 * i4_src_strd]
377
1.79M
                                    + gai2_impeg2_idct_q15[5 * 8 + k]
378
1.79M
                                                    * pi2_src[5 * i4_src_strd]
379
1.79M
                                    + gai2_impeg2_idct_q15[7 * 8 + k]
380
1.79M
                                                    * pi2_src[7 * i4_src_strd];
381
1.79M
                }
382
383
449k
                ai4_eo[0] = gai2_impeg2_idct_q15[2 * 8 + 0] * pi2_src[2 * i4_src_strd]
384
449k
                                + gai2_impeg2_idct_q15[6 * 8 + 0] * pi2_src[6 * i4_src_strd];
385
449k
                ai4_eo[1] = gai2_impeg2_idct_q15[2 * 8 + 1] * pi2_src[2 * i4_src_strd]
386
449k
                                + gai2_impeg2_idct_q15[6 * 8 + 1] * pi2_src[6 * i4_src_strd];
387
449k
                ai4_ee[0] = gai2_impeg2_idct_q15[0 * 8 + 0] * pi2_src[0]
388
449k
                                + gai2_impeg2_idct_q15[4 * 8 + 0] * pi2_src[4 * i4_src_strd];
389
449k
                ai4_ee[1] = gai2_impeg2_idct_q15[0 * 8 + 1] * pi2_src[0]
390
449k
                                + gai2_impeg2_idct_q15[4 * 8 + 1] * pi2_src[4 * i4_src_strd];
391
392
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
393
449k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
394
449k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
395
449k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
396
449k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
397
2.24M
                for(k = 0; k < 4; k++)
398
1.79M
                {
399
1.79M
                    pi2_tmp[k] =
400
1.79M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
401
1.79M
                    pi2_tmp[k + 4] =
402
1.79M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
403
1.79M
                }
404
449k
            }
405
1.11M
            pi2_src++;
406
1.11M
            pi2_tmp += i4_trans_size;
407
1.11M
            i4_zero_cols = i4_zero_cols >> 1;
408
1.11M
        }
409
410
148k
        pi2_tmp = pi2_tmp_orig;
411
412
        /* Inverse Transform 2nd stage */
413
148k
        i4_shift = IDCT_STG2_SHIFT;
414
148k
        i4_add = 1 << (i4_shift - 1);
415
148k
        if((i4_zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
416
15.6k
        {
417
140k
            for(j = 0; j < i4_trans_size; j++)
418
124k
            {
419
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
420
622k
                for(k = 0; k < 4; k++)
421
497k
                {
422
497k
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
423
497k
                                    + gai2_impeg2_idct_q11[3 * 8 + k] * pi2_tmp[3 * i4_trans_size];
424
497k
                }
425
124k
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size];
426
124k
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size];
427
124k
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0];
428
124k
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0];
429
430
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
431
124k
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
432
124k
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
433
124k
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
434
124k
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
435
621k
                for(k = 0; k < 4; k++)
436
497k
                {
437
497k
                    WORD32 itrans_out;
438
497k
                    itrans_out =
439
497k
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
440
497k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
441
497k
                    itrans_out =
442
497k
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
443
497k
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
444
497k
                }
445
124k
                pi2_tmp++;
446
124k
                pu1_pred += i4_pred_strd;
447
124k
                pu1_dst += i4_dst_strd;
448
124k
            }
449
15.6k
        }
450
132k
        else /* All rows of output of 1st stage are non-zero */
451
132k
        {
452
1.18M
            for(j = 0; j < i4_trans_size; j++)
453
1.05M
            {
454
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
455
5.25M
                for(k = 0; k < 4; k++)
456
4.20M
                {
457
4.20M
                    ai4_o[k] = gai2_impeg2_idct_q11[1 * 8 + k] * pi2_tmp[i4_trans_size]
458
4.20M
                                    + gai2_impeg2_idct_q11[3 * 8 + k]
459
4.20M
                                                    * pi2_tmp[3 * i4_trans_size]
460
4.20M
                                    + gai2_impeg2_idct_q11[5 * 8 + k]
461
4.20M
                                                    * pi2_tmp[5 * i4_trans_size]
462
4.20M
                                    + gai2_impeg2_idct_q11[7 * 8 + k]
463
4.20M
                                                    * pi2_tmp[7 * i4_trans_size];
464
4.20M
                }
465
466
1.05M
                ai4_eo[0] = gai2_impeg2_idct_q11[2 * 8 + 0] * pi2_tmp[2 * i4_trans_size]
467
1.05M
                                + gai2_impeg2_idct_q11[6 * 8 + 0] * pi2_tmp[6 * i4_trans_size];
468
1.05M
                ai4_eo[1] = gai2_impeg2_idct_q11[2 * 8 + 1] * pi2_tmp[2 * i4_trans_size]
469
1.05M
                                + gai2_impeg2_idct_q11[6 * 8 + 1] * pi2_tmp[6 * i4_trans_size];
470
1.05M
                ai4_ee[0] = gai2_impeg2_idct_q11[0 * 8 + 0] * pi2_tmp[0]
471
1.05M
                                + gai2_impeg2_idct_q11[4 * 8 + 0] * pi2_tmp[4 * i4_trans_size];
472
1.05M
                ai4_ee[1] = gai2_impeg2_idct_q11[0 * 8 + 1] * pi2_tmp[0]
473
1.05M
                                + gai2_impeg2_idct_q11[4 * 8 + 1] * pi2_tmp[4 * i4_trans_size];
474
475
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
476
1.05M
                ai4_e[0] = ai4_ee[0] + ai4_eo[0];
477
1.05M
                ai4_e[3] = ai4_ee[0] - ai4_eo[0];
478
1.05M
                ai4_e[1] = ai4_ee[1] + ai4_eo[1];
479
1.05M
                ai4_e[2] = ai4_ee[1] - ai4_eo[1];
480
5.24M
                for(k = 0; k < 4; k++)
481
4.18M
                {
482
4.18M
                    WORD32 itrans_out;
483
4.18M
                    itrans_out =
484
4.18M
                                    CLIP_S16(((ai4_e[k] + ai4_o[k] + i4_add) >> i4_shift));
485
4.18M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
486
4.18M
                    itrans_out =
487
4.18M
                                    CLIP_S16(((ai4_e[3 - k] - ai4_o[3 - k] + i4_add) >> i4_shift));
488
4.18M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
489
4.18M
                }
490
1.05M
                pi2_tmp++;
491
1.05M
                pu1_pred += i4_pred_strd;
492
1.05M
                pu1_dst += i4_dst_strd;
493
1.05M
            }
494
132k
        }
495
        /************************************************************************************************/
496
        /************************************END - IT_RECON_8x8******************************************/
497
        /************************************************************************************************/
498
148k
    }
499
315k
}
500