Coverage Report

Created: 2026-06-13 06:29

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/common/ihevc_itrans_recon_16x16.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_itrans_recon_16x16.c
22
 *
23
 * @brief
24
 *  Contains the function definition for inverse transform and reconstruction of 16x16 blocks
25
 *
26
 *
27
 * @author
28
 *  100470
29
 *
30
 * @par List of Functions:
31
 *  - ihevc_itrans_recon_16x16()
32
 *
33
 * @remarks
34
 *  None
35
 *
36
 *******************************************************************************
37
 */
38
#include <stdio.h>
39
#include <string.h>
40
#include "ihevc_typedefs.h"
41
#include "ihevc_macros.h"
42
#include "ihevc_platform_macros.h"
43
#include "ihevc_defs.h"
44
#include "ihevc_trans_tables.h"
45
#include "ihevc_itrans_recon.h"
46
#include "ihevc_func_selector.h"
47
#include "ihevc_trans_macros.h"
48
49
/**
50
 *******************************************************************************
51
 *
52
 * @brief
53
 *  This function performs Inverse transform  and reconstruction for 16x16
54
 * input block
55
 *
56
 * @par Description:
57
 *  Performs inverse transform and adds the prediction  data and clips output
58
 * to 8 bit
59
 *
60
 * @param[in] pi2_src
61
 *  Input 16x16 coefficients
62
 *
63
 * @param[in] pi2_tmp
64
 *  Temporary 16x16 buffer for storing the
65
 *  1st stage output of the inverse transform.
66
 *  It is written by the 1st stage and read
67
 *  back by the 2nd stage
68
 *
69
 * @param[in] pu1_pred
70
 *  Prediction 16x16 block
71
 *
72
 * @param[out] pu1_dst
73
 *  Output 16x16 block
74
 *
75
 * @param[in] src_strd
76
 *  Input stride
77
 *
78
 * @param[in] pred_strd
79
 *  Prediction stride
80
 *
81
 * @param[in] dst_strd
82
 *  Output Stride
83
 *
84
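 * @param[in] zero_cols
85
 *  Bitmask of zero columns in pi2_src (bit j set: column j is skipped)
86
 *
87
 * @param[in] zero_rows
88
 *  Bitmask of zero rows in pi2_src (selects the 4, 8 or 16 row 1st stage)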
89
 *
90
 * @returns  Void
91
 *
92
 * @remarks
93
 *  None
94
 *
95
 *******************************************************************************
96
 */
97
98
void ihevc_itrans_recon_16x16(WORD16 *pi2_src,
99
                              WORD16 *pi2_tmp,
100
                              UWORD8 *pu1_pred,
101
                              UWORD8 *pu1_dst,
102
                              WORD32 src_strd,
103
                              WORD32 pred_strd,
104
                              WORD32 dst_strd,
105
                              WORD32 zero_cols,
106
                              WORD32 zero_rows)
107
57.2k
{
108
57.2k
    WORD32 j, k;
109
57.2k
    WORD32 e[8], o[8];
110
57.2k
    WORD32 ee[4], eo[4];
111
57.2k
    WORD32 eee[2], eeo[2];
112
57.2k
    WORD32 add;
113
57.2k
    WORD32 shift;
114
57.2k
    WORD16 *pi2_tmp_orig;
115
57.2k
    WORD32 trans_size;
116
57.2k
    WORD32 zero_rows_2nd_stage = zero_cols;
117
57.2k
    WORD32 row_limit_2nd_stage;
118
119
57.2k
    if((zero_cols & 0xFFF0) == 0xFFF0)
120
3.76k
        row_limit_2nd_stage = 4;
121
53.4k
    else if((zero_cols & 0xFF00) == 0xFF00)
122
1.95k
        row_limit_2nd_stage = 8;
123
51.5k
    else
124
51.5k
        row_limit_2nd_stage = TRANS_SIZE_16;
125
126
57.2k
    trans_size = TRANS_SIZE_16;
127
57.2k
    pi2_tmp_orig = pi2_tmp;
128
57.2k
    if((zero_rows & 0xFFF0) == 0xFFF0)  /* First 4 rows of input are non-zero */
129
6.40k
    {
130
        /* Inverse Transform 1st stage */
131
        /************************************************************************************************/
132
        /**********************************START - IT_RECON_16x16****************************************/
133
        /************************************************************************************************/
134
135
6.40k
        shift = IT_SHIFT_STAGE_1;
136
6.40k
        add = 1 << (shift - 1);
137
138
97.6k
        for(j = 0; j < row_limit_2nd_stage; j++)
139
91.2k
        {
140
            /* Checking for Zero Cols */
141
91.2k
            if((zero_cols & 1) == 1)
142
1.07k
            {
143
1.07k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144
1.07k
            }
145
90.1k
            else
146
90.1k
            {
147
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148
811k
                for(k = 0; k < 8; k++)
149
721k
                {
150
721k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
151
721k
                                    + g_ai2_ihevc_trans_16[3][k]
152
721k
                                                    * pi2_src[3 * src_strd];
153
721k
                }
154
450k
                for(k = 0; k < 4; k++)
155
360k
                {
156
360k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd];
157
360k
                }
158
90.1k
                eeo[0] = 0;
159
90.1k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
160
90.1k
                eeo[1] = 0;
161
90.1k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
162
163
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
164
270k
                for(k = 0; k < 2; k++)
165
180k
                {
166
180k
                    ee[k] = eee[k] + eeo[k];
167
180k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
168
180k
                }
169
450k
                for(k = 0; k < 4; k++)
170
360k
                {
171
360k
                    e[k] = ee[k] + eo[k];
172
360k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
173
360k
                }
174
811k
                for(k = 0; k < 8; k++)
175
721k
                {
176
721k
                    pi2_tmp[k] =
177
721k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
178
721k
                    pi2_tmp[k + 8] =
179
721k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
180
721k
                }
181
90.1k
            }
182
91.2k
            pi2_src++;
183
91.2k
            pi2_tmp += trans_size;
184
91.2k
            zero_cols = zero_cols >> 1;
185
91.2k
        }
186
187
6.40k
        pi2_tmp = pi2_tmp_orig;
188
189
        /* Inverse Transform 2nd stage */
190
6.40k
        shift = IT_SHIFT_STAGE_2;
191
6.40k
        add = 1 << (shift - 1);
192
193
6.40k
        if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
194
833
        {
195
14.1k
            for(j = 0; j < trans_size; j++)
196
13.3k
            {
197
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
198
119k
                for(k = 0; k < 8; k++)
199
106k
                {
200
106k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
201
106k
                                    + g_ai2_ihevc_trans_16[3][k]
202
106k
                                                    * pi2_tmp[3 * trans_size];
203
106k
                }
204
66.6k
                for(k = 0; k < 4; k++)
205
53.3k
                {
206
53.3k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
207
53.3k
                }
208
13.3k
                eeo[0] = 0;
209
13.3k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
210
13.3k
                eeo[1] = 0;
211
13.3k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
212
213
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
214
39.9k
                for(k = 0; k < 2; k++)
215
26.6k
                {
216
26.6k
                    ee[k] = eee[k] + eeo[k];
217
26.6k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
218
26.6k
                }
219
66.6k
                for(k = 0; k < 4; k++)
220
53.3k
                {
221
53.3k
                    e[k] = ee[k] + eo[k];
222
53.3k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
223
53.3k
                }
224
119k
                for(k = 0; k < 8; k++)
225
106k
                {
226
106k
                    WORD32 itrans_out;
227
106k
                    itrans_out =
228
106k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
229
106k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
230
106k
                    itrans_out =
231
106k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
232
106k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
233
106k
                }
234
13.3k
                pi2_tmp++;
235
13.3k
                pu1_pred += pred_strd;
236
13.3k
                pu1_dst += dst_strd;
237
13.3k
            }
238
833
        }
239
5.56k
        else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
240
149
        {
241
2.53k
            for(j = 0; j < trans_size; j++)
242
2.38k
            {
243
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
244
21.4k
                for(k = 0; k < 8; k++)
245
19.0k
                {
246
19.0k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
247
19.0k
                                    + g_ai2_ihevc_trans_16[3][k]
248
19.0k
                                                    * pi2_tmp[3 * trans_size]
249
19.0k
                                    + g_ai2_ihevc_trans_16[5][k]
250
19.0k
                                                    * pi2_tmp[5 * trans_size]
251
19.0k
                                    + g_ai2_ihevc_trans_16[7][k]
252
19.0k
                                                    * pi2_tmp[7 * trans_size];
253
19.0k
                }
254
11.9k
                for(k = 0; k < 4; k++)
255
9.53k
                {
256
9.53k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
257
9.53k
                                    + g_ai2_ihevc_trans_16[6][k]
258
9.53k
                                                    * pi2_tmp[6 * trans_size];
259
9.53k
                }
260
2.38k
                eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
261
2.38k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
262
2.38k
                eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
263
2.38k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
264
265
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
266
7.15k
                for(k = 0; k < 2; k++)
267
4.76k
                {
268
4.76k
                    ee[k] = eee[k] + eeo[k];
269
4.76k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
270
4.76k
                }
271
11.9k
                for(k = 0; k < 4; k++)
272
9.53k
                {
273
9.53k
                    e[k] = ee[k] + eo[k];
274
9.53k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
275
9.53k
                }
276
21.4k
                for(k = 0; k < 8; k++)
277
19.0k
                {
278
19.0k
                    WORD32 itrans_out;
279
19.0k
                    itrans_out =
280
19.0k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
281
19.0k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
282
19.0k
                    itrans_out =
283
19.0k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
284
19.0k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
285
19.0k
                }
286
2.38k
                pi2_tmp++;
287
2.38k
                pu1_pred += pred_strd;
288
2.38k
                pu1_dst += dst_strd;
289
2.38k
            }
290
149
        }
291
5.42k
        else /* All rows of output of 1st stage are non-zero */
292
5.42k
        {
293
92.1k
            for(j = 0; j < trans_size; j++)
294
86.7k
            {
295
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
296
780k
                for(k = 0; k < 8; k++)
297
693k
                {
298
693k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
299
693k
                                    + g_ai2_ihevc_trans_16[3][k]
300
693k
                                                    * pi2_tmp[3 * trans_size]
301
693k
                                    + g_ai2_ihevc_trans_16[5][k]
302
693k
                                                    * pi2_tmp[5 * trans_size]
303
693k
                                    + g_ai2_ihevc_trans_16[7][k]
304
693k
                                                    * pi2_tmp[7 * trans_size]
305
693k
                                    + g_ai2_ihevc_trans_16[9][k]
306
693k
                                                    * pi2_tmp[9 * trans_size]
307
693k
                                    + g_ai2_ihevc_trans_16[11][k]
308
693k
                                                    * pi2_tmp[11 * trans_size]
309
693k
                                    + g_ai2_ihevc_trans_16[13][k]
310
693k
                                                    * pi2_tmp[13 * trans_size]
311
693k
                                    + g_ai2_ihevc_trans_16[15][k]
312
693k
                                                    * pi2_tmp[15 * trans_size];
313
693k
                }
314
433k
                for(k = 0; k < 4; k++)
315
346k
                {
316
346k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
317
346k
                                    + g_ai2_ihevc_trans_16[6][k]
318
346k
                                                    * pi2_tmp[6 * trans_size]
319
346k
                                    + g_ai2_ihevc_trans_16[10][k]
320
346k
                                                    * pi2_tmp[10 * trans_size]
321
346k
                                    + g_ai2_ihevc_trans_16[14][k]
322
346k
                                                    * pi2_tmp[14 * trans_size];
323
346k
                }
324
86.7k
                eeo[0] =
325
86.7k
                                g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
326
86.7k
                                                + g_ai2_ihevc_trans_16[12][0]
327
86.7k
                                                                * pi2_tmp[12
328
86.7k
                                                                                * trans_size];
329
86.7k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
330
86.7k
                                + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
331
86.7k
                eeo[1] =
332
86.7k
                                g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
333
86.7k
                                                + g_ai2_ihevc_trans_16[12][1]
334
86.7k
                                                                * pi2_tmp[12
335
86.7k
                                                                                * trans_size];
336
86.7k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
337
86.7k
                                + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
338
339
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
340
260k
                for(k = 0; k < 2; k++)
341
173k
                {
342
173k
                    ee[k] = eee[k] + eeo[k];
343
173k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
344
173k
                }
345
433k
                for(k = 0; k < 4; k++)
346
346k
                {
347
346k
                    e[k] = ee[k] + eo[k];
348
346k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
349
346k
                }
350
780k
                for(k = 0; k < 8; k++)
351
693k
                {
352
693k
                    WORD32 itrans_out;
353
693k
                    itrans_out =
354
693k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
355
693k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
356
693k
                    itrans_out =
357
693k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
358
693k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
359
693k
                }
360
86.7k
                pi2_tmp++;
361
86.7k
                pu1_pred += pred_strd;
362
86.7k
                pu1_dst += dst_strd;
363
86.7k
            }
364
5.42k
        }
365
        /************************************************************************************************/
366
        /************************************END - IT_RECON_16x16****************************************/
367
        /************************************************************************************************/
368
6.40k
    }
369
50.8k
    else if((zero_rows & 0xFF00) == 0xFF00)  /* First 8 rows of input are non-zero */
370
3.69k
    {
371
        /* Inverse Transform 1st stage */
372
        /************************************************************************************************/
373
        /**********************************START - IT_RECON_16x16****************************************/
374
        /************************************************************************************************/
375
376
3.69k
        shift = IT_SHIFT_STAGE_1;
377
3.69k
        add = 1 << (shift - 1);
378
379
55.0k
        for(j = 0; j < row_limit_2nd_stage; j++)
380
51.3k
        {
381
            /* Checking for Zero Cols */
382
51.3k
            if((zero_cols & 1) == 1)
383
3.56k
            {
384
3.56k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
385
3.56k
            }
386
47.7k
            else
387
47.7k
            {
388
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
389
429k
                for(k = 0; k < 8; k++)
390
382k
                {
391
382k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
392
382k
                                    + g_ai2_ihevc_trans_16[3][k]
393
382k
                                                    * pi2_src[3 * src_strd]
394
382k
                                    + g_ai2_ihevc_trans_16[5][k]
395
382k
                                                    * pi2_src[5 * src_strd]
396
382k
                                    + g_ai2_ihevc_trans_16[7][k]
397
382k
                                                    * pi2_src[7 * src_strd];
398
382k
                }
399
238k
                for(k = 0; k < 4; k++)
400
191k
                {
401
191k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
402
191k
                                    + g_ai2_ihevc_trans_16[6][k]
403
191k
                                                    * pi2_src[6 * src_strd];
404
191k
                }
405
47.7k
                eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd];
406
47.7k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_src[0];
407
47.7k
                eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd];
408
47.7k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_src[0];
409
410
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
411
143k
                for(k = 0; k < 2; k++)
412
95.5k
                {
413
95.5k
                    ee[k] = eee[k] + eeo[k];
414
95.5k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
415
95.5k
                }
416
238k
                for(k = 0; k < 4; k++)
417
191k
                {
418
191k
                    e[k] = ee[k] + eo[k];
419
191k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
420
191k
                }
421
429k
                for(k = 0; k < 8; k++)
422
382k
                {
423
382k
                    pi2_tmp[k] =
424
382k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
425
382k
                    pi2_tmp[k + 8] =
426
382k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
427
382k
                }
428
47.7k
            }
429
51.3k
            pi2_src++;
430
51.3k
            pi2_tmp += trans_size;
431
51.3k
            zero_cols = zero_cols >> 1;
432
51.3k
        }
433
434
3.69k
        pi2_tmp = pi2_tmp_orig;
435
436
        /* Inverse Transform 2nd stage */
437
3.69k
        shift = IT_SHIFT_STAGE_2;
438
3.69k
        add = 1 << (shift - 1);
439
440
3.69k
        if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
441
484
        {
442
8.22k
            for(j = 0; j < trans_size; j++)
443
7.74k
            {
444
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
445
69.6k
                for(k = 0; k < 8; k++)
446
61.9k
                {
447
61.9k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
448
61.9k
                                    + g_ai2_ihevc_trans_16[3][k]
449
61.9k
                                                    * pi2_tmp[3 * trans_size];
450
61.9k
                }
451
38.7k
                for(k = 0; k < 4; k++)
452
30.9k
                {
453
30.9k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
454
30.9k
                }
455
7.74k
                eeo[0] = 0;
456
7.74k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
457
7.74k
                eeo[1] = 0;
458
7.74k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
459
460
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
461
23.2k
                for(k = 0; k < 2; k++)
462
15.4k
                {
463
15.4k
                    ee[k] = eee[k] + eeo[k];
464
15.4k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
465
15.4k
                }
466
38.7k
                for(k = 0; k < 4; k++)
467
30.9k
                {
468
30.9k
                    e[k] = ee[k] + eo[k];
469
30.9k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
470
30.9k
                }
471
69.6k
                for(k = 0; k < 8; k++)
472
61.9k
                {
473
61.9k
                    WORD32 itrans_out;
474
61.9k
                    itrans_out =
475
61.9k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
476
61.9k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
477
61.9k
                    itrans_out =
478
61.9k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
479
61.9k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
480
61.9k
                }
481
7.74k
                pi2_tmp++;
482
7.74k
                pu1_pred += pred_strd;
483
7.74k
                pu1_dst += dst_strd;
484
7.74k
            }
485
484
        }
486
3.20k
        else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
487
237
        {
488
4.02k
            for(j = 0; j < trans_size; j++)
489
3.79k
            {
490
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
491
34.1k
                for(k = 0; k < 8; k++)
492
30.3k
                {
493
30.3k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
494
30.3k
                                    + g_ai2_ihevc_trans_16[3][k]
495
30.3k
                                                    * pi2_tmp[3 * trans_size]
496
30.3k
                                    + g_ai2_ihevc_trans_16[5][k]
497
30.3k
                                                    * pi2_tmp[5 * trans_size]
498
30.3k
                                    + g_ai2_ihevc_trans_16[7][k]
499
30.3k
                                                    * pi2_tmp[7 * trans_size];
500
30.3k
                }
501
18.9k
                for(k = 0; k < 4; k++)
502
15.1k
                {
503
15.1k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
504
15.1k
                                    + g_ai2_ihevc_trans_16[6][k]
505
15.1k
                                                    * pi2_tmp[6 * trans_size];
506
15.1k
                }
507
3.79k
                eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
508
3.79k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
509
3.79k
                eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
510
3.79k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
511
512
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
513
11.3k
                for(k = 0; k < 2; k++)
514
7.58k
                {
515
7.58k
                    ee[k] = eee[k] + eeo[k];
516
7.58k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
517
7.58k
                }
518
18.9k
                for(k = 0; k < 4; k++)
519
15.1k
                {
520
15.1k
                    e[k] = ee[k] + eo[k];
521
15.1k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
522
15.1k
                }
523
34.1k
                for(k = 0; k < 8; k++)
524
30.3k
                {
525
30.3k
                    WORD32 itrans_out;
526
30.3k
                    itrans_out =
527
30.3k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
528
30.3k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
529
30.3k
                    itrans_out =
530
30.3k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
531
30.3k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
532
30.3k
                }
533
3.79k
                pi2_tmp++;
534
3.79k
                pu1_pred += pred_strd;
535
3.79k
                pu1_dst += dst_strd;
536
3.79k
            }
537
237
        }
538
2.96k
        else /* All rows of output of 1st stage are non-zero */
539
2.96k
        {
540
50.4k
            for(j = 0; j < trans_size; j++)
541
47.5k
            {
542
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
543
427k
                for(k = 0; k < 8; k++)
544
380k
                {
545
380k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
546
380k
                                    + g_ai2_ihevc_trans_16[3][k]
547
380k
                                                    * pi2_tmp[3 * trans_size]
548
380k
                                    + g_ai2_ihevc_trans_16[5][k]
549
380k
                                                    * pi2_tmp[5 * trans_size]
550
380k
                                    + g_ai2_ihevc_trans_16[7][k]
551
380k
                                                    * pi2_tmp[7 * trans_size]
552
380k
                                    + g_ai2_ihevc_trans_16[9][k]
553
380k
                                                    * pi2_tmp[9 * trans_size]
554
380k
                                    + g_ai2_ihevc_trans_16[11][k]
555
380k
                                                    * pi2_tmp[11 * trans_size]
556
380k
                                    + g_ai2_ihevc_trans_16[13][k]
557
380k
                                                    * pi2_tmp[13 * trans_size]
558
380k
                                    + g_ai2_ihevc_trans_16[15][k]
559
380k
                                                    * pi2_tmp[15 * trans_size];
560
380k
                }
561
237k
                for(k = 0; k < 4; k++)
562
190k
                {
563
190k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
564
190k
                                    + g_ai2_ihevc_trans_16[6][k]
565
190k
                                                    * pi2_tmp[6 * trans_size]
566
190k
                                    + g_ai2_ihevc_trans_16[10][k]
567
190k
                                                    * pi2_tmp[10 * trans_size]
568
190k
                                    + g_ai2_ihevc_trans_16[14][k]
569
190k
                                                    * pi2_tmp[14 * trans_size];
570
190k
                }
571
47.5k
                eeo[0] =
572
47.5k
                                g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
573
47.5k
                                                + g_ai2_ihevc_trans_16[12][0]
574
47.5k
                                                                * pi2_tmp[12
575
47.5k
                                                                                * trans_size];
576
47.5k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
577
47.5k
                                + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
578
47.5k
                eeo[1] =
579
47.5k
                                g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
580
47.5k
                                                + g_ai2_ihevc_trans_16[12][1]
581
47.5k
                                                                * pi2_tmp[12
582
47.5k
                                                                                * trans_size];
583
47.5k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
584
47.5k
                                + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
585
586
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
587
142k
                for(k = 0; k < 2; k++)
588
95.0k
                {
589
95.0k
                    ee[k] = eee[k] + eeo[k];
590
95.0k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
591
95.0k
                }
592
237k
                for(k = 0; k < 4; k++)
593
190k
                {
594
190k
                    e[k] = ee[k] + eo[k];
595
190k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
596
190k
                }
597
427k
                for(k = 0; k < 8; k++)
598
380k
                {
599
380k
                    WORD32 itrans_out;
600
380k
                    itrans_out =
601
380k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
602
380k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
603
380k
                    itrans_out =
604
380k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
605
380k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
606
380k
                }
607
47.5k
                pi2_tmp++;
608
47.5k
                pu1_pred += pred_strd;
609
47.5k
                pu1_dst += dst_strd;
610
47.5k
            }
611
2.96k
        }
612
        /************************************************************************************************/
613
        /************************************END - IT_RECON_16x16****************************************/
614
        /************************************************************************************************/
615
3.69k
    }
616
47.1k
    else  /* All rows of input are non-zero */
617
47.1k
    {
618
        /* Inverse Transform 1st stage */
619
        /************************************************************************************************/
620
        /**********************************START - IT_RECON_16x16****************************************/
621
        /************************************************************************************************/
622
623
47.1k
        shift = IT_SHIFT_STAGE_1;
624
47.1k
        add = 1 << (shift - 1);
625
626
759k
        for(j = 0; j < row_limit_2nd_stage; j++)
627
712k
        {
628
            /* Checking for Zero Cols */
629
712k
            if((zero_cols & 1) == 1)
630
10.0k
            {
631
10.0k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
632
10.0k
            }
633
702k
            else
634
702k
            {
635
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
636
6.31M
                for(k = 0; k < 8; k++)
637
5.61M
                {
638
5.61M
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_src[src_strd]
639
5.61M
                                    + g_ai2_ihevc_trans_16[3][k]
640
5.61M
                                                    * pi2_src[3 * src_strd]
641
5.61M
                                    + g_ai2_ihevc_trans_16[5][k]
642
5.61M
                                                    * pi2_src[5 * src_strd]
643
5.61M
                                    + g_ai2_ihevc_trans_16[7][k]
644
5.61M
                                                    * pi2_src[7 * src_strd]
645
5.61M
                                    + g_ai2_ihevc_trans_16[9][k]
646
5.61M
                                                    * pi2_src[9 * src_strd]
647
5.61M
                                    + g_ai2_ihevc_trans_16[11][k]
648
5.61M
                                                    * pi2_src[11 * src_strd]
649
5.61M
                                    + g_ai2_ihevc_trans_16[13][k]
650
5.61M
                                                    * pi2_src[13 * src_strd]
651
5.61M
                                    + g_ai2_ihevc_trans_16[15][k]
652
5.61M
                                                    * pi2_src[15 * src_strd];
653
5.61M
                }
654
3.51M
                for(k = 0; k < 4; k++)
655
2.80M
                {
656
2.80M
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_src[2 * src_strd]
657
2.80M
                                    + g_ai2_ihevc_trans_16[6][k]
658
2.80M
                                                    * pi2_src[6 * src_strd]
659
2.80M
                                    + g_ai2_ihevc_trans_16[10][k]
660
2.80M
                                                    * pi2_src[10 * src_strd]
661
2.80M
                                    + g_ai2_ihevc_trans_16[14][k]
662
2.80M
                                                    * pi2_src[14 * src_strd];
663
2.80M
                }
664
702k
                eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_src[4 * src_strd]
665
702k
                                + g_ai2_ihevc_trans_16[12][0]
666
702k
                                                * pi2_src[12 * src_strd];
667
702k
                eee[0] =
668
702k
                                g_ai2_ihevc_trans_16[0][0] * pi2_src[0]
669
702k
                                                + g_ai2_ihevc_trans_16[8][0]
670
702k
                                                                * pi2_src[8
671
702k
                                                                                * src_strd];
672
702k
                eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_src[4 * src_strd]
673
702k
                                + g_ai2_ihevc_trans_16[12][1]
674
702k
                                                * pi2_src[12 * src_strd];
675
702k
                eee[1] =
676
702k
                                g_ai2_ihevc_trans_16[0][1] * pi2_src[0]
677
702k
                                                + g_ai2_ihevc_trans_16[8][1]
678
702k
                                                                * pi2_src[8
679
702k
                                                                                * src_strd];
680
681
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
682
2.10M
                for(k = 0; k < 2; k++)
683
1.40M
                {
684
1.40M
                    ee[k] = eee[k] + eeo[k];
685
1.40M
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
686
1.40M
                }
687
3.51M
                for(k = 0; k < 4; k++)
688
2.80M
                {
689
2.80M
                    e[k] = ee[k] + eo[k];
690
2.80M
                    e[k + 4] = ee[3 - k] - eo[3 - k];
691
2.80M
                }
692
6.31M
                for(k = 0; k < 8; k++)
693
5.61M
                {
694
5.61M
                    pi2_tmp[k] =
695
5.61M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
696
5.61M
                    pi2_tmp[k + 8] =
697
5.61M
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
698
5.61M
                }
699
702k
            }
700
712k
            pi2_src++;
701
712k
            pi2_tmp += trans_size;
702
712k
            zero_cols = zero_cols >> 1;
703
712k
        }
704
705
47.1k
        pi2_tmp = pi2_tmp_orig;
706
707
        /* Inverse Transform 2nd stage */
708
47.1k
        shift = IT_SHIFT_STAGE_2;
709
47.1k
        add = 1 << (shift - 1);
710
711
47.1k
        if((zero_rows_2nd_stage & 0xFFF0) == 0xFFF0) /* First 4 rows of output of 1st stage are non-zero */
712
2.44k
        {
713
41.5k
            for(j = 0; j < trans_size; j++)
714
39.1k
            {
715
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
716
352k
                for(k = 0; k < 8; k++)
717
313k
                {
718
313k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
719
313k
                                    + g_ai2_ihevc_trans_16[3][k]
720
313k
                                                    * pi2_tmp[3 * trans_size];
721
313k
                }
722
195k
                for(k = 0; k < 4; k++)
723
156k
                {
724
156k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size];
725
156k
                }
726
39.1k
                eeo[0] = 0;
727
39.1k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
728
39.1k
                eeo[1] = 0;
729
39.1k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
730
731
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
732
117k
                for(k = 0; k < 2; k++)
733
78.3k
                {
734
78.3k
                    ee[k] = eee[k] + eeo[k];
735
78.3k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
736
78.3k
                }
737
195k
                for(k = 0; k < 4; k++)
738
156k
                {
739
156k
                    e[k] = ee[k] + eo[k];
740
156k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
741
156k
                }
742
352k
                for(k = 0; k < 8; k++)
743
313k
                {
744
313k
                    WORD32 itrans_out;
745
313k
                    itrans_out =
746
313k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
747
313k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
748
313k
                    itrans_out =
749
313k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
750
313k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
751
313k
                }
752
39.1k
                pi2_tmp++;
753
39.1k
                pu1_pred += pred_strd;
754
39.1k
                pu1_dst += dst_strd;
755
39.1k
            }
756
2.44k
        }
757
44.6k
        else if((zero_rows_2nd_stage & 0xFF00) == 0xFF00) /* First 8 rows of output of 1st stage are non-zero */
758
1.56k
        {
759
26.6k
            for(j = 0; j < trans_size; j++)
760
25.0k
            {
761
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
762
225k
                for(k = 0; k < 8; k++)
763
200k
                {
764
200k
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
765
200k
                                    + g_ai2_ihevc_trans_16[3][k]
766
200k
                                                    * pi2_tmp[3 * trans_size]
767
200k
                                    + g_ai2_ihevc_trans_16[5][k]
768
200k
                                                    * pi2_tmp[5 * trans_size]
769
200k
                                    + g_ai2_ihevc_trans_16[7][k]
770
200k
                                                    * pi2_tmp[7 * trans_size];
771
200k
                }
772
125k
                for(k = 0; k < 4; k++)
773
100k
                {
774
100k
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
775
100k
                                    + g_ai2_ihevc_trans_16[6][k]
776
100k
                                                    * pi2_tmp[6 * trans_size];
777
100k
                }
778
25.0k
                eeo[0] = g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size];
779
25.0k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0];
780
25.0k
                eeo[1] = g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size];
781
25.0k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0];
782
783
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
784
75.2k
                for(k = 0; k < 2; k++)
785
50.1k
                {
786
50.1k
                    ee[k] = eee[k] + eeo[k];
787
50.1k
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
788
50.1k
                }
789
125k
                for(k = 0; k < 4; k++)
790
100k
                {
791
100k
                    e[k] = ee[k] + eo[k];
792
100k
                    e[k + 4] = ee[3 - k] - eo[3 - k];
793
100k
                }
794
225k
                for(k = 0; k < 8; k++)
795
200k
                {
796
200k
                    WORD32 itrans_out;
797
200k
                    itrans_out =
798
200k
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
799
200k
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
800
200k
                    itrans_out =
801
200k
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
802
200k
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
803
200k
                }
804
25.0k
                pi2_tmp++;
805
25.0k
                pu1_pred += pred_strd;
806
25.0k
                pu1_dst += dst_strd;
807
25.0k
            }
808
1.56k
        }
809
43.1k
        else /* All rows of output of 1st stage are non-zero */
810
43.1k
        {
811
732k
            for(j = 0; j < trans_size; j++)
812
689k
            {
813
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
814
6.20M
                for(k = 0; k < 8; k++)
815
5.51M
                {
816
5.51M
                    o[k] = g_ai2_ihevc_trans_16[1][k] * pi2_tmp[trans_size]
817
5.51M
                                    + g_ai2_ihevc_trans_16[3][k]
818
5.51M
                                                    * pi2_tmp[3 * trans_size]
819
5.51M
                                    + g_ai2_ihevc_trans_16[5][k]
820
5.51M
                                                    * pi2_tmp[5 * trans_size]
821
5.51M
                                    + g_ai2_ihevc_trans_16[7][k]
822
5.51M
                                                    * pi2_tmp[7 * trans_size]
823
5.51M
                                    + g_ai2_ihevc_trans_16[9][k]
824
5.51M
                                                    * pi2_tmp[9 * trans_size]
825
5.51M
                                    + g_ai2_ihevc_trans_16[11][k]
826
5.51M
                                                    * pi2_tmp[11 * trans_size]
827
5.51M
                                    + g_ai2_ihevc_trans_16[13][k]
828
5.51M
                                                    * pi2_tmp[13 * trans_size]
829
5.51M
                                    + g_ai2_ihevc_trans_16[15][k]
830
5.51M
                                                    * pi2_tmp[15 * trans_size];
831
5.51M
                }
832
3.44M
                for(k = 0; k < 4; k++)
833
2.75M
                {
834
2.75M
                    eo[k] = g_ai2_ihevc_trans_16[2][k] * pi2_tmp[2 * trans_size]
835
2.75M
                                    + g_ai2_ihevc_trans_16[6][k]
836
2.75M
                                                    * pi2_tmp[6 * trans_size]
837
2.75M
                                    + g_ai2_ihevc_trans_16[10][k]
838
2.75M
                                                    * pi2_tmp[10 * trans_size]
839
2.75M
                                    + g_ai2_ihevc_trans_16[14][k]
840
2.75M
                                                    * pi2_tmp[14 * trans_size];
841
2.75M
                }
842
689k
                eeo[0] =
843
689k
                                g_ai2_ihevc_trans_16[4][0] * pi2_tmp[4 * trans_size]
844
689k
                                                + g_ai2_ihevc_trans_16[12][0]
845
689k
                                                                * pi2_tmp[12
846
689k
                                                                                * trans_size];
847
689k
                eee[0] = g_ai2_ihevc_trans_16[0][0] * pi2_tmp[0]
848
689k
                                + g_ai2_ihevc_trans_16[8][0] * pi2_tmp[8 * trans_size];
849
689k
                eeo[1] =
850
689k
                                g_ai2_ihevc_trans_16[4][1] * pi2_tmp[4 * trans_size]
851
689k
                                                + g_ai2_ihevc_trans_16[12][1]
852
689k
                                                                * pi2_tmp[12
853
689k
                                                                                * trans_size];
854
689k
                eee[1] = g_ai2_ihevc_trans_16[0][1] * pi2_tmp[0]
855
689k
                                + g_ai2_ihevc_trans_16[8][1] * pi2_tmp[8 * trans_size];
856
857
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
858
2.06M
                for(k = 0; k < 2; k++)
859
1.37M
                {
860
1.37M
                    ee[k] = eee[k] + eeo[k];
861
1.37M
                    ee[k + 2] = eee[1 - k] - eeo[1 - k];
862
1.37M
                }
863
3.44M
                for(k = 0; k < 4; k++)
864
2.75M
                {
865
2.75M
                    e[k] = ee[k] + eo[k];
866
2.75M
                    e[k + 4] = ee[3 - k] - eo[3 - k];
867
2.75M
                }
868
6.20M
                for(k = 0; k < 8; k++)
869
5.51M
                {
870
5.51M
                    WORD32 itrans_out;
871
5.51M
                    itrans_out =
872
5.51M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
873
5.51M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
874
5.51M
                    itrans_out =
875
5.51M
                                    CLIP_S16(((e[7 - k] - o[7 - k] + add) >> shift));
876
5.51M
                    pu1_dst[k + 8] = CLIP_U8((itrans_out + pu1_pred[k + 8]));
877
5.51M
                }
878
689k
                pi2_tmp++;
879
689k
                pu1_pred += pred_strd;
880
689k
                pu1_dst += dst_strd;
881
689k
            }
882
43.1k
        }
883
        /************************************************************************************************/
884
        /************************************END - IT_RECON_16x16****************************************/
885
        /************************************************************************************************/
886
47.1k
    }
887
888
57.2k
}
889