Coverage Report

Created: 2025-07-12 07:16

/src/libhevc/common/ihevc_itrans_recon_8x8.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_itrans_recon_8x8.c
22
 *
23
 * @brief
24
 *  Contains function definitions for inverse transform  and reconstruction 8x8
25
 *
26
 *
27
 * @author
28
 *  100470
29
 *
30
 * @par List of Functions:
31
 *  - ihevc_itrans_recon_8x8()
32
 *
33
 * @remarks
34
 *  None
35
 *
36
 *******************************************************************************
37
 */
38
#include <stdio.h>
39
#include <string.h>
40
#include "ihevc_typedefs.h"
41
#include "ihevc_macros.h"
42
#include "ihevc_platform_macros.h"
43
#include "ihevc_defs.h"
44
#include "ihevc_trans_tables.h"
45
#include "ihevc_itrans_recon.h"
46
#include "ihevc_func_selector.h"
47
#include "ihevc_trans_macros.h"
48
49
/**
50
 *******************************************************************************
51
 *
52
 * @brief
53
 *  This function performs Inverse transform  and reconstruction for 8x8
54
 * input block
55
 *
56
 * @par Description:
57
 *  Performs inverse transform and adds the prediction  data and clips output
58
 * to 8 bit
59
 *
60
 * @param[in] pi2_src
61
 *  Input 8x8 coefficients
62
 *
63
 * @param[in] pi2_tmp
64
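 *  Temporary 8x8 buffer (scratch) that holds the
65
 *  inverse transform 1st stage output; it is
66
 *  written by the 1st stage and read back by the
67
 *  2nd stage (one 8-entry row per input column)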
68
 *
69
 * @param[in] pu1_pred
70
 *  Prediction 8x8 block
71
 *
72
 * @param[out] pu1_dst
73
 *  Output 8x8 block
74
 *
75
 * @param[in] src_strd
76
 *  Input stride
77
 *
78
 * @param[in] pred_strd
79
 *  Prediction stride
80
 *
81
 * @param[in] dst_strd
82
 *  Output Stride
83
 *
84
 * @param[in] zero_cols
85
 *  Zero columns in pi2_src (bit j set: column j is zero)
86
 *
87
 * @param[in] zero_rows
88
 *  Zero rows in pi2_src (bits 4-7 all set: only rows 0-3 are read)
89
 *
90
 * @returns  Void
91
 *
92
 * @remarks
93
 *  None
94
 *
95
 *******************************************************************************
96
 */
97
98
void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99
                            WORD16 *pi2_tmp,
100
                            UWORD8 *pu1_pred,
101
                            UWORD8 *pu1_dst,
102
                            WORD32 src_strd,
103
                            WORD32 pred_strd,
104
                            WORD32 dst_strd,
105
                            WORD32 zero_cols,
106
                            WORD32 zero_rows)
107
1.11M
{
108
1.11M
    WORD32 j, k;
109
1.11M
    WORD32 e[4], o[4];
110
1.11M
    WORD32 ee[2], eo[2];
111
1.11M
    WORD32 add;
112
1.11M
    WORD32 shift;
113
1.11M
    WORD16 *pi2_tmp_orig;
114
1.11M
    WORD32 trans_size;
115
1.11M
    WORD32 zero_rows_2nd_stage = zero_cols; /* zero_cols is shifted away by the 1st stage loop; keep a copy for the 2nd stage check */
116
1.11M
    WORD32 row_limit_2nd_stage;
117
118
1.11M
    trans_size = TRANS_SIZE_8;
119
120
1.11M
    pi2_tmp_orig = pi2_tmp;
121
122
1.11M
    if((zero_cols & 0xF0) == 0xF0) /* Columns 4-7 flagged zero: 1st stage only has to process 4 columns */
123
181k
        row_limit_2nd_stage = 4;
124
934k
    else
125
934k
        row_limit_2nd_stage = TRANS_SIZE_8;
126
127
128
1.11M
    if((zero_rows & 0xF0) == 0xF0) /* Only the first 4 rows of input are non-zero (rows 4-7 are not read) */
129
161k
    {
130
        /************************************************************************************************/
131
        /**********************************START - IT_RECON_8x8******************************************/
132
        /************************************************************************************************/
133
134
        /* Inverse Transform 1st stage */
135
161k
        shift = IT_SHIFT_STAGE_1;
136
161k
        add = 1 << (shift - 1);
137
138
1.04M
        for(j = 0; j < row_limit_2nd_stage; j++)
139
886k
        {
140
            /* Checking for Zero Cols */
141
886k
            if((zero_cols & 1) == 1)
142
146k
            {
143
146k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144
146k
            }
145
740k
            else
146
740k
            {
147
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148
3.70M
                for(k = 0; k < 4; k++)
149
2.96M
                {
150
2.96M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151
2.96M
                                    + g_ai2_ihevc_trans_8[3][k]
152
2.96M
                                                    * pi2_src[3 * src_strd];
153
2.96M
                }
154
740k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155
740k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156
740k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157
740k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158
159
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160
740k
                e[0] = ee[0] + eo[0];
161
740k
                e[3] = ee[0] - eo[0];
162
740k
                e[1] = ee[1] + eo[1];
163
740k
                e[2] = ee[1] - eo[1];
164
3.70M
                for(k = 0; k < 4; k++)
165
2.96M
                {
166
2.96M
                    pi2_tmp[k] =
167
2.96M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
168
2.96M
                    pi2_tmp[k + 4] =
169
2.96M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170
2.96M
                }
171
740k
            }
172
886k
            pi2_src++; /* next input column */
173
886k
            pi2_tmp += trans_size; /* next 8-entry row of the intermediate buffer */
174
886k
            zero_cols = zero_cols >> 1;
175
886k
        }
176
177
161k
        pi2_tmp = pi2_tmp_orig;
178
179
        /* Inverse Transform 2nd stage */
180
161k
        shift = IT_SHIFT_STAGE_2;
181
161k
        add = 1 << (shift - 1);
182
161k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* Only the first 4 rows of output of 1st stage are non-zero */
183
101k
        {
184
914k
            for(j = 0; j < trans_size; j++)
185
812k
            {
186
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187
4.06M
                for(k = 0; k < 4; k++)
188
3.24M
                {
189
3.24M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190
3.24M
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191
3.24M
                }
192
812k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193
812k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194
812k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195
812k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196
197
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198
812k
                e[0] = ee[0] + eo[0];
199
812k
                e[3] = ee[0] - eo[0];
200
812k
                e[1] = ee[1] + eo[1];
201
812k
                e[2] = ee[1] - eo[1];
202
4.06M
                for(k = 0; k < 4; k++)
203
3.24M
                {
204
3.24M
                    WORD32 itrans_out;
205
3.24M
                    itrans_out =
206
3.24M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
207
3.24M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208
3.24M
                    itrans_out =
209
3.24M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210
3.24M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211
3.24M
                }
212
812k
                pi2_tmp++;
213
812k
                pu1_pred += pred_strd;
214
812k
                pu1_dst += dst_strd;
215
812k
            }
216
101k
        }
217
60.0k
        else /* Any row of output of 1st stage may be non-zero */
218
60.0k
        {
219
540k
            for(j = 0; j < trans_size; j++)
220
480k
            {
221
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222
2.40M
                for(k = 0; k < 4; k++)
223
1.92M
                {
224
1.92M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225
1.92M
                                    + g_ai2_ihevc_trans_8[3][k]
226
1.92M
                                                    * pi2_tmp[3 * trans_size]
227
1.92M
                                    + g_ai2_ihevc_trans_8[5][k]
228
1.92M
                                                    * pi2_tmp[5 * trans_size]
229
1.92M
                                    + g_ai2_ihevc_trans_8[7][k]
230
1.92M
                                                    * pi2_tmp[7 * trans_size];
231
1.92M
                }
232
233
480k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234
480k
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235
480k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236
480k
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237
480k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238
480k
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239
480k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240
480k
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241
242
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243
480k
                e[0] = ee[0] + eo[0];
244
480k
                e[3] = ee[0] - eo[0];
245
480k
                e[1] = ee[1] + eo[1];
246
480k
                e[2] = ee[1] - eo[1];
247
2.40M
                for(k = 0; k < 4; k++)
248
1.92M
                {
249
1.92M
                    WORD32 itrans_out;
250
1.92M
                    itrans_out =
251
1.92M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
252
1.92M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253
1.92M
                    itrans_out =
254
1.92M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255
1.92M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256
1.92M
                }
257
480k
                pi2_tmp++;
258
480k
                pu1_pred += pred_strd;
259
480k
                pu1_dst += dst_strd;
260
480k
            }
261
60.0k
        }
262
        /************************************************************************************************/
263
        /************************************END - IT_RECON_8x8******************************************/
264
        /************************************************************************************************/
265
161k
    }
266
953k
    else /* Any row of input may be non-zero (all 8 rows are read) */
267
953k
    {
268
        /************************************************************************************************/
269
        /**********************************START - IT_RECON_8x8******************************************/
270
        /************************************************************************************************/
271
272
        /* Inverse Transform 1st stage */
273
953k
        shift = IT_SHIFT_STAGE_1;
274
953k
        add = 1 << (shift - 1);
275
276
8.26M
        for(j = 0; j < row_limit_2nd_stage; j++)
277
7.31M
        {
278
            /* Checking for Zero Cols */
279
7.31M
            if((zero_cols & 1) == 1)
280
41.8k
            {
281
41.8k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282
41.8k
            }
283
7.26M
            else
284
7.26M
            {
285
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286
36.3M
                for(k = 0; k < 4; k++)
287
29.0M
                {
288
29.0M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289
29.0M
                                    + g_ai2_ihevc_trans_8[3][k]
290
29.0M
                                                    * pi2_src[3 * src_strd]
291
29.0M
                                    + g_ai2_ihevc_trans_8[5][k]
292
29.0M
                                                    * pi2_src[5 * src_strd]
293
29.0M
                                    + g_ai2_ihevc_trans_8[7][k]
294
29.0M
                                                    * pi2_src[7 * src_strd];
295
29.0M
                }
296
297
7.26M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298
7.26M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299
7.26M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300
7.26M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301
7.26M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302
7.26M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303
7.26M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304
7.26M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305
306
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307
7.26M
                e[0] = ee[0] + eo[0];
308
7.26M
                e[3] = ee[0] - eo[0];
309
7.26M
                e[1] = ee[1] + eo[1];
310
7.26M
                e[2] = ee[1] - eo[1];
311
36.3M
                for(k = 0; k < 4; k++)
312
29.0M
                {
313
29.0M
                    pi2_tmp[k] =
314
29.0M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
315
29.0M
                    pi2_tmp[k + 4] =
316
29.0M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317
29.0M
                }
318
7.26M
            }
319
7.31M
            pi2_src++; /* next input column */
320
7.31M
            pi2_tmp += trans_size; /* next 8-entry row of the intermediate buffer */
321
7.31M
            zero_cols = zero_cols >> 1;
322
7.31M
        }
323
324
953k
        pi2_tmp = pi2_tmp_orig;
325
326
        /* Inverse Transform 2nd stage */
327
953k
        shift = IT_SHIFT_STAGE_2;
328
953k
        add = 1 << (shift - 1);
329
953k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* Only the first 4 rows of output of 1st stage are non-zero */
330
79.6k
        {
331
717k
            for(j = 0; j < trans_size; j++)
332
637k
            {
333
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334
3.18M
                for(k = 0; k < 4; k++)
335
2.54M
                {
336
2.54M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337
2.54M
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338
2.54M
                }
339
637k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340
637k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341
637k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342
637k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343
344
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345
637k
                e[0] = ee[0] + eo[0];
346
637k
                e[3] = ee[0] - eo[0];
347
637k
                e[1] = ee[1] + eo[1];
348
637k
                e[2] = ee[1] - eo[1];
349
3.18M
                for(k = 0; k < 4; k++)
350
2.54M
                {
351
2.54M
                    WORD32 itrans_out;
352
2.54M
                    itrans_out =
353
2.54M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
354
2.54M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355
2.54M
                    itrans_out =
356
2.54M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357
2.54M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358
2.54M
                }
359
637k
                pi2_tmp++;
360
637k
                pu1_pred += pred_strd;
361
637k
                pu1_dst += dst_strd;
362
637k
            }
363
79.6k
        }
364
874k
        else /* Any row of output of 1st stage may be non-zero */
365
874k
        {
366
7.86M
            for(j = 0; j < trans_size; j++)
367
6.99M
            {
368
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369
34.9M
                for(k = 0; k < 4; k++)
370
27.9M
                {
371
27.9M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372
27.9M
                                    + g_ai2_ihevc_trans_8[3][k]
373
27.9M
                                                    * pi2_tmp[3 * trans_size]
374
27.9M
                                    + g_ai2_ihevc_trans_8[5][k]
375
27.9M
                                                    * pi2_tmp[5 * trans_size]
376
27.9M
                                    + g_ai2_ihevc_trans_8[7][k]
377
27.9M
                                                    * pi2_tmp[7 * trans_size];
378
27.9M
                }
379
380
6.99M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381
6.99M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382
6.99M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383
6.99M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384
6.99M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385
6.99M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386
6.99M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387
6.99M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388
389
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390
6.99M
                e[0] = ee[0] + eo[0];
391
6.99M
                e[3] = ee[0] - eo[0];
392
6.99M
                e[1] = ee[1] + eo[1];
393
6.99M
                e[2] = ee[1] - eo[1];
394
34.9M
                for(k = 0; k < 4; k++)
395
27.9M
                {
396
27.9M
                    WORD32 itrans_out;
397
27.9M
                    itrans_out =
398
27.9M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
399
27.9M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400
27.9M
                    itrans_out =
401
27.9M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402
27.9M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403
27.9M
                }
404
6.99M
                pi2_tmp++;
405
6.99M
                pu1_pred += pred_strd;
406
6.99M
                pu1_dst += dst_strd;
407
6.99M
            }
408
874k
        }
409
        /************************************************************************************************/
410
        /************************************END - IT_RECON_8x8******************************************/
411
        /************************************************************************************************/
412
953k
    }
413
1.11M
}
414