Coverage Report

Created: 2025-07-09 06:20

/src/libhevc/common/ihevc_itrans_recon_8x8.c
Line
Count
Source
1
/******************************************************************************
2
*
3
* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at:
8
*
9
* http://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*
17
******************************************************************************/
18
/**
19
 *******************************************************************************
20
 * @file
21
 *  ihevc_itrans_recon_8x8.c
22
 *
23
 * @brief
24
 *  Contains function definitions for inverse transform  and reconstruction 8x8
25
 *
26
 *
27
 * @author
28
 *  100470
29
 *
30
 * @par List of Functions:
31
 *  - ihevc_itrans_recon_8x8()
32
 *
33
 * @remarks
34
 *  None
35
 *
36
 *******************************************************************************
37
 */
38
#include <stdio.h>
39
#include <string.h>
40
#include "ihevc_typedefs.h"
41
#include "ihevc_macros.h"
42
#include "ihevc_platform_macros.h"
43
#include "ihevc_defs.h"
44
#include "ihevc_trans_tables.h"
45
#include "ihevc_itrans_recon.h"
46
#include "ihevc_func_selector.h"
47
#include "ihevc_trans_macros.h"
48
49
/**
50
 *******************************************************************************
51
 *
52
 * @brief
53
 *  This function performs Inverse transform  and reconstruction for 8x8
54
 * input block
55
 *
56
 * @par Description:
57
 *  Performs inverse transform and adds the prediction  data and clips output
58
 * to 8 bit
59
 *
60
 * @param[in] pi2_src
61
 *  Input 8x8 coefficients
62
 *
63
 * @param[in] pi2_tmp
64
 *  Temporary 8x8 buffer for storing inverse
65
 *
66
 *  transform
67
 *  1st stage output
68
 *
69
 * @param[in] pu1_pred
70
 *  Prediction 8x8 block
71
 *
72
 * @param[out] pu1_dst
73
 *  Output 8x8 block
74
 *
75
 * @param[in] src_strd
76
 *  Input stride
77
 *
78
 * @param[in] pred_strd
79
 *  Prediction stride
80
 *
81
 * @param[in] dst_strd
82
 *  Output Stride
83
 *
84
 * @param[in] shift
85
 *  Output shift
86
 *
87
 * @param[in] zero_cols
88
 *  Zero columns in pi2_src
89
 *
90
 * @returns  Void
91
 *
92
 * @remarks
93
 *  None
94
 *
95
 *******************************************************************************
96
 */
97
98
void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
99
                            WORD16 *pi2_tmp,
100
                            UWORD8 *pu1_pred,
101
                            UWORD8 *pu1_dst,
102
                            WORD32 src_strd,
103
                            WORD32 pred_strd,
104
                            WORD32 dst_strd,
105
                            WORD32 zero_cols,
106
                            WORD32 zero_rows)
107
1.01M
{
108
1.01M
    WORD32 j, k;
109
1.01M
    WORD32 e[4], o[4];
110
1.01M
    WORD32 ee[2], eo[2];
111
1.01M
    WORD32 add;
112
1.01M
    WORD32 shift;
113
1.01M
    WORD16 *pi2_tmp_orig;
114
1.01M
    WORD32 trans_size;
115
1.01M
    WORD32 zero_rows_2nd_stage = zero_cols;
116
1.01M
    WORD32 row_limit_2nd_stage;
117
118
1.01M
    trans_size = TRANS_SIZE_8;
119
120
1.01M
    pi2_tmp_orig = pi2_tmp;
121
122
1.01M
    if((zero_cols & 0xF0) == 0xF0)
123
128k
        row_limit_2nd_stage = 4;
124
886k
    else
125
886k
        row_limit_2nd_stage = TRANS_SIZE_8;
126
127
128
1.01M
    if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
129
111k
    {
130
        /************************************************************************************************/
131
        /**********************************START - IT_RECON_8x8******************************************/
132
        /************************************************************************************************/
133
134
        /* Inverse Transform 1st stage */
135
111k
        shift = IT_SHIFT_STAGE_1;
136
111k
        add = 1 << (shift - 1);
137
138
767k
        for(j = 0; j < row_limit_2nd_stage; j++)
139
656k
        {
140
            /* Checking for Zero Cols */
141
656k
            if((zero_cols & 1) == 1)
142
38.1k
            {
143
38.1k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
144
38.1k
            }
145
618k
            else
146
618k
            {
147
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
148
3.09M
                for(k = 0; k < 4; k++)
149
2.47M
                {
150
2.47M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
151
2.47M
                                    + g_ai2_ihevc_trans_8[3][k]
152
2.47M
                                                    * pi2_src[3 * src_strd];
153
2.47M
                }
154
618k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
155
618k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
156
618k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
157
618k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
158
159
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
160
618k
                e[0] = ee[0] + eo[0];
161
618k
                e[3] = ee[0] - eo[0];
162
618k
                e[1] = ee[1] + eo[1];
163
618k
                e[2] = ee[1] - eo[1];
164
3.09M
                for(k = 0; k < 4; k++)
165
2.47M
                {
166
2.47M
                    pi2_tmp[k] =
167
2.47M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
168
2.47M
                    pi2_tmp[k + 4] =
169
2.47M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
170
2.47M
                }
171
618k
            }
172
656k
            pi2_src++;
173
656k
            pi2_tmp += trans_size;
174
656k
            zero_cols = zero_cols >> 1;
175
656k
        }
176
177
111k
        pi2_tmp = pi2_tmp_orig;
178
179
        /* Inverse Transform 2nd stage */
180
111k
        shift = IT_SHIFT_STAGE_2;
181
111k
        add = 1 << (shift - 1);
182
111k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
183
58.6k
        {
184
527k
            for(j = 0; j < trans_size; j++)
185
468k
            {
186
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
187
2.34M
                for(k = 0; k < 4; k++)
188
1.87M
                {
189
1.87M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
190
1.87M
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
191
1.87M
                }
192
468k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
193
468k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
194
468k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
195
468k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
196
197
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
198
468k
                e[0] = ee[0] + eo[0];
199
468k
                e[3] = ee[0] - eo[0];
200
468k
                e[1] = ee[1] + eo[1];
201
468k
                e[2] = ee[1] - eo[1];
202
2.34M
                for(k = 0; k < 4; k++)
203
1.87M
                {
204
1.87M
                    WORD32 itrans_out;
205
1.87M
                    itrans_out =
206
1.87M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
207
1.87M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
208
1.87M
                    itrans_out =
209
1.87M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
210
1.87M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
211
1.87M
                }
212
468k
                pi2_tmp++;
213
468k
                pu1_pred += pred_strd;
214
468k
                pu1_dst += dst_strd;
215
468k
            }
216
58.6k
        }
217
52.7k
        else /* All rows of output of 1st stage are non-zero */
218
52.7k
        {
219
474k
            for(j = 0; j < trans_size; j++)
220
421k
            {
221
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
222
2.10M
                for(k = 0; k < 4; k++)
223
1.68M
                {
224
1.68M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
225
1.68M
                                    + g_ai2_ihevc_trans_8[3][k]
226
1.68M
                                                    * pi2_tmp[3 * trans_size]
227
1.68M
                                    + g_ai2_ihevc_trans_8[5][k]
228
1.68M
                                                    * pi2_tmp[5 * trans_size]
229
1.68M
                                    + g_ai2_ihevc_trans_8[7][k]
230
1.68M
                                                    * pi2_tmp[7 * trans_size];
231
1.68M
                }
232
233
421k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
234
421k
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
235
421k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
236
421k
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
237
421k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
238
421k
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
239
421k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
240
421k
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
241
242
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
243
421k
                e[0] = ee[0] + eo[0];
244
421k
                e[3] = ee[0] - eo[0];
245
421k
                e[1] = ee[1] + eo[1];
246
421k
                e[2] = ee[1] - eo[1];
247
2.10M
                for(k = 0; k < 4; k++)
248
1.68M
                {
249
1.68M
                    WORD32 itrans_out;
250
1.68M
                    itrans_out =
251
1.68M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
252
1.68M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
253
1.68M
                    itrans_out =
254
1.68M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
255
1.68M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
256
1.68M
                }
257
421k
                pi2_tmp++;
258
421k
                pu1_pred += pred_strd;
259
421k
                pu1_dst += dst_strd;
260
421k
            }
261
52.7k
        }
262
        /************************************************************************************************/
263
        /************************************END - IT_RECON_8x8******************************************/
264
        /************************************************************************************************/
265
111k
    }
266
904k
    else /* All rows of input are non-zero */
267
904k
    {
268
        /************************************************************************************************/
269
        /**********************************START - IT_RECON_8x8******************************************/
270
        /************************************************************************************************/
271
272
        /* Inverse Transform 1st stage */
273
904k
        shift = IT_SHIFT_STAGE_1;
274
904k
        add = 1 << (shift - 1);
275
276
7.85M
        for(j = 0; j < row_limit_2nd_stage; j++)
277
6.95M
        {
278
            /* Checking for Zero Cols */
279
6.95M
            if((zero_cols & 1) == 1)
280
10.0k
            {
281
10.0k
                memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
282
10.0k
            }
283
6.94M
            else
284
6.94M
            {
285
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
286
34.7M
                for(k = 0; k < 4; k++)
287
27.7M
                {
288
27.7M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
289
27.7M
                                    + g_ai2_ihevc_trans_8[3][k]
290
27.7M
                                                    * pi2_src[3 * src_strd]
291
27.7M
                                    + g_ai2_ihevc_trans_8[5][k]
292
27.7M
                                                    * pi2_src[5 * src_strd]
293
27.7M
                                    + g_ai2_ihevc_trans_8[7][k]
294
27.7M
                                                    * pi2_src[7 * src_strd];
295
27.7M
                }
296
297
6.94M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
298
6.94M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
299
6.94M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
300
6.94M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
301
6.94M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
302
6.94M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
303
6.94M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
304
6.94M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
305
306
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
307
6.94M
                e[0] = ee[0] + eo[0];
308
6.94M
                e[3] = ee[0] - eo[0];
309
6.94M
                e[1] = ee[1] + eo[1];
310
6.94M
                e[2] = ee[1] - eo[1];
311
34.7M
                for(k = 0; k < 4; k++)
312
27.7M
                {
313
27.7M
                    pi2_tmp[k] =
314
27.7M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
315
27.7M
                    pi2_tmp[k + 4] =
316
27.7M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
317
27.7M
                }
318
6.94M
            }
319
6.95M
            pi2_src++;
320
6.95M
            pi2_tmp += trans_size;
321
6.95M
            zero_cols = zero_cols >> 1;
322
6.95M
        }
323
324
904k
        pi2_tmp = pi2_tmp_orig;
325
326
        /* Inverse Transform 2nd stage */
327
904k
        shift = IT_SHIFT_STAGE_2;
328
904k
        add = 1 << (shift - 1);
329
904k
        if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
330
70.2k
        {
331
632k
            for(j = 0; j < trans_size; j++)
332
561k
            {
333
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
334
2.80M
                for(k = 0; k < 4; k++)
335
2.24M
                {
336
2.24M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
337
2.24M
                                    + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
338
2.24M
                }
339
561k
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
340
561k
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
341
561k
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
342
561k
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
343
344
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
345
561k
                e[0] = ee[0] + eo[0];
346
561k
                e[3] = ee[0] - eo[0];
347
561k
                e[1] = ee[1] + eo[1];
348
561k
                e[2] = ee[1] - eo[1];
349
2.80M
                for(k = 0; k < 4; k++)
350
2.24M
                {
351
2.24M
                    WORD32 itrans_out;
352
2.24M
                    itrans_out =
353
2.24M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
354
2.24M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
355
2.24M
                    itrans_out =
356
2.24M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
357
2.24M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
358
2.24M
                }
359
561k
                pi2_tmp++;
360
561k
                pu1_pred += pred_strd;
361
561k
                pu1_dst += dst_strd;
362
561k
            }
363
70.2k
        }
364
833k
        else /* All rows of output of 1st stage are non-zero */
365
833k
        {
366
7.50M
            for(j = 0; j < trans_size; j++)
367
6.67M
            {
368
                /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
369
33.3M
                for(k = 0; k < 4; k++)
370
26.6M
                {
371
26.6M
                    o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
372
26.6M
                                    + g_ai2_ihevc_trans_8[3][k]
373
26.6M
                                                    * pi2_tmp[3 * trans_size]
374
26.6M
                                    + g_ai2_ihevc_trans_8[5][k]
375
26.6M
                                                    * pi2_tmp[5 * trans_size]
376
26.6M
                                    + g_ai2_ihevc_trans_8[7][k]
377
26.6M
                                                    * pi2_tmp[7 * trans_size];
378
26.6M
                }
379
380
6.67M
                eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
381
6.67M
                                + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
382
6.67M
                eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
383
6.67M
                                + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
384
6.67M
                ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
385
6.67M
                                + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
386
6.67M
                ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
387
6.67M
                                + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
388
389
                /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
390
6.67M
                e[0] = ee[0] + eo[0];
391
6.67M
                e[3] = ee[0] - eo[0];
392
6.67M
                e[1] = ee[1] + eo[1];
393
6.67M
                e[2] = ee[1] - eo[1];
394
33.3M
                for(k = 0; k < 4; k++)
395
26.6M
                {
396
26.6M
                    WORD32 itrans_out;
397
26.6M
                    itrans_out =
398
26.6M
                                    CLIP_S16(((e[k] + o[k] + add) >> shift));
399
26.6M
                    pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
400
26.6M
                    itrans_out =
401
26.6M
                                    CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
402
26.6M
                    pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
403
26.6M
                }
404
6.67M
                pi2_tmp++;
405
6.67M
                pu1_pred += pred_strd;
406
6.67M
                pu1_dst += dst_strd;
407
6.67M
            }
408
833k
        }
409
        /************************************************************************************************/
410
        /************************************END - IT_RECON_8x8******************************************/
411
        /************************************************************************************************/
412
904k
    }
413
1.01M
}
414